//===-- SIInstrInfo.td - SI Instruction Infos -------------*- tablegen -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Wavefront-size predicates. Each one pairs a codegen-time subtarget query
// with the corresponding assembler feature requirement.
def isWave32
  : Predicate<"Subtarget->getWavefrontSize() == 32">,
    AssemblerPredicate<(all_of FeatureWavefrontSize32)>;
def isWave64
  : Predicate<"Subtarget->getWavefrontSize() == 64">,
    AssemblerPredicate<(all_of FeatureWavefrontSize64)>;

// PredicateControl extended with the two per-generation assembler predicates.
class GCNPredicateControl : PredicateControl {
  Predicate SIAssemblerPredicate = isGFX6GFX7;
  Predicate VIAssemblerPredicate = isGFX8GFX9;
}

// Except for the NONE field, this must be kept in sync with the
// SIEncodingFamily enum in AMDGPUInstrInfo.cpp
def SIEncodingFamily {
  int NONE   = -1;
  int SI     = 0;
  int VI     = 1;
  int SDWA   = 2;
  int SDWA9  = 3;
  int GFX80  = 4;
  int GFX9   = 5;
  int GFX10  = 6;
  int SDWA10 = 7;
  int GFX90A = 8;
}

//===----------------------------------------------------------------------===//
// SI DAG Nodes
//===----------------------------------------------------------------------===//

def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;

// Scalar buffer load: one result, three operands (v4i32, i32, i32).
def SIsbuffer_load : SDNode<
  "AMDGPUISD::SBUFFER_LOAD",
  SDTypeProfile<1, 3, [
    SDTCisVT<1, v4i32>,
    SDTCisVT<2, i32>,
    SDTCisVT<3, i32>
  ]>,
  [SDNPMayLoad, SDNPMemOperand]>;

def SIds_ordered_count : SDNode<
  "AMDGPUISD::DS_ORDERED_COUNT",
  SDTypeProfile<1, 2, [
    SDTCisVT<0, i32>,
    SDTCisVT<1, i32>,
    SDTCisVT<2, i16>
  ]>,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]>;

// Integer atomic increment / decrement.
def SIatomic_inc : SDNode<
  "AMDGPUISD::ATOMIC_INC", SDTAtomic2,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

def SIatomic_dec : SDNode<
  "AMDGPUISD::ATOMIC_DEC", SDTAtomic2,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

// Two-operand atomic profile whose result and data operand share one FP type;
// operand 1 is the pointer.
def SDTAtomic2_f32 : SDTypeProfile<1, 2, [
  SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
]>;

def SIatomic_fmin : SDNode<
  "AMDGPUISD::ATOMIC_LOAD_FMIN", SDTAtomic2_f32,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

def SIatomic_fmax : SDNode<
  "AMDGPUISD::ATOMIC_LOAD_FMAX", SDTAtomic2_f32,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

// load_d16_{lo|hi} ptr, tied_input
def SIload_d16 : SDTypeProfile<1, 2, [
  SDTCisPtrTy<1>,
  SDTCisSameAs<0, 2>
]>;


// Typed buffer load profile. Index 0 is the vdata result.
def SDTtbuffer_load : SDTypeProfile<1, 8, [
  SDTCisVT<1, v4i32>,   // rsrc
  SDTCisVT<2, i32>,     // vindex(VGPR)
  SDTCisVT<3, i32>,     // voffset(VGPR)
  SDTCisVT<4, i32>,     // soffset(SGPR)
  SDTCisVT<5, i32>,     // offset(imm)
  SDTCisVT<6, i32>,     // format(imm)
  SDTCisVT<7, i32>,     // cachepolicy, swizzled buffer(imm)
  SDTCisVT<8, i1>       // idxen(imm)
]>;

def SItbuffer_load : SDNode<
  "AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTtbuffer_load,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
def SItbuffer_load_d16 : SDNode<
  "AMDGPUISD::TBUFFER_LOAD_FORMAT_D16", SDTtbuffer_load,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

// Typed buffer store profile. Index 0 is the vdata operand.
def SDTtbuffer_store : SDTypeProfile<0, 9, [
  SDTCisVT<1, v4i32>,   // rsrc
  SDTCisVT<2, i32>,     // vindex(VGPR)
  SDTCisVT<3, i32>,     // voffset(VGPR)
  SDTCisVT<4, i32>,     // soffset(SGPR)
  SDTCisVT<5, i32>,     // offset(imm)
  SDTCisVT<6, i32>,     // format(imm)
  SDTCisVT<7, i32>,     // cachepolicy, swizzled buffer(imm)
  SDTCisVT<8, i1>       // idxen(imm)
]>;

def SItbuffer_store : SDNode<
  "AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
  [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SItbuffer_store_d16 : SDNode<
  "AMDGPUISD::TBUFFER_STORE_FORMAT_D16", SDTtbuffer_store,
  [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

// Untyped buffer load profile. Index 0 is the vdata result.
def SDTBufferLoad : SDTypeProfile<1, 7, [
  SDTCisVT<1, v4i32>,   // rsrc
  SDTCisVT<2, i32>,     // vindex(VGPR)
  SDTCisVT<3, i32>,     // voffset(VGPR)
  SDTCisVT<4, i32>,     // soffset(SGPR)
  SDTCisVT<5, i32>,     // offset(imm)
  SDTCisVT<6, i32>,     // cachepolicy, swizzled buffer(imm)
  SDTCisVT<7, i1>       // idxen(imm)
]>;

def SIbuffer_load : SDNode<
  "AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ubyte : SDNode<
  "AMDGPUISD::BUFFER_LOAD_UBYTE", SDTBufferLoad,
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ushort : SDNode<
  "AMDGPUISD::BUFFER_LOAD_USHORT", SDTBufferLoad,
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_byte : SDNode<
  "AMDGPUISD::BUFFER_LOAD_BYTE", SDTBufferLoad,
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_short : SDNode<
  "AMDGPUISD::BUFFER_LOAD_SHORT", SDTBufferLoad,
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format : SDNode<
  "AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format_d16 : SDNode<
  "AMDGPUISD::BUFFER_LOAD_FORMAT_D16", SDTBufferLoad,
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;

// Untyped buffer store profile. Index 0 is the vdata operand.
def SDTBufferStore : SDTypeProfile<0, 8, [
  SDTCisVT<1, v4i32>,   // rsrc
  SDTCisVT<2, i32>,     // vindex(VGPR)
  SDTCisVT<3, i32>,     // voffset(VGPR)
  SDTCisVT<4, i32>,     // soffset(SGPR)
  SDTCisVT<5, i32>,     // offset(imm)
  SDTCisVT<6, i32>,     // cachepolicy, swizzled buffer(imm)
  SDTCisVT<7, i1>       // idxen(imm)
]>;

def SIbuffer_store : SDNode<
  "AMDGPUISD::BUFFER_STORE", SDTBufferStore,
  [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_byte : SDNode<
  "AMDGPUISD::BUFFER_STORE_BYTE", SDTBufferStore,
  [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_short : SDNode<
  "AMDGPUISD::BUFFER_STORE_SHORT", SDTBufferStore,
  [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_format : SDNode<
  "AMDGPUISD::BUFFER_STORE_FORMAT", SDTBufferStore,
  [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_format_d16 : SDNode<
  "AMDGPUISD::BUFFER_STORE_FORMAT_D16", SDTBufferStore,
  [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

// Common shape of the buffer atomic nodes: one result and eight operands.
// Operand 1 is left unconstrained here; constraints start at the resource.
class SDBufferAtomic<string opcode> : SDNode<
  opcode,
  SDTypeProfile<1, 8, [
    SDTCisVT<2, v4i32>,   // rsrc
    SDTCisVT<3, i32>,     // vindex(VGPR)
    SDTCisVT<4, i32>,     // voffset(VGPR)
    SDTCisVT<5, i32>,     // soffset(SGPR)
    SDTCisVT<6, i32>,     // offset(imm)
    SDTCisVT<7, i32>,     // cachepolicy(imm)
    SDTCisVT<8, i1>       // idxen(imm)
  ]>,
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;

def SIbuffer_atomic_swap : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
def SIbuffer_atomic_add  : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_ADD">;
def SIbuffer_atomic_sub  : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_SUB">;
def SIbuffer_atomic_smin : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_SMIN">;
def SIbuffer_atomic_umin : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_UMIN">;
def SIbuffer_atomic_smax : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_SMAX">;
def SIbuffer_atomic_umax : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
def SIbuffer_atomic_and  : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_AND">;
def SIbuffer_atomic_or   : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_OR">;
def SIbuffer_atomic_xor  : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_XOR">;
def SIbuffer_atomic_inc  : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_INC">;
def SIbuffer_atomic_dec  : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_DEC">;
def SIbuffer_atomic_csub : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_CSUB">;
def SIbuffer_atomic_fadd : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_FADD">;
def SIbuffer_atomic_fmin : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_FMIN">;
def SIbuffer_atomic_fmax : SDBufferAtomic<"AMDGPUISD::BUFFER_ATOMIC_FMAX">;

// Compare-and-swap carries an extra data operand, so it has its own profile.
def SIbuffer_atomic_cmpswap : SDNode<
  "AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
  SDTypeProfile<1, 9, [
    SDTCisVT<0, i32>,     // dst
    SDTCisVT<1, i32>,     // src
    SDTCisVT<2, i32>,     // cmp
    SDTCisVT<3, v4i32>,   // rsrc
    SDTCisVT<4, i32>,     // vindex(VGPR)
    SDTCisVT<5, i32>,     // voffset(VGPR)
    SDTCisVT<6, i32>,     // soffset(SGPR)
    SDTCisVT<7, i32>,     // offset(imm)
    SDTCisVT<8, i32>,     // cachepolicy(imm)
    SDTCisVT<9, i1>       // idxen(imm)
  ]>,
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;

// Global atomic with no result: a pointer operand plus a data operand of
// type `ty`.
class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode<
  opcode,
  SDTypeProfile<0, 2, [
    SDTCisPtrTy<0>,       // vaddr
    SDTCisVT<1, ty>       // vdata
  ]>,
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;

def SIpc_add_rel_offset : SDNode<
  "AMDGPUISD::PC_ADD_REL_OFFSET",
  SDTypeProfile<1, 2, [
    SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>
  ]>>;

def SIlds : SDNode<
  "AMDGPUISD::LDS",
  SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>>;

// d16 loads into the low half; all use the SIload_d16 profile.
def SIload_d16_lo : SDNode<
  "AMDGPUISD::LOAD_D16_LO", SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

def SIload_d16_lo_u8 : SDNode<
  "AMDGPUISD::LOAD_D16_LO_U8", SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

def SIload_d16_lo_i8 : SDNode<
  "AMDGPUISD::LOAD_D16_LO_I8", SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

// d16 loads into the high half.
def SIload_d16_hi : SDNode<
  "AMDGPUISD::LOAD_D16_HI", SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

def SIload_d16_hi_u8 : SDNode<
  "AMDGPUISD::LOAD_D16_HI_U8", SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

def SIload_d16_hi_i8 : SDNode<
  "AMDGPUISD::LOAD_D16_HI_I8", SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

// Chained node with a single integer operand and optional glue in / glue out.
def SIdenorm_mode : SDNode<
  "AMDGPUISD::DENORM_MODE",
  SDTypeProfile<0, 1, [SDTCisInt<0>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;

//===----------------------------------------------------------------------===//
// ValueType helpers
//===----------------------------------------------------------------------===//

// ret is 1 when SrcVT is one of the floating-point value types listed below
// (scalar or vector), 0 otherwise.
// XXX - do f16 instructions?
class isFloatType<ValueType SrcVT> {
  bit ret = !or(
    !eq(SrcVT.Value, f16.Value),
    !eq(SrcVT.Value, f32.Value),
    !eq(SrcVT.Value, f64.Value),
    !eq(SrcVT.Value, v2f16.Value),
    !eq(SrcVT.Value, v4f16.Value),
    !eq(SrcVT.Value, v2f32.Value),
    !eq(SrcVT.Value, v2f64.Value),
    !eq(SrcVT.Value, v4f64.Value));
}

// ret is 1 when SrcVT is one of the integer value types listed below.
class isIntType<ValueType SrcVT> {
  bit ret = !or(
    !eq(SrcVT.Value, i16.Value),
    !eq(SrcVT.Value, i32.Value),
    !eq(SrcVT.Value, i64.Value),
    !eq(SrcVT.Value, v2i32.Value));
}

// ret is 1 when SrcVT is one of the packed vector types listed below.
class isPackedType<ValueType SrcVT> {
  bit ret = !or(
    !eq(SrcVT.Value, v2i16.Value),
    !eq(SrcVT.Value, v2f16.Value),
    !eq(SrcVT.Value, v4f16.Value),
    !eq(SrcVT.Value, v2f32.Value));
}


//===----------------------------------------------------------------------===//
// PatFrags for global memory operations
//===----------------------------------------------------------------------===//

// Instantiate the target atomic PatFrags once per address space; the address
// space list is looked up by name as "LoadAddress_<as>".
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

defm atomic_inc_#as       : binary_atomic_op<SIatomic_inc>;
defm atomic_dec_#as       : binary_atomic_op<SIatomic_dec>;
defm atomic_load_fmin_#as : binary_atomic_op<SIatomic_fmin, 0>;
defm atomic_load_fmax_#as : binary_atomic_op<SIatomic_fmax, 0>;

} // End let AddressSpaces = ...
} // End foreach AddrSpace


//===----------------------------------------------------------------------===//
// SDNodes PatFrags for loads/stores with a glue input.
// This is for SDNodes and PatFrag for local loads and stores to
// enable s_mov_b32 m0, -1 to be glued to the memory instructions.
//
// These mirror the regular load/store PatFrags and rely on special
// processing during Select() to add the glued copy.
//
//===----------------------------------------------------------------------===//

// ISD::LOAD / ISD::ATOMIC_LOAD with an additional glue input.
def AMDGPUld_glue : SDNode<
  "ISD::LOAD", SDTLoad,
  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]>;

def AMDGPUatomic_ld_glue : SDNode<
  "ISD::ATOMIC_LOAD", SDTAtomicLoad,
  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]>;

def unindexedload_glue
  : PatFrag<(ops node:$ptr), (AMDGPUld_glue node:$ptr)> {
  let IsLoad = 1;
  let IsUnindexed = 1;
}

def load_glue
  : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

// Glued atomic loads, one fragment per memory width.
def atomic_load_8_glue
  : PatFrag<(ops node:$ptr), (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i8;
}

def atomic_load_16_glue
  : PatFrag<(ops node:$ptr), (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i16;
}

def atomic_load_32_glue
  : PatFrag<(ops node:$ptr), (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_load_64_glue
  : PatFrag<(ops node:$ptr), (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}

// Glued extending loads: any-, sign- and zero-extending.
def extload_glue
  : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsAnyExtLoad = 1;
}

def sextload_glue
  : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsSignExtLoad = 1;
}

def zextload_glue
  : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsZeroExtLoad = 1;
}

// The same extending loads, narrowed to a specific memory type.
def extloadi8_glue
  : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def zextloadi8_glue
  : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def extloadi16_glue
  : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def zextloadi16_glue
  : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def sextloadi8_glue
  : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def sextloadi16_glue
  : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}


// Local-address-space variants of the glued loads.
let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
def load_local_m0
  : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
  let IsNonExtLoad = 1;
}

let MemoryVT = i8 in {
def extloadi8_local_m0
  : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>;
def sextloadi8_local_m0
  : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>;
def zextloadi8_local_m0
  : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>;
}

let MemoryVT = i16 in {
def extloadi16_local_m0
  : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>;
def sextloadi16_local_m0
  : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>;
def zextloadi16_local_m0
  : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>;
}

// Non-extending local loads with a minimum alignment requirement.
def load_align8_local_m0
  : PatFrag<(ops node:$ptr), (load_local_m0 node:$ptr)>, Aligned<8> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def load_align16_local_m0
  : PatFrag<(ops node:$ptr), (load_local_m0 node:$ptr)>, Aligned<16> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

} // End IsLoad = 1

let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
def atomic_load_8_local_m0
  : PatFrag<(ops node:$ptr), (atomic_load_8_glue node:$ptr)> {
  let MemoryVT = i8;
}
def atomic_load_16_local_m0
  : PatFrag<(ops node:$ptr), (atomic_load_16_glue node:$ptr)> {
  let MemoryVT = i16;
}
def atomic_load_32_local_m0
  : PatFrag<(ops node:$ptr), (atomic_load_32_glue node:$ptr)> {
  let MemoryVT = i32;
}
def atomic_load_64_local_m0
  : PatFrag<(ops node:$ptr), (atomic_load_64_glue node:$ptr)> {
  let MemoryVT = i64;
}

} // End let AddressSpaces = LoadAddress_local.AddrSpaces


// ISD::STORE / ISD::ATOMIC_STORE with an additional glue input.
def AMDGPUst_glue : SDNode<
  "ISD::STORE", SDTStore,
  [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]>;

def AMDGPUatomic_st_glue : SDNode<
  "ISD::ATOMIC_STORE", SDTAtomicStore,
  [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]>;

def unindexedstore_glue
  : PatFrag<(ops node:$val, node:$ptr),
            (AMDGPUst_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsUnindexed = 1;
}

def store_glue
  : PatFrag<(ops node:$val, node:$ptr),
            (unindexedstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

def truncstore_glue
  : PatFrag<(ops node:$val, node:$ptr),
            (unindexedstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 1;
}

def truncstorei8_glue
  : PatFrag<(ops node:$val, node:$ptr),
            (truncstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i8;
}

def truncstorei16_glue
  : PatFrag<(ops node:$val, node:$ptr),
            (truncstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i16;
}

// Local-address-space variants of the glued stores.
let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
def store_local_m0
  : PatFrag<(ops node:$val, node:$ptr),
            (store_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

// NOTE(review): the two fragments below wrap unindexedstore_glue directly and
// do not set IsTruncStore, whereas the local load fragments wrap the typed
// *_glue fragments. Confirm whether truncstorei8_glue / truncstorei16_glue
// were intended here.
def truncstorei8_local_m0
  : PatFrag<(ops node:$val, node:$ptr),
            (unindexedstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i8;
}

def truncstorei16_local_m0
  : PatFrag<(ops node:$val, node:$ptr),
            (unindexedstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i16;
}
}

// Non-truncating local stores with a minimum alignment requirement.
def store_align8_local_m0
  : PatFrag<(ops node:$value, node:$ptr),
            (store_local_m0 node:$value, node:$ptr)>,
    Aligned<8> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

def store_align16_local_m0
  : PatFrag<(ops node:$value, node:$ptr),
            (store_local_m0 node:$value, node:$ptr)>,
    Aligned<16> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

let AddressSpaces = StoreAddress_local.AddrSpaces in {

def atomic_store_local_8_m0
  : PatFrag<(ops node:$value, node:$ptr),
            (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i8;
}
def atomic_store_local_16_m0
  : PatFrag<(ops node:$value, node:$ptr),
            (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i16;
}
def atomic_store_local_32_m0
  : PatFrag<(ops node:$value, node:$ptr),
            (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}
def atomic_store_local_64_m0
  : PatFrag<(ops node:$value, node:$ptr),
            (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}
} // End let AddressSpaces = StoreAddress_local.AddrSpaces


// setcc that only matches when the node is not divergent.
def si_setcc_uniform : PatFrag<
  (ops node:$lhs, node:$rhs, node:$cond),
  (setcc node:$lhs, node:$rhs, node:$cond), [{
  return !N->isDivergent();
}]>;

//===----------------------------------------------------------------------===//
// SDNodes PatFrags for a16 loads and stores with 3 components.
// v3f16/v3i16 is widened to v4f16/v4i16, so we need to match on the memory
// load/store size.
//===----------------------------------------------------------------------===//

// Wraps a MUBUF load node `name` and additionally requires memory type `vt`.
class mubuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag<
  (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
       node:$auxiliary, node:$idxen),
  (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
        node:$auxiliary, node:$idxen)> {
  let IsLoad = 1;
  let MemoryVT = vt;
}

// Store counterpart of mubuf_intrinsic_load; $vdata is the leading operand.
class mubuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag<
  (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
       node:$offset, node:$auxiliary, node:$idxen),
  (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
        node:$offset, node:$auxiliary, node:$idxen)> {
  let IsStore = 1;
  let MemoryVT = vt;
}

// MTBUF variants: same as above with an extra $format operand.
class mtbuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag<
  (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
       node:$format, node:$auxiliary, node:$idxen),
  (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
        node:$format, node:$auxiliary, node:$idxen)> {
  let IsLoad = 1;
  let MemoryVT = vt;
}

class mtbuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag<
  (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
       node:$offset, node:$format, node:$auxiliary, node:$idxen),
  (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
        node:$offset, node:$format, node:$auxiliary, node:$idxen)> {
  let IsStore = 1;
  let MemoryVT = vt;
}

//===----------------------------------------------------------------------===//
// SDNodes PatFrags for d16 loads
//===----------------------------------------------------------------------===//

// A d16 load fragment takes the pointer and the tied input value.
class LoadD16Frag<SDPatternOperator op> : PatFrag<
  (ops node:$ptr, node:$tied_in),
  (op node:$ptr, node:$tied_in)> {
  let IsLoad = 1;
}

// Instantiate the d16 load fragments once per address space.
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

def load_d16_hi_#as : LoadD16Frag<SIload_d16_hi>;

def az_extloadi8_d16_hi_#as : LoadD16Frag<SIload_d16_hi_u8> {
  let MemoryVT = i8;
}

def sextloadi8_d16_hi_#as : LoadD16Frag<SIload_d16_hi_i8> {
  let MemoryVT = i8;
}

def load_d16_lo_#as : LoadD16Frag<SIload_d16_lo>;

def az_extloadi8_d16_lo_#as : LoadD16Frag<SIload_d16_lo_u8> {
  let MemoryVT = i8;
}

def sextloadi8_d16_lo_#as : LoadD16Frag<SIload_d16_lo_i8> {
  let MemoryVT = i8;
}

} // End let AddressSpaces = ...
} // End foreach AddrSpace

// Shift fragments with the two operands swapped relative to srl/sra/shl.
def lshr_rev : PatFrag<
  (ops node:$src1, node:$src0),
  (srl $src0, $src1)>;

def ashr_rev : PatFrag<
  (ops node:$src1, node:$src0),
  (sra $src0, $src1)>;

def lshl_rev : PatFrag<
  (ops node:$src1, node:$src0),
  (shl $src0, $src1)>;

// ctpop(src0) + src1.
def add_ctpop : PatFrag<
  (ops node:$src0, node:$src1),
  (add (ctpop $src0), $src1)>;

// not(src0 xor src1).
def xnor : PatFrag<
  (ops node:$src0, node:$src1),
  (not (xor $src0, $src1))>;

// shl<I>_add for I in 1..4: (src0 << I) + src1, where the shift has one use.
foreach I = 1-4 in {
def shl#I#_add : PatFrag<
  (ops node:$src0, node:$src1),
  (add (shl_oneuse $src0, (i32 I)), $src1)> {
  // FIXME: Poor substitute for disabling pattern in SelectionDAG
  let PredicateCode = [{return false;}];
  let GISelPredicateCode = [{return true;}];
}
}

// For one atomic operation, defines a glued SDNode (`<NAME>_glue`) and the
// local / region binary_atomic_op fragments built on top of it.
//   op_name   - suffix appended to "::ATOMIC_" to form the opcode name.
//   is_amdgpu - selects the "AMDGPUISD" namespace instead of "ISD".
//   tc        - type profile of the node.
//   IsInt     - forwarded to binary_atomic_op.
multiclass SIAtomicM0Glue2<string op_name, bit is_amdgpu = 0,
                           SDTypeProfile tc = SDTAtomic2,
                           bit IsInt = 1> {

  def _glue : SDNode<
    !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, tc,
    [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]>;

  let AddressSpaces = StoreAddress_local.AddrSpaces in {
    defm _local_m0 : binary_atomic_op<!cast<SDNode>(NAME#"_glue"), IsInt>;
  }

  let AddressSpaces = StoreAddress_region.AddrSpaces in {
    defm _region_m0 : binary_atomic_op<!cast<SDNode>(NAME#"_glue"), IsInt>;
  }
}

defm atomic_load_add  : SIAtomicM0Glue2<"LOAD_ADD">;
defm atomic_load_sub  : SIAtomicM0Glue2<"LOAD_SUB">;
defm atomic_inc       : SIAtomicM0Glue2<"INC", 1>;
defm atomic_dec       : SIAtomicM0Glue2<"DEC", 1>;
defm atomic_load_and  : SIAtomicM0Glue2<"LOAD_AND">;
defm atomic_load_min  : SIAtomicM0Glue2<"LOAD_MIN">;
defm atomic_load_max  : SIAtomicM0Glue2<"LOAD_MAX">;
defm atomic_load_or   : SIAtomicM0Glue2<"LOAD_OR">;
defm atomic_load_xor  : SIAtomicM0Glue2<"LOAD_XOR">;
defm atomic_load_umin : SIAtomicM0Glue2<"LOAD_UMIN">;
defm atomic_load_umax : SIAtomicM0Glue2<"LOAD_UMAX">;
defm atomic_swap      : SIAtomicM0Glue2<"SWAP">;
defm atomic_load_fadd : SIAtomicM0Glue2<"LOAD_FADD", 0, SDTAtomic2_f32, 0>;
defm atomic_load_fmin : SIAtomicM0Glue2<"LOAD_FMIN", 1, SDTAtomic2_f32, 0>;
defm atomic_load_fmax : SIAtomicM0Glue2<"LOAD_FMAX", 1, SDTAtomic2_f32, 0>;

// Immediate transforms: rebuild the matched constant as a target constant of
// the stated type.
def as_i1timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
}]>;

def as_i8imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8);
}]>;

// NOTE(review): despite its name this produces a sign-extended MVT::i16
// constant, unlike as_i8imm (zero-extended, MVT::i8). Confirm this is intended.
def as_i8timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;

def as_i16imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;

def as_i16timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;

def as_i32imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

def as_i32timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

def as_i64imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
}]>;

// Condition code emitted as an i32 target constant.
def cond_as_i32imm : SDNodeXForm<cond, [{
  return CurDAG->getTargetConstant(N->get(), SDLoc(N), MVT::i32);
}]>;

// Copied from the AArch64 backend:
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def frameindex_to_targetframeindex : SDNodeXForm<frameindex, [{
  auto FI = cast<FrameIndexSDNode>(N);
  return CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
}]>;

// Copied from the AArch64 backend:
def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;

// Extracts bit `bitnum` of the immediate as an i1 target constant. The bit
// index is spliced into the C++ fragment at TableGen time.
class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
  uint64_t Imm = N->getZExtValue();
  unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;
  return CurDAG->getTargetConstant(Bit, SDLoc(N), MVT::i1);
}]>;

// Immediate range / inline-constant leaves.
def SIMM16bit : ImmLeaf<i32,
  [{return isInt<16>(Imm);}]>;

def UIMM16bit : ImmLeaf<i32,
  [{return isUInt<16>(Imm);}]>;

// i64 immediates whose upper 32 bits are all zero.
def i64imm_32bit : ImmLeaf<i64, [{
  return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
}]>;

def InlineImm16 : ImmLeaf<i16, [{
  return isInlineImmediate16(Imm);
}]>;

def InlineImm32 : ImmLeaf<i32, [{
  return isInlineImmediate32(Imm);
}]>;

def InlineImm64 : ImmLeaf<i64, [{
  return isInlineImmediate64(Imm);
}]>;

def InlineImmFP32 : FPImmLeaf<f32, [{
  return isInlineImmediate(Imm);
}]>;

def InlineImmFP64 : FPImmLeaf<f64, [{
  return isInlineImmediate(Imm);
}]>;


// Leaf matching `frag` only when isVGPRImm(N) holds.
class VGPRImm<dag frag> : PatLeaf<frag, [{
  return isVGPRImm(N);
}]>;

// Negates the (sign-extended) immediate and emits it as an i32 constant.
def NegateImm : SDNodeXForm<imm, [{
  return CurDAG->getConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

// TODO: When FP inline imm values work?
def NegSubInlineConst32 : ImmLeaf<i32, [{
  return Imm < -16 && Imm >= -64;
}], NegateImm>;

def NegSubInlineIntConst16 : ImmLeaf<i16, [{
  return Imm < -16 && Imm >= -64;
}], NegateImm>;

// NOTE(review): only the upper bound is checked; negative immediates also
// satisfy this predicate. Confirm that is acceptable for all users.
def ShiftAmt32Imm : ImmLeaf<i32, [{
  return Imm < 32;
}]>;

def getNegV2I16Imm : SDNodeXForm<build_vector, [{
  return SDValue(packNegConstantV2I16(N, *CurDAG), 0);
}]>;

// Two-element build_vector of 16-bit values where either both elements are
// the same negated-inline immediate, or one element is such an immediate and
// the other is zero/undef.
def NegSubInlineConstV216 : PatLeaf<(build_vector), [{
  assert(N->getNumOperands() == 2);
  assert(N->getOperand(0).getValueType().getSizeInBits() == 16);
  SDValue Src0 = N->getOperand(0);
  SDValue Src1 = N->getOperand(1);
  if (Src0 == Src1)
    return isNegInlineImmediate(Src0.getNode());

  return (isNullConstantOrUndef(Src0) && isNegInlineImmediate(Src1.getNode())) ||
         (isNullConstantOrUndef(Src1) && isNegInlineImmediate(Src0.getNode()));
}], getNegV2I16Imm>;


def fp16_zeros_high_16bits : PatLeaf<(f16 VGPR_32:$src), [{
  return fp16SrcZerosHighBits(N->getOpcode());
}]>;


//===----------------------------------------------------------------------===//
// MUBUF/SMEM Patterns
//===----------------------------------------------------------------------===//

// Keep only the bits covered by AMDGPU::CPol::ALL.
def extract_cpol : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & AMDGPU::CPol::ALL, SDLoc(N), MVT::i8);
}]>;

// Bit 3 of the immediate, as a 0/1 value.
def extract_swz : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant((N->getZExtValue() >> 3) & 1, SDLoc(N), MVT::i8);
}]>;

// The immediate with AMDGPU::CPol::GLC OR-ed in.
def set_glc : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() | AMDGPU::CPol::GLC, SDLoc(N), MVT::i8);
}]>;

//===----------------------------------------------------------------------===//
// Custom Operands
//===----------------------------------------------------------------------===//

def SoppBrTarget : AsmOperandClass {
  let Name = "SoppBrTarget";
  let ParserMethod = "parseSOppBrTarget";
}

// PC-relative branch target operand with custom encode / decode hooks.
def sopp_brtarget : Operand<OtherVT> {
  let EncoderMethod = "getSOPPBrEncoding";
  let DecoderMethod = "decodeSoppBrTarget";
  let OperandType = "OPERAND_PCREL";
  let ParserMatchClass = SoppBrTarget;
}

def si_ga : Operand<iPTR>;

def InterpSlotMatchClass : AsmOperandClass {
  let Name = "InterpSlot";
  let PredicateMethod = "isInterpSlot";
  let ParserMethod = "parseInterpSlot";
  let RenderMethod = "addImmOperands";
}

def InterpSlot : Operand<i32> {
  let PrintMethod = "printInterpSlot";
  let ParserMatchClass = InterpSlotMatchClass;
  let OperandType = "OPERAND_IMMEDIATE";
}

def AttrMatchClass : AsmOperandClass {
  let Name = "Attr";
  let PredicateMethod = "isInterpAttr";
  let ParserMethod = "parseInterpAttr";
  let RenderMethod = "addImmOperands";
}

// It appears to be necessary to create a separate operand for this to
// be able to parse attr<num> with no space.
def Attr : Operand<i32> {
  let PrintMethod = "printInterpAttr";
  let ParserMatchClass = AttrMatchClass;
  let OperandType = "OPERAND_IMMEDIATE";
}

def AttrChanMatchClass : AsmOperandClass {
  let Name = "AttrChan";
  let PredicateMethod = "isAttrChan";
  let RenderMethod = "addImmOperands";
}

def AttrChan : Operand<i32> {
  let PrintMethod = "printInterpAttrChan";
  let ParserMatchClass = AttrChanMatchClass;
  let OperandType = "OPERAND_IMMEDIATE";
}

def SendMsgMatchClass : AsmOperandClass {
  let Name = "SendMsg";
  let PredicateMethod = "isSendMsg";
  let ParserMethod = "parseSendMsgOp";
  let RenderMethod = "addImmOperands";
}

def SwizzleMatchClass : AsmOperandClass {
  let Name = "Swizzle";
  let PredicateMethod = "isSwizzle";
  let ParserMethod = "parseSwizzleOp";
  let RenderMethod = "addImmOperands";
  let IsOptional = 1;
}

def EndpgmMatchClass : AsmOperandClass {
  let Name = "EndpgmImm";
  let PredicateMethod = "isEndpgm";
  let ParserMethod = "parseEndpgmOp";
  let RenderMethod = "addImmOperands";
  let IsOptional = 1;
}

// NOTE(review): RenderMethod names a print* method here, while the sibling
// match classes in this section use "addImmOperands". Confirm this is intended.
def ExpTgtMatchClass : AsmOperandClass {
  let Name = "ExpTgt";
  let PredicateMethod = "isExpTgt";
  let ParserMethod = "parseExpTgt";
  let RenderMethod = "printExpTgt";
}

def SWaitMatchClass : AsmOperandClass {
  let Name = "SWaitCnt";
  let RenderMethod = "addImmOperands";
  let ParserMethod = "parseSWaitCntOps";
}

def VReg32OrOffClass : AsmOperandClass {
  let Name = "VReg32OrOff";
  let ParserMethod = "parseVReg32OrOff";
}

// Immediate operands, each with its own printer and parser match class.
let OperandType = "OPERAND_IMMEDIATE" in {
def SendMsgImm : Operand<i32> {
  let PrintMethod = "printSendMsg";
  let ParserMatchClass = SendMsgMatchClass;
}

def SwizzleImm : Operand<i16> {
  let PrintMethod = "printSwizzle";
  let ParserMatchClass = SwizzleMatchClass;
}

def EndpgmImm : Operand<i16> {
  let PrintMethod = "printEndpgm";
  let ParserMatchClass = EndpgmMatchClass;
}

def WAIT_FLAG : Operand<i32> {
  let ParserMatchClass = SWaitMatchClass;
  let PrintMethod = "printWaitFlag";
}
} // End OperandType = "OPERAND_IMMEDIATE"

include "SIInstrFormats.td"
include "VIInstrFormats.td"

def BoolReg : AsmOperandClass {
  let Name = "BoolReg";
  let ParserMethod = "parseBoolReg";
  let RenderMethod = "addRegOperands";
}

// Register operand over SReg_1 that parses and decodes as a boolean register.
class BoolRC : RegisterOperand<SReg_1> {
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
}

def SSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
}

def VOPDstS64orS32 : BoolRC {
  let PrintMethod = "printVOPDst";
}

// SCSrc_i1 is the operand for pseudo instructions only.
// Boolean immediates shall not be exposed to codegen instructions.
def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_REG_IMM_INT32";
}

// ===----------------------------------------------------------------------===//
//  ExpSrc* Special cases for exp src operands which are printed as
//  "off" depending on en operand.
// ===----------------------------------------------------------------------===//

// Defines ExpSrc0 .. ExpSrc3; each one prints through its own
// printExpSrc<N> hook and shares the VReg32OrOff match class.
foreach Idx = [0, 1, 2, 3] in
def ExpSrc#Idx : RegisterOperand<VGPR_32> {
  let ParserMatchClass = VReg32OrOffClass;
  let PrintMethod = "printExpSrc"#Idx;
}

// SDWA source operand over VS_32. The operand type and decoder names are
// derived from the value type: FP/INT from isFloatType, width from vt.Size.
class SDWASrc<ValueType vt> : RegisterOperand<VS_32> {
  string Type = !if(isFloatType<vt>.ret, "FP", "INT");
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_REG_INLINE_C_"#Type#vt.Size;
  let EncoderMethod = "getSDWASrcEncoding";
  let DecoderMethod = "decodeSDWASrc"#vt.Size;
}

def SDWASrc_i32 : SDWASrc<i32>;
def SDWASrc_i16 : SDWASrc<i16>;
def SDWASrc_f32 : SDWASrc<f32>;
def SDWASrc_f16 : SDWASrc<f16>;

// Boolean destination used by getSDWADstForVT for 1-bit results.
def SDWAVopcDst : BoolRC {
  let PrintMethod = "printVOPDst";
  let EncoderMethod = "getSDWAVopcDstEncoding";
  let DecoderMethod = "decodeSDWAVopcDst";
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_SDWA_VOPC_DST";
}

// Match class for a named immediate operand. All hook names are derived from
// CName. Optional operands (the default) parse through the shared
// "parseOptionalOperand" and get a "default<CName>" method; mandatory ones
// use "parse<CName>" and leave DefaultMethod unset.
class NamedMatchClass<string CName, bit Optional = 1> : AsmOperandClass {
  let Name = "Imm"#CName;
  let PredicateMethod = "is"#CName;
  let RenderMethod = "addImmOperands";
  let IsOptional = Optional;
  let ParserMethod = !if(Optional, "parseOptionalOperand", "parse"#CName);
  let DefaultMethod = !if(Optional, "default"#CName, ?);
}

// Named operand wrappers. Each variant fixes the value type (and, for the
// OperandWithDefaultOps forms, the default value) and prints via "print<Name>".

class NamedOperandBit<string Name, AsmOperandClass MatchClass> : Operand<i1> {
  let ParserMatchClass = MatchClass;
  let PrintMethod = "print"#Name;
}

class NamedOperandBit_0<string Name, AsmOperandClass MatchClass> :
  OperandWithDefaultOps<i1, (ops (i1 0))> {
  let ParserMatchClass = MatchClass;
  let PrintMethod = "print"#Name;
}

class NamedOperandBit_1<string Name, AsmOperandClass MatchClass> :
  OperandWithDefaultOps<i1, (ops (i1 1))> {
  let ParserMatchClass = MatchClass;
  let PrintMethod = "print"#Name;
}

class NamedOperandU8<string Name, AsmOperandClass MatchClass> : Operand<i8> {
  let ParserMatchClass = MatchClass;
  let PrintMethod = "print"#Name;
}

class NamedOperandU16<string Name, AsmOperandClass MatchClass> : Operand<i16> {
  let ParserMatchClass = MatchClass;
  let PrintMethod = "print"#Name;
}

class NamedOperandU32<string Name, AsmOperandClass MatchClass> : Operand<i32> {
  let ParserMatchClass = MatchClass;
  let PrintMethod = "print"#Name;
}

class NamedOperandU32_0<string Name, AsmOperandClass MatchClass> :
  OperandWithDefaultOps<i32, (ops (i32 0))> {
  let ParserMatchClass = MatchClass;
  let PrintMethod = "print"#Name;
}

class NamedOperandU32Default0<string Name, AsmOperandClass MatchClass> :
  OperandWithDefaultOps<i32, (ops (i32 0))> {
  let ParserMatchClass = MatchClass;
  let PrintMethod = "print"#Name;
}

class NamedOperandU32Default1<string Name, AsmOperandClass MatchClass> :
  OperandWithDefaultOps<i32, (ops (i32 1))> {
  let ParserMatchClass = MatchClass;
  let PrintMethod = "print"#Name;
}

// This scope stays open at the end of this span; its closing brace follows
// the remaining named operand definitions further down the file.
let OperandType = "OPERAND_IMMEDIATE" in {

def offen       : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>;
def idxen       : NamedOperandBit<"Idxen", NamedMatchClass<"Idxen">>;
def addr64      : NamedOperandBit<"Addr64", NamedMatchClass<"Addr64">>;

def flat_offset : NamedOperandU16<"FlatOffset", NamedMatchClass<"FlatOffset">>;
def offset      : NamedOperandU16<"Offset", NamedMatchClass<"Offset">>;
def offset0     : NamedOperandU8<"Offset0", NamedMatchClass<"Offset0">>;
def offset1     : NamedOperandU8<"Offset1", NamedMatchClass<"Offset1">>;

def gds         : NamedOperandBit<"GDS", NamedMatchClass<"GDS">>;

def omod        : NamedOperandU32<"OModSI", NamedMatchClass<"OModSI">>;
def omod0       : NamedOperandU32_0<"OModSI", NamedMatchClass<"OModSI">>;

// We need to make the cases with a default of 0 distinct from no
// default to help deal with some cases where the operand appears
// before a mandatory operand.
def clampmod    : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>;
def clampmod0   : NamedOperandBit_0<"ClampSI", NamedMatchClass<"ClampSI">>;
def highmod     : NamedOperandBit<"High", NamedMatchClass<"High">>;

def CPol        : NamedOperandU32<"CPol", NamedMatchClass<"CPol">>;
def CPol_0      : NamedOperandU32Default0<"CPol", NamedMatchClass<"CPol">>;
def CPol_GLC1   : NamedOperandU32Default1<"CPol", NamedMatchClass<"CPol">>;

def TFE         : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
def TFE_0       : NamedOperandBit_0<"TFE", NamedMatchClass<"TFE">>;
def SWZ         : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>;
def SWZ_0       : NamedOperandBit_0<"SWZ", NamedMatchClass<"SWZ">>;
def UNorm       : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
def DA          : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
def R128A16     : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
def GFX10A16    : NamedOperandBit<"GFX10A16", NamedMatchClass<"GFX10A16">>;
def D16         : NamedOperandBit<"D16", NamedMatchClass<"D16">>;
def LWE         : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
def exp_compr  : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
def exp_vm     : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>;

def FORMAT     : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT", 0>>;

def DMask      : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
def Dim        : NamedOperandU8<"Dim", NamedMatchClass<"Dim", 0>>;

def dpp8       : NamedOperandU32<"DPP8", NamedMatchClass<"DPP8", 0>>;

def dpp_ctrl   : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>;
def row_mask   : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>;
def bank_mask  : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>;
def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>;
def FI         : NamedOperandU32<"FI", NamedMatchClass<"FI">>;

def dst_sel    : NamedOperandU32<"SDWADstSel", NamedMatchClass<"SDWADstSel">>;
def src0_sel   : NamedOperandU32<"SDWASrc0Sel", NamedMatchClass<"SDWASrc0Sel">>;
def src1_sel   : NamedOperandU32<"SDWASrc1Sel", NamedMatchClass<"SDWASrc1Sel">>;
def dst_unused : NamedOperandU32<"SDWADstUnused", NamedMatchClass<"SDWADstUnused">>;

def op_sel0    : NamedOperandU32Default0<"OpSel", NamedMatchClass<"OpSel">>;
def op_sel_hi0 : NamedOperandU32Default0<"OpSelHi", NamedMatchClass<"OpSelHi">>;
def neg_lo0    : NamedOperandU32Default0<"NegLo", NamedMatchClass<"NegLo">>;
def neg_hi0    : NamedOperandU32Default0<"NegHi", NamedMatchClass<"NegHi">>;

def blgp       : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>;
def cbsz       : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>;
def abid       : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>;

def hwreg      : NamedOperandU32<"Hwreg", NamedMatchClass<"Hwreg", 0>>;

def exp_tgt    : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>>;

} // End OperandType = "OPERAND_IMMEDIATE"

// Match class for a KImm operand of the given bit size; the predicate and
// render hook names embed the size.
class KImmMatchClass<int size> : AsmOperandClass {
  let Name = "KImmFP"#size;
  let ParserMethod = "parseImm";
  let PredicateMethod = "isKImmFP"#size;
  let RenderMethod = "addKImmFP"#size#"Operands";
}

// KImm operand. The match class is looked up by name, so the matching
// KImmFP<Size>MatchClass record must be defined before each instantiation.
class kimmOperand<ValueType vt> : Operand<vt> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_KIMM"#vt.Size;
  let ParserMatchClass = !cast<AsmOperandClass>("KImmFP"#vt.Size#"MatchClass");
  let PrintMethod = "printU"#vt.Size#"ImmOperand";
  let DecoderMethod = "decodeOperand_f"#vt.Size#"kimm";
}

// 32-bit VALU immediate operand that uses the constant bus.
def KImmFP32MatchClass : KImmMatchClass<32>;
def f32kimm : kimmOperand<i32>;

// 32-bit VALU immediate operand with a 16-bit value that uses the
// constant bus.
def KImmFP16MatchClass : KImmMatchClass<16>;
def f16kimm : kimmOperand<i16>;

// Register-or-immediate source with floating-point input modifiers.
class FPInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "RegOrImmWithFP"#opSize#"InputMods";
  let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithFPInputMods";
}

def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;

// Base class of every input-modifier operand in this file.
class InputMods <AsmOperandClass matchClass> : Operand <i32> {
  let ParserMatchClass = matchClass;
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_INPUT_MODS";
}

class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;

// Register-or-immediate source with integer input modifiers.
class IntInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "RegOrImmWithInt"#opSize#"InputMods";
  let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithIntInputMods";
}
def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
def Int64InputModsMatchClass : IntInputModsMatchClass<64>;

class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}
def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;

class OpSelModsMatchClass : AsmOperandClass {
  let Name = "OpSelMods";
  let PredicateMethod = "isRegOrImm";
  let ParserMethod = "parseRegOrImm";
}

def IntOpSelModsMatchClass : OpSelModsMatchClass;
def IntOpSelMods : InputMods<IntOpSelModsMatchClass>;

// SDWA source with floating-point input modifiers.
class FPSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "SDWAWithFP"#opSize#"InputMods";
  let PredicateMethod = "isSDWAFP"#opSize#"Operand";
  let ParserMethod = "parseRegOrImmWithFPInputMods";
}

def FP16SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<16>;
def FP32SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<32>;

class FPSDWAInputMods <FPSDWAInputModsMatchClass matchClass> :
  InputMods <matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

def FP16SDWAInputMods : FPSDWAInputMods<FP16SDWAInputModsMatchClass>;
def FP32SDWAInputMods : FPSDWAInputMods<FP32SDWAInputModsMatchClass>;

// Register-only source with floating-point input modifiers.
def FPVRegInputModsMatchClass : AsmOperandClass {
  let Name = "VRegWithFPInputMods";
  let PredicateMethod = "isVRegWithInputMods";
  let ParserMethod = "parseRegWithFPInputMods";
}

def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

// SDWA source with integer input modifiers.
class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "SDWAWithInt"#opSize#"InputMods";
  let PredicateMethod = "isSDWAInt"#opSize#"Operand";
  let ParserMethod = "parseRegOrImmWithIntInputMods";
}

def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>;
def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>;

class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> :
  InputMods <matchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}

def Int16SDWAInputMods : IntSDWAInputMods<Int16SDWAInputModsMatchClass>;
def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>;

// Register-only source with integer input modifiers.
def IntVRegInputModsMatchClass : AsmOperandClass {
  let Name = "VRegWithIntInputMods";
  let PredicateMethod = "isVRegWithInputMods";
  let ParserMethod = "parseRegWithIntInputMods";
}

def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}

// Packed sources currently use the generic register-or-immediate parser and
// predicate; the size-specific predicate is left disabled.
class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "PackedFP"#opSize#"InputMods";
  let PredicateMethod = "isRegOrImm";
  let ParserMethod = "parseRegOrImm";
//  let PredicateMethod = "isPackedFP"#opSize#"InputMods";
}

class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "PackedInt"#opSize#"InputMods";
  let PredicateMethod = "isRegOrImm";
  let ParserMethod = "parseRegOrImm";
//  let PredicateMethod = "isPackedInt"#opSize#"InputMods";
}

def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;

// No dedicated print method is installed for the packed modifier operands;
// the candidates are left disabled.
class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
//  let PrintMethod = "printPackedFPInputMods";
}

class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <matchClass> {
//  let PrintMethod = "printPackedIntInputMods";
}

def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;

//===----------------------------------------------------------------------===//
// Complex patterns
//===----------------------------------------------------------------------===//

def DS1Addr1Offset       : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;
def DS64Bit4ByteAligned  : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
def DS128Bit8ByteAligned : ComplexPattern<i64, 3, "SelectDS128Bit8ByteAligned">;

def MOVRELOffset : ComplexPattern<i32, 2, "SelectMOVRELOffset">;

def VOP3Mods0  : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
def VOP3Mods   : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
// VOP3Mods, but the input source is known to never be NaN.
def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;

def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;

def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;

def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;

def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;

def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;

//===----------------------------------------------------------------------===//
// SI assembler operands
//===----------------------------------------------------------------------===//

def SIOperand {
  int ZERO     = 0x80;
  int VCC      = 0x6A;
  int FLAT_SCR = 0x68;
}

// This should be kept in sync with SISrcMods enum
def SRCMODS {
  int NONE    = 0;
  int NEG     = 1;
  int ABS     = 2;
  int NEG_ABS = 3;

  // NEG_HI reuses the ABS value.
  int NEG_HI     = ABS;
  int OP_SEL_0   = 4;
  int OP_SEL_1   = 8;
  int DST_OP_SEL = 8;
}

def DSTCLAMP {
  int NONE   = 0;
  int ENABLE = 1;
}

def DSTOMOD {
  int NONE = 0;
}

def HWREG {
  int MODE          = 1;
  int STATUS        = 2;
  int TRAPSTS       = 3;
  int HW_ID         = 4;
  int GPR_ALLOC     = 5;
  int LDS_ALLOC     = 6;
  int IB_STS        = 7;
  int MEM_BASES     = 15;
  int TBA_LO        = 16;
  int TBA_HI        = 17;
  int TMA_LO        = 18;
  int TMA_HI        = 19;
  int FLAT_SCR_LO   = 20;
  int FLAT_SCR_HI   = 21;
  int XNACK_MASK    = 22;
  int POPS_PACKER   = 25;
  int SHADER_CYCLES = 29;
}

// Packs a hardware register reference into one integer: Reg in the low bits,
// Offset shifted left by 6, (Size - 1) shifted left by 11, with the result
// masked to 16 bits.
class getHwRegImm<int Reg, int Offset = 0, int Size = 32> {
  int ret = !and(!or(Reg,
                     !shl(Offset, 6),
                     !shl(!add(Size, -1), 11)),
                 0xFFFF);
}

//===----------------------------------------------------------------------===//
//
// SI Instruction multiclass helpers.
//
// Instructions with _32 take 32-bit operands.
// Instructions with _64 take 64-bit operands.
//
// VOP_* instructions can use either a 32-bit or 64-bit encoding.  The 32-bit
// encoding is the standard encoding, but instructions that make use of
// any of the instruction modifiers must use the 64-bit encoding.
//
// Instructions with _e32 use the 32-bit encoding.
// Instructions with _e64 use the 64-bit encoding.
//
//===----------------------------------------------------------------------===//

// Records the pseudo instruction name and the subtarget value on an
// instruction record.
class SIMCInstr <string pseudo, int subtarget> {
  string PseudoInstr = pseudo;
  int Subtarget = subtarget;
}

//===----------------------------------------------------------------------===//
// Vector ALU classes
//===----------------------------------------------------------------------===//

// Counts the leading source value types that are not `untyped`.
class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> {
  int ret = !cond(!eq(Src0.Value, untyped.Value): 0,
                  !eq(Src1.Value, untyped.Value): 1,  // VOP1
                  !eq(Src2.Value, untyped.Value): 2,  // VOP2
                  true:                           3); // VOP3
}

// Returns the register class to use for the destination of VOP[123C]
// instructions for the given VT.
class getVALUDstForVT<ValueType VT> {
  RegisterOperand ret =
    !cond(!eq(VT.Size, 32):  VOPDstOperand<VGPR_32>,
          !eq(VT.Size, 128): VOPDstOperand<VReg_128>,
          !eq(VT.Size, 64):  VOPDstOperand<VReg_64>,
          !eq(VT.Size, 16):  VOPDstOperand<VGPR_32>,
          true:              VOPDstS64orS32); // else VT == i1
}

// Returns the register class to use for the destination of VOP[12C]
// instructions with SDWA extension
class getSDWADstForVT<ValueType VT> {
  RegisterOperand ret = !if(!eq(VT.Size, 1),
                            SDWAVopcDst,             // VOPC
                            VOPDstOperand<VGPR_32>); // VOP1/2 32-bit dst
}

// Returns the register class to use for source 0 of VOP[12C]
// instructions for the given VT.
// NOTE(review): the 64-bit size test comes first, so the v4f16 entry is only
// reachable if v4f16 is not a 64-bit type - confirm whether that is intended.
class getVOPSrc0ForVT<ValueType VT> {
  bit isFP = isFloatType<VT>.ret;

  RegisterOperand ret =
    !if(isFP,
        !cond(!eq(VT.Size, 64):           VSrc_f64,
              !eq(VT.Value, f16.Value):   VSrc_f16,
              !eq(VT.Value, v2f16.Value): VSrc_v2f16,
              !eq(VT.Value, v4f16.Value): AVSrc_64,
              true:                       VSrc_f32),
        !cond(!eq(VT.Size, 64):           VSrc_b64,
              !eq(VT.Value, i16.Value):   VSrc_b16,
              !eq(VT.Value, v2i16.Value): VSrc_v2b16,
              true:                       VSrc_b32));
}

// Scalar source operand: 64-bit types use SSrc_b64, everything else SSrc_b32.
class getSOPSrcForVT<ValueType VT> {
  RegisterOperand ret = !if(!eq(VT.Size, 64), SSrc_b64, SSrc_b32);
}

// Returns the vreg register class to use for source operand given VT
class getVregSrcForVT<ValueType VT> {
  RegisterClass ret = !cond(!eq(VT.Size, 128): VReg_128,
                            !eq(VT.Size, 96):  VReg_96,
                            !eq(VT.Size, 64):  VReg_64,
                            !eq(VT.Size, 48):  VReg_64,
                            true:              VGPR_32);
}

// SDWA source operand: chosen by float/int kind, then by 16-bit vs. other size.
class getSDWASrcForVT <ValueType VT> {
  bit isFP = isFloatType<VT>.ret;
  RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32);
  RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32);
  RegisterOperand ret = !if(isFP, retFlt, retInt);
}

// Returns the register class to use for sources of VOP3 instructions for the
// given VT.
// NOTE(review): as in getVOPSrc0ForVT, the v4f16 entry sits behind the
// 64-bit size test - confirm whether it can be reached.
class getVOP3SrcForVT<ValueType VT> {
  bit isFP = isFloatType<VT>.ret;
  RegisterOperand ret =
    !cond(
      !eq(VT.Size, 128): VSrc_128,
      !eq(VT.Size, 64):
        !if(isFP,
            !if(!eq(VT.Value, v2f32.Value), VSrc_v2f32, VSrc_f64),
            !if(!eq(VT.Value, v2i32.Value), VSrc_v2b32, VSrc_b64)),
      !eq(VT.Value, i1.Value): SSrc_i1,
      isFP:
        !cond(!eq(VT.Value, f16.Value):   VSrc_f16,
              !eq(VT.Value, v2f16.Value): VSrc_v2f16,
              !eq(VT.Value, v4f16.Value): AVSrc_64,
              true:                       VSrc_f32),
      true:
        !cond(!eq(VT.Value, i16.Value):   VSrc_b16,
              !eq(VT.Value, v2i16.Value): VSrc_v2b16,
              true:                       VSrc_b32));
}

// Float or packed int
class isModifierType<ValueType SrcVT> {
  bit ret = !or(!eq(SrcVT.Value, f16.Value),
                !eq(SrcVT.Value, f32.Value),
                !eq(SrcVT.Value, f64.Value),
                !eq(SrcVT.Value, v2f16.Value),
                !eq(SrcVT.Value, v2i16.Value),
                !eq(SrcVT.Value, v2f32.Value),
                !eq(SrcVT.Value, v2i32.Value));
}

// Return type of input modifiers operand for specified input operand
class getSrcMod <ValueType VT, bit EnableF32SrcMods> {
  bit isFP = isFloatType<VT>.ret;
  bit isPacked = isPackedType<VT>.ret; // not consulted by `ret` below
  Operand ret =
    !cond(!eq(VT.Size, 64): !if(isFP, FP64InputMods, Int64InputMods),
          isFP: !if(!eq(VT.Value, f16.Value), FP16InputMods, FP32InputMods),
          true: !if(EnableF32SrcMods, FP32InputMods, Int32InputMods));
}

class getOpSelMod <ValueType VT> {
  Operand ret = !if(!eq(VT.Value, f16.Value), FP16InputMods, IntOpSelMods);
}

// Return type of input modifiers operand specified input operand for DPP
class getSrcModDPP <ValueType VT> {
  bit isFP = isFloatType<VT>.ret;
  Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);
}

// Return type of input modifiers operand specified input operand for SDWA
class getSrcModSDWA <ValueType VT> {
  Operand ret = !cond(!eq(VT.Value, f16.Value): FP16SDWAInputMods,
                      !eq(VT.Value, f32.Value): FP32SDWAInputMods,
                      !eq(VT.Value, i16.Value): Int16SDWAInputMods,
                      true:                     Int32SDWAInputMods);
}

// Returns the input arguments for VOP[12C] instructions for the given SrcVT.
class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
  dag ret = !cond(!eq(NumSrcArgs, 1): (ins Src0RC:$src0),               // VOP1
                  !eq(NumSrcArgs, 2): (ins Src0RC:$src0, Src1RC:$src1), // VOP2
                  true:               (ins));
}

// Returns the input arguments for VOP3 instructions for the given SrcVT.
// The operand list is selected first by the number of sources, then by which
// of the modifier / clamp / omod operands are present.
class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
                RegisterOperand Src2RC, int NumSrcArgs,
                bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
                Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {

  dag ret = !cond(
    // VOP1 without input operands (V_NOP, V_CLREXCP)
    !eq(NumSrcArgs, 0): (ins),

    !eq(NumSrcArgs, 1):
      !if (HasModifiers,
        // VOP1 with modifiers
        !if (HasOMod,
          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
               clampmod0:$clamp, omod0:$omod),
          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
               clampmod0:$clamp)),
        // VOP1 without modifiers
        !if (HasClamp,
          (ins Src0RC:$src0, clampmod0:$clamp),
          (ins Src0RC:$src0))),

    !eq(NumSrcArgs, 2):
      !if (HasModifiers,
        // VOP2 with modifiers
        !if (HasOMod,
          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
               Src1Mod:$src1_modifiers, Src1RC:$src1,
               clampmod0:$clamp, omod0:$omod),
          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
               Src1Mod:$src1_modifiers, Src1RC:$src1,
               clampmod0:$clamp)),
        // VOP2 without modifiers
        !if (HasClamp,
          (ins Src0RC:$src0, Src1RC:$src1, clampmod0:$clamp),
          (ins Src0RC:$src0, Src1RC:$src1))),

    // NumSrcArgs == 3
    true:
      !if (HasModifiers,
        !if (HasSrc2Mods,
          // VOP3 with modifiers
          !if (HasOMod,
            (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                 Src1Mod:$src1_modifiers, Src1RC:$src1,
                 Src2Mod:$src2_modifiers, Src2RC:$src2,
                 clampmod0:$clamp, omod0:$omod),
            !if (HasClamp,
              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                   Src1Mod:$src1_modifiers, Src1RC:$src1,
                   Src2Mod:$src2_modifiers, Src2RC:$src2,
                   clampmod0:$clamp),
              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                   Src1Mod:$src1_modifiers, Src1RC:$src1,
                   Src2Mod:$src2_modifiers, Src2RC:$src2))),
          // VOP3 with modifiers except src2
          !if (HasOMod,
            (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                 Src1Mod:$src1_modifiers, Src1RC:$src1,
                 Src2RC:$src2, clampmod0:$clamp, omod0:$omod),
            !if (HasClamp,
              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                   Src1Mod:$src1_modifiers, Src1RC:$src1,
                   Src2RC:$src2, clampmod0:$clamp),
              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                   Src1Mod:$src1_modifiers, Src1RC:$src1,
                   Src2RC:$src2)))),
        // VOP3 without modifiers
        !if (HasClamp,
          (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2, clampmod0:$clamp),
          (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2))));
}

// Extends the getIns64 operand list with the op_sel operand and/or the
// VOP3P-only fields (op_sel_hi, neg_lo, neg_hi), in that order.
class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,
                RegisterOperand Src2RC, int NumSrcArgs,
                bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
                Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel,
                bit IsVOP3P> {
  // getIns64 handles clamp and omod. implicit mutex between vop3p and omod
  dag base = getIns64 <Src0RC, Src1RC, Src2RC, NumSrcArgs,
                       HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
                       Src0Mod, Src1Mod, Src2Mod>.ret;
  dag opsel = (ins op_sel0:$op_sel);
  dag vop3pFields = (ins op_sel_hi0:$op_sel_hi, neg_lo0:$neg_lo, neg_hi0:$neg_hi);
  dag ret = !con(base,
                 !if(HasOpSel, opsel, (ins)),
                 !if(IsVOP3P, vop3pFields, (ins)));
}

// VOP3P inputs: modifiers on all sources, no omod, op_sel and VOP3P fields.
class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
                   RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp,
                   Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
  dag ret = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
                           HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/,
                           0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod,
                           1/*HasOpSel*/, 1/*IsVOP3P*/>.ret;
}

// VOP3 op_sel inputs: modifiers on all sources, op_sel, no VOP3P fields.
class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC,
                       RegisterOperand Src2RC, int NumSrcArgs,
                       bit HasClamp, bit HasOMod,
                       Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
  dag ret = getInsVOP3Base<Src0RC, Src1RC,
                           Src2RC, NumSrcArgs,
                           HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/, HasOMod,
                           Src0Mod, Src1Mod, Src2Mod, 1/*HasOpSel*/, 0>.ret;
}

// Source operands shared by all DPP forms: $old followed by the sources,
// with or without their modifier operands.
class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
                 int NumSrcArgs, bit HasModifiers,
                 Operand Src0Mod, Operand Src1Mod> {

  dag ret = !cond(
    // VOP1 without input operands (V_NOP)
    !eq(NumSrcArgs, 0): (ins ),

    !eq(NumSrcArgs, 1):
      !if (HasModifiers,
        // VOP1_DPP with modifiers
        (ins OldRC:$old, Src0Mod:$src0_modifiers,
             Src0RC:$src0),
        // VOP1_DPP without modifiers
        (ins OldRC:$old, Src0RC:$src0)),

    true:
      !if (HasModifiers,
        // VOP2_DPP with modifiers
        (ins OldRC:$old,
             Src0Mod:$src0_modifiers, Src0RC:$src0,
             Src1Mod:$src1_modifiers, Src1RC:$src1),
        // VOP2_DPP without modifiers
        (ins OldRC:$old,
             Src0RC:$src0, Src1RC:$src1)));
}

// DPP inputs: base sources plus the dpp_ctrl / mask / bound_ctrl operands.
class getInsDPP <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
                 int NumSrcArgs, bit HasModifiers,
                 Operand Src0Mod, Operand Src1Mod> {
  dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, NumSrcArgs,
                               HasModifiers, Src0Mod, Src1Mod>.ret,
                 (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                      bank_mask:$bank_mask, bound_ctrl:$bound_ctrl));
}

// DPP16 inputs: the getInsDPP list followed by $fi.
class getInsDPP16 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
                 int NumSrcArgs, bit HasModifiers,
                 Operand Src0Mod, Operand Src1Mod> {
  dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, NumSrcArgs,
                           HasModifiers, Src0Mod, Src1Mod>.ret,
                 (ins FI:$fi));
}

// DPP8 inputs: base sources followed by $dpp8 and $fi.
class getInsDPP8 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
                 int NumSrcArgs, bit HasModifiers,
                 Operand Src0Mod, Operand Src1Mod> {
  dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, NumSrcArgs,
                               HasModifiers, Src0Mod, Src1Mod>.ret,
                 (ins dpp8:$dpp8, FI:$fi));
}


// Ins for SDWA
class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs,
                  bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod,
                  ValueType DstVT> {

  dag ret = !cond(
    // VOP1 without input operands (V_NOP)
    !eq(NumSrcArgs, 0): (ins),

    // VOP1
    !eq(NumSrcArgs, 1):
      !if(HasSDWAOMod,
        // VOP1_SDWA with omod
        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
             clampmod:$clamp, omod:$omod,
             dst_sel:$dst_sel, dst_unused:$dst_unused,
             src0_sel:$src0_sel),
        // VOP1_SDWA without omod
        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
             clampmod:$clamp,
             dst_sel:$dst_sel, dst_unused:$dst_unused,
             src0_sel:$src0_sel)),

    !eq(NumSrcArgs, 2):
      !if(!eq(DstVT.Size, 1),
        // VOPC_SDWA
        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
             Src1Mod:$src1_modifiers, Src1RC:$src1,
             clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
        // VOP2_SDWA
        !if(HasSDWAOMod,
          // VOP2_SDWA with omod
          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
               Src1Mod:$src1_modifiers, Src1RC:$src1,
               clampmod:$clamp, omod:$omod,
               dst_sel:$dst_sel, dst_unused:$dst_unused,
               src0_sel:$src0_sel, src1_sel:$src1_sel),
          // VOP2_SDWA without omod
          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
               Src1Mod:$src1_modifiers, Src1RC:$src1,
               clampmod:$clamp,
               dst_sel:$dst_sel, dst_unused:$dst_unused,
               src0_sel:$src0_sel, src1_sel:$src1_sel))),

    true: (ins));
}

// Outs for DPP
class getOutsDPP <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> {
  dag ret = !cond(
    !not(HasDst):        (outs), // V_NOP
    // no dst for VOPC, we use "vcc"-token as dst in SDWA VOPC instructions
    !eq(DstVT.Size, 1):  (outs),
    true:                (outs DstRCDPP:$vdst));
}

// Outs for SDWA
class getOutsSDWA <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA> {
  dag ret = !cond(
    !not(HasDst):        (outs), // V_NOP
    !eq(DstVT.Size, 1):  (outs DstRCSDWA:$sdst),
    true:                (outs DstRCSDWA:$vdst));
}

// Returns the assembly string for the inputs and outputs of a VOP[12C]
// instruction.  This does not add the _e32 suffix, so it can be reused
// by getAsm64.
class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
  string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
  string src0 = ", $src0";
  string src1 = ", $src1";
  string src2 = ", $src2";
  // Exactly one of the three source-count terms is non-empty (none for 0).
  string ret = !if(HasDst, dst, "") #
               !if(!eq(NumSrcArgs, 1), src0, "") #
               !if(!eq(NumSrcArgs, 2), src0#src1, "") #
               !if(!eq(NumSrcArgs, 3), src0#src1#src2, "");
}

// Returns the assembly string for the inputs and outputs of a VOP3
// instruction.
class getAsm64 <bit HasDst, int NumSrcArgs, bit HasIntClamp, bit HasModifiers,
                bit HasOMod, ValueType DstVT = i32> {
  // A 1-bit destination (VOPC) is spelled $sdst, anything else $vdst.
  string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst");

  // Each source operand is followed by a comma unless it is the last one.
  string src0 = "$src0_modifiers" # !if(!eq(NumSrcArgs, 1), "", ",");
  string src1 = !if(!eq(NumSrcArgs, 1), "",
                    " $src1_modifiers" # !if(!eq(NumSrcArgs, 2), "", ","));
  string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");

  string iclamp = !if(HasIntClamp, "$clamp", "");

  // With modifiers: the full "dst, srcN_modifiers... $clamp[$omod]" form.
  // Without modifiers: reuse the VOP[12C] string and append the integer clamp.
  string ret =
    !if(HasModifiers,
        dst # ", " # src0 # src1 # src2 # "$clamp" # !if(HasOMod, "$omod", ""),
        getAsm32<HasDst, NumSrcArgs, DstVT>.ret # iclamp);
}

// Returns the assembly string for the inputs and outputs of a VOP3P
// instruction.
class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
                   bit HasClamp> {
  string dst = "$vdst";

  // Each source operand is followed by a comma unless it is the last one.
  string src0 = "$src0" # !if(!eq(NumSrcArgs, 1), "", ",");
  string src1 = !if(!eq(NumSrcArgs, 1), "",
                    " $src1" # !if(!eq(NumSrcArgs, 2), "", ","));
  string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");

  string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
  string clamp = !if(HasClamp, "$clamp", "");

  // Each modifier is printed as an array of bits for each operand, so
  // all operands are printed as part of src0_modifiers.
  string ret = dst # ", " # src0 # src1 # src2 # "$op_sel$op_sel_hi"
                   # mods # clamp;
}

// Returns the assembly string of a VOP3 instruction with op_sel. A source is
// printed through its "_modifiers" operand only when the matching
// Src*HasMods bit is set; otherwise the plain operand name is used.
class getAsmVOP3OpSel <int NumSrcArgs,
                       bit HasClamp,
                       bit Src0HasMods,
                       bit Src1HasMods,
                       bit Src2HasMods> {
  string dst = "$vdst";

  // Plain operand spellings.
  string isrc0 = "$src0" # !if(!eq(NumSrcArgs, 1), "", ",");
  string isrc1 = !if(!eq(NumSrcArgs, 1), "",
                     " $src1" # !if(!eq(NumSrcArgs, 2), "", ","));
  string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", "");

  // Spellings that go through the modifier operands.
  string fsrc0 = "$src0_modifiers" # !if(!eq(NumSrcArgs, 1), "", ",");
  string fsrc1 = !if(!eq(NumSrcArgs, 1), "",
                     " $src1_modifiers" # !if(!eq(NumSrcArgs, 2), "", ","));
  string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");

  string src0 = !if(Src0HasMods, fsrc0, isrc0);
  string src1 = !if(Src1HasMods, fsrc1, isrc1);
  string src2 = !if(Src2HasMods, fsrc2, isrc2);

  string clamp = !if(HasClamp, "$clamp", "");

  string ret = dst # ", " # src0 # src1 # src2 # "$op_sel" # clamp;
}

// Returns the assembly string of the DPP form. The `dst` and `args` fields
// are reused by getAsmDPP8, which derives from this class.
class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
  // No destination text at all when HasDst is clear; $sdst is used for VOPC.
  string dst = !if(!not(HasDst), "",
                   !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"));

  string src0 = "$src0_modifiers" # !if(!eq(NumSrcArgs, 1), "", ",");
  string src1 = !if(!eq(NumSrcArgs, 1), "",
                    " $src1_modifiers" # !if(!eq(NumSrcArgs, 2), "", ","));

  // Without modifiers the source list comes from getAsm32 (with its
  // destination suppressed, since `dst` above already provides it).
  string args = !if(HasModifiers,
                    ", " # src0 # src1,
                    getAsm32<0, NumSrcArgs, DstVT>.ret);

  string ret = dst # args # " $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
}

// DPP16 string: the DPP string followed by the $fi operand.
class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
  string ret =
    getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret # "$fi";
}

// DPP8 string: same dst/args as DPP, but with " $dpp8$fi" as the control part.
class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32>
  : getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT> {
  let ret = dst # args # " $dpp8$fi";
}


// Returns the assembly string of the SDWA form.
class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
  string dst = !if(!not(HasDst), "",
                   !if(!eq(DstVT.Size, 1),
                       " vcc", // use vcc token as dst for VOPC instructions
                       "$vdst"));
  string src0 = "$src0_modifiers";
  string src1 = "$src1_modifiers";

  // ", src0[, src1]$clamp" -- empty when there are no sources.
  string args = !if(!eq(NumSrcArgs, 0), "",
                    ", " # src0
                         # !if(!eq(NumSrcArgs, 1), "", ", " # src1)
                         # "$clamp");

  string sdwa = !if(!eq(NumSrcArgs, 0), "",
                    !if(!eq(NumSrcArgs, 1),
                        " $dst_sel $dst_unused $src0_sel",
                        !if(!eq(DstVT.Size, 1),
                            // No dst_sel and dst_unused for VOPC
                            " $src0_sel $src1_sel",
                            " $dst_sel $dst_unused $src0_sel $src1_sel")));

  string ret = dst # args # sdwa;
}

// Returns the assembly string of the SDWA9 form.
class getAsmSDWA9 <bit HasDst, bit HasOMod, int NumSrcArgs,
                   ValueType DstVT = i32> {
  string dst = !if(!not(HasDst), "",
                   !if(!eq(DstVT.Size, 1),
                       "$sdst",   // VOPC
                       "$vdst")); // VOP1/2
  string src0 = "$src0_modifiers";
  string src1 = "$src1_modifiers";

  // Output modifiers: $clamp, plus $omod when HasOMod is set.
  string out_mods = "$clamp" # !if(HasOMod, "$omod", "");

  string args = !if(!eq(NumSrcArgs, 0), "",
                    ", " # src0 # !if(!eq(NumSrcArgs, 1), "", ", " # src1));

  string sdwa = !if(!eq(NumSrcArgs, 0), "",
                    !if(!eq(NumSrcArgs, 1),
                        out_mods # " $dst_sel $dst_unused $src0_sel",
                        !if(!eq(DstVT.Size, 1),
                            // No dst_sel, dst_unused and output modifiers
                            // for VOPC
                            " $src0_sel $src1_sel",
                            out_mods # " $dst_sel $dst_unused $src0_sel $src1_sel")));

  string ret = dst # args # sdwa;
}

// ret is 1 when the instruction has fewer than three... more precisely: when
// NumSrcArgs is not 3 and the destination, src0 or src1 type is 64 bits wide.
class getHas64BitOps <int NumSrcArgs, ValueType DstVT, ValueType Src0VT,
                      ValueType Src1VT> {
  bit ret = !if(!eq(NumSrcArgs, 3),
                0,
                !or(!eq(DstVT.Size, 64),
                    !eq(Src0VT.Size, 64),
                    !eq(Src1VT.Size, 64)));
}

// ret is 0 when any of the following holds, 1 otherwise:
//   - NumSrcArgs == 3 (no SDWA for VOP3)
//   - 64-bit dst      (no SDWA for 64-bit operands)
//   - 64-bit src0
//   - 64-bit src1
class getHasSDWA <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                  ValueType Src1VT = i32> {
  bit ret = !if(!or(!eq(NumSrcArgs, 3),
                    !eq(DstVT.Size, 64),
                    !eq(Src0VT.Size, 64),
                    !eq(Src1VT.Size, 64)),
                0,
                1);
}

// ret is 0 only for NumSrcArgs == 3 (no DPP for VOP3).
class getHasDPP <int NumSrcArgs> {
  bit ret = !ne(NumSrcArgs, 3);
}

// ret is 1 when the instruction both has a DPP form and has 64-bit operands.
class getHasExt64BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                 ValueType Src1VT = i32> {
  bit ret = !if(getHasDPP<NumSrcArgs>.ret,
                getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret,
                0);
}

// Function that checks if instruction supports DPP or SDWA
class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                 ValueType Src1VT = i32> {
  bit ret = !if(getHasDPP<NumSrcArgs>.ret,
                1,
                getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
}

// Return an AGPR+VGPR operand class for the given VGPR register class.
class getLdStRegisterOperand<RegisterClass RC> {
  // Selects by RC.Size; any size not listed falls through to the VReg_1
  // placeholder below.
  RegisterOperand ret =
    !if(!eq(RC.Size, 32), AVLdSt_32,
    !if(!eq(RC.Size, 64), AVLdSt_64,
    !if(!eq(RC.Size, 96), AVLdSt_96,
    !if(!eq(RC.Size, 128), AVLdSt_128,
    !if(!eq(RC.Size, 160), AVLdSt_160,
    RegisterOperand<VReg_1> // invalid register
    )))));
}

// Logical OR of two bits.
class BitOr<bit a, bit b> {
  bit ret = !if(a, 1, !if(b, 1, 0));
}

// Logical AND of two bits.
class BitAnd<bit a, bit b> {
  bit ret = !if(a, !if(b, 1, 0), 0);
}

// Values for VOPProfile.NeedPatGen.
def PatGenMode {
  int NoPattern = 0;
  int Pattern   = 1;
}

// Describes one VALU operand signature and everything derived from it:
// register operands, modifier operands, feature bits, operand lists and
// assembly strings for each encoding variant.
//
//   _ArgVT            - [DstVT, Src0VT, Src1VT, Src2VT]; `untyped` marks an
//                       absent destination/source.
//   _EnableF32SrcMods - forwarded to getSrcMod and OR-ed into HasModifiers.
//   _EnableClamp      - OR-ed into HasClamp.
class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
                  bit _EnableClamp = 0> {

  field list<ValueType> ArgVT = _ArgVT;
  field bit EnableF32SrcMods = _EnableF32SrcMods;
  field bit EnableClamp = _EnableClamp;

  field ValueType DstVT = ArgVT[0];
  field ValueType Src0VT = ArgVT[1];
  field ValueType Src1VT = ArgVT[2];
  field ValueType Src2VT = ArgVT[3];
  field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
  field RegisterOperand DstRCDPP = getVALUDstForVT<DstVT>.ret;
  field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret;
  field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
  field RegisterClass Src1RC32 = getVregSrcForVT<Src1VT>.ret;
  field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
  field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
  field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
  field RegisterClass Src0DPP = getVregSrcForVT<Src0VT>.ret;
  field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
  field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
  // The src1 SDWA operand must be derived from the src1 type, like every
  // other Src1* field in this class. It was previously keyed on Src0VT,
  // which picks the wrong operand whenever src0 and src1 types differ
  // (e.g. VOP_F16_F16_I16, VOP_F32_F32_F16).
  field RegisterOperand Src1SDWA = getSDWASrcForVT<Src1VT>.ret;
  field Operand Src0Mod = getSrcMod<Src0VT, EnableF32SrcMods>.ret;
  field Operand Src1Mod = getSrcMod<Src1VT, EnableF32SrcMods>.ret;
  field Operand Src2Mod = getSrcMod<Src2VT, EnableF32SrcMods>.ret;
  field Operand Src0ModDPP = getSrcModDPP<Src0VT>.ret;
  field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret;
  field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
  field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;


  // An operand exists when its type is not `untyped`.
  field bit HasDst = !ne(DstVT.Value, untyped.Value);
  field bit HasDst32 = HasDst;
  field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case
  field bit EmitDstSel = EmitDst;
  field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
  field bit HasSrc0 = !ne(Src0VT.Value, untyped.Value);
  field bit HasSrc1 = !ne(Src1VT.Value, untyped.Value);
  field bit HasSrc2 = !ne(Src2VT.Value, untyped.Value);

  // HasSrc*FloatMods affects the SDWA encoding. We ignore EnableF32SrcMods.
  field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
  field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret;
  field bit HasSrc2FloatMods = isFloatType<Src2VT>.ret;

  // HasSrc*IntMods affects the SDWA encoding. We ignore EnableF32SrcMods.
  field bit HasSrc0IntMods = isIntType<Src0VT>.ret;
  field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
  field bit HasSrc2IntMods = isIntType<Src2VT>.ret;

  field bit HasClamp = !or(isModifierType<Src0VT>.ret, EnableClamp);
  field bit HasSDWAClamp = EmitDst;
  // HasClamp is split by destination type: FP clamp for float destinations,
  // integer clamp for everything else.
  field bit HasFPClamp = !and(isFloatType<DstVT>.ret, HasClamp);
  field bit HasIntClamp = !if(isFloatType<DstVT>.ret, 0, HasClamp);
  field bit HasClampLo = HasClamp;
  field bit HasClampHi = !and(isPackedType<DstVT>.ret, HasClamp);
  field bit HasHigh = 0;

  field bit IsPacked = isPackedType<Src0VT>.ret;
  field bit HasOpSel = IsPacked;
  // omod requires a float destination and is disabled when op_sel is present.
  field bit HasOMod = !if(HasOpSel, 0, isFloatType<DstVT>.ret);
  field bit HasSDWAOMod = isFloatType<DstVT>.ret;

  field bit HasModifiers = !or(isModifierType<Src0VT>.ret,
                               isModifierType<Src1VT>.ret,
                               isModifierType<Src2VT>.ret,
                               HasOMod,
                               EnableF32SrcMods);

  field bit HasSrc0Mods = HasModifiers;
  field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0);
  field bit HasSrc2Mods = !if(HasModifiers, !or(HasSrc2FloatMods, HasSrc2IntMods), 0);

  field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  field bit HasExtDPP = getHasDPP<NumSrcArgs>.ret;
  field bit HasExt64BitDPP = getHasExt64BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  field bit HasExtSDWA = getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  field bit HasExtSDWA9 = HasExtSDWA;
  field int NeedPatGen = PatGenMode.NoPattern;

  field bit IsMAI = 0;
  field bit IsDOT = 0;
  field bit IsSingle = 0;

  // Packed modifier operand per source: F16 flavour for float sources,
  // I16 flavour otherwise.
  field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
  field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
  field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods);

  field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs));

  // VOP3b instructions are a special case with a second explicit
  // output. This is manually overridden for them.
  field dag Outs32 = Outs;
  field dag Outs64 = Outs;
  field dag OutsDPP = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret;
  field dag OutsDPP8 = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret;
  field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret;

  field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
  field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
                             HasIntClamp, HasModifiers, HasSrc2Mods,
                             HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
  field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
                                   NumSrcArgs, HasClamp,
                                   Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
  field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
                                           NumSrcArgs, HasClamp, HasOMod,
                                           getOpSelMod<Src0VT>.ret,
                                           getOpSelMod<Src1VT>.ret,
                                           getOpSelMod<Src2VT>.ret>.ret;
  field dag InsDPP = !if(HasExtDPP,
                         getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
                                   HasModifiers, Src0ModDPP, Src1ModDPP>.ret,
                         (ins));
  field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
                                   HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
  // HasModifiers is passed as 0 here; see the AsmDPP8 comment below.
  field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs, 0,
                                 Src0ModDPP, Src1ModDPP>.ret;
  field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
                                 HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
                                 DstVT>.ret;


  field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
  field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasIntClamp, HasModifiers, HasOMod, DstVT>.ret;
  field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp>.ret;
  field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
                                              HasClamp,
                                              HasSrc0FloatMods,
                                              HasSrc1FloatMods,
                                              HasSrc2FloatMods>.ret;
  field string AsmDPP = !if(HasExtDPP,
                            getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, "");
  field string AsmDPP16 = getAsmDPP16<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
  // DPP8 encoding has no fields for modifiers, and it is enforced by setting
  // the asm operand name via this HasModifiers flag
  field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret;
  field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
  field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;

  field string TieRegDPP = "$old";
}

// Same argument types as `p`, with every DPP/SDWA extension bit cleared.
// NOTE(review): only p.ArgVT is forwarded, so p's EnableF32SrcMods and
// EnableClamp are reset to their defaults -- confirm that is intended.
class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
  let HasExt = 0;
  let HasExtDPP = 0;
  let HasExt64BitDPP = 0;
  let HasExtSDWA = 0;
  let HasExtSDWA9 = 0;
}

// Same argument types as `p`, with NeedPatGen set to `mode`.
// NOTE(review): only p.ArgVT is forwarded here as well (see VOP_NO_EXT).
class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.Pattern> : VOPProfile <p.ArgVT> {
  let NeedPatGen = mode;
}

// Profile names follow VOP_<Dst>_<Src0>[_<Src1>[_<Src2>]].
def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>;
def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>;
def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>;

def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>;
def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>;
def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], 0, /*EnableClamp=*/1>;

def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;

def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>;

def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;

def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>;
def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>;

def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>;

def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;

def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
def VOP_F64_F32 : VOPProfile <[f64, f32, untyped, untyped]>;
def VOP_F64_F64 : VOPProfile <[f64, f64, untyped, untyped]>;
def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>;
def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>;
def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>;
def VOP_I64_I64 : VOPProfile <[i64, i64, untyped, untyped]>;

def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>;
def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], 0, /*EnableClamp=*/1>;
def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;

def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;

def VOP_F16_F32_F16_F32 : VOPProfile <[f16, f32, f16, f32]>;
def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>;
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>;

def VOP_F32_V2F16_V2F16_F32 : VOPProfile <[f32, v2f16, v2f16, f32]>;
def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>;

def VOP_V4F32_F32_F32_V4F32       : VOPProfile <[v4f32,  f32,   f32,   v4f32]>;
def VOP_V16F32_F32_F32_V16F32     : VOPProfile <[v16f32, f32,   f32,   v16f32]>;
def VOP_V32F32_F32_F32_V32F32     : VOPProfile <[v32f32, f32,   f32,   v32f32]>;
def VOP_V4F32_V4F16_V4F16_V4F32   : VOPProfile <[v4f32,  v4f16, v4f16, v4f32]>;
def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile <[v16f32, v4f16, v4f16, v16f32]>;
def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32f32, v4f16, v4f16, v32f32]>;
def VOP_V4F32_V2I16_V2I16_V4F32   : VOPProfile <[v4f32,  v2i16, v2i16, v4f32]>;
def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile <[v16f32, v2i16, v2i16, v16f32]>;
def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32f32, v2i16, v2i16, v32f32]>;
def VOP_V4I32_I32_I32_V4I32       : VOPProfile <[v4i32,  i32,   i32,   v4i32]>;
def VOP_V16I32_I32_I32_V16I32     : VOPProfile <[v16i32, i32,   i32,   v16i32]>;
def VOP_V32I32_I32_I32_V32I32     : VOPProfile <[v32i32, i32,   i32,   v32i32]>;

def VOP_V4F64_F64_F64_V4F64 : VOPProfile <[v4f64, f64, f64, v4f64]>;
def VOP_V1F64_F64_F64_V1F64 : VOPProfile <[v1f64, f64, f64, v1f64]>;

def VOP_V2F32_V2F32_V2F32_V2F32   : VOPProfile <[v2f32,  v2f32, v2f32, v2f32]>;
def VOP_V2F32_V2F32_V2F32         : VOPProfile <[v2f32,  v2f32, v2f32, untyped]>;
def VOP_V2I32_V2I32_V2I32         : VOPProfile <[v2i32,  v2i32, v2i32, untyped]>;
def VOP_V4F32_V4I16_V4I16_V4F32   : VOPProfile <[v4f32,  v4i16, v4i16, v4f32]>;
def VOP_V16F32_V4I16_V4I16_V16F32 : VOPProfile <[v16f32, v4i16, v4i16, v16f32]>;
def VOP_V32F32_V4I16_V4I16_V32F32 : VOPProfile <[v32f32, v4i16, v4i16, v32f32]>;

// Tags an instruction for the getCommuteOrig / getCommuteRev mappings:
// RevOp is the row key shared by both forms, IsOrig tells them apart.
class Commutable_REV <string revOp, bit isOrig> {
  string RevOp = revOp;
  bit IsOrig = isOrig;
}

// Tags an atomic for the getAtomicNoRetOp mapping: NoRetOp is the row key
// shared by both forms, IsRet tells them apart.
class AtomicNoRet <string noRetOp, bit isRet> {
  string NoRetOp = noRetOp;
  bit IsRet = isRet;
}

//===----------------------------------------------------------------------===//
// Interpolation opcodes
//===----------------------------------------------------------------------===//

class VINTRPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVINTRPDst">;

// Codegen-only pseudo: carries the selection pattern, has an empty asm
// string and the NONE encoding family.
class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
  VINTRPCommon <outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {
  let isPseudo = 1;
  let isCodeGenOnly = 1;
}

// FIXME-GFX10: WIP.
// Real instruction using the VINTRPe encoding; the encoding family is a
// parameter so the class can be instantiated for more than one family.
class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
                      string asm, int encodingFamily> :
  VINTRPCommon <outs, ins, asm, []>,
  VINTRPe <op>,
  SIMCInstr<opName, encodingFamily> {
}

// Real instruction using the VINTRPe_vi encoding (VI encoding family).
class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
                      string asm> :
  VINTRPCommon <outs, ins, asm, []>,
  VINTRPe_vi <op>,
  SIMCInstr<opName, SIEncodingFamily.VI> {
  let AssemblerPredicate = VIAssemblerPredicate;
  let DecoderNamespace = "GFX8";
}

// FIXME-GFX10: WIP.
// Emits the pseudo for an interpolation opcode plus one real instruction per
// encoding: _si (GFX6/GFX7), _vi, and _gfx10 (which reuses VINTRP_Real_si
// with the GFX10 encoding family).
multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
                     list<dag> pattern = []> {
  def "" : VINTRP_Pseudo <NAME, outs, ins, pattern>;

  let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in
  def _si : VINTRP_Real_si <op, NAME, outs, ins, asm, SIEncodingFamily.SI>;

  def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>;

  let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in
  def _gfx10 : VINTRP_Real_si<op, NAME, outs, ins, asm, SIEncodingFamily.GFX10>;
}
//===----------------------------------------------------------------------===//
// Vector instruction mappings
//===----------------------------------------------------------------------===//

// All mappings in this group select among "VOP" records sharing an OpName.
let FilterClass = "VOP", RowFields = ["OpName"] in {

// Columns are the (Size, VOP3) pair: ("4", "0") is e32, ("8", "1") is e64.
let ColFields = ["Size", "VOP3"] in {

// Maps an opcode in e32 form to its e64 equivalent
def getVOPe64 : InstrMapping {
  let KeyCol = ["4", "0"];
  let ValueCols = [["8", "1"]];
}

// Maps an opcode in e64 form to its e32 equivalent
def getVOPe32 : InstrMapping {
  let KeyCol = ["8", "1"];
  let ValueCols = [["4", "0"]];
}

} // End ColFields = ["Size", "VOP3"]

// Columns are the assembler variant name.
let ColFields = ["AsmVariantName"] in {

// Maps ordinary instructions to their SDWA counterparts
def getSDWAOp : InstrMapping {
  let KeyCol = ["Default"];
  let ValueCols = [["SDWA"]];
}

// Maps SDWA instructions to their ordinary counterparts
def getBasicFromSDWAOp : InstrMapping {
  let KeyCol = ["SDWA"];
  let ValueCols = [["Default"]];
}

// Maps ordinary instructions to their DPP counterparts
def getDPPOp32 : InstrMapping {
  let KeyCol = ["Default"];
  let ValueCols = [["DPP"]];
}

} // End ColFields = ["AsmVariantName"]

} // End FilterClass = "VOP", RowFields = ["OpName"]

// Both commute mappings pair up Commutable_REV records that share a RevOp
// and differ in IsOrig.
let FilterClass = "Commutable_REV", RowFields = ["RevOp"],
    ColFields = ["IsOrig"] in {

// Maps a commuted opcode to its original version
def getCommuteOrig : InstrMapping {
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Maps an original opcode to its commuted version
def getCommuteRev : InstrMapping {
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

} // End FilterClass = "Commutable_REV"

// Maps a pseudo (encoding family NONE) to the real instruction of each
// encoding family. One value column per SIEncodingFamily entry.
def getMCOpcodeGen : InstrMapping {
  let FilterClass = "SIMCInstr";
  let RowFields = ["PseudoInstr"];
  let ColFields = ["Subtarget"];
  let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
  let ValueCols = [
    [!cast<string>(SIEncodingFamily.SI)],
    [!cast<string>(SIEncodingFamily.VI)],
    [!cast<string>(SIEncodingFamily.SDWA)],
    [!cast<string>(SIEncodingFamily.SDWA9)],
    // GFX80 encoding is added to work around a multiple matching
    // issue for buffer instructions with unpacked d16 data. This
    // does not actually change the encoding, and thus may be
    // removed later.
    [!cast<string>(SIEncodingFamily.GFX80)],
    [!cast<string>(SIEncodingFamily.GFX9)],
    [!cast<string>(SIEncodingFamily.GFX10)],
    [!cast<string>(SIEncodingFamily.SDWA10)],
    [!cast<string>(SIEncodingFamily.GFX90A)]
  ];
}

// Get equivalent SOPK instruction.
def getSOPKOp : InstrMapping {
  let FilterClass = "SOPKInstTable";
  let RowFields   = ["BaseCmpOp"];
  let ColFields   = ["IsSOPK"];
  let KeyCol      = ["0"];
  let ValueCols   = [["1"]];
}

// Both mappings below work on MUBUFAddr64Table records sharing an OpName.
let FilterClass = "MUBUFAddr64Table", RowFields = ["OpName"],
    ColFields = ["IsAddr64"] in {

// IsAddr64 = 0  ->  IsAddr64 = 1
def getAddr64Inst : InstrMapping {
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// IsAddr64 = 1  ->  IsAddr64 = 1 (key and value column are the same)
def getIfAddr64Inst : InstrMapping {
  let KeyCol = ["1"];
  let ValueCols = [["1"]];
}

} // End FilterClass = "MUBUFAddr64Table"

// IsLds = 1  ->  IsLds = 0
def getMUBUFNoLdsInst : InstrMapping {
  let FilterClass = "MUBUFLdsTable";
  let RowFields   = ["OpName"];
  let ColFields   = ["IsLds"];
  let KeyCol      = ["1"];
  let ValueCols   = [["0"]];
}

// Maps an atomic opcode to its returnless version.
def getAtomicNoRetOp : InstrMapping {
  let FilterClass = "AtomicNoRet";
  let RowFields   = ["NoRetOp"];
  let ColFields   = ["IsRet"];
  let KeyCol      = ["1"];
  let ValueCols   = [["0"]];
}

// Both mappings below work on GlobalSaddrTable records sharing a SaddrOp.
let FilterClass = "GlobalSaddrTable", RowFields = ["SaddrOp"],
    ColFields = ["IsSaddr"] in {

// Maps a GLOBAL to its SADDR form.
def getGlobalSaddrOp : InstrMapping {
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Maps a GLOBAL SADDR to its VADDR form.
def getGlobalVaddrOp : InstrMapping {
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

} // End FilterClass = "GlobalSaddrTable"

// Maps a v_cmpx opcode with sdst to opcode without sdst.
def getVCMPXNoSDstOp : InstrMapping {
  let FilterClass = "VCMPXNoSDstTable";
  let RowFields   = ["NoSDstOp"];
  let ColFields   = ["HasSDst"];
  let KeyCol      = ["1"];
  let ValueCols   = [["0"]];
}

// Maps a SOPP to a SOPP with S_NOP
def getSOPPWithRelaxation : InstrMapping {
  let FilterClass = "SOPPRelaxTable";
  let RowFields   = ["KeyName"];
  let ColFields   = ["IsRelaxed"];
  let KeyCol      = ["0"];
  let ValueCols   = [["1"]];
}

// Maps flat scratch opcodes by addressing modes. Every mapping in this group
// pairs FlatScratchInst records that share an SVOp and differ in Mode; the
// name reads get...<To>from<From>.
let FilterClass = "FlatScratchInst", RowFields = ["SVOp"],
    ColFields = ["Mode"] in {

// SS -> ST
def getFlatScratchInstSTfromSS : InstrMapping {
  let KeyCol = ["SS"];
  let ValueCols = [["ST"]];
}

// SV -> SS
def getFlatScratchInstSSfromSV : InstrMapping {
  let KeyCol = ["SV"];
  let ValueCols = [["SS"]];
}

// SS -> SV
def getFlatScratchInstSVfromSS : InstrMapping {
  let KeyCol = ["SS"];
  let ValueCols = [["SV"]];
}

} // End FilterClass = "FlatScratchInst"

include "SIInstructions.td"

include "DSInstructions.td"
include "MIMGInstructions.td"