//===-- SIInstrInfo.td - SI Instruction Infos -------------*- tablegen -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">,
  AssemblerPredicate <(all_of FeatureWavefrontSize32)>;
def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">,
  AssemblerPredicate <(all_of FeatureWavefrontSize64)>;

class GCNPredicateControl : PredicateControl {
  Predicate SIAssemblerPredicate = isGFX6GFX7;
  Predicate VIAssemblerPredicate = isGFX8GFX9;
}

// Except for the NONE field, this must be kept in sync with the
// SIEncodingFamily enum in AMDGPUInstrInfo.cpp
def SIEncodingFamily {
  int NONE = -1;
  int SI = 0;
  int VI = 1;
  int SDWA = 2;
  int SDWA9 = 3;
  int GFX80 = 4;
  int GFX9 = 5;
  int GFX10 = 6;
  int SDWA10 = 7;
  int GFX10_B = 8;
}

//===----------------------------------------------------------------------===//
// SI DAG Nodes
//===----------------------------------------------------------------------===//

def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;

def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD",
  SDTypeProfile<1, 3, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
  [SDNPMayLoad, SDNPMemOperand]
>;

def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
  SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
>;

def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;

def SIatomic_dec : SDNode<"AMDGPUISD::ATOMIC_DEC", SDTAtomic2,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;

def SIatomic_csub : SDNode<"AMDGPUISD::ATOMIC_LOAD_CSUB", SDTAtomic2,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;

def SDTAtomic2_f32 : SDTypeProfile<1, 2, [
  SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
]>;

def SIatomic_fmin : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMIN", SDTAtomic2_f32,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;

def SIatomic_fmax : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMAX", SDTAtomic2_f32,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;

// load_d16_{lo|hi} ptr, tied_input
def SIload_d16 : SDTypeProfile<1, 2, [
  SDTCisPtrTy<1>,
  SDTCisSameAs<0, 2>
]>;


def SDTtbuffer_load : SDTypeProfile<1, 8,
  [                     // vdata
   SDTCisVT<1, v4i32>,  // rsrc
   SDTCisVT<2, i32>,    // vindex(VGPR)
   SDTCisVT<3, i32>,    // voffset(VGPR)
   SDTCisVT<4, i32>,    // soffset(SGPR)
   SDTCisVT<5, i32>,    // offset(imm)
   SDTCisVT<6, i32>,    // format(imm)
   SDTCisVT<7, i32>,    // cachepolicy, swizzled buffer(imm)
   SDTCisVT<8, i1>      // idxen(imm)
  ]>;

def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTtbuffer_load,
                            [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
def SItbuffer_load_d16 : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT_D16",
                                SDTtbuffer_load,
                                [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

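// Note (illustrative): the tbuffer load nodes above are normally produced
// when lowering the llvm.amdgcn.raw.tbuffer.load /
// llvm.amdgcn.struct.tbuffer.load intrinsics; the store profile below mirrors
// the same operand layout for the store forms.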
def SDTtbuffer_store : SDTypeProfile<0, 9,
  [                     // vdata
   SDTCisVT<1, v4i32>,  // rsrc
   SDTCisVT<2, i32>,    // vindex(VGPR)
   SDTCisVT<3, i32>,    // voffset(VGPR)
   SDTCisVT<4, i32>,    // soffset(SGPR)
   SDTCisVT<5, i32>,    // offset(imm)
   SDTCisVT<6, i32>,    // format(imm)
   SDTCisVT<7, i32>,    // cachepolicy, swizzled buffer(imm)
   SDTCisVT<8, i1>      // idxen(imm)
  ]>;

def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
                             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SItbuffer_store_d16 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16",
                                 SDTtbuffer_store,
                                 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

def SDTBufferLoad : SDTypeProfile<1, 7,
  [                     // vdata
   SDTCisVT<1, v4i32>,  // rsrc
   SDTCisVT<2, i32>,    // vindex(VGPR)
   SDTCisVT<3, i32>,    // voffset(VGPR)
   SDTCisVT<4, i32>,    // soffset(SGPR)
   SDTCisVT<5, i32>,    // offset(imm)
   SDTCisVT<6, i32>,    // cachepolicy, swizzled buffer(imm)
   SDTCisVT<7, i1>]>;   // idxen(imm)

def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ubyte : SDNode <"AMDGPUISD::BUFFER_LOAD_UBYTE", SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ushort : SDNode <"AMDGPUISD::BUFFER_LOAD_USHORT", SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_byte : SDNode <"AMDGPUISD::BUFFER_LOAD_BYTE", SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_short: SDNode <"AMDGPUISD::BUFFER_LOAD_SHORT", SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format_d16 : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_D16",
                            SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;

def SDTBufferStore : SDTypeProfile<0, 8,
  [                     // vdata
   SDTCisVT<1, v4i32>,  // rsrc
   SDTCisVT<2, i32>,    // vindex(VGPR)
   SDTCisVT<3, i32>,    // voffset(VGPR)
   SDTCisVT<4, i32>,    // soffset(SGPR)
   SDTCisVT<5, i32>,    // offset(imm)
   SDTCisVT<6, i32>,    // cachepolicy, swizzled buffer(imm)
   SDTCisVT<7, i1>]>;   // idxen(imm)

def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
                             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_byte: SDNode <"AMDGPUISD::BUFFER_STORE_BYTE",
                                 SDTBufferStore,
                                 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_short : SDNode <"AMDGPUISD::BUFFER_STORE_SHORT",
                                   SDTBufferStore,
                                   [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_format : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT",
                                    SDTBufferStore,
                                    [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16",
                                        SDTBufferStore,
                                        [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

class SDBufferAtomic<string opcode> : SDNode <opcode,
  SDTypeProfile<1, 8,
    [SDTCisVT<2, v4i32>,  // rsrc
     SDTCisVT<3, i32>,    // vindex(VGPR)
     SDTCisVT<4, i32>,    // voffset(VGPR)
     SDTCisVT<5, i32>,    // soffset(SGPR)
     SDTCisVT<6, i32>,    // offset(imm)
     SDTCisVT<7, i32>,    // cachepolicy(imm)
     SDTCisVT<8, i1>]>,   // idxen(imm)
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;

class SDBufferAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
  SDTypeProfile<0, 8,
    [SDTCisVT<0, ty>,     // vdata
     SDTCisVT<1, v4i32>,  // rsrc
     SDTCisVT<2, i32>,    // vindex(VGPR)
     SDTCisVT<3, i32>,    // voffset(VGPR)
     SDTCisVT<4, i32>,    // soffset(SGPR)
     SDTCisVT<5, i32>,    // offset(imm)
     SDTCisVT<6, i32>,    // cachepolicy(imm)
     SDTCisVT<7, i1>]>,   // idxen(imm)
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;

def SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
def SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">;
def SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">;
def SIbuffer_atomic_smin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMIN">;
def SIbuffer_atomic_umin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMIN">;
def SIbuffer_atomic_smax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMAX">;
def SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
def SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">;
def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
def SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">;
def SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">;
def SIbuffer_atomic_csub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_CSUB">;
def SIbuffer_atomic_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_FADD", f32>;
def SIbuffer_atomic_pk_fadd : SDBufferAtomicNoRtn <"AMDGPUISD::BUFFER_ATOMIC_PK_FADD", v2f16>;

def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
  SDTypeProfile<1, 9,
    [SDTCisVT<0, i32>,    // dst
     SDTCisVT<1, i32>,    // src
     SDTCisVT<2, i32>,    // cmp
     SDTCisVT<3, v4i32>,  // rsrc
     SDTCisVT<4, i32>,    // vindex(VGPR)
     SDTCisVT<5, i32>,    // voffset(VGPR)
     SDTCisVT<6, i32>,    // soffset(SGPR)
     SDTCisVT<7, i32>,    // offset(imm)
     SDTCisVT<8, i32>,    // cachepolicy(imm)
     SDTCisVT<9, i1>]>,   // idxen(imm)
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;

class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
  SDTypeProfile<0, 2,
    [SDTCisPtrTy<0>,     // vaddr
     SDTCisVT<1, ty>]>,  // vdata
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;

def SIglobal_atomic_pk_fadd : SDGlobalAtomicNoRtn <"AMDGPUISD::ATOMIC_PK_FADD", v2f16>;

def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
  SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
>;

def SIlds : SDNode<"AMDGPUISD::LDS",
  SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
>;

def SIload_d16_lo : SDNode<"AMDGPUISD::LOAD_D16_LO",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_lo_u8 : SDNode<"AMDGPUISD::LOAD_D16_LO_U8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_lo_i8 : SDNode<"AMDGPUISD::LOAD_D16_LO_I8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi : SDNode<"AMDGPUISD::LOAD_D16_HI",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi_u8 : SDNode<"AMDGPUISD::LOAD_D16_HI_U8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi_i8 : SDNode<"AMDGPUISD::LOAD_D16_HI_I8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
  SDTypeProfile<0, 1, [SDTCisInt<0>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;

//===----------------------------------------------------------------------===//
// ValueType helpers
//===----------------------------------------------------------------------===//

// Returns 1 if the source arguments have modifiers, 0 if they do not.
// XXX - do f16 instructions?
class isFloatType<ValueType SrcVT> {
  bit ret =
    !if(!eq(SrcVT.Value, f16.Value), 1,
    !if(!eq(SrcVT.Value, f32.Value), 1,
    !if(!eq(SrcVT.Value, f64.Value), 1,
    !if(!eq(SrcVT.Value, v2f16.Value), 1,
    !if(!eq(SrcVT.Value, v4f16.Value), 1,
    !if(!eq(SrcVT.Value, v2f32.Value), 1,
    !if(!eq(SrcVT.Value, v2f64.Value), 1,
    0)))))));
}

class isIntType<ValueType SrcVT> {
  bit ret =
    !if(!eq(SrcVT.Value, i16.Value), 1,
    !if(!eq(SrcVT.Value, i32.Value), 1,
    !if(!eq(SrcVT.Value, i64.Value), 1,
    0)));
}

class isPackedType<ValueType SrcVT> {
  bit ret =
    !if(!eq(SrcVT.Value, v2i16.Value), 1,
    !if(!eq(SrcVT.Value, v2f16.Value), 1,
    !if(!eq(SrcVT.Value, v4f16.Value), 1, 0)
    ));
}

//===----------------------------------------------------------------------===//
// PatFrags for global memory operations
//===----------------------------------------------------------------------===//

let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_global").AddrSpaces in {
defm atomic_csub_global : binary_atomic_op<SIatomic_csub>;
}

foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {


defm atomic_inc_#as : binary_atomic_op<SIatomic_inc>;
defm atomic_dec_#as : binary_atomic_op<SIatomic_dec>;
defm atomic_load_fmin_#as : binary_atomic_op<SIatomic_fmin, 0>;
defm atomic_load_fmax_#as : binary_atomic_op<SIatomic_fmax, 0>;


} // End let AddressSpaces = ...
} // End foreach AddrSpace

def atomic_fadd_global_noret : PatFrag<
  (ops node:$ptr, node:$value),
  (atomic_load_fadd node:$ptr, node:$value)> {
  // FIXME: Move this
  let MemoryVT = f32;
  let IsAtomic = 1;
  let AddressSpaces = StoreAddress_global.AddrSpaces;
}

def atomic_pk_fadd_global_noret : PatFrag<
  (ops node:$ptr, node:$value),
  (SIglobal_atomic_pk_fadd node:$ptr, node:$value)> {
  // FIXME: Move this
  let MemoryVT = v2f16;
  let IsAtomic = 1;
  let AddressSpaces = StoreAddress_global.AddrSpaces;
}

//===----------------------------------------------------------------------===//
// SDNodes PatFrags for loads/stores with a glue input.
// This is for SDNodes and PatFrag for local loads and stores to
// enable s_mov_b32 m0, -1 to be glued to the memory instructions.
//
// These mirror the regular load/store PatFrags and rely on special
// processing during Select() to add the glued copy.
//
//===----------------------------------------------------------------------===//

def AMDGPUld_glue : SDNode <"ISD::LOAD", SDTLoad,
  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad,
  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> {
  let IsLoad = 1;
  let IsUnindexed = 1;
}

def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def atomic_load_32_glue : PatFrag<(ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_load_64_glue : PatFrag<(ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}

def extload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsAnyExtLoad = 1;
}

def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsSignExtLoad = 1;
}

def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsZeroExtLoad = 1;
}

def extloadi8_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def zextloadi8_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def extloadi16_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def zextloadi16_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def sextloadi8_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}


let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
  let IsNonExtLoad = 1;
}

let MemoryVT = i8 in {
def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>;
def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>;
def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>;
}

let MemoryVT = i16 in {
def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>;
def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>;
def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>;
}

def load_align8_local_m0 : PatFrag<(ops node:$ptr),
                                   (load_local_m0 node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
  let MinAlignment = 8;
}
def load_align16_local_m0 : PatFrag<(ops node:$ptr),
                                    (load_local_m0 node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
  let MinAlignment = 16;
}

} // End IsLoad = 1

let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
                                      (atomic_load_32_glue node:$ptr)> {
  let MemoryVT = i32;
}
def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr),
                                      (atomic_load_64_glue node:$ptr)> {
  let MemoryVT = i64;
}

} // End let AddressSpaces = LoadAddress_local.AddrSpaces


def AMDGPUst_glue : SDNode <"ISD::STORE", SDTStore,
  [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
>;

def AMDGPUatomic_st_glue : SDNode <"ISD::ATOMIC_STORE", SDTAtomicStore,
  [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
>;

def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr),
                                  (AMDGPUst_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsUnindexed = 1;
}

def store_glue : PatFrag<(ops node:$val, node:$ptr),
                         (unindexedstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

def truncstore_glue : PatFrag<(ops node:$val, node:$ptr),
                              (unindexedstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 1;
}

def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr),
                                (truncstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i8;
}

def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr),
                                 (truncstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i16;
}

let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
def store_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                             (store_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

def truncstorei8_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                    (unindexedstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i8;
}

def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                     (unindexedstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i16;
}
}

def store_align16_local_m0 : PatFrag <
  (ops node:$value, node:$ptr),
  (store_local_m0 node:$value, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
  let MinAlignment = 16;
}

def store_align8_local_m0 : PatFrag <
  (ops node:$value, node:$ptr),
  (store_local_m0 node:$value, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
  let MinAlignment = 8;
}

let AddressSpaces = StoreAddress_local.AddrSpaces in {

def atomic_store_local_32_m0 : PatFrag <
  (ops node:$value, node:$ptr),
  (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}
def atomic_store_local_64_m0 : PatFrag <
  (ops node:$value, node:$ptr),
  (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}
} // End let AddressSpaces = StoreAddress_local.AddrSpaces


def si_setcc_uniform : PatFrag <
  (ops node:$lhs, node:$rhs, node:$cond),
  (setcc node:$lhs, node:$rhs, node:$cond), [{
  for (SDNode *Use : N->uses()) {
    if (Use->isMachineOpcode() || Use->getOpcode() != ISD::CopyToReg)
      return false;

    unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
    if (Reg != AMDGPU::SCC)
      return false;
  }
  return true;
}]>;

//===----------------------------------------------------------------------===//
// SDNodes PatFrags for d16 loads
//===----------------------------------------------------------------------===//

class LoadD16Frag <SDPatternOperator op> : PatFrag<
  (ops node:$ptr, node:$tied_in),
  (op node:$ptr, node:$tied_in)> {
  let IsLoad = 1;
}

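// The foreach below stamps out one LoadD16Frag instance per address space by
// concatenating the name with the address-space suffix, producing, for
// example, load_d16_hi_global, az_extloadi8_d16_hi_flat and
// sextloadi8_d16_lo_local.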
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

def load_d16_hi_#as : LoadD16Frag <SIload_d16_hi>;

def az_extloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_u8> {
  let MemoryVT = i8;
}

def sextloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_i8> {
  let MemoryVT = i8;
}

def load_d16_lo_#as : LoadD16Frag <SIload_d16_lo>;

def az_extloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_u8> {
  let MemoryVT = i8;
}

def sextloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_i8> {
  let MemoryVT = i8;
}

} // End let AddressSpaces = ...
} // End foreach AddrSpace

def lshr_rev : PatFrag <
  (ops node:$src1, node:$src0),
  (srl $src0, $src1)
>;

def ashr_rev : PatFrag <
  (ops node:$src1, node:$src0),
  (sra $src0, $src1)
>;

def lshl_rev : PatFrag <
  (ops node:$src1, node:$src0),
  (shl $src0, $src1)
>;

def add_ctpop : PatFrag <
  (ops node:$src0, node:$src1),
  (add (ctpop $src0), $src1)
>;

foreach I = 1-4 in {
def shl#I#_add : PatFrag <
  (ops node:$src0, node:$src1),
  (add (shl_oneuse $src0, (i32 I)), $src1)> {
  // FIXME: Poor substitute for disabling pattern in SelectionDAG
  let PredicateCode = [{return false;}];
  let GISelPredicateCode = [{return true;}];
}
}

multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
                            SDTypeProfile tc = SDTAtomic2,
                            bit IsInt = 1> {

  def _glue : SDNode <
    !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, tc,
    [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
  >;

  let AddressSpaces = StoreAddress_local.AddrSpaces in {
    defm _local_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
  }

  let AddressSpaces = StoreAddress_region.AddrSpaces in {
    defm _region_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
  }
}

defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
defm atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
defm atomic_load_csub : SIAtomicM0Glue2 <"LOAD_CSUB", 1>;
defm atomic_inc : SIAtomicM0Glue2 <"INC", 1>;
defm atomic_dec : SIAtomicM0Glue2 <"DEC", 1>;
defm atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
defm atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">;
defm atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">;
defm atomic_load_or : SIAtomicM0Glue2 <"LOAD_OR">;
defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
defm atomic_swap : SIAtomicM0Glue2 <"SWAP">;
defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>;
defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32, 0>;
defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32, 0>;

def as_i1timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
}]>;

def as_i8imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8);
}]>;

def as_i8timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;

def as_i16imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;

def as_i16timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;

def as_i32imm: SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

def as_i32timm: SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

def as_i64imm: SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
}]>;

def cond_as_i32imm: SDNodeXForm<cond, [{
  return CurDAG->getTargetConstant(N->get(), SDLoc(N), MVT::i32);
}]>;

// Copied from the AArch64 backend:
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def frameindex_to_targetframeindex : SDNodeXForm<frameindex, [{
  auto FI = cast<FrameIndexSDNode>(N);
  return CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
}]>;

// Copied from the AArch64 backend:
def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;

class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
  uint64_t Imm = N->getZExtValue();
  unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;
  return CurDAG->getTargetConstant(Bit, SDLoc(N), MVT::i1);
}]>;

def SIMM16bit : ImmLeaf <i32,
  [{return isInt<16>(Imm);}]
>;

def UIMM16bit : ImmLeaf <i32,
  [{return isUInt<16>(Imm);}]
>;

def i64imm_32bit : ImmLeaf<i64, [{
  return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
}]>;

def InlineImm16 : ImmLeaf<i16, [{
  return isInlineImmediate16(Imm);
}]>;

def InlineImm32 : ImmLeaf<i32, [{
  return isInlineImmediate32(Imm);
}]>;

def InlineImm64 : ImmLeaf<i64, [{
  return isInlineImmediate64(Imm);
}]>;

def InlineImmFP32 : FPImmLeaf<f32, [{
  return isInlineImmediate(Imm);
}]>;

def InlineImmFP64 : FPImmLeaf<f64, [{
  return isInlineImmediate(Imm);
}]>;


class VGPRImm <dag frag> : PatLeaf<frag, [{
  return isVGPRImm(N);
}]>;

def NegateImm : SDNodeXForm<imm, [{
  return CurDAG->getConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

// TODO: When FP inline imm values work?
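// For example (illustrative): an operand value of -20 is outside the integer
// inline constant range (-16..64), but its negation 20 is a valid inline
// immediate, so patterns built on the leaves below can fold such constants by
// negating them with NegateImm.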
def NegSubInlineConst32 : ImmLeaf<i32, [{
  return Imm < -16 && Imm >= -64;
}], NegateImm>;

def NegSubInlineIntConst16 : ImmLeaf<i16, [{
  return Imm < -16 && Imm >= -64;
}], NegateImm>;

def ShiftAmt32Imm : ImmLeaf <i32, [{
  return Imm < 32;
}]>;

def getNegV2I16Imm : SDNodeXForm<build_vector, [{
  return SDValue(packNegConstantV2I16(N, *CurDAG), 0);
}]>;

def NegSubInlineConstV216 : PatLeaf<(build_vector), [{
  assert(N->getNumOperands() == 2);
  assert(N->getOperand(0).getValueType().getSizeInBits() == 16);
  SDValue Src0 = N->getOperand(0);
  SDValue Src1 = N->getOperand(1);
  if (Src0 == Src1)
    return isNegInlineImmediate(Src0.getNode());

  return (isNullConstantOrUndef(Src0) && isNegInlineImmediate(Src1.getNode())) ||
         (isNullConstantOrUndef(Src1) && isNegInlineImmediate(Src0.getNode()));
}], getNegV2I16Imm>;

//===----------------------------------------------------------------------===//
// MUBUF/SMEM Patterns
//===----------------------------------------------------------------------===//

def extract_glc : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i8);
}]>;

def extract_slc : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant((N->getZExtValue() >> 1) & 1, SDLoc(N), MVT::i8);
}]>;

def extract_dlc : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8);
}]>;

def extract_swz : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant((N->getZExtValue() >> 3) & 1, SDLoc(N), MVT::i8);
}]>;

//===----------------------------------------------------------------------===//
// Custom Operands
//===----------------------------------------------------------------------===//

def SoppBrTarget : AsmOperandClass {
  let Name = "SoppBrTarget";
  let ParserMethod = "parseSOppBrTarget";
}

def sopp_brtarget : Operand<OtherVT> {
  let EncoderMethod = "getSOPPBrEncoding";
  let DecoderMethod = "decodeSoppBrTarget";
  let OperandType = "OPERAND_PCREL";
  let ParserMatchClass = SoppBrTarget;
}

def si_ga : Operand<iPTR>;

def InterpSlotMatchClass : AsmOperandClass {
  let Name = "InterpSlot";
  let PredicateMethod = "isInterpSlot";
  let ParserMethod = "parseInterpSlot";
  let RenderMethod = "addImmOperands";
}

def InterpSlot : Operand<i32> {
  let PrintMethod = "printInterpSlot";
  let ParserMatchClass = InterpSlotMatchClass;
  let OperandType = "OPERAND_IMMEDIATE";
}

def AttrMatchClass : AsmOperandClass {
  let Name = "Attr";
  let PredicateMethod = "isInterpAttr";
  let ParserMethod = "parseInterpAttr";
  let RenderMethod = "addImmOperands";
}

// It appears to be necessary to create a separate operand for this to
// be able to parse attr<num> with no space.
def Attr : Operand<i32> {
  let PrintMethod = "printInterpAttr";
  let ParserMatchClass = AttrMatchClass;
  let OperandType = "OPERAND_IMMEDIATE";
}

def AttrChanMatchClass : AsmOperandClass {
  let Name = "AttrChan";
  let PredicateMethod = "isAttrChan";
  let RenderMethod = "addImmOperands";
}

def AttrChan : Operand<i32> {
  let PrintMethod = "printInterpAttrChan";
  let ParserMatchClass = AttrChanMatchClass;
  let OperandType = "OPERAND_IMMEDIATE";
}

def SendMsgMatchClass : AsmOperandClass {
  let Name = "SendMsg";
  let PredicateMethod = "isSendMsg";
  let ParserMethod = "parseSendMsgOp";
  let RenderMethod = "addImmOperands";
}

def SwizzleMatchClass : AsmOperandClass {
  let Name = "Swizzle";
  let PredicateMethod = "isSwizzle";
  let ParserMethod = "parseSwizzleOp";
  let RenderMethod = "addImmOperands";
  let IsOptional = 1;
}

def EndpgmMatchClass : AsmOperandClass {
  let Name = "EndpgmImm";
  let PredicateMethod = "isEndpgm";
  let ParserMethod = "parseEndpgmOp";
  let RenderMethod = "addImmOperands";
  let IsOptional = 1;
}

def ExpTgtMatchClass : AsmOperandClass {
  let Name = "ExpTgt";
  let PredicateMethod = "isExpTgt";
  let ParserMethod = "parseExpTgt";
  let RenderMethod = "printExpTgt";
}

def SWaitMatchClass : AsmOperandClass {
  let Name = "SWaitCnt";
  let RenderMethod = "addImmOperands";
  let ParserMethod = "parseSWaitCntOps";
}

def VReg32OrOffClass : AsmOperandClass {
  let Name = "VReg32OrOff";
  let ParserMethod = "parseVReg32OrOff";
}

let OperandType = "OPERAND_IMMEDIATE" in {
def SendMsgImm : Operand<i32> {
  let PrintMethod = "printSendMsg";
  let ParserMatchClass = SendMsgMatchClass;
}

def SwizzleImm : Operand<i16> {
  let PrintMethod = "printSwizzle";
  let ParserMatchClass = SwizzleMatchClass;
}

def EndpgmImm : Operand<i16> {
  let PrintMethod = "printEndpgm";
  let ParserMatchClass = EndpgmMatchClass;
}

def WAIT_FLAG : Operand <i32> {
  let ParserMatchClass = SWaitMatchClass;
  let PrintMethod = "printWaitFlag";
}
} // End OperandType = "OPERAND_IMMEDIATE"

include "SIInstrFormats.td"
include "VIInstrFormats.td"

def BoolReg : AsmOperandClass {
  let Name = "BoolReg";
  let ParserMethod = "parseBoolReg";
  let RenderMethod = "addRegOperands";
}

class BoolRC : RegisterOperand<SReg_1> {
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
}

def SSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
}

def VOPDstS64orS32 : BoolRC {
  let PrintMethod = "printVOPDst";
}

// SCSrc_i1 is the operand for pseudo instructions only.
// Boolean immediates shall not be exposed to codegen instructions.
def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_REG_IMM_INT32";
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
}

// ===----------------------------------------------------------------------===//
// ExpSrc* Special cases for exp src operands which are printed as
// "off" depending on en operand.
// ===----------------------------------------------------------------------===//

def ExpSrc0 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc0";
  let ParserMatchClass = VReg32OrOffClass;
}

def ExpSrc1 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc1";
  let ParserMatchClass = VReg32OrOffClass;
}

def ExpSrc2 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc2";
  let ParserMatchClass = VReg32OrOffClass;
}

def ExpSrc3 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc3";
  let ParserMatchClass = VReg32OrOffClass;
}

class SDWASrc<ValueType vt> : RegisterOperand<VS_32> {
  let OperandNamespace = "AMDGPU";
  string Type = !if(isFloatType<vt>.ret, "FP", "INT");
  let OperandType = "OPERAND_REG_INLINE_C_"#Type#vt.Size;
  let DecoderMethod = "decodeSDWASrc"#vt.Size;
  let EncoderMethod = "getSDWASrcEncoding";
}

def SDWASrc_i32 : SDWASrc<i32>;
def SDWASrc_i16 : SDWASrc<i16>;
def SDWASrc_f32 : SDWASrc<f32>;
def SDWASrc_f16 : SDWASrc<f16>;

def SDWAVopcDst : BoolRC {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_SDWA_VOPC_DST";
  let EncoderMethod = "getSDWAVopcDstEncoding";
  let DecoderMethod = "decodeSDWAVopcDst";
  let PrintMethod = "printVOPDst";
}

class NamedMatchClass<string CName, bit Optional = 1> : AsmOperandClass {
  let Name = "Imm"#CName;
  let PredicateMethod = "is"#CName;
  let ParserMethod = !if(Optional, "parseOptionalOperand", "parse"#CName);
  let RenderMethod = "addImmOperands";
  let IsOptional = Optional;
  let DefaultMethod = !if(Optional, "default"#CName, ?);
}

class NamedOperandBit<string Name, AsmOperandClass MatchClass> : Operand<i1> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

class NamedOperandBit_0<string Name, AsmOperandClass MatchClass> :
  OperandWithDefaultOps<i1, (ops (i1 0))> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

class NamedOperandU8<string Name, AsmOperandClass MatchClass> : Operand<i8> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

class NamedOperandU16<string Name, AsmOperandClass MatchClass> : Operand<i16> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

class NamedOperandU32<string Name, AsmOperandClass MatchClass> : Operand<i32> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

class NamedOperandU32_0<string Name, AsmOperandClass MatchClass> :
  OperandWithDefaultOps<i32, (ops (i32 0))> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

class NamedOperandU32Default0<string Name, AsmOperandClass MatchClass> :
  OperandWithDefaultOps<i32, (ops (i32 0))> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

let OperandType = "OPERAND_IMMEDIATE" in {

def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>;
def idxen : NamedOperandBit<"Idxen", NamedMatchClass<"Idxen">>;
def addr64 : NamedOperandBit<"Addr64", NamedMatchClass<"Addr64">>;

def flat_offset : NamedOperandU16<"FlatOffset", NamedMatchClass<"FlatOffset">>;
def offset : NamedOperandU16<"Offset", NamedMatchClass<"Offset">>;
def offset0 : NamedOperandU8<"Offset0", NamedMatchClass<"Offset0">>;
def offset1 : NamedOperandU8<"Offset1", NamedMatchClass<"Offset1">>;

def gds : NamedOperandBit<"GDS", NamedMatchClass<"GDS">>;

def omod : NamedOperandU32<"OModSI", NamedMatchClass<"OModSI">>;
def omod0 : NamedOperandU32_0<"OModSI", NamedMatchClass<"OModSI">>;

// We need to make the cases with a default of 0 distinct from no
// default to help deal with some cases where the operand appears
// before a mandatory operand.
def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>;
def clampmod0 : NamedOperandBit_0<"ClampSI", NamedMatchClass<"ClampSI">>;
def highmod : NamedOperandBit<"High", NamedMatchClass<"High">>;

def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>;
def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;
def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>;
def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
def GFX10A16 : NamedOperandBit<"GFX10A16", NamedMatchClass<"GFX10A16">>;
def D16 : NamedOperandBit<"D16", NamedMatchClass<"D16">>;
def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>;

def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT">>;

def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
def Dim : NamedOperandU8<"Dim", NamedMatchClass<"Dim", 0>>;

def dpp8 : NamedOperandU32<"DPP8", NamedMatchClass<"DPP8", 0>>;

def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>;
def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>;
def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>;
def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>;
def FI : NamedOperandU32<"FI", NamedMatchClass<"FI">>;

def dst_sel : NamedOperandU32<"SDWADstSel", NamedMatchClass<"SDWADstSel">>;
def src0_sel : NamedOperandU32<"SDWASrc0Sel", NamedMatchClass<"SDWASrc0Sel">>;
def src1_sel : NamedOperandU32<"SDWASrc1Sel", NamedMatchClass<"SDWASrc1Sel">>;
def dst_unused : NamedOperandU32<"SDWADstUnused", NamedMatchClass<"SDWADstUnused">>;

def op_sel : NamedOperandU32Default0<"OpSel", NamedMatchClass<"OpSel">>;
def op_sel_hi : NamedOperandU32Default0<"OpSelHi", NamedMatchClass<"OpSelHi">>;
def neg_lo : NamedOperandU32Default0<"NegLo", NamedMatchClass<"NegLo">>;
def neg_hi : NamedOperandU32Default0<"NegHi", NamedMatchClass<"NegHi">>;

def blgp : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>;
def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>;
def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>;

def hwreg : NamedOperandU32<"Hwreg", NamedMatchClass<"Hwreg", 0>>;

def exp_tgt : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {

}

} // End OperandType = "OPERAND_IMMEDIATE"

class KImmMatchClass<int size> : AsmOperandClass {
  let Name = "KImmFP"#size;
  let PredicateMethod = "isKImmFP"#size;
  let ParserMethod = "parseImm";
  let RenderMethod = "addKImmFP"#size#"Operands";
}

class kimmOperand<ValueType vt> : Operand<vt> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_KIMM"#vt.Size;
  let PrintMethod = "printU"#vt.Size#"ImmOperand";
  let ParserMatchClass = !cast<AsmOperandClass>("KImmFP"#vt.Size#"MatchClass");
}

// 32-bit VALU immediate operand that uses the constant bus.
def KImmFP32MatchClass : KImmMatchClass<32>;
def f32kimm : kimmOperand<i32>;

// 32-bit VALU immediate operand with a 16-bit value that uses the
// constant bus.
def KImmFP16MatchClass : KImmMatchClass<16>;
def f16kimm : kimmOperand<i16>;

class FPInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "RegOrImmWithFP"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithFPInputMods";
  let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
}

def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;

class InputMods <AsmOperandClass matchClass> : Operand <i32> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_INPUT_MODS";
  let ParserMatchClass = matchClass;
}

class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;

class IntInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "RegOrImmWithInt"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithIntInputMods";
  let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods";
}
def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
def Int64InputModsMatchClass : IntInputModsMatchClass<64>;

class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}
def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;

class OpSelModsMatchClass : AsmOperandClass {
  let Name = "OpSelMods";
  let ParserMethod = "parseRegOrImm";
  let PredicateMethod = "isRegOrImm";
}

def IntOpSelModsMatchClass : OpSelModsMatchClass;
def IntOpSelMods : InputMods<IntOpSelModsMatchClass>;

class FPSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "SDWAWithFP"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithFPInputMods";
  let PredicateMethod = "isSDWAFP"#opSize#"Operand";
}

def FP16SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<16>;
def FP32SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<32>;

class FPSDWAInputMods <FPSDWAInputModsMatchClass matchClass> :
  InputMods <matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

def FP16SDWAInputMods : FPSDWAInputMods<FP16SDWAInputModsMatchClass>;
def FP32SDWAInputMods : FPSDWAInputMods<FP32SDWAInputModsMatchClass>;

def FPVRegInputModsMatchClass : AsmOperandClass {
  let Name = "VRegWithFPInputMods";
  let ParserMethod = "parseRegWithFPInputMods";
  let PredicateMethod = "isVReg32";
}

def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "SDWAWithInt"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithIntInputMods";
  let PredicateMethod = "isSDWAInt"#opSize#"Operand";
}

def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>;
def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>;

class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> :
  InputMods <matchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}

def Int16SDWAInputMods : IntSDWAInputMods<Int16SDWAInputModsMatchClass>;
def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>;

def IntVRegInputModsMatchClass : AsmOperandClass {
  let Name = "VRegWithIntInputMods";
  let ParserMethod = "parseRegWithIntInputMods";
  let PredicateMethod = "isVReg32";
}

def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}

class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "PackedFP"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImm";
  let PredicateMethod = "isRegOrImm";
//  let PredicateMethod = "isPackedFP"#opSize#"InputMods";
}

class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "PackedInt"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImm";
  let PredicateMethod = "isRegOrImm";
//  let PredicateMethod = "isPackedInt"#opSize#"InputMods";
}

def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;

class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
//  let PrintMethod = "printPackedFPInputMods";
}

class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <matchClass> {
  //let PrintMethod = "printPackedIntInputMods";
}

def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;

//===----------------------------------------------------------------------===//
// Complex patterns
//===----------------------------------------------------------------------===//

def DS1Addr1Offset : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;
def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;

def MOVRELOffset : ComplexPattern<i32, 2, "SelectMOVRELOffset">;

def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">;
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
// VOP3Mods, but the input source is known to never be NaN.
def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;

def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;

def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;

def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;

def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;

def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;


def Hi16Elt : ComplexPattern<untyped, 1, "SelectHi16Elt">;

//===----------------------------------------------------------------------===//
// SI assembler operands
//===----------------------------------------------------------------------===//

def SIOperand {
  int ZERO = 0x80;
  int VCC = 0x6A;
  int FLAT_SCR = 0x68;
}

// This should be kept in sync with SISrcMods enum
def SRCMODS {
  int NONE = 0;
  int NEG = 1;
  int ABS = 2;
  int NEG_ABS = 3;

  int NEG_HI = ABS;
  int OP_SEL_0 = 4;
  int OP_SEL_1 = 8;
  int DST_OP_SEL = 8;
}

def DSTCLAMP {
  int NONE = 0;
  int ENABLE = 1;
}

def DSTOMOD {
  int NONE = 0;
}

def TRAPID {
  int LLVM_TRAP = 2;
  int LLVM_DEBUG_TRAP = 3;
}

def HWREG {
  int MODE = 1;
  int STATUS = 2;
  int TRAPSTS = 3;
  int HW_ID = 4;
  int GPR_ALLOC = 5;
  int LDS_ALLOC = 6;
  int IB_STS = 7;
  int MEM_BASES = 15;
  int TBA_LO = 16;
  int TBA_HI = 17;
  int TMA_LO = 18;
  int TMA_HI = 19;
  int FLAT_SCR_LO = 20;
  int FLAT_SCR_HI = 21;
  int XNACK_MASK = 22;
  int POPS_PACKER = 25;
  int SHADER_CYCLES = 29;
}

class getHwRegImm<int Reg, int Offset = 0, int Size = 32> {
  int ret = !or(Reg,
                !or(!shl(Offset, 6),
                    !shl(!add(Size, -1), 11)));
}

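// Worked example (illustrative): with the default Offset = 0 and Size = 32,
// getHwRegImm<HWREG.MODE>.ret evaluates to 1 | (0 << 6) | (31 << 11) = 0xF801,
// the encoded form of a full 32-bit access to the MODE hardware register as
// used by s_getreg_b32/s_setreg_b32.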
//===----------------------------------------------------------------------===//
//
// SI Instruction multiclass helpers.
//
// Instructions with _32 take 32-bit operands.
// Instructions with _64 take 64-bit operands.
//
// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
// encoding is the standard encoding, but instructions that make use of
// any of the instruction modifiers must use the 64-bit encoding.
//
// Instructions with _e32 use the 32-bit encoding.
// Instructions with _e64 use the 64-bit encoding.
//
//===----------------------------------------------------------------------===//

class SIMCInstr <string pseudo, int subtarget> {
  string PseudoInstr = pseudo;
  int Subtarget = subtarget;
}

//===----------------------------------------------------------------------===//
// EXP classes
//===----------------------------------------------------------------------===//

class EXP_Helper<bit done> : EXPCommon<
  (outs),
  (ins exp_tgt:$tgt,
       ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3,
       exp_vm:$vm, exp_compr:$compr, i32imm:$en),
  "exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm", []> {
  let AsmMatchConverter = "cvtExp";
}

// Split EXP instruction into EXP and EXP_DONE so we can set
// mayLoad for done=1.
multiclass EXP_m<bit done> {
  let mayLoad = done, DisableWQM = 1 in {
    let isPseudo = 1, isCodeGenOnly = 1 in {
      def "" : EXP_Helper<done>,
               SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.NONE>;
    }

    let done = done in {
      def _si : EXP_Helper<done>,
                SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.SI>,
                EXPe {
        let AssemblerPredicate = isGFX6GFX7;
        let DecoderNamespace = "GFX6GFX7";
        let DisableDecoder = DisableSIDecoder;
      }

      def _vi : EXP_Helper<done>,
                SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.VI>,
                EXPe_vi {
        let AssemblerPredicate = isGFX8GFX9;
        let DecoderNamespace = "GFX8";
        let DisableDecoder = DisableVIDecoder;
      }

      def _gfx10 : EXP_Helper<done>,
                   SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.GFX10>,
                   EXPe {
        let AssemblerPredicate = isGFX10Plus;
        let DecoderNamespace = "GFX10";
        let DisableDecoder = DisableSIDecoder;
      }
    }
  }
}

//===----------------------------------------------------------------------===//
// Vector ALU classes
//===----------------------------------------------------------------------===//

class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> {
  int ret =
    !if (!eq(Src0.Value, untyped.Value), 0,
      !if (!eq(Src1.Value, untyped.Value), 1,   // VOP1
        !if (!eq(Src2.Value, untyped.Value), 2, // VOP2
          3)));                                 // VOP3
}

// Returns the register class to use for the destination of VOP[123C]
// instructions for the given VT.
class getVALUDstForVT<ValueType VT> {
  RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
                          !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
                            !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
                              !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32>,
                                VOPDstS64orS32)))); // else VT == i1
}

// Returns the register class to use for the destination of VOP[12C]
// instructions with SDWA extension
class getSDWADstForVT<ValueType VT> {
  RegisterOperand ret = !if(!eq(VT.Size, 1),
                            SDWAVopcDst, // VOPC
                            VOPDstOperand<VGPR_32>); // VOP1/2 32-bit dst
}

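// For example (illustrative): getVALUDstForVT<f32>.ret and
// getVALUDstForVT<f16>.ret are both VOPDstOperand<VGPR_32>,
// getVALUDstForVT<i64>.ret is VOPDstOperand<VReg_64>, and
// getVALUDstForVT<i1>.ret falls back to VOPDstS64orS32 for carry-out and
// compare results.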
// Returns the register class to use for source 0 of VOP[12C]
// instructions for the given VT.
class getVOPSrc0ForVT<ValueType VT> {
  bit isFP = isFloatType<VT>.ret;

  RegisterOperand ret =
    !if(isFP,
      !if(!eq(VT.Size, 64),
        VSrc_f64,
        !if(!eq(VT.Value, f16.Value),
          VSrc_f16,
          !if(!eq(VT.Value, v2f16.Value),
            VSrc_v2f16,
            !if(!eq(VT.Value, v4f16.Value),
              AVSrc_64,
              VSrc_f32
            )
          )
        )
      ),
      !if(!eq(VT.Size, 64),
        VSrc_b64,
        !if(!eq(VT.Value, i16.Value),
          VSrc_b16,
          !if(!eq(VT.Value, v2i16.Value),
            VSrc_v2b16,
            VSrc_b32
          )
        )
      )
    );
}

// Returns the vreg register class to use for source operand given VT
class getVregSrcForVT<ValueType VT> {
  RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128,
                        !if(!eq(VT.Size, 96), VReg_96,
                          !if(!eq(VT.Size, 64), VReg_64,
                            !if(!eq(VT.Size, 48), VReg_64,
                              VGPR_32))));
}

class getSDWASrcForVT <ValueType VT> {
  bit isFP = isFloatType<VT>.ret;
  RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32);
  RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32);
  RegisterOperand ret = !if(isFP, retFlt, retInt);
}

// Returns the register class to use for sources of VOP3 instructions for the
// given VT.
class getVOP3SrcForVT<ValueType VT> {
  bit isFP = isFloatType<VT>.ret;
  RegisterOperand ret =
    !if(!eq(VT.Size, 128),
      VSrc_128,
      !if(!eq(VT.Size, 64),
        !if(isFP,
          VSrc_f64,
          VSrc_b64),
        !if(!eq(VT.Value, i1.Value),
          SSrc_i1,
          !if(isFP,
            !if(!eq(VT.Value, f16.Value),
              VSrc_f16,
              !if(!eq(VT.Value, v2f16.Value),
                VSrc_v2f16,
                !if(!eq(VT.Value, v4f16.Value),
                  AVSrc_64,
                  VSrc_f32
                )
              )
            ),
            !if(!eq(VT.Value, i16.Value),
              VSrc_b16,
              !if(!eq(VT.Value, v2i16.Value),
                VSrc_v2b16,
                VSrc_b32
              )
            )
          )
        )
      )
    );
}

// Float or packed int
class isModifierType<ValueType SrcVT> {
  bit ret =
    !if(!eq(SrcVT.Value, f16.Value), 1,
    !if(!eq(SrcVT.Value, f32.Value), 1,
    !if(!eq(SrcVT.Value, f64.Value), 1,
    !if(!eq(SrcVT.Value, v2f16.Value), 1,
    !if(!eq(SrcVT.Value, v2i16.Value), 1,
    0)))));
}

// Return type of input modifiers operand for the specified input operand
class getSrcMod <ValueType VT, bit EnableF32SrcMods> {
  bit isFP = isFloatType<VT>.ret;
  bit isPacked = isPackedType<VT>.ret;
  Operand ret = !if(!eq(VT.Size, 64),
                  !if(isFP, FP64InputMods, Int64InputMods),
                  !if(isFP,
                    !if(!eq(VT.Value, f16.Value),
                      FP16InputMods,
                      FP32InputMods
                    ),
                    !if(EnableF32SrcMods, FP32InputMods, Int32InputMods))
                );
}

class getOpSelMod <ValueType VT> {
  Operand ret = !if(!eq(VT.Value, f16.Value), FP16InputMods, IntOpSelMods);
}

// Return type of input modifiers operand for the specified input operand for DPP
class getSrcModExt <ValueType VT> {
  bit isFP = isFloatType<VT>.ret;
  Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);
}

// Return type of input modifiers operand for the specified input operand for SDWA
class getSrcModSDWA <ValueType VT> {
  Operand ret = !if(!eq(VT.Value, f16.Value), FP16SDWAInputMods,
                  !if(!eq(VT.Value, f32.Value), FP32SDWAInputMods,
                    !if(!eq(VT.Value, i16.Value), Int16SDWAInputMods,
                      Int32SDWAInputMods)));
}

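// For example (illustrative): getVOPSrc0ForVT<f16>.ret is VSrc_f16 and
// getVOPSrc0ForVT<v2i16>.ret is VSrc_v2b16, while getVOP3SrcForVT<i1>.ret is
// SSrc_i1; likewise getSrcMod<f16, 0>.ret selects FP16InputMods and
// getSrcMod<i32, 0>.ret selects Int32InputMods.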
// Returns the input arguments for VOP[12C] instructions for the given SrcVT.
class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
  dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0),               // VOP1
              !if(!eq(NumSrcArgs, 2), (ins Src0RC:$src0, Src1RC:$src1), // VOP2
                (ins)));
}

// Returns the input arguments for VOP3 instructions for the given SrcVT.
class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
                RegisterOperand Src2RC, int NumSrcArgs,
                bit HasIntClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
                Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {

  dag ret =
    !if (!eq(NumSrcArgs, 0),
      // VOP1 without input operands (V_NOP, V_CLREXCP)
      (ins),
      /* else */
      !if (!eq(NumSrcArgs, 1),
        !if (!eq(HasModifiers, 1),
          // VOP1 with modifiers
          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
               clampmod0:$clamp, omod0:$omod)
        /* else */,
          // VOP1 without modifiers
          !if (!eq(HasIntClamp, 1),
            (ins Src0RC:$src0, clampmod0:$clamp),
            (ins Src0RC:$src0))
        /* endif */ ),
        !if (!eq(NumSrcArgs, 2),
          !if (!eq(HasModifiers, 1),
            // VOP 2 with modifiers
            !if( !eq(HasOMod, 1),
              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                   Src1Mod:$src1_modifiers, Src1RC:$src1,
                   clampmod0:$clamp, omod0:$omod),
              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                   Src1Mod:$src1_modifiers, Src1RC:$src1,
                   clampmod0:$clamp))
          /* else */,
            // VOP2 without modifiers
            !if (!eq(HasIntClamp, 1),
              (ins Src0RC:$src0, Src1RC:$src1, clampmod0:$clamp),
              (ins Src0RC:$src0, Src1RC:$src1))

          /* endif */ )
        /* NumSrcArgs == 3 */,
          !if (!eq(HasModifiers, 1),
            !if (!eq(HasSrc2Mods, 1),
              // VOP3 with modifiers
              !if (!eq(HasOMod, 1),
                (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                     Src1Mod:$src1_modifiers, Src1RC:$src1,
                     Src2Mod:$src2_modifiers, Src2RC:$src2,
                     clampmod0:$clamp, omod0:$omod),
                !if (!eq(HasIntClamp, 1),
                  (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                       Src1Mod:$src1_modifiers, Src1RC:$src1,
                       Src2Mod:$src2_modifiers, Src2RC:$src2,
                       clampmod0:$clamp),
                  (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                       Src1Mod:$src1_modifiers, Src1RC:$src1,
                       Src2Mod:$src2_modifiers, Src2RC:$src2))),
              // VOP3 with modifiers except src2
              !if (!eq(HasOMod, 1),
                (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                     Src1Mod:$src1_modifiers, Src1RC:$src1,
                     Src2RC:$src2, clampmod0:$clamp, omod0:$omod),
                !if (!eq(HasIntClamp, 1),
                  (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                       Src1Mod:$src1_modifiers, Src1RC:$src1,
                       Src2RC:$src2, clampmod0:$clamp),
                  (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                       Src1Mod:$src1_modifiers, Src1RC:$src1,
                       Src2RC:$src2))))
          /* else */,
            // VOP3 without modifiers
            !if (!eq(HasIntClamp, 1),
              (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2, clampmod0:$clamp),
              (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2))
          /* endif */ ))));
}

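// For example (illustrative): with NumSrcArgs = 2, HasModifiers = 1 and
// HasOMod = 1, getIns64 produces
//   (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
//        Src1Mod:$src1_modifiers, Src1RC:$src1,
//        clampmod0:$clamp, omod0:$omod)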
/// XXX - src1 may only allow VGPRs?

// The modifiers (except clamp) are dummy operands for the benefit of
// printing and parsing. They defer their values to looking at the
// srcN_modifiers for what to print.
class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
                   RegisterOperand Src2RC, int NumSrcArgs,
                   bit HasClamp,
                   Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
  dag ret = !if (!eq(NumSrcArgs, 2),
    !if (HasClamp,
      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
           Src1Mod:$src1_modifiers, Src1RC:$src1,
           clampmod0:$clamp,
           op_sel:$op_sel, op_sel_hi:$op_sel_hi,
           neg_lo:$neg_lo, neg_hi:$neg_hi),
      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
           Src1Mod:$src1_modifiers, Src1RC:$src1,
           op_sel:$op_sel, op_sel_hi:$op_sel_hi,
           neg_lo:$neg_lo, neg_hi:$neg_hi)),
    // else NumSrcArgs == 3
    !if (HasClamp,
      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
           Src1Mod:$src1_modifiers, Src1RC:$src1,
           Src2Mod:$src2_modifiers, Src2RC:$src2,
           clampmod0:$clamp,
           op_sel:$op_sel, op_sel_hi:$op_sel_hi,
           neg_lo:$neg_lo, neg_hi:$neg_hi),
      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
           Src1Mod:$src1_modifiers, Src1RC:$src1,
           Src2Mod:$src2_modifiers, Src2RC:$src2,
           op_sel:$op_sel, op_sel_hi:$op_sel_hi,
           neg_lo:$neg_lo, neg_hi:$neg_hi))
  );
}

class getInsVOP3OpSel <RegisterOperand Src0RC,
                       RegisterOperand Src1RC,
                       RegisterOperand Src2RC,
                       int NumSrcArgs,
                       bit HasClamp,
                       Operand Src0Mod,
                       Operand Src1Mod,
                       Operand Src2Mod> {
  dag ret = !if (!eq(NumSrcArgs, 2),
    !if (HasClamp,
      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
           Src1Mod:$src1_modifiers, Src1RC:$src1,
           clampmod0:$clamp,
           op_sel:$op_sel),
      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
           Src1Mod:$src1_modifiers, Src1RC:$src1,
           op_sel:$op_sel)),
    // else NumSrcArgs == 3
    !if (HasClamp,
      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
           Src1Mod:$src1_modifiers, Src1RC:$src1,
           Src2Mod:$src2_modifiers, Src2RC:$src2,
           clampmod0:$clamp,
           op_sel:$op_sel),
      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
           Src1Mod:$src1_modifiers, Src1RC:$src1,
           Src2Mod:$src2_modifiers, Src2RC:$src2,
           op_sel:$op_sel))
  );
}

class getInsDPP <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
                 int NumSrcArgs, bit HasModifiers,
                 Operand Src0Mod, Operand Src1Mod> {

  dag ret = !if (!eq(NumSrcArgs, 0),
    // VOP1 without input operands (V_NOP)
    (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
         bank_mask:$bank_mask, bound_ctrl:$bound_ctrl),
    !if (!eq(NumSrcArgs, 1),
      !if (!eq(HasModifiers, 1),
        // VOP1_DPP with modifiers
        (ins DstRC:$old, Src0Mod:$src0_modifiers,
             Src0RC:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
             bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
      /* else */,
        // VOP1_DPP without modifiers
        (ins DstRC:$old, Src0RC:$src0,
             dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
             bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
      /* endif */)
    /* NumSrcArgs == 2 */,
      !if (!eq(HasModifiers, 1),
        // VOP2_DPP with modifiers
        (ins DstRC:$old,
             Src0Mod:$src0_modifiers, Src0RC:$src0,
             Src1Mod:$src1_modifiers, Src1RC:$src1,
             dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
             bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
      /* else */,
        // VOP2_DPP without modifiers
        (ins DstRC:$old,
             Src0RC:$src0, Src1RC:$src1, dpp_ctrl:$dpp_ctrl,
             row_mask:$row_mask, bank_mask:$bank_mask,
             bound_ctrl:$bound_ctrl)
      /* endif */)));
}

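// Illustrative note (not part of the original source): for a single-source
// DPP op with modifiers, getInsDPP is expected to yield roughly
//
//   (ins DstRC:$old, Src0Mod:$src0_modifiers, Src0RC:$src0,
//        dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
//        bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
//
// with DstRC/Src0RC/Src0Mod resolved through the profile (for a 32-bit source
// typically VGPR_32 and FPVRegInputMods). The tied $old operand supplies the
// previous destination value used for lanes the DPP control masks off.
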
class getInsDPP16 <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
                   int NumSrcArgs, bit HasModifiers,
                   Operand Src0Mod, Operand Src1Mod> {
  dag ret = !con(getInsDPP<DstRC, Src0RC, Src1RC, NumSrcArgs,
                           HasModifiers, Src0Mod, Src1Mod>.ret,
                 (ins FI:$fi));
}

class getInsDPP8 <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
                  int NumSrcArgs, bit HasModifiers,
                  Operand Src0Mod, Operand Src1Mod> {
  dag ret = !if (!eq(NumSrcArgs, 0),
    // VOP1 without input operands (V_NOP)
    (ins dpp8:$dpp8, FI:$fi),
    !if (!eq(NumSrcArgs, 1),
      !if (!eq(HasModifiers, 1),
        // VOP1_DPP with modifiers
        (ins DstRC:$old, Src0Mod:$src0_modifiers,
             Src0RC:$src0, dpp8:$dpp8, FI:$fi)
      /* else */,
        // VOP1_DPP without modifiers
        (ins DstRC:$old, Src0RC:$src0, dpp8:$dpp8, FI:$fi)
      /* endif */)
    /* NumSrcArgs == 2 */,
      !if (!eq(HasModifiers, 1),
        // VOP2_DPP with modifiers
        (ins DstRC:$old,
             Src0Mod:$src0_modifiers, Src0RC:$src0,
             Src1Mod:$src1_modifiers, Src1RC:$src1,
             dpp8:$dpp8, FI:$fi)
      /* else */,
        // VOP2_DPP without modifiers
        (ins DstRC:$old,
             Src0RC:$src0, Src1RC:$src1, dpp8:$dpp8, FI:$fi)
      /* endif */)));
}


// Ins for SDWA
class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs,
                  bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod,
                  ValueType DstVT> {

  dag ret = !if(!eq(NumSrcArgs, 0),
    // VOP1 without input operands (V_NOP)
    (ins),
    !if(!eq(NumSrcArgs, 1),
      // VOP1
      !if(!eq(HasSDWAOMod, 0),
        // VOP1_SDWA without omod
        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
             clampmod:$clamp,
             dst_sel:$dst_sel, dst_unused:$dst_unused,
             src0_sel:$src0_sel),
        // VOP1_SDWA with omod
        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
             clampmod:$clamp, omod:$omod,
             dst_sel:$dst_sel, dst_unused:$dst_unused,
             src0_sel:$src0_sel)),
      !if(!eq(NumSrcArgs, 2),
        !if(!eq(DstVT.Size, 1),
          // VOPC_SDWA
          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
               Src1Mod:$src1_modifiers, Src1RC:$src1,
               clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
          // VOP2_SDWA
          !if(!eq(HasSDWAOMod, 0),
            // VOP2_SDWA without omod
            (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                 Src1Mod:$src1_modifiers, Src1RC:$src1,
                 clampmod:$clamp,
                 dst_sel:$dst_sel, dst_unused:$dst_unused,
                 src0_sel:$src0_sel, src1_sel:$src1_sel),
            // VOP2_SDWA with omod
            (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                 Src1Mod:$src1_modifiers, Src1RC:$src1,
                 clampmod:$clamp, omod:$omod,
                 dst_sel:$dst_sel, dst_unused:$dst_unused,
                 src0_sel:$src0_sel, src1_sel:$src1_sel))),
        (ins)/* endif */)));
}

// Outs for DPP and SDWA
class getOutsExt <bit HasDst, ValueType DstVT, RegisterOperand DstRCExt> {
  dag ret = !if(HasDst,
                !if(!eq(DstVT.Size, 1),
                    (outs), // no dst for VOPC, we use "vcc"-token as dst in SDWA VOPC instructions
                    (outs DstRCExt:$vdst)),
                (outs)); // V_NOP
}

// Outs for SDWA
class getOutsSDWA <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA> {
  dag ret = !if(HasDst,
                !if(!eq(DstVT.Size, 1),
                    (outs DstRCSDWA:$sdst),
                    (outs DstRCSDWA:$vdst)),
                (outs)); // V_NOP
}

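// Illustrative note (not part of the original source): for a two-source f32
// SDWA op without omod, the helpers above are expected to combine into
// something like
//
//   (ins FP32SDWAInputMods:$src0_modifiers, SDWASrc_f32:$src0,
//        FP32SDWAInputMods:$src1_modifiers, SDWASrc_f32:$src1,
//        clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
//        src0_sel:$src0_sel, src1_sel:$src1_sel)
//
// This is only a sketch of the shape; the concrete operand classes come from
// getSDWASrcForVT and getSrcModSDWA for the profile's source types.
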
// Returns the assembly string for the inputs and outputs of a VOP[12C]
// instruction. This does not add the _e32 suffix, so it can be reused
// by getAsm64.
class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
  string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
  string src0 = ", $src0";
  string src1 = ", $src1";
  string src2 = ", $src2";
  string ret = !if(HasDst, dst, "") #
               !if(!eq(NumSrcArgs, 1), src0, "") #
               !if(!eq(NumSrcArgs, 2), src0#src1, "") #
               !if(!eq(NumSrcArgs, 3), src0#src1#src2, "");
}

// Returns the assembly string for the inputs and outputs of a VOP3
// instruction.
class getAsm64 <bit HasDst, int NumSrcArgs, bit HasIntClamp, bit HasModifiers,
                bit HasOMod, ValueType DstVT = i32> {
  string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
  string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
  string src1 = !if(!eq(NumSrcArgs, 1), "",
                   !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
                                           " $src1_modifiers,"));
  string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
  string iclamp = !if(HasIntClamp, "$clamp", "");
  string ret =
    !if(!eq(HasModifiers, 0),
        getAsm32<HasDst, NumSrcArgs, DstVT>.ret # iclamp,
        dst#", "#src0#src1#src2#"$clamp"#!if(HasOMod, "$omod", ""));
}

// Returns the assembly string for the inputs and outputs of a VOP3P
// instruction.
class getAsmVOP3P <bit HasDst, int NumSrcArgs, bit HasModifiers,
                   bit HasClamp, ValueType DstVT = i32> {
  string dst = " $vdst";
  string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
  string src1 = !if(!eq(NumSrcArgs, 1), "",
                   !if(!eq(NumSrcArgs, 2), " $src1",
                                           " $src1,"));
  string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");

  string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
  string clamp = !if(HasClamp, "$clamp", "");

  // Each modifier is printed as an array of bits for each operand, so
  // all operands are printed as part of src0_modifiers.
  string ret = dst#", "#src0#src1#src2#"$op_sel$op_sel_hi"#mods#clamp;
}

class getAsmVOP3OpSel <int NumSrcArgs,
                       bit HasClamp,
                       bit Src0HasMods,
                       bit Src1HasMods,
                       bit Src2HasMods> {
  string dst = " $vdst";

  string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
  string isrc1 = !if(!eq(NumSrcArgs, 1), "",
                    !if(!eq(NumSrcArgs, 2), " $src1",
                                            " $src1,"));
  string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", "");

  string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
  string fsrc1 = !if(!eq(NumSrcArgs, 1), "",
                    !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
                                            " $src1_modifiers,"));
  string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");

  string src0 = !if(Src0HasMods, fsrc0, isrc0);
  string src1 = !if(Src1HasMods, fsrc1, isrc1);
  string src2 = !if(Src2HasMods, fsrc2, isrc2);

  string clamp = !if(HasClamp, "$clamp", "");

  string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp;
}

class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
  string dst = !if(HasDst,
                   !if(!eq(DstVT.Size, 1),
                       "$sdst",
                       "$vdst"),
                   ""); // use $sdst for VOPC
  string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
  string src1 = !if(!eq(NumSrcArgs, 1), "",
                   !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
                                           " $src1_modifiers,"));
  string args = !if(!eq(HasModifiers, 0),
                     getAsm32<0, NumSrcArgs, DstVT>.ret,
                     ", "#src0#src1);
  string ret = dst#args#" $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
}

class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
  string ret = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret#"$fi";
}

class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
  string dst = !if(HasDst,
                   !if(!eq(DstVT.Size, 1),
                       "$sdst",
                       "$vdst"),
                   ""); // use $sdst for VOPC
  string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
  string src1 = !if(!eq(NumSrcArgs, 1), "",
                   !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
                                           " $src1_modifiers,"));
  string args = !if(!eq(HasModifiers, 0),
                     getAsm32<0, NumSrcArgs, DstVT>.ret,
                     ", "#src0#src1);
  string ret = dst#args#"$dpp8$fi";
}

class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
  string dst = !if(HasDst,
                   !if(!eq(DstVT.Size, 1),
                       " vcc", // use vcc token as dst for VOPC instructions
                       "$vdst"),
                   "");
  string src0 = "$src0_modifiers";
  string src1 = "$src1_modifiers";
  string args = !if(!eq(NumSrcArgs, 0),
                    "",
                    !if(!eq(NumSrcArgs, 1),
                        ", "#src0#"$clamp",
                        ", "#src0#", "#src1#"$clamp"
                    )
                );
  string sdwa = !if(!eq(NumSrcArgs, 0),
                    "",
                    !if(!eq(NumSrcArgs, 1),
                        " $dst_sel $dst_unused $src0_sel",
                        !if(!eq(DstVT.Size, 1),
                            " $src0_sel $src1_sel", // No dst_sel and dst_unused for VOPC
                            " $dst_sel $dst_unused $src0_sel $src1_sel"
                        )
                    )
                );
  string ret = dst#args#sdwa;
}

class getAsmSDWA9 <bit HasDst, bit HasOMod, int NumSrcArgs,
                   ValueType DstVT = i32> {
  string dst = !if(HasDst,
                   !if(!eq(DstVT.Size, 1),
                       "$sdst",  // VOPC
                       "$vdst"), // VOP1/2
                   "");
  string src0 = "$src0_modifiers";
  string src1 = "$src1_modifiers";
  string out_mods = !if(!eq(HasOMod, 0), "$clamp", "$clamp$omod");
  string args = !if(!eq(NumSrcArgs, 0), "",
                    !if(!eq(NumSrcArgs, 1),
                        ", "#src0,
                        ", "#src0#", "#src1
                    )
                );
  string sdwa = !if(!eq(NumSrcArgs, 0), "",
                    !if(!eq(NumSrcArgs, 1),
                        out_mods#" $dst_sel $dst_unused $src0_sel",
                        !if(!eq(DstVT.Size, 1),
                            " $src0_sel $src1_sel", // No dst_sel, dst_unused and output modifiers for VOPC
                            out_mods#" $dst_sel $dst_unused $src0_sel $src1_sel"
                        )
                    )
                );
  string ret = dst#args#sdwa;
}


// Checks whether an instruction supports DPP and SDWA
class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                 ValueType Src1VT = i32> {
  bit ret = !if(!eq(NumSrcArgs, 3),
                0, // NumSrcArgs == 3 - No DPP or SDWA for VOP3
                !if(!eq(DstVT.Size, 64),
                    0, // 64-bit dst - No DPP or SDWA for 64-bit operands
                    !if(!eq(Src0VT.Size, 64),
                        0, // 64-bit src0
                        !if(!eq(Src1VT.Size, 64),
                            0, // 64-bit src1
                            1
                        )
                    )
                )
            );
}

class getHasDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                 ValueType Src1VT = i32> {
  bit ret = !if(!eq(NumSrcArgs, 0), 0,
                getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
}

class BitOr<bit a, bit b> {
  bit ret = !if(a, 1, !if(b, 1, 0));
}

class BitAnd<bit a, bit b> {
  bit ret = !if(a, !if(b, 1, 0), 0);
}

def PatGenMode {
  int NoPattern = 0;
  int Pattern = 1;
}

class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
                  bit _EnableClamp = 0> {

  field list<ValueType> ArgVT = _ArgVT;
  field bit EnableF32SrcMods = _EnableF32SrcMods;
  field bit EnableClamp = _EnableClamp;

  field ValueType DstVT = ArgVT[0];
  field ValueType Src0VT = ArgVT[1];
  field ValueType Src1VT = ArgVT[2];
  field ValueType Src2VT = ArgVT[3];
  field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
  field RegisterOperand DstRCDPP = getVALUDstForVT<DstVT>.ret;
  field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret;
  field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
  field RegisterClass Src1RC32 = getVregSrcForVT<Src1VT>.ret;
  field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
  field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
  field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
  field RegisterClass Src0DPP = getVregSrcForVT<Src0VT>.ret;
  field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
  field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
  field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
  field Operand Src0Mod = getSrcMod<Src0VT, EnableF32SrcMods>.ret;
  field Operand Src1Mod = getSrcMod<Src1VT, EnableF32SrcMods>.ret;
  field Operand Src2Mod = getSrcMod<Src2VT, EnableF32SrcMods>.ret;
  field Operand Src0ModDPP = getSrcModExt<Src0VT>.ret;
  field Operand Src1ModDPP = getSrcModExt<Src1VT>.ret;
  field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
  field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;


  field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
  field bit HasDst32 = HasDst;
  field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case
  field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
  field bit HasSrc0 = !if(!eq(Src0VT.Value, untyped.Value), 0, 1);
  field bit HasSrc1 = !if(!eq(Src1VT.Value, untyped.Value), 0, 1);
  field bit HasSrc2 = !if(!eq(Src2VT.Value, untyped.Value), 0, 1);

  // TODO: Modifiers logic is somewhat ad hoc here, to be refined later.
  // HasModifiers affects the normal and DPP encodings. We take note of
  // EnableF32SrcMods, which enables modifiers for the i32 type.
  field bit HasModifiers = BitOr<isModifierType<Src0VT>.ret, EnableF32SrcMods>.ret;

  // HasSrc*FloatMods affects the SDWA encoding. We ignore EnableF32SrcMods.
  field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
  field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret;
  field bit HasSrc2FloatMods = isFloatType<Src2VT>.ret;

  // HasSrc*IntMods affects the SDWA encoding. We ignore EnableF32SrcMods.
  field bit HasSrc0IntMods = isIntType<Src0VT>.ret;
  field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
  field bit HasSrc2IntMods = isIntType<Src2VT>.ret;

  field bit HasSrc0Mods = HasModifiers;
  field bit HasSrc1Mods = !if(HasModifiers, BitOr<HasSrc1FloatMods, HasSrc1IntMods>.ret, 0);
  field bit HasSrc2Mods = !if(HasModifiers, BitOr<HasSrc2FloatMods, HasSrc2IntMods>.ret, 0);

  field bit HasClamp = BitOr<isModifierType<Src0VT>.ret, EnableClamp>.ret;
  field bit HasSDWAClamp = EmitDst;
  field bit HasFPClamp = BitAnd<isFloatType<DstVT>.ret, HasClamp>.ret;
  field bit HasIntClamp = !if(isFloatType<DstVT>.ret, 0, HasClamp);
  field bit HasClampLo = HasClamp;
  field bit HasClampHi = BitAnd<isPackedType<DstVT>.ret, HasClamp>.ret;
  field bit HasHigh = 0;

  field bit IsPacked = isPackedType<Src0VT>.ret;
  field bit HasOpSel = IsPacked;
  field bit HasOMod = !if(HasOpSel, 0, isFloatType<DstVT>.ret);
  field bit HasSDWAOMod = isFloatType<DstVT>.ret;

  field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  field bit HasExtDPP = getHasDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  field bit HasExtSDWA = HasExt;
  field bit HasExtSDWA9 = HasExt;
  field int NeedPatGen = PatGenMode.NoPattern;

  field bit IsMAI = 0;
  field bit IsDOT = 0;

  field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
  field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
  field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods);

  field dag Outs = !if(HasDst, (outs DstRC:$vdst), (outs));

  // VOP3b instructions are a special case with a second explicit
  // output. This is manually overridden for them.
  field dag Outs32 = Outs;
  field dag Outs64 = Outs;
  field dag OutsDPP = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
  field dag OutsDPP8 = getOutsExt<HasDst, DstVT, DstRCDPP>.ret;
  field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret;

  field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
  field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
                             HasIntClamp, HasModifiers, HasSrc2Mods,
                             HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
  field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
                                   NumSrcArgs, HasClamp,
                                   Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
  field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
                                           NumSrcArgs,
                                           HasClamp,
                                           getOpSelMod<Src0VT>.ret,
                                           getOpSelMod<Src1VT>.ret,
                                           getOpSelMod<Src2VT>.ret>.ret;
  field dag InsDPP = !if(HasExtDPP,
                         getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
                                   HasModifiers, Src0ModDPP, Src1ModDPP>.ret,
                         (ins));
  field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
                                   HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
  field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs, 0,
                                 Src0ModDPP, Src1ModDPP>.ret;
  field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
                                 HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
                                 DstVT>.ret;


  field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
  field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasIntClamp, HasModifiers, HasOMod, DstVT>.ret;
  field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasModifiers, HasClamp, DstVT>.ret;
  field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
                                              HasClamp,
                                              HasSrc0FloatMods,
                                              HasSrc1FloatMods,
                                              HasSrc2FloatMods>.ret;
  field string AsmDPP = !if(HasExtDPP,
                            getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, "");
  field string AsmDPP16 = getAsmDPP16<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
  field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0, DstVT>.ret;
  field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
  field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;

  field string TieRegDPP = "$old";
}

class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
  let HasExt = 0;
  let HasExtDPP = 0;
  let HasExtSDWA = 0;
  let HasExtSDWA9 = 0;
}

class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.Pattern> : VOPProfile <p.ArgVT> {
  let NeedPatGen = mode;
}

def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>;
def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>;
def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>;

def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>;
def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>;
def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], 0, /*EnableClamp=*/1>;

def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;

def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>;

def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;

def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>;
def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>;

def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>;

def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;

def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
def VOP_F64_F32 : VOPProfile <[f64, f32, untyped, untyped]>;
def VOP_F64_F64 : VOPProfile <[f64, f64, untyped, untyped]>;
def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>;
def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>;
def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>;

def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>;
def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], 0, /*EnableClamp=*/1>;
def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;

def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;

def VOP_F16_F32_F16_F32 : VOPProfile <[f16, f32, f16, f32]>;
def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>;
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>;

def VOP_F32_V2F16_V2F16_F32 : VOPProfile <[f32, v2f16, v2f16, f32]>;
def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>;

def VOP_V4F32_F32_F32_V4F32 : VOPProfile <[v4f32, f32, f32, v4f32]>;
def VOP_V16F32_F32_F32_V16F32 : VOPProfile <[v16f32, f32, f32, v16f32]>;
def VOP_V32F32_F32_F32_V32F32 : VOPProfile <[v32f32, f32, f32, v32f32]>;
def VOP_V4F32_V4F16_V4F16_V4F32 : VOPProfile <[v4f32, v4f16, v4f16, v4f32]>;
def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile <[v16f32, v4f16, v4f16, v16f32]>;
def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32f32, v4f16, v4f16, v32f32]>;
def VOP_V4F32_V2I16_V2I16_V4F32 : VOPProfile <[v4f32, v2i16, v2i16, v4f32]>;
def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile <[v16f32, v2i16, v2i16, v16f32]>;
def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32f32, v2i16, v2i16, v32f32]>;
def VOP_V4I32_I32_I32_V4I32 : VOPProfile <[v4i32, i32, i32, v4i32]>;
def VOP_V16I32_I32_I32_V16I32 : VOPProfile <[v16i32, i32, i32, v16i32]>;
def VOP_V32I32_I32_I32_V32I32 : VOPProfile <[v32i32, i32, i32, v32i32]>;

class Commutable_REV <string revOp, bit isOrig> {
  string RevOp = revOp;
  bit IsOrig = isOrig;
}

class AtomicNoRet <string noRetOp, bit isRet> {
  string NoRetOp = noRetOp;
  bit IsRet = isRet;
}

//===----------------------------------------------------------------------===//
// Interpolation opcodes
//===----------------------------------------------------------------------===//

class VINTRPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVINTRPDst">;

class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
  VINTRPCommon <outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {
  let isPseudo = 1;
  let isCodeGenOnly = 1;
}

// FIXME-GFX10: WIP.
class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
                      string asm, int encodingFamily> :
  VINTRPCommon <outs, ins, asm, []>,
  VINTRPe <op>,
  SIMCInstr<opName, encodingFamily> {
  let DisableDecoder = DisableSIDecoder;
}

class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
                      string asm> :
  VINTRPCommon <outs, ins, asm, []>,
  VINTRPe_vi <op>,
  SIMCInstr<opName, SIEncodingFamily.VI> {
  let AssemblerPredicate = VIAssemblerPredicate;
  let DecoderNamespace = "GFX8";
  let DisableDecoder = DisableVIDecoder;
}

// FIXME-GFX10: WIP.
multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
                     list<dag> pattern = []> {
  def "" : VINTRP_Pseudo <NAME, outs, ins, pattern>;

  let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
    def _si : VINTRP_Real_si <op, NAME, outs, ins, asm, SIEncodingFamily.SI>;
  } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

  def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>;

  let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
    def _gfx10 : VINTRP_Real_si<op, NAME, outs, ins, asm, SIEncodingFamily.GFX10>;
  } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"
}

//===----------------------------------------------------------------------===//
// Vector instruction mappings
//===----------------------------------------------------------------------===//

// Maps an opcode in e32 form to its e64 equivalent
def getVOPe64 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["Size", "VOP3"];
  let KeyCol = ["4", "0"];
  let ValueCols = [["8", "1"]];
}

// Maps an opcode in e64 form to its e32 equivalent
def getVOPe32 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["Size", "VOP3"];
  let KeyCol = ["8", "1"];
  let ValueCols = [["4", "0"]];
}

// Maps ordinary instructions to their SDWA counterparts
def getSDWAOp : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["Default"];
  let ValueCols = [["SDWA"]];
}

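// Illustrative note (not part of the original source): each InstrMapping is
// emitted by TableGen as an opcode lookup table with an accessor in the
// AMDGPU namespace (for the two e32/e64 mappings above, something like
// AMDGPU::getVOPe64(Opcode) and AMDGPU::getVOPe32(Opcode) on the C++ side),
// which SIInstrInfo uses to switch an opcode between encodings. Instructions
// participate simply by deriving from the FilterClass ("VOP" here) with
// matching RowFields/ColFields values; a hypothetical sketch:
//
//   // def V_EXAMPLE_e32 : VOP...;  // Size = 4, VOP3 = 0
//   // def V_EXAMPLE_e64 : VOP...;  // Size = 8, VOP3 = 1
//
// so that getVOPe64 maps the _e32 opcode to the _e64 one.
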
// Maps SDWA instructions to their ordinary counterparts
def getBasicFromSDWAOp : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["SDWA"];
  let ValueCols = [["Default"]];
}

// Maps ordinary instructions to their DPP counterparts
def getDPPOp32 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["Default"];
  let ValueCols = [["DPP"]];
}

// Maps a commuted opcode to its original version
def getCommuteOrig : InstrMapping {
  let FilterClass = "Commutable_REV";
  let RowFields = ["RevOp"];
  let ColFields = ["IsOrig"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Maps an original opcode to its commuted version
def getCommuteRev : InstrMapping {
  let FilterClass = "Commutable_REV";
  let RowFields = ["RevOp"];
  let ColFields = ["IsOrig"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

def getMCOpcodeGen : InstrMapping {
  let FilterClass = "SIMCInstr";
  let RowFields = ["PseudoInstr"];
  let ColFields = ["Subtarget"];
  let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
  let ValueCols = [[!cast<string>(SIEncodingFamily.SI)],
                   [!cast<string>(SIEncodingFamily.VI)],
                   [!cast<string>(SIEncodingFamily.SDWA)],
                   [!cast<string>(SIEncodingFamily.SDWA9)],
                   // GFX80 encoding is added to work around a multiple matching
                   // issue for buffer instructions with unpacked d16 data. This
                   // does not actually change the encoding, and thus may be
                   // removed later.
                   [!cast<string>(SIEncodingFamily.GFX80)],
                   [!cast<string>(SIEncodingFamily.GFX9)],
                   [!cast<string>(SIEncodingFamily.GFX10)],
                   [!cast<string>(SIEncodingFamily.SDWA10)],
                   [!cast<string>(SIEncodingFamily.GFX10_B)]];
}

// Get equivalent SOPK instruction.
def getSOPKOp : InstrMapping {
  let FilterClass = "SOPKInstTable";
  let RowFields = ["BaseCmpOp"];
  let ColFields = ["IsSOPK"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

def getAddr64Inst : InstrMapping {
  let FilterClass = "MUBUFAddr64Table";
  let RowFields = ["OpName"];
  let ColFields = ["IsAddr64"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

def getIfAddr64Inst : InstrMapping {
  let FilterClass = "MUBUFAddr64Table";
  let RowFields = ["OpName"];
  let ColFields = ["IsAddr64"];
  let KeyCol = ["1"];
  let ValueCols = [["1"]];
}

def getMUBUFNoLdsInst : InstrMapping {
  let FilterClass = "MUBUFLdsTable";
  let RowFields = ["OpName"];
  let ColFields = ["IsLds"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Maps an atomic opcode to its version with a return value.
def getAtomicRetOp : InstrMapping {
  let FilterClass = "AtomicNoRet";
  let RowFields = ["NoRetOp"];
  let ColFields = ["IsRet"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Maps an atomic opcode to its returnless version.
def getAtomicNoRetOp : InstrMapping {
  let FilterClass = "AtomicNoRet";
  let RowFields = ["NoRetOp"];
  let ColFields = ["IsRet"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

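// Illustrative note (not part of the original source): getAtomicRetOp and
// getAtomicNoRetOp are keyed by the AtomicNoRet NoRetOp string, so an atomic
// pair participates by tagging both forms with the same key; a hypothetical
// sketch (names are illustrative only):
//
//   // def BUFFER_EXAMPLE_ATOMIC     : ..., AtomicNoRet<"BUFFER_EXAMPLE_ATOMIC", 0>;
//   // def BUFFER_EXAMPLE_ATOMIC_RTN : ..., AtomicNoRet<"BUFFER_EXAMPLE_ATOMIC", 1>;
//
// letting the two mappings above translate between the returning and
// non-returning opcodes.
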
// Maps a GLOBAL to its SADDR form.
def getGlobalSaddrOp : InstrMapping {
  let FilterClass = "GlobalSaddrTable";
  let RowFields = ["SaddrOp"];
  let ColFields = ["IsSaddr"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Maps a v_cmpx opcode with sdst to opcode without sdst.
def getVCMPXNoSDstOp : InstrMapping {
  let FilterClass = "VCMPXNoSDstTable";
  let RowFields = ["NoSDstOp"];
  let ColFields = ["HasSDst"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Maps a SOPP to a SOPP with S_NOP
def getSOPPWithRelaxation : InstrMapping {
  let FilterClass = "Base_SOPP";
  let RowFields = ["AsmString"];
  let ColFields = ["Size"];
  let KeyCol = ["4"];
  let ValueCols = [["8"]];
}

include "SIInstructions.td"

include "DSInstructions.td"
include "MIMGInstructions.td"