1//===-- SIInstrInfo.td -----------------------------------------------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">, 10 AssemblerPredicate <(all_of FeatureWavefrontSize32)>; 11def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">, 12 AssemblerPredicate <(all_of FeatureWavefrontSize64)>; 13 14class GCNPredicateControl : PredicateControl { 15 Predicate SIAssemblerPredicate = isGFX6GFX7; 16 Predicate VIAssemblerPredicate = isGFX8GFX9; 17} 18 19// Except for the NONE field, this must be kept in sync with the 20// SIEncodingFamily enum in SIInstrInfo.cpp and the columns of the 21// getMCOpcodeGen table. 22def SIEncodingFamily { 23 int NONE = -1; 24 int SI = 0; 25 int VI = 1; 26 int SDWA = 2; 27 int SDWA9 = 3; 28 int GFX80 = 4; 29 int GFX9 = 5; 30 int GFX10 = 6; 31 int SDWA10 = 7; 32 int GFX90A = 8; 33 int GFX940 = 9; 34 int GFX11 = 10; 35 int GFX12 = 11; 36} 37 38//===----------------------------------------------------------------------===// 39// SI DAG Nodes 40//===----------------------------------------------------------------------===// 41 42def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>; 43 44def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD", 45 SDTypeProfile<1, 3, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>, 46 [SDNPMayLoad, SDNPMemOperand] 47>; 48 49def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT", 50 SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>, 51 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue] 52>; 53 54def SDTAtomic2_f32 : SDTypeProfile<1, 2, [ 55 SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1> 56]>; 57 58def SIatomic_fmin : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMIN", SDTAtomic2_f32, 59 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain] 60>; 61 62def SIatomic_fmax : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMAX", SDTAtomic2_f32, 63 [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain] 64>; 65 66// load_d16_{lo|hi} ptr, tied_input 67def SIload_d16 : SDTypeProfile<1, 2, [ 68 SDTCisPtrTy<1>, 69 SDTCisSameAs<0, 2> 70]>; 71 72 73def SDTtbuffer_load : SDTypeProfile<1, 8, 74 [ // vdata 75 SDTCisVT<1, v4i32>, // rsrc 76 SDTCisVT<2, i32>, // vindex(VGPR) 77 SDTCisVT<3, i32>, // voffset(VGPR) 78 SDTCisVT<4, i32>, // soffset(SGPR) 79 SDTCisVT<5, i32>, // offset(imm) 80 SDTCisVT<6, i32>, // format(imm) 81 SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm) 82 SDTCisVT<8, i1> // idxen(imm) 83 ]>; 84 85def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTtbuffer_load, 86 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>; 87def SItbuffer_load_d16 : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT_D16", 88 SDTtbuffer_load, 89 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>; 90 91def SDTtbuffer_store : SDTypeProfile<0, 9, 92 [ // vdata 93 SDTCisVT<1, v4i32>, // rsrc 94 SDTCisVT<2, i32>, // vindex(VGPR) 95 SDTCisVT<3, i32>, // voffset(VGPR) 96 SDTCisVT<4, i32>, // soffset(SGPR) 97 SDTCisVT<5, i32>, // offset(imm) 98 SDTCisVT<6, i32>, // format(imm) 99 SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm) 100 SDTCisVT<8, i1> // idxen(imm) 101 ]>; 102 103def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store, 104 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>; 105def SItbuffer_store_d16 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16", 106 SDTtbuffer_store, 107 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>; 108 109def SDTBufferLoad : SDTypeProfile<1, 7, 110 [ // vdata 111 SDTCisVT<1, v4i32>, // rsrc 112 SDTCisVT<2, i32>, // vindex(VGPR) 113 SDTCisVT<3, i32>, // voffset(VGPR) 114 SDTCisVT<4, i32>, // soffset(SGPR) 115 SDTCisVT<5, i32>, // offset(imm) 116 SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm) 117 SDTCisVT<7, i1>]>; // idxen(imm) 118 119def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad, 120 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>; 121def SIbuffer_load_ubyte : SDNode <"AMDGPUISD::BUFFER_LOAD_UBYTE", SDTBufferLoad, 122 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>; 123def SIbuffer_load_ushort : SDNode <"AMDGPUISD::BUFFER_LOAD_USHORT", SDTBufferLoad, 124 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>; 125def SIbuffer_load_byte : SDNode <"AMDGPUISD::BUFFER_LOAD_BYTE", SDTBufferLoad, 126 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>; 127def SIbuffer_load_short: SDNode <"AMDGPUISD::BUFFER_LOAD_SHORT", SDTBufferLoad, 128 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>; 129def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad, 130 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>; 131def SIbuffer_load_format_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_TFE", SDTBufferLoad, 132 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>; 133def SIbuffer_load_format_d16 : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_D16", 134 SDTBufferLoad, 135 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>; 136 137def SDTBufferStore : SDTypeProfile<0, 8, 138 [ // vdata 139 SDTCisVT<1, v4i32>, // rsrc 140 SDTCisVT<2, i32>, // vindex(VGPR) 141 SDTCisVT<3, i32>, // voffset(VGPR) 142 SDTCisVT<4, i32>, // soffset(SGPR) 143 SDTCisVT<5, i32>, // offset(imm) 144 SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm) 145 SDTCisVT<7, i1>]>; // idxen(imm) 146 147def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore, 148 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>; 149def SIbuffer_store_byte: SDNode <"AMDGPUISD::BUFFER_STORE_BYTE", 150 SDTBufferStore, 151 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>; 152def SIbuffer_store_short : SDNode <"AMDGPUISD::BUFFER_STORE_SHORT", 153 SDTBufferStore, 154 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>; 155def SIbuffer_store_format : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT", 156 SDTBufferStore, 157 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>; 158def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16", 159 SDTBufferStore, 160 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>; 161 162multiclass SDBufferAtomic<string opcode> { 163 def "" : SDNode <opcode, 164 SDTypeProfile<1, 8, 165 [SDTCisVT<2, v4i32>, // rsrc 166 SDTCisVT<3, i32>, // vindex(VGPR) 167 SDTCisVT<4, i32>, // voffset(VGPR) 168 SDTCisVT<5, i32>, // soffset(SGPR) 169 SDTCisVT<6, i32>, // offset(imm) 170 SDTCisVT<7, i32>, // cachepolicy(imm) 171 SDTCisVT<8, i1>]>, // idxen(imm) 172 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore] 173 >; 174 def "_noret" : PatFrag< 175 (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, 176 node:$offset, node:$cachepolicy, node:$idxen), 177 (!cast<SDNode>(NAME) node:$vdata_in, node:$rsrc, node:$vindex, 178 node:$voffset, node:$soffset, node:$offset, node:$cachepolicy, 179 node:$idxen)> { 180 let HasNoUse = true; 181 } 182} 183 184defm SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">; 185defm SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">; 186defm SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">; 187defm SIbuffer_atomic_smin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMIN">; 188defm SIbuffer_atomic_umin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMIN">; 189defm SIbuffer_atomic_smax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMAX">; 190defm SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">; 191defm SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">; 192defm SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">; 193defm SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">; 194defm SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">; 195defm SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">; 196defm SIbuffer_atomic_csub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_CSUB">; 197defm SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">; 198defm SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">; 199defm SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">; 200 201def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP", 202 SDTypeProfile<1, 9, 203 [SDTCisVT<3, v4i32>, // rsrc 204 SDTCisVT<4, i32>, // vindex(VGPR) 205 SDTCisVT<5, i32>, // voffset(VGPR) 206 SDTCisVT<6, i32>, // soffset(SGPR) 207 SDTCisVT<7, i32>, // offset(imm) 208 SDTCisVT<8, i32>, // cachepolicy(imm) 209 SDTCisVT<9, i1>]>, // idxen(imm) 210 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore] 211>; 212 213def SIbuffer_atomic_cmpswap_noret : PatFrag< 214 (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset, 215 node:$soffset, node:$offset, node:$cachepolicy, node:$idxen), 216 (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex, 217 node:$voffset, node:$soffset, node:$offset, node:$cachepolicy, 218 node:$idxen)> { 219 let HasNoUse = true; 220} 221 222class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode, 223 SDTypeProfile<0, 2, 224 [SDTCisPtrTy<0>, // vaddr 225 SDTCisVT<1, ty>]>, // vdata 226 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore] 227>; 228 229def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET", 230 SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]> 231>; 232 233def SIlds : SDNode<"AMDGPUISD::LDS", 234 SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]> 235>; 236 237def SIload_d16_lo : SDNode<"AMDGPUISD::LOAD_D16_LO", 238 SIload_d16, 239 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain] 240>; 241 242def SIload_d16_lo_u8 : SDNode<"AMDGPUISD::LOAD_D16_LO_U8", 243 SIload_d16, 244 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain] 245>; 246 247def SIload_d16_lo_i8 : SDNode<"AMDGPUISD::LOAD_D16_LO_I8", 248 SIload_d16, 249 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain] 250>; 251 252def SIload_d16_hi : SDNode<"AMDGPUISD::LOAD_D16_HI", 253 SIload_d16, 254 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain] 255>; 256 257def SIload_d16_hi_u8 : SDNode<"AMDGPUISD::LOAD_D16_HI_U8", 258 SIload_d16, 259 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain] 260>; 261 262def SIload_d16_hi_i8 : SDNode<"AMDGPUISD::LOAD_D16_HI_I8", 263 SIload_d16, 264 [SDNPMayLoad, SDNPMemOperand, SDNPHasChain] 265>; 266 267def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE", 268 SDTypeProfile<0 ,1, [SDTCisInt<0>]>, 269 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue] 270>; 271 272def SIfptrunc_round_upward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_UPWARD", 273 SDTFPRoundOp 274>; 275 276def SIfptrunc_round_downward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_DOWNWARD", 277 SDTFPRoundOp 278>; 279 280//===----------------------------------------------------------------------===// 281// ValueType helpers 282//===----------------------------------------------------------------------===// 283 284// Returns 1 if the source arguments have modifiers, 0 if they do not. 285class isFloatType<ValueType SrcVT> { 286 bit ret = !or(!eq(SrcVT.Value, f16.Value), 287 !eq(SrcVT.Value, f32.Value), 288 !eq(SrcVT.Value, f64.Value), 289 !eq(SrcVT.Value, v2f16.Value), 290 !eq(SrcVT.Value, v4f16.Value), 291 !eq(SrcVT.Value, v8f16.Value), 292 !eq(SrcVT.Value, v16f16.Value), 293 !eq(SrcVT.Value, v2f32.Value), 294 !eq(SrcVT.Value, v4f32.Value), 295 !eq(SrcVT.Value, v8f32.Value), 296 !eq(SrcVT.Value, v2f64.Value), 297 !eq(SrcVT.Value, v4f64.Value)); 298} 299 300// XXX - do v2i16 instructions? 301class isIntType<ValueType SrcVT> { 302 bit ret = !or(!eq(SrcVT.Value, i8.Value), 303 !eq(SrcVT.Value, i16.Value), 304 !eq(SrcVT.Value, i32.Value), 305 !eq(SrcVT.Value, i64.Value), 306 !eq(SrcVT.Value, v4i16.Value), 307 !eq(SrcVT.Value, v8i16.Value), 308 !eq(SrcVT.Value, v16i16.Value), 309 !eq(SrcVT.Value, v2i32.Value), 310 !eq(SrcVT.Value, v4i32.Value), 311 !eq(SrcVT.Value, v8i32.Value)); 312} 313 314class isPackedType<ValueType SrcVT> { 315 bit ret = !or(!eq(SrcVT.Value, v2i16.Value), 316 !eq(SrcVT.Value, v2f16.Value), 317 !eq(SrcVT.Value, v4f16.Value), 318 !eq(SrcVT.Value, v2i32.Value), 319 !eq(SrcVT.Value, v2f32.Value), 320 !eq(SrcVT.Value, v4i32.Value), 321 !eq(SrcVT.Value, v4f32.Value), 322 !eq(SrcVT.Value, v8i32.Value), 323 !eq(SrcVT.Value, v8f32.Value)); 324} 325 326 327//===----------------------------------------------------------------------===// 328// PatFrags for global memory operations 329//===----------------------------------------------------------------------===// 330 331defm atomic_load_fmin : binary_atomic_op_all_as<SIatomic_fmin, 0>; 332defm atomic_load_fmax : binary_atomic_op_all_as<SIatomic_fmax, 0>; 333 334//===----------------------------------------------------------------------===// 335// SDNodes PatFrags for loads/stores with a glue input. 336// This is for SDNodes and PatFrag for local loads and stores to 337// enable s_mov_b32 m0, -1 to be glued to the memory instructions. 338// 339// These mirror the regular load/store PatFrags and rely on special 340// processing during Select() to add the glued copy. 341// 342//===----------------------------------------------------------------------===// 343 344def AMDGPUld_glue : SDNode <"ISD::LOAD", SDTLoad, 345 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue] 346>; 347 348def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad, 349 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue] 350>; 351 352def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> { 353 let IsLoad = 1; 354 let IsUnindexed = 1; 355} 356 357def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> { 358 let IsLoad = 1; 359 let IsNonExtLoad = 1; 360} 361 362def atomic_load_8_glue : PatFrag<(ops node:$ptr), 363 (AMDGPUatomic_ld_glue node:$ptr)> { 364 let IsAtomic = 1; 365 let MemoryVT = i8; 366} 367 368def atomic_load_16_glue : PatFrag<(ops node:$ptr), 369 (AMDGPUatomic_ld_glue node:$ptr)> { 370 let IsAtomic = 1; 371 let MemoryVT = i16; 372} 373 374def atomic_load_32_glue : PatFrag<(ops node:$ptr), 375 (AMDGPUatomic_ld_glue node:$ptr)> { 376 let IsAtomic = 1; 377 let MemoryVT = i32; 378} 379 380def atomic_load_64_glue : PatFrag<(ops node:$ptr), 381 (AMDGPUatomic_ld_glue node:$ptr)> { 382 let IsAtomic = 1; 383 let MemoryVT = i64; 384} 385 386def extload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> { 387 let IsLoad = 1; 388 let IsAnyExtLoad = 1; 389} 390 391def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> { 392 let IsLoad = 1; 393 let IsSignExtLoad = 1; 394} 395 396def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> { 397 let IsLoad = 1; 398 let IsZeroExtLoad = 1; 399} 400 401def extloadi8_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> { 402 let IsLoad = 1; 403 let MemoryVT = i8; 404} 405 406def zextloadi8_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> { 407 let IsLoad = 1; 408 let MemoryVT = i8; 409} 410 411def extloadi16_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> { 412 let IsLoad = 1; 413 let MemoryVT = i16; 414} 415 416def zextloadi16_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> { 417 let IsLoad = 1; 418 let MemoryVT = i16; 419} 420 421def sextloadi8_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> { 422 let IsLoad = 1; 423 let MemoryVT = i8; 424} 425 426def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> { 427 let IsLoad = 1; 428 let MemoryVT = i16; 429} 430 431 432let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in { 433def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> { 434 let IsNonExtLoad = 1; 435} 436 437def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>; 438def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>; 439def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>; 440 441def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>; 442def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>; 443def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>; 444} // End IsLoad = 1, , AddressSpaces = LoadAddress_local.AddrSpaces 445 446def load_align8_local_m0 : PatFrag<(ops node:$ptr), 447 (load_local_m0 node:$ptr)> { 448 let IsLoad = 1; 449 int MinAlignment = 8; 450} 451 452def load_align16_local_m0 : PatFrag<(ops node:$ptr), 453 (load_local_m0 node:$ptr)> { 454 let IsLoad = 1; 455 int MinAlignment = 16; 456} 457 458let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in { 459def atomic_load_8_local_m0 : PatFrag<(ops node:$ptr), 460 (atomic_load_8_glue node:$ptr)>; 461def atomic_load_16_local_m0 : PatFrag<(ops node:$ptr), 462 (atomic_load_16_glue node:$ptr)>; 463def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr), 464 (atomic_load_32_glue node:$ptr)>; 465def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr), 466 (atomic_load_64_glue node:$ptr)>; 467} // End let AddressSpaces = LoadAddress_local.AddrSpaces 468 469 470def AMDGPUst_glue : SDNode <"ISD::STORE", SDTStore, 471 [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue] 472>; 473 474def AMDGPUatomic_st_glue : SDNode <"ISD::ATOMIC_STORE", SDTAtomicStore, 475 [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue] 476>; 477 478def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr), 479 (AMDGPUst_glue node:$val, node:$ptr)> { 480 let IsStore = 1; 481 let IsUnindexed = 1; 482} 483 484def store_glue : PatFrag<(ops node:$val, node:$ptr), 485 (unindexedstore_glue node:$val, node:$ptr)> { 486 let IsStore = 1; 487 let IsTruncStore = 0; 488} 489 490def truncstore_glue : PatFrag<(ops node:$val, node:$ptr), 491 (unindexedstore_glue node:$val, node:$ptr)> { 492 let IsStore = 1; 493 let IsTruncStore = 1; 494} 495 496def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr), 497 (truncstore_glue node:$val, node:$ptr)> { 498 let IsStore = 1; 499 let MemoryVT = i8; 500 let IsTruncStore = 1; 501} 502 503def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr), 504 (truncstore_glue node:$val, node:$ptr)> { 505 let IsStore = 1; 506 let MemoryVT = i16; 507 let IsTruncStore = 1; 508} 509 510let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in { 511def store_local_m0 : PatFrag<(ops node:$val, node:$ptr), 512 (store_glue node:$val, node:$ptr)>; 513def truncstorei8_local_m0 : PatFrag<(ops node:$val, node:$ptr), 514 (truncstorei8_glue node:$val, node:$ptr)>; 515def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr), 516 (truncstorei16_glue node:$val, node:$ptr)>; 517} 518 519def store_align8_local_m0 : PatFrag <(ops node:$value, node:$ptr), 520 (store_local_m0 node:$value, node:$ptr)>, 521 Aligned<8> { 522 let IsStore = 1; 523} 524 525def store_align16_local_m0 : PatFrag <(ops node:$value, node:$ptr), 526 (store_local_m0 node:$value, node:$ptr)>, 527 Aligned<16> { 528 let IsStore = 1; 529} 530 531let PredicateCode = [{return cast<MemSDNode>(N)->getAlign() < 4;}], 532 GISelPredicateCode = [{return (*MI.memoperands_begin())->getAlign() < 4;}], 533 AddressSpaces = [ AddrSpaces.Local ] in { 534def load_align_less_than_4_local : PatFrag<(ops node:$ptr), 535 (load_local node:$ptr)> { 536 let IsLoad = 1; 537 let IsNonExtLoad = 1; 538} 539 540def load_align_less_than_4_local_m0 : PatFrag<(ops node:$ptr), 541 (load_local_m0 node:$ptr)> { 542 let IsLoad = 1; 543 let IsNonExtLoad = 1; 544} 545 546def store_align_less_than_4_local : PatFrag <(ops node:$value, node:$ptr), 547 (store_local node:$value, node:$ptr)> { 548 let IsStore = 1; 549 let IsTruncStore = 0; 550} 551 552def store_align_less_than_4_local_m0 : PatFrag <(ops node:$value, node:$ptr), 553 (store_local_m0 node:$value, node:$ptr)> { 554 let IsStore = 1; 555 let IsTruncStore = 0; 556} 557} 558 559def atomic_store_8_glue : PatFrag < 560 (ops node:$ptr, node:$value), 561 (AMDGPUatomic_st_glue node:$ptr, node:$value)> { 562 let IsAtomic = 1; 563 let MemoryVT = i8; 564} 565 566def atomic_store_16_glue : PatFrag < 567 (ops node:$ptr, node:$value), 568 (AMDGPUatomic_st_glue node:$ptr, node:$value)> { 569 let IsAtomic = 1; 570 let MemoryVT = i16; 571} 572 573def atomic_store_32_glue : PatFrag < 574 (ops node:$ptr, node:$value), 575 (AMDGPUatomic_st_glue node:$ptr, node:$value)> { 576 let IsAtomic = 1; 577 let MemoryVT = i32; 578} 579 580def atomic_store_64_glue : PatFrag < 581 (ops node:$ptr, node:$value), 582 (AMDGPUatomic_st_glue node:$ptr, node:$value)> { 583 let IsAtomic = 1; 584 let MemoryVT = i64; 585} 586 587let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces in { 588def atomic_store_8_local_m0 : PatFrag<(ops node:$val, node:$ptr), 589 (atomic_store_8_glue node:$val, node:$ptr)>; 590def atomic_store_16_local_m0 : PatFrag<(ops node:$val, node:$ptr), 591 (atomic_store_16_glue node:$val, node:$ptr)>; 592def atomic_store_32_local_m0 : PatFrag<(ops node:$val, node:$ptr), 593 (atomic_store_32_glue node:$val, node:$ptr)>; 594def atomic_store_64_local_m0 : PatFrag<(ops node:$val, node:$ptr), 595 (atomic_store_64_glue node:$val, node:$ptr)>; 596} // End let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces 597 598 599//===----------------------------------------------------------------------===// 600// SDNodes PatFrags for a16 loads and stores with 3 components. 601// v3f16/v3i16 is widened to v4f16/v4i16, so we need to match on the memory 602// load/store size. 603//===----------------------------------------------------------------------===// 604 605class mubuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag < 606 (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset, 607 node:$auxiliary, node:$idxen), 608 (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset, 609 node:$auxiliary, node:$idxen)> { 610 let IsLoad = 1; 611 let MemoryVT = vt; 612} 613 614class mubuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag < 615 (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset, 616 node:$auxiliary, node:$idxen), 617 (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset, 618 node:$auxiliary, node:$idxen)> { 619 let IsStore = 1; 620 let MemoryVT = vt; 621} 622 623class mtbuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag < 624 (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset, 625 node:$format, node:$auxiliary, node:$idxen), 626 (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset, 627 node:$format, node:$auxiliary, node:$idxen)> { 628 let IsLoad = 1; 629 let MemoryVT = vt; 630} 631 632class mtbuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag < 633 (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset, 634 node:$format, node:$auxiliary, node:$idxen), 635 (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset, 636 node:$format, node:$auxiliary, node:$idxen)> { 637 let IsStore = 1; 638 let MemoryVT = vt; 639} 640 641//===----------------------------------------------------------------------===// 642// SDNodes PatFrags for d16 loads 643//===----------------------------------------------------------------------===// 644 645class LoadD16Frag <SDPatternOperator op> : PatFrag< 646 (ops node:$ptr, node:$tied_in), 647 (op node:$ptr, node:$tied_in)> { 648 let IsLoad = 1; 649} 650 651foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in { 652let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in { 653 654def load_d16_hi_#as : LoadD16Frag <SIload_d16_hi>; 655 656def az_extloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_u8> { 657 let MemoryVT = i8; 658} 659 660def sextloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_i8> { 661 let MemoryVT = i8; 662} 663 664def load_d16_lo_#as : LoadD16Frag <SIload_d16_lo>; 665 666def az_extloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_u8> { 667 let MemoryVT = i8; 668} 669 670def sextloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_i8> { 671 let MemoryVT = i8; 672} 673 674} // End let AddressSpaces = ... 675} // End foreach AddrSpace 676 677def lshr_rev : PatFrag < 678 (ops node:$src1, node:$src0), 679 (srl $src0, $src1) 680>; 681 682def ashr_rev : PatFrag < 683 (ops node:$src1, node:$src0), 684 (sra $src0, $src1) 685>; 686 687def lshl_rev : PatFrag < 688 (ops node:$src1, node:$src0), 689 (shl $src0, $src1) 690>; 691 692def add_ctpop : PatFrag < 693 (ops node:$src0, node:$src1), 694 (add (ctpop $src0), $src1) 695>; 696 697def xnor : PatFrag < 698 (ops node:$src0, node:$src1), 699 (not (xor $src0, $src1)) 700>; 701 702foreach I = 1-4 in { 703def shl#I#_add : PatFrag < 704 (ops node:$src0, node:$src1), 705 (add (shl_oneuse $src0, (i32 I)), $src1)> { 706 // FIXME: Poor substitute for disabling pattern in SelectionDAG 707 let PredicateCode = [{return false;}]; 708 let GISelPredicateCode = [{return true;}]; 709} 710} 711 712multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0, 713 SDTypeProfile tc = SDTAtomic2, 714 bit IsInt = 1> { 715 716 def _glue : SDNode < 717 !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, tc, 718 [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue] 719 >; 720 721 let AddressSpaces = StoreAddress_local.AddrSpaces in { 722 defm _local_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>; 723 defm _local_m0 : noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"), 724 IsInt>; 725 } 726 727 let AddressSpaces = StoreAddress_region.AddrSpaces in { 728 defm _region_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>; 729 defm _region_m0 : noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"), 730 IsInt>; 731 } 732} 733 734defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">; 735defm atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">; 736defm atomic_load_uinc_wrap : SIAtomicM0Glue2 <"LOAD_UINC_WRAP">; 737defm atomic_load_udec_wrap : SIAtomicM0Glue2 <"LOAD_UDEC_WRAP">; 738defm atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">; 739defm atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">; 740defm atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">; 741defm atomic_load_or : SIAtomicM0Glue2 <"LOAD_OR">; 742defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">; 743defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">; 744defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">; 745defm atomic_swap : SIAtomicM0Glue2 <"SWAP">; 746defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>; 747defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32, 0>; 748defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32, 0>; 749 750def as_i1timm : SDNodeXForm<timm, [{ 751 return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1); 752}]>; 753 754def as_i8imm : SDNodeXForm<imm, [{ 755 return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8); 756}]>; 757 758def as_i8timm : SDNodeXForm<timm, [{ 759 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16); 760}]>; 761 762def as_i16imm : SDNodeXForm<imm, [{ 763 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16); 764}]>; 765 766def as_i16timm : SDNodeXForm<timm, [{ 767 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16); 768}]>; 769 770def as_i32imm: SDNodeXForm<imm, [{ 771 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); 772}]>; 773 774def as_i32timm: SDNodeXForm<timm, [{ 775 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); 776}]>; 777 778def as_i64imm: SDNodeXForm<imm, [{ 779 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64); 780}]>; 781 782def cond_as_i32imm: SDNodeXForm<cond, [{ 783 return CurDAG->getTargetConstant(N->get(), SDLoc(N), MVT::i32); 784}]>; 785 786// Copied from the AArch64 backend: 787def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{ 788return CurDAG->getTargetConstant( 789 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32); 790}]>; 791 792def frameindex_to_targetframeindex : SDNodeXForm<frameindex, [{ 793 auto FI = cast<FrameIndexSDNode>(N); 794 return CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32); 795}]>; 796 797// Copied from the AArch64 backend: 798def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{ 799return CurDAG->getTargetConstant( 800 N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64); 801}]>; 802 803class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{ 804 uint64_t Imm = N->getZExtValue(); 805 unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1; 806 return CurDAG->getTargetConstant(Bit, SDLoc(N), MVT::i1); 807}]>; 808 809def SIMM16bit : ImmLeaf <i32, 810 [{return isInt<16>(Imm);}] 811>; 812 813def UIMM16bit : ImmLeaf <i32, 814 [{return isUInt<16>(Imm);}] 815>; 816 817def i64imm_32bit : ImmLeaf<i64, [{ 818 return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm); 819}]>; 820 821def InlineImm16 : ImmLeaf<i16, [{ 822 return isInlineImmediate16(Imm); 823}]>; 824 825def InlineImm32 : ImmLeaf<i32, [{ 826 return isInlineImmediate32(Imm); 827}]>; 828 829def InlineImm64 : ImmLeaf<i64, [{ 830 return isInlineImmediate64(Imm); 831}]>; 832 833def InlineImmFP32 : FPImmLeaf<f32, [{ 834 return isInlineImmediate(Imm); 835}]>; 836 837def InlineImmFP64 : FPImmLeaf<f64, [{ 838 return isInlineImmediate(Imm); 839}]>; 840 841 842class VGPRImm <dag frag> : PatLeaf<frag, [{ 843 return isVGPRImm(N); 844}]>; 845 846def NegateImm : SDNodeXForm<imm, [{ 847 return CurDAG->getConstant(-N->getSExtValue(), SDLoc(N), MVT::i32); 848}]>; 849 850// TODO: When FP inline imm values work? 851def NegSubInlineConst32 : ImmLeaf<i32, [{ 852 return Imm < -16 && Imm >= -64; 853}], NegateImm>; 854 855def NegSubInlineIntConst16 : ImmLeaf<i16, [{ 856 return Imm < -16 && Imm >= -64; 857}], NegateImm>; 858 859def ShiftAmt32Imm : ImmLeaf <i32, [{ 860 return Imm < 32; 861}]>; 862 863def fp16_zeros_high_16bits : PatLeaf<(f16 VGPR_32:$src), [{ 864 return fp16SrcZerosHighBits(N->getOpcode()); 865}]>; 866 867 868//===----------------------------------------------------------------------===// 869// MUBUF/SMEM Patterns 870//===----------------------------------------------------------------------===// 871 872def extract_cpol : SDNodeXForm<timm, [{ 873 return CurDAG->getTargetConstant( 874 N->getZExtValue() & (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12 875 ? AMDGPU::CPol::ALL 876 : AMDGPU::CPol::ALL_pregfx12), 877 SDLoc(N), MVT::i8); 878}]>; 879 880def extract_swz : SDNodeXForm<timm, [{ 881 const bool Swizzle = 882 N->getZExtValue() & (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12 883 ? AMDGPU::CPol::SWZ 884 : AMDGPU::CPol::SWZ_pregfx12); 885 return CurDAG->getTargetConstant(Swizzle, SDLoc(N), MVT::i8); 886}]>; 887 888def set_glc : SDNodeXForm<timm, [{ 889 return CurDAG->getTargetConstant(N->getZExtValue() | AMDGPU::CPol::GLC, SDLoc(N), MVT::i8); 890}]>; 891 892//===----------------------------------------------------------------------===// 893// Custom Operands 894//===----------------------------------------------------------------------===// 895 896def SOPPBrTarget : CustomOperand<OtherVT> { 897 let PrintMethod = "printOperand"; 898 let EncoderMethod = "getSOPPBrEncoding"; 899 let DecoderMethod = "decodeSOPPBrTarget"; 900 let OperandType = "OPERAND_PCREL"; 901} 902 903def si_ga : Operand<iPTR>; 904 905def InterpSlot : CustomOperand<i32>; 906 907// It appears to be necessary to create a separate operand for this to 908// be able to parse attr<num> with no space. 909def InterpAttr : CustomOperand<i32>; 910 911def InterpAttrChan : ImmOperand<i32>; 912 913def SplitBarrier : ImmOperand<i32> { 914 let OperandNamespace = "AMDGPU"; 915 let OperandType = "OPERAND_INLINE_SPLIT_BARRIER_INT32"; 916 let DecoderMethod = "decodeSplitBarrier"; 917 let PrintMethod = "printOperand"; 918} 919 920def VReg32OrOffClass : AsmOperandClass { 921 let Name = "VReg32OrOff"; 922 let ParserMethod = "parseVReg32OrOff"; 923} 924 925def SendMsg : CustomOperand<i32>; 926 927def Swizzle : CustomOperand<i16, 1>; 928 929def Endpgm : CustomOperand<i16, 1>; 930 931def SWaitCnt : CustomOperand<i32>; 932 933def DepCtr : CustomOperand<i32>; 934 935def SDelayALU : CustomOperand<i32>; 936 937include "SIInstrFormats.td" 938include "VIInstrFormats.td" 939 940def BoolReg : AsmOperandClass { 941 let Name = "BoolReg"; 942 let ParserMethod = "parseBoolReg"; 943 let RenderMethod = "addRegOperands"; 944} 945 946class BoolRC : RegisterOperand<SReg_1> { 947 let ParserMatchClass = BoolReg; 948 let DecoderMethod = "decodeBoolReg"; 949} 950 951def SSrc_i1 : RegisterOperand<SReg_1_XEXEC> { 952 let ParserMatchClass = BoolReg; 953 let DecoderMethod = "decodeBoolReg"; 954} 955 956def VOPDstS64orS32 : BoolRC { 957 let PrintMethod = "printVOPDst"; 958} 959 960// SCSrc_i1 is the operand for pseudo instructions only. 961// Boolean immediates shall not be exposed to codegen instructions. 962def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> { 963 let OperandNamespace = "AMDGPU"; 964 let OperandType = "OPERAND_REG_IMM_INT32"; 965 let ParserMatchClass = BoolReg; 966 let DecoderMethod = "decodeBoolReg"; 967} 968 969// ===----------------------------------------------------------------------===// 970// ExpSrc* Special cases for exp src operands which are printed as 971// "off" depending on en operand. 972// ===----------------------------------------------------------------------===// 973 974def ExpSrc0 : RegisterOperand<VGPR_32> { 975 let PrintMethod = "printExpSrc0"; 976 let ParserMatchClass = VReg32OrOffClass; 977} 978 979def ExpSrc1 : RegisterOperand<VGPR_32> { 980 let PrintMethod = "printExpSrc1"; 981 let ParserMatchClass = VReg32OrOffClass; 982} 983 984def ExpSrc2 : RegisterOperand<VGPR_32> { 985 let PrintMethod = "printExpSrc2"; 986 let ParserMatchClass = VReg32OrOffClass; 987} 988 989def ExpSrc3 : RegisterOperand<VGPR_32> { 990 let PrintMethod = "printExpSrc3"; 991 let ParserMatchClass = VReg32OrOffClass; 992} 993 994class SDWASrc<ValueType vt> : RegisterOperand<VS_32> { 995 let OperandNamespace = "AMDGPU"; 996 string Type = !if(isFloatType<vt>.ret, "FP", "INT"); 997 let OperandType = "OPERAND_REG_INLINE_C_"#Type#vt.Size; 998 let DecoderMethod = "decodeSDWASrc"#vt.Size; 999 let EncoderMethod = "getSDWASrcEncoding"; 1000} 1001 1002def SDWASrc_i32 : SDWASrc<i32>; 1003def SDWASrc_i16 : SDWASrc<i16>; 1004def SDWASrc_f32 : SDWASrc<f32>; 1005def SDWASrc_f16 : SDWASrc<f16>; 1006 1007def SDWAVopcDst : BoolRC { 1008 let OperandNamespace = "AMDGPU"; 1009 let OperandType = "OPERAND_SDWA_VOPC_DST"; 1010 let EncoderMethod = "getSDWAVopcDstEncoding"; 1011 let DecoderMethod = "decodeSDWAVopcDst"; 1012 let PrintMethod = "printVOPDst"; 1013} 1014 1015class NamedIntOperand<ValueType Type, string Prefix, string Name = NAME, 1016 string ConvertMethod = "nullptr"> 1017 : CustomOperand<Type, 1, Name> { 1018 let ParserMethod = 1019 "[this](OperandVector &Operands) -> ParseStatus { "# 1020 "return parseIntWithPrefix(\""#Prefix#"\", Operands, "# 1021 "AMDGPUOperand::"#ImmTy#", "#ConvertMethod#"); }"; 1022} 1023 1024class NamedBitOperand<string Id, string Name = NAME> 1025 : CustomOperand<i1, 1, Name> { 1026 let PredicateMethod = "isImmTy<AMDGPUOperand::"#ImmTy#">"; 1027 let ParserMethod = 1028 "[this](OperandVector &Operands) -> ParseStatus { "# 1029 "return parseNamedBit(\""#Id#"\", Operands, AMDGPUOperand::"#ImmTy#"); }"; 1030 let PrintMethod = "[this](const MCInst *MI, unsigned OpNo, "# 1031 "const MCSubtargetInfo &STI, raw_ostream &O) { "# 1032 "printNamedBit(MI, OpNo, O, \""#Id#"\"); }"; 1033} 1034 1035class DefaultOperand<CustomOperand Op, int Value> 1036 : OperandWithDefaultOps<Op.Type, (ops (Op.Type Value))>, 1037 CustomOperandProps<1> { 1038 let ParserMatchClass = Op.ParserMatchClass; 1039 let PrintMethod = Op.PrintMethod; 1040} 1041 1042class SDWAOperand<string Id, string Name = NAME> 1043 : CustomOperand<i32, 1, Name> { 1044 let ParserMethod = 1045 "[this](OperandVector &Operands) -> ParseStatus { "# 1046 "return parseSDWASel(Operands, \""#Id#"\", AMDGPUOperand::"#ImmTy#"); }"; 1047} 1048 1049class ArrayOperand0<string Id, string Name = NAME> 1050 : OperandWithDefaultOps<i32, (ops (i32 0))>, 1051 CustomOperandProps<1, Name> { 1052 let ParserMethod = 1053 "[this](OperandVector &Operands) -> ParseStatus { "# 1054 "return parseOperandArrayWithPrefix(\""#Id#"\", Operands, "# 1055 "AMDGPUOperand::"#ImmTy#"); }"; 1056} 1057 1058let ImmTy = "ImmTyOffset" in 1059def flat_offset : CustomOperand<i32, 1, "FlatOffset">; 1060def offset : NamedIntOperand<i32, "offset", "Offset">; 1061def offset0 : NamedIntOperand<i8, "offset0", "Offset0">; 1062def offset1 : NamedIntOperand<i8, "offset1", "Offset1">; 1063 1064def gds : NamedBitOperand<"gds", "GDS">; 1065 1066def omod : CustomOperand<i32, 1, "OModSI">; 1067def omod0 : DefaultOperand<omod, 0>; 1068 1069// We need to make the cases with a default of 0 distinct from no 1070// default to help deal with some cases where the operand appears 1071// before a mandatory operand. 1072def clampmod : NamedBitOperand<"clamp", "ClampSI">; 1073def clampmod0 : DefaultOperand<clampmod, 0>; 1074def highmod : NamedBitOperand<"high", "High">; 1075 1076def CPol : CustomOperand<i32, 1>; 1077def CPol_0 : DefaultOperand<CPol, 0>; 1078def CPol_GLC1 : DefaultOperand<CPol, 1>; 1079def CPol_GLC : ValuePredicatedOperand<CPol, "Op.getImm() & CPol::GLC">; 1080def CPol_NonGLC : ValuePredicatedOperand<CPol, "!(Op.getImm() & CPol::GLC)", 1>; 1081def CPol_GLC_WithDefault : DefaultOperand<CPol_GLC, !shl(1, CPolBit.GLC)>; 1082def CPol_NonGLC_WithDefault : DefaultOperand<CPol_NonGLC, 0>; 1083 1084def TFE : NamedBitOperand<"tfe">; 1085def UNorm : NamedBitOperand<"unorm">; 1086def DA : NamedBitOperand<"da">; 1087def R128A16 : CustomOperand<i1, 1>; 1088def A16 : NamedBitOperand<"a16">; 1089def D16 : NamedBitOperand<"d16">; 1090def LWE : NamedBitOperand<"lwe">; 1091def exp_compr : NamedBitOperand<"compr", "ExpCompr">; 1092def exp_vm : NamedBitOperand<"vm", "ExpVM">; 1093 1094def FORMAT : CustomOperand<i8>; 1095 1096def DMask : NamedIntOperand<i16, "dmask">; 1097def Dim : CustomOperand<i8>; 1098 1099def dst_sel : SDWAOperand<"dst_sel", "SDWADstSel">; 1100def src0_sel : SDWAOperand<"src0_sel", "SDWASrc0Sel">; 1101def src1_sel : SDWAOperand<"src1_sel", "SDWASrc1Sel">; 1102def dst_unused : CustomOperand<i32, 1, "SDWADstUnused">; 1103 1104def op_sel0 : ArrayOperand0<"op_sel", "OpSel">; 1105def op_sel_hi0 : ArrayOperand0<"op_sel_hi", "OpSelHi">; 1106def neg_lo0 : ArrayOperand0<"neg_lo", "NegLo">; 1107def neg_hi0 : ArrayOperand0<"neg_hi", "NegHi">; 1108 1109def dpp8 : CustomOperand<i32, 0, "DPP8">; 1110def dpp_ctrl : CustomOperand<i32, 0, "DPPCtrl">; 1111 1112let DefaultValue = "0xf" in { 1113def row_mask : NamedIntOperand<i32, "row_mask", "DppRowMask">; 1114def bank_mask : NamedIntOperand<i32, "bank_mask", "DppBankMask">; 1115} 1116def bound_ctrl : NamedIntOperand<i1, "bound_ctrl", "DppBoundCtrl", 1117 "[this] (int64_t &BC) -> bool { return convertDppBoundCtrl(BC); }">; 1118def FI : NamedIntOperand<i32, "fi", "DppFI">; 1119 1120def blgp : CustomOperand<i32, 1, "BLGP">; 1121def cbsz : NamedIntOperand<i32, "cbsz", "CBSZ">; 1122def abid : NamedIntOperand<i32, "abid", "ABID">; 1123 1124def hwreg : CustomOperand<i32, 0, "Hwreg">; 1125 1126def exp_tgt : CustomOperand<i32, 0, "ExpTgt">; 1127 1128def wait_vdst : NamedIntOperand<i8, "wait_vdst", "WaitVDST">; 1129def wait_exp : NamedIntOperand<i8, "wait_exp", "WaitEXP">; 1130def wait_va_vdst : NamedIntOperand<i8, "wait_va_vdst", "WaitVAVDst">; 1131def wait_va_vsrc : NamedIntOperand<i8, "wait_vm_vsrc", "WaitVMVSrc">; 1132 1133class KImmFPOperand<ValueType vt> : ImmOperand<vt> { 1134 let OperandNamespace = "AMDGPU"; 1135 let OperandType = "OPERAND_KIMM"#vt.Size; 1136 let PrintMethod = "printU"#vt.Size#"ImmOperand"; 1137 let DecoderMethod = "decodeOperand_KImmFP"; 1138} 1139 1140// 32-bit VALU immediate operand that uses the constant bus. 1141def KImmFP32 : KImmFPOperand<i32>; 1142 1143// 32-bit VALU immediate operand with a 16-bit value that uses the 1144// constant bus. 1145def KImmFP16 : KImmFPOperand<i16>; 1146 1147class FPInputModsMatchClass <int opSize> : AsmOperandClass { 1148 let Name = "RegOrImmWithFP"#opSize#"InputMods"; 1149 let ParserMethod = "parseRegOrImmWithFPInputMods"; 1150 let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods"; 1151} 1152 1153class FPVCSrcInputModsMatchClass <int opSize> : FPInputModsMatchClass <opSize> { 1154 let Name = "RegOrInlineImmWithFP"#opSize#"InputMods"; 1155 let PredicateMethod = "isRegOrInlineImmWithFP"#opSize#"InputMods"; 1156} 1157 1158def FP16InputModsMatchClass : FPInputModsMatchClass<16>; 1159def FPT16InputModsMatchClass : FPInputModsMatchClass<16> { 1160 let Name = "RegOrImmWithFPT16InputMods"; 1161 let PredicateMethod = "isRegOrImmWithFPT16InputMods"; 1162} 1163def FP32InputModsMatchClass : FPInputModsMatchClass<32>; 1164def FP64InputModsMatchClass : FPInputModsMatchClass<64>; 1165 1166def FP16VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<16>; 1167def FP32VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<32>; 1168 1169class InputMods <AsmOperandClass matchClass> : Operand <i32> { 1170 let OperandNamespace = "AMDGPU"; 1171 let OperandType = "OPERAND_INPUT_MODS"; 1172 let ParserMatchClass = matchClass; 1173} 1174 1175class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> { 1176 let PrintMethod = "printOperandAndFPInputMods"; 1177} 1178 1179def FP16InputMods : FPInputMods<FP16InputModsMatchClass>; 1180def FPT16InputMods : FPInputMods<FPT16InputModsMatchClass>; 1181def FP32InputMods : FPInputMods<FP32InputModsMatchClass>; 1182def FP64InputMods : FPInputMods<FP64InputModsMatchClass>; 1183 1184def FP16VCSrcInputMods : FPInputMods<FP16VCSrcInputModsMatchClass>; 1185def FP32VCSrcInputMods : FPInputMods<FP32VCSrcInputModsMatchClass>; 1186 1187class IntInputModsMatchClass <int opSize> : AsmOperandClass { 1188 let Name = "RegOrImmWithInt"#opSize#"InputMods"; 1189 let ParserMethod = "parseRegOrImmWithIntInputMods"; 1190 let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods"; 1191} 1192class IntVCSrcInputModsMatchClass <int opSize> : IntInputModsMatchClass <opSize> { 1193 let Name = "RegOrInlineImmWithInt"#opSize#"InputMods"; 1194 let PredicateMethod = "isRegOrInlineImmWithInt"#opSize#"InputMods"; 1195} 1196def IntT16InputModsMatchClass : IntInputModsMatchClass<16> { 1197 let Name = "RegOrImmWithIntT16InputMods"; 1198 let PredicateMethod = "isRegOrImmWithIntT16InputMods"; 1199} 1200def Int32InputModsMatchClass : IntInputModsMatchClass<32>; 1201def Int64InputModsMatchClass : IntInputModsMatchClass<64>; 1202def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>; 1203 1204class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> { 1205 let PrintMethod = "printOperandAndIntInputMods"; 1206} 1207def IntT16InputMods : IntInputMods<IntT16InputModsMatchClass>; 1208def Int32InputMods : IntInputMods<Int32InputModsMatchClass>; 1209def Int64InputMods : IntInputMods<Int64InputModsMatchClass>; 1210def Int32VCSrcInputMods : IntInputMods<Int32VCSrcInputModsMatchClass>; 1211 1212class OpSelModsMatchClass : AsmOperandClass { 1213 let Name = "OpSelMods"; 1214 let ParserMethod = "parseRegOrImm"; 1215 let PredicateMethod = "isRegOrImm"; 1216} 1217 1218def IntOpSelModsMatchClass : OpSelModsMatchClass; 1219def IntOpSelMods : InputMods<IntOpSelModsMatchClass>; 1220 1221class FPSDWAInputModsMatchClass <int opSize> : AsmOperandClass { 1222 let Name = "SDWAWithFP"#opSize#"InputMods"; 1223 let ParserMethod = "parseRegOrImmWithFPInputMods"; 1224 let PredicateMethod = "isSDWAFP"#opSize#"Operand"; 1225} 1226 1227def FP16SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<16>; 1228def FP32SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<32>; 1229 1230class FPSDWAInputMods <FPSDWAInputModsMatchClass matchClass> : 1231 InputMods <matchClass> { 1232 let PrintMethod = "printOperandAndFPInputMods"; 1233} 1234 1235def FP16SDWAInputMods : FPSDWAInputMods<FP16SDWAInputModsMatchClass>; 1236def FP32SDWAInputMods : FPSDWAInputMods<FP32SDWAInputModsMatchClass>; 1237 1238def FPVRegInputModsMatchClass : AsmOperandClass { 1239 let Name = "VRegWithFPInputMods"; 1240 let ParserMethod = "parseRegWithFPInputMods"; 1241 let PredicateMethod = "isVRegWithInputMods"; 1242} 1243 1244def FPT16VRegInputModsMatchClass : AsmOperandClass { 1245 let Name = "T16VRegWithFPInputMods"; 1246 let ParserMethod = "parseRegWithFPInputMods"; 1247 let PredicateMethod = "isT16VRegWithInputMods"; 1248} 1249 1250def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> { 1251 let PrintMethod = "printOperandAndFPInputMods"; 1252} 1253 1254def FPT16VRegInputMods : InputMods <FPT16VRegInputModsMatchClass> { 1255 let PrintMethod = "printOperandAndFPInputMods"; 1256} 1257 1258class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass { 1259 let Name = "SDWAWithInt"#opSize#"InputMods"; 1260 let ParserMethod = "parseRegOrImmWithIntInputMods"; 1261 let PredicateMethod = "isSDWAInt"#opSize#"Operand"; 1262} 1263 1264def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>; 1265def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>; 1266def Bin32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32> { 1267 let Name = "SDWAWithBin32InputMods"; 1268 let ParserMethod = "parseRegOrImm"; 1269} 1270 1271class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> : 1272 InputMods <matchClass> { 1273 let PrintMethod = "printOperandAndIntInputMods"; 1274} 1275 1276def Int16SDWAInputMods : IntSDWAInputMods<Int16SDWAInputModsMatchClass>; 1277def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>; 1278def Bin32SDWAInputMods : IntSDWAInputMods<Bin32SDWAInputModsMatchClass>; 1279 1280def IntVRegInputModsMatchClass : AsmOperandClass { 1281 let Name = "VRegWithIntInputMods"; 1282 let ParserMethod = "parseRegWithIntInputMods"; 1283 let PredicateMethod = "isVRegWithInputMods"; 1284} 1285 1286def IntT16VRegInputModsMatchClass : AsmOperandClass { 1287 let Name = "T16VRegWithIntInputMods"; 1288 let ParserMethod = "parseRegWithIntInputMods"; 1289 let PredicateMethod = "isT16VRegWithInputMods"; 1290} 1291 1292def IntT16VRegInputMods : InputMods <IntT16VRegInputModsMatchClass> { 1293 let PrintMethod = "printOperandAndIntInputMods"; 1294} 1295 1296def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> { 1297 let PrintMethod = "printOperandAndIntInputMods"; 1298} 1299 1300class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass { 1301 let Name = "PackedFP"#opSize#"InputMods"; 1302 let ParserMethod = "parseRegOrImm"; 1303 let PredicateMethod = "isRegOrImm"; 1304// let PredicateMethod = "isPackedFP"#opSize#"InputMods"; 1305} 1306 1307class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass { 1308 let Name = "PackedInt"#opSize#"InputMods"; 1309 let ParserMethod = "parseRegOrImm"; 1310 let PredicateMethod = "isRegOrImm"; 1311// let PredicateMethod = "isPackedInt"#opSize#"InputMods"; 1312} 1313 1314def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>; 1315def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>; 1316 1317class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> { 1318// let PrintMethod = "printPackedFPInputMods"; 1319} 1320 1321class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <matchClass> { 1322 //let PrintMethod = "printPackedIntInputMods"; 1323} 1324 1325def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>; 1326def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>; 1327 1328//===----------------------------------------------------------------------===// 1329// Complex patterns 1330//===----------------------------------------------------------------------===// 1331 1332def DS1Addr1Offset : ComplexPattern<iPTR, 2, "SelectDS1Addr1Offset">; 1333def DS64Bit4ByteAligned : ComplexPattern<iPTR, 3, "SelectDS64Bit4ByteAligned">; 1334def DS128Bit8ByteAligned : ComplexPattern<iPTR, 3, "SelectDS128Bit8ByteAligned">; 1335 1336def MOVRELOffset : ComplexPattern<iPTR, 2, "SelectMOVRELOffset">; 1337 1338def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">; 1339 1340// Modifiers for floating point instructions. 1341def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">; 1342 1343// VOP3 modifiers used for instructions that do not read canonicalized 1344// floating point values (i.e. integer operations with FP source 1345// modifiers) 1346def VOP3ModsNonCanonicalizing : ComplexPattern<untyped, 2, 1347 "SelectVOP3ModsNonCanonicalizing">; 1348 1349def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">; 1350 1351def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">; 1352 1353def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">; 1354 1355def VOP3PModsDOT : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">; 1356def DotIUVOP3PMods : ComplexPattern<untyped, 1, "SelectDotIUVOP3PMods">; 1357def WMMAOpSelVOP3PMods : ComplexPattern<untyped, 1, "SelectWMMAOpSelVOP3PMods">; 1358 1359def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">; 1360 1361def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">; 1362 1363def VOP3PMadMixModsExt : ComplexPattern<untyped, 2, "SelectVOP3PMadMixModsExt">; 1364def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">; 1365 1366def VINTERPMods : ComplexPattern<untyped, 2, "SelectVINTERPMods">; 1367def VINTERPModsHi : ComplexPattern<untyped, 2, "SelectVINTERPModsHi">; 1368 1369//===----------------------------------------------------------------------===// 1370// SI assembler operands 1371//===----------------------------------------------------------------------===// 1372 1373def SIOperand { 1374 int ZERO = 0x80; 1375 int VCC = 0x6A; 1376 int FLAT_SCR = 0x68; 1377} 1378 1379// This should be kept in sync with SISrcMods enum 1380def SRCMODS { 1381 int NONE = 0; 1382 int NEG = 1; 1383 int ABS = 2; 1384 int NEG_ABS = 3; 1385 1386 int NEG_HI = ABS; 1387 int OP_SEL_0 = 4; 1388 int OP_SEL_1 = 8; 1389 int DST_OP_SEL = 8; 1390} 1391 1392def DSTCLAMP { 1393 int NONE = 0; 1394 int ENABLE = 1; 1395} 1396 1397def DSTOMOD { 1398 int NONE = 0; 1399} 1400 1401def HWREG { 1402 int MODE = 1; 1403 int STATUS = 2; 1404 int TRAPSTS = 3; 1405 int HW_ID = 4; 1406 int GPR_ALLOC = 5; 1407 int LDS_ALLOC = 6; 1408 int IB_STS = 7; 1409 int MEM_BASES = 15; 1410 int TBA_LO = 16; 1411 int TBA_HI = 17; 1412 int TMA_LO = 18; 1413 int TMA_HI = 19; 1414 int FLAT_SCR_LO = 20; 1415 int FLAT_SCR_HI = 21; 1416 int XNACK_MASK = 22; 1417 int POPS_PACKER = 25; 1418 int SHADER_CYCLES = 29; 1419} 1420 1421class getHwRegImm<int Reg, int Offset = 0, int Size = 32> { 1422 int ret = !and(!or(Reg, 1423 !shl(Offset, 6), 1424 !shl(!add(Size, -1), 11)), 65535); 1425} 1426 1427//===----------------------------------------------------------------------===// 1428// 1429// SI Instruction multiclass helpers. 1430// 1431// Instructions with _32 take 32-bit operands. 1432// Instructions with _64 take 64-bit operands. 1433// 1434// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit 1435// encoding is the standard encoding, but instruction that make use of 1436// any of the instruction modifiers must use the 64-bit encoding. 1437// 1438// Instructions with _e32 use the 32-bit encoding. 1439// Instructions with _e64 use the 64-bit encoding. 1440// 1441//===----------------------------------------------------------------------===// 1442 1443class SIMCInstr <string pseudo, int subtarget> { 1444 string PseudoInstr = pseudo; 1445 int Subtarget = subtarget; 1446} 1447 1448//===----------------------------------------------------------------------===// 1449// Vector ALU classes 1450//===----------------------------------------------------------------------===// 1451 1452class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> { 1453 int ret = 1454 !if (!eq(Src0.Value, untyped.Value), 0, 1455 !if (!eq(Src1.Value, untyped.Value), 1, // VOP1 1456 !if (!eq(Src2.Value, untyped.Value), 2, // VOP2 1457 3))); // VOP3 1458} 1459 1460// Returns the register class to use for the destination of VOP[123C] 1461// instructions for the given VT. 1462class getVALUDstForVT<ValueType VT, bit IsTrue16 = 0, bit IsVOP3Encoding = 0> { 1463 defvar op16 = !if(IsTrue16, !if (IsVOP3Encoding, VOPDstOperand_t16, 1464 VOPDstOperand_t16Lo128), 1465 VOPDstOperand<VGPR_32>); 1466 RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>, 1467 !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>, 1468 !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>, 1469 !if(!eq(VT.Size, 16), op16, 1470 VOPDstS64orS32)))); // else VT == i1 1471} 1472 1473class getVALUDstForVT_fake16<ValueType VT> { 1474 RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>, 1475 !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>, 1476 !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>, 1477 !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32_Lo128>, 1478 VOPDstS64orS32)))); // else VT == i1 1479} 1480 1481// Returns the register class to use for the destination of VOP[12C] 1482// instructions with SDWA extension 1483class getSDWADstForVT<ValueType VT> { 1484 RegisterOperand ret = !if(!eq(VT.Size, 1), 1485 SDWAVopcDst, // VOPC 1486 VOPDstOperand<VGPR_32>); // VOP1/2 32-bit dst 1487} 1488 1489// Returns the register class to use for source 0 of VOP[12C] 1490// instructions for the given VT. 1491class getVOPSrc0ForVT<ValueType VT, bit IsTrue16, bit IsFake16 = 1> { 1492 bit isFP = isFloatType<VT>.ret; 1493 1494 RegisterOperand ret = 1495 !if(isFP, 1496 !if(!eq(VT.Size, 64), 1497 VSrc_f64, 1498 !if(!eq(VT.Value, f16.Value), 1499 !if(IsTrue16, 1500 !if(IsFake16, VSrcFake16_f16_Lo128, VSrcT_f16_Lo128), 1501 VSrc_f16 1502 ), 1503 !if(!eq(VT.Value, v2f16.Value), 1504 VSrc_v2f16, 1505 !if(!eq(VT.Value, v4f16.Value), 1506 AVSrc_64, 1507 VSrc_f32 1508 ) 1509 ) 1510 ) 1511 ), 1512 !if(!eq(VT.Size, 64), 1513 VSrc_b64, 1514 !if(!eq(VT.Value, i16.Value), 1515 !if(IsTrue16, 1516 !if(IsFake16, VSrcFake16_b16_Lo128, VSrcT_b16_Lo128), 1517 VSrc_b16 1518 ), 1519 !if(!eq(VT.Value, v2i16.Value), 1520 VSrc_v2b16, 1521 VSrc_b32 1522 ) 1523 ) 1524 ) 1525 ); 1526} 1527 1528class getSOPSrcForVT<ValueType VT> { 1529 RegisterOperand ret = !if(!eq(VT.Size, 64), SSrc_b64, SSrc_b32); 1530} 1531 1532// Returns the vreg register class to use for source operand given VT 1533class getVregSrcForVT<ValueType VT> { 1534 RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128, 1535 !if(!eq(VT.Size, 96), VReg_96, 1536 !if(!eq(VT.Size, 64), VReg_64, 1537 !if(!eq(VT.Size, 48), VReg_64, 1538 VGPR_32)))); 1539} 1540 1541class getVregSrcForVT_t16<ValueType VT, bit IsFake16 = 1> { 1542 RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128, 1543 !if(!eq(VT.Size, 96), VReg_96, 1544 !if(!eq(VT.Size, 64), VReg_64, 1545 !if(!eq(VT.Size, 48), VReg_64, 1546 !if(!eq(VT.Size, 16), 1547 !if(IsFake16, VGPR_32_Lo128, VGPR_16_Lo128), 1548 VGPR_32))))); 1549 1550 RegisterOperand op = !if (!and(!eq(VT.Size, 16), !not(IsFake16)), 1551 VGPRSrc_16_Lo128, RegisterOperand<ret>); 1552} 1553 1554class getSDWASrcForVT <ValueType VT> { 1555 bit isFP = isFloatType<VT>.ret; 1556 RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32); 1557 RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32); 1558 RegisterOperand ret = !if(isFP, retFlt, retInt); 1559} 1560 1561// Returns the register class to use for sources of VOP3 instructions for the 1562// given VT. 1563class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> { 1564 bit isFP = isFloatType<VT>.ret; 1565 RegisterOperand ret = 1566 !if(!eq(VT.Size, 128), 1567 VRegSrc_128, 1568 !if(!eq(VT.Size, 64), 1569 !if(isFP, 1570 !if(!eq(VT.Value, v2f32.Value), 1571 VSrc_v2f32, 1572 VSrc_f64), 1573 !if(!eq(VT.Value, v2i32.Value), 1574 VSrc_v2b32, 1575 VSrc_b64)), 1576 !if(!eq(VT.Value, i1.Value), 1577 SSrc_i1, 1578 !if(isFP, 1579 !if(!eq(VT.Value, f16.Value), 1580 !if(IsTrue16, VSrcT_f16, VSrc_f16), 1581 !if(!eq(VT.Value, v2f16.Value), 1582 VSrc_v2f16, 1583 !if(!eq(VT.Value, v4f16.Value), 1584 AVSrc_64, 1585 VSrc_f32 1586 ) 1587 ) 1588 ), 1589 !if(!eq(VT.Value, i16.Value), 1590 !if(IsTrue16, VSrcT_b16, VSrc_b16), 1591 !if(!eq(VT.Value, v2i16.Value), 1592 VSrc_v2b16, 1593 VSrc_b32 1594 ) 1595 ) 1596 ) 1597 ) 1598 ) 1599 ); 1600} 1601 1602// Src2 of VOP3 DPP instructions cannot be a literal 1603class getVOP3DPPSrcForVT<ValueType VT> { 1604 bit isFP = isFloatType<VT>.ret; 1605 RegisterOperand ret = 1606 !if (!eq(VT.Value, i1.Value), SSrc_i1, 1607 !if (isFP, 1608 !if (!eq(VT.Value, f16.Value), VCSrc_f16, 1609 !if (!eq(VT.Value, v2f16.Value), VCSrc_v2f16, VCSrc_f32)), 1610 !if (!eq(VT.Value, i16.Value), VCSrc_b16, 1611 !if (!eq(VT.Value, v2i16.Value), VCSrc_v2b16, 1612 VCSrc_b32)))); 1613} 1614 1615// Float or packed int 1616class isModifierType<ValueType SrcVT> { 1617 bit ret = !or(!eq(SrcVT.Value, f16.Value), 1618 !eq(SrcVT.Value, f32.Value), 1619 !eq(SrcVT.Value, f64.Value), 1620 !eq(SrcVT.Value, v2f16.Value), 1621 !eq(SrcVT.Value, v2i16.Value), 1622 !eq(SrcVT.Value, v2f32.Value), 1623 !eq(SrcVT.Value, v2i32.Value), 1624 !eq(SrcVT.Value, v4f16.Value), 1625 !eq(SrcVT.Value, v4i16.Value), 1626 !eq(SrcVT.Value, v4f32.Value), 1627 !eq(SrcVT.Value, v4i32.Value), 1628 !eq(SrcVT.Value, v8f16.Value), 1629 !eq(SrcVT.Value, v8i16.Value), 1630 !eq(SrcVT.Value, v8f32.Value), 1631 !eq(SrcVT.Value, v8i32.Value), 1632 !eq(SrcVT.Value, v16f16.Value), 1633 !eq(SrcVT.Value, v16i16.Value)); 1634} 1635 1636// Return type of input modifiers operand for specified input operand 1637class getSrcMod <ValueType VT, bit IsTrue16 = 0> { 1638 bit isFP = isFloatType<VT>.ret; 1639 bit isPacked = isPackedType<VT>.ret; 1640 Operand ret = !if(!eq(VT.Size, 64), 1641 !if(isFP, FP64InputMods, Int64InputMods), 1642 !if(!eq(VT.Size, 16), 1643 !if(isFP, !if(IsTrue16, FPT16InputMods, FP16InputMods), 1644 !if(IsTrue16, IntT16InputMods, IntOpSelMods)), 1645 !if(isFP, FP32InputMods, Int32InputMods))); 1646} 1647 1648class getOpSelMod <ValueType VT> { 1649 Operand ret = !if(!eq(VT.Value, f16.Value), FP16InputMods, IntOpSelMods); 1650} 1651 1652// Return type of input modifiers operand specified input operand for DPP 1653class getSrcModDPP <ValueType VT> { 1654 bit isFP = isFloatType<VT>.ret; 1655 Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods); 1656} 1657 1658class getSrcModDPP_t16 <ValueType VT> { 1659 bit isFP = isFloatType<VT>.ret; 1660 Operand ret = 1661 !if (isFP, 1662 !if (!eq(VT.Value, f16.Value), FPT16VRegInputMods, 1663 FPVRegInputMods), 1664 !if (!eq(VT.Value, i16.Value), IntT16VRegInputMods, 1665 IntVRegInputMods)); 1666} 1667 1668// Return type of input modifiers operand for specified input operand for DPP 1669class getSrcModVOP3DPP <ValueType VT> { 1670 bit isFP = isFloatType<VT>.ret; 1671 bit isPacked = isPackedType<VT>.ret; 1672 Operand ret = 1673 !if (isFP, 1674 !if (!eq(VT.Value, f16.Value), FP16VCSrcInputMods, 1675 FP32VCSrcInputMods), 1676 Int32VCSrcInputMods); 1677} 1678 1679// Return type of input modifiers operand specified input operand for SDWA 1680class getSrcModSDWA <ValueType VT> { 1681 Operand ret = !if(!eq(VT.Value, f16.Value), FP16SDWAInputMods, 1682 !if(!eq(VT.Value, f32.Value), FP32SDWAInputMods, 1683 !if(!eq(VT.Value, i16.Value), Int16SDWAInputMods, 1684 Int32SDWAInputMods))); 1685} 1686 1687// Returns the input arguments for VOP[12C] instructions for the given SrcVT. 1688class getIns32 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs> { 1689 dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0), // VOP1 1690 !if(!eq(NumSrcArgs, 2), (ins Src0RC:$src0, Src1RC:$src1), // VOP2 1691 (ins))); 1692} 1693 1694// Returns the input arguments for VOP3 instructions for the given SrcVT. 1695class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC, 1696 RegisterOperand Src2RC, int NumSrcArgs, 1697 bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod, 1698 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { 1699 1700 dag ret = 1701 !if (!eq(NumSrcArgs, 0), 1702 // VOP1 without input operands (V_NOP, V_CLREXCP) 1703 (ins), 1704 /* else */ 1705 !if (!eq(NumSrcArgs, 1), 1706 !if (HasModifiers, 1707 // VOP1 with modifiers 1708 !if(HasOMod, 1709 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1710 clampmod0:$clamp, omod0:$omod), 1711 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1712 clampmod0:$clamp)) 1713 /* else */, 1714 // VOP1 without modifiers 1715 !if (HasClamp, 1716 (ins Src0RC:$src0, clampmod0:$clamp), 1717 (ins Src0RC:$src0)) 1718 /* endif */ ), 1719 !if (!eq(NumSrcArgs, 2), 1720 !if (HasModifiers, 1721 // VOP 2 with modifiers 1722 !if(HasOMod, 1723 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1724 Src1Mod:$src1_modifiers, Src1RC:$src1, 1725 clampmod0:$clamp, omod0:$omod), 1726 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1727 Src1Mod:$src1_modifiers, Src1RC:$src1, 1728 clampmod0:$clamp)) 1729 /* else */, 1730 // VOP2 without modifiers 1731 !if (HasClamp, 1732 (ins Src0RC:$src0, Src1RC:$src1, clampmod0:$clamp), 1733 (ins Src0RC:$src0, Src1RC:$src1)) 1734 1735 /* endif */ ) 1736 /* NumSrcArgs == 3 */, 1737 !if (HasModifiers, 1738 !if (HasSrc2Mods, 1739 // VOP3 with modifiers 1740 !if (HasOMod, 1741 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1742 Src1Mod:$src1_modifiers, Src1RC:$src1, 1743 Src2Mod:$src2_modifiers, Src2RC:$src2, 1744 clampmod0:$clamp, omod0:$omod), 1745 !if (HasClamp, 1746 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1747 Src1Mod:$src1_modifiers, Src1RC:$src1, 1748 Src2Mod:$src2_modifiers, Src2RC:$src2, 1749 clampmod0:$clamp), 1750 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1751 Src1Mod:$src1_modifiers, Src1RC:$src1, 1752 Src2Mod:$src2_modifiers, Src2RC:$src2))), 1753 // VOP3 with modifiers except src2 1754 !if (HasOMod, 1755 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1756 Src1Mod:$src1_modifiers, Src1RC:$src1, 1757 Src2RC:$src2, clampmod0:$clamp, omod0:$omod), 1758 !if (HasClamp, 1759 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1760 Src1Mod:$src1_modifiers, Src1RC:$src1, 1761 Src2RC:$src2, clampmod0:$clamp), 1762 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1763 Src1Mod:$src1_modifiers, Src1RC:$src1, 1764 Src2RC:$src2)))) 1765 /* else */, 1766 // VOP3 without modifiers 1767 !if (HasClamp, 1768 (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2, clampmod0:$clamp), 1769 (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2)) 1770 /* endif */ )))); 1771} 1772 1773class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC, 1774 RegisterOperand Src2RC, int NumSrcArgs, 1775 bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod, 1776 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel, 1777 bit IsVOP3P> { 1778 // getInst64 handles clamp and omod. implicit mutex between vop3p and omod 1779 dag base = getIns64 <Src0RC, Src1RC, Src2RC, NumSrcArgs, 1780 HasClamp, HasModifiers, HasSrc2Mods, HasOMod, 1781 Src0Mod, Src1Mod, Src2Mod>.ret; 1782 dag opsel = (ins op_sel0:$op_sel); 1783 dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi); 1784 dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi)); 1785 1786 dag ret = !con(base, 1787 !if(HasOpSel, opsel,(ins)), 1788 !if(IsVOP3P, vop3pFields,(ins))); 1789} 1790 1791class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC, 1792 RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel, 1793 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { 1794 dag ret = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs, 1795 HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/, 1796 0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod, 1797 HasOpSel, 1/*IsVOP3P*/>.ret; 1798} 1799 1800class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC, 1801 RegisterOperand Src2RC, int NumSrcArgs, 1802 bit HasClamp, bit HasOMod, 1803 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { 1804 dag ret = getInsVOP3Base<Src0RC, Src1RC, 1805 Src2RC, NumSrcArgs, 1806 HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/, HasOMod, 1807 Src0Mod, Src1Mod, Src2Mod, 1/*HasOpSel*/, 0>.ret; 1808} 1809 1810class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC, 1811 RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers, 1812 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld> { 1813 1814 dag ret = !if(!eq(NumSrcArgs, 0), 1815 // VOP1 without input operands (V_NOP) 1816 (ins ), 1817 !con( 1818 !if(HasOld ,(ins OldRC:$old), (ins)), 1819 !if (!eq(NumSrcArgs, 1), 1820 !if (HasModifiers, 1821 // VOP1_DPP with modifiers 1822 (ins Src0Mod:$src0_modifiers, Src0RC:$src0) 1823 /* else */, 1824 // VOP1_DPP without modifiers 1825 (ins Src0RC:$src0) 1826 /* endif */), 1827 !if (!eq(NumSrcArgs, 2), 1828 !if (HasModifiers, 1829 // VOP2_DPP with modifiers 1830 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1831 Src1Mod:$src1_modifiers, Src1RC:$src1) 1832 /* else */, 1833 // VOP2_DPP without modifiers 1834 (ins Src0RC:$src0, Src1RC:$src1) 1835 ) 1836 /* NumSrcArgs == 3, VOP3 */, 1837 !if (HasModifiers, 1838 // VOP3_DPP with modifiers 1839 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1840 Src1Mod:$src1_modifiers, Src1RC:$src1, 1841 Src2Mod:$src2_modifiers, Src2RC:$src2) 1842 /* else */, 1843 // VOP3_DPP without modifiers 1844 (ins Src0RC:$src0, Src1RC:$src1, 1845 Src2RC:$src2) 1846 ) 1847 ) 1848 ) 1849 ) 1850 ); 1851} 1852 1853class getInsDPP <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC, 1854 RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers, 1855 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> { 1856 dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs, 1857 HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret, 1858 (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, 1859 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)); 1860} 1861 1862class getInsDPP16 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC, 1863 RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers, 1864 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> { 1865 dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs, 1866 HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret, 1867 (ins FI:$fi)); 1868} 1869 1870class getInsDPP8 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC, 1871 RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers, 1872 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> { 1873 dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs, 1874 HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret, 1875 (ins dpp8:$dpp8, FI:$fi)); 1876} 1877 1878class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld> { 1879 dag old = ( ins OldRC:$old ); 1880 dag base = VOP3Base; 1881 dag ret = !con( 1882 !if(!and(HasOld,!ne(NumSrcArgs, 0)), old, (ins)), 1883 base 1884 ); 1885} 1886 1887class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> { 1888 dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret, 1889 (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, 1890 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)); 1891} 1892 1893class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> { 1894 dag ret = !con(getInsVOP3DPP<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret, 1895 (ins FI:$fi)); 1896} 1897 1898class getInsVOP3DPP8<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> { 1899 dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret, 1900 (ins dpp8:$dpp8, FI:$fi)); 1901} 1902 1903// Ins for SDWA 1904class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs, 1905 bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod, 1906 ValueType DstVT> { 1907 1908 dag ret = !if(!eq(NumSrcArgs, 0), 1909 // VOP1 without input operands (V_NOP) 1910 (ins), 1911 !if(!eq(NumSrcArgs, 1), 1912 // VOP1 1913 !if(!not(HasSDWAOMod), 1914 // VOP1_SDWA without omod 1915 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1916 clampmod:$clamp, 1917 dst_sel:$dst_sel, dst_unused:$dst_unused, 1918 src0_sel:$src0_sel), 1919 // VOP1_SDWA with omod 1920 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1921 clampmod:$clamp, omod:$omod, 1922 dst_sel:$dst_sel, dst_unused:$dst_unused, 1923 src0_sel:$src0_sel)), 1924 !if(!eq(NumSrcArgs, 2), 1925 !if(!eq(DstVT.Size, 1), 1926 // VOPC_SDWA 1927 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1928 Src1Mod:$src1_modifiers, Src1RC:$src1, 1929 clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel), 1930 // VOP2_SDWA 1931 !if(!not(HasSDWAOMod), 1932 // VOP2_SDWA without omod 1933 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1934 Src1Mod:$src1_modifiers, Src1RC:$src1, 1935 clampmod:$clamp, 1936 dst_sel:$dst_sel, dst_unused:$dst_unused, 1937 src0_sel:$src0_sel, src1_sel:$src1_sel), 1938 // VOP2_SDWA with omod 1939 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1940 Src1Mod:$src1_modifiers, Src1RC:$src1, 1941 clampmod:$clamp, omod:$omod, 1942 dst_sel:$dst_sel, dst_unused:$dst_unused, 1943 src0_sel:$src0_sel, src1_sel:$src1_sel))), 1944 (ins)/* endif */))); 1945} 1946 1947// Outs for DPP 1948class getOutsDPP <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> { 1949 dag ret = !if(HasDst, 1950 !if(!eq(DstVT.Size, 1), 1951 (outs), // no dst for VOPC, we use "vcc"-token as dst in SDWA VOPC instructions 1952 (outs DstRCDPP:$vdst)), 1953 (outs)); // V_NOP 1954} 1955 1956// Outs for SDWA 1957class getOutsSDWA <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA> { 1958 dag ret = !if(HasDst, 1959 !if(!eq(DstVT.Size, 1), 1960 (outs DstRCSDWA:$sdst), 1961 (outs DstRCSDWA:$vdst)), 1962 (outs)); // V_NOP 1963} 1964 1965// Returns the assembly string for the inputs and outputs of a VOP[12C] 1966// instruction. 1967class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> { 1968 string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC 1969 string src0 = ", $src0"; 1970 string src1 = ", $src1"; 1971 string src2 = ", $src2"; 1972 string ret = !if(HasDst, dst, "") # 1973 !if(!eq(NumSrcArgs, 1), src0, "") # 1974 !if(!eq(NumSrcArgs, 2), src0#src1, "") # 1975 !if(!eq(NumSrcArgs, 3), src0#src1#src2, ""); 1976} 1977 1978class getAsmVOPDPart <int NumSrcArgs, string XorY> { 1979 string dst = "$vdst" # XorY; 1980 string src0 = ", $src0" # XorY; 1981 string src1 = ", $vsrc1" # XorY; 1982 string ret = dst # 1983 !if(!ge(NumSrcArgs, 1), src0, "") # 1984 !if(!ge(NumSrcArgs, 2), src1, ""); 1985} 1986 1987// Returns the assembly string for the inputs and outputs of a VOP3P 1988// instruction. 1989class getAsmVOP3P <int NumSrcArgs, bit HasModifiers, 1990 bit HasClamp, bit HasOpSel> { 1991 string dst = "$vdst"; 1992 string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,"); 1993 string src1 = !if(!eq(NumSrcArgs, 1), "", 1994 !if(!eq(NumSrcArgs, 2), " $src1", 1995 " $src1,")); 1996 string src2 = !if(!eq(NumSrcArgs, 3), " $src2", ""); 1997 1998 string mods = !if(HasModifiers, "$neg_lo$neg_hi", ""); 1999 string clamp = !if(HasClamp, "$clamp", ""); 2000 string opsel = !if(HasOpSel, "$op_sel$op_sel_hi", ""); 2001 2002 // Each modifier is printed as an array of bits for each operand, so 2003 // all operands are printed as part of src0_modifiers. 2004 string ret = dst#", "#src0#src1#src2#opsel#mods#clamp; 2005} 2006 2007class getAsmVOP3OpSel <int NumSrcArgs, 2008 bit HasClamp, 2009 bit HasOMod, 2010 bit Src0HasMods, 2011 bit Src1HasMods, 2012 bit Src2HasMods> { 2013 string dst = "$vdst"; 2014 2015 string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,"); 2016 string isrc1 = !if(!eq(NumSrcArgs, 1), "", 2017 !if(!eq(NumSrcArgs, 2), " $src1", 2018 " $src1,")); 2019 string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", ""); 2020 2021 string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,"); 2022 string fsrc1 = !if(!eq(NumSrcArgs, 1), "", 2023 !if(!eq(NumSrcArgs, 2), " $src1_modifiers", 2024 " $src1_modifiers,")); 2025 string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", ""); 2026 2027 string src0 = !if(Src0HasMods, fsrc0, isrc0); 2028 string src1 = !if(Src1HasMods, fsrc1, isrc1); 2029 string src2 = !if(Src2HasMods, fsrc2, isrc2); 2030 2031 string clamp = !if(HasClamp, "$clamp", ""); 2032 string omod = !if(HasOMod, "$omod", ""); 2033 string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp#omod; 2034} 2035 2036class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> { 2037 string dst = !if(HasDst, 2038 !if(!eq(DstVT.Size, 1), 2039 "$sdst", 2040 "$vdst"), 2041 ""); // use $sdst for VOPC 2042 string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,"); 2043 string src1 = !if(!eq(NumSrcArgs, 1), "", 2044 !if(!eq(NumSrcArgs, 2), " $src1_modifiers", 2045 " $src1_modifiers,")); 2046 string args = !if(!not(HasModifiers), 2047 getAsm32<0, NumSrcArgs, DstVT>.ret, 2048 ", "#src0#src1); 2049 string ret = dst#args#" $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; 2050} 2051 2052class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> { 2053 string ret = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret#"$fi"; 2054} 2055 2056class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> 2057 : getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>{ 2058 let ret = dst#args#" $dpp8$fi"; 2059} 2060 2061class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp, 2062 bit HasOpSel, bit HasOMod, bit IsVOP3P, 2063 bit HasModifiers, bit Src0HasMods, 2064 bit Src1HasMods, bit Src2HasMods, ValueType DstVT = i32> { 2065 string dst = !if(HasDst, 2066 !if(!eq(DstVT.Size, 1), 2067 "$sdst", 2068 "$vdst"), 2069 ""); // use $sdst for VOPC 2070 string src0nomods = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,"); 2071 string src1nomods = !if(!eq(NumSrcArgs, 1), "", 2072 !if(!eq(NumSrcArgs, 2), " $src1", 2073 " $src1,")); 2074 string src2nomods = !if(!eq(NumSrcArgs, 3), " $src2", ""); 2075 2076 string src0mods = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,"); 2077 string src1mods = !if(!eq(NumSrcArgs, 1), "", 2078 !if(!eq(NumSrcArgs, 2), " $src1_modifiers", 2079 " $src1_modifiers,")); 2080 string src2mods = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", ""); 2081 2082 string src0 = !if(Src0HasMods, src0mods, src0nomods); 2083 string src1 = !if(Src1HasMods, src1mods, src1nomods); 2084 string src2 = !if(Src2HasMods, src2mods, src2nomods); 2085 string opsel = !if(HasOpSel, "$op_sel", ""); 2086 string 3PMods = !if(IsVOP3P, 2087 !if(HasOpSel, "$op_sel_hi", "") 2088 #!if(HasModifiers, "$neg_lo$neg_hi", ""), 2089 ""); 2090 string clamp = !if(HasClamp, "$clamp", ""); 2091 string omod = !if(HasOMod, "$omod", ""); 2092 2093 string ret = dst#!if(!gt(NumSrcArgs,0),", "#src0#src1#src2#opsel#3PMods#clamp#omod, ""); 2094 2095} 2096 2097class getAsmVOP3DPP<string base> { 2098 string ret = base # " $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; 2099} 2100 2101class getAsmVOP3DPP16<string base> { 2102 string ret = getAsmVOP3DPP<base>.ret # "$fi"; 2103} 2104 2105class getAsmVOP3DPP8<string base> { 2106 string ret = base # " $dpp8$fi"; 2107} 2108 2109 2110class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> { 2111 string dst = !if(HasDst, 2112 !if(!eq(DstVT.Size, 1), 2113 " vcc", // use vcc token as dst for VOPC instructions 2114 "$vdst"), 2115 ""); 2116 string src0 = "$src0_modifiers"; 2117 string src1 = "$src1_modifiers"; 2118 string args = !if(!eq(NumSrcArgs, 0), 2119 "", 2120 !if(!eq(NumSrcArgs, 1), 2121 ", "#src0#"$clamp", 2122 ", "#src0#", "#src1#"$clamp" 2123 ) 2124 ); 2125 string sdwa = !if(!eq(NumSrcArgs, 0), 2126 "", 2127 !if(!eq(NumSrcArgs, 1), 2128 " $dst_sel $dst_unused $src0_sel", 2129 !if(!eq(DstVT.Size, 1), 2130 " $src0_sel $src1_sel", // No dst_sel and dst_unused for VOPC 2131 " $dst_sel $dst_unused $src0_sel $src1_sel" 2132 ) 2133 ) 2134 ); 2135 string ret = dst#args#sdwa; 2136} 2137 2138class getAsmSDWA9 <bit HasDst, bit HasOMod, int NumSrcArgs, 2139 ValueType DstVT = i32> { 2140 string dst = !if(HasDst, 2141 !if(!eq(DstVT.Size, 1), 2142 "$sdst", // VOPC 2143 "$vdst"), // VOP1/2 2144 ""); 2145 string src0 = "$src0_modifiers"; 2146 string src1 = "$src1_modifiers"; 2147 string out_mods = !if(!not(HasOMod), "$clamp", "$clamp$omod"); 2148 string args = !if(!eq(NumSrcArgs, 0), "", 2149 !if(!eq(NumSrcArgs, 1), 2150 ", "#src0, 2151 ", "#src0#", "#src1 2152 ) 2153 ); 2154 string sdwa = !if(!eq(NumSrcArgs, 0), "", 2155 !if(!eq(NumSrcArgs, 1), 2156 out_mods#" $dst_sel $dst_unused $src0_sel", 2157 !if(!eq(DstVT.Size, 1), 2158 " $src0_sel $src1_sel", // No dst_sel, dst_unused and output modifiers for VOPC 2159 out_mods#" $dst_sel $dst_unused $src0_sel $src1_sel" 2160 ) 2161 ) 2162 ); 2163 string ret = dst#args#sdwa; 2164} 2165 2166class getHas64BitOps <int NumSrcArgs, ValueType DstVT, ValueType Src0VT, 2167 ValueType Src1VT> { 2168 bit ret = !if(!eq(NumSrcArgs, 3), 2169 0, 2170 !if(!eq(DstVT.Size, 64), 2171 1, 2172 !if(!eq(Src0VT.Size, 64), 2173 1, 2174 !if(!eq(Src1VT.Size, 64), 2175 1, 2176 0 2177 ) 2178 ) 2179 ) 2180 ); 2181} 2182 2183class getHasSDWA <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32, 2184 ValueType Src1VT = i32> { 2185 bit ret = !if(!eq(NumSrcArgs, 3), 2186 0, // NumSrcArgs == 3 - No SDWA for VOP3 2187 !if(!eq(DstVT.Size, 64), 2188 0, // 64-bit dst - No SDWA for 64-bit operands 2189 !if(!eq(Src0VT.Size, 64), 2190 0, // 64-bit src0 2191 !if(!eq(Src1VT.Size, 64), 2192 0, // 64-bit src2 2193 1 2194 ) 2195 ) 2196 ) 2197 ); 2198} 2199 2200class getHasDPP <int NumSrcArgs> { 2201 bit ret = !if(!eq(NumSrcArgs, 3), 2202 0, // NumSrcArgs == 3 - No DPP for VOP3 2203 1); 2204} 2205 2206class getHasExt32BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32, 2207 ValueType Src1VT = i32> { 2208 bit ret = !and(getHasDPP<NumSrcArgs>.ret, 2209 !not(getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret)); 2210} 2211 2212class getHasExt64BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32, 2213 ValueType Src1VT = i32> { 2214 bit ret = !and(getHasDPP<NumSrcArgs>.ret, 2215 getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret); 2216} 2217 2218// Function that checks if instruction supports DPP and SDWA 2219class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32, 2220 ValueType Src1VT = i32> { 2221 bit ret = !or(getHasDPP<NumSrcArgs>.ret, 2222 getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret); 2223} 2224 2225// Return an AGPR+VGPR operand class for the given VGPR register class. 2226class getLdStRegisterOperand<RegisterClass RC> { 2227 RegisterOperand ret = 2228 !if(!eq(RC.Size, 32), AVLdSt_32, 2229 !if(!eq(RC.Size, 64), AVLdSt_64, 2230 !if(!eq(RC.Size, 96), AVLdSt_96, 2231 !if(!eq(RC.Size, 128), AVLdSt_128, 2232 !if(!eq(RC.Size, 160), AVLdSt_160, 2233 RegisterOperand<VReg_1> // invalid register 2234 ))))); 2235} 2236 2237class getHasVOP3DPP <ValueType DstVT = i32, ValueType Src0VT = i32, 2238 ValueType Src1VT = i32, ValueType Src2VT = i32> { 2239 bit ret = !if(!eq(DstVT.Size, 64), 2240 0, // 64-bit dst No DPP for 64-bit operands 2241 !if(!eq(Src0VT.Size, 64), 2242 0, // 64-bit src0 2243 !if(!eq(Src1VT.Size, 64), 2244 0, // 64-bit src1 2245 !if(!eq(Src2VT.Size, 64), 2246 0, // 64-bit src2 2247 1 2248 ) 2249 ) 2250 ) 2251 ); 2252} 2253 2254 2255def PatGenMode { 2256 int NoPattern = 0; 2257 int Pattern = 1; 2258} 2259 2260class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> { 2261 2262 field list<ValueType> ArgVT = _ArgVT; 2263 field bit EnableClamp = _EnableClamp; 2264 field bit IsTrue16 = 0; 2265 field bit IsRealTrue16 = 0; 2266 2267 field ValueType DstVT = ArgVT[0]; 2268 field ValueType Src0VT = ArgVT[1]; 2269 field ValueType Src1VT = ArgVT[2]; 2270 field ValueType Src2VT = ArgVT[3]; 2271 field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret; 2272 field RegisterOperand DstRCDPP = DstRC; 2273 field RegisterOperand DstRC64 = DstRC; 2274 field RegisterOperand DstRCVOP3DPP = DstRC64; 2275 field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret; 2276 field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT, IsTrue16>.ret; 2277 field RegisterOperand Src1RC32 = RegisterOperand<getVregSrcForVT<Src1VT>.ret>; 2278 field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret; 2279 field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret; 2280 field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret; 2281 field RegisterClass Src0DPP = getVregSrcForVT<Src0VT>.ret; 2282 field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret; 2283 field RegisterClass Src2DPP = getVregSrcForVT<Src2VT>.ret; 2284 field RegisterOperand Src0VOP3DPP = VGPRSrc_32; 2285 field RegisterOperand Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT>.ret; 2286 field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret; 2287 field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret; 2288 field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret; 2289 field Operand Src0Mod = getSrcMod<Src0VT>.ret; 2290 field Operand Src1Mod = getSrcMod<Src1VT>.ret; 2291 field Operand Src2Mod = getSrcMod<Src2VT>.ret; 2292 field Operand Src0ModDPP = getSrcModDPP<Src0VT>.ret; 2293 field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret; 2294 field Operand Src2ModDPP = getSrcModDPP<Src2VT>.ret; 2295 field Operand Src0ModVOP3DPP = getSrcModDPP<Src0VT>.ret; 2296 field Operand Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret; 2297 field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT>.ret; 2298 field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret; 2299 field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret; 2300 2301 2302 field bit IsMAI = 0; 2303 field bit IsVOP3P = 0; 2304 field bit IsDOT = 0; 2305 field bit IsSingle = 0; 2306 field bit IsWMMA = 0; 2307 2308 field bit HasDst = !ne(DstVT.Value, untyped.Value); 2309 field bit HasDst32 = HasDst; 2310 field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case 2311 field bit EmitDstSel = EmitDst; 2312 field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret; 2313 field bit HasSrc0 = !ne(Src0VT.Value, untyped.Value); 2314 field bit HasSrc1 = !ne(Src1VT.Value, untyped.Value); 2315 field bit HasSrc2 = !ne(Src2VT.Value, untyped.Value); 2316 2317 field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret; 2318 field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret; 2319 field bit HasSrc2FloatMods = isFloatType<Src2VT>.ret; 2320 2321 field bit HasSrc0IntMods = isIntType<Src0VT>.ret; 2322 field bit HasSrc1IntMods = isIntType<Src1VT>.ret; 2323 field bit HasSrc2IntMods = isIntType<Src2VT>.ret; 2324 2325 field bit HasClamp = !or(isModifierType<Src0VT>.ret, EnableClamp); 2326 field bit HasSDWAClamp = EmitDst; 2327 field bit HasFPClamp = !and(isFloatType<DstVT>.ret, HasClamp); 2328 field bit HasIntClamp = !if(isFloatType<DstVT>.ret, 0, HasClamp); 2329 field bit HasClampLo = HasClamp; 2330 field bit HasClampHi = !and(isPackedType<DstVT>.ret, HasClamp); 2331 field bit HasHigh = 0; 2332 2333 field bit IsPacked = isPackedType<Src0VT>.ret; 2334 field bit HasOpSel = IsPacked; 2335 field bit HasOMod = !if(IsVOP3P, 0, isFloatType<DstVT>.ret); 2336 field bit HasSDWAOMod = isFloatType<DstVT>.ret; 2337 2338 field bit HasModifiers = !or(isModifierType<Src0VT>.ret, 2339 isModifierType<Src1VT>.ret, 2340 isModifierType<Src2VT>.ret, 2341 HasOMod); 2342 2343 field bit HasSrc0Mods = HasModifiers; 2344 field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0); 2345 field bit HasSrc2Mods = !if(HasModifiers, !or(HasSrc2FloatMods, HasSrc2IntMods), 0); 2346 2347 field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret; 2348 field bit HasExtVOP3DPP = getHasVOP3DPP<DstVT, Src0VT, Src1VT, Src2VT>.ret; 2349 field bit HasExtDPP = !or(getHasDPP<NumSrcArgs>.ret, HasExtVOP3DPP); 2350 field bit HasExt32BitDPP = getHasExt32BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret; 2351 field bit HasExt64BitDPP = getHasExt64BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret; 2352 field bit HasExtSDWA = getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret; 2353 field bit HasExtSDWA9 = HasExtSDWA; 2354 field int NeedPatGen = PatGenMode.NoPattern; 2355 2356 field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods); 2357 field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods); 2358 field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods); 2359 2360 field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs)); 2361 2362 // VOP3b instructions are a special case with a second explicit 2363 // output. This is manually overridden for them. 2364 field dag Outs32 = Outs; 2365 field dag Outs64 = !if(HasDst,(outs DstRC64:$vdst),(outs)); 2366 field dag OutsDPP = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret; 2367 field dag OutsDPP8 = OutsDPP; 2368 field dag OutsVOP3DPP = getOutsDPP<HasDst, DstVT, DstRCVOP3DPP>.ret; 2369 field dag OutsVOP3DPP8 = OutsVOP3DPP; 2370 field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret; 2371 2372 field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret; 2373 field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs, 2374 HasIntClamp, HasModifiers, HasSrc2Mods, 2375 HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret; 2376 field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64, 2377 NumSrcArgs, HasClamp, HasOpSel, 2378 Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret; 2379 field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64, 2380 NumSrcArgs, HasClamp, HasOMod, 2381 getOpSelMod<Src0VT>.ret, 2382 getOpSelMod<Src1VT>.ret, 2383 getOpSelMod<Src2VT>.ret>.ret; 2384 field dag InsDPP = !if(HasExtDPP, 2385 getInsDPP<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs, 2386 HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret, 2387 (ins)); 2388 field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs, 2389 HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret; 2390 field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, 2391 NumSrcArgs, HasModifiers, 2392 Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret; 2393 field dag InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, 2394 Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod, 2395 Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel, IsVOP3P>.ret; 2396 field dag InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret; 2397 field dag InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret; 2398 field dag InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret; 2399 field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs, 2400 HasSDWAOMod, Src0ModSDWA, Src1ModSDWA, 2401 DstVT>.ret; 2402 field dag InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X); 2403 // It is a slight misnomer to use the deferred f32 operand type for non-float 2404 // operands, but this operand type will only be used if the other dual 2405 // component is FMAAK or FMAMK 2406 field dag InsVOPDXDeferred = (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X, VGPR_32:$vsrc1X); 2407 field dag InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y); 2408 field dag InsVOPDYDeferred = (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y, VGPR_32:$vsrc1Y); 2409 2410 2411 field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret; 2412 field string AsmDPP = !if(HasExtDPP, 2413 getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, ""); 2414 field string AsmDPP16 = getAsmDPP16<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret; 2415 // DPP8 encoding has no fields for modifiers, and it is enforced by setting 2416 // the asm operand name via this HasModifiers flag 2417 field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret; 2418 field string AsmVOP3Base = getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp, 2419 HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, HasModifiers, 2420 HasModifiers, DstVT>.ret; 2421 field string Asm64 = AsmVOP3Base; 2422 field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp, HasOpSel>.ret; 2423 field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs, 2424 HasClamp, 2425 HasOMod, 2426 HasSrc0FloatMods, 2427 HasSrc1FloatMods, 2428 HasSrc2FloatMods>.ret; 2429 field string AsmVOP3DPP = getAsmVOP3DPP<AsmVOP3Base>.ret; 2430 field string AsmVOP3DPP16 = getAsmVOP3DPP16<AsmVOP3Base>.ret; 2431 field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3Base>.ret; 2432 field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret; 2433 field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret; 2434 field string AsmVOPDX = getAsmVOPDPart<NumSrcArgs, "X">.ret; 2435 field string AsmVOPDY = getAsmVOPDPart<NumSrcArgs, "Y">.ret; 2436 field string TieRegDPP = "$old"; 2437} 2438 2439 class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> { 2440 let HasExt = 0; 2441 let HasExtDPP = 0; 2442 let HasExtVOP3DPP = 0; 2443 let HasExt32BitDPP = 0; 2444 let HasExt64BitDPP = 0; 2445 let HasExtSDWA = 0; 2446 let HasExtSDWA9 = 0; 2447} 2448 2449class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.ArgVT> { 2450 let NeedPatGen = mode; 2451} 2452 2453// VOPC_Profile_t16, VOPC_NoSdst_Profile_t16, VOPC_Class_Profile_t16, 2454// VOPC_Class_NoSdst_Profile_t16, and VOP_MAC_F16_t16 do not inherit from this 2455// class, so copy changes to this class in those profiles 2456class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> { 2457 let IsTrue16 = 1; 2458 let IsRealTrue16 = 1; 2459 // Most DstVT are 16-bit, but not all. 2460 let DstRC = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 0 /*IsVOP3Encoding*/>.ret; 2461 let DstRC64 = getVALUDstForVT<DstVT>.ret; 2462 let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret; 2463 let Src1RC32 = getVregSrcForVT_t16<Src1VT, 0 /*IsFake16*/>.op; 2464 let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret; 2465 let Src1DPP = getVregSrcForVT_t16<Src1VT>.ret; 2466 let Src2DPP = getVregSrcForVT_t16<Src2VT>.ret; 2467 let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret; 2468 let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret; 2469 let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret; 2470 2471 let DstRC64 = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 1 /*IsVOP3Encoding*/>.ret; 2472 let Src0RC64 = getVOP3SrcForVT<Src0VT, 1 /*IsTrue16*/>.ret; 2473 let Src1RC64 = getVOP3SrcForVT<Src1VT, 1 /*IsTrue16*/>.ret; 2474 let Src2RC64 = getVOP3SrcForVT<Src2VT, 1 /*IsTrue16*/>.ret; 2475 let Src0Mod = getSrcMod<Src0VT, 1 /*IsTrue16*/>.ret; 2476 let Src1Mod = getSrcMod<Src1VT, 1 /*IsTrue16*/>.ret; 2477 let Src2Mod = getSrcMod<Src2VT, 1 /*IsTrue16*/>.ret; 2478} 2479 2480class VOPProfile_Fake16<VOPProfile P> : VOPProfile<P.ArgVT> { 2481 let IsTrue16 = 1; 2482 // Most DstVT are 16-bit, but not all 2483 let DstRC = getVALUDstForVT_fake16<DstVT>.ret; 2484 let DstRC64 = getVALUDstForVT<DstVT>.ret; 2485 let Src1RC32 = RegisterOperand<getVregSrcForVT_t16<Src1VT>.ret>; 2486 let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret; 2487 let Src1DPP = getVregSrcForVT_t16<Src1VT>.ret; 2488 let Src2DPP = getVregSrcForVT_t16<Src2VT>.ret; 2489 let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret; 2490 let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret; 2491 let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret; 2492} 2493 2494def VOP_F16_F16 : VOPProfile<[f16, f16, untyped, untyped]>; 2495def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>; 2496def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>; 2497def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>; 2498 2499def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>; 2500def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>; 2501def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>; 2502def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>; 2503def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], /*EnableClamp=*/1>; 2504 2505def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>; 2506def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>; 2507 2508def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>; 2509def VOP_I32_I16 : VOPProfile <[i32, i16, untyped, untyped]>; 2510def VOP_I16_I32 : VOPProfile <[i16, i32, untyped, untyped]>; 2511 2512def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>; 2513def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>; 2514def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>; 2515 2516def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>; 2517def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>; 2518def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>; 2519def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>; 2520 2521def VOP_F16_V2F16_V2F16_F16 : VOPProfile <[f16, v2f16, v2f16, f16]>; 2522def VOP_I16_V2I16_V2I16_I16 : VOPProfile <[i16, v2i16, v2i16, i16]>; 2523def VOP_F32_V2I16_V2I16_F32 : VOPProfile <[f32, v2i16, v2i16, f32]>; 2524 2525def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>; 2526 2527def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>; 2528 2529def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>; 2530def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>; 2531def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>; 2532def VOP_F64_F32 : VOPProfile <[f64, f32, untyped, untyped]>; 2533def VOP_F64_F64 : VOPProfile <[f64, f64, untyped, untyped]>; 2534def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>; 2535def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>; 2536def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>; 2537def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>; 2538def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>; 2539def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>; 2540def VOP_I64_I64 : VOPProfile <[i64, i64, untyped, untyped]>; 2541 2542def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>; 2543def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>; 2544def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>; 2545def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>; 2546def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>; 2547def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>; 2548def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>; 2549def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>; 2550def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=*/1>; 2551def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>; 2552def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>; 2553 2554def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>; 2555def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>; 2556def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>; 2557 2558def VOP_F16_F32_F16_F32 : VOPProfile <[f16, f32, f16, f32]>; 2559def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>; 2560def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>; 2561def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>; 2562def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>; 2563def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>; 2564def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>; 2565def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>; 2566def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>; 2567 2568def VOP_F32_V2F16_V2F16_F32 : VOPProfile <[f32, v2f16, v2f16, f32]>; 2569def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>; 2570 2571def VOP_V4F32_F32_F32_V4F32 : VOPProfile <[v4f32, f32, f32, v4f32]>; 2572def VOP_V16F32_F32_F32_V16F32 : VOPProfile <[v16f32, f32, f32, v16f32]>; 2573def VOP_V32F32_F32_F32_V32F32 : VOPProfile <[v32f32, f32, f32, v32f32]>; 2574def VOP_V4F32_V4F16_V4F16_V4F32 : VOPProfile <[v4f32, v4f16, v4f16, v4f32]>; 2575def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile <[v16f32, v4f16, v4f16, v16f32]>; 2576def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32f32, v4f16, v4f16, v32f32]>; 2577def VOP_V4F32_V2I16_V2I16_V4F32 : VOPProfile <[v4f32, v2i16, v2i16, v4f32]>; 2578def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile <[v16f32, v2i16, v2i16, v16f32]>; 2579def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32f32, v2i16, v2i16, v32f32]>; 2580def VOP_V4I32_I32_I32_V4I32 : VOPProfile <[v4i32, i32, i32, v4i32]>; 2581def VOP_V16I32_I32_I32_V16I32 : VOPProfile <[v16i32, i32, i32, v16i32]>; 2582def VOP_V32I32_I32_I32_V32I32 : VOPProfile <[v32i32, i32, i32, v32i32]>; 2583 2584def VOP_V4F64_F64_F64_V4F64 : VOPProfile <[v4f64, f64, f64, v4f64]>; 2585def VOP_V1F64_F64_F64_V1F64 : VOPProfile <[v1f64, f64, f64, v1f64]>; 2586 2587def VOP_V2F32_V2F32_V2F32_V2F32 : VOPProfile <[v2f32, v2f32, v2f32, v2f32]>; 2588def VOP_V2F32_V2F32_V2F32 : VOPProfile <[v2f32, v2f32, v2f32, untyped]>; 2589def VOP_V2I32_V2I32_V2I32 : VOPProfile <[v2i32, v2i32, v2i32, untyped]>; 2590def VOP_V4F32_V4I16_V4I16_V4F32 : VOPProfile <[v4f32, v4i16, v4i16, v4f32]>; 2591def VOP_V16F32_V4I16_V4I16_V16F32 : VOPProfile <[v16f32, v4i16, v4i16, v16f32]>; 2592def VOP_V32F32_V4I16_V4I16_V32F32 : VOPProfile <[v32f32, v4i16, v4i16, v32f32]>; 2593 2594def VOP_V4I32_I64_I64_V4I32 : VOPProfile <[v4i32, i64, i64, v4i32]>; 2595def VOP_V16I32_I64_I64_V16I32 : VOPProfile <[v16i32, i64, i64, v16i32]>; 2596def VOP_V4F32_V2F32_V2F32_V4F32 : VOPProfile <[v4f32, v2f32, v2f32, v4f32]>; 2597def VOP_V16F32_V2F32_V2F32_V16F32 : VOPProfile <[v16f32, v2f32, v2f32, v16f32]>; 2598def VOP_V4F32_I64_I64_V4F32 : VOPProfile <[v4f32, i64, i64, v4f32]>; 2599def VOP_V16F32_I64_I64_V16F32 : VOPProfile <[v16f32, i64, i64, v16f32]>; 2600 2601def VOP_V4F32_V4F16_V8F16_I32 : VOPProfile <[v4f32, v4f16, v8f16, i32]>; 2602def VOP_V16F32_V4F16_V8F16_I32 : VOPProfile <[v16f32, v4f16, v8f16, i32]>; 2603def VOP_V4F32_V4I16_V8I16_I32 : VOPProfile <[v4f32, v4i16, v8i16, i32]>; 2604def VOP_V16F32_V4I16_V8I16_I32 : VOPProfile <[v16f32, v4i16, v8i16, i32]>; 2605def VOP_V4I32_V2I32_V4I32_I32 : VOPProfile <[v4i32, v2i32, v4i32, i32]>; 2606def VOP_V16I32_V2I32_V4I32_I32 : VOPProfile <[v16i32, v2i32, v4i32, i32]>; 2607def VOP_V4F32_V2I32_V4I32_I32 : VOPProfile <[v4f32, v2i32, v4i32, i32]>; 2608def VOP_V16F32_V2I32_V4I32_I32 : VOPProfile <[v16f32, v2i32, v4i32, i32]>; 2609 2610class Commutable_REV <string revOp, bit isOrig> { 2611 string RevOp = revOp; 2612 bit IsOrig = isOrig; 2613} 2614 2615class AtomicNoRet <string noRetOp, bit isRet> { 2616 string NoRetOp = noRetOp; 2617 bit IsRet = isRet; 2618} 2619 2620//===----------------------------------------------------------------------===// 2621// Interpolation opcodes 2622//===----------------------------------------------------------------------===// 2623 2624class VINTRPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVINTRPDst">; 2625 2626class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> : 2627 VINTRPCommon <outs, ins, "", pattern>, 2628 SIMCInstr<opName, SIEncodingFamily.NONE> { 2629 let isPseudo = 1; 2630 let isCodeGenOnly = 1; 2631} 2632 2633// FIXME-GFX10: WIP. 2634class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins, 2635 string asm, int encodingFamily> : 2636 VINTRPCommon <outs, ins, asm, []>, 2637 VINTRPe <op>, 2638 SIMCInstr<opName, encodingFamily> { 2639} 2640 2641class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins, 2642 string asm> : 2643 VINTRPCommon <outs, ins, asm, []>, 2644 VINTRPe_vi <op>, 2645 SIMCInstr<opName, SIEncodingFamily.VI> { 2646 let AssemblerPredicate = VIAssemblerPredicate; 2647 let DecoderNamespace = "GFX8"; 2648} 2649 2650// FIXME-GFX10: WIP. 2651multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm, 2652 list<dag> pattern = []> { 2653 def "" : VINTRP_Pseudo <NAME, outs, ins, pattern>; 2654 2655 let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in { 2656 def _si : VINTRP_Real_si <op, NAME, outs, ins, asm, SIEncodingFamily.SI>; 2657 } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" 2658 2659 def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>; 2660 2661 let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { 2662 def _gfx10 : VINTRP_Real_si<op, NAME, outs, ins, asm, SIEncodingFamily.GFX10>; 2663 } // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" 2664} 2665 2666//===----------------------------------------------------------------------===// 2667// Vector instruction mappings 2668//===----------------------------------------------------------------------===// 2669 2670// Maps an opcode in e32 form to its e64 equivalent 2671def getVOPe64 : InstrMapping { 2672 let FilterClass = "VOP"; 2673 let RowFields = ["OpName"]; 2674 let ColFields = ["Size", "VOP3"]; 2675 let KeyCol = ["4", "0"]; 2676 let ValueCols = [["8", "1"]]; 2677} 2678 2679// Maps an opcode in e64 form to its e32 equivalent 2680def getVOPe32 : InstrMapping { 2681 let FilterClass = "VOP"; 2682 let RowFields = ["OpName"]; 2683 let ColFields = ["Size", "VOP3"]; 2684 let KeyCol = ["8", "1"]; 2685 let ValueCols = [["4", "0"]]; 2686} 2687 2688// Maps ordinary instructions to their SDWA counterparts 2689def getSDWAOp : InstrMapping { 2690 let FilterClass = "VOP"; 2691 let RowFields = ["OpName"]; 2692 let ColFields = ["AsmVariantName"]; 2693 let KeyCol = ["Default"]; 2694 let ValueCols = [["SDWA"]]; 2695} 2696 2697// Maps SDWA instructions to their ordinary counterparts 2698def getBasicFromSDWAOp : InstrMapping { 2699 let FilterClass = "VOP"; 2700 let RowFields = ["OpName"]; 2701 let ColFields = ["AsmVariantName"]; 2702 let KeyCol = ["SDWA"]; 2703 let ValueCols = [["Default"]]; 2704} 2705 2706// Maps ordinary instructions to their DPP counterparts 2707def getDPPOp32 : InstrMapping { 2708 let FilterClass = "VOP"; 2709 let RowFields = ["OpName"]; 2710 let ColFields = ["AsmVariantName"]; 2711 let KeyCol = ["Default"]; 2712 let ValueCols = [["DPP"]]; 2713} 2714 2715def getDPPOp64 : InstrMapping { 2716 let FilterClass = "VOP"; 2717 let RowFields = ["OpName"]; 2718 let ColFields = ["AsmVariantName"]; 2719 let KeyCol = ["VOP3"]; 2720 let ValueCols = [["VOP3_DPP"]]; 2721} 2722 2723// Maps an commuted opcode to its original version 2724def getCommuteOrig : InstrMapping { 2725 let FilterClass = "Commutable_REV"; 2726 let RowFields = ["RevOp"]; 2727 let ColFields = ["IsOrig"]; 2728 let KeyCol = ["0"]; 2729 let ValueCols = [["1"]]; 2730} 2731 2732// Maps an original opcode to its commuted version 2733def getCommuteRev : InstrMapping { 2734 let FilterClass = "Commutable_REV"; 2735 let RowFields = ["RevOp"]; 2736 let ColFields = ["IsOrig"]; 2737 let KeyCol = ["1"]; 2738 let ValueCols = [["0"]]; 2739} 2740 2741def getMCOpcodeGen : InstrMapping { 2742 let FilterClass = "SIMCInstr"; 2743 let RowFields = ["PseudoInstr"]; 2744 let ColFields = ["Subtarget"]; 2745 let KeyCol = [!cast<string>(SIEncodingFamily.NONE)]; 2746 // These columns must be kept in sync with the SIEncodingFamily enumeration. 2747 let ValueCols = [[!cast<string>(SIEncodingFamily.SI)], 2748 [!cast<string>(SIEncodingFamily.VI)], 2749 [!cast<string>(SIEncodingFamily.SDWA)], 2750 [!cast<string>(SIEncodingFamily.SDWA9)], 2751 // GFX80 encoding is added to work around a multiple matching 2752 // issue for buffer instructions with unpacked d16 data. This 2753 // does not actually change the encoding, and thus may be 2754 // removed later. 2755 [!cast<string>(SIEncodingFamily.GFX80)], 2756 [!cast<string>(SIEncodingFamily.GFX9)], 2757 [!cast<string>(SIEncodingFamily.GFX10)], 2758 [!cast<string>(SIEncodingFamily.SDWA10)], 2759 [!cast<string>(SIEncodingFamily.GFX90A)], 2760 [!cast<string>(SIEncodingFamily.GFX940)], 2761 [!cast<string>(SIEncodingFamily.GFX11)], 2762 [!cast<string>(SIEncodingFamily.GFX12)]]; 2763} 2764 2765// Get equivalent SOPK instruction. 2766def getSOPKOp : InstrMapping { 2767 let FilterClass = "SOPKInstTable"; 2768 let RowFields = ["BaseCmpOp"]; 2769 let ColFields = ["IsSOPK"]; 2770 let KeyCol = ["0"]; 2771 let ValueCols = [["1"]]; 2772} 2773 2774def getAddr64Inst : InstrMapping { 2775 let FilterClass = "MUBUFAddr64Table"; 2776 let RowFields = ["OpName"]; 2777 let ColFields = ["IsAddr64"]; 2778 let KeyCol = ["0"]; 2779 let ValueCols = [["1"]]; 2780} 2781 2782def getIfAddr64Inst : InstrMapping { 2783 let FilterClass = "MUBUFAddr64Table"; 2784 let RowFields = ["OpName"]; 2785 let ColFields = ["IsAddr64"]; 2786 let KeyCol = ["1"]; 2787 let ValueCols = [["1"]]; 2788} 2789 2790// Maps an atomic opcode to its returnless version. 2791def getAtomicNoRetOp : InstrMapping { 2792 let FilterClass = "AtomicNoRet"; 2793 let RowFields = ["NoRetOp"]; 2794 let ColFields = ["IsRet"]; 2795 let KeyCol = ["1"]; 2796 let ValueCols = [["0"]]; 2797} 2798 2799// Maps a GLOBAL to its SADDR form. 2800def getGlobalSaddrOp : InstrMapping { 2801 let FilterClass = "GlobalSaddrTable"; 2802 let RowFields = ["SaddrOp"]; 2803 let ColFields = ["IsSaddr"]; 2804 let KeyCol = ["0"]; 2805 let ValueCols = [["1"]]; 2806} 2807 2808// Maps a GLOBAL SADDR to its VADDR form. 2809def getGlobalVaddrOp : InstrMapping { 2810 let FilterClass = "GlobalSaddrTable"; 2811 let RowFields = ["SaddrOp"]; 2812 let ColFields = ["IsSaddr"]; 2813 let KeyCol = ["1"]; 2814 let ValueCols = [["0"]]; 2815} 2816 2817// Maps a v_cmpx opcode with sdst to opcode without sdst. 2818def getVCMPXNoSDstOp : InstrMapping { 2819 let FilterClass = "VCMPXNoSDstTable"; 2820 let RowFields = ["NoSDstOp"]; 2821 let ColFields = ["HasSDst"]; 2822 let KeyCol = ["1"]; 2823 let ValueCols = [["0"]]; 2824} 2825 2826// Maps a SOPP to a SOPP with S_NOP 2827def getSOPPWithRelaxation : InstrMapping { 2828 let FilterClass = "SOPPRelaxTable"; 2829 let RowFields = ["KeyName"]; 2830 let ColFields = ["IsRelaxed"]; 2831 let KeyCol = ["0"]; 2832 let ValueCols = [["1"]]; 2833} 2834 2835// Maps flat scratch opcodes by addressing modes 2836def getFlatScratchInstSTfromSS : InstrMapping { 2837 let FilterClass = "FlatScratchInst"; 2838 let RowFields = ["SVOp"]; 2839 let ColFields = ["Mode"]; 2840 let KeyCol = ["SS"]; 2841 let ValueCols = [["ST"]]; 2842} 2843 2844def getFlatScratchInstSSfromSV : InstrMapping { 2845 let FilterClass = "FlatScratchInst"; 2846 let RowFields = ["SVOp"]; 2847 let ColFields = ["Mode"]; 2848 let KeyCol = ["SV"]; 2849 let ValueCols = [["SS"]]; 2850} 2851 2852def getFlatScratchInstSVfromSVS : InstrMapping { 2853 let FilterClass = "FlatScratchInst"; 2854 let RowFields = ["SVOp"]; 2855 let ColFields = ["Mode"]; 2856 let KeyCol = ["SVS"]; 2857 let ValueCols = [["SV"]]; 2858} 2859 2860def getFlatScratchInstSVfromSS : InstrMapping { 2861 let FilterClass = "FlatScratchInst"; 2862 let RowFields = ["SVOp"]; 2863 let ColFields = ["Mode"]; 2864 let KeyCol = ["SS"]; 2865 let ValueCols = [["SV"]]; 2866} 2867 2868def getMFMAEarlyClobberOp : InstrMapping { 2869 let FilterClass = "MFMATable"; 2870 let RowFields = ["FMAOp"]; 2871 let ColFields = ["IsMac"]; 2872 let KeyCol = ["1"]; 2873 let ValueCols = [["0"]]; 2874} 2875 2876// Maps an v_cmp instruction to its v_cmpx equivalent. 2877def getVCMPXOpFromVCMP : InstrMapping { 2878 let FilterClass = "VCMPVCMPXTable"; 2879 let RowFields = ["VCMPOp"]; 2880 let ColFields = ["IsVCMPX"]; 2881 let KeyCol = ["0"]; 2882 let ValueCols = [["1"]]; 2883} 2884 2885def VOPDComponentTable : GenericTable { 2886 let FilterClass = "VOPD_Component"; 2887 let CppTypeName = "VOPDComponentInfo"; 2888 let Fields = ["BaseVOP", "VOPDOp", "CanBeVOPDX"]; 2889 let PrimaryKey = ["BaseVOP"]; 2890 let PrimaryKeyName = "getVOPDComponentHelper"; 2891} 2892 2893def getVOPDBaseFromComponent : SearchIndex { 2894 let Table = VOPDComponentTable; 2895 let Key = ["VOPDOp"]; 2896} 2897 2898def VOPDPairs : GenericTable { 2899 let FilterClass = "VOPD_Base"; 2900 let CppTypeName = "VOPDInfo"; 2901 let Fields = ["Opcode", "OpX", "OpY", "SubTgt"]; 2902 let PrimaryKey = ["Opcode"]; 2903 let PrimaryKeyName = "getVOPDOpcodeHelper"; 2904} 2905 2906def getVOPDInfoFromComponentOpcodes : SearchIndex { 2907 let Table = VOPDPairs; 2908 let Key = ["OpX", "OpY", "SubTgt"]; 2909} 2910 2911include "SIInstructions.td" 2912 2913include "DSInstructions.td" 2914include "MIMGInstructions.td" 2915