//===-- SIInstrInfo.td -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Wavefront-size predicates, usable both for codegen selection and as
// assembler feature predicates.
def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">,
  AssemblerPredicate <(all_of FeatureWavefrontSize32)>;
def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">,
  AssemblerPredicate <(all_of FeatureWavefrontSize64)>;

class GCNPredicateControl : PredicateControl {
  Predicate SIAssemblerPredicate = isGFX6GFX7;
  Predicate VIAssemblerPredicate = isGFX8GFX9;
}

// Except for the NONE field, this must be kept in sync with the
// SIEncodingFamily enum in SIInstrInfo.cpp and the columns of the
// getMCOpcodeGen table.
def SIEncodingFamily {
  int NONE = -1;
  int SI = 0;
  int VI = 1;
  int SDWA = 2;
  int SDWA9 = 3;
  int GFX80 = 4;
  int GFX9 = 5;
  int GFX10 = 6;
  int SDWA10 = 7;
  int GFX90A = 8;
  int GFX940 = 9;
  int GFX11 = 10;
}

//===----------------------------------------------------------------------===//
// SI DAG Nodes
//===----------------------------------------------------------------------===//

def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;

def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD",
  SDTypeProfile<1, 3, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
  [SDNPMayLoad, SDNPMemOperand]
>;

def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
  SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
>;

// Two-operand FP atomic: result and data operand share one FP type, the
// first operand is the pointer.
def SDTAtomic2_f32 : SDTypeProfile<1, 2, [
  SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
]>;

def SIatomic_fmin : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMIN", SDTAtomic2_f32,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;

def SIatomic_fmax : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMAX", SDTAtomic2_f32,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;

// load_d16_{lo|hi} ptr, tied_input
def SIload_d16 : SDTypeProfile<1, 2, [
  SDTCisPtrTy<1>,
  SDTCisSameAs<0, 2>
]>;

def SDTtbuffer_load : SDTypeProfile<1, 8,
  [                    // vdata
   SDTCisVT<1, v4i32>, // rsrc
   SDTCisVT<2, i32>,   // vindex(VGPR)
   SDTCisVT<3, i32>,   // voffset(VGPR)
   SDTCisVT<4, i32>,   // soffset(SGPR)
   SDTCisVT<5, i32>,   // offset(imm)
   SDTCisVT<6, i32>,   // format(imm)
   SDTCisVT<7, i32>,   // cachepolicy, swizzled buffer(imm)
   SDTCisVT<8, i1>     // idxen(imm)
  ]>;

def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTtbuffer_load,
                            [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
def SItbuffer_load_d16 : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT_D16",
                               SDTtbuffer_load,
                               [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

def SDTtbuffer_store : SDTypeProfile<0, 9,
  [                    // vdata
   SDTCisVT<1, v4i32>, // rsrc
   SDTCisVT<2, i32>,   // vindex(VGPR)
   SDTCisVT<3, i32>,   // voffset(VGPR)
   SDTCisVT<4, i32>,   // soffset(SGPR)
   SDTCisVT<5, i32>,   // offset(imm)
   SDTCisVT<6, i32>,   // format(imm)
   SDTCisVT<7, i32>,   // cachepolicy, swizzled buffer(imm)
   SDTCisVT<8, i1>     // idxen(imm)
  ]>;

def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
                             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SItbuffer_store_d16 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16",
                                SDTtbuffer_store,
                                [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

def SDTBufferLoad : SDTypeProfile<1, 7,
  [                    // vdata
   SDTCisVT<1, v4i32>, // rsrc
   SDTCisVT<2, i32>,   // vindex(VGPR)
   SDTCisVT<3, i32>,   // voffset(VGPR)
   SDTCisVT<4, i32>,   // soffset(SGPR)
   SDTCisVT<5, i32>,   // offset(imm)
   SDTCisVT<6, i32>,   // cachepolicy, swizzled buffer(imm)
   SDTCisVT<7, i1>]>;  // idxen(imm)

def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ubyte : SDNode <"AMDGPUISD::BUFFER_LOAD_UBYTE", SDTBufferLoad,
                                  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ushort : SDNode <"AMDGPUISD::BUFFER_LOAD_USHORT", SDTBufferLoad,
                                   [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_byte : SDNode <"AMDGPUISD::BUFFER_LOAD_BYTE", SDTBufferLoad,
                                 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_short: SDNode <"AMDGPUISD::BUFFER_LOAD_SHORT", SDTBufferLoad,
                                 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
                                   [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_TFE", SDTBufferLoad,
                                       [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format_d16 : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_D16",
                                       SDTBufferLoad,
                                       [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;

def SDTBufferStore : SDTypeProfile<0, 8,
  [                    // vdata
   SDTCisVT<1, v4i32>, // rsrc
   SDTCisVT<2, i32>,   // vindex(VGPR)
   SDTCisVT<3, i32>,   // voffset(VGPR)
   SDTCisVT<4, i32>,   // soffset(SGPR)
   SDTCisVT<5, i32>,   // offset(imm)
   SDTCisVT<6, i32>,   // cachepolicy, swizzled buffer(imm)
   SDTCisVT<7, i1>]>;  // idxen(imm)

def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
                             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_byte: SDNode <"AMDGPUISD::BUFFER_STORE_BYTE",
                                 SDTBufferStore,
                                 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_short : SDNode <"AMDGPUISD::BUFFER_STORE_SHORT",
                                   SDTBufferStore,
                                   [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_format : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT",
                                    SDTBufferStore,
                                    [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16",
                                        SDTBufferStore,
                                        [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

class SDBufferAtomic<string opcode> : SDNode <opcode,
  SDTypeProfile<1, 8,
      [SDTCisVT<2, v4i32>, // rsrc
       SDTCisVT<3, i32>,   // vindex(VGPR)
       SDTCisVT<4, i32>,   // voffset(VGPR)
       SDTCisVT<5, i32>,   // soffset(SGPR)
       SDTCisVT<6, i32>,   // offset(imm)
       SDTCisVT<7, i32>,   // cachepolicy(imm)
       SDTCisVT<8, i1>]>,  // idxen(imm)
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;

def SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
def SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">;
def SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">;
def SIbuffer_atomic_smin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMIN">;
def SIbuffer_atomic_umin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMIN">;
def SIbuffer_atomic_smax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMAX">;
def SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
def SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">;
def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
def SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">;
def SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">;
def SIbuffer_atomic_csub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_CSUB">;
def SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">;
def SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">;
def SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">;

// "_noret" variant of a buffer atomic: identical operand list, but only
// matched when the atomic's result is unused (HasNoUse).
multiclass SDBufferAtomicNoRet {
  def "_noret" : PatFrag<
    (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
         node:$offset, node:$cachepolicy, node:$idxen),
    (!cast<SDNode>(NAME) node:$vdata_in, node:$rsrc, node:$vindex,
     node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
     node:$idxen)> {
    let HasNoUse = true;
  }
}

defm SIbuffer_atomic_swap : SDBufferAtomicNoRet;
defm SIbuffer_atomic_add : SDBufferAtomicNoRet;
defm SIbuffer_atomic_sub : SDBufferAtomicNoRet;
defm SIbuffer_atomic_smin : SDBufferAtomicNoRet;
defm SIbuffer_atomic_umin : SDBufferAtomicNoRet;
defm SIbuffer_atomic_smax : SDBufferAtomicNoRet;
defm SIbuffer_atomic_umax : SDBufferAtomicNoRet;
defm SIbuffer_atomic_and : SDBufferAtomicNoRet;
defm SIbuffer_atomic_or : SDBufferAtomicNoRet;
defm SIbuffer_atomic_xor : SDBufferAtomicNoRet;
defm SIbuffer_atomic_inc : SDBufferAtomicNoRet;
defm SIbuffer_atomic_dec : SDBufferAtomicNoRet;
defm SIbuffer_atomic_fadd : SDBufferAtomicNoRet;
defm SIbuffer_atomic_fmin : SDBufferAtomicNoRet;
defm SIbuffer_atomic_fmax : SDBufferAtomicNoRet;

def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
  SDTypeProfile<1, 9,
    [SDTCisVT<0, i32>,   // dst
     SDTCisVT<1, i32>,   // src
     SDTCisVT<2, i32>,   // cmp
     SDTCisVT<3, v4i32>, // rsrc
     SDTCisVT<4, i32>,   // vindex(VGPR)
     SDTCisVT<5, i32>,   // voffset(VGPR)
     SDTCisVT<6, i32>,   // soffset(SGPR)
     SDTCisVT<7, i32>,   // offset(imm)
     SDTCisVT<8, i32>,   // cachepolicy(imm)
     SDTCisVT<9, i1>]>,  // idxen(imm)
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;

def SIbuffer_atomic_cmpswap_noret : PatFrag<
  (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset,
       node:$soffset, node:$offset, node:$cachepolicy, node:$idxen),
  (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex,
   node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
   node:$idxen)> {
  let HasNoUse = true;
}

class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
  SDTypeProfile<0, 2,
      [SDTCisPtrTy<0>,    // vaddr
       SDTCisVT<1, ty>]>, // vdata
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;

def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
  SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
>;

def SIlds : SDNode<"AMDGPUISD::LDS",
  SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
>;

def SIload_d16_lo : SDNode<"AMDGPUISD::LOAD_D16_LO",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_lo_u8 : SDNode<"AMDGPUISD::LOAD_D16_LO_U8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_lo_i8 : SDNode<"AMDGPUISD::LOAD_D16_LO_I8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi : SDNode<"AMDGPUISD::LOAD_D16_HI",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi_u8 : SDNode<"AMDGPUISD::LOAD_D16_HI_U8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi_i8 : SDNode<"AMDGPUISD::LOAD_D16_HI_I8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
  SDTypeProfile<0 ,1, [SDTCisInt<0>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;

def SIfptrunc_round_upward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_UPWARD",
  SDTFPRoundOp
>;

def SIfptrunc_round_downward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_DOWNWARD",
  SDTFPRoundOp
>;

//===----------------------------------------------------------------------===//
// ValueType helpers
//===----------------------------------------------------------------------===//

// Returns 1 if the source arguments have modifiers, 0 if they do not.
class isFloatType<ValueType SrcVT> {
  bit ret = !or(!eq(SrcVT.Value, f16.Value),
                !eq(SrcVT.Value, f32.Value),
                !eq(SrcVT.Value, f64.Value),
                !eq(SrcVT.Value, v2f16.Value),
                !eq(SrcVT.Value, v4f16.Value),
                !eq(SrcVT.Value, v8f16.Value),
                !eq(SrcVT.Value, v16f16.Value),
                !eq(SrcVT.Value, v2f32.Value),
                !eq(SrcVT.Value, v4f32.Value),
                !eq(SrcVT.Value, v8f32.Value),
                !eq(SrcVT.Value, v2f64.Value),
                !eq(SrcVT.Value, v4f64.Value));
}

// XXX - do v2i16 instructions?
class isIntType<ValueType SrcVT> {
  bit ret = !or(!eq(SrcVT.Value, i8.Value),
                !eq(SrcVT.Value, i16.Value),
                !eq(SrcVT.Value, i32.Value),
                !eq(SrcVT.Value, i64.Value),
                !eq(SrcVT.Value, v4i16.Value),
                !eq(SrcVT.Value, v8i16.Value),
                !eq(SrcVT.Value, v16i16.Value),
                !eq(SrcVT.Value, v2i32.Value),
                !eq(SrcVT.Value, v4i32.Value),
                !eq(SrcVT.Value, v8i32.Value));
}

class isPackedType<ValueType SrcVT> {
  bit ret = !or(!eq(SrcVT.Value, v2i16.Value),
                !eq(SrcVT.Value, v2f16.Value),
                !eq(SrcVT.Value, v4f16.Value),
                !eq(SrcVT.Value, v2i32.Value),
                !eq(SrcVT.Value, v2f32.Value),
                !eq(SrcVT.Value, v4i32.Value),
                !eq(SrcVT.Value, v4f32.Value),
                !eq(SrcVT.Value, v8i32.Value),
                !eq(SrcVT.Value, v8f32.Value));
}


//===----------------------------------------------------------------------===//
// PatFrags for global memory operations
//===----------------------------------------------------------------------===//

defm atomic_load_fmin : binary_atomic_op_all_as<SIatomic_fmin, 0>;
defm atomic_load_fmax : binary_atomic_op_all_as<SIatomic_fmax, 0>;

//===----------------------------------------------------------------------===//
// SDNodes PatFrags for loads/stores with a glue input.
// This is for SDNodes and PatFrag for local loads and stores to
// enable s_mov_b32 m0, -1 to be glued to the memory instructions.
//
// These mirror the regular load/store PatFrags and rely on special
// processing during Select() to add the glued copy.
//
//===----------------------------------------------------------------------===//

def AMDGPUld_glue : SDNode <"ISD::LOAD", SDTLoad,
  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad,
  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> {
  let IsLoad = 1;
  let IsUnindexed = 1;
}

def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def atomic_load_8_glue : PatFrag<(ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i8;
}

def atomic_load_16_glue : PatFrag<(ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i16;
}

def atomic_load_32_glue : PatFrag<(ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_load_64_glue : PatFrag<(ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}

def extload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsAnyExtLoad = 1;
}

def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsSignExtLoad = 1;
}

def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsZeroExtLoad = 1;
}

def extloadi8_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def zextloadi8_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def extloadi16_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def zextloadi16_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def sextloadi8_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}


let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
  let IsNonExtLoad = 1;
}

def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>;
def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>;
def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>;

def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>;
def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>;
def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>;
} // End IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces

def load_align8_local_m0 : PatFrag<(ops node:$ptr),
                                   (load_local_m0 node:$ptr)> {
  let IsLoad = 1;
  int MinAlignment = 8;
}

def load_align16_local_m0 : PatFrag<(ops node:$ptr),
                                    (load_local_m0 node:$ptr)> {
  let IsLoad = 1;
  int MinAlignment = 16;
}

let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
def atomic_load_8_local_m0 : PatFrag<(ops node:$ptr),
                                     (atomic_load_8_glue node:$ptr)>;
def atomic_load_16_local_m0 : PatFrag<(ops node:$ptr),
                                      (atomic_load_16_glue node:$ptr)>;
def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
                                      (atomic_load_32_glue node:$ptr)>;
def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr),
                                      (atomic_load_64_glue node:$ptr)>;
} // End let AddressSpaces = LoadAddress_local.AddrSpaces


def AMDGPUst_glue : SDNode <"ISD::STORE", SDTStore,
  [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
>;

def AMDGPUatomic_st_glue : SDNode <"ISD::ATOMIC_STORE", SDTAtomicStore,
  [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
>;

def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr),
                                  (AMDGPUst_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsUnindexed = 1;
}

def store_glue : PatFrag<(ops node:$val, node:$ptr),
                         (unindexedstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

def truncstore_glue : PatFrag<(ops node:$val, node:$ptr),
                              (unindexedstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 1;
}

def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr),
                                (truncstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i8;
  let IsTruncStore = 1;
}

def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr),
                                 (truncstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i16;
  let IsTruncStore = 1;
}

let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
def store_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                             (store_glue node:$val, node:$ptr)>;
def truncstorei8_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                    (truncstorei8_glue node:$val, node:$ptr)>;
def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                     (truncstorei16_glue node:$val, node:$ptr)>;
}

def store_align8_local_m0 : PatFrag <(ops node:$value, node:$ptr),
                                     (store_local_m0 node:$value, node:$ptr)>,
                            Aligned<8> {
  let IsStore = 1;
}

def store_align16_local_m0 : PatFrag <(ops node:$value, node:$ptr),
                                      (store_local_m0 node:$value, node:$ptr)>,
                             Aligned<16> {
  let IsStore = 1;
}

// Under-aligned (align < 4) DS access patterns, in both SDag and GISel forms.
let PredicateCode = [{return cast<MemSDNode>(N)->getAlign() < 4;}],
    GISelPredicateCode = [{return (*MI.memoperands_begin())->getAlign() < 4;}],
    AddressSpaces = [ AddrSpaces.Local ] in {
def load_align_less_than_4_local : PatFrag<(ops node:$ptr),
                                           (load_local node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def load_align_less_than_4_local_m0 : PatFrag<(ops node:$ptr),
                                              (load_local_m0 node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def store_align_less_than_4_local : PatFrag <(ops node:$value, node:$ptr),
                                             (store_local node:$value, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

def store_align_less_than_4_local_m0 : PatFrag <(ops node:$value, node:$ptr),
                                                (store_local_m0 node:$value, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}
}

def atomic_store_8_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i8;
}

def atomic_store_16_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i16;
}

def atomic_store_32_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_store_64_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}

let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
def atomic_store_8_local_m0 : PatFrag<(ops node:$ptr, node:$val),
                                      (atomic_store_8_glue node:$ptr, node:$val)>;
def atomic_store_16_local_m0 : PatFrag<(ops node:$ptr, node:$val),
                                       (atomic_store_16_glue node:$ptr, node:$val)>;
def atomic_store_32_local_m0 : PatFrag<(ops node:$ptr, node:$val),
                                       (atomic_store_32_glue node:$ptr, node:$val)>;
def atomic_store_64_local_m0 : PatFrag<(ops node:$ptr, node:$val),
                                       (atomic_store_64_glue node:$ptr, node:$val)>;
} // End let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces


//===----------------------------------------------------------------------===//
// SDNodes PatFrags for a16 loads and stores with 3 components.
// v3f16/v3i16 is widened to v4f16/v4i16, so we need to match on the memory
// load/store size.
//===----------------------------------------------------------------------===//

class mubuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
       node:$auxiliary, node:$idxen),
  (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
        node:$auxiliary, node:$idxen)> {
  let IsLoad = 1;
  let MemoryVT = vt;
}

class mubuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
       node:$auxiliary, node:$idxen),
  (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
        node:$auxiliary, node:$idxen)> {
  let IsStore = 1;
  let MemoryVT = vt;
}

class mtbuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
       node:$format, node:$auxiliary, node:$idxen),
  (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
        node:$format, node:$auxiliary, node:$idxen)> {
  let IsLoad = 1;
  let MemoryVT = vt;
}

class mtbuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
       node:$format, node:$auxiliary, node:$idxen),
  (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
        node:$format, node:$auxiliary, node:$idxen)> {
  let IsStore = 1;
  let MemoryVT = vt;
}

//===----------------------------------------------------------------------===//
// SDNodes PatFrags for d16 loads
//===----------------------------------------------------------------------===//

class LoadD16Frag <SDPatternOperator op> : PatFrag<
  (ops node:$ptr, node:$tied_in),
  (op node:$ptr, node:$tied_in)> {
  let IsLoad = 1;
}

foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

def load_d16_hi_#as : LoadD16Frag <SIload_d16_hi>;

def az_extloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_u8> {
  let MemoryVT = i8;
}

def sextloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_i8> {
  let MemoryVT = i8;
}

def load_d16_lo_#as : LoadD16Frag <SIload_d16_lo>;

def az_extloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_u8> {
  let MemoryVT = i8;
}

def sextloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_i8> {
  let MemoryVT = i8;
}

} // End let AddressSpaces = ...
} // End foreach AddrSpace

// Shift PatFrags with reversed operand order (shift amount first), matching
// the VALU v_*_rev operand layout.
def lshr_rev : PatFrag <
  (ops node:$src1, node:$src0),
  (srl $src0, $src1)
>;

def ashr_rev : PatFrag <
  (ops node:$src1, node:$src0),
  (sra $src0, $src1)
>;

def lshl_rev : PatFrag <
  (ops node:$src1, node:$src0),
  (shl $src0, $src1)
>;

def add_ctpop : PatFrag <
  (ops node:$src0, node:$src1),
  (add (ctpop $src0), $src1)
>;

def xnor : PatFrag <
  (ops node:$src0, node:$src1),
  (not (xor $src0, $src1))
>;

foreach I = 1-4 in {
def shl#I#_add : PatFrag <
  (ops node:$src0, node:$src1),
  (add (shl_oneuse $src0, (i32 I)), $src1)> {
  // FIXME: Poor substitute for disabling pattern in SelectionDAG
  let PredicateCode = [{return false;}];
  let GISelPredicateCode = [{return true;}];
}
}

// Defines the glued atomic SDNode plus its _local_m0/_region_m0 PatFrag
// families (returning and no-return forms).
multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
                            SDTypeProfile tc = SDTAtomic2,
                            bit IsInt = 1> {

  def _glue : SDNode <
    !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, tc,
    [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
  >;

  let AddressSpaces = StoreAddress_local.AddrSpaces in {
    defm _local_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
    defm _local_m0 : noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"),
                                             IsInt>;
  }

  let AddressSpaces = StoreAddress_region.AddrSpaces in {
    defm _region_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
    defm _region_m0 : noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"),
                                              IsInt>;
  }
}

defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
defm atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
defm atomic_load_uinc_wrap : SIAtomicM0Glue2 <"LOAD_UINC_WRAP">;
defm atomic_load_udec_wrap : SIAtomicM0Glue2 <"LOAD_UDEC_WRAP">;
defm atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
defm atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">;
defm atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">;
defm atomic_load_or : SIAtomicM0Glue2 <"LOAD_OR">;
defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
defm atomic_swap : SIAtomicM0Glue2 <"SWAP">;
defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>;
defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32, 0>;
defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32, 0>;

def as_i1timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
}]>;

def as_i8imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8);
}]>;

// NOTE(review): named as_i8timm but emits an MVT::i16 target constant —
// reproduced as-is; confirm this asymmetry is intentional before changing.
def as_i8timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;

def as_i16imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;

def as_i16timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;

def as_i32imm: SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

def as_i32timm: SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

def as_i64imm: SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
}]>;

def cond_as_i32imm: SDNodeXForm<cond, [{
  return CurDAG->getTargetConstant(N->get(), SDLoc(N), MVT::i32);
}]>;

// Copied from the AArch64 backend:
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def frameindex_to_targetframeindex : SDNodeXForm<frameindex, [{
  auto FI = cast<FrameIndexSDNode>(N);
  return CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
}]>;

// Copied from the AArch64 backend:
def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;

// The shift amount is spliced into the C++ body at TableGen time via the
// }] # bitnum # [{ paste below.
class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
  uint64_t Imm = N->getZExtValue();
  unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;
  return CurDAG->getTargetConstant(Bit, SDLoc(N), MVT::i1);
}]>;

def SIMM16bit : ImmLeaf <i32,
  [{return isInt<16>(Imm);}]
>;

def UIMM16bit : ImmLeaf <i32,
  [{return isUInt<16>(Imm);}]
>;

def i64imm_32bit : ImmLeaf<i64, [{
  return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
}]>;

def InlineImm16 : ImmLeaf<i16, [{
  return isInlineImmediate16(Imm);
}]>;

def InlineImm32 : ImmLeaf<i32, [{
  return isInlineImmediate32(Imm);
}]>;

def InlineImm64 : ImmLeaf<i64, [{
  return isInlineImmediate64(Imm);
}]>;

def InlineImmFP32 : FPImmLeaf<f32, [{
  return isInlineImmediate(Imm);
}]>;

def InlineImmFP64 : FPImmLeaf<f64, [{
  return isInlineImmediate(Imm);
}]>;


class VGPRImm <dag frag> : PatLeaf<frag, [{
  return isVGPRImm(N);
}]>;

def NegateImm : SDNodeXForm<imm, [{
  return CurDAG->getConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

// TODO: When FP inline imm values work?
// Immediates in [-64, -17]: out of inline-constant range as-is, but their
// negation is an inline constant (16..64), so NegateImm folds them.
def NegSubInlineConst32 : ImmLeaf<i32, [{
  return Imm < -16 && Imm >= -64;
}], NegateImm>;

def NegSubInlineIntConst16 : ImmLeaf<i16, [{
  return Imm < -16 && Imm >= -64;
}], NegateImm>;

def ShiftAmt32Imm : ImmLeaf <i32, [{
  return Imm < 32;
}]>;

def getNegV2I16Imm : SDNodeXForm<build_vector, [{
  return SDValue(packNegConstantV2I16(N, *CurDAG), 0);
}]>;

def NegSubInlineConstV216 : PatLeaf<(build_vector), [{
  assert(N->getNumOperands() == 2);
  assert(N->getOperand(0).getValueType().getSizeInBits() == 16);
  SDValue Src0 = N->getOperand(0);
  SDValue Src1 = N->getOperand(1);
  if (Src0 == Src1)
    return isNegInlineImmediate(Src0.getNode());

  return (isNullConstantOrUndef(Src0) && isNegInlineImmediate(Src1.getNode())) ||
         (isNullConstantOrUndef(Src1) && isNegInlineImmediate(Src0.getNode()));
}], getNegV2I16Imm>;


def fp16_zeros_high_16bits : PatLeaf<(f16 VGPR_32:$src), [{
  return fp16SrcZerosHighBits(N->getOpcode());
}]>;


//===----------------------------------------------------------------------===//
// MUBUF/SMEM Patterns
//===----------------------------------------------------------------------===//

def extract_cpol : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & AMDGPU::CPol::ALL, SDLoc(N), MVT::i8);
}]>;

def extract_swz : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant((N->getZExtValue() >> 3) & 1, SDLoc(N), MVT::i8);
}]>;

def set_glc : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() | AMDGPU::CPol::GLC, SDLoc(N), MVT::i8);
}]>;

//===----------------------------------------------------------------------===//
// Custom Operands
//===----------------------------------------------------------------------===//

def SOPPBrTarget : CustomOperand<OtherVT> {
  let PrintMethod = "printOperand";
  let EncoderMethod = "getSOPPBrEncoding";
  let DecoderMethod = "decodeSOPPBrTarget";
  let OperandType = "OPERAND_PCREL";
}

def si_ga : Operand<iPTR>;

def InterpSlot : CustomOperand<i32>;

// It appears to be necessary to create a separate operand for this to
// be able to parse attr<num> with no space.
def InterpAttr : CustomOperand<i32>;

def InterpAttrChan : ImmOperand<i32>;

def VReg32OrOffClass : AsmOperandClass {
  let Name = "VReg32OrOff";
  let ParserMethod = "parseVReg32OrOff";
}

def SendMsg : CustomOperand<i32>;

def Swizzle : CustomOperand<i16, 1>;

def Endpgm : CustomOperand<i16, 1>;

def SWaitCnt : CustomOperand<i32>;

def DepCtr : CustomOperand<i32>;

def SDelayALU : CustomOperand<i32>;

include "SIInstrFormats.td"
include "VIInstrFormats.td"

def BoolReg : AsmOperandClass {
  let Name = "BoolReg";
  let ParserMethod = "parseBoolReg";
  let RenderMethod = "addRegOperands";
}

class BoolRC : RegisterOperand<SReg_1> {
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
}

def SSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
}

def VOPDstS64orS32 : BoolRC {
  let PrintMethod = "printVOPDst";
}

// SCSrc_i1 is the operand for pseudo instructions only.
// Boolean immediates shall not be exposed to codegen instructions.
def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_REG_IMM_INT32";
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
}

// ===----------------------------------------------------------------------===//
// ExpSrc* Special cases for exp src operands which are printed as
// "off" depending on en operand.
993// ===----------------------------------------------------------------------===// 994 995def ExpSrc0 : RegisterOperand<VGPR_32> { 996 let PrintMethod = "printExpSrc0"; 997 let ParserMatchClass = VReg32OrOffClass; 998} 999 1000def ExpSrc1 : RegisterOperand<VGPR_32> { 1001 let PrintMethod = "printExpSrc1"; 1002 let ParserMatchClass = VReg32OrOffClass; 1003} 1004 1005def ExpSrc2 : RegisterOperand<VGPR_32> { 1006 let PrintMethod = "printExpSrc2"; 1007 let ParserMatchClass = VReg32OrOffClass; 1008} 1009 1010def ExpSrc3 : RegisterOperand<VGPR_32> { 1011 let PrintMethod = "printExpSrc3"; 1012 let ParserMatchClass = VReg32OrOffClass; 1013} 1014 1015class SDWASrc<ValueType vt> : RegisterOperand<VS_32> { 1016 let OperandNamespace = "AMDGPU"; 1017 string Type = !if(isFloatType<vt>.ret, "FP", "INT"); 1018 let OperandType = "OPERAND_REG_INLINE_C_"#Type#vt.Size; 1019 let DecoderMethod = "decodeSDWASrc"#vt.Size; 1020 let EncoderMethod = "getSDWASrcEncoding"; 1021} 1022 1023def SDWASrc_i32 : SDWASrc<i32>; 1024def SDWASrc_i16 : SDWASrc<i16>; 1025def SDWASrc_f32 : SDWASrc<f32>; 1026def SDWASrc_f16 : SDWASrc<f16>; 1027 1028def SDWAVopcDst : BoolRC { 1029 let OperandNamespace = "AMDGPU"; 1030 let OperandType = "OPERAND_SDWA_VOPC_DST"; 1031 let EncoderMethod = "getSDWAVopcDstEncoding"; 1032 let DecoderMethod = "decodeSDWAVopcDst"; 1033 let PrintMethod = "printVOPDst"; 1034} 1035 1036class NamedIntOperand<ValueType Type, string Prefix, string Name = NAME, 1037 string ConvertMethod = "nullptr"> 1038 : CustomOperand<Type, 1, Name> { 1039 let ParserMethod = 1040 "[this](OperandVector &Operands) -> ParseStatus { "# 1041 "return parseIntWithPrefix(\""#Prefix#"\", Operands, "# 1042 "AMDGPUOperand::"#ImmTy#", "#ConvertMethod#"); }"; 1043} 1044 1045class NamedBitOperand<string Id, string Name = NAME> 1046 : CustomOperand<i1, 1, Name> { 1047 let ParserMethod = 1048 "[this](OperandVector &Operands) -> ParseStatus { "# 1049 "return parseNamedBit(\""#Id#"\", Operands, AMDGPUOperand::"#ImmTy#"); 
}"; 1050 let PrintMethod = "[this](const MCInst *MI, unsigned OpNo, "# 1051 "const MCSubtargetInfo &STI, raw_ostream &O) { "# 1052 "printNamedBit(MI, OpNo, O, \""#Id#"\"); }"; 1053} 1054 1055class DefaultOperand<CustomOperand Op, int Value> 1056 : OperandWithDefaultOps<Op.Type, (ops (Op.Type Value))>, 1057 CustomOperandProps<1, Op.ParserMatchClass.Name> { 1058 let ParserMethod = Op.ParserMatchClass.ParserMethod; 1059 let PrintMethod = Op.PrintMethod; 1060} 1061 1062class SDWAOperand<string Id, string Name = NAME> 1063 : CustomOperand<i32, 1, Name> { 1064 let ParserMethod = 1065 "[this](OperandVector &Operands) -> ParseStatus { "# 1066 "return parseSDWASel(Operands, \""#Id#"\", AMDGPUOperand::"#ImmTy#"); }"; 1067} 1068 1069class ArrayOperand0<string Id, string Name = NAME> 1070 : OperandWithDefaultOps<i32, (ops (i32 0))>, 1071 CustomOperandProps<1, Name> { 1072 let ParserMethod = 1073 "[this](OperandVector &Operands) -> ParseStatus { "# 1074 "return parseOperandArrayWithPrefix(\""#Id#"\", Operands, "# 1075 "AMDGPUOperand::"#ImmTy#"); }"; 1076} 1077 1078let ImmTy = "ImmTyOffset" in 1079def flat_offset : CustomOperand<i32, 1, "FlatOffset">; 1080def offset : NamedIntOperand<i32, "offset", "Offset">; 1081def offset0 : NamedIntOperand<i8, "offset0", "Offset0">; 1082def offset1 : NamedIntOperand<i8, "offset1", "Offset1">; 1083 1084def gds : NamedBitOperand<"gds", "GDS">; 1085 1086def omod : CustomOperand<i32, 1, "OModSI">; 1087def omod0 : DefaultOperand<omod, 0>; 1088 1089// We need to make the cases with a default of 0 distinct from no 1090// default to help deal with some cases where the operand appears 1091// before a mandatory operand. 
def clampmod : NamedBitOperand<"clamp", "ClampSI">;
def clampmod0 : DefaultOperand<clampmod, 0>;
def highmod : NamedBitOperand<"high", "High">;

// Cache-policy operand; the *_GLC1 variant defaults the immediate to 1.
def CPol : CustomOperand<i32, 1>;
def CPol_0 : DefaultOperand<CPol, 0>;
def CPol_GLC1 : DefaultOperand<CPol, 1>;

def TFE : NamedBitOperand<"tfe">;
def UNorm : NamedBitOperand<"unorm">;
def DA : NamedBitOperand<"da">;
def R128A16 : CustomOperand<i1, 1>;
def A16 : NamedBitOperand<"a16">;
def D16 : NamedBitOperand<"d16">;
def LWE : NamedBitOperand<"lwe">;
def exp_compr : NamedBitOperand<"compr", "ExpCompr">;
def exp_vm : NamedBitOperand<"vm", "ExpVM">;

def FORMAT : CustomOperand<i8>;

def DMask : NamedIntOperand<i16, "dmask">;
def Dim : CustomOperand<i8>;

// SDWA sub-dword select operands.
def dst_sel : SDWAOperand<"dst_sel", "SDWADstSel">;
def src0_sel : SDWAOperand<"src0_sel", "SDWASrc0Sel">;
def src1_sel : SDWAOperand<"src1_sel", "SDWASrc1Sel">;
def dst_unused : CustomOperand<i32, 1, "SDWADstUnused">;

// VOP3P per-lane modifier arrays, all defaulting to 0.
def op_sel0 : ArrayOperand0<"op_sel", "OpSel">;
def op_sel_hi0 : ArrayOperand0<"op_sel_hi", "OpSelHi">;
def neg_lo0 : ArrayOperand0<"neg_lo", "NegLo">;
def neg_hi0 : ArrayOperand0<"neg_hi", "NegHi">;

def dpp8 : CustomOperand<i32, 0, "DPP8">;
def dpp_ctrl : CustomOperand<i32, 0, "DPPCtrl">;

// DPP row/bank masks default to "all lanes enabled" (0xf).
let DefaultValue = "0xf" in {
def row_mask : NamedIntOperand<i32, "row_mask", "DppRowMask">;
def bank_mask : NamedIntOperand<i32, "bank_mask", "DppBankMask">;
}
def bound_ctrl : NamedIntOperand<i1, "bound_ctrl", "DppBoundCtrl",
                                 "[this] (int64_t &BC) -> bool { return convertDppBoundCtrl(BC); }">;
def FI : NamedIntOperand<i32, "fi", "DppFI">;

// MAI/MFMA operands.
def blgp : CustomOperand<i32, 1, "BLGP">;
def cbsz : NamedIntOperand<i32, "cbsz", "CBSZ">;
def abid : NamedIntOperand<i32, "abid", "ABID">;

def hwreg : CustomOperand<i32, 0, "Hwreg">;

def exp_tgt : CustomOperand<i32, 0, "ExpTgt">;

def wait_vdst : NamedIntOperand<i8, "wait_vdst", "WaitVDST">;
def wait_exp : NamedIntOperand<i8, "wait_exp", "WaitEXP">;

// Literal ("K") FP immediate operand of the given bit width.
class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_KIMM"#vt.Size;
  let PrintMethod = "printU"#vt.Size#"ImmOperand";
  let DecoderMethod = "decodeOperand_KImmFP";
}

// 32-bit VALU immediate operand that uses the constant bus.
def KImmFP32 : KImmFPOperand<i32>;

// 32-bit VALU immediate operand with a 16-bit value that uses the
// constant bus.
def KImmFP16 : KImmFPOperand<i16>;

// Asm match class for a register-or-immediate source with FP source
// modifiers (abs/neg) of the given operand size.
class FPInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "RegOrImmWithFP"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithFPInputMods";
  let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
}

// Same, but the immediate must be an inline constant.
class FPVCSrcInputModsMatchClass <int opSize> : FPInputModsMatchClass <opSize> {
  let Name = "RegOrInlineImmWithFP"#opSize#"InputMods";
  let PredicateMethod = "isRegOrInlineImmWithFP"#opSize#"InputMods";
}

def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;

def FP16VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<16>;
def FP32VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<32>;

// Base class for all source-modifier operands.
class InputMods <AsmOperandClass matchClass> : Operand <i32> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_INPUT_MODS";
  let ParserMatchClass = matchClass;
}

class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;

def FP16VCSrcInputMods : FPInputMods<FP16VCSrcInputModsMatchClass>;
def FP32VCSrcInputMods : FPInputMods<FP32VCSrcInputModsMatchClass>;

// Integer source-modifier (sext) match classes, mirroring the FP ones.
class IntInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "RegOrImmWithInt"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithIntInputMods";
  let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods";
}
class IntVCSrcInputModsMatchClass <int opSize> : IntInputModsMatchClass <opSize> {
  let Name = "RegOrInlineImmWithInt"#opSize#"InputMods";
  let PredicateMethod = "isRegOrInlineImmWithInt"#opSize#"InputMods";
}
def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>;

class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}
def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
def Int32VCSrcInputMods : IntInputMods<Int32VCSrcInputModsMatchClass>;

// op_sel-style modifier source: plain register or immediate.
class OpSelModsMatchClass : AsmOperandClass {
  let Name = "OpSelMods";
  let ParserMethod = "parseRegOrImm";
  let PredicateMethod = "isRegOrImm";
}

def IntOpSelModsMatchClass : OpSelModsMatchClass;
def IntOpSelMods : InputMods<IntOpSelModsMatchClass>;

// SDWA source with FP modifiers.
class FPSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "SDWAWithFP"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithFPInputMods";
  let PredicateMethod = "isSDWAFP"#opSize#"Operand";
}

def FP16SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<16>;
def FP32SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<32>;

class FPSDWAInputMods <FPSDWAInputModsMatchClass matchClass> :
  InputMods <matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

def FP16SDWAInputMods : FPSDWAInputMods<FP16SDWAInputModsMatchClass>;
def FP32SDWAInputMods : FPSDWAInputMods<FP32SDWAInputModsMatchClass>;

// VGPR-only source with FP modifiers (used by DPP, where sources must
// be vector registers).
def FPVRegInputModsMatchClass : AsmOperandClass {
  let Name = "VRegWithFPInputMods";
  let ParserMethod = "parseRegWithFPInputMods";
  let PredicateMethod = "isVRegWithInputMods";
}

// True16 variant: restricted VGPR range.
def FPT16VRegInputModsMatchClass : AsmOperandClass {
  let Name = "T16VRegWithFPInputMods";
  let ParserMethod = "parseRegWithFPInputMods";
  let PredicateMethod = "isT16VRegWithInputMods";
}

def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

def FPT16VRegInputMods : InputMods <FPT16VRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

// SDWA source with integer (sext) modifiers.
class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "SDWAWithInt"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithIntInputMods";
  let PredicateMethod = "isSDWAInt"#opSize#"Operand";
}

def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>;
def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>;
// Bin32 variant: same predicate, but parsed without modifier syntax.
def Bin32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32> {
  let Name = "SDWAWithBin32InputMods";
  let ParserMethod = "parseRegOrImm";
}

class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> :
  InputMods <matchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}

def Int16SDWAInputMods : IntSDWAInputMods<Int16SDWAInputModsMatchClass>;
def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>;
def Bin32SDWAInputMods : IntSDWAInputMods<Bin32SDWAInputModsMatchClass>;

// VGPR-only source with integer modifiers.
def IntVRegInputModsMatchClass : AsmOperandClass {
  let Name = "VRegWithIntInputMods";
  let ParserMethod = "parseRegWithIntInputMods";
  let PredicateMethod = "isVRegWithInputMods";
}

def IntT16VRegInputModsMatchClass : AsmOperandClass {
  let Name = "T16VRegWithIntInputMods";
  let ParserMethod = "parseRegWithIntInputMods";
  let PredicateMethod = "isT16VRegWithInputMods";
}

def IntT16VRegInputMods : InputMods <IntT16VRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}

def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}

// Packed (v2*16) modifier classes; the dedicated predicates/printers
// are still commented out, so plain reg-or-imm handling is used.
class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "PackedFP"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImm";
  let PredicateMethod = "isRegOrImm";
//  let PredicateMethod = "isPackedFP"#opSize#"InputMods";
}

class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "PackedInt"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImm";
  let PredicateMethod = "isRegOrImm";
//  let PredicateMethod = "isPackedInt"#opSize#"InputMods";
}

def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;

class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
//  let PrintMethod = "printPackedFPInputMods";
}

class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <matchClass> {
  //let PrintMethod = "printPackedIntInputMods";
}

def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;

//===----------------------------------------------------------------------===//
// Complex patterns
//===----------------------------------------------------------------------===//

def DS1Addr1Offset : ComplexPattern<iPTR, 2, "SelectDS1Addr1Offset">;
def DS64Bit4ByteAligned : ComplexPattern<iPTR, 3, "SelectDS64Bit4ByteAligned">;
def DS128Bit8ByteAligned : ComplexPattern<iPTR, 3, "SelectDS128Bit8ByteAligned">;

def MOVRELOffset : ComplexPattern<iPTR, 2, "SelectMOVRELOffset">;

def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;

// Modifiers for floating point instructions.
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;

// VOP3 modifiers used for instructions that do not read canonicalized
// floating point values (i.e. integer operations with FP source
// modifiers)
def VOP3ModsNonCanonicalizing : ComplexPattern<untyped, 2,
  "SelectVOP3ModsNonCanonicalizing">;

def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;

def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;

def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;

def VOP3PModsDOT : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">;
def DotIUVOP3PMods : ComplexPattern<untyped, 1, "SelectDotIUVOP3PMods">;
def WMMAOpSelVOP3PMods : ComplexPattern<untyped, 1, "SelectWMMAOpSelVOP3PMods">;

def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;

def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;

def VOP3PMadMixModsExt : ComplexPattern<untyped, 2, "SelectVOP3PMadMixModsExt">;
def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;

def VINTERPMods : ComplexPattern<untyped, 2, "SelectVINTERPMods">;
def VINTERPModsHi : ComplexPattern<untyped, 2, "SelectVINTERPModsHi">;

//===----------------------------------------------------------------------===//
// SI assembler operands
//===----------------------------------------------------------------------===//

// Fixed source-operand encodings used by the assembler.
def SIOperand {
  int ZERO = 0x80;
  int VCC = 0x6A;
  int FLAT_SCR = 0x68;
}

// This should be kept in sync with SISrcMods enum
def SRCMODS {
  int NONE = 0;
  int NEG = 1;
  int ABS = 2;
  int NEG_ABS = 3;

  int NEG_HI = ABS;
  int OP_SEL_0 = 4;
  int OP_SEL_1 = 8;
  int DST_OP_SEL = 8;
}

// Destination clamp modifier values.
def DSTCLAMP {
  int NONE = 0;
  int ENABLE = 1;
}

// Destination output-modifier (omod) values.
def DSTOMOD {
  int NONE = 0;
}

// Hardware register IDs for s_getreg/s_setreg-style hwreg operands.
def HWREG {
  int MODE = 1;
  int STATUS = 2;
  int TRAPSTS = 3;
  int HW_ID = 4;
  int GPR_ALLOC = 5;
  int LDS_ALLOC = 6;
  int IB_STS = 7;
  int MEM_BASES = 15;
  int TBA_LO = 16;
  int TBA_HI = 17;
  int TMA_LO = 18;
  int TMA_HI = 19;
  int FLAT_SCR_LO = 20;
  int FLAT_SCR_HI = 21;
  int XNACK_MASK = 22;
  int POPS_PACKER = 25;
  int SHADER_CYCLES = 29;
}

// Packs a hwreg immediate: bits [5:0] = register id, [10:6] = bit
// offset, [15:11] = size - 1; result masked to 16 bits.
class getHwRegImm<int Reg, int Offset = 0, int Size = 32> {
  int ret = !and(!or(Reg,
                     !shl(Offset, 6),
                     !shl(!add(Size, -1), 11)), 65535);
}

//===----------------------------------------------------------------------===//
//
// SI Instruction multiclass helpers.
//
// Instructions with _32 take 32-bit operands.
// Instructions with _64 take 64-bit operands.
//
// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
// encoding is the standard encoding, but instruction that make use of
// any of the instruction modifiers must use the 64-bit encoding.
//
// Instructions with _e32 use the 32-bit encoding.
// Instructions with _e64 use the 64-bit encoding.
//
//===----------------------------------------------------------------------===//

// Associates a pseudo-instruction name with an encoding-family index
// (see SIEncodingFamily) for the getMCOpcodeGen mapping table.
class SIMCInstr <string pseudo, int subtarget> {
  string PseudoInstr = pseudo;
  int Subtarget = subtarget;
}

//===----------------------------------------------------------------------===//
// Vector ALU classes
//===----------------------------------------------------------------------===//

// Derives the VOP arity (0-3) from the first source type that is
// 'untyped'.
class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> {
  int ret =
    !if (!eq(Src0.Value, untyped.Value), 0,
      !if (!eq(Src1.Value, untyped.Value), 1,   // VOP1
        !if (!eq(Src2.Value, untyped.Value), 2, // VOP2
          3)));                                 // VOP3
}

// Returns the register class to use for the destination of VOP[123C]
// instructions for the given VT.
class getVALUDstForVT<ValueType VT> {
  RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
                          !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
                            !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
                              !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32>,
                              VOPDstS64orS32)))); // else VT == i1
}

// True16 variant: 16-bit results are constrained to the low 128 VGPRs.
class getVALUDstForVT_t16<ValueType VT> {
  RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
                          !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
                            !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
                              !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32_Lo128>,
                              VOPDstS64orS32)))); // else VT == i1
}

// Returns the register class to use for the destination of VOP[12C]
// instructions with SDWA extension
class getSDWADstForVT<ValueType VT> {
  RegisterOperand ret = !if(!eq(VT.Size, 1),
                            SDWAVopcDst, // VOPC
                            VOPDstOperand<VGPR_32>); // VOP1/2 32-bit dst
}

// Returns the register class to use for source 0 of VOP[12C]
// instructions for the given VT.
// Selects the src0 operand class by FP-ness, width, and (for 16-bit
// types) whether the True16 restricted VGPR range is in effect.
class getVOPSrc0ForVT<ValueType VT, bit IsTrue16> {
  bit isFP = isFloatType<VT>.ret;

  RegisterOperand ret =
    !if(isFP,
       !if(!eq(VT.Size, 64),
          VSrc_f64,
          !if(!eq(VT.Value, f16.Value),
             !if(IsTrue16,
                VSrcT_f16_Lo128,
                VSrc_f16
             ),
             !if(!eq(VT.Value, v2f16.Value),
                VSrc_v2f16,
                !if(!eq(VT.Value, v4f16.Value),
                   AVSrc_64,
                   VSrc_f32
                )
             )
          )
       ),
       !if(!eq(VT.Size, 64),
          VSrc_b64,
          !if(!eq(VT.Value, i16.Value),
             !if(IsTrue16,
                VSrcT_b16_Lo128,
                VSrc_b16
             ),
             !if(!eq(VT.Value, v2i16.Value),
                VSrc_v2b16,
                VSrc_b32
             )
          )
       )
    );
}

// Scalar (SALU) source: 64-bit wide types use SSrc_b64, else SSrc_b32.
class getSOPSrcForVT<ValueType VT> {
  RegisterOperand ret = !if(!eq(VT.Size, 64), SSrc_b64, SSrc_b32);
}

// Returns the vreg register class to use for source operand given VT
class getVregSrcForVT<ValueType VT> {
  RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128,
                        !if(!eq(VT.Size, 96), VReg_96,
                          !if(!eq(VT.Size, 64), VReg_64,
                            !if(!eq(VT.Size, 48), VReg_64,
                              VGPR_32))));
}

// True16 variant: 16-bit sources constrained to the low 128 VGPRs.
class getVregSrcForVT_t16<ValueType VT> {
  RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128,
                        !if(!eq(VT.Size, 96), VReg_96,
                          !if(!eq(VT.Size, 64), VReg_64,
                            !if(!eq(VT.Size, 48), VReg_64,
                              !if(!eq(VT.Size, 16), VGPR_32_Lo128,
                                VGPR_32)))));
}

// SDWA source operand selected by FP-ness and 16- vs 32-bit width.
class getSDWASrcForVT <ValueType VT> {
  bit isFP = isFloatType<VT>.ret;
  RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32);
  RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32);
  RegisterOperand ret = !if(isFP, retFlt, retInt);
}

// Returns the register class to use for sources of VOP3 instructions for the
// given VT.
class getVOP3SrcForVT<ValueType VT> {
  bit isFP = isFloatType<VT>.ret;
  RegisterOperand ret =
  !if(!eq(VT.Size, 128),
     VRegSrc_128,
     !if(!eq(VT.Size, 64),
        !if(isFP,
           !if(!eq(VT.Value, v2f32.Value),
              VSrc_v2f32,
              VSrc_f64),
           !if(!eq(VT.Value, v2i32.Value),
              VSrc_v2b32,
              VSrc_b64)),
        !if(!eq(VT.Value, i1.Value),
           SSrc_i1,
           !if(isFP,
              !if(!eq(VT.Value, f16.Value),
                 VSrc_f16,
                 !if(!eq(VT.Value, v2f16.Value),
                    VSrc_v2f16,
                    !if(!eq(VT.Value, v4f16.Value),
                       AVSrc_64,
                       VSrc_f32
                    )
                 )
              ),
              !if(!eq(VT.Value, i16.Value),
                 VSrc_b16,
                 !if(!eq(VT.Value, v2i16.Value),
                    VSrc_v2b16,
                    VSrc_b32
                 )
              )
           )
        )
     )
  );
}

// Src2 of VOP3 DPP instructions cannot be a literal
class getVOP3DPPSrcForVT<ValueType VT> {
  bit isFP = isFloatType<VT>.ret;
  RegisterOperand ret =
      !if (!eq(VT.Value, i1.Value), SSrc_i1,
           !if (isFP,
                !if (!eq(VT.Value, f16.Value), VCSrc_f16,
                     !if (!eq(VT.Value, v2f16.Value), VCSrc_v2f16, VCSrc_f32)),
                !if (!eq(VT.Value, i16.Value), VCSrc_b16,
                     !if (!eq(VT.Value, v2i16.Value), VCSrc_v2b16,
                          VCSrc_b32))));
}

// Float or packed int
// True when the source type supports source modifiers (abs/neg or the
// packed op_sel/neg_lo variants).
class isModifierType<ValueType SrcVT> {
  bit ret = !or(!eq(SrcVT.Value, f16.Value),
                !eq(SrcVT.Value, f32.Value),
                !eq(SrcVT.Value, f64.Value),
                !eq(SrcVT.Value, v2f16.Value),
                !eq(SrcVT.Value, v2i16.Value),
                !eq(SrcVT.Value, v2f32.Value),
                !eq(SrcVT.Value, v2i32.Value),
                !eq(SrcVT.Value, v4f16.Value),
                !eq(SrcVT.Value, v4i16.Value),
                !eq(SrcVT.Value, v4f32.Value),
                !eq(SrcVT.Value, v4i32.Value),
                !eq(SrcVT.Value, v8f16.Value),
                !eq(SrcVT.Value, v8i16.Value),
                !eq(SrcVT.Value, v8f32.Value),
                !eq(SrcVT.Value, v8i32.Value),
                !eq(SrcVT.Value, v16f16.Value),
                !eq(SrcVT.Value, v16i16.Value));
}

// Return type of input modifiers operand for specified input operand
class getSrcMod <ValueType VT> {
  bit isFP = isFloatType<VT>.ret;
  bit isPacked = isPackedType<VT>.ret;
  Operand ret = !if(!eq(VT.Size, 64),
                    !if(isFP, FP64InputMods, Int64InputMods),
                    !if(isFP,
                        !if(!eq(VT.Value, f16.Value),
                            FP16InputMods,
                            FP32InputMods
                         ),
                        Int32InputMods)
                     );
}

class getOpSelMod <ValueType VT> {
  Operand ret = !if(!eq(VT.Value, f16.Value), FP16InputMods, IntOpSelMods);
}

// Return type of input modifiers operand specified input operand for DPP
class getSrcModDPP <ValueType VT> {
  bit isFP = isFloatType<VT>.ret;
  Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);
}

// True16 variant: 16-bit operands use the T16-restricted VGPR mods.
class getSrcModDPP_t16 <ValueType VT> {
  bit isFP = isFloatType<VT>.ret;
  Operand ret =
      !if (isFP,
           !if (!eq(VT.Value, f16.Value), FPT16VRegInputMods,
                FPVRegInputMods),
           !if (!eq(VT.Value, i16.Value), IntT16VRegInputMods,
                IntVRegInputMods));
}

// Return type of input modifiers operand for specified input operand for DPP
class getSrcModVOP3DPP <ValueType VT> {
  bit isFP = isFloatType<VT>.ret;
  bit isPacked = isPackedType<VT>.ret;
  Operand ret =
      !if (isFP,
           !if (!eq(VT.Value, f16.Value), FP16VCSrcInputMods,
                FP32VCSrcInputMods),
           Int32VCSrcInputMods);
}

// Return type of input modifiers operand specified input operand for SDWA
class getSrcModSDWA <ValueType VT> {
  Operand ret = !if(!eq(VT.Value, f16.Value), FP16SDWAInputMods,
                !if(!eq(VT.Value, f32.Value), FP32SDWAInputMods,
                !if(!eq(VT.Value, i16.Value), Int16SDWAInputMods,
                Int32SDWAInputMods)));
}

// Returns the input arguments for VOP[12C] instructions for the given SrcVT.
class getIns32 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs> {
  dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0),               // VOP1
            !if(!eq(NumSrcArgs, 2), (ins Src0RC:$src0, Src1RC:$src1), // VOP2
                                    (ins)));
}

// Returns the input arguments for VOP3 instructions for the given SrcVT.
// Dispatches on arity, then modifier/clamp/omod availability; each leaf
// lists the operands in encoding order.
class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
                RegisterOperand Src2RC, int NumSrcArgs,
                bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
                Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {

  dag ret =
    !if (!eq(NumSrcArgs, 0),
      // VOP1 without input operands (V_NOP, V_CLREXCP)
      (ins),
      /* else */
    !if (!eq(NumSrcArgs, 1),
      !if (HasModifiers,
        // VOP1 with modifiers
        !if(HasOMod,
          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
               clampmod0:$clamp, omod0:$omod),
          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
               clampmod0:$clamp))
      /* else */,
        // VOP1 without modifiers
        !if (HasClamp,
          (ins Src0RC:$src0, clampmod0:$clamp),
          (ins Src0RC:$src0))
      /* endif */ ),
    !if (!eq(NumSrcArgs, 2),
      !if (HasModifiers,
        // VOP 2 with modifiers
        !if(HasOMod,
          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
               Src1Mod:$src1_modifiers, Src1RC:$src1,
               clampmod0:$clamp, omod0:$omod),
          (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
               Src1Mod:$src1_modifiers, Src1RC:$src1,
               clampmod0:$clamp))
      /* else */,
        // VOP2 without modifiers
        !if (HasClamp,
          (ins Src0RC:$src0, Src1RC:$src1, clampmod0:$clamp),
          (ins Src0RC:$src0, Src1RC:$src1))

      /* endif */ )
    /* NumSrcArgs == 3 */,
      !if (HasModifiers,
        !if (HasSrc2Mods,
          // VOP3 with modifiers
          !if (HasOMod,
            (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                 Src1Mod:$src1_modifiers, Src1RC:$src1,
                 Src2Mod:$src2_modifiers, Src2RC:$src2,
                 clampmod0:$clamp, omod0:$omod),
            !if (HasClamp,
              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                   Src1Mod:$src1_modifiers, Src1RC:$src1,
                   Src2Mod:$src2_modifiers, Src2RC:$src2,
                   clampmod0:$clamp),
              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                   Src1Mod:$src1_modifiers, Src1RC:$src1,
                   Src2Mod:$src2_modifiers, Src2RC:$src2))),
          // VOP3 with modifiers except src2
          !if (HasOMod,
            (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                 Src1Mod:$src1_modifiers, Src1RC:$src1,
                 Src2RC:$src2, clampmod0:$clamp, omod0:$omod),
            !if (HasClamp,
              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                   Src1Mod:$src1_modifiers, Src1RC:$src1,
                   Src2RC:$src2, clampmod0:$clamp),
              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                   Src1Mod:$src1_modifiers, Src1RC:$src1,
                   Src2RC:$src2))))
      /* else */,
        // VOP3 without modifiers
        !if (HasClamp,
          (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2, clampmod0:$clamp),
          (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2))
      /* endif */ ))));
}

// Shared input list for VOP3/VOP3P: the getIns64 operands plus the
// optional op_sel and (for VOP3P) op_sel_hi/neg_lo/neg_hi arrays.
class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,
                     RegisterOperand Src2RC, int NumSrcArgs,
                     bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
                     Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel,
                     bit IsVOP3P> {
  // getInst64 handles clamp and omod. implicit mutex between vop3p and omod
  dag base = getIns64 <Src0RC, Src1RC, Src2RC, NumSrcArgs,
                       HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
                       Src0Mod, Src1Mod, Src2Mod>.ret;
  dag opsel = (ins op_sel0:$op_sel);
  dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi);
  dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi));

  dag ret = !con(base,
                 !if(HasOpSel, opsel,(ins)),
                 !if(IsVOP3P, vop3pFields,(ins)));
}

// VOP3P inputs: full modifiers, no omod.
class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
                   RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel,
                   Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
  dag ret = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
                           HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/,
                           0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod,
                           HasOpSel, 1/*IsVOP3P*/>.ret;
}

// VOP3 with op_sel: full modifiers, op_sel forced on, not VOP3P.
class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC,
                       RegisterOperand Src2RC, int NumSrcArgs,
                       bit HasClamp, bit HasOMod,
                       Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
  dag ret = getInsVOP3Base<Src0RC, Src1RC,
                           Src2RC, NumSrcArgs,
                           HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/, HasOMod,
                           Src0Mod, Src1Mod, Src2Mod, 1/*HasOpSel*/, 0>.ret;
}

// Core DPP input list: optional tied $old destination followed by the
// (optionally modified) sources; DPP control operands are appended by
// the getInsDPP* wrappers below.
class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
                     RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
                     Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld> {

  dag ret = !if(!eq(NumSrcArgs, 0),
                // VOP1 without input operands (V_NOP)
                (ins ),
                !con(
                  !if(HasOld ,(ins OldRC:$old), (ins)),
                  !if (!eq(NumSrcArgs, 1),
                    !if (HasModifiers,
                      // VOP1_DPP with modifiers
                      (ins Src0Mod:$src0_modifiers, Src0RC:$src0)
                      /* else */,
                      // VOP1_DPP without modifiers
                      (ins Src0RC:$src0)
                      /* endif */),
                    !if (!eq(NumSrcArgs, 2),
                      !if (HasModifiers,
                        // VOP2_DPP with modifiers
                        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                             Src1Mod:$src1_modifiers, Src1RC:$src1)
                        /* else */,
                        // VOP2_DPP without modifiers
                        (ins Src0RC:$src0, Src1RC:$src1)
                      )
                      /* NumSrcArgs == 3, VOP3 */,
                      !if (HasModifiers,
                        // VOP3_DPP with modifiers
                        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                             Src1Mod:$src1_modifiers, Src1RC:$src1,
                             Src2Mod:$src2_modifiers, Src2RC:$src2)
                        /* else */,
                        // VOP3_DPP without modifiers
                        (ins Src0RC:$src0, Src1RC:$src1,
                             Src2RC:$src2)
                      )
                    )
                  )
                )
            );
}

// DPP inputs plus the classic dpp_ctrl/row_mask/bank_mask/bound_ctrl
// control operands.
class getInsDPP <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
                 RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
                 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
  dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
                               HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret,
                 (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                      bank_mask:$bank_mask, bound_ctrl:$bound_ctrl));
}

// DPP16: DPP plus the fetch-invalidate (fi) operand.
class getInsDPP16 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
                   RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
                   Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
  dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
                           HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret,
                 (ins FI:$fi));
}

// DPP8: base inputs plus the dpp8 lane-select and fi operands.
class getInsDPP8 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
                  RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
                  Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
  dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
                               HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret,
                 (ins dpp8:$dpp8, FI:$fi));
}

// VOP3-encoded DPP: prepends the tied $old operand (when present and
// the instruction has sources) to an existing VOP3 input list.
class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld> {
  dag old = ( ins OldRC:$old );
  dag base = VOP3Base;
  dag ret = !con(
                 !if(!and(HasOld,!ne(NumSrcArgs, 0)), old, (ins)),
                 base
                );
}

class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
  dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret,
                 (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                      bank_mask:$bank_mask, bound_ctrl:$bound_ctrl));
}

class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
  dag ret = !con(getInsVOP3DPP<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret,
                 (ins FI:$fi));
}

class getInsVOP3DPP8<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
  dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret,
                 (ins dpp8:$dpp8, FI:$fi));
}

// Ins for SDWA
class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs,
                  bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod,
                  ValueType DstVT> {

  dag ret = !if(!eq(NumSrcArgs, 0),
               // VOP1 without input operands (V_NOP)
               (ins),
               !if(!eq(NumSrcArgs, 1),
                  // VOP1
                  !if(!not(HasSDWAOMod),
                     // VOP1_SDWA without omod
                     (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                          clampmod:$clamp,
                          dst_sel:$dst_sel, dst_unused:$dst_unused,
                          src0_sel:$src0_sel),
                     // VOP1_SDWA with omod
                     (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                          clampmod:$clamp, omod:$omod,
                          dst_sel:$dst_sel, dst_unused:$dst_unused,
                          src0_sel:$src0_sel)),
                  !if(!eq(NumSrcArgs, 2),
                     !if(!eq(DstVT.Size, 1),
                        // VOPC_SDWA
                        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                             Src1Mod:$src1_modifiers, Src1RC:$src1,
                             clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
                        // VOP2_SDWA
                        !if(!not(HasSDWAOMod),
                           // VOP2_SDWA without omod
                           (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                                Src1Mod:$src1_modifiers, Src1RC:$src1,
                                clampmod:$clamp,
                                dst_sel:$dst_sel, dst_unused:$dst_unused,
                                src0_sel:$src0_sel, src1_sel:$src1_sel),
                           // VOP2_SDWA with omod
                           (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                                Src1Mod:$src1_modifiers, Src1RC:$src1,
                                clampmod:$clamp, omod:$omod,
                                dst_sel:$dst_sel, dst_unused:$dst_unused,
                                src0_sel:$src0_sel, src1_sel:$src1_sel))),
                     (ins)/* endif */)));
}

// Outs for DPP
class getOutsDPP <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> {
  dag ret = !if(HasDst,
                !if(!eq(DstVT.Size, 1),
                    (outs), // no dst for VOPC, we use "vcc"-token as dst in SDWA VOPC instructions
                    (outs DstRCDPP:$vdst)),
                (outs)); // V_NOP
}

// Outs for SDWA
class getOutsSDWA <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA> {
  dag ret = !if(HasDst,
                !if(!eq(DstVT.Size, 1),
                    (outs DstRCSDWA:$sdst),
                    (outs DstRCSDWA:$vdst)),
                (outs)); // V_NOP
}

// Returns the assembly string for the inputs and outputs of a VOP[12C]
// instruction.
class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
  string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
  string src0 = ", $src0";
  string src1 = ", $src1";
  string src2 = ", $src2";
  string ret = !if(HasDst, dst, "") #
               !if(!eq(NumSrcArgs, 1), src0, "") #
               !if(!eq(NumSrcArgs, 2), src0#src1, "") #
               !if(!eq(NumSrcArgs, 3), src0#src1#src2, "");
}

// Assembly for one half (X or Y) of a dual-issue VOPD instruction.
class getAsmVOPDPart <int NumSrcArgs, string XorY> {
  string dst = "$vdst" # XorY;
  string src0 = ", $src0" # XorY;
  string src1 = ", $vsrc1" # XorY;
  string ret = dst #
               !if(!ge(NumSrcArgs, 1), src0, "") #
               !if(!ge(NumSrcArgs, 2), src1, "");
}

// Returns the assembly string for the inputs and outputs of a VOP3P
// instruction.
class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
                   bit HasClamp, bit HasOpSel> {
  string dst = "$vdst";
  string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
  string src1 = !if(!eq(NumSrcArgs, 1), "",
                   !if(!eq(NumSrcArgs, 2), " $src1",
                                           " $src1,"));
  string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");

  string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
  string clamp = !if(HasClamp, "$clamp", "");
  string opsel = !if(HasOpSel, "$op_sel$op_sel_hi", "");

  // Each modifier is printed as an array of bits for each operand, so
  // all operands are printed as part of src0_modifiers.
  string ret = dst#", "#src0#src1#src2#opsel#mods#clamp;
}

// Returns the assembly string for a VOP3 instruction with op_sel. Sources
// whose SrcNHasMods bit is set print "$srcN_modifiers" (modifiers folded into
// the operand) instead of the plain "$srcN".
class getAsmVOP3OpSel <int NumSrcArgs,
                       bit HasClamp,
                       bit HasOMod,
                       bit Src0HasMods,
                       bit Src1HasMods,
                       bit Src2HasMods> {
  string dst = "$vdst";

  // Plain (integer-style, no modifiers) source operand strings.
  string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
  string isrc1 = !if(!eq(NumSrcArgs, 1), "",
                    !if(!eq(NumSrcArgs, 2), " $src1",
                                            " $src1,"));
  string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", "");

  // Source operand strings carrying input modifiers.
  string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
  string fsrc1 = !if(!eq(NumSrcArgs, 1), "",
                    !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
                                            " $src1_modifiers,"));
  string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");

  string src0 = !if(Src0HasMods, fsrc0, isrc0);
  string src1 = !if(Src1HasMods, fsrc1, isrc1);
  string src2 = !if(Src2HasMods, fsrc2, isrc2);

  string clamp = !if(HasClamp, "$clamp", "");
  string omod = !if(HasOMod, "$omod", "");
  string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp#omod;
}

// Returns the assembly string for a DPP (32-bit encoding) instruction:
// operands followed by the DPP control fields.
class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
  string dst = !if(HasDst,
                   !if(!eq(DstVT.Size, 1),
                       "$sdst",
                       "$vdst"),
                   ""); // use $sdst for VOPC
  string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
  string src1 = !if(!eq(NumSrcArgs, 1), "",
                   !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
                                           " $src1_modifiers,"));
  string args = !if(!not(HasModifiers),
                    getAsm32<0, NumSrcArgs, DstVT>.ret,
                    ", "#src0#src1);
  string ret = dst#args#" $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
}

// Same as getAsmDPP plus the trailing fetch-invalidate ($fi) field.
class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
  string ret = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret#"$fi";
}

// DPP8 variant: same operand part as getAsmDPP, but with the $dpp8 selector
// and $fi instead of the dpp_ctrl/mask/bound_ctrl fields.
class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32>
  : getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>{
  let ret = dst#args#" $dpp8$fi";
}

// Common asm string for the VOP3/VOP3P 64-bit encodings; VOP3P additionally
// prints op_sel_hi and neg_lo/neg_hi (see 3PMods below).
class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
    bit HasOpSel, bit HasOMod, bit IsVOP3P,
    bit HasModifiers, bit Src0HasMods,
    bit Src1HasMods, bit Src2HasMods, ValueType DstVT = i32> {
  string dst = !if(HasDst,
                   !if(!eq(DstVT.Size, 1),
                       "$sdst",
                       "$vdst"),
                   ""); // use $sdst for VOPC
  string src0nomods = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
  string src1nomods = !if(!eq(NumSrcArgs, 1), "",
                   !if(!eq(NumSrcArgs, 2), " $src1",
                                           " $src1,"));
  string src2nomods = !if(!eq(NumSrcArgs, 3), " $src2", "");

  string src0mods = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
  string src1mods = !if(!eq(NumSrcArgs, 1), "",
                   !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
                                           " $src1_modifiers,"));
  string src2mods = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");

  string src0 = !if(Src0HasMods, src0mods, src0nomods);
  string src1 = !if(Src1HasMods, src1mods, src1nomods);
  string src2 = !if(Src2HasMods, src2mods, src2nomods);
  string opsel = !if(HasOpSel, "$op_sel", "");
  // VOP3P-only fields (empty string for plain VOP3).
  string 3PMods = !if(IsVOP3P,
                      !if(HasOpSel, "$op_sel_hi", "")
                      #!if(HasModifiers, "$neg_lo$neg_hi", ""),
                      "");
  string clamp = !if(HasClamp, "$clamp", "");
  string omod = !if(HasOMod, "$omod", "");

  string ret = dst#!if(!gt(NumSrcArgs,0),", "#src0#src1#src2#opsel#3PMods#clamp#omod, "");

}

// Appends the DPP control fields to a VOP3 base asm string.
class getAsmVOP3DPP<string base> {
  string ret = base # " $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
}

// VOP3 DPP16: DPP controls plus the fetch-invalidate ($fi) field.
class getAsmVOP3DPP16<string base> {
  string ret = getAsmVOP3DPP<base>.ret # "$fi";
}

// VOP3 DPP8: $dpp8 selector plus $fi.
class getAsmVOP3DPP8<string base> {
  string ret = base # " $dpp8$fi";
}


// Returns the assembly string for the GFX8 (VI) SDWA encoding.
class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
  string dst = !if(HasDst,
                   !if(!eq(DstVT.Size, 1),
                       " vcc", // use vcc token as dst for VOPC instructions
                       "$vdst"),
                   "");
  string src0 = "$src0_modifiers";
  string src1 = "$src1_modifiers";
  string args = !if(!eq(NumSrcArgs, 0),
                    "",
                    !if(!eq(NumSrcArgs, 1),
                        ", "#src0#"$clamp",
                        ", "#src0#", "#src1#"$clamp"
                    )
                );
  string sdwa = !if(!eq(NumSrcArgs, 0),
                    "",
                    !if(!eq(NumSrcArgs, 1),
                        " $dst_sel $dst_unused $src0_sel",
                        !if(!eq(DstVT.Size, 1),
                            " $src0_sel $src1_sel", // No dst_sel and dst_unused for VOPC
                            " $dst_sel $dst_unused $src0_sel $src1_sel"
                        )
                    )
                );
  string ret = dst#args#sdwa;
}

// Returns the assembly string for the GFX9 SDWA encoding, which unlike the
// GFX8 form has a real $sdst for VOPC and optionally prints omod.
class getAsmSDWA9 <bit HasDst, bit HasOMod, int NumSrcArgs,
                   ValueType DstVT = i32> {
  string dst = !if(HasDst,
                   !if(!eq(DstVT.Size, 1),
                       "$sdst", // VOPC
                       "$vdst"), // VOP1/2
                   "");
  string src0 = "$src0_modifiers";
  string src1 = "$src1_modifiers";
  string out_mods = !if(!not(HasOMod), "$clamp", "$clamp$omod");
  string args = !if(!eq(NumSrcArgs, 0), "",
                    !if(!eq(NumSrcArgs, 1),
                        ", "#src0,
                        ", "#src0#", "#src1
                    )
                );
  string sdwa = !if(!eq(NumSrcArgs, 0), "",
                    !if(!eq(NumSrcArgs, 1),
                        out_mods#" $dst_sel $dst_unused $src0_sel",
                        !if(!eq(DstVT.Size, 1),
                            " $src0_sel $src1_sel", // No dst_sel, dst_unused and output modifiers for VOPC
                            out_mods#" $dst_sel $dst_unused $src0_sel $src1_sel"
                        )
                    )
                );
  string ret = dst#args#sdwa;
}

// Returns 1 when any of dst/src0/src1 is 64 bits wide (always 0 for
// three-source instructions).
class getHas64BitOps <int NumSrcArgs, ValueType DstVT, ValueType Src0VT,
                      ValueType Src1VT> {
  bit ret = !if(!eq(NumSrcArgs, 3),
                0,
                !if(!eq(DstVT.Size, 64),
                    1,
                    !if(!eq(Src0VT.Size, 64),
                        1,
                        !if(!eq(Src1VT.Size, 64),
                            1,
                            0
                        )
                    )
                )
            );
}

// Returns 1 when an SDWA form exists: no three-source instructions and no
// 64-bit operands.
class getHasSDWA <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                  ValueType Src1VT = i32> {
  bit ret = !if(!eq(NumSrcArgs, 3),
                0, // NumSrcArgs == 3 - No SDWA for VOP3
                !if(!eq(DstVT.Size, 64),
                    0, // 64-bit dst - No SDWA for 64-bit operands
                    !if(!eq(Src0VT.Size, 64),
                        0, // 64-bit src0
                        !if(!eq(Src1VT.Size, 64),
                            0, // 64-bit src1
                            1
                        )
                    )
                )
            );
}

// Returns 1 when a (32-bit encoding) DPP form exists.
class getHasDPP <int NumSrcArgs> {
  bit ret = !if(!eq(NumSrcArgs, 3),
                0, // NumSrcArgs == 3 - No DPP for VOP3
                1);
}

// DPP form exists and all operands are 32 bits or narrower.
class getHasExt32BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                         ValueType Src1VT = i32> {
  bit ret = !and(getHasDPP<NumSrcArgs>.ret,
                 !not(getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret));
}

// DPP form exists and at least one operand is 64 bits wide.
class getHasExt64BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                         ValueType Src1VT = i32> {
  bit ret = !and(getHasDPP<NumSrcArgs>.ret,
                 getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
}

// Checks if the instruction supports at least one of the extended
// encodings, DPP or SDWA.
class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                 ValueType Src1VT = i32> {
  bit ret = !or(getHasDPP<NumSrcArgs>.ret,
                getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
}

// Return an AGPR+VGPR operand class for the given VGPR register class.
class getLdStRegisterOperand<RegisterClass RC> {
  RegisterOperand ret =
    !if(!eq(RC.Size, 32), AVLdSt_32,
    !if(!eq(RC.Size, 64), AVLdSt_64,
    !if(!eq(RC.Size, 96), AVLdSt_96,
    !if(!eq(RC.Size, 128), AVLdSt_128,
    !if(!eq(RC.Size, 160), AVLdSt_160,
    RegisterOperand<VReg_1> // invalid register
    )))));
}

// Returns 1 when a VOP3 instruction has a DPP form: no operand may be
// 64 bits wide.
class getHasVOP3DPP <ValueType DstVT = i32, ValueType Src0VT = i32,
                 ValueType Src1VT = i32, ValueType Src2VT = i32> {
  bit ret = !if(!eq(DstVT.Size, 64),
                0, // 64-bit dst No DPP for 64-bit operands
                !if(!eq(Src0VT.Size, 64),
                    0, // 64-bit src0
                    !if(!eq(Src1VT.Size, 64),
                        0, // 64-bit src1
                        !if(!eq(Src2VT.Size, 64),
                            0, // 64-bit src2
                            1
                        )
                    )
                )
            );
}


// Selects whether ISel patterns are generated for a profile (see the
// NeedPatGen field of VOPProfile).
def PatGenMode {
  int NoPattern = 0;
  int Pattern = 1;
}

// Describes the operand profile of a VALU instruction: the value types of
// dst and src0..src2 (untyped = absent), and everything derived from them -
// register/operand classes, modifier operands, ins/outs dags, and assembly
// strings for each encoding (e32, VOP3/e64, VOP3P, SDWA, DPP, VOPD).
class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {

  field list<ValueType> ArgVT = _ArgVT;
  field bit EnableClamp = _EnableClamp;
  field bit IsTrue16 = 0;

  field ValueType DstVT = ArgVT[0];
  field ValueType Src0VT = ArgVT[1];
  field ValueType Src1VT = ArgVT[2];
  field ValueType Src2VT = ArgVT[3];
  field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
  field RegisterOperand DstRCDPP = DstRC;
  field RegisterOperand DstRC64 = DstRC;
  field RegisterOperand DstRCVOP3DPP = DstRC64;
  field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret;
  field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT, IsTrue16>.ret;
  field RegisterOperand Src1RC32 = RegisterOperand<getVregSrcForVT<Src1VT>.ret>;
  field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
  field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
  field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
  field RegisterClass Src0DPP = getVregSrcForVT<Src0VT>.ret;
  field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
  field RegisterClass Src2DPP = getVregSrcForVT<Src2VT>.ret;
  field RegisterOperand Src0VOP3DPP = VGPRSrc_32;
  field RegisterOperand Src1VOP3DPP = VRegSrc_32;
  field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret;
  field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
  // NOTE(review): Src1SDWA is derived from Src0VT, not Src1VT - confirm
  // this is intentional (SDWA source types typically agree).
  field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
  field Operand Src0Mod = getSrcMod<Src0VT>.ret;
  field Operand Src1Mod = getSrcMod<Src1VT>.ret;
  field Operand Src2Mod = getSrcMod<Src2VT>.ret;
  field Operand Src0ModDPP = getSrcModDPP<Src0VT>.ret;
  field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret;
  field Operand Src2ModDPP = getSrcModDPP<Src2VT>.ret;
  field Operand Src0ModVOP3DPP = getSrcModDPP<Src0VT>.ret;
  field Operand Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret;
  field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT>.ret;
  field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
  field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;


  field bit IsMAI = 0;
  field bit IsVOP3P = 0;
  field bit IsDOT = 0;
  field bit IsSingle = 0;
  field bit IsWMMA = 0;

  field bit HasDst = !ne(DstVT.Value, untyped.Value);
  field bit HasDst32 = HasDst;
  field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case
  field bit EmitDstSel = EmitDst;
  field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
  field bit HasSrc0 = !ne(Src0VT.Value, untyped.Value);
  field bit HasSrc1 = !ne(Src1VT.Value, untyped.Value);
  field bit HasSrc2 = !ne(Src2VT.Value, untyped.Value);

  field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
  field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret;
  field bit HasSrc2FloatMods = isFloatType<Src2VT>.ret;

  field bit HasSrc0IntMods = isIntType<Src0VT>.ret;
  field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
  field bit HasSrc2IntMods = isIntType<Src2VT>.ret;

  field bit HasClamp = !or(isModifierType<Src0VT>.ret, EnableClamp);
  field bit HasSDWAClamp = EmitDst;
  field bit HasFPClamp = !and(isFloatType<DstVT>.ret, HasClamp);
  field bit HasIntClamp = !if(isFloatType<DstVT>.ret, 0, HasClamp);
  field bit HasClampLo = HasClamp;
  field bit HasClampHi = !and(isPackedType<DstVT>.ret, HasClamp);
  field bit HasHigh = 0;

  field bit IsPacked = isPackedType<Src0VT>.ret;
  field bit HasOpSel = IsPacked;
  field bit HasOMod = !if(IsVOP3P, 0, isFloatType<DstVT>.ret);
  field bit HasSDWAOMod = isFloatType<DstVT>.ret;

  field bit HasModifiers = !or(isModifierType<Src0VT>.ret,
                               isModifierType<Src1VT>.ret,
                               isModifierType<Src2VT>.ret,
                               HasOMod);

  field bit HasSrc0Mods = HasModifiers;
  field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0);
  field bit HasSrc2Mods = !if(HasModifiers, !or(HasSrc2FloatMods, HasSrc2IntMods), 0);

  // Which extended encodings (DPP/SDWA variants) exist for this profile.
  field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  field bit HasExtVOP3DPP = getHasVOP3DPP<DstVT, Src0VT, Src1VT, Src2VT>.ret;
  field bit HasExtDPP = !or(getHasDPP<NumSrcArgs>.ret, HasExtVOP3DPP);
  field bit HasExt32BitDPP = getHasExt32BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  field bit HasExt64BitDPP = getHasExt64BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  field bit HasExtSDWA = getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  field bit HasExtSDWA9 = HasExtSDWA;
  field int NeedPatGen = PatGenMode.NoPattern;

  field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
  field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
  field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods);

  field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs));

  // VOP3b instructions are a special case with a second explicit
  // output. This is manually overridden for them.
  field dag Outs32 = Outs;
  field dag Outs64 = !if(HasDst,(outs DstRC64:$vdst),(outs));
  field dag OutsDPP = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret;
  field dag OutsDPP8 = OutsDPP;
  field dag OutsVOP3DPP = getOutsDPP<HasDst, DstVT, DstRCVOP3DPP>.ret;
  field dag OutsVOP3DPP8 = OutsVOP3DPP;
  field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret;

  // Input operand lists for each encoding.
  field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
  field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
                             HasIntClamp, HasModifiers, HasSrc2Mods,
                             HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
  field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
                                   NumSrcArgs, HasClamp, HasOpSel,
                                   Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
  field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
                                           NumSrcArgs, HasClamp, HasOMod,
                                           getOpSelMod<Src0VT>.ret,
                                           getOpSelMod<Src1VT>.ret,
                                           getOpSelMod<Src2VT>.ret>.ret;
  field dag InsDPP = !if(HasExtDPP,
                         getInsDPP<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs,
                                   HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret,
                         (ins));
  field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs,
                                   HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
  field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, Src2DPP,
                                 NumSrcArgs, HasModifiers,
                                 Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
  field dag InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP,
    Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
    Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel, IsVOP3P>.ret;
  field dag InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
  field dag InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
  field dag InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
  field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
                                 HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
                                 DstVT>.ret;
  field dag InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X);
  // It is a slight misnomer to use the deferred f32 operand type for non-float
  // operands, but this operand type will only be used if the other dual
  // component is FMAAK or FMAMK
  field dag InsVOPDXDeferred = (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X, VGPR_32:$vsrc1X);
  field dag InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y);
  field dag InsVOPDYDeferred = (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y, VGPR_32:$vsrc1Y);


  // Assembly strings for each encoding, built by the getAsm* helpers above.
  field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
  field string AsmDPP = !if(HasExtDPP,
                            getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, "");
  field string AsmDPP16 = getAsmDPP16<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
  // DPP8 encoding has no fields for modifiers, and it is enforced by setting
  // the asm operand name via this HasModifiers flag
  field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret;
  field string AsmVOP3Base = getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
    HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, HasModifiers,
    HasModifiers, DstVT>.ret;
  field string Asm64 = AsmVOP3Base;
  field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp, HasOpSel>.ret;
  field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
                                              HasClamp,
                                              HasOMod,
                                              HasSrc0FloatMods,
                                              HasSrc1FloatMods,
                                              HasSrc2FloatMods>.ret;
  field string AsmVOP3DPP = getAsmVOP3DPP<AsmVOP3Base>.ret;
  field string AsmVOP3DPP16 = getAsmVOP3DPP16<AsmVOP3Base>.ret;
  field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3Base>.ret;
  field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
  field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
  field string AsmVOPDX = getAsmVOPDPart<NumSrcArgs, "X">.ret;
  field string AsmVOPDY = getAsmVOPDPart<NumSrcArgs, "Y">.ret;
  field string TieRegDPP = "$old";
}

// Profile wrapper that disables every extended (DPP/SDWA) encoding.
class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
  let HasExt = 0;
  let HasExtDPP = 0;
  let HasExtVOP3DPP = 0;
  let HasExt32BitDPP = 0;
  let HasExt64BitDPP = 0;
  let HasExtSDWA = 0;
  let HasExtSDWA9 = 0;
}

// Profile wrapper that overrides the pattern-generation mode (PatGenMode).
class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.ArgVT> {
  let NeedPatGen = mode;
}

// VOPC_Profile_t16, VOPC_NoSdst_Profile_t16, VOPC_Class_Profile_t16,
// VOPC_Class_NoSdst_Profile_t16, and VOP_MAC_F16_t16 do not inherit from this
// class, so copy changes to this class in those profiles
class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
  let IsTrue16 = 1;
  // Most DstVT are 16-bit, but not all
  let DstRC = getVALUDstForVT_t16<DstVT>.ret;
  let DstRC64 = getVALUDstForVT<DstVT>.ret;
  let Src1RC32 = RegisterOperand<getVregSrcForVT_t16<Src1VT>.ret>;
  let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret;
  let Src1DPP = getVregSrcForVT_t16<Src1VT>.ret;
  let Src2DPP = getVregSrcForVT_t16<Src2VT>.ret;
  let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
  let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
  let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
}

// Concrete profiles, named VOP_<DstVT>_<Src0VT>[_<Src1VT>[_<Src2VT>]].
def VOP_F16_F16 : VOPProfile<[f16, f16, untyped, untyped]>;
def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>;
def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>;
def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>;

def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>;
def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>;
def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], /*EnableClamp=*/1>;

def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;

def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>;
def VOP_I32_I16 : VOPProfile <[i32, i16, untyped, untyped]>;
def VOP_I16_I32 : VOPProfile <[i16, i32, untyped, untyped]>;

def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;

def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>;
def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>;

def VOP_F16_V2F16_V2F16_F16 : VOPProfile <[f16, v2f16, v2f16, f16]>;
def VOP_I16_V2I16_V2I16_I16 : VOPProfile <[i16, v2i16, v2i16, i16]>;
def VOP_F32_V2I16_V2I16_F32 : VOPProfile <[f32, v2i16, v2i16, f32]>;

def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>;

def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;

def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
def VOP_F64_F32 : VOPProfile <[f64, f32, untyped, untyped]>;
def VOP_F64_F64 : VOPProfile <[f64, f64, untyped, untyped]>;
def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>;
def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>;
def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>;
def VOP_I64_I64 : VOPProfile <[i64, i64, untyped, untyped]>;

def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>;
def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=*/1>;
def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;

def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;

def VOP_F16_F32_F16_F32 : VOPProfile <[f16, f32, f16, f32]>;
def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>;
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>;

def VOP_F32_V2F16_V2F16_F32 : VOPProfile <[f32, v2f16, v2f16, f32]>;
def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>;

// Wide-accumulator profiles (MAI/WMMA-style dst == src2 accumulators).
def VOP_V4F32_F32_F32_V4F32 : VOPProfile <[v4f32, f32, f32, v4f32]>;
def VOP_V16F32_F32_F32_V16F32 : VOPProfile <[v16f32, f32, f32, v16f32]>;
def VOP_V32F32_F32_F32_V32F32 : VOPProfile <[v32f32, f32, f32, v32f32]>;
def VOP_V4F32_V4F16_V4F16_V4F32 : VOPProfile <[v4f32, v4f16, v4f16, v4f32]>;
def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile <[v16f32, v4f16, v4f16, v16f32]>;
def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32f32, v4f16, v4f16, v32f32]>;
def VOP_V4F32_V2I16_V2I16_V4F32 : VOPProfile <[v4f32, v2i16, v2i16, v4f32]>;
def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile <[v16f32, v2i16, v2i16, v16f32]>;
def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32f32, v2i16, v2i16, v32f32]>;
def VOP_V4I32_I32_I32_V4I32 : VOPProfile <[v4i32, i32, i32, v4i32]>;
def VOP_V16I32_I32_I32_V16I32 : VOPProfile <[v16i32, i32, i32, v16i32]>;
def VOP_V32I32_I32_I32_V32I32 : VOPProfile <[v32i32, i32, i32, v32i32]>;

def VOP_V4F64_F64_F64_V4F64 : VOPProfile <[v4f64, f64, f64, v4f64]>;
def VOP_V1F64_F64_F64_V1F64 : VOPProfile <[v1f64, f64, f64, v1f64]>;

def VOP_V2F32_V2F32_V2F32_V2F32 : VOPProfile <[v2f32, v2f32, v2f32, v2f32]>;
def VOP_V2F32_V2F32_V2F32 : VOPProfile <[v2f32, v2f32, v2f32, untyped]>;
def VOP_V2I32_V2I32_V2I32 : VOPProfile <[v2i32, v2i32, v2i32, untyped]>;
def VOP_V4F32_V4I16_V4I16_V4F32 : VOPProfile <[v4f32, v4i16, v4i16, v4f32]>;
def VOP_V16F32_V4I16_V4I16_V16F32 : VOPProfile <[v16f32, v4i16, v4i16, v16f32]>;
def VOP_V32F32_V4I16_V4I16_V32F32 : VOPProfile <[v32f32, v4i16, v4i16, v32f32]>;

def VOP_V4I32_I64_I64_V4I32 : VOPProfile <[v4i32, i64, i64, v4i32]>;
def VOP_V16I32_I64_I64_V16I32 : VOPProfile <[v16i32, i64, i64, v16i32]>;
def VOP_V4F32_V2F32_V2F32_V4F32 : VOPProfile <[v4f32, v2f32, v2f32, v4f32]>;
def VOP_V16F32_V2F32_V2F32_V16F32 : VOPProfile <[v16f32, v2f32, v2f32, v16f32]>;
def VOP_V4F32_I64_I64_V4F32 : VOPProfile <[v4f32, i64, i64, v4f32]>;
def VOP_V16F32_I64_I64_V16F32 : VOPProfile <[v16f32, i64, i64, v16f32]>;

def VOP_V4F32_V4F16_V8F16_I32 : VOPProfile <[v4f32, v4f16, v8f16, i32]>;
def VOP_V16F32_V4F16_V8F16_I32 : VOPProfile <[v16f32, v4f16, v8f16, i32]>;
def VOP_V4F32_V4I16_V8I16_I32 : VOPProfile <[v4f32, v4i16, v8i16, i32]>;
def VOP_V16F32_V4I16_V8I16_I32 : VOPProfile <[v16f32, v4i16, v8i16, i32]>;
def VOP_V4I32_V2I32_V4I32_I32 : VOPProfile <[v4i32, v2i32, v4i32, i32]>;
def VOP_V16I32_V2I32_V4I32_I32 : VOPProfile <[v16i32, v2i32, v4i32, i32]>;
def VOP_V4F32_V2I32_V4I32_I32 : VOPProfile <[v4f32, v2i32, v4i32, i32]>;
def VOP_V16F32_V2I32_V4I32_I32 : VOPProfile <[v16f32, v2i32, v4i32, i32]>;

// Marker record pairing an instruction with its operand-commuted twin.
// Consumed by the getCommuteOrig/getCommuteRev InstrMappings below.
class Commutable_REV <string revOp, bit isOrig> {
  string RevOp = revOp;
  bit IsOrig = isOrig;
}

// Marker record pairing an atomic with its returnless form. Consumed by the
// getAtomicNoRetOp InstrMapping below.
class AtomicNoRet <string noRetOp, bit isRet> {
  string NoRetOp = noRetOp;
  bit IsRet = isRet;
}

//===----------------------------------------------------------------------===//
// Interpolation opcodes
//===----------------------------------------------------------------------===//

class VINTRPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVINTRPDst">;

// CodeGen-only VINTRP pseudo; the per-target real encodings below map back to
// it through SIMCInstr/getMCOpcodeGen.
class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
  VINTRPCommon <outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {
  let isPseudo = 1;
  let isCodeGenOnly = 1;
}

// FIXME-GFX10: WIP.
class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
                      string asm, int encodingFamily> :
  VINTRPCommon <outs, ins, asm, []>,
  VINTRPe <op>,
  SIMCInstr<opName, encodingFamily> {
}

// Real VINTRP encoding for GFX8 (VI).
class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
                      string asm> :
  VINTRPCommon <outs, ins, asm, []>,
  VINTRPe_vi <op>,
  SIMCInstr<opName, SIEncodingFamily.VI> {
  let AssemblerPredicate = VIAssemblerPredicate;
  let DecoderNamespace = "GFX8";
}

// FIXME-GFX10: WIP.
// Instantiates one VINTRP instruction as a codegen pseudo plus its
// per-subtarget real encodings: _si (GFX6/GFX7), _vi (GFX8), and _gfx10
// (which reuses the SI encoding layout under SIEncodingFamily.GFX10).
multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
                     list<dag> pattern = []> {
  def "" : VINTRP_Pseudo <NAME, outs, ins, pattern>;

  let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
    def _si : VINTRP_Real_si <op, NAME, outs, ins, asm, SIEncodingFamily.SI>;
  } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

  def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>;

  let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
    def _gfx10 : VINTRP_Real_si<op, NAME, outs, ins, asm, SIEncodingFamily.GFX10>;
  } // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
}

//===----------------------------------------------------------------------===//
// Vector instruction mappings
//===----------------------------------------------------------------------===//

// Maps an opcode in e32 form to its e64 equivalent.
def getVOPe64 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["Size", "VOP3"];
  let KeyCol = ["4", "0"];
  let ValueCols = [["8", "1"]];
}

// Maps an opcode in e64 form to its e32 equivalent.
def getVOPe32 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["Size", "VOP3"];
  let KeyCol = ["8", "1"];
  let ValueCols = [["4", "0"]];
}

// Maps ordinary instructions to their SDWA counterparts.
def getSDWAOp : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["Default"];
  let ValueCols = [["SDWA"]];
}

// Maps SDWA instructions to their ordinary counterparts.
def getBasicFromSDWAOp : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["SDWA"];
  let ValueCols = [["Default"]];
}

// Maps ordinary instructions to their DPP counterparts.
def getDPPOp32 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["Default"];
  let ValueCols = [["DPP"]];
}

// Maps a VOP3 instruction to its VOP3 DPP counterpart.
def getDPPOp64 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["VOP3"];
  let ValueCols = [["VOP3_DPP"]];
}

// Maps a commuted opcode to its original version.
def getCommuteOrig : InstrMapping {
  let FilterClass = "Commutable_REV";
  let RowFields = ["RevOp"];
  let ColFields = ["IsOrig"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Maps an original opcode to its commuted version.
def getCommuteRev : InstrMapping {
  let FilterClass = "Commutable_REV";
  let RowFields = ["RevOp"];
  let ColFields = ["IsOrig"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Maps a pseudo opcode (SIEncodingFamily.NONE key) to its real MC opcode
// for each encoding family. The column order here defines the columns of
// the generated getMCOpcodeGen table and must be kept in sync with the
// SIEncodingFamily def at the top of this file (and the matching enum in
// SIInstrInfo.cpp).
def getMCOpcodeGen : InstrMapping {
  let FilterClass = "SIMCInstr";
  let RowFields = ["PseudoInstr"];
  let ColFields = ["Subtarget"];
  let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
  // These columns must be kept in sync with the SIEncodingFamily enumeration.
  let ValueCols = [[!cast<string>(SIEncodingFamily.SI)],
                   [!cast<string>(SIEncodingFamily.VI)],
                   [!cast<string>(SIEncodingFamily.SDWA)],
                   [!cast<string>(SIEncodingFamily.SDWA9)],
                   // GFX80 encoding is added to work around a multiple matching
                   // issue for buffer instructions with unpacked d16 data. This
                   // does not actually change the encoding, and thus may be
                   // removed later.
                   [!cast<string>(SIEncodingFamily.GFX80)],
                   [!cast<string>(SIEncodingFamily.GFX9)],
                   [!cast<string>(SIEncodingFamily.GFX10)],
                   [!cast<string>(SIEncodingFamily.SDWA10)],
                   [!cast<string>(SIEncodingFamily.GFX90A)],
                   [!cast<string>(SIEncodingFamily.GFX940)],
                   [!cast<string>(SIEncodingFamily.GFX11)]];
}

// Get equivalent SOPK instruction (maps a base compare op to its SOPK
// form).
def getSOPKOp : InstrMapping {
  let FilterClass = "SOPKInstTable";
  let RowFields = ["BaseCmpOp"];
  let ColFields = ["IsSOPK"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Maps a MUBUF opcode to its Addr64 addressing-mode variant.
def getAddr64Inst : InstrMapping {
  let FilterClass = "MUBUFAddr64Table";
  let RowFields = ["OpName"];
  let ColFields = ["IsAddr64"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Identity lookup on the IsAddr64 = 1 column: key and value are the same
// column, so this resolves only for opcodes that are themselves the
// Addr64 variant.
def getIfAddr64Inst : InstrMapping {
  let FilterClass = "MUBUFAddr64Table";
  let RowFields = ["OpName"];
  let ColFields = ["IsAddr64"];
  let KeyCol = ["1"];
  let ValueCols = [["1"]];
}

// Maps an atomic opcode to its returnless version.
def getAtomicNoRetOp : InstrMapping {
  let FilterClass = "AtomicNoRet";
  let RowFields = ["NoRetOp"];
  let ColFields = ["IsRet"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Maps a GLOBAL to its SADDR form.
def getGlobalSaddrOp : InstrMapping {
  let FilterClass = "GlobalSaddrTable";
  let RowFields = ["SaddrOp"];
  let ColFields = ["IsSaddr"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Maps a GLOBAL SADDR to its VADDR form.
def getGlobalVaddrOp : InstrMapping {
  let FilterClass = "GlobalSaddrTable";
  let RowFields = ["SaddrOp"];
  let ColFields = ["IsSaddr"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Maps a v_cmpx opcode with sdst to opcode without sdst.
def getVCMPXNoSDstOp : InstrMapping {
  let FilterClass = "VCMPXNoSDstTable";
  let RowFields = ["NoSDstOp"];
  let ColFields = ["HasSDst"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Maps a SOPP to a SOPP with S_NOP.
def getSOPPWithRelaxation : InstrMapping {
  let FilterClass = "SOPPRelaxTable";
  let RowFields = ["KeyName"];
  let ColFields = ["IsRelaxed"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Maps flat scratch opcodes by addressing modes. The Mode column values
// (SS/ST/SV/SVS) come from FlatScratchInst; NOTE(review): presumably
// they encode the saddr/no-addr/vaddr/combined operand forms -- confirm
// against the FlatScratchInst definitions.
def getFlatScratchInstSTfromSS : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SS"];
  let ValueCols = [["ST"]];
}

// SV-mode flat scratch opcode -> its SS-mode form.
def getFlatScratchInstSSfromSV : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SV"];
  let ValueCols = [["SS"]];
}

// SVS-mode flat scratch opcode -> its SV-mode form.
def getFlatScratchInstSVfromSVS : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SVS"];
  let ValueCols = [["SV"]];
}

// SS-mode flat scratch opcode -> its SV-mode form.
def getFlatScratchInstSVfromSS : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SS"];
  let ValueCols = [["SV"]];
}

// Maps a MAC-form MFMA opcode (IsMac = 1) to its non-MAC equivalent.
def getMFMAEarlyClobberOp : InstrMapping {
  let FilterClass = "MFMATable";
  let RowFields = ["FMAOp"];
  let ColFields = ["IsMac"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Maps a v_cmp instruction to its v_cmpx equivalent.
def getVCMPXOpFromVCMP : InstrMapping {
  let FilterClass = "VCMPVCMPXTable";
  let RowFields = ["VCMPOp"];
  let ColFields = ["IsVCMPX"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Emitted searchable table of VOPD component records, keyed by the base
// VOP opcode (lookup helper: getVOPDComponentHelper).
def VOPDComponentTable : GenericTable {
  let FilterClass = "VOPD_Component";
  let CppTypeName = "VOPDComponentInfo";
  let Fields = ["BaseVOP", "VOPDOp", "CanBeVOPDX"];
  let PrimaryKey = ["BaseVOP"];
  let PrimaryKeyName = "getVOPDComponentHelper";
}

// Reverse lookup into VOPDComponentTable: find a component record from
// its VOPD opcode.
def getVOPDBaseFromComponent : SearchIndex {
  let Table = VOPDComponentTable;
  let Key = ["VOPDOp"];
}

// Emitted table of VOPD instruction pairs, keyed by the pair's opcode
// (lookup helper: getVOPDOpcodeHelper).
def VOPDPairs : GenericTable {
  let FilterClass = "VOPD_Base";
  let CppTypeName = "VOPDInfo";
  let Fields = ["Opcode", "OpX", "OpY"];
  let PrimaryKey = ["Opcode"];
  let PrimaryKeyName = "getVOPDOpcodeHelper";
}

// Look up a VOPD pair from its X and Y component opcodes.
def getVOPDInfoFromComponentOpcodes : SearchIndex {
  let Table = VOPDPairs;
  let Key = ["OpX", "OpY"];
}

include "SIInstructions.td"

include "DSInstructions.td"
include "MIMGInstructions.td"