//===-- SIInstrInfo.td -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Wavefront-size predicates. Each pairs a codegen predicate (a C++ expression
// on the subtarget's wavefront size) with an assembler predicate on the
// corresponding FeatureWavefrontSize* feature.
def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">,
  AssemblerPredicate <(all_of FeatureWavefrontSize32)>;
def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">,
  AssemblerPredicate <(all_of FeatureWavefrontSize64)>;

// Extends PredicateControl with two assembler-predicate fields, defaulting to
// isGFX6GFX7 (SI) and isGFX8GFX9 (VI).
class GCNPredicateControl : PredicateControl {
  Predicate SIAssemblerPredicate = isGFX6GFX7;
  Predicate VIAssemblerPredicate = isGFX8GFX9;
}

// Except for the NONE field, this must be kept in sync with the
// SIEncodingFamily enum in SIInstrInfo.cpp and the columns of the
// getMCOpcodeGen table.
def SIEncodingFamily {
  int NONE = -1;
  int SI = 0;
  int VI = 1;
  int SDWA = 2;
  int SDWA9 = 3;
  int GFX80 = 4;
  int GFX9 = 5;
  int GFX10 = 6;
  int SDWA10 = 7;
  int GFX90A = 8;
  int GFX940 = 9;
  int GFX11 = 10;
  int GFX12 = 11;
}

//===----------------------------------------------------------------------===//
// SI DAG Nodes
//===----------------------------------------------------------------------===//

// Node for AMDGPUISD::CLAMP, typed as a unary FP operation.
def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;

// Type profile shared by the scalar buffer loads below: one result and three
// operands (operand indices in SDTCisVT count the result as index 0).
def SDTSBufferLoad : SDTypeProfile<1, 3,
    [                    // vdata
     SDTCisVT<1, v4i32>, // rsrc
     SDTCisVT<2, i32>,   // offset(imm)
     SDTCisVT<3, i32>]>; // cachepolicy

// Scalar buffer load nodes. All share SDTSBufferLoad and are marked as loads
// carrying a memory operand; none of them declares a chain.
def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD", SDTSBufferLoad,
                            [SDNPMayLoad, SDNPMemOperand]>;

def SIsbuffer_load_byte : SDNode<"AMDGPUISD::SBUFFER_LOAD_BYTE", SDTSBufferLoad,
                                 [SDNPMayLoad, SDNPMemOperand]>;

def SIsbuffer_load_ubyte
    : SDNode<"AMDGPUISD::SBUFFER_LOAD_UBYTE", SDTSBufferLoad,
             [SDNPMayLoad, SDNPMemOperand]>;

def SIsbuffer_load_short
    : SDNode<"AMDGPUISD::SBUFFER_LOAD_SHORT", SDTSBufferLoad,
             [SDNPMayLoad, SDNPMemOperand]>;

def SIsbuffer_load_ushort
    : SDNode<"AMDGPUISD::SBUFFER_LOAD_USHORT", SDTSBufferLoad,
             [SDNPMayLoad, SDNPMemOperand]>;

// Node for AMDGPUISD::DS_ORDERED_COUNT: i32 result, i32 and i16 operands.
// It is chained, takes an input glue, and may both load and store.
def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
  SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
>;

// Type profile for a two-operand FP atomic: the result is floating point and
// has the same type as operand 2; operand 1 is a pointer.
def SDTAtomic2_f32 : SDTypeProfile<1, 2, [
  SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
]>;

def SIatomic_fmin : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMIN", SDTAtomic2_f32,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;

def SIatomic_fmax : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMAX", SDTAtomic2_f32,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;

// load_d16_{lo|hi} ptr, tied_input
// Operand 1 is the pointer; the result has the same type as operand 2.
def SIload_d16 : SDTypeProfile<1, 2, [
  SDTCisPtrTy<1>,
  SDTCisSameAs<0, 2>
]>;


// Type profile for typed buffer loads: one result, eight operands.
def SDTtbuffer_load : SDTypeProfile<1, 8,
  [                     // vdata
   SDTCisVT<1, v4i32>,  // rsrc
   SDTCisVT<2, i32>,    // vindex(VGPR)
   SDTCisVT<3, i32>,    // voffset(VGPR)
   SDTCisVT<4, i32>,    // soffset(SGPR)
   SDTCisVT<5, i32>,    // offset(imm)
   SDTCisVT<6, i32>,    // format(imm)
   SDTCisVT<7, i32>,    // cachepolicy, swizzled buffer(imm)
   SDTCisVT<8, i1>      // idxen(imm)
  ]>;

def SItbuffer_load :   SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTtbuffer_load,
                              [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
def SItbuffer_load_d16 : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT_D16",
                                SDTtbuffer_load,
                                [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

// Type profile for typed buffer stores: no result, nine operands. Operand 0
// (vdata) is left unconstrained; the constraints cover operands 1-8.
def SDTtbuffer_store : SDTypeProfile<0, 9,
    [                     // vdata
     SDTCisVT<1, v4i32>,  // rsrc
     SDTCisVT<2, i32>,    // vindex(VGPR)
     SDTCisVT<3, i32>,    // voffset(VGPR)
     SDTCisVT<4, i32>,    // soffset(SGPR)
     SDTCisVT<5, i32>,    // offset(imm)
     SDTCisVT<6, i32>,    // format(imm)
     SDTCisVT<7, i32>,    // cachepolicy, swizzled buffer(imm)
     SDTCisVT<8, i1>      // idxen(imm)
    ]>;

def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
                             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SItbuffer_store_d16 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16",
                                SDTtbuffer_store,
                                [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

// Type profile for (untyped) buffer loads: one result, seven operands. Same
// layout as SDTtbuffer_load without the format operand.
def SDTBufferLoad : SDTypeProfile<1, 7,
    [                    // vdata
     SDTCisVT<1, v4i32>, // rsrc
     SDTCisVT<2, i32>,   // vindex(VGPR)
     SDTCisVT<3, i32>,   // voffset(VGPR)
     SDTCisVT<4, i32>,   // soffset(SGPR)
     SDTCisVT<5, i32>,   // offset(imm)
     SDTCisVT<6, i32>,   // cachepolicy, swizzled buffer(imm)
     SDTCisVT<7, i1>]>;  // idxen(imm)

// Buffer load nodes; all share SDTBufferLoad and the same node properties.
def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ubyte : SDNode <"AMDGPUISD::BUFFER_LOAD_UBYTE", SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ushort : SDNode <"AMDGPUISD::BUFFER_LOAD_USHORT", SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_byte : SDNode <"AMDGPUISD::BUFFER_LOAD_BYTE", SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_short: SDNode <"AMDGPUISD::BUFFER_LOAD_SHORT", SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_TFE", SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format_d16 : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_D16",
                                SDTBufferLoad,
                                [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;

// Type profile for (untyped) buffer stores: no result, eight operands.
// Operand 0 (vdata) is left unconstrained; the constraints cover operands 1-7.
def SDTBufferStore : SDTypeProfile<0, 8,
    [                    // vdata
     SDTCisVT<1, v4i32>, // rsrc
     SDTCisVT<2, i32>,   // vindex(VGPR)
     SDTCisVT<3, i32>,   // voffset(VGPR)
     SDTCisVT<4, i32>,   // soffset(SGPR)
     SDTCisVT<5, i32>,   // offset(imm)
     SDTCisVT<6, i32>,   // cachepolicy, swizzled buffer(imm)
     SDTCisVT<7, i1>]>;  // idxen(imm)

// Buffer store nodes; all share SDTBufferStore and the same node properties.
def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
                             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_byte: SDNode <"AMDGPUISD::BUFFER_STORE_BYTE",
                         SDTBufferStore,
                         [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_short : SDNode <"AMDGPUISD::BUFFER_STORE_SHORT",
                           SDTBufferStore,
                           [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_format : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT",
                                SDTBufferStore,
                                [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16",
                                SDTBufferStore,
                                [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

// For each buffer atomic opcode this defines two records:
//   NAME         - the SDNode itself (one result, eight operands; operand 1,
//                  the data input, is left unconstrained).
//   NAME_noret   - a PatFrag wrapping that node with HasNoUse set, i.e. it
//                  only matches when the node's result is unused.
multiclass SDBufferAtomic<string opcode> {
  def "" : SDNode <opcode,
    SDTypeProfile<1, 8,
         [SDTCisVT<2, v4i32>, // rsrc
         SDTCisVT<3, i32>,   // vindex(VGPR)
         SDTCisVT<4, i32>,   // voffset(VGPR)
         SDTCisVT<5, i32>,   // soffset(SGPR)
         SDTCisVT<6, i32>,   // offset(imm)
         SDTCisVT<7, i32>,   // cachepolicy(imm)
         SDTCisVT<8, i1>]>,  // idxen(imm)
    [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
  >;
  def "_noret" : PatFrag<
    (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
      node:$offset, node:$cachepolicy, node:$idxen),
    (!cast<SDNode>(NAME) node:$vdata_in, node:$rsrc, node:$vindex,
      node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
      node:$idxen)> {
    let HasNoUse = true;
  }
}

defm SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
defm SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">;
defm SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">;
defm SIbuffer_atomic_smin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMIN">;
defm SIbuffer_atomic_umin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMIN">;
defm SIbuffer_atomic_smax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMAX">;
defm SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
defm SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">;
defm SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
defm SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
defm SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">;
defm SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">;
defm SIbuffer_atomic_csub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_CSUB">;
defm SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">;
defm SIbuffer_atomic_fadd_bf16 : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD_BF16">;
defm SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">;
defm SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">;
defm SIbuffer_atomic_cond_sub_u32 : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_COND_SUB_U32">;

// Compare-and-swap takes one more data operand than the other buffer atomics
// (nine operands; operands 1 and 2 are left unconstrained), so it is defined
// separately instead of through SDBufferAtomic.
def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
  SDTypeProfile<1, 9,
    [SDTCisVT<3, v4i32>, // rsrc
     SDTCisVT<4, i32>,   // vindex(VGPR)
     SDTCisVT<5, i32>,   // voffset(VGPR)
     SDTCisVT<6, i32>,   // soffset(SGPR)
     SDTCisVT<7, i32>,   // offset(imm)
     SDTCisVT<8, i32>,   // cachepolicy(imm)
     SDTCisVT<9, i1>]>,  // idxen(imm)
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;

// Variant of SIbuffer_atomic_cmpswap that only matches when the result is
// unused (HasNoUse).
def SIbuffer_atomic_cmpswap_noret : PatFrag<
  (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset,
    node:$soffset, node:$offset, node:$cachepolicy, node:$idxen),
  (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex,
    node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
    node:$idxen)> {
  let HasNoUse = true;
}

// Result-less global atomic node: a pointer operand plus one data operand of
// type `ty`.
class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
  SDTypeProfile<0, 2,
      [SDTCisPtrTy<0>,     // vaddr
       SDTCisVT<1, ty>]>,  // vdata
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;

// Pointer-typed result with two operands of the same type as the result.
def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
  SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
>;

// Pointer-typed result with one operand of the same type.
def SIlds : SDNode<"AMDGPUISD::LDS",
  SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
>;

// d16 load nodes (lo/hi halves, plain and 8-bit _u8/_i8 variants). All use
// the SIload_d16 profile: (ptr, tied_input) -> result of tied_input's type.
def SIload_d16_lo : SDNode<"AMDGPUISD::LOAD_D16_LO",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_lo_u8 : SDNode<"AMDGPUISD::LOAD_D16_LO_U8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_lo_i8 : SDNode<"AMDGPUISD::LOAD_D16_LO_I8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi : SDNode<"AMDGPUISD::LOAD_D16_HI",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi_u8 : SDNode<"AMDGPUISD::LOAD_D16_HI_U8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi_i8 : SDNode<"AMDGPUISD::LOAD_D16_HI_I8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

// Chained node with a single integer operand, optional input glue and an
// output glue.
def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
  SDTypeProfile<0 ,1, [SDTCisInt<0>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;

// FP truncation nodes for the two explicit rounding directions; both use the
// generic FP-round type profile.
def SIfptrunc_round_upward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_UPWARD",
  SDTFPRoundOp
>;

def SIfptrunc_round_downward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_DOWNWARD",
  SDTFPRoundOp
>;

//===----------------------------------------------------------------------===//
// ValueType helpers
//===----------------------------------------------------------------------===//

// `ret` is set when SrcVT is an integer type other than i1.
class isIntType<ValueType SrcVT> {
  bit ret = !and(SrcVT.isInteger, !ne(SrcVT.Value, i1.Value));
}

//===----------------------------------------------------------------------===//
// PatFrags for global memory operations
//===----------------------------------------------------------------------===//

defm atomic_load_fmin : binary_atomic_op_all_as<SIatomic_fmin, 0>;
defm atomic_load_fmax : binary_atomic_op_all_as<SIatomic_fmax, 0>;

//===----------------------------------------------------------------------===//
// SDNodes PatFrags for loads/stores with a glue input.
// This is for SDNodes and PatFrag for local loads and stores to
// enable s_mov_b32 m0, -1 to be glued to the memory instructions.
//
// These mirror the regular load/store PatFrags and rely on special
// processing during Select() to add the glued copy.
//
//===----------------------------------------------------------------------===//

// ISD::LOAD / ISD::ATOMIC_LOAD redeclared with an extra SDNPInGlue property.
def AMDGPUld_glue : SDNode <"ISD::LOAD", SDTLoad,
  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad,
  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

// Glued load fragments. Each layer adds one predicate on top of the previous
// fragment: unindexed -> {non-ext, anyext, sext, zext} -> memory type.
def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> {
  let IsLoad = 1;
  let IsUnindexed = 1;
}

def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

// Glued atomic loads, one fragment per memory type (i8/i16/i32/i64).
def atomic_load_8_glue : PatFrag<(ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i8;
}

def atomic_load_16_glue : PatFrag<(ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i16;
}

def atomic_load_32_glue : PatFrag<(ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_load_64_glue : PatFrag<(ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}

// Extending glued loads, selected by extension kind.
def extload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsAnyExtLoad = 1;
}

def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsSignExtLoad = 1;
}

def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsZeroExtLoad = 1;
}

// Extending glued loads further restricted by memory type (i8 / i16).
def extloadi8_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def zextloadi8_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def extloadi16_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def zextloadi16_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def sextloadi8_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}


// The *_local_m0 fragments restrict the glued loads above to the address
// spaces listed in LoadAddress_local.
let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
  let IsNonExtLoad = 1;
}

def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>;
def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>;
def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>;

def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>;
def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>;
def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>;
} // End let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces

// Alignment-constrained variants of load_local_m0. Note that these declare a
// MinAlignment field directly (`int MinAlignment = N;`), whereas the store
// counterparts below inherit from Aligned<N>.
def load_align8_local_m0 : PatFrag<(ops node:$ptr),
                                   (load_local_m0 node:$ptr)> {
  let IsLoad = 1;
  int MinAlignment = 8;
}

def load_align16_local_m0 : PatFrag<(ops node:$ptr),
                                   (load_local_m0 node:$ptr)> {
  let IsLoad = 1;
  int MinAlignment = 16;
}

let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
def atomic_load_8_local_m0 : PatFrag<(ops node:$ptr),
                                      (atomic_load_8_glue node:$ptr)>;
def atomic_load_16_local_m0 : PatFrag<(ops node:$ptr),
                                      (atomic_load_16_glue node:$ptr)>;
def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
                                      (atomic_load_32_glue node:$ptr)>;
def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr),
                                      (atomic_load_64_glue node:$ptr)>;
} // End let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces


// ISD::STORE / ISD::ATOMIC_STORE redeclared with an extra SDNPInGlue property.
def AMDGPUst_glue : SDNode <"ISD::STORE", SDTStore,
  [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
>;

def AMDGPUatomic_st_glue : SDNode <"ISD::ATOMIC_STORE", SDTAtomicStore,
  [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
>;

// Glued store fragments, layered the same way as the loads:
// unindexed -> {non-truncating, truncating} -> memory type.
def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr),
                                   (AMDGPUst_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsUnindexed = 1;
}

def store_glue : PatFrag<(ops node:$val, node:$ptr),
                      (unindexedstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

def truncstore_glue : PatFrag<(ops node:$val, node:$ptr),
  (unindexedstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 1;
}

def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr),
                           (truncstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i8;
  let IsTruncStore = 1;
}

def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr),
                           (truncstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i16;
  let IsTruncStore = 1;
}

// Glued stores restricted to the address spaces in StoreAddress_local.
let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
def store_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                             (store_glue node:$val, node:$ptr)>;
def truncstorei8_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                    (truncstorei8_glue node:$val, node:$ptr)>;
def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                    (truncstorei16_glue node:$val, node:$ptr)>;
} // End let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces

// Alignment-constrained variants of store_local_m0 (via Aligned<N>).
def store_align8_local_m0 : PatFrag <(ops node:$value, node:$ptr),
                                     (store_local_m0 node:$value, node:$ptr)>,
                            Aligned<8> {
  let IsStore = 1;
}

def store_align16_local_m0 : PatFrag <(ops node:$value, node:$ptr),
                                     (store_local_m0 node:$value, node:$ptr)>,
                            Aligned<16> {
  let IsStore = 1;
}

// Local loads/stores whose memory operand alignment is below 4. The same
// check is written twice: PredicateCode for SelectionDAG (on the MemSDNode)
// and GISelPredicateCode for GlobalISel (on the first memory operand of MI).
let PredicateCode = [{return cast<MemSDNode>(N)->getAlign() < 4;}],
    GISelPredicateCode = [{return (*MI.memoperands_begin())->getAlign() < 4;}],
    AddressSpaces = [ AddrSpaces.Local ] in {
def load_align_less_than_4_local : PatFrag<(ops node:$ptr),
                                           (load_local node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def load_align_less_than_4_local_m0 : PatFrag<(ops node:$ptr),
                                              (load_local_m0 node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def store_align_less_than_4_local : PatFrag <(ops node:$value, node:$ptr),
                                             (store_local node:$value, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

def store_align_less_than_4_local_m0 : PatFrag <(ops node:$value, node:$ptr),
                                                (store_local_m0 node:$value, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}
} // End let PredicateCode = ..., GISelPredicateCode = ..., AddressSpaces = ...

// Glued atomic stores, one fragment per memory type (i8/i16/i32/i64).
// NOTE(review): the operand labels here are ($ptr, $value) while the
// *_local_m0 wrappers below label the same two positions ($val, $ptr).
// The labels are only names and are passed through positionally, but one of
// the two spellings is misleading - confirm the real operand order of
// ISD::ATOMIC_STORE and align the names.
def atomic_store_8_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i8;
}

def atomic_store_16_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i16;
}

def atomic_store_32_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_store_64_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}

let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
def atomic_store_8_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                       (atomic_store_8_glue node:$val, node:$ptr)>;
def atomic_store_16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                       (atomic_store_16_glue node:$val, node:$ptr)>;
def atomic_store_32_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                       (atomic_store_32_glue node:$val, node:$ptr)>;
def atomic_store_64_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                       (atomic_store_64_glue node:$val, node:$ptr)>;
} // End let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces


//===----------------------------------------------------------------------===//
// SDNodes PatFrags for a16 loads and stores with 3 components.
// v3f16/v3i16 is widened to v4f16/v4i16, so we need to match on the memory
// load/store size.
//===----------------------------------------------------------------------===//

// The four classes below wrap a buffer intrinsic node `name` in a PatFrag
// that forwards every operand unchanged and additionally requires the memory
// type to equal `vt`.
class mubuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
            node:$auxiliary, node:$idxen),
  (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
            node:$auxiliary, node:$idxen)> {
  let IsLoad = 1;
  let MemoryVT = vt;
}

class mubuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
            node:$auxiliary, node:$idxen),
  (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
            node:$auxiliary, node:$idxen)> {
  let IsStore = 1;
  let MemoryVT = vt;
}

// Typed-buffer variants: same as above with an extra $format operand.
class mtbuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
            node:$format, node:$auxiliary, node:$idxen),
  (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
            node:$format, node:$auxiliary, node:$idxen)> {
  let IsLoad = 1;
  let MemoryVT = vt;
}

class mtbuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
            node:$format, node:$auxiliary, node:$idxen),
  (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
            node:$format, node:$auxiliary, node:$idxen)> {
  let IsStore = 1;
  let MemoryVT = vt;
}

//===----------------------------------------------------------------------===//
// SDNodes PatFrags for d16 loads
//===----------------------------------------------------------------------===//

// Load fragment taking a pointer and the tied input value.
class LoadD16Frag <SDPatternOperator op> : PatFrag<
  (ops node:$ptr, node:$tied_in),
  (op node:$ptr, node:$tied_in)> {
  let IsLoad = 1;
}

// Instantiates the six d16 load fragments once per address space name `as`,
// each restricted to the address spaces of the matching LoadAddress_<as>
// record. The 8-bit variants additionally require an i8 memory type.
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

def load_d16_hi_#as : LoadD16Frag <SIload_d16_hi>;

def az_extloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_u8> {
  let MemoryVT = i8;
}

def sextloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_i8> {
  let MemoryVT = i8;
}

def load_d16_lo_#as : LoadD16Frag <SIload_d16_lo>;

def az_extloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_u8> {
  let MemoryVT = i8;
}

def sextloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_i8> {
  let MemoryVT = i8;
}

} // End let AddressSpaces = ...
} // End foreach as

// Shift fragments with the two operands swapped: the first fragment operand
// ($src1) is the shift amount and the second ($src0) is the value shifted.
def lshr_rev : PatFrag <
  (ops node:$src1, node:$src0),
  (srl $src0, $src1)
>;

def ashr_rev : PatFrag <
  (ops node:$src1, node:$src0),
  (sra $src0, $src1)
>;

def lshl_rev : PatFrag <
  (ops node:$src1, node:$src0),
  (shl $src0, $src1)
>;

// ctpop($src0) + $src1
def add_ctpop : PatFrag <
  (ops node:$src0, node:$src1),
  (add (ctpop $src0), $src1)
>;

// ~($src0 ^ $src1)
def xnor : PatFrag <
  (ops node:$src0, node:$src1),
  (not (xor $src0, $src1))
>;

// Defines shl1_add .. shl4_add: ($src0 << I) + $src1, where the shift has a
// single use. The SelectionDAG predicate always fails and the GlobalISel
// predicate always succeeds, so these only ever match under GlobalISel.
foreach I = 1-4 in {
def shl#I#_add : PatFrag <
  (ops node:$src0, node:$src1),
  (add (shl_oneuse $src0, (i32 I)), $src1)> {
  // FIXME: Poor substitute for disabling pattern in SelectionDAG
  let PredicateCode = [{return false;}];
  let GISelPredicateCode = [{return true;}];
}
}

// For an atomic operation named by `op_name` this defines:
//   NAME_glue       - the node, with SDNPInGlue added. The opcode string is
//                     "<ns>::ATOMIC_<op_name>", where <ns> is AMDGPUISD when
//                     is_amdgpu is set and ISD otherwise.
//   NAME_local_m0*  - returning and no-return fragment families restricted
//                     to StoreAddress_local.
//   NAME_region_m0* - the same, restricted to StoreAddress_region.
// `tc` is the node's type profile; `IsInt` is forwarded to
// binary_atomic_op / noret_binary_atomic_op.
multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
                            SDTypeProfile tc = SDTAtomic2,
                            bit IsInt = 1> {

  def _glue : SDNode <
    !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, tc,
    [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
  >;

  let AddressSpaces = StoreAddress_local.AddrSpaces in {
    defm _local_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
    defm _local_m0 : noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"),
                                             IsInt>;
  }

  let AddressSpaces = StoreAddress_region.AddrSpaces in {
    defm _region_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
    defm _region_m0 : noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"),
                                              IsInt>;
  }
}

// Integer atomics use the defaults (ISD namespace, SDTAtomic2, IsInt = 1).
defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
defm atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
defm atomic_load_uinc_wrap : SIAtomicM0Glue2 <"LOAD_UINC_WRAP">;
defm atomic_load_udec_wrap : SIAtomicM0Glue2 <"LOAD_UDEC_WRAP">;
defm atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
defm atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">;
defm atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">;
defm atomic_load_or : SIAtomicM0Glue2 <"LOAD_OR">;
defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
defm atomic_swap : SIAtomicM0Glue2 <"SWAP">;
// FP atomics use SDTAtomic2_f32 with IsInt = 0; FMIN/FMAX additionally select
// the AMDGPUISD namespace (is_amdgpu = 1), FADD stays in ISD.
defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>;
defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32, 0>;
defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32, 0>;

// Immediate transforms: rebuild the matched (target) constant as a target
// constant of the named width. The *imm forms match `imm`, the *timm forms
// match `timm`.
def as_i1timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
}]>;

def as_i8imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8);
}]>;

// NOTE(review): despite its name this sign-extends and produces MVT::i16,
// whereas as_i8imm above zero-extends and produces MVT::i8. Confirm that the
// difference is intentional before relying on either for a new pattern.
def as_i8timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;

def as_i16imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;

def as_i16timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;

def as_i32imm: SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

def as_i32timm: SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

def as_i64imm: SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
}]>;

// Turns a condition-code node into an i32 target constant holding N->get().
def cond_as_i32imm: SDNodeXForm<cond, [{
  return CurDAG->getTargetConstant(N->get(), SDLoc(N), MVT::i32);
}]>;

// Copied from the AArch64 backend:
// Emits the bit pattern of an FP immediate as an i32 target constant.
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
}]>;

// Converts a frame index node into a target frame index of type i32.
def frameindex_to_targetframeindex : SDNodeXForm<frameindex, [{
  auto FI = cast<FrameIndexSDNode>(N);
  return CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
}]>;

// Copied from the AArch64 backend:
// Emits the bit pattern of an FP immediate as an i64 target constant.
def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;

// Extracts bit `bitnum` of the immediate as an i1 target constant. The bit
// index is spliced into the C++ body by TableGen string concatenation.
class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
  uint64_t Imm = N->getZExtValue();
  unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;
  return CurDAG->getTargetConstant(Bit, SDLoc(N), MVT::i1);
}]>;

// i32 target immediate that fits in 16 bits either as signed or as unsigned;
// rewritten through as_i16timm.
def SIMM16bit : TImmLeaf <i32,
  [{return isInt<16>(Imm) || isUInt<16>(Imm);}],
  as_i16timm
>;

// i64 immediate whose upper 32 bits are all zero.
def i64imm_32bit : ImmLeaf<i64, [{
  return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
}]>;

// Immediates accepted by the corresponding isInlineImmediate* helper
// (defined outside this file).
def InlineImm16 : ImmLeaf<i16, [{
  return isInlineImmediate16(Imm);
}]>;

def InlineImm32 : ImmLeaf<i32, [{
  return isInlineImmediate32(Imm);
}]>;

def InlineImm64 : ImmLeaf<i64, [{
  return isInlineImmediate64(Imm);
}]>;

def InlineImmFP32 : FPImmLeaf<f32, [{
  return isInlineImmediate(Imm);
}]>;

def InlineImmFP64 : FPImmLeaf<f64, [{
  return isInlineImmediate(Imm);
}]>;


// Leaf matching `frag` only when isVGPRImm(N) holds (helper defined outside
// this file).
class VGPRImm <dag frag> : PatLeaf<frag, [{
  return isVGPRImm(N);
}]>;

// Replaces an immediate by its negation, always as an i32 (non-target)
// constant.
def NegateImm : SDNodeXForm<imm, [{
  return CurDAG->getConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

// TODO: When FP inline imm values work?
// Immediates in [-64, -17]; the matched value is negated through NegateImm.
def NegSubInlineConst32 : ImmLeaf<i32, [{
  return Imm < -16 && Imm >= -64;
}], NegateImm>;

// NOTE(review): this leaf is typed i16 but reuses NegateImm, which always
// builds an MVT::i32 constant - confirm that is what users of this leaf
// expect.
def NegSubInlineIntConst16 : ImmLeaf<i16, [{
  return Imm < -16 && Imm >= -64;
}], NegateImm>;

// i32 immediate below 32. NOTE(review): there is no lower bound, so negative
// values also satisfy this predicate - confirm callers never see them.
def ShiftAmt32Imm : ImmLeaf <i32, [{
  return Imm < 32;
}]>;

// f16 value in a VGPR_32 for which fp16SrcZerosHighBits() accepts the
// producing node's opcode (helper defined outside this file).
def fp16_zeros_high_16bits : PatLeaf<(f16 VGPR_32:$src), [{
  return fp16SrcZerosHighBits(N->getOpcode());
}]>;


//===----------------------------------------------------------------------===//
// MUBUF/SMEM Patterns
//===----------------------------------------------------------------------===//

// Masks the combined cache-policy immediate down to the cache-policy bits.
// The mask depends on the subtarget generation: CPol::ALL from GFX12 on,
// CPol::ALL_pregfx12 before. The result is an i8 target constant.
def extract_cpol : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(
      N->getZExtValue() & (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12
                               ? AMDGPU::CPol::ALL
                               : AMDGPU::CPol::ALL_pregfx12),
      SDLoc(N), MVT::i8);
}]>;

// Extracts the swizzle flag from the same immediate as a 0/1 i8 target
// constant, again selecting the bit by subtarget generation.
def extract_swz : SDNodeXForm<timm, [{
  const bool Swizzle =
      N->getZExtValue() & (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12
                               ? AMDGPU::CPol::SWZ
                               : AMDGPU::CPol::SWZ_pregfx12);
  return CurDAG->getTargetConstant(Swizzle, SDLoc(N), MVT::i8);
}]>;

// Like extract_cpol, but additionally ORs in CPol::GLC.
def extract_cpol_set_glc : SDNodeXForm<timm, [{
  const uint32_t cpol = N->getZExtValue() & (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12
                               ? AMDGPU::CPol::ALL
                               : AMDGPU::CPol::ALL_pregfx12);
  return CurDAG->getTargetConstant(cpol | AMDGPU::CPol::GLC, SDLoc(N), MVT::i8);
}]>;

//===----------------------------------------------------------------------===//
// Custom Operands
//===----------------------------------------------------------------------===//

// Branch target operand for SOPP instructions: PC-relative, with dedicated
// encoder and decoder hooks.
def SOPPBrTarget : CustomOperand<OtherVT> {
  let PrintMethod = "printOperand";
  let EncoderMethod = "getSOPPBrEncoding";
  let DecoderMethod = "decodeSOPPBrTarget";
  let OperandType = "OPERAND_PCREL";
}

def si_ga : Operand<iPTR>;

def InterpSlot : CustomOperand<i32>;

// It appears to be necessary to create a separate operand for this to
// be able to parse attr<num> with no space.
def InterpAttr : CustomOperand<i32>;

def InterpAttrChan : ImmOperand<i32>;

def SplitBarrier : ImmOperand<i32> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_INLINE_SPLIT_BARRIER_INT32";
  let DecoderMethod = "decodeSplitBarrier";
  let PrintMethod = "printOperand";
}

// Assembler operand class used by the ExpSrc* operands below.
def VReg32OrOffClass : AsmOperandClass {
  let Name = "VReg32OrOff";
  let ParserMethod = "parseVReg32OrOff";
}

def SendMsg : CustomOperand<i32>;

def Swizzle : CustomOperand<i16, 1>;

def Endpgm : CustomOperand<i16, 1>;

def SWaitCnt : CustomOperand<i32>;

def DepCtr : CustomOperand<i32>;

def SDelayALU : CustomOperand<i32>;

include "SIInstrFormats.td"
include "VIInstrFormats.td"

// Assembler operand class shared by the boolean register operands below.
def BoolReg : AsmOperandClass {
  let Name = "BoolReg";
  let ParserMethod = "parseBoolReg";
  let RenderMethod = "addRegOperands";
}

// Register operand over SReg_1 that is parsed as a BoolReg and decoded with
// decodeBoolReg.
class BoolRC : RegisterOperand<SReg_1> {
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
}

// Same parser/decoder hooks as BoolRC, but over SReg_1_XEXEC.
def SSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
}

def VOPDstS64orS32 : BoolRC {
  let PrintMethod = "printVOPDst";
}

// SCSrc_i1 is the operand for pseudo instructions only.
// Boolean immediates shall not be exposed to codegen instructions.
def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_REG_IMM_INT32";
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
}

// ===----------------------------------------------------------------------===//
// ExpSrc* Special cases for exp src operands which are printed as
// "off" depending on en operand.
// ===----------------------------------------------------------------------===//

def ExpSrc0 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc0";
  let ParserMatchClass = VReg32OrOffClass;
}

def ExpSrc1 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc1";
  let ParserMatchClass = VReg32OrOffClass;
}

def ExpSrc2 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc2";
  let ParserMatchClass = VReg32OrOffClass;
}

def ExpSrc3 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc3";
  let ParserMatchClass = VReg32OrOffClass;
}

// SDWA source operand over VS_32. The operand type and decoder names are
// derived from the value type: "FP" or "INT" from vt.isFP, followed by
// vt.Size (e.g. OPERAND_REG_INLINE_C_FP16 / decodeSDWASrc16 for f16).
class SDWASrc<ValueType vt> : RegisterOperand<VS_32> {
  let OperandNamespace = "AMDGPU";
  string Type = !if(vt.isFP, "FP", "INT");
  let OperandType = "OPERAND_REG_INLINE_C_"#Type#vt.Size;
  let DecoderMethod = "decodeSDWASrc"#vt.Size;
  let EncoderMethod = "getSDWASrcEncoding";
}

def SDWASrc_i32 : SDWASrc<i32>;
def SDWASrc_i16 : SDWASrc<i16>;
def SDWASrc_f32 : SDWASrc<f32>;
def SDWASrc_f16 : SDWASrc<f16>;

// Boolean destination of SDWA VOPC instructions, with its own encoder and
// decoder hooks.
def SDWAVopcDst : BoolRC {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_SDWA_VOPC_DST";
  let EncoderMethod = "getSDWAVopcDstEncoding";
  let DecoderMethod = "decodeSDWAVopcDst";
  let PrintMethod = "printVOPDst";
}

// Operand written in assembly as "<Prefix>:<int>". ParserMethod is a C++
// lambda, assembled by string concatenation, that calls parseIntWithPrefix
// with the prefix, the operand's ImmTy and an optional conversion callback
// (ConvertMethod; the default string "nullptr" passes none).
class NamedIntOperand<ValueType Type, string Prefix, string Name = NAME,
                      string ConvertMethod = "nullptr">
    : CustomOperand<Type, 1, Name> {
  let ParserMethod =
    "[this](OperandVector &Operands) -> ParseStatus { "#
    "return parseIntWithPrefix(\""#Prefix#"\", Operands, "#
    "AMDGPUOperand::"#ImmTy#", "#ConvertMethod#"); }";
}

// i1 operand parsed and printed as the named bit `Id`: the parser lambda
// calls parseNamedBit, the printer lambda calls printNamedBit, and the
// predicate checks the operand's ImmTy.
class NamedBitOperand<string Id, string Name = NAME>
    : CustomOperand<i1, 1, Name> {
  let PredicateMethod = "isImmTy<AMDGPUOperand::"#ImmTy#">";
  let ParserMethod =
    "[this](OperandVector &Operands) -> ParseStatus { "#
    "return parseNamedBit(\""#Id#"\", Operands, AMDGPUOperand::"#ImmTy#"); }";
  let PrintMethod = "[this](const MCInst *MI, unsigned OpNo, "#
    "const MCSubtargetInfo &STI, raw_ostream &O) { "#
    "printNamedBit(MI, OpNo, O, \""#Id#"\"); }";
}

// Variant of an existing custom operand `Op` with default value `Value`; it
// reuses Op's value type, parser match class and print method.
class DefaultOperand<CustomOperand Op, int Value>
  : OperandWithDefaultOps<Op.Type, (ops (Op.Type Value))>,
    CustomOperandProps<1> {
  let ParserMatchClass = Op.ParserMatchClass;
  let PrintMethod = Op.PrintMethod;
}

// i32 SDWA selector operand; the parser lambda calls parseSDWASel with `Id`.
class SDWAOperand<string Id, string Name = NAME>
    : CustomOperand<i32, 1, Name> {
  let ParserMethod =
    "[this](OperandVector &Operands) -> ParseStatus { "#
    "return parseSDWASel(Operands, \""#Id#"\", AMDGPUOperand::"#ImmTy#"); }";
}

// i32 operand defaulting to 0; the parser lambda calls
// parseOperandArrayWithPrefix with `Id`.
class ArrayOperand0<string Id, string Name = NAME>
  : OperandWithDefaultOps<i32, (ops (i32 0))>,
    CustomOperandProps<1, Name> {
  let ParserMethod =
    "[this](OperandVector &Operands) -> ParseStatus { "#
    "return parseOperandArrayWithPrefix(\""#Id#"\", Operands, "#
    "AMDGPUOperand::"#ImmTy#"); }";
}

// The brace-less `let ... in` applies only to the single definition that
// follows it, i.e. only flat_offset gets ImmTy = "ImmTyOffset" here.
let ImmTy = "ImmTyOffset" in
def flat_offset : CustomOperand<i32, 1, "FlatOffset">;
def offset : NamedIntOperand<i32, "offset", "Offset">;
def offset0 : NamedIntOperand<i8, "offset0", "Offset0">;
def offset1 : NamedIntOperand<i8, "offset1", "Offset1">;

def gds : NamedBitOperand<"gds", "GDS">;

def omod : CustomOperand<i32, 1, "OModSI">;
def omod0 : DefaultOperand<omod, 0>;

// Operands with a default of 0 have to be distinct from the variants without
// a default; this helps in cases where the operand appears before a
// mandatory operand.
def clampmod : NamedBitOperand<"clamp", "ClampSI">;
def clampmod0 : DefaultOperand<clampmod, 0>;
def highmod : NamedBitOperand<"high", "High">;

// Cache-policy operand and its default-valued / value-predicated variants.
def CPol : CustomOperand<i32, 1>;
def CPol_0 : DefaultOperand<CPol, 0>;
def CPol_GLC1 : DefaultOperand<CPol, 1>;
def CPol_GLC : ValuePredicatedOperand<CPol, "Op.getImm() & CPol::GLC">;
def CPol_NonGLC : ValuePredicatedOperand<CPol, "!(Op.getImm() & CPol::GLC)", 1>;
def CPol_GLC_WithDefault : DefaultOperand<CPol_GLC, !shl(1, CPolBit.GLC)>;
def CPol_NonGLC_WithDefault : DefaultOperand<CPol_NonGLC, 0>;

def TFE : NamedBitOperand<"tfe">;
def UNorm : NamedBitOperand<"unorm">;
def DA : NamedBitOperand<"da">;
def R128A16 : CustomOperand<i1, 1>;
def A16 : NamedBitOperand<"a16">;
def D16 : NamedBitOperand<"d16">;
def LWE : NamedBitOperand<"lwe">;
def exp_compr : NamedBitOperand<"compr", "ExpCompr">;
def exp_vm : NamedBitOperand<"vm", "ExpVM">;

def FORMAT : CustomOperand<i8>;

def DMask : NamedIntOperand<i16, "dmask">;
def Dim : CustomOperand<i8>;

// SDWA selectors.
def dst_sel : SDWAOperand<"dst_sel", "SDWADstSel">;
def src0_sel : SDWAOperand<"src0_sel", "SDWASrc0Sel">;
def src1_sel : SDWAOperand<"src1_sel", "SDWASrc1Sel">;
def dst_unused : CustomOperand<i32, 1, "SDWADstUnused">;

// Array-style modifiers, all defaulting to 0.
def op_sel0 : ArrayOperand0<"op_sel", "OpSel">;
def op_sel_hi0 : ArrayOperand0<"op_sel_hi", "OpSelHi">;
def neg_lo0 : ArrayOperand0<"neg_lo", "NegLo">;
def neg_hi0 : ArrayOperand0<"neg_hi", "NegHi">;

def IndexKey16bit : CustomOperand<i32, 1>;
def IndexKey8bit : CustomOperand<i32, 1>;

// DPP controls.
def dpp8 : CustomOperand<i32, 0, "DPP8">;
def dpp_ctrl : CustomOperand<i32, 0, "DPPCtrl">;

// row_mask and bank_mask both take "0xf" as DefaultValue.
let DefaultValue = "0xf" in {
  def row_mask : NamedIntOperand<i32, "row_mask", "DppRowMask">;
  def bank_mask : NamedIntOperand<i32, "bank_mask", "DppBankMask">;
}
// The parsed bound_ctrl value is passed through convertDppBoundCtrl.
def bound_ctrl : NamedIntOperand<i1, "bound_ctrl", "DppBoundCtrl",
  "[this] (int64_t &BC) -> bool { return convertDppBoundCtrl(BC); }">;
def FI : NamedIntOperand<i32, "fi", "DppFI">;

def blgp : CustomOperand<i32, 1, "BLGP">;
def cbsz : NamedIntOperand<i32, "cbsz", "CBSZ">;
def abid : NamedIntOperand<i32, "abid", "ABID">;

def hwreg : CustomOperand<i32, 0, "Hwreg">;

def exp_tgt : CustomOperand<i32, 0, "ExpTgt">;

def wait_vdst : NamedIntOperand<i8, "wait_vdst", "WaitVDST">;
def wait_exp : NamedIntOperand<i8, "wait_exp", "WaitEXP">;
def wait_va_vdst : NamedIntOperand<i8, "wait_va_vdst", "WaitVAVDst">;
// NOTE(review): the def is named wait_va_vsrc while its asm prefix and
// operand name say "vm_vsrc" -- confirm this mismatch is intentional.
def wait_va_vsrc : NamedIntOperand<i8, "wait_vm_vsrc", "WaitVMVSrc">;

// Literal-constant operand of value type vt; the operand type and the print
// method names are derived from vt's bit size.
class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_KIMM"#vt.Size;
  let PrintMethod = "printU"#vt.Size#"ImmOperand";
  let DecoderMethod = "decodeOperand_KImmFP";
}

// 32-bit VALU immediate operand that uses the constant bus.
def KImmFP32 : KImmFPOperand<i32>;

// 32-bit VALU immediate operand with a 16-bit value that uses the
// constant bus.
def KImmFP16 : KImmFPOperand<i16>;

//===----------------------------------------------------------------------===//
// Source-modifier operands: asm match classes and the operands using them.
//===----------------------------------------------------------------------===//

// Register-or-immediate source with FP input modifiers, opSize bits wide.
class FPInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "RegOrImmWithFP"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithFPInputMods";
  let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
}

// Same parser as above, but named and predicated as register-or-inline-imm.
class FPVCSrcInputModsMatchClass <int opSize> : FPInputModsMatchClass <opSize> {
  let Name = "RegOrInlineImmWithFP"#opSize#"InputMods";
  let PredicateMethod = "isRegOrInlineImmWithFP"#opSize#"InputMods";
}

def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
def FPT16InputModsMatchClass : FPInputModsMatchClass<16> {
  let Name = "RegOrImmWithFPT16InputMods";
  let PredicateMethod = "isRegOrImmWithFPT16InputMods";
}
def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;

def FP16VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<16>;
def FP32VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<32>;

// Common base of every *_modifiers operand: an i32 operand of type
// OPERAND_INPUT_MODS matched through matchClass.
class InputMods <AsmOperandClass matchClass> : Operand <i32> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_INPUT_MODS";
  let ParserMatchClass = matchClass;
}

class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
def FPT16InputMods : FPInputMods<FPT16InputModsMatchClass>;
def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;

def FP16VCSrcInputMods : FPInputMods<FP16VCSrcInputModsMatchClass>;
def FP32VCSrcInputMods : FPInputMods<FP32VCSrcInputModsMatchClass>;

// Integer counterparts of the FP match classes above.
class IntInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "RegOrImmWithInt"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithIntInputMods";
  let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods";
}
class IntVCSrcInputModsMatchClass <int opSize> : IntInputModsMatchClass <opSize> {
  let Name = "RegOrInlineImmWithInt"#opSize#"InputMods";
  let PredicateMethod = "isRegOrInlineImmWithInt"#opSize#"InputMods";
}
def IntT16InputModsMatchClass : IntInputModsMatchClass<16> {
  let Name = "RegOrImmWithIntT16InputMods";
  let PredicateMethod = "isRegOrImmWithIntT16InputMods";
}
def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>;

class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}
def IntT16InputMods : IntInputMods<IntT16InputModsMatchClass>;
def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
def Int32VCSrcInputMods : IntInputMods<Int32VCSrcInputModsMatchClass>;

// op_sel-only sources: parsed and predicated as a plain register-or-immediate.
class OpSelModsMatchClass : AsmOperandClass {
  let Name = "OpSelMods";
  let ParserMethod = "parseRegOrImm";
  let PredicateMethod = "isRegOrImm";
}

def IntOpSelModsMatchClass : OpSelModsMatchClass;
def IntOpSelMods : InputMods<IntOpSelModsMatchClass>;

// SDWA sources with FP input modifiers.
class FPSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "SDWAWithFP"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithFPInputMods";
  let PredicateMethod = "isSDWAFP"#opSize#"Operand";
}

def FP16SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<16>;
def FP32SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<32>;

class FPSDWAInputMods <FPSDWAInputModsMatchClass matchClass>
    : InputMods <matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

def FP16SDWAInputMods : FPSDWAInputMods<FP16SDWAInputModsMatchClass>;
def FP32SDWAInputMods : FPSDWAInputMods<FP32SDWAInputModsMatchClass>;

// VGPR-only sources with FP input modifiers.
def FPVRegInputModsMatchClass : AsmOperandClass {
  let Name = "VRegWithFPInputMods";
  let ParserMethod = "parseRegWithFPInputMods";
  let PredicateMethod = "isVRegWithInputMods";
}

// 16-bit flavour of the class above; IsFake16 picks the class name and is
// forwarded as the template argument of the isT16VRegWithInputMods predicate.
class FPT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
  defvar Fake16Arg = !if(IsFake16, "true", "false");
  let Name = !if(IsFake16, "Fake16VRegWithFPInputMods",
                           "T16VRegWithFPInputMods");
  let ParserMethod = "parseRegWithFPInputMods";
  let PredicateMethod = "isT16VRegWithInputMods<" # Fake16Arg # ">";
}

def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

class FPT16VRegInputMods<bit IsFake16>
    : InputMods <FPT16VRegInputModsMatchClass<IsFake16>> {
  let PrintMethod = "printOperandAndFPInputMods";
}

// SDWA sources with integer input modifiers.
class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "SDWAWithInt"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithIntInputMods";
  let PredicateMethod = "isSDWAInt"#opSize#"Operand";
}

def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>;
def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>;
// Keeps the 32-bit SDWA predicate but parses a plain register-or-immediate.
def Bin32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32> {
  let Name = "SDWAWithBin32InputMods";
  let ParserMethod = "parseRegOrImm";
}

class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass>
    : InputMods <matchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}

def Int16SDWAInputMods : IntSDWAInputMods<Int16SDWAInputModsMatchClass>;
def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>;
def Bin32SDWAInputMods : IntSDWAInputMods<Bin32SDWAInputModsMatchClass>;

// VGPR-only sources with integer input modifiers.
def IntVRegInputModsMatchClass : AsmOperandClass {
  let Name = "VRegWithIntInputMods";
  let ParserMethod = "parseRegWithIntInputMods";
  let PredicateMethod = "isVRegWithInputMods";
}

// 16-bit flavour of the class above; see FPT16VRegInputModsMatchClass for the
// role of IsFake16.
class IntT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
  defvar Fake16Arg = !if(IsFake16, "true", "false");
  let Name = !if(IsFake16, "Fake16VRegWithIntInputMods",
                           "T16VRegWithIntInputMods");
  let ParserMethod = "parseRegWithIntInputMods";
  let PredicateMethod = "isT16VRegWithInputMods<" # Fake16Arg # ">";
}

class IntT16VRegInputMods<bit IsFake16>
    : InputMods <IntT16VRegInputModsMatchClass<IsFake16>> {
  let PrintMethod = "printOperandAndIntInputMods";
}

def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}

// Packed sources currently reuse the generic register-or-immediate parser
// and predicate; the dedicated predicates are left commented out.
class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "PackedFP"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImm";
  let PredicateMethod = "isRegOrImm";
//  let PredicateMethod = "isPackedFP"#opSize#"InputMods";
}

class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "PackedInt"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImm";
  let PredicateMethod = "isRegOrImm";
//  let PredicateMethod = "isPackedInt"#opSize#"InputMods";
}

def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;

class PackedFPInputMods <PackedFPInputModsMatchClass matchClass>
    : InputMods <matchClass> {
//  let PrintMethod = "printPackedFPInputMods";
}

class PackedIntInputMods <PackedIntInputModsMatchClass matchClass>
    : InputMods <matchClass> {
  //let PrintMethod = "printPackedIntInputMods";
}

def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;

//===----------------------------------------------------------------------===//
// Complex patterns
//===----------------------------------------------------------------------===//

// Each ComplexPattern names its C++ selector; the integer is the number of
// operands that selector produces.
def DS1Addr1Offset : ComplexPattern<iPTR, 2, "SelectDS1Addr1Offset">;
def DS64Bit4ByteAligned : ComplexPattern<iPTR, 3, "SelectDS64Bit4ByteAligned">;
def DS128Bit8ByteAligned : ComplexPattern<iPTR, 3, "SelectDS128Bit8ByteAligned">;

def MOVRELOffset : ComplexPattern<iPTR, 2, "SelectMOVRELOffset">;

def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;

// Modifiers for floating point instructions.
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;

// VOP3 modifiers used for instructions that do not read canonicalized
// floating point values (i.e. integer operations with FP source
// modifiers)
def VOP3ModsNonCanonicalizing
    : ComplexPattern<untyped, 2, "SelectVOP3ModsNonCanonicalizing">;

def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;

def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;

def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;

def VOP3PModsDOT : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">;
def VOP3PModsNeg : ComplexPattern<untyped, 1, "SelectVOP3PModsNeg">;
def WMMAOpSelVOP3PMods : ComplexPattern<untyped, 1, "SelectWMMAOpSelVOP3PMods">;

def WMMAModsF32NegAbs : ComplexPattern<untyped, 2, "SelectWMMAModsF32NegAbs">;
def WMMAModsF16Neg : ComplexPattern<untyped, 2, "SelectWMMAModsF16Neg">;
def WMMAModsF16NegAbs : ComplexPattern<untyped, 2, "SelectWMMAModsF16NegAbs">;
def WMMAVISrc : ComplexPattern<untyped, 1, "SelectWMMAVISrc">;
def SWMMACIndex8 : ComplexPattern<untyped, 2, "SelectSWMMACIndex8">;
def SWMMACIndex16 : ComplexPattern<untyped, 2, "SelectSWMMACIndex16">;

def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;

def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;

def VOP3PMadMixModsExt : ComplexPattern<untyped, 2, "SelectVOP3PMadMixModsExt">;
def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;

def VINTERPMods : ComplexPattern<untyped, 2, "SelectVINTERPMods">;
def VINTERPModsHi : ComplexPattern<untyped, 2, "SelectVINTERPModsHi">;

//===----------------------------------------------------------------------===//
// SI assembler operands
//===----------------------------------------------------------------------===//

def SIOperand {
  int ZERO = 0x80;
  int VCC = 0x6A;
  int FLAT_SCR = 0x68;
}

// This should be kept in sync with SISrcMods enum
def SRCMODS {
  int NONE = 0;
  int NEG = 1;
  int ABS = 2;
  int NEG_ABS = 3;

  // NEG_HI shares its value with ABS.
  int NEG_HI = ABS;
  int OP_SEL_0 = 4;
  int OP_SEL_1 = 8;
  // DST_OP_SEL has the same value as OP_SEL_1.
  int DST_OP_SEL = 8;
}

def DSTCLAMP {
  int NONE = 0;
  int ENABLE = 1;
}

def DSTOMOD {
  int NONE = 0;
}

// Hardware register ids, usable as the Reg argument of getHwRegImm.
def HWREG {
  int MODE = 1;
  int STATUS = 2;
  int TRAPSTS = 3;
  int HW_ID = 4;
  int GPR_ALLOC = 5;
  int LDS_ALLOC = 6;
  int IB_STS = 7;
  int MEM_BASES = 15;
  int TBA_LO = 16;
  int TBA_HI = 17;
  int TMA_LO = 18;
  int TMA_HI = 19;
  int FLAT_SCR_LO = 20;
  int FLAT_SCR_HI = 21;
  int XNACK_MASK = 22;
  int POPS_PACKER = 25;
  int SHADER_CYCLES = 29;
}

// Packs a hardware-register immediate: Reg in the low bits, Offset shifted
// left by 6, (Size - 1) shifted left by 11, and the whole value truncated to
// 16 bits.
class getHwRegImm<int Reg, int Offset = 0, int Size = 32> {
  defvar OffsetField = !shl(Offset, 6);
  defvar WidthField = !shl(!add(Size, -1), 11);
  int ret = !and(!or(Reg, OffsetField, WidthField), 65535);
}

//===----------------------------------------------------------------------===//
//
// SI Instruction multiclass helpers.
//
// Instructions with _32 take 32-bit operands.
// Instructions with _64 take 64-bit operands.
//
// VOP_* instructions can use either a 32-bit or 64-bit encoding.
// The 32-bit
// encoding is the standard encoding, but instructions that make use of
// any of the instruction modifiers must use the 64-bit encoding.
//
// Instructions with _e32 use the 32-bit encoding.
// Instructions with _e64 use the 64-bit encoding.
//
//===----------------------------------------------------------------------===//

// Associates a record with the name of its pseudo instruction and an integer
// subtarget id.
class SIMCInstr <string pseudo, int subtarget> {
  string PseudoInstr = pseudo;
  int Subtarget = subtarget;
}

//===----------------------------------------------------------------------===//
// Vector ALU classes
//===----------------------------------------------------------------------===//

// Counts the source operands: the index of the first SrcN whose type is
// `untyped`, or 3 when none of them is.
class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> {
  int ret = !cond(!eq(Src0.Value, untyped.Value): 0,
                  !eq(Src1.Value, untyped.Value): 1, // VOP1
                  !eq(Src2.Value, untyped.Value): 2, // VOP2
                  true: 3);                          // VOP3
}

// Returns the register class to use for the destination of VOP[123C]
// instructions for the given VT.
class getVALUDstForVT<ValueType VT, bit IsTrue16 = 0, bit IsVOP3Encoding = 0> {
  // 16-bit destination: the t16 operands are only used when IsTrue16 is set,
  // and IsVOP3Encoding chooses between the full and the Lo128 variant.
  defvar Dst16 = !if(IsTrue16,
                     !if(IsVOP3Encoding, VOPDstOperand_t16,
                                         VOPDstOperand_t16Lo128),
                     VOPDstOperand<VGPR_32>);
  RegisterOperand ret =
    !if(!eq(VT.Size, 32),
        VOPDstOperand<VGPR_32>,
        !if(!eq(VT.Size, 128),
            VOPDstOperand<VReg_128>,
            !if(!eq(VT.Size, 64),
                VOPDstOperand<VReg_64>,
                !if(!eq(VT.Size, 16),
                    Dst16,
                    VOPDstS64orS32)))); // else VT == i1
}

// Same selection as getVALUDstForVT, except that a 16-bit VT always gets a
// VGPR_32_Lo128 destination.
class getVALUDstForVT_fake16<ValueType VT> {
  RegisterOperand ret =
    !if(!eq(VT.Size, 32),
        VOPDstOperand<VGPR_32>,
        !if(!eq(VT.Size, 128),
            VOPDstOperand<VReg_128>,
            !if(!eq(VT.Size, 64),
                VOPDstOperand<VReg_64>,
                !if(!eq(VT.Size, 16),
                    VOPDstOperand<VGPR_32_Lo128>,
                    VOPDstS64orS32)))); // else VT == i1
}

// Returns the register class to use for the destination of VOP[12C]
// instructions with SDWA extension
class getSDWADstForVT<ValueType VT> {
  RegisterOperand ret = !if(!ne(VT.Size, 1),
                            VOPDstOperand<VGPR_32>, // VOP1/2 32-bit dst
                            SDWAVopcDst);           // VOPC
}

// Returns the register class to use for source 0 of VOP[12C]
// instructions for the given VT.
class getVOPSrc0ForVT<ValueType VT, bit IsTrue16, bit IsFake16 = 1> {
  // Type predicates used below.
  defvar Is64Bit = !eq(VT.Size, 64);
  defvar IsF16   = !or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value));
  defvar IsV2F16 = !or(!eq(VT.Value, v2f16.Value), !eq(VT.Value, v2bf16.Value));
  defvar IsV4F16 = !or(!eq(VT.Value, v4f16.Value), !eq(VT.Value, v4bf16.Value));
  defvar IsI16   = !eq(VT.Value, i16.Value);
  defvar IsV2I16 = !eq(VT.Value, v2i16.Value);

  // Scalar 16-bit sources: the Lo128 operands are only used when IsTrue16 is
  // set, and IsFake16 selects between the fake16 and the true16 variant.
  defvar SrcF16 = !if(IsTrue16,
                      !if(IsFake16, VSrcFake16_f16_Lo128, VSrcT_f16_Lo128),
                      VSrc_f16);
  defvar SrcB16 = !if(IsTrue16,
                      !if(IsFake16, VSrcFake16_b16_Lo128, VSrcT_b16_Lo128),
                      VSrc_b16);

  // NOTE(review): a v4f16/v4bf16 VT presumably has Size == 64 and would then
  // already be caught by the Is64Bit test, leaving the AVSrc_64 arm
  // unreachable -- confirm.
  defvar SrcFP = !if(Is64Bit, VSrc_f64,
                 !if(IsF16,   SrcF16,
                 !if(IsV2F16, VSrc_v2f16,
                 !if(IsV4F16, AVSrc_64,
                              VSrc_f32))));
  defvar SrcInt = !if(Is64Bit, VSrc_b64,
                  !if(IsI16,   SrcB16,
                  !if(IsV2I16, VSrc_v2b16,
                               VSrc_b32)));

  RegisterOperand ret = !if(VT.isFP, SrcFP, SrcInt);
}

// Scalar source operand: 64-bit types use SSrc_b64, everything else SSrc_b32.
class getSOPSrcForVT<ValueType VT> {
  RegisterOperand ret = !if(!eq(VT.Size, 64), SSrc_b64, SSrc_b32);
}

// Returns the vreg register class to use for source operand given VT
class getVregSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 0> {
  // 16-bit case: the Lo128 operands are only used when IsTrue16 is set.
  defvar Src16 = !if (IsTrue16,
                      !if (IsFake16, VGPRSrc_32_Lo128, VGPRSrc_16_Lo128),
                      RegisterOperand<VGPR_32>);
  RegisterOperand ret =
    !if (!eq(VT.Size, 128), RegisterOperand<VReg_128>,
    !if (!eq(VT.Size, 96),  RegisterOperand<VReg_96>,
    !if (!eq(VT.Size, 64),  RegisterOperand<VReg_64>,
    !if (!eq(VT.Size, 48),  RegisterOperand<VReg_64>, // 48-bit uses VReg_64
    !if (!eq(VT.Size, 16),  Src16,
                            RegisterOperand<VGPR_32>)))));
}

// SDWA source operand: picks the 16- or 32-bit flavour by size, then the FP
// or integer one by VT.isFP.
class getSDWASrcForVT <ValueType VT> {
  RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32);
  RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32);
  RegisterOperand ret = !if(VT.isFP, retFlt, retInt);
}

// Returns the register class to use for sources of VOP3 instructions for the
// given VT.
class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> {
  // Type predicates used below.
  defvar IsF16   = !or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value));
  defvar IsV2F16 = !or(!eq(VT.Value, v2f16.Value), !eq(VT.Value, v2bf16.Value));
  defvar IsV4F16 = !or(!eq(VT.Value, v4f16.Value), !eq(VT.Value, v4bf16.Value));

  // 64-bit sources: the two-element 32-bit vectors get dedicated operands.
  defvar Src64 = !if(VT.isFP,
                     !if(!eq(VT.Value, v2f32.Value), VSrc_v2f32, VSrc_f64),
                     !if(!eq(VT.Value, v2i32.Value), VSrc_v2b32, VSrc_b64));

  // Remaining FP types.
  // NOTE(review): a v4f16/v4bf16 VT presumably has Size == 64 and would then
  // be routed to Src64 before this is consulted, leaving the AVSrc_64 arm
  // unreachable -- confirm.
  defvar SrcFP = !if(IsF16,
                     !if(IsTrue16, VSrcT_f16, VSrc_f16),
                     !if(IsV2F16,
                         VSrc_v2f16,
                         !if(IsV4F16, AVSrc_64, VSrc_f32)));

  // Remaining integer types.
  defvar SrcInt = !if(!eq(VT.Value, i16.Value),
                      !if(IsTrue16, VSrcT_b16, VSrc_b16),
                      !if(!eq(VT.Value, v2i16.Value), VSrc_v2b16, VSrc_b32));

  RegisterOperand ret =
    !if(!eq(VT.Size, 128),
        VRegSrc_128,
        !if(!eq(VT.Size, 64),
            Src64,
            !if(!eq(VT.Value, i1.Value),
                SSrc_i1,
                !if(VT.isFP, SrcFP, SrcInt))));
}

// Src2 of VOP3 DPP instructions cannot be a literal
class getVOP3DPPSrcForVT<ValueType VT> {
  defvar SrcFP =
    !if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
         VCSrc_f16,
         !if (!or(!eq(VT.Value, v2f16.Value), !eq(VT.Value, v2bf16.Value)),
              VCSrc_v2f16,
              VCSrc_f32));
  defvar SrcInt =
    !if (!eq(VT.Value, i16.Value),
         VCSrc_b16,
         !if (!eq(VT.Value, v2i16.Value), VCSrc_v2b16, VCSrc_b32));

  RegisterOperand ret =
    !if (!eq(VT.Value, i1.Value), SSrc_i1,
         !if (VT.isFP, SrcFP, SrcInt));
}

// Float or packed int
class isModifierType<ValueType SrcVT> {
  bit ret = !or(
    // Scalar FP types.
    !eq(SrcVT.Value, f16.Value),
    !eq(SrcVT.Value, bf16.Value),
    !eq(SrcVT.Value, f32.Value),
    !eq(SrcVT.Value, f64.Value),
    // 2-element vectors.
    !eq(SrcVT.Value, v2f16.Value),
    !eq(SrcVT.Value, v2i16.Value),
    !eq(SrcVT.Value, v2bf16.Value),
    !eq(SrcVT.Value, v2f32.Value),
    !eq(SrcVT.Value, v2i32.Value),
    // 4-element vectors.
    !eq(SrcVT.Value, v4f16.Value),
    !eq(SrcVT.Value, v4i16.Value),
    !eq(SrcVT.Value, v4bf16.Value),
    !eq(SrcVT.Value, v4f32.Value),
    !eq(SrcVT.Value, v4i32.Value),
    // 8-element vectors.
    !eq(SrcVT.Value, v8f16.Value),
    !eq(SrcVT.Value, v8i16.Value),
    !eq(SrcVT.Value, v8bf16.Value),
    !eq(SrcVT.Value, v8f32.Value),
    !eq(SrcVT.Value, v8i32.Value),
    // 16-element vectors.
    !eq(SrcVT.Value, v16f16.Value),
    !eq(SrcVT.Value, v16i16.Value),
    !eq(SrcVT.Value, v16bf16.Value));
}

// Return type of input modifiers operand for specified input operand
class getSrcMod <ValueType VT, bit IsTrue16 = 0> {
  defvar Mods64 = !if(VT.isFP, FP64InputMods, Int64InputMods);
  defvar Mods16 = !if(VT.isFP,
                      !if(IsTrue16, FPT16InputMods, FP16InputMods),
                      !if(IsTrue16, IntT16InputMods, IntOpSelMods));
  defvar Mods32 = !if(VT.isFP, FP32InputMods, Int32InputMods);

  // Anything that is neither 64 nor 16 bits wide uses the 32-bit operands.
  Operand ret = !if(!eq(VT.Size, 64), Mods64,
                !if(!eq(VT.Size, 16), Mods16,
                                      Mods32));
}

// f16/bf16 sources use the FP16 modifiers operand; every other type uses the
// op_sel-only one.
class getOpSelMod <ValueType VT> {
  Operand ret = !if(!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
                    FP16InputMods, IntOpSelMods);
}

// Return type of input modifiers operand for specified input operand for DPP
class getSrcModDPP <ValueType VT> {
  Operand ret = !if(VT.isFP, FPVRegInputMods, IntVRegInputMods);
}

// As getSrcModDPP, but f16/bf16 and i16 sources get the T16 operand variants
// parameterized by IsFake16.
class getSrcModDPP_t16 <ValueType VT, bit IsFake16 = 1> {
  Operand ret =
    !if (VT.isFP,
         !if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
              FPT16VRegInputMods<IsFake16>,
              FPVRegInputMods),
         !if (!eq(VT.Value, i16.Value),
              IntT16VRegInputMods<IsFake16>,
              IntVRegInputMods));
}

// Return type of input modifiers operand for specified input operand for DPP
class getSrcModVOP3DPP <ValueType VT> {
  Operand ret =
    !if (VT.isFP,
         !if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)),
              FP16VCSrcInputMods,
              FP32VCSrcInputMods),
         Int32VCSrcInputMods);
}

// Return type of input modifiers operand for specified input operand for SDWA
class getSrcModSDWA <ValueType VT> {
  Operand ret =
    !if(!eq(VT.Value, f16.Value),  FP16SDWAInputMods,
    !if(!eq(VT.Value, f32.Value),  FP32SDWAInputMods,
    !if(!eq(VT.Value, i16.Value),  Int16SDWAInputMods,
    !if(!eq(VT.Value, bf16.Value), FP16SDWAInputMods,
                                   Int32SDWAInputMods))));
}

// Returns the input arguments for VOP[12C] instructions for the given SrcVT.
class getIns32 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs> {
  dag ret =
    !if(!eq(NumSrcArgs, 1),
        (ins Src0RC:$src0),                     // VOP1
        !if(!eq(NumSrcArgs, 2),
            (ins Src0RC:$src0, Src1RC:$src1),   // VOP2
            (ins)));
}

// Returns the input arguments for VOP3 instructions for the given SrcVT.
class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
                RegisterOperand Src2RC, int NumSrcArgs,
                bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
                Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {

  // One source operand.
  defvar OneSrcIns =
    !if (HasModifiers,
      // VOP1 with modifiers; omod is always accompanied by clamp.
      !if (HasOMod,
        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
             clampmod0:$clamp, omod0:$omod),
        !if (HasClamp,
          (ins Src0Mod:$src0_modifiers, Src0RC:$src0, clampmod0:$clamp),
          (ins Src0Mod:$src0_modifiers, Src0RC:$src0))),
      // VOP1 without modifiers
      !if (HasClamp,
        (ins Src0RC:$src0, clampmod0:$clamp),
        (ins Src0RC:$src0)));

  // Two source operands.
  defvar TwoSrcIns =
    !if (HasModifiers,
      // VOP2 with modifiers; clamp is emitted here regardless of HasClamp.
      !if (HasOMod,
        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
             Src1Mod:$src1_modifiers, Src1RC:$src1,
             clampmod0:$clamp, omod0:$omod),
        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
             Src1Mod:$src1_modifiers, Src1RC:$src1,
             clampmod0:$clamp)),
      // VOP2 without modifiers
      !if (HasClamp,
        (ins Src0RC:$src0, Src1RC:$src1, clampmod0:$clamp),
        (ins Src0RC:$src0, Src1RC:$src1)));

  // Three source operands, every source carrying modifiers.
  defvar ThreeSrcAllModsIns =
    !if (HasOMod,
      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
           Src1Mod:$src1_modifiers, Src1RC:$src1,
           Src2Mod:$src2_modifiers, Src2RC:$src2,
           clampmod0:$clamp, omod0:$omod),
      !if (HasClamp,
        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
             Src1Mod:$src1_modifiers, Src1RC:$src1,
             Src2Mod:$src2_modifiers, Src2RC:$src2,
             clampmod0:$clamp),
        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
             Src1Mod:$src1_modifiers, Src1RC:$src1,
             Src2Mod:$src2_modifiers, Src2RC:$src2)));

  // Three source operands, modifiers on src0/src1 only.
  defvar ThreeSrcNoSrc2ModsIns =
    !if (HasOMod,
      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
           Src1Mod:$src1_modifiers, Src1RC:$src1,
           Src2RC:$src2, clampmod0:$clamp, omod0:$omod),
      !if (HasClamp,
        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
             Src1Mod:$src1_modifiers, Src1RC:$src1,
             Src2RC:$src2, clampmod0:$clamp),
        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
             Src1Mod:$src1_modifiers, Src1RC:$src1,
             Src2RC:$src2)));

  defvar ThreeSrcIns =
    !if (HasModifiers,
      !if (HasSrc2Mods, ThreeSrcAllModsIns, ThreeSrcNoSrc2ModsIns),
      // VOP3 without modifiers
      !if (HasClamp,
        (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2, clampmod0:$clamp),
        (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2)));

  dag ret =
    !if (!eq(NumSrcArgs, 0),
      // VOP1 without input operands (V_NOP, V_CLREXCP)
      (ins),
      !if (!eq(NumSrcArgs, 1),
        OneSrcIns,
        !if (!eq(NumSrcArgs, 2),
          TwoSrcIns,
          /* NumSrcArgs == 3 */
          ThreeSrcIns)));
}

// getIns64 operand list, optionally followed by op_sel.
class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,
                RegisterOperand Src2RC, int NumSrcArgs,
                bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
                Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel> {
  // getIns64 handles clamp and omod. implicit mutex between vop3p and omod
  dag base = getIns64 <Src0RC, Src1RC, Src2RC, NumSrcArgs,
                HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
                Src0Mod, Src1Mod, Src2Mod>.ret;
  dag opsel = (ins op_sel0:$op_sel);
  dag ret = !con(base, !if(HasOpSel, opsel, (ins)));
}

// VOP3P operand list: the VOP3 base (modifiers on, omod off), then op_sel_hi
// when HasOpSel is set, then neg_lo/neg_hi.
class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
                   RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel,
                   Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
  dag base = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
                            HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/,
                            0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod,
                            HasOpSel>.ret;

  dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi);
  dag vop3p_neg = (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi);

  dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), vop3p_neg);
  dag ret = !con(base, vop3pFields);
}

// VOP3 operand list with modifiers forced on and op_sel always present.
class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC,
                       RegisterOperand Src2RC, int NumSrcArgs,
                       bit HasClamp, bit HasOMod,
                       Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
  dag ret = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
                           HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/,
                           HasOMod, Src0Mod, Src1Mod, Src2Mod,
                           /*HasOpSel=*/1>.ret;
}

// Source part of a DPP operand list: an optional $old operand followed by the
// sources, with or without their modifier operands. Empty when there are no
// sources at all.
class getInsDPPBase <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
                 RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
                 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld> {
  defvar OldIns = !if(HasOld, (ins OldRC:$old), (ins));

  // VOP1_DPP with / without modifiers
  defvar OneSrcIns =
    !if (HasModifiers,
      (ins Src0Mod:$src0_modifiers, Src0RC:$src0),
      (ins Src0RC:$src0));

  // VOP2_DPP with / without modifiers
  defvar TwoSrcIns =
    !if (HasModifiers,
      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
           Src1Mod:$src1_modifiers, Src1RC:$src1),
      (ins Src0RC:$src0, Src1RC:$src1));

  // VOP3_DPP with / without modifiers
  defvar ThreeSrcIns =
    !if (HasModifiers,
      (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
           Src1Mod:$src1_modifiers, Src1RC:$src1,
           Src2Mod:$src2_modifiers, Src2RC:$src2),
      (ins Src0RC:$src0, Src1RC:$src1,
           Src2RC:$src2));

  dag ret =
    !if (!eq(NumSrcArgs, 0),
      // VOP1 without input operands (V_NOP)
      (ins),
      !con(OldIns,
           !if (!eq(NumSrcArgs, 1),
             OneSrcIns,
             !if (!eq(NumSrcArgs, 2),
               TwoSrcIns,
               /* NumSrcArgs == 3, VOP3 */
               ThreeSrcIns))));
}

// DPP operand list: base sources plus dpp_ctrl, row_mask, bank_mask and
// bound_ctrl.
class getInsDPP <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
                 RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
                 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
  dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
                               HasModifiers, Src0Mod, Src1Mod, Src2Mod,
                               HasOld>.ret,
                 (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                      bank_mask:$bank_mask, bound_ctrl:$bound_ctrl));
}

// DPP16 operand list: the getInsDPP list followed by fi.
class getInsDPP16 <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
                   RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
                   Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
  dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
                           HasModifiers, Src0Mod, Src1Mod, Src2Mod,
                           HasOld>.ret,
                 (ins FI:$fi));
}

// DPP8 operand list: base sources followed by dpp8 and fi.
class getInsDPP8 <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
                  RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
                  Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
  dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
                               HasModifiers, Src0Mod, Src1Mod, Src2Mod,
                               HasOld>.ret,
                 (ins dpp8:$dpp8, FI:$fi));
}

// Prepends $old to an already built VOP3 operand list when HasOld is set and
// the instruction has at least one source.
class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld> {
  dag old = ( ins OldRC:$old );
  dag base = VOP3Base;
  dag ret = !con(
              !if(!and(HasOld, !ne(NumSrcArgs, 0)), old, (ins)),
              base
            );
}

// VOP3 DPP operand list: base plus dpp_ctrl, row_mask, bank_mask and
// bound_ctrl.
class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
  dag ret = !con(getInsVOP3DPPBase<VOP3Base, OldRC, NumSrcArgs, HasOld>.ret,
                 (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                      bank_mask:$bank_mask, bound_ctrl:$bound_ctrl));
}

// VOP3 DPP16 operand list: the getInsVOP3DPP list followed by fi.
class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
  dag ret = !con(getInsVOP3DPP<VOP3Base, OldRC, NumSrcArgs, HasOld>.ret,
                 (ins FI:$fi));
}

// VOP3 DPP8 operand list: base followed by dpp8 and fi.
class getInsVOP3DPP8<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
  dag ret = !con(getInsVOP3DPPBase<VOP3Base, OldRC, NumSrcArgs, HasOld>.ret,
                 (ins dpp8:$dpp8, FI:$fi));
}

// Ins for SDWA
class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs,
                  bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod,
                  ValueType DstVT> {

  dag ret = !if(!eq(NumSrcArgs, 0),
               // VOP1 without input operands (V_NOP)
               (ins),
            !if(!eq(NumSrcArgs, 1),
               // VOP1
               !if(!not(HasSDWAOMod),
                  // VOP1_SDWA without omod
                  (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                       clampmod:$clamp,
                       dst_sel:$dst_sel, dst_unused:$dst_unused,
                       src0_sel:$src0_sel),
                  // VOP1_SDWA with omod
                  (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                       clampmod:$clamp, omod:$omod,
                       dst_sel:$dst_sel, dst_unused:$dst_unused,
                       src0_sel:$src0_sel)),
            !if(!eq(NumSrcArgs, 2),
               !if(!eq(DstVT.Size, 1),
                  // VOPC_SDWA
                  (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                       Src1Mod:$src1_modifiers, Src1RC:$src1,
                       clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
                  // VOP2_SDWA
                  !if(!not(HasSDWAOMod),
                     // VOP2_SDWA without omod
                     (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                          Src1Mod:$src1_modifiers, Src1RC:$src1,
                          clampmod:$clamp,
dst_sel:$dst_sel, dst_unused:$dst_unused, 1922 src0_sel:$src0_sel, src1_sel:$src1_sel), 1923 // VOP2_SDWA with omod 1924 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1925 Src1Mod:$src1_modifiers, Src1RC:$src1, 1926 clampmod:$clamp, omod:$omod, 1927 dst_sel:$dst_sel, dst_unused:$dst_unused, 1928 src0_sel:$src0_sel, src1_sel:$src1_sel))), 1929 (ins)/* endif */))); 1930} 1931 1932// Outs for DPP 1933class getOutsDPP <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> { 1934 dag ret = !if(HasDst, 1935 !if(!eq(DstVT.Size, 1), 1936 (outs), // no dst for VOPC, we use "vcc"-token as dst in SDWA VOPC instructions 1937 (outs DstRCDPP:$vdst)), 1938 (outs)); // V_NOP 1939} 1940 1941// Outs for SDWA 1942class getOutsSDWA <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA> { 1943 dag ret = !if(HasDst, 1944 !if(!eq(DstVT.Size, 1), 1945 (outs DstRCSDWA:$sdst), 1946 (outs DstRCSDWA:$vdst)), 1947 (outs)); // V_NOP 1948} 1949 1950// Returns the assembly string for the inputs and outputs of a VOP[12C] 1951// instruction. 1952class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> { 1953 string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC 1954 string src0 = ", $src0"; 1955 string src1 = ", $src1"; 1956 string src2 = ", $src2"; 1957 string ret = !if(HasDst, dst, "") # 1958 !if(!eq(NumSrcArgs, 1), src0, "") # 1959 !if(!eq(NumSrcArgs, 2), src0#src1, "") # 1960 !if(!eq(NumSrcArgs, 3), src0#src1#src2, ""); 1961} 1962 1963class getAsmVOPDPart <int NumSrcArgs, string XorY> { 1964 string dst = "$vdst" # XorY; 1965 string src0 = ", $src0" # XorY; 1966 string src1 = ", $vsrc1" # XorY; 1967 string ret = dst # 1968 !if(!ge(NumSrcArgs, 1), src0, "") # 1969 !if(!ge(NumSrcArgs, 2), src1, ""); 1970} 1971 1972// Returns the assembly string for the inputs and outputs of a VOP3P 1973// instruction. 
class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
                   bit HasClamp, bit HasOpSel> {
  string dst = "$vdst";
  // A source is followed by "," whenever another source comes after it.
  string src0 = "$src0" # !if(!eq(NumSrcArgs, 1), "", ",");
  string src1 = !if(!eq(NumSrcArgs, 1), "",
                    " $src1" # !if(!eq(NumSrcArgs, 2), "", ","));
  string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");

  string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
  string clamp = !if(HasClamp, "$clamp", "");
  string opsel = !if(HasOpSel, "$op_sel$op_sel_hi", "");

  // Each modifier is printed as an array of bits for each operand, so
  // all operands are printed as part of src0_modifiers.
  string ret = dst#", "#src0#src1#src2#opsel#mods#clamp;
}

// Assembly string for VOP3 with op_sel. Each source is printed through its
// $srcN_modifiers operand when the matching SrcNHasMods bit is set, otherwise
// as plain $srcN. $op_sel is always printed.
class getAsmVOP3OpSel <int NumSrcArgs,
                       bit HasClamp,
                       bit HasOMod,
                       bit Src0HasMods,
                       bit Src1HasMods,
                       bit Src2HasMods> {
  string dst = "$vdst";

  string isrc0 = "$src0" # !if(!eq(NumSrcArgs, 1), "", ",");
  string isrc1 = !if(!eq(NumSrcArgs, 1), "",
                     " $src1" # !if(!eq(NumSrcArgs, 2), "", ","));
  string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", "");

  string fsrc0 = "$src0_modifiers" # !if(!eq(NumSrcArgs, 1), "", ",");
  string fsrc1 = !if(!eq(NumSrcArgs, 1), "",
                     " $src1_modifiers" # !if(!eq(NumSrcArgs, 2), "", ","));
  string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");

  string src0 = !if(Src0HasMods, fsrc0, isrc0);
  string src1 = !if(Src1HasMods, fsrc1, isrc1);
  string src2 = !if(Src2HasMods, fsrc2, isrc2);

  string clamp = !if(HasClamp, "$clamp", "");
  string omod = !if(HasOMod, "$omod", "");
  string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp#omod;
}

// Assembly string for DPP. Without modifiers the source list is borrowed from
// getAsm32 (with its dst suppressed); with modifiers the sources are printed
// through their $srcN_modifiers operands.
class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers,
                 ValueType DstVT = i32> {
  string dst = !if(HasDst,
                   !if(!eq(DstVT.Size, 1),
                       "$sdst", // use $sdst for VOPC
                       "$vdst"),
                   "");
  string src0 = "$src0_modifiers" # !if(!eq(NumSrcArgs, 1), "", ",");
  string src1 = !if(!eq(NumSrcArgs, 1), "",
                    " $src1_modifiers" # !if(!eq(NumSrcArgs, 2), "", ","));
  string args = !if(HasModifiers,
                    ", "#src0#src1,
                    getAsm32<0, NumSrcArgs, DstVT>.ret);
  string ret = dst#args#" $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
}

// DPP16 assembly string: the DPP string followed by $fi.
class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers,
                   ValueType DstVT = i32> {
  string ret = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret#"$fi";
}

// DPP8 assembly string: reuses dst/args from getAsmDPP but replaces the DPP
// control suffix with $dpp8$fi.
class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers,
                  ValueType DstVT = i32>
  : getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT> {
  let ret = dst#args#" $dpp8$fi";
}

// Common VOP3 assembly string. Optional pieces are appended in the order
// op_sel, VOP3P-only fields (op_sel_hi, neg_lo/neg_hi), clamp, omod. With no
// sources only the dst is printed.
class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp,
                      bit HasOpSel, bit HasOMod, bit IsVOP3P,
                      bit HasModifiers, bit Src0HasMods,
                      bit Src1HasMods, bit Src2HasMods,
                      ValueType DstVT = i32> {
  string dst = !if(HasDst,
                   !if(!eq(DstVT.Size, 1),
                       "$sdst", // use $sdst for VOPC
                       "$vdst"),
                   "");
  string src0nomods = "$src0" # !if(!eq(NumSrcArgs, 1), "", ",");
  string src1nomods = !if(!eq(NumSrcArgs, 1), "",
                          " $src1" # !if(!eq(NumSrcArgs, 2), "", ","));
  string src2nomods = !if(!eq(NumSrcArgs, 3), " $src2", "");

  string src0mods = "$src0_modifiers" # !if(!eq(NumSrcArgs, 1), "", ",");
  string src1mods = !if(!eq(NumSrcArgs, 1), "",
                        " $src1_modifiers" # !if(!eq(NumSrcArgs, 2), "", ","));
  string src2mods = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");

  string src0 = !if(Src0HasMods, src0mods, src0nomods);
  string src1 = !if(Src1HasMods, src1mods, src1nomods);
  string src2 = !if(Src2HasMods, src2mods, src2nomods);
  string opsel = !if(HasOpSel, "$op_sel", "");
  string 3PMods = !if(IsVOP3P,
                      !if(HasOpSel, "$op_sel_hi", "")
                        #!if(HasModifiers, "$neg_lo$neg_hi", ""),
                      "");
  string clamp = !if(HasClamp, "$clamp", "");
  string omod = !if(HasOMod, "$omod", "");

  string ret = dst#!if(!gt(NumSrcArgs,0),
                       ", "#src0#src1#src2#opsel#3PMods#clamp#omod,
                       "");

}

// VOP3 DPP assembly string: base string plus the DPP control operands.
class getAsmVOP3DPP<string base> {
  string ret = base # " $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
}

// VOP3 DPP16 assembly string: the VOP3 DPP string followed by $fi.
class getAsmVOP3DPP16<string base> {
  string ret = getAsmVOP3DPP<base>.ret # "$fi";
}

// VOP3 DPP8 assembly string: base string plus $dpp8$fi.
class getAsmVOP3DPP8<string base> {
  string ret = base # " $dpp8$fi";
}


// SDWA assembly string. $clamp always follows the last source.
class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
  string dst = !if(HasDst,
                   !if(!eq(DstVT.Size, 1),
                       " vcc", // use vcc token as dst for VOPC instructions
                       "$vdst"),
                   "");
  string src0 = "$src0_modifiers";
  string src1 = "$src1_modifiers";
  string args = !if(!eq(NumSrcArgs, 0),
                    "",
                    ", "#src0#!if(!eq(NumSrcArgs, 1), "", ", "#src1)#"$clamp");
  string sdwa = !if(!eq(NumSrcArgs, 0),
                    "",
                    !if(!eq(NumSrcArgs, 1),
                        " $dst_sel $dst_unused $src0_sel",
                        !if(!eq(DstVT.Size, 1),
                            // No dst_sel and dst_unused for VOPC
                            " $src0_sel $src1_sel",
                            " $dst_sel $dst_unused $src0_sel $src1_sel")));
  string ret = dst#args#sdwa;
}

// SDWA9 assembly string. Output modifiers ($clamp, plus $omod with HasOMod)
// are printed after the sources, except in the VOPC form.
class getAsmSDWA9 <bit HasDst, bit HasOMod, int NumSrcArgs,
                   ValueType DstVT = i32> {
  string dst = !if(HasDst,
                   !if(!eq(DstVT.Size, 1),
                       "$sdst", // VOPC
                       "$vdst"), // VOP1/2
                   "");
  string src0 = "$src0_modifiers";
  string src1 = "$src1_modifiers";
  string out_mods = "$clamp" # !if(HasOMod, "$omod", "");
  string args = !if(!eq(NumSrcArgs, 0),
                    "",
                    ", "#src0#!if(!eq(NumSrcArgs, 1), "", ", "#src1));
  string sdwa = !if(!eq(NumSrcArgs, 0),
                    "",
                    !if(!eq(NumSrcArgs, 1),
                        out_mods#" $dst_sel $dst_unused $src0_sel",
                        !if(!eq(DstVT.Size, 1),
                            // No dst_sel, dst_unused and output modifiers
                            // for VOPC
                            " $src0_sel $src1_sel",
                            out_mods#" $dst_sel $dst_unused $src0_sel $src1_sel")));
  string ret = dst#args#sdwa;
}

// True when the instruction has fewer than three sources and its dst, src0 or
// src1 type is 64 bits wide.
class getHas64BitOps <int NumSrcArgs, ValueType DstVT, ValueType Src0VT,
                      ValueType Src1VT> {
  bit ret = !and(!ne(NumSrcArgs, 3),
                 !or(!eq(DstVT.Size, 64),
                     !eq(Src0VT.Size, 64),
                     !eq(Src1VT.Size, 64)));
}

// SDWA is available unless the instruction has three sources (VOP3) or any of
// dst, src0, src1 is 64 bits wide.
class getHasSDWA <int NumSrcArgs, ValueType DstVT = i32,
                  ValueType Src0VT = i32, ValueType Src1VT = i32> {
  bit ret = !and(!ne(NumSrcArgs, 3),     // No SDWA for VOP3
                 !ne(DstVT.Size, 64),    // No SDWA for 64-bit operands: dst
                 !ne(Src0VT.Size, 64),   // 64-bit src0
                 !ne(Src1VT.Size, 64));  // 64-bit src1
}

// DPP is available for everything except three-source (VOP3) instructions.
class getHasDPP <int NumSrcArgs> {
  bit ret = !ne(NumSrcArgs, 3); // NumSrcArgs == 3 - No DPP for VOP3
}

// DPP is available and no 64-bit operand is involved.
class getHasExt32BitDPP <int NumSrcArgs, ValueType DstVT = i32,
                         ValueType Src0VT = i32, ValueType Src1VT = i32> {
  bit ret = !and(getHasDPP<NumSrcArgs>.ret,
                 !not(getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret));
}

// DPP is available and at least one 64-bit operand is involved.
class getHasExt64BitDPP <int NumSrcArgs, ValueType DstVT = i32,
                         ValueType Src0VT = i32, ValueType Src1VT = i32> {
  bit ret = !and(getHasDPP<NumSrcArgs>.ret,
                 getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
}

// Function that checks if instruction supports DPP or SDWA
class getHasExt <int NumSrcArgs, ValueType DstVT = i32,
                 ValueType Src0VT = i32, ValueType Src1VT = i32> {
  bit ret = !or(getHasDPP<NumSrcArgs>.ret,
                getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
}

// Return an AGPR+VGPR operand class for the given VGPR register class.
class getLdStRegisterOperand<RegisterClass RC> {
  // Selected purely by RC.Size; sizes other than 32/64/96/128/160 fall
  // through to the placeholder below.
  RegisterOperand ret =
    !if(!eq(RC.Size, 32), AVLdSt_32,
    !if(!eq(RC.Size, 64), AVLdSt_64,
    !if(!eq(RC.Size, 96), AVLdSt_96,
    !if(!eq(RC.Size, 128), AVLdSt_128,
    !if(!eq(RC.Size, 160), AVLdSt_160,
    RegisterOperand<VReg_1>  // invalid register
    )))));
}

// VOP3 DPP is available only when none of dst, src0, src1, src2 is 64 bits
// wide.
class getHasVOP3DPP <ValueType DstVT = i32, ValueType Src0VT = i32,
                 ValueType Src1VT = i32, ValueType Src2VT = i32> {
  bit ret = !if(!eq(DstVT.Size, 64),
                0, // 64-bit dst No DPP for 64-bit operands
                !if(!eq(Src0VT.Size, 64),
                    0, // 64-bit src0
                    !if(!eq(Src1VT.Size, 64),
                        0, // 64-bit src1
                        !if(!eq(Src2VT.Size, 64),
                            0, // 64-bit src2
                            1
                        )
                    )
                )
            );
}


// Values for VOPProfile.NeedPatGen.
def PatGenMode {
  int NoPattern = 0;
  int Pattern   = 1;
}

// Describes one VOP instruction signature. _ArgVT is
// [dst, src0, src1, src2] (use `untyped` for absent operands); every operand
// class, ins/outs dag and assembly string for the 32-bit, 64-bit, VOP3P,
// DPP, SDWA and VOPD forms is derived from those four types. Only
// ArgVT[0..3] are read here. _EnableClamp forces HasClamp on even when src0
// is not a modifier type.
class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {

  field list<ValueType> ArgVT = _ArgVT;
  field bit EnableClamp = _EnableClamp;
  field bit IsTrue16 = 0;
  field bit IsRealTrue16 = 0;

  field ValueType DstVT = ArgVT[0];
  field ValueType Src0VT = ArgVT[1];
  field ValueType Src1VT = ArgVT[2];
  field ValueType Src2VT = ArgVT[3];
  field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
  field RegisterOperand DstRCDPP = DstRC;
  field RegisterOperand DstRC64 = DstRC;
  field RegisterOperand DstRCVOP3DPP = DstRC64;
  field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret;
  field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT, IsTrue16>.ret;
  field RegisterOperand Src1RC32 = getVregSrcForVT<Src1VT>.ret;
  field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
  field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
  field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
  field RegisterOperand Src0DPP = getVregSrcForVT<Src0VT>.ret;
  field RegisterOperand Src1DPP = getVregSrcForVT<Src1VT>.ret;
  field RegisterOperand Src2DPP = getVregSrcForVT<Src2VT>.ret;
  field RegisterOperand Src0VOP3DPP = VGPRSrc_32;
  field RegisterOperand Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT>.ret;
  field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret;
  field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
  // NOTE(review): Src1SDWA is derived from Src0VT, not Src1VT, while
  // Src1ModSDWA below uses Src1VT. Looks like a copy/paste slip, but changing
  // it alters the operand class for mixed-type profiles - confirm intent
  // before touching.
  field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
  field Operand Src0Mod = getSrcMod<Src0VT>.ret;
  field Operand Src1Mod = getSrcMod<Src1VT>.ret;
  field Operand Src2Mod = getSrcMod<Src2VT>.ret;
  field Operand Src0ModDPP = getSrcModDPP<Src0VT>.ret;
  field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret;
  field Operand Src2ModDPP = getSrcModDPP<Src2VT>.ret;
  field Operand Src0ModVOP3DPP = getSrcModDPP<Src0VT>.ret;
  field Operand Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret;
  field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT>.ret;
  field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
  field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;


  field bit IsMAI = 0;
  field bit IsVOP3P = 0;
  field bit IsDOT = 0;
  field bit IsSingle = 0;
  field bit IsWMMA = 0;
  field bit IsSWMMAC = 0;

  field bit IsFP8 = 0;

  // An operand exists when its type is not `untyped`.
  field bit HasDst = !ne(DstVT.Value, untyped.Value);
  field bit HasDst32 = HasDst;
  field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case
  field bit EmitDstSel = EmitDst;
  field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
  field bit HasSrc0 = !ne(Src0VT.Value, untyped.Value);
  field bit HasSrc1 = !ne(Src1VT.Value, untyped.Value);
  field bit HasSrc2 = !ne(Src2VT.Value, untyped.Value);

  field bit HasSrc0FloatMods = Src0VT.isFP;
  field bit HasSrc1FloatMods = Src1VT.isFP;
  field bit HasSrc2FloatMods = Src2VT.isFP;

  field bit HasSrc0IntMods = isIntType<Src0VT>.ret;
  field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
  field bit HasSrc2IntMods = isIntType<Src2VT>.ret;

  field bit HasClamp = !or(isModifierType<Src0VT>.ret, EnableClamp);
  field bit HasSDWAClamp = EmitDst;
  field bit HasFPClamp = !and(DstVT.isFP, HasClamp);
  field bit HasIntClamp = !if(DstVT.isFP, 0, HasClamp);
  field bit HasClampLo = HasClamp;
  field bit HasClampHi = !and(DstVT.isVector, HasClamp);
  field bit HasHigh = 0;

  field bit IsPacked = Src0VT.isVector;
  field bit HasOpSel = IsPacked;
  // omod is never available on VOP3P; otherwise it follows the dst FP-ness.
  field bit HasOMod = !if(IsVOP3P, 0, DstVT.isFP);
  field bit HasSDWAOMod = DstVT.isFP;

  field bit HasModifiers = !or(isModifierType<Src0VT>.ret,
                               isModifierType<Src1VT>.ret,
                               isModifierType<Src2VT>.ret,
                               HasOMod);

  field bit HasSrc0Mods = HasModifiers;
  field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0);
  field bit HasSrc2Mods = !if(HasModifiers, !or(HasSrc2FloatMods, HasSrc2IntMods), 0);

  field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  field bit HasExtVOP3DPP = getHasVOP3DPP<DstVT, Src0VT, Src1VT, Src2VT>.ret;
  field bit HasExtDPP = !or(getHasDPP<NumSrcArgs>.ret, HasExtVOP3DPP);
  field bit HasExt32BitDPP = getHasExt32BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  field bit HasExt64BitDPP = getHasExt64BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  field bit HasExtSDWA = getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  field bit HasExtSDWA9 = HasExtSDWA;
  field int NeedPatGen = PatGenMode.NoPattern;

  field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
  field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
  field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods);

  field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs));

  // VOP3b instructions are a special case with a second explicit
  // output. This is manually overridden for them.
  field dag Outs32 = Outs;
  field dag Outs64 = !if(HasDst,(outs DstRC64:$vdst),(outs));
  field dag OutsDPP = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret;
  field dag OutsDPP8 = OutsDPP;
  field dag OutsVOP3DPP = getOutsDPP<HasDst, DstVT, DstRCVOP3DPP>.ret;
  field dag OutsVOP3DPP8 = OutsVOP3DPP;
  field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret;

  field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
  field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
                             HasIntClamp, HasModifiers, HasSrc2Mods,
                             HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
  field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
                                   NumSrcArgs, HasClamp, HasOpSel,
                                   Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
  field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
                                NumSrcArgs, HasClamp, HasOMod,
                                getOpSelMod<Src0VT>.ret,
                                getOpSelMod<Src1VT>.ret,
                                getOpSelMod<Src2VT>.ret>.ret;
  field dag InsDPP = !if(HasExtDPP,
                         getInsDPP<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs,
                                   HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret,
                         (ins));
  field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs,
                                   HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
  field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, Src2DPP,
                                 NumSrcArgs, HasModifiers,
                                 Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
  defvar InsVOP3DPPBase = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP,
                  Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
                  Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret;
  defvar InsVOP3PDPPBase = getInsVOP3P<Src0VOP3DPP, Src1VOP3DPP,
                  Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel,
                  Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP>.ret;

  field dag InsVOP3Base = !if(IsVOP3P, InsVOP3PDPPBase, InsVOP3DPPBase);

  field dag InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
  field dag InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
  field dag InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
  field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
                                 HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
                                 DstVT>.ret;
  field dag InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X);
  // It is a slight misnomer to use the deferred f32 operand type for non-float
  // operands, but this operand type will only be used if the other dual
  // component is FMAAK or FMAMK
  field dag InsVOPDXDeferred = (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X, VGPR_32:$vsrc1X);
  field dag InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y);
  // NOTE(review): the X variant sizes $src0X from Src0VT, but this one sizes
  // $src0Y from Src1VT - confirm whether Src0VT was intended.
  field dag InsVOPDYDeferred = (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y, VGPR_32:$vsrc1Y);


  field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
  field string AsmDPP = !if(HasExtDPP,
                            getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, "");
  field string AsmDPP16 = getAsmDPP16<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
  // DPP8 encoding has no fields for modifiers, and it is enforced by setting
  // the asm operand name via this HasModifiers flag
  field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret;
  field string AsmVOP3Base = getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp,
   HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, HasModifiers,
   HasModifiers, DstVT>.ret;
  field string Asm64 = AsmVOP3Base;
  field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp, HasOpSel>.ret;
  field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
                                              HasClamp,
                                              HasOMod,
                                              HasSrc0FloatMods,
                                              HasSrc1FloatMods,
                                              HasSrc2FloatMods>.ret;
  field string AsmVOP3DPP = getAsmVOP3DPP<AsmVOP3Base>.ret;
  field string AsmVOP3DPP16 = getAsmVOP3DPP16<AsmVOP3Base>.ret;
  field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3Base>.ret;
  field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
  field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
  field string AsmVOPDX = getAsmVOPDPart<NumSrcArgs, "X">.ret;
  field string AsmVOPDY = getAsmVOPDPart<NumSrcArgs, "Y">.ret;
  field string TieRegDPP = "$old";
}

// Same signature as `p` with every DPP/SDWA extension switched off.
// NOTE(review): only p.ArgVT is forwarded, so p.EnableClamp (and any field
// overrides on p) are not inherited - confirm this is intended.
class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
  let HasExt = 0;
  let HasExtDPP = 0;
  let HasExtVOP3DPP = 0;
  let HasExt32BitDPP = 0;
  let HasExt64BitDPP = 0;
  let HasExtSDWA = 0;
  let HasExtSDWA9 = 0;
}

// Same signature as `p` with NeedPatGen set to `mode` (a PatGenMode value).
class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.ArgVT> {
  let NeedPatGen = mode;
}

// VOPC_Profile_t16, VOPC_NoSdst_Profile_t16, VOPC_Class_Profile_t16,
// VOPC_Class_NoSdst_Profile_t16, and VOP_MAC_F16_t16 do not inherit from this
// class, so copy changes to this class in those profiles
class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
  let IsTrue16 = 1;
  let IsRealTrue16 = 1;
  // Most DstVT are 16-bit, but not all.
  let DstRC = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 0 /*IsVOP3Encoding*/>.ret;
  let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
  let Src1RC32 = getVregSrcForVT<Src1VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
  let Src0DPP = getVregSrcForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
  let Src1DPP = getVregSrcForVT<Src1VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
  let Src2DPP = getVregSrcForVT<Src2VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret;
  let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0 /*IsFake16*/>.ret;
  let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0 /*IsFake16*/>.ret;
  let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0 /*IsFake16*/>.ret;

  // DstRC64 is assigned exactly once. An earlier
  // `let DstRC64 = getVALUDstForVT<DstVT>.ret;` in this body was dead because
  // this later assignment overrode it.
  let DstRC64 = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 1 /*IsVOP3Encoding*/>.ret;
  let Src0RC64 = getVOP3SrcForVT<Src0VT, 1 /*IsTrue16*/>.ret;
  let Src1RC64 = getVOP3SrcForVT<Src1VT, 1 /*IsTrue16*/>.ret;
  let Src2RC64 = getVOP3SrcForVT<Src2VT, 1 /*IsTrue16*/>.ret;
  let Src0Mod = getSrcMod<Src0VT, 1 /*IsTrue16*/>.ret;
  let Src1Mod = getSrcMod<Src1VT, 1 /*IsTrue16*/>.ret;
  let Src2Mod = getSrcMod<Src2VT, 1 /*IsTrue16*/>.ret;
}

// Fake16 variant: sets IsTrue16 but leaves IsRealTrue16 at 0, and selects the
// fake16 flavours of the 32-bit/DPP operand classes.
class VOPProfile_Fake16<VOPProfile P> : VOPProfile<P.ArgVT> {
  let IsTrue16 = 1;
  // Most DstVT are 16-bit, but not all
  let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
  let DstRC64 = getVALUDstForVT<DstVT>.ret;
  let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
  let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
  let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
}

// Concrete profiles. Names read VOP_<dst>_<src0>[_<src1>[_<src2>]].
def VOP_F16_F16 : VOPProfile<[f16, f16, untyped, untyped]>;
def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>;
def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>;
def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>;

def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>;
def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>;
def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], /*EnableClamp=*/1>;

// The trailing fifth list element in the next few profiles is not read by
// VOPProfile itself (it only indexes ArgVT[0..3]).
def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;

def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>;
def VOP_I32_I16 : VOPProfile <[i32, i16, untyped, untyped]>;
def VOP_I16_I32 : VOPProfile <[i16, i32, untyped, untyped]>;

def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;

def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>;
def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>;

def VOP_F16_V2F16_V2F16_F16 : VOPProfile <[f16, v2f16, v2f16, f16]>;
def VOP_I16_V2I16_V2I16_I16 : VOPProfile <[i16, v2i16, v2i16, i16]>;
def VOP_F32_V2I16_V2I16_F32 : VOPProfile <[f32, v2i16, v2i16, f32]>;

def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>;

def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;

def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
def VOP_F64_F32 : VOPProfile <[f64, f32, untyped, untyped]>;
def VOP_F64_F64 : VOPProfile <[f64, f64, untyped, untyped]>;
def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>;
def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>;
def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>;
def VOP_I64_I64 : VOPProfile <[i64, i64, untyped, untyped]>;

def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>;
def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=*/1>;
def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;

def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;

def VOP_F16_F32_F16_F32 : VOPProfile <[f16, f32, f16, f32]>;
def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>;
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>;

def VOP_F32_V2F16_V2F16_F32 : VOPProfile <[f32, v2f16, v2f16, f32]>;
def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>;

def VOP_V4F32_F32_F32_V4F32       : VOPProfile <[v4f32,  f32,   f32,   v4f32]>;
def VOP_V16F32_F32_F32_V16F32     : VOPProfile <[v16f32, f32,   f32,   v16f32]>;
def VOP_V32F32_F32_F32_V32F32     : VOPProfile <[v32f32, f32,   f32,   v32f32]>;
def VOP_V4F32_V4F16_V4F16_V4F32   : VOPProfile <[v4f32,  v4f16, v4f16, v4f32]>;
def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile <[v16f32, v4f16, v4f16, v16f32]>;
def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32f32, v4f16, v4f16, v32f32]>;
def VOP_V4F32_V2I16_V2I16_V4F32   : VOPProfile <[v4f32,  v2i16, v2i16, v4f32]>;
def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile <[v16f32, v2i16, v2i16, v16f32]>;
def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32f32, v2i16, v2i16, v32f32]>;
def VOP_V4I32_I32_I32_V4I32       : VOPProfile <[v4i32,  i32,   i32,   v4i32]>;
def VOP_V16I32_I32_I32_V16I32     : VOPProfile <[v16i32, i32,   i32,   v16i32]>;
def VOP_V32I32_I32_I32_V32I32     : VOPProfile <[v32i32, i32,   i32,   v32i32]>;

def VOP_V4F64_F64_F64_V4F64 : VOPProfile <[v4f64, f64, f64, v4f64]>;
def VOP_V1F64_F64_F64_V1F64 : VOPProfile <[v1f64, f64, f64, v1f64]>;

def VOP_V2F32_V2F32_V2F32_V2F32   : VOPProfile <[v2f32,  v2f32, v2f32, v2f32]>;
def VOP_V2F32_V2F32_V2F32         : VOPProfile <[v2f32,  v2f32, v2f32, untyped]>;
def VOP_V2I32_V2I32_V2I32         : VOPProfile <[v2i32,  v2i32, v2i32, untyped]>;
def VOP_V4F32_V4I16_V4I16_V4F32   : VOPProfile <[v4f32,  v4i16, v4i16, v4f32]>;
def VOP_V16F32_V4I16_V4I16_V16F32 : VOPProfile <[v16f32, v4i16, v4i16, v16f32]>;
def VOP_V32F32_V4I16_V4I16_V32F32 : VOPProfile <[v32f32, v4i16, v4i16, v32f32]>;

def VOP_V4I32_I64_I64_V4I32       : VOPProfile <[v4i32,  i64,   i64,   v4i32]>;
def VOP_V16I32_I64_I64_V16I32     : VOPProfile <[v16i32, i64,   i64,   v16i32]>;
def VOP_V4F32_V2F32_V2F32_V4F32   : VOPProfile <[v4f32,  v2f32, v2f32, v4f32]>;
def VOP_V16F32_V2F32_V2F32_V16F32 : VOPProfile <[v16f32, v2f32, v2f32, v16f32]>;
def VOP_V4F32_I64_I64_V4F32       : VOPProfile <[v4f32,  i64,   i64,   v4f32]>;
def VOP_V16F32_I64_I64_V16F32     : VOPProfile <[v16f32, i64,   i64,   v16f32]>;

def VOP_V4F32_V4F16_V8F16_I32     : VOPProfile <[v4f32,  v4f16, v8f16, i32]>;
def VOP_V16F32_V4F16_V8F16_I32    : VOPProfile <[v16f32, v4f16, v8f16, i32]>;
def VOP_V4F32_V4I16_V8I16_I32     : VOPProfile <[v4f32,  v4i16, v8i16, i32]>;
def VOP_V16F32_V4I16_V8I16_I32    : VOPProfile <[v16f32, v4i16, v8i16, i32]>;
def VOP_V4I32_V2I32_V4I32_I32     : VOPProfile <[v4i32,  v2i32, v4i32, i32]>;
def VOP_V16I32_V2I32_V4I32_I32    : VOPProfile <[v16i32, v2i32, v4i32, i32]>;
def VOP_V4F32_V2I32_V4I32_I32     : VOPProfile <[v4f32,  v2i32, v4i32, i32]>;
def VOP_V16F32_V2I32_V4I32_I32    : VOPProfile <[v16f32, v2i32, v4i32, i32]>;

// Records the name of the reversed-operand form (RevOp) and whether this
// record is the original form (IsOrig).
class Commutable_REV <string revOp, bit isOrig> {
  string RevOp = revOp;
  bit IsOrig = isOrig;
}

// Records the name of the no-return form (NoRetOp) and whether this record is
// the returning form (IsRet).
class AtomicNoRet <string noRetOp, bit isRet> {
  string NoRetOp = noRetOp;
  bit IsRet = isRet;
}

//===----------------------------------------------------------------------===//
// Interpolation opcodes
//===----------------------------------------------------------------------===//

// Register operand printed with printVINTRPDst.
class VINTRPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVINTRPDst">;

// Codegen-only VINTRP pseudo: empty asm string, encoding family NONE.
class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
  VINTRPCommon <outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {
  let isPseudo = 1;
  let isCodeGenOnly = 1;
}

// FIXME-GFX10: WIP.
// Encoded VINTRP instruction using the VINTRPe encoding class. It has no
// pattern (selection goes through the pseudo) and the encoding family
// recorded in SIMCInstr is chosen by whoever instantiates the class.
class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
                      string asm, int encodingFamily> :
  VINTRPCommon <outs, ins, asm, []>,
  VINTRPe <op>,
  SIMCInstr<opName, encodingFamily>;

// Encoded VINTRP instruction using the VINTRPe_vi encoding class. It is
// always registered under SIEncodingFamily.VI, is assembled under
// VIAssemblerPredicate, and is decoded from the "GFX8" namespace.
class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
                      string asm> :
  VINTRPCommon <outs, ins, asm, []>,
  VINTRPe_vi <op>,
  SIMCInstr<opName, SIEncodingFamily.VI> {
  let AssemblerPredicate = VIAssemblerPredicate;
  let DecoderNamespace = "GFX8";
}

// FIXME-GFX10: WIP.
// Emits four records per instantiation: the pseudo (named NAME itself) and
// the _si, _vi and _gfx10 encoded variants. _si and _gfx10 share the
// VINTRP_Real_si class and differ only in encoding family, assembler
// predicate and decoder namespace.
multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
                     list<dag> pattern = []> {
  def "" : VINTRP_Pseudo <NAME, outs, ins, pattern>;

  let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in
  def _si : VINTRP_Real_si <op, NAME, outs, ins, asm, SIEncodingFamily.SI>;

  def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>;

  let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in
  def _gfx10 : VINTRP_Real_si<op, NAME, outs, ins, asm, SIEncodingFamily.GFX10>;
}

//===----------------------------------------------------------------------===//
// Vector instruction mappings
//===----------------------------------------------------------------------===//

// e32 -> e64: among "VOP" records sharing an OpName, maps the entry with
// Size = 4 and VOP3 = 0 to the entry with Size = 8 and VOP3 = 1.
def getVOPe64 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["Size", "VOP3"];
  let KeyCol = ["4", "0"];
  let ValueCols = [["8", "1"]];
}

// e64 -> e32: the inverse of getVOPe64 (Size = 8, VOP3 = 1 is the key;
// Size = 4, VOP3 = 0 is the value).
def getVOPe32 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["Size", "VOP3"];
  let KeyCol = ["8", "1"];
  let ValueCols = [["4", "0"]];
}
// Ordinary -> SDWA: among "VOP" records sharing an OpName, maps the entry
// whose AsmVariantName is "Default" to the one whose AsmVariantName is "SDWA".
def getSDWAOp : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["Default"];
  let ValueCols = [["SDWA"]];
}

// SDWA -> ordinary: the inverse of getSDWAOp.
def getBasicFromSDWAOp : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["SDWA"];
  let ValueCols = [["Default"]];
}

// Ordinary -> DPP: maps the "Default" asm variant of an OpName row to its
// "DPP" variant.
def getDPPOp32 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["Default"];
  let ValueCols = [["DPP"]];
}

// VOP3 -> VOP3 DPP: maps the "VOP3" asm variant of an OpName row to its
// "VOP3_DPP" variant.
def getDPPOp64 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["VOP3"];
  let ValueCols = [["VOP3_DPP"]];
}

// Maps a commuted opcode to its original version: within a RevOp row of
// Commutable_REV records, IsOrig = 0 is the key and IsOrig = 1 the value.
def getCommuteOrig : InstrMapping {
  let FilterClass = "Commutable_REV";
  let RowFields = ["RevOp"];
  let ColFields = ["IsOrig"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Maps an original opcode to its commuted version: the inverse of
// getCommuteOrig (IsOrig = 1 is the key, IsOrig = 0 the value).
def getCommuteRev : InstrMapping {
  let FilterClass = "Commutable_REV";
  let RowFields = ["RevOp"];
  let ColFields = ["IsOrig"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Pseudo -> encoded opcode table. Rows are SIMCInstr records grouped by
// PseudoInstr; the key is the entry whose Subtarget is SIEncodingFamily.NONE
// and there is one value column per encoding family.
def getMCOpcodeGen : InstrMapping {
  let FilterClass = "SIMCInstr";
  let RowFields = ["PseudoInstr"];
  let ColFields = ["Subtarget"];
  let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
  // These columns must be kept in sync with the SIEncodingFamily enumeration.
  let ValueCols = [
    [!cast<string>(SIEncodingFamily.SI)],
    [!cast<string>(SIEncodingFamily.VI)],
    [!cast<string>(SIEncodingFamily.SDWA)],
    [!cast<string>(SIEncodingFamily.SDWA9)],
    // GFX80 encoding is added to work around a multiple matching
    // issue for buffer instructions with unpacked d16 data. This
    // does not actually change the encoding, and thus may be
    // removed later.
    [!cast<string>(SIEncodingFamily.GFX80)],
    [!cast<string>(SIEncodingFamily.GFX9)],
    [!cast<string>(SIEncodingFamily.GFX10)],
    [!cast<string>(SIEncodingFamily.SDWA10)],
    [!cast<string>(SIEncodingFamily.GFX90A)],
    [!cast<string>(SIEncodingFamily.GFX940)],
    [!cast<string>(SIEncodingFamily.GFX11)],
    [!cast<string>(SIEncodingFamily.GFX12)]
  ];
}

// Get the equivalent SOPK instruction: within a BaseCmpOp row of
// SOPKInstTable records, IsSOPK = 0 is the key and IsSOPK = 1 the value.
def getSOPKOp : InstrMapping {
  let FilterClass = "SOPKInstTable";
  let RowFields = ["BaseCmpOp"];
  let ColFields = ["IsSOPK"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Within an OpName row of MUBUFAddr64Table records, maps the IsAddr64 = 0
// entry to the IsAddr64 = 1 entry.
def getAddr64Inst : InstrMapping {
  let FilterClass = "MUBUFAddr64Table";
  let RowFields = ["OpName"];
  let ColFields = ["IsAddr64"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Key and value column are both IsAddr64 = 1, so an addr64 entry maps to
// itself.
// NOTE(review): presumably used as an "is this an addr64 opcode" lookup --
// confirm against the C++ callers.
def getIfAddr64Inst : InstrMapping {
  let FilterClass = "MUBUFAddr64Table";
  let RowFields = ["OpName"];
  let ColFields = ["IsAddr64"];
  let KeyCol = ["1"];
  let ValueCols = [["1"]];
}

// Maps an atomic opcode to its returnless version: within a NoRetOp row of
// AtomicNoRet records, IsRet = 1 is the key and IsRet = 0 the value.
def getAtomicNoRetOp : InstrMapping {
  let FilterClass = "AtomicNoRet";
  let RowFields = ["NoRetOp"];
  let ColFields = ["IsRet"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Maps a GLOBAL to its SADDR form.
// Within a SaddrOp row of GlobalSaddrTable records, IsSaddr = 0 is the key
// and IsSaddr = 1 the value.
def getGlobalSaddrOp : InstrMapping {
  let FilterClass = "GlobalSaddrTable";
  let RowFields = ["SaddrOp"];
  let ColFields = ["IsSaddr"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Maps a GLOBAL SADDR to its VADDR form: the inverse of getGlobalSaddrOp
// (IsSaddr = 1 is the key, IsSaddr = 0 the value).
def getGlobalVaddrOp : InstrMapping {
  let FilterClass = "GlobalSaddrTable";
  let RowFields = ["SaddrOp"];
  let ColFields = ["IsSaddr"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Maps a v_cmpx opcode with sdst to the opcode without sdst: within a
// NoSDstOp row of VCMPXNoSDstTable records, HasSDst = 1 is the key and
// HasSDst = 0 the value.
def getVCMPXNoSDstOp : InstrMapping {
  let FilterClass = "VCMPXNoSDstTable";
  let RowFields = ["NoSDstOp"];
  let ColFields = ["HasSDst"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Maps a SOPP to a SOPP with S_NOP: within a KeyName row of SOPPRelaxTable
// records, IsRelaxed = 0 is the key and IsRelaxed = 1 the value.
def getSOPPWithRelaxation : InstrMapping {
  let FilterClass = "SOPPRelaxTable";
  let RowFields = ["KeyName"];
  let ColFields = ["IsRelaxed"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Flat scratch opcode mappings between addressing modes. All four operate on
// FlatScratchInst records grouped by SVOp and select by the Mode field; each
// def name reads get...<value mode>from<key mode>.

// Mode "SS" -> Mode "ST".
def getFlatScratchInstSTfromSS : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SS"];
  let ValueCols = [["ST"]];
}

// Mode "SV" -> Mode "SS".
def getFlatScratchInstSSfromSV : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SV"];
  let ValueCols = [["SS"]];
}

// Mode "SVS" -> Mode "SV".
def getFlatScratchInstSVfromSVS : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SVS"];
  let ValueCols = [["SV"]];
}

// Mode "SS" -> Mode "SV".
def getFlatScratchInstSVfromSS : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SS"];
  let ValueCols = [["SV"]];
}

// Within an FMAOp row of MFMATable records, maps the IsMac = 1 entry to the
// IsMac = 0 entry.
// NOTE(review): per the def name the IsMac = 0 entry is presumably the
// early-clobber form -- confirm against the MFMATable definition.
def getMFMAEarlyClobberOp : InstrMapping {
  let FilterClass = "MFMATable";
  let RowFields = ["FMAOp"];
  let ColFields = ["IsMac"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Maps a v_cmp instruction to its v_cmpx equivalent: within a VCMPOp row of
// VCMPVCMPXTable records, IsVCMPX = 0 is the key and IsVCMPX = 1 the value.
def getVCMPXOpFromVCMP : InstrMapping {
  let FilterClass = "VCMPVCMPXTable";
  let RowFields = ["VCMPOp"];
  let ColFields = ["IsVCMPX"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Searchable table built from every VOPD_Component record. Rows are emitted
// as the C++ type VOPDComponentInfo with the listed fields; the primary
// lookup is keyed on BaseVOP and is named getVOPDComponentHelper.
def VOPDComponentTable : GenericTable {
  let FilterClass = "VOPD_Component";
  let CppTypeName = "VOPDComponentInfo";
  let Fields = ["BaseVOP", "VOPDOp", "CanBeVOPDX"];
  let PrimaryKey = ["BaseVOP"];
  let PrimaryKeyName = "getVOPDComponentHelper";
}

// Secondary lookup into VOPDComponentTable keyed on VOPDOp.
def getVOPDBaseFromComponent : SearchIndex {
  let Table = VOPDComponentTable;
  let Key = ["VOPDOp"];
}

// Searchable table built from every VOPD_Base record. Rows are emitted as the
// C++ type VOPDInfo with the listed fields; the primary lookup is keyed on
// Opcode and is named getVOPDOpcodeHelper.
def VOPDPairs : GenericTable {
  let FilterClass = "VOPD_Base";
  let CppTypeName = "VOPDInfo";
  let Fields = ["Opcode", "OpX", "OpY", "SubTgt"];
  let PrimaryKey = ["Opcode"];
  let PrimaryKeyName = "getVOPDOpcodeHelper";
}

// Secondary lookup into VOPDPairs keyed on the (OpX, OpY, SubTgt) triple.
def getVOPDInfoFromComponentOpcodes : SearchIndex {
  let Table = VOPDPairs;
  let Key = ["OpX", "OpY", "SubTgt"];
}

include "SIInstructions.td"

include "DSInstructions.td"
include "MIMGInstructions.td"