//===-- SIInstrInfo.td -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">,
  AssemblerPredicate <(all_of FeatureWavefrontSize32)>;
def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">,
  AssemblerPredicate <(all_of FeatureWavefrontSize64)>;

class GCNPredicateControl : PredicateControl {
  Predicate SIAssemblerPredicate = isGFX6GFX7;
  Predicate VIAssemblerPredicate = isGFX8GFX9;
}

// Except for the NONE field, this must be kept in sync with the
// SIEncodingFamily enum in SIInstrInfo.cpp and the columns of the
// getMCOpcodeGen table.
def SIEncodingFamily {
  int NONE = -1;
  int SI = 0;
  int VI = 1;
  int SDWA = 2;
  int SDWA9 = 3;
  int GFX80 = 4;
  int GFX9 = 5;
  int GFX10 = 6;
  int SDWA10 = 7;
  int GFX90A = 8;
  int GFX940 = 9;
  int GFX11 = 10;
}

//===----------------------------------------------------------------------===//
// SI DAG Nodes
//===----------------------------------------------------------------------===//

def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;

def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD",
  SDTypeProfile<1, 3, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
  [SDNPMayLoad, SDNPMemOperand]
>;

def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
  SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
>;

def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;

def SIatomic_dec : SDNode<"AMDGPUISD::ATOMIC_DEC", SDTAtomic2,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;

def SDTAtomic2_f32 : SDTypeProfile<1, 2, [
  SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
]>;

def SIatomic_fmin : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMIN", SDTAtomic2_f32,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;

def SIatomic_fmax : SDNode<"AMDGPUISD::ATOMIC_LOAD_FMAX", SDTAtomic2_f32,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;

// load_d16_{lo|hi} ptr, tied_input
def SIload_d16 : SDTypeProfile<1, 2, [
  SDTCisPtrTy<1>,
  SDTCisSameAs<0, 2>
]>;


def SDTtbuffer_load : SDTypeProfile<1, 8,
  [                     // vdata
   SDTCisVT<1, v4i32>,  // rsrc
   SDTCisVT<2, i32>,    // vindex(VGPR)
   SDTCisVT<3, i32>,    // voffset(VGPR)
   SDTCisVT<4, i32>,    // soffset(SGPR)
   SDTCisVT<5, i32>,    // offset(imm)
   SDTCisVT<6, i32>,    // format(imm)
   SDTCisVT<7, i32>,    // cachepolicy, swizzled buffer(imm)
   SDTCisVT<8, i1>      // idxen(imm)
  ]>;

def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTtbuffer_load,
                            [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
def SItbuffer_load_d16 : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT_D16",
                                SDTtbuffer_load,
                                [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

def SDTtbuffer_store : SDTypeProfile<0, 9,
  [                     // vdata
   SDTCisVT<1, v4i32>,  // rsrc
   SDTCisVT<2, i32>,    // vindex(VGPR)
   SDTCisVT<3, i32>,    // voffset(VGPR)
   SDTCisVT<4, i32>,    // soffset(SGPR)
   SDTCisVT<5, i32>,    // offset(imm)
   SDTCisVT<6, i32>,    // format(imm)
   SDTCisVT<7, i32>,    // cachepolicy, swizzled buffer(imm)
   SDTCisVT<8, i1>      // idxen(imm)
  ]>;

def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
                             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SItbuffer_store_d16 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16",
                                 SDTtbuffer_store,
                                 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

def SDTBufferLoad : SDTypeProfile<1, 7,
  [                     // vdata
   SDTCisVT<1, v4i32>,  // rsrc
   SDTCisVT<2, i32>,    // vindex(VGPR)
   SDTCisVT<3, i32>,    // voffset(VGPR)
   SDTCisVT<4, i32>,    // soffset(SGPR)
   SDTCisVT<5, i32>,    // offset(imm)
   SDTCisVT<6, i32>,    // cachepolicy, swizzled buffer(imm)
   SDTCisVT<7, i1>]>;   // idxen(imm)

def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ubyte : SDNode <"AMDGPUISD::BUFFER_LOAD_UBYTE", SDTBufferLoad,
                                  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ushort : SDNode <"AMDGPUISD::BUFFER_LOAD_USHORT", SDTBufferLoad,
                                   [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_byte : SDNode <"AMDGPUISD::BUFFER_LOAD_BYTE", SDTBufferLoad,
                                 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_short: SDNode <"AMDGPUISD::BUFFER_LOAD_SHORT", SDTBufferLoad,
                                 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
                                   [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format_d16 : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_D16",
                                       SDTBufferLoad,
                                       [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;

def SDTBufferStore : SDTypeProfile<0, 8,
  [                     // vdata
   SDTCisVT<1, v4i32>,  // rsrc
   SDTCisVT<2, i32>,    // vindex(VGPR)
   SDTCisVT<3, i32>,    // voffset(VGPR)
   SDTCisVT<4, i32>,    // soffset(SGPR)
   SDTCisVT<5, i32>,    // offset(imm)
   SDTCisVT<6, i32>,    // cachepolicy, swizzled buffer(imm)
   SDTCisVT<7, i1>]>;   // idxen(imm)

def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
                             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_byte: SDNode <"AMDGPUISD::BUFFER_STORE_BYTE",
                                 SDTBufferStore,
                                 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_short : SDNode <"AMDGPUISD::BUFFER_STORE_SHORT",
                                   SDTBufferStore,
                                   [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_format : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT",
                                    SDTBufferStore,
                                    [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16",
                                        SDTBufferStore,
                                        [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

class SDBufferAtomic<string opcode> : SDNode <opcode,
  SDTypeProfile<1, 8,
      [SDTCisVT<2, v4i32>,  // rsrc
       SDTCisVT<3, i32>,    // vindex(VGPR)
       SDTCisVT<4, i32>,    // voffset(VGPR)
       SDTCisVT<5, i32>,    // soffset(SGPR)
       SDTCisVT<6, i32>,    // offset(imm)
       SDTCisVT<7, i32>,    // cachepolicy(imm)
       SDTCisVT<8, i1>]>,   // idxen(imm)
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;

def SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
def SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">;
def SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">;
def SIbuffer_atomic_smin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMIN">;
def SIbuffer_atomic_umin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMIN">;
<"AMDGPUISD::BUFFER_ATOMIC_UMIN">; 184def SIbuffer_atomic_smax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMAX">; 185def SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">; 186def SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">; 187def SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">; 188def SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">; 189def SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">; 190def SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">; 191def SIbuffer_atomic_csub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_CSUB">; 192def SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">; 193def SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">; 194def SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">; 195 196multiclass SDBufferAtomicNoRet { 197 def "_noret" : PatFrag< 198 (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, 199 node:$offset, node:$cachepolicy, node:$idxen), 200 (!cast<SDNode>(NAME) node:$vdata_in, node:$rsrc, node:$vindex, 201 node:$voffset, node:$soffset, node:$offset, node:$cachepolicy, 202 node:$idxen)> { 203 let HasNoUse = true; 204 } 205} 206 207defm SIbuffer_atomic_swap : SDBufferAtomicNoRet; 208defm SIbuffer_atomic_add : SDBufferAtomicNoRet; 209defm SIbuffer_atomic_sub : SDBufferAtomicNoRet; 210defm SIbuffer_atomic_smin : SDBufferAtomicNoRet; 211defm SIbuffer_atomic_umin : SDBufferAtomicNoRet; 212defm SIbuffer_atomic_smax : SDBufferAtomicNoRet; 213defm SIbuffer_atomic_umax : SDBufferAtomicNoRet; 214defm SIbuffer_atomic_and : SDBufferAtomicNoRet; 215defm SIbuffer_atomic_or : SDBufferAtomicNoRet; 216defm SIbuffer_atomic_xor : SDBufferAtomicNoRet; 217defm SIbuffer_atomic_inc : SDBufferAtomicNoRet; 218defm SIbuffer_atomic_dec : SDBufferAtomicNoRet; 219defm SIbuffer_atomic_fadd : SDBufferAtomicNoRet; 220defm SIbuffer_atomic_fmin : SDBufferAtomicNoRet; 221defm SIbuffer_atomic_fmax : SDBufferAtomicNoRet; 222 223def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP", 224 SDTypeProfile<1, 9, 225 [SDTCisVT<0, i32>, // dst 226 SDTCisVT<1, i32>, // src 227 SDTCisVT<2, i32>, // cmp 228 SDTCisVT<3, v4i32>, // rsrc 229 SDTCisVT<4, i32>, // vindex(VGPR) 230 SDTCisVT<5, i32>, // voffset(VGPR) 231 SDTCisVT<6, i32>, // soffset(SGPR) 232 SDTCisVT<7, i32>, // offset(imm) 233 SDTCisVT<8, i32>, // cachepolicy(imm) 234 SDTCisVT<9, i1>]>, // idxen(imm) 235 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore] 236>; 237 238def SIbuffer_atomic_cmpswap_noret : PatFrag< 239 (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset, 240 node:$soffset, node:$offset, node:$cachepolicy, node:$idxen), 241 (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex, 242 node:$voffset, node:$soffset, node:$offset, node:$cachepolicy, 243 node:$idxen)> { 244 let HasNoUse = true; 245} 246 247class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode, 248 SDTypeProfile<0, 2, 249 [SDTCisPtrTy<0>, // vaddr 250 SDTCisVT<1, ty>]>, // vdata 251 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore] 252>; 253 254def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET", 255 SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]> 256>; 257 258def SIlds : SDNode<"AMDGPUISD::LDS", 259 SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]> 260>; 261 262def SIload_d16_lo : SDNode<"AMDGPUISD::LOAD_D16_LO", 263 

def SIload_d16_lo_u8 : SDNode<"AMDGPUISD::LOAD_D16_LO_U8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_lo_i8 : SDNode<"AMDGPUISD::LOAD_D16_LO_I8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi : SDNode<"AMDGPUISD::LOAD_D16_HI",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi_u8 : SDNode<"AMDGPUISD::LOAD_D16_HI_U8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi_i8 : SDNode<"AMDGPUISD::LOAD_D16_HI_I8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
  SDTypeProfile<0, 1, [SDTCisInt<0>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;

def SIfptrunc_round_upward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_UPWARD",
  SDTFPRoundOp
>;

def SIfptrunc_round_downward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_DOWNWARD",
  SDTFPRoundOp
>;

//===----------------------------------------------------------------------===//
// ValueType helpers
//===----------------------------------------------------------------------===//

// Returns 1 if the source type is a floating-point type, 0 otherwise.
class isFloatType<ValueType SrcVT> {
  bit ret = !or(!eq(SrcVT.Value, f16.Value),
                !eq(SrcVT.Value, f32.Value),
                !eq(SrcVT.Value, f64.Value),
                !eq(SrcVT.Value, v2f16.Value),
                !eq(SrcVT.Value, v4f16.Value),
                !eq(SrcVT.Value, v8f16.Value),
                !eq(SrcVT.Value, v16f16.Value),
                !eq(SrcVT.Value, v2f32.Value),
                !eq(SrcVT.Value, v4f32.Value),
                !eq(SrcVT.Value, v8f32.Value),
                !eq(SrcVT.Value, v2f64.Value),
                !eq(SrcVT.Value, v4f64.Value));
}

// XXX - do v2i16 instructions?
class isIntType<ValueType SrcVT> {
  bit ret = !or(!eq(SrcVT.Value, i8.Value),
                !eq(SrcVT.Value, i16.Value),
                !eq(SrcVT.Value, i32.Value),
                !eq(SrcVT.Value, i64.Value),
                !eq(SrcVT.Value, v4i16.Value),
                !eq(SrcVT.Value, v8i16.Value),
                !eq(SrcVT.Value, v16i16.Value),
                !eq(SrcVT.Value, v2i32.Value),
                !eq(SrcVT.Value, v4i32.Value),
                !eq(SrcVT.Value, v8i32.Value));
}

class isPackedType<ValueType SrcVT> {
  bit ret = !or(!eq(SrcVT.Value, v2i16.Value),
                !eq(SrcVT.Value, v2f16.Value),
                !eq(SrcVT.Value, v4f16.Value),
                !eq(SrcVT.Value, v2i32.Value),
                !eq(SrcVT.Value, v2f32.Value),
                !eq(SrcVT.Value, v4i32.Value),
                !eq(SrcVT.Value, v4f32.Value),
                !eq(SrcVT.Value, v8i32.Value),
                !eq(SrcVT.Value, v8f32.Value));
}


//===----------------------------------------------------------------------===//
// PatFrags for global memory operations
//===----------------------------------------------------------------------===//

defm atomic_inc : binary_atomic_op_all_as<SIatomic_inc>;
defm atomic_dec : binary_atomic_op_all_as<SIatomic_dec>;
defm atomic_load_fmin : binary_atomic_op_all_as<SIatomic_fmin, 0>;
defm atomic_load_fmax : binary_atomic_op_all_as<SIatomic_fmax, 0>;

//===----------------------------------------------------------------------===//
// SDNodes and PatFrags for loads/stores with a glue input.
// These are for local loads and stores, to allow
// s_mov_b32 m0, -1 to be glued to the memory instructions.
//
// These mirror the regular load/store PatFrags and rely on special
// processing during Select() to add the glued copy.
//
//===----------------------------------------------------------------------===//

def AMDGPUld_glue : SDNode <"ISD::LOAD", SDTLoad,
  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad,
  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> {
  let IsLoad = 1;
  let IsUnindexed = 1;
}

def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def atomic_load_8_glue : PatFrag<(ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i8;
}

def atomic_load_16_glue : PatFrag<(ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i16;
}

def atomic_load_32_glue : PatFrag<(ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_load_64_glue : PatFrag<(ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}

def extload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsAnyExtLoad = 1;
}

def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsSignExtLoad = 1;
}

def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsZeroExtLoad = 1;
}

def extloadi8_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def zextloadi8_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def extloadi16_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def zextloadi16_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def sextloadi8_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}


let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
  let IsNonExtLoad = 1;
}

def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>;
def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>;
def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>;

def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>;
def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>;
def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>;
} // End IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces

def load_align8_local_m0 : PatFrag<(ops node:$ptr),
                                   (load_local_m0 node:$ptr)> {
  let IsLoad = 1;
  int MinAlignment = 8;
}

def load_align16_local_m0 : PatFrag<(ops node:$ptr),
                                    (load_local_m0 node:$ptr)> {
  let IsLoad = 1;
  int MinAlignment = 16;
}

let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
def atomic_load_8_local_m0 : PatFrag<(ops node:$ptr),
                                     (atomic_load_8_glue node:$ptr)>;
def atomic_load_16_local_m0 : PatFrag<(ops node:$ptr),
                                      (atomic_load_16_glue node:$ptr)>;
def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
                                      (atomic_load_32_glue node:$ptr)>;
def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr),
                                      (atomic_load_64_glue node:$ptr)>;
} // End let AddressSpaces = LoadAddress_local.AddrSpaces


def AMDGPUst_glue : SDNode <"ISD::STORE", SDTStore,
  [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
>;

def AMDGPUatomic_st_glue : SDNode <"ISD::ATOMIC_STORE", SDTAtomicStore,
  [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
>;

def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr),
                                  (AMDGPUst_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsUnindexed = 1;
}

def store_glue : PatFrag<(ops node:$val, node:$ptr),
                         (unindexedstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

def truncstore_glue : PatFrag<(ops node:$val, node:$ptr),
                              (unindexedstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 1;
}

def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr),
                                (truncstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i8;
  let IsTruncStore = 1;
}

def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr),
                                 (truncstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i16;
  let IsTruncStore = 1;
}

let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
def store_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                             (store_glue node:$val, node:$ptr)>;
def truncstorei8_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                    (truncstorei8_glue node:$val, node:$ptr)>;
def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                     (truncstorei16_glue node:$val, node:$ptr)>;
}

def store_align8_local_m0 : PatFrag <(ops node:$value, node:$ptr),
                                     (store_local_m0 node:$value, node:$ptr)>,
                            Aligned<8> {
  let IsStore = 1;
}

def store_align16_local_m0 : PatFrag <(ops node:$value, node:$ptr),
                                      (store_local_m0 node:$value, node:$ptr)>,
                             Aligned<16> {
  let IsStore = 1;
}

let PredicateCode = [{return cast<MemSDNode>(N)->getAlignment() < 4;}],
    GISelPredicateCode = [{return (*MI.memoperands_begin())->getAlign() < 4;}],
    AddressSpaces = [ AddrSpaces.Local ] in {
def load_align_less_than_4_local : PatFrag<(ops node:$ptr),
                                           (load_local node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def load_align_less_than_4_local_m0 : PatFrag<(ops node:$ptr),
                                              (load_local_m0 node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def store_align_less_than_4_local : PatFrag <(ops node:$value, node:$ptr),
                                             (store_local node:$value, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

def store_align_less_than_4_local_m0 : PatFrag <(ops node:$value, node:$ptr),
                                                (store_local_m0 node:$value, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}
}

def atomic_store_8_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i8;
}

def atomic_store_16_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i16;
}

def atomic_store_32_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_store_64_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}

let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
def atomic_store_8_local_m0 : PatFrag<(ops node:$ptr, node:$val),
                                      (atomic_store_8_glue node:$ptr, node:$val)>;
def atomic_store_16_local_m0 : PatFrag<(ops node:$ptr, node:$val),
                                       (atomic_store_16_glue node:$ptr, node:$val)>;
def atomic_store_32_local_m0 : PatFrag<(ops node:$ptr, node:$val),
                                       (atomic_store_32_glue node:$ptr, node:$val)>;
def atomic_store_64_local_m0 : PatFrag<(ops node:$ptr, node:$val),
                                       (atomic_store_64_glue node:$ptr, node:$val)>;
} // End let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces


def si_setcc_uniform : PatFrag <
  (ops node:$lhs, node:$rhs, node:$cond),
  (setcc node:$lhs, node:$rhs, node:$cond), [{
  return !N->isDivergent();
}]>;

//===----------------------------------------------------------------------===//
// SDNodes PatFrags for a16 loads and stores with 3 components.
// v3f16/v3i16 is widened to v4f16/v4i16, so we need to match on the memory
// load/store size.
//===----------------------------------------------------------------------===//

class mubuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
       node:$auxiliary, node:$idxen),
  (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
        node:$auxiliary, node:$idxen)> {
  let IsLoad = 1;
  let MemoryVT = vt;
}

class mubuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
       node:$auxiliary, node:$idxen),
  (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
        node:$auxiliary, node:$idxen)> {
  let IsStore = 1;
  let MemoryVT = vt;
}

class mtbuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
       node:$format, node:$auxiliary, node:$idxen),
  (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
        node:$format, node:$auxiliary, node:$idxen)> {
  let IsLoad = 1;
  let MemoryVT = vt;
}

class mtbuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
       node:$format, node:$auxiliary, node:$idxen),
  (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
        node:$format, node:$auxiliary, node:$idxen)> {
  let IsStore = 1;
  let MemoryVT = vt;
}

//===----------------------------------------------------------------------===//
// SDNodes PatFrags for d16 loads
//===----------------------------------------------------------------------===//

class LoadD16Frag <SDPatternOperator op> : PatFrag<
  (ops node:$ptr, node:$tied_in),
  (op node:$ptr, node:$tied_in)> {
  let IsLoad = 1;
}

foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

def load_d16_hi_#as : LoadD16Frag <SIload_d16_hi>;

def az_extloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_u8> {
  let MemoryVT = i8;
}

def sextloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_i8> {
  let MemoryVT = i8;
}

def load_d16_lo_#as : LoadD16Frag <SIload_d16_lo>;

def az_extloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_u8> {
  let MemoryVT = i8;
}

def sextloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_i8> {
  let MemoryVT = i8;
}

} // End let AddressSpaces = ...
} // End foreach AddrSpace

def lshr_rev : PatFrag <
  (ops node:$src1, node:$src0),
  (srl $src0, $src1)
>;

def ashr_rev : PatFrag <
  (ops node:$src1, node:$src0),
  (sra $src0, $src1)
>;

def lshl_rev : PatFrag <
  (ops node:$src1, node:$src0),
  (shl $src0, $src1)
>;

def add_ctpop : PatFrag <
  (ops node:$src0, node:$src1),
  (add (ctpop $src0), $src1)
>;

def xnor : PatFrag <
  (ops node:$src0, node:$src1),
  (not (xor $src0, $src1))
>;

foreach I = 1-4 in {
def shl#I#_add : PatFrag <
  (ops node:$src0, node:$src1),
  (add (shl_oneuse $src0, (i32 I)), $src1)> {
  // FIXME: Poor substitute for disabling pattern in SelectionDAG
  let PredicateCode = [{return false;}];
  let GISelPredicateCode = [{return true;}];
}
}

multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
                            SDTypeProfile tc = SDTAtomic2,
                            bit IsInt = 1> {

  def _glue : SDNode <
    !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, tc,
    [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
  >;

  let AddressSpaces = StoreAddress_local.AddrSpaces in {
    defm _local_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
    defm _local_m0 : noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"),
                                             IsInt>;
  }

  let AddressSpaces = StoreAddress_region.AddrSpaces in {
    defm _region_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue"), IsInt>;
    defm _region_m0 : noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue"),
                                              IsInt>;
  }
}

defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
defm atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
defm atomic_inc : SIAtomicM0Glue2 <"INC", 1>;
defm atomic_dec : SIAtomicM0Glue2 <"DEC", 1>;
defm atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
defm atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">;
defm atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">;
defm atomic_load_or : SIAtomicM0Glue2 <"LOAD_OR">;
defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
defm atomic_swap : SIAtomicM0Glue2 <"SWAP">;
defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>;
defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32, 0>;
defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32, 0>;

def as_i1timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
}]>;

def as_i8imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8);
}]>;

def as_i8timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;

def as_i16imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;

def as_i16timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;

def as_i32imm: SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

def as_i32timm: SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

def as_i64imm: SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
}]>;

def cond_as_i32imm: SDNodeXForm<cond, [{
  return CurDAG->getTargetConstant(N->get(), SDLoc(N), MVT::i32);
}]>;

// Copied from the AArch64 backend:
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def frameindex_to_targetframeindex : SDNodeXForm<frameindex, [{
  auto FI = cast<FrameIndexSDNode>(N);
  return CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
}]>;

// Copied from the AArch64 backend:
def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;

class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
  uint64_t Imm = N->getZExtValue();
  unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;
  return CurDAG->getTargetConstant(Bit, SDLoc(N), MVT::i1);
}]>;

def SIMM16bit : ImmLeaf <i32,
  [{return isInt<16>(Imm);}]
>;

def UIMM16bit : ImmLeaf <i32,
  [{return isUInt<16>(Imm);}]
>;

def i64imm_32bit : ImmLeaf<i64, [{
  return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
}]>;

def InlineImm16 : ImmLeaf<i16, [{
  return isInlineImmediate16(Imm);
}]>;

def InlineImm32 : ImmLeaf<i32, [{
  return isInlineImmediate32(Imm);
}]>;

def InlineImm64 : ImmLeaf<i64, [{
  return isInlineImmediate64(Imm);
}]>;

def InlineImmFP32 : FPImmLeaf<f32, [{
  return isInlineImmediate(Imm);
}]>;

def InlineImmFP64 : FPImmLeaf<f64, [{
  return isInlineImmediate(Imm);
}]>;


class VGPRImm <dag frag> : PatLeaf<frag, [{
  return isVGPRImm(N);
}]>;

def NegateImm : SDNodeXForm<imm, [{
  return CurDAG->getConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

// TODO: When FP inline imm values work?
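// The two leaves below match constants in [-64, -17]: not inline immediates
// themselves (Imm < -16), but their negation lies in [17, 64] and therefore
// is an inline constant, which NegateImm produces.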
def NegSubInlineConst32 : ImmLeaf<i32, [{
  return Imm < -16 && Imm >= -64;
}], NegateImm>;

def NegSubInlineIntConst16 : ImmLeaf<i16, [{
  return Imm < -16 && Imm >= -64;
}], NegateImm>;

def ShiftAmt32Imm : ImmLeaf <i32, [{
  return Imm < 32;
}]>;

def getNegV2I16Imm : SDNodeXForm<build_vector, [{
  return SDValue(packNegConstantV2I16(N, *CurDAG), 0);
}]>;

def NegSubInlineConstV216 : PatLeaf<(build_vector), [{
  assert(N->getNumOperands() == 2);
  assert(N->getOperand(0).getValueType().getSizeInBits() == 16);
  SDValue Src0 = N->getOperand(0);
  SDValue Src1 = N->getOperand(1);
  if (Src0 == Src1)
    return isNegInlineImmediate(Src0.getNode());

  return (isNullConstantOrUndef(Src0) && isNegInlineImmediate(Src1.getNode())) ||
         (isNullConstantOrUndef(Src1) && isNegInlineImmediate(Src0.getNode()));
}], getNegV2I16Imm>;


def fp16_zeros_high_16bits : PatLeaf<(f16 VGPR_32:$src), [{
  return fp16SrcZerosHighBits(N->getOpcode());
}]>;


//===----------------------------------------------------------------------===//
// MUBUF/SMEM Patterns
//===----------------------------------------------------------------------===//

def extract_cpol : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & AMDGPU::CPol::ALL, SDLoc(N), MVT::i8);
}]>;

def extract_swz : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant((N->getZExtValue() >> 3) & 1, SDLoc(N), MVT::i8);
}]>;

def set_glc : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() | AMDGPU::CPol::GLC, SDLoc(N), MVT::i8);
}]>;

//===----------------------------------------------------------------------===//
// Custom Operands
//===----------------------------------------------------------------------===//

def SoppBrTarget : AsmOperandClass {
  let Name = "SoppBrTarget";
  let ParserMethod = "parseSOppBrTarget";
}

def sopp_brtarget : Operand<OtherVT> {
  let EncoderMethod = "getSOPPBrEncoding";
  let DecoderMethod = "decodeSoppBrTarget";
  let OperandType = "OPERAND_PCREL";
  let ParserMatchClass = SoppBrTarget;
}

def si_ga : Operand<iPTR>;

def InterpSlotMatchClass : AsmOperandClass {
  let Name = "InterpSlot";
  let PredicateMethod = "isInterpSlot";
  let ParserMethod = "parseInterpSlot";
  let RenderMethod = "addImmOperands";
}

def InterpSlot : Operand<i32> {
  let PrintMethod = "printInterpSlot";
  let ParserMatchClass = InterpSlotMatchClass;
  let OperandType = "OPERAND_IMMEDIATE";
}

def AttrMatchClass : AsmOperandClass {
  let Name = "Attr";
  let PredicateMethod = "isInterpAttr";
  let ParserMethod = "parseInterpAttr";
  let RenderMethod = "addImmOperands";
}

// It appears to be necessary to create a separate operand for this to
// be able to parse attr<num> with no space.
def Attr : Operand<i32> {
  let PrintMethod = "printInterpAttr";
  let ParserMatchClass = AttrMatchClass;
  let OperandType = "OPERAND_IMMEDIATE";
}

def AttrChanMatchClass : AsmOperandClass {
  let Name = "AttrChan";
  let PredicateMethod = "isAttrChan";
  let RenderMethod = "addImmOperands";
}

def AttrChan : Operand<i32> {
  let PrintMethod = "printInterpAttrChan";
  let ParserMatchClass = AttrChanMatchClass;
  let OperandType = "OPERAND_IMMEDIATE";
}

def SendMsgMatchClass : AsmOperandClass {
  let Name = "SendMsg";
  let PredicateMethod = "isSendMsg";
  let ParserMethod = "parseSendMsgOp";
  let RenderMethod = "addImmOperands";
}

def SwizzleMatchClass : AsmOperandClass {
  let Name = "Swizzle";
  let PredicateMethod = "isSwizzle";
  let ParserMethod = "parseSwizzleOp";
  let RenderMethod = "addImmOperands";
  let IsOptional = 1;
}

def EndpgmMatchClass : AsmOperandClass {
  let Name = "EndpgmImm";
  let PredicateMethod = "isEndpgm";
  let ParserMethod = "parseEndpgmOp";
  let RenderMethod = "addImmOperands";
  let IsOptional = 1;
}

def ExpTgtMatchClass : AsmOperandClass {
  let Name = "ExpTgt";
  let PredicateMethod = "isExpTgt";
  let ParserMethod = "parseExpTgt";
  let RenderMethod = "printExpTgt";
}

def SWaitMatchClass : AsmOperandClass {
  let Name = "SWaitCnt";
  let RenderMethod = "addImmOperands";
  let ParserMethod = "parseSWaitCntOps";
}

def DepCtrMatchClass : AsmOperandClass {
  let Name = "DepCtr";
  let RenderMethod = "addImmOperands";
  let ParserMethod = "parseDepCtrOps";
}

def SDelayMatchClass : AsmOperandClass {
  let Name = "SDelayAlu";
  let RenderMethod = "addImmOperands";
  let ParserMethod = "parseSDelayAluOps";
}

def VReg32OrOffClass : AsmOperandClass {
  let Name = "VReg32OrOff";
  let ParserMethod = "parseVReg32OrOff";
}

let OperandType = "OPERAND_IMMEDIATE" in {
def SendMsgImm : Operand<i32> {
  let PrintMethod = "printSendMsg";
  let ParserMatchClass = SendMsgMatchClass;
}

def SwizzleImm : Operand<i16> {
  let PrintMethod = "printSwizzle";
  let ParserMatchClass = SwizzleMatchClass;
}

def EndpgmImm : Operand<i16> {
  let PrintMethod = "printEndpgm";
  let ParserMatchClass = EndpgmMatchClass;
}

def WAIT_FLAG : Operand <i32> {
  let ParserMatchClass = SWaitMatchClass;
  let PrintMethod = "printWaitFlag";
}

def DepCtrImm : Operand <i32> {
  let ParserMatchClass = DepCtrMatchClass;
  let PrintMethod = "printDepCtr";
}

def DELAY_FLAG : Operand <i32> {
  let ParserMatchClass = SDelayMatchClass;
  let PrintMethod = "printDelayFlag";
}
} // End OperandType = "OPERAND_IMMEDIATE"

include "SIInstrFormats.td"
include "VIInstrFormats.td"

def BoolReg : AsmOperandClass {
  let Name = "BoolReg";
  let ParserMethod = "parseBoolReg";
  let RenderMethod = "addRegOperands";
}

class BoolRC : RegisterOperand<SReg_1> {
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
}

def SSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
}

def VOPDstS64orS32 : BoolRC {
  let PrintMethod = "printVOPDst";
}

// SCSrc_i1 is the operand for pseudo instructions only.
// Boolean immediates shall not be exposed to codegen instructions.
def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_REG_IMM_INT32";
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
}

//===----------------------------------------------------------------------===//
// ExpSrc* Special cases for exp src operands which are printed as
// "off" depending on the en operand.
//===----------------------------------------------------------------------===//

def ExpSrc0 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc0";
  let ParserMatchClass = VReg32OrOffClass;
}

def ExpSrc1 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc1";
  let ParserMatchClass = VReg32OrOffClass;
}

def ExpSrc2 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc2";
  let ParserMatchClass = VReg32OrOffClass;
}

def ExpSrc3 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc3";
  let ParserMatchClass = VReg32OrOffClass;
}

class SDWASrc<ValueType vt> : RegisterOperand<VS_32> {
  let OperandNamespace = "AMDGPU";
  string Type = !if(isFloatType<vt>.ret, "FP", "INT");
  let OperandType = "OPERAND_REG_INLINE_C_"#Type#vt.Size;
  let DecoderMethod = "decodeSDWASrc"#vt.Size;
  let EncoderMethod = "getSDWASrcEncoding";
}

def SDWASrc_i32 : SDWASrc<i32>;
def SDWASrc_i16 : SDWASrc<i16>;
def SDWASrc_f32 : SDWASrc<f32>;
def SDWASrc_f16 : SDWASrc<f16>;

def SDWAVopcDst : BoolRC {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_SDWA_VOPC_DST";
  let EncoderMethod = "getSDWAVopcDstEncoding";
  let DecoderMethod = "decodeSDWAVopcDst";
  let PrintMethod = "printVOPDst";
}

class NamedMatchClass<string CName, bit Optional = 1> : AsmOperandClass {
  let Name = "Imm"#CName;
  let PredicateMethod = "is"#CName;
  let ParserMethod = !if(Optional, "parseOptionalOperand", "parse"#CName);
  let RenderMethod = "addImmOperands";
  let IsOptional = Optional;
  let DefaultMethod = !if(Optional, "default"#CName, ?);
}

class NamedOperandBit<string Name, AsmOperandClass MatchClass> : Operand<i1> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

class NamedOperandBit_0<string Name, AsmOperandClass MatchClass> :
  OperandWithDefaultOps<i1, (ops (i1 0))> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

class NamedOperandBit_1<string Name, AsmOperandClass MatchClass> :
  OperandWithDefaultOps<i1, (ops (i1 1))> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

class NamedOperandU8<string Name, AsmOperandClass MatchClass> : Operand<i8> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

class NamedOperandU16<string Name, AsmOperandClass MatchClass> : Operand<i16> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

class NamedOperandU32<string Name, AsmOperandClass MatchClass> : Operand<i32> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

class NamedOperandU32_0<string Name, AsmOperandClass MatchClass> :
  OperandWithDefaultOps<i32, (ops (i32 0))> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}
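
// Note: NamedOperandU32_0 above and NamedOperandU32Default0 below are
// structurally identical; both are OperandWithDefaultOps<i32, (ops (i32 0))>
// wrappers that supply a default immediate of 0 when the operand is omitted.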

class NamedOperandU32Default0<string Name, AsmOperandClass MatchClass> :
  OperandWithDefaultOps<i32, (ops (i32 0))> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

class NamedOperandU32Default1<string Name, AsmOperandClass MatchClass> :
  OperandWithDefaultOps<i32, (ops (i32 1))> {
  let PrintMethod = "print"#Name;
  let ParserMatchClass = MatchClass;
}

let OperandType = "OPERAND_IMMEDIATE" in {

def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>;
def idxen : NamedOperandBit<"Idxen", NamedMatchClass<"Idxen">>;
def addr64 : NamedOperandBit<"Addr64", NamedMatchClass<"Addr64">>;

def flat_offset : NamedOperandU16<"FlatOffset", NamedMatchClass<"FlatOffset">>;
def offset : NamedOperandU16<"Offset", NamedMatchClass<"Offset">>;
def offset0 : NamedOperandU8<"Offset0", NamedMatchClass<"Offset0">>;
def offset1 : NamedOperandU8<"Offset1", NamedMatchClass<"Offset1">>;

def gds : NamedOperandBit<"GDS", NamedMatchClass<"GDS">>;

def omod : NamedOperandU32<"OModSI", NamedMatchClass<"OModSI">>;
def omod0 : NamedOperandU32_0<"OModSI", NamedMatchClass<"OModSI">>;

// We need to make the cases with a default of 0 distinct from no
// default to help deal with some cases where the operand appears
// before a mandatory operand.
def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>;
def clampmod0 : NamedOperandBit_0<"ClampSI", NamedMatchClass<"ClampSI">>;
def highmod : NamedOperandBit<"High", NamedMatchClass<"High">>;

def CPol : NamedOperandU32<"CPol", NamedMatchClass<"CPol">>;
def CPol_0 : NamedOperandU32Default0<"CPol", NamedMatchClass<"CPol">>;
def CPol_GLC1 : NamedOperandU32Default1<"CPol", NamedMatchClass<"CPol">>;

def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
def TFE_0 : NamedOperandBit_0<"TFE", NamedMatchClass<"TFE">>;
def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>;
def SWZ_0 : NamedOperandBit_0<"SWZ", NamedMatchClass<"SWZ">>;
def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
def GFX10A16 : NamedOperandBit<"GFX10A16", NamedMatchClass<"GFX10A16">>;
def D16 : NamedOperandBit<"D16", NamedMatchClass<"D16">>;
def LWE : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>;

def FORMAT : NamedOperandU8<"FORMAT", NamedMatchClass<"FORMAT", 0>>;

def DMask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
def Dim : NamedOperandU8<"Dim", NamedMatchClass<"Dim", 0>>;

def dst_sel : NamedOperandU32<"SDWADstSel", NamedMatchClass<"SDWADstSel">>;
def src0_sel : NamedOperandU32<"SDWASrc0Sel", NamedMatchClass<"SDWASrc0Sel">>;
def src1_sel : NamedOperandU32<"SDWASrc1Sel", NamedMatchClass<"SDWASrc1Sel">>;
def dst_unused : NamedOperandU32<"SDWADstUnused", NamedMatchClass<"SDWADstUnused">>;

def op_sel0 : NamedOperandU32Default0<"OpSel", NamedMatchClass<"OpSel">>;
def op_sel_hi0 : NamedOperandU32Default0<"OpSelHi", NamedMatchClass<"OpSelHi">>;
def neg_lo0 : NamedOperandU32Default0<"NegLo", NamedMatchClass<"NegLo">>;
def neg_hi0 : NamedOperandU32Default0<"NegHi", NamedMatchClass<"NegHi">>;

def dpp8 : NamedOperandU32<"DPP8", NamedMatchClass<"DPP8", 0>>;
NamedOperandU32<"DPP8", NamedMatchClass<"DPP8", 0>>; 1270def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>; 1271 1272def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>; 1273def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>; 1274def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>; 1275def FI : NamedOperandU32<"FI", NamedMatchClass<"FI">>; 1276 1277def blgp : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>; 1278def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>; 1279def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>; 1280 1281def hwreg : NamedOperandU32<"Hwreg", NamedMatchClass<"Hwreg", 0>>; 1282 1283def exp_tgt : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> { 1284 1285} 1286 1287def wait_vdst : NamedOperandU8<"WaitVDST", NamedMatchClass<"WaitVDST">>; 1288def wait_exp : NamedOperandU8<"WaitEXP", NamedMatchClass<"WaitEXP">>; 1289 1290} // End OperandType = "OPERAND_IMMEDIATE" 1291 1292class KImmMatchClass<int size> : AsmOperandClass { 1293 let Name = "KImmFP"#size; 1294 let PredicateMethod = "isKImmFP"#size; 1295 let ParserMethod = "parseImm"; 1296 let RenderMethod = "addKImmFP"#size#"Operands"; 1297} 1298 1299class kimmOperand<ValueType vt> : Operand<vt> { 1300 let OperandNamespace = "AMDGPU"; 1301 let OperandType = "OPERAND_KIMM"#vt.Size; 1302 let PrintMethod = "printU"#vt.Size#"ImmOperand"; 1303 let ParserMatchClass = !cast<AsmOperandClass>("KImmFP"#vt.Size#"MatchClass"); 1304 let DecoderMethod = "decodeOperand_f"#vt.Size#"kimm"; 1305} 1306 1307// 32-bit VALU immediate operand that uses the constant bus. 1308def KImmFP32MatchClass : KImmMatchClass<32>; 1309def f32kimm : kimmOperand<i32>; 1310 1311// 32-bit VALU immediate operand with a 16-bit value that uses the 1312// constant bus. 
def KImmFP16MatchClass : KImmMatchClass<16>;
def f16kimm : kimmOperand<i16>;

class FPInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "RegOrImmWithFP"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithFPInputMods";
  let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
}

class FPVCSrcInputModsMatchClass <int opSize> : FPInputModsMatchClass <opSize> {
  let Name = "RegOrInlineImmWithFP"#opSize#"InputMods";
  let PredicateMethod = "isRegOrInlineImmWithFP"#opSize#"InputMods";
}

def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;

def FP16VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<16>;
def FP32VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<32>;

class InputMods <AsmOperandClass matchClass> : Operand <i32> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_INPUT_MODS";
  let ParserMatchClass = matchClass;
}

class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;

def FP16VCSrcInputMods : FPInputMods<FP16VCSrcInputModsMatchClass>;
def FP32VCSrcInputMods : FPInputMods<FP32VCSrcInputModsMatchClass>;

class IntInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "RegOrImmWithInt"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithIntInputMods";
  let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods";
}
class IntVCSrcInputModsMatchClass <int opSize> : IntInputModsMatchClass <opSize> {
  let Name = "RegOrInlineImmWithInt"#opSize#"InputMods";
  let PredicateMethod = "isRegOrInlineImmWithInt"#opSize#"InputMods";
}
def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>;

class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}
def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
def Int32VCSrcInputMods : IntInputMods<Int32VCSrcInputModsMatchClass>;

class OpSelModsMatchClass : AsmOperandClass {
  let Name = "OpSelMods";
  let ParserMethod = "parseRegOrImm";
  let PredicateMethod = "isRegOrImm";
}

def IntOpSelModsMatchClass : OpSelModsMatchClass;
def IntOpSelMods : InputMods<IntOpSelModsMatchClass>;

class FPSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "SDWAWithFP"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithFPInputMods";
  let PredicateMethod = "isSDWAFP"#opSize#"Operand";
}

def FP16SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<16>;
def FP32SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<32>;

class FPSDWAInputMods <FPSDWAInputModsMatchClass matchClass> :
  InputMods <matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

def FP16SDWAInputMods : FPSDWAInputMods<FP16SDWAInputModsMatchClass>;
def FP32SDWAInputMods : FPSDWAInputMods<FP32SDWAInputModsMatchClass>;

def FPVRegInputModsMatchClass : AsmOperandClass {
  let Name = "VRegWithFPInputMods";
  let ParserMethod = "parseRegWithFPInputMods";
  let PredicateMethod = "isVRegWithInputMods";
}

def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "SDWAWithInt"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithIntInputMods";
  let PredicateMethod = "isSDWAInt"#opSize#"Operand";
}

def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>;
def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>;
def Bin32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32> {
  let Name = "SDWAWithBin32InputMods";
  let ParserMethod = "parseRegOrImm";
}

class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> :
  InputMods <matchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}

def Int16SDWAInputMods : IntSDWAInputMods<Int16SDWAInputModsMatchClass>;
def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>;
def Bin32SDWAInputMods : IntSDWAInputMods<Bin32SDWAInputModsMatchClass>;

def IntVRegInputModsMatchClass : AsmOperandClass {
  let Name = "VRegWithIntInputMods";
  let ParserMethod = "parseRegWithIntInputMods";
  let PredicateMethod = "isVRegWithInputMods";
}

def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}

class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "PackedFP"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImm";
  let PredicateMethod = "isRegOrImm";
//  let PredicateMethod = "isPackedFP"#opSize#"InputMods";
}

class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "PackedInt"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImm";
  let PredicateMethod = "isRegOrImm";
//  let PredicateMethod = "isPackedInt"#opSize#"InputMods";
}

def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;

class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
//  let PrintMethod = "printPackedFPInputMods";
}

class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <matchClass> {
//  let PrintMethod = "printPackedIntInputMods";
}

def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;

//===----------------------------------------------------------------------===//
// Complex patterns
//===----------------------------------------------------------------------===//

def DS1Addr1Offset : ComplexPattern<iPTR, 2, "SelectDS1Addr1Offset">;
def DS64Bit4ByteAligned : ComplexPattern<iPTR, 3, "SelectDS64Bit4ByteAligned">;
def DS128Bit8ByteAligned : ComplexPattern<iPTR, 3, "SelectDS128Bit8ByteAligned">;

def MOVRELOffset : ComplexPattern<iPTR, 2, "SelectMOVRELOffset">;

def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
// VOP3Mods, but the input source is known to never be NaN.
def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;

def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;

def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;

def VOP3PModsDOT : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">;
def DotIUVOP3PMods : ComplexPattern<untyped, 1, "SelectDotIUVOP3PMods">;
def WMMAOpSelVOP3PMods : ComplexPattern<untyped, 1, "SelectWMMAOpSelVOP3PMods">;

def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;

def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;

def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;

def VINTERPMods : ComplexPattern<untyped, 2, "SelectVINTERPMods">;
def VINTERPModsHi : ComplexPattern<untyped, 2, "SelectVINTERPModsHi">;

//===----------------------------------------------------------------------===//
// SI assembler operands
//===----------------------------------------------------------------------===//

def SIOperand {
  int ZERO = 0x80;
  int VCC = 0x6A;
  int FLAT_SCR = 0x68;
}

// This should be kept in sync with SISrcMods enum
def SRCMODS {
  int NONE = 0;
  int NEG = 1;
  int ABS = 2;
  int NEG_ABS = 3;

  int NEG_HI = ABS;
  int OP_SEL_0 = 4;
  int OP_SEL_1 = 8;
  int DST_OP_SEL = 8;
}

def DSTCLAMP {
  int NONE = 0;
  int ENABLE = 1;
}

def DSTOMOD {
  int NONE = 0;
}

def HWREG {
  int MODE = 1;
  int STATUS = 2;
  int TRAPSTS = 3;
  int HW_ID = 4;
  int GPR_ALLOC = 5;
  int LDS_ALLOC = 6;
  int IB_STS = 7;
  int MEM_BASES = 15;
  int TBA_LO = 16;
  int TBA_HI = 17;
  int TMA_LO = 18;
  int TMA_HI = 19;
  int FLAT_SCR_LO = 20;
  int FLAT_SCR_HI = 21;
  int XNACK_MASK = 22;
  int POPS_PACKER = 25;
  int SHADER_CYCLES = 29;
}

class getHwRegImm<int Reg, int Offset = 0, int Size = 32> {
  int ret = !and(!or(Reg,
                     !shl(Offset, 6),
                     !shl(!add(Size, -1), 11)), 65535);
}

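// The packed value places the register id in bits [5:0], the bit offset in
// bits [10:6] and (size - 1) in bits [15:11]. For example,
// getHwRegImm<HWREG.MODE>.ret with the default offset 0 and size 32 yields
// 1 | (0 << 6) | (31 << 11) = 0xF801.
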
//===----------------------------------------------------------------------===//
//
// SI Instruction multiclass helpers.
//
// Instructions with _32 take 32-bit operands.
// Instructions with _64 take 64-bit operands.
//
// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
// encoding is the standard encoding, but instructions that make use of
// any of the instruction modifiers must use the 64-bit encoding.
//
// Instructions with _e32 use the 32-bit encoding.
// Instructions with _e64 use the 64-bit encoding.
//
//===----------------------------------------------------------------------===//

class SIMCInstr <string pseudo, int subtarget> {
  string PseudoInstr = pseudo;
  int Subtarget = subtarget;
}

//===----------------------------------------------------------------------===//
// Vector ALU classes
//===----------------------------------------------------------------------===//

class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> {
  int ret =
    !if (!eq(Src0.Value, untyped.Value),      0,
      !if (!eq(Src1.Value, untyped.Value),    1,    // VOP1
        !if (!eq(Src2.Value, untyped.Value),  2,    // VOP2
                                              3))); // VOP3
}

// Returns the register class to use for the destination of VOP[123C]
// instructions for the given VT.
class getVALUDstForVT<ValueType VT> {
  RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
                          !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
                            !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
                              !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32>,
                                VOPDstS64orS32)))); // else VT == i1
}

// Returns the register class to use for the destination of VOP[12C]
// instructions with SDWA extension
class getSDWADstForVT<ValueType VT> {
  RegisterOperand ret = !if(!eq(VT.Size, 1),
                            SDWAVopcDst,             // VOPC
                            VOPDstOperand<VGPR_32>); // VOP1/2 32-bit dst
}

// Returns the register class to use for source 0 of VOP[12C]
// instructions for the given VT.
class getVOPSrc0ForVT<ValueType VT> {
  bit isFP = isFloatType<VT>.ret;

  RegisterOperand ret =
    !if(isFP,
      !if(!eq(VT.Size, 64),
        VSrc_f64,
        !if(!eq(VT.Value, f16.Value),
          VSrc_f16,
          !if(!eq(VT.Value, v2f16.Value),
            VSrc_v2f16,
            !if(!eq(VT.Value, v4f16.Value),
              AVSrc_64,
              VSrc_f32
            )
          )
        )
      ),
      !if(!eq(VT.Size, 64),
        VSrc_b64,
        !if(!eq(VT.Value, i16.Value),
          VSrc_b16,
          !if(!eq(VT.Value, v2i16.Value),
            VSrc_v2b16,
            VSrc_b32
          )
        )
      )
    );
}

class getSOPSrcForVT<ValueType VT> {
  RegisterOperand ret = !if(!eq(VT.Size, 64), SSrc_b64, SSrc_b32);
}

// Returns the vreg register class to use for source operand given VT
class getVregSrcForVT<ValueType VT> {
  RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128,
                        !if(!eq(VT.Size, 96), VReg_96,
                          !if(!eq(VT.Size, 64), VReg_64,
                            !if(!eq(VT.Size, 48), VReg_64,
                              VGPR_32))));
}

class getSDWASrcForVT <ValueType VT> {
  bit isFP = isFloatType<VT>.ret;
  RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32);
  RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32);
  RegisterOperand ret = !if(isFP, retFlt, retInt);
}

// Returns the register class to use for sources of VOP3 instructions for the
// given VT.
class getVOP3SrcForVT<ValueType VT> {
  bit isFP = isFloatType<VT>.ret;
  RegisterOperand ret =
  !if(!eq(VT.Size, 128),
     VSrc_128,
     !if(!eq(VT.Size, 64),
        !if(isFP,
           !if(!eq(VT.Value, v2f32.Value),
              VSrc_v2f32,
              VSrc_f64),
           !if(!eq(VT.Value, v2i32.Value),
              VSrc_v2b32,
              VSrc_b64)),
        !if(!eq(VT.Value, i1.Value),
           SSrc_i1,
           !if(isFP,
              !if(!eq(VT.Value, f16.Value),
                 VSrc_f16,
                 !if(!eq(VT.Value, v2f16.Value),
                    VSrc_v2f16,
                    !if(!eq(VT.Value, v4f16.Value),
                       AVSrc_64,
                       VSrc_f32
                    )
                 )
              ),
              !if(!eq(VT.Value, i16.Value),
                 VSrc_b16,
                 !if(!eq(VT.Value, v2i16.Value),
                    VSrc_v2b16,
                    VSrc_b32
                 )
              )
           )
        )
     )
  );
}

// Src2 of VOP3 DPP instructions cannot be a literal.
class getVOP3DPPSrcForVT<ValueType VT> {
  bit isFP = isFloatType<VT>.ret;
  RegisterOperand ret =
      !if (!eq(VT.Value, i1.Value), SSrc_i1,
           !if (isFP,
                !if (!eq(VT.Value, f16.Value), VCSrc_f16,
                     !if (!eq(VT.Value, v2f16.Value), VCSrc_v2f16, VCSrc_f32)),
                !if (!eq(VT.Value, i16.Value), VCSrc_b16,
                     !if (!eq(VT.Value, v2i16.Value), VCSrc_v2b16,
                          VCSrc_b32))));
}

// Float or packed int
class isModifierType<ValueType SrcVT> {
  bit ret = !or(!eq(SrcVT.Value, f16.Value),
                !eq(SrcVT.Value, f32.Value),
                !eq(SrcVT.Value, f64.Value),
                !eq(SrcVT.Value, v2f16.Value),
                !eq(SrcVT.Value, v2i16.Value),
                !eq(SrcVT.Value, v2f32.Value),
                !eq(SrcVT.Value, v2i32.Value),
                !eq(SrcVT.Value, v4f16.Value),
                !eq(SrcVT.Value, v4i16.Value),
                !eq(SrcVT.Value, v4f32.Value),
                !eq(SrcVT.Value, v4i32.Value),
                !eq(SrcVT.Value, v8f16.Value),
                !eq(SrcVT.Value, v8i16.Value),
                !eq(SrcVT.Value, v8f32.Value),
                !eq(SrcVT.Value, v8i32.Value),
                !eq(SrcVT.Value, v16f16.Value),
                !eq(SrcVT.Value, v16i16.Value));
}

// Return type of input modifiers operand for the specified input operand.
class getSrcMod <ValueType VT, bit EnableF32SrcMods> {
  bit isFP = isFloatType<VT>.ret;
  bit isPacked = isPackedType<VT>.ret;
  Operand ret = !if(!eq(VT.Size, 64),
                    !if(isFP, FP64InputMods, Int64InputMods),
                    !if(isFP,
                        !if(!eq(VT.Value, f16.Value),
                            FP16InputMods,
                            FP32InputMods
                        ),
                        !if(EnableF32SrcMods, FP32InputMods, Int32InputMods))
                   );
}

class getOpSelMod <ValueType VT> {
  Operand ret = !if(!eq(VT.Value, f16.Value), FP16InputMods, IntOpSelMods);
}

// Return type of input modifiers operand for the specified input operand for DPP.
class getSrcModDPP <ValueType VT> {
  bit isFP = isFloatType<VT>.ret;
  Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods);
}

// Return type of input modifiers operand for the specified input operand for VOP3 with DPP.
class getSrcModVOP3DPP <ValueType VT, bit EnableF32SrcMods> {
  bit isFP = isFloatType<VT>.ret;
  bit isPacked = isPackedType<VT>.ret;
  Operand ret =
      !if (isFP,
           !if (!eq(VT.Value, f16.Value), FP16VCSrcInputMods,
                FP32VCSrcInputMods),
           !if (EnableF32SrcMods, FP32VCSrcInputMods, Int32VCSrcInputMods));
}

// Return type of input modifiers operand for the specified input operand for SDWA.
class getSrcModSDWA <ValueType VT> {
  Operand ret = !if(!eq(VT.Value, f16.Value), FP16SDWAInputMods,
                  !if(!eq(VT.Value, f32.Value), FP32SDWAInputMods,
                    !if(!eq(VT.Value, i16.Value), Int16SDWAInputMods,
                      Int32SDWAInputMods)));
}
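
// Illustration (the record name Example_SrcModSelection is hypothetical and
// only shows how the helpers above resolve for some common types):
def Example_SrcModSelection {
  Operand F16Mods = getSrcMod<f16, 0>.ret;       // FP16InputMods
  Operand I32Mods = getSrcMod<i32, 0>.ret;       // Int32InputMods
  Operand I32AsF32Mods = getSrcMod<i32, 1>.ret;  // FP32InputMods when
                                                 // EnableF32SrcMods is set
}
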
// Returns the input arguments for VOP[12C] instructions for the given SrcVT.
class getIns32 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs> {
  dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0),               // VOP1
            !if(!eq(NumSrcArgs, 2), (ins Src0RC:$src0, Src1RC:$src1), // VOP2
                                    (ins)));
}

// Returns the input arguments for VOP3 instructions for the given SrcVT.
class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
                RegisterOperand Src2RC, int NumSrcArgs,
                bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
                Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {

  dag ret =
    !if (!eq(NumSrcArgs, 0),
      // VOP1 without input operands (V_NOP, V_CLREXCP)
      (ins),
      /* else */
      !if (!eq(NumSrcArgs, 1),
        !if (HasModifiers,
          // VOP1 with modifiers
          !if(HasOMod,
            (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                 clampmod0:$clamp, omod0:$omod),
            (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                 clampmod0:$clamp))
        /* else */,
          // VOP1 without modifiers
          !if (HasClamp,
            (ins Src0RC:$src0, clampmod0:$clamp),
            (ins Src0RC:$src0))
        /* endif */ ),
        !if (!eq(NumSrcArgs, 2),
          !if (HasModifiers,
            // VOP 2 with modifiers
            !if(HasOMod,
              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                   Src1Mod:$src1_modifiers, Src1RC:$src1,
                   clampmod0:$clamp, omod0:$omod),
              (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                   Src1Mod:$src1_modifiers, Src1RC:$src1,
                   clampmod0:$clamp))
          /* else */,
            // VOP2 without modifiers
            !if (HasClamp,
              (ins Src0RC:$src0, Src1RC:$src1, clampmod0:$clamp),
              (ins Src0RC:$src0, Src1RC:$src1))

          /* endif */ )
        /* NumSrcArgs == 3 */,
          !if (HasModifiers,
            !if (HasSrc2Mods,
              // VOP3 with modifiers
              !if (HasOMod,
                (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                     Src1Mod:$src1_modifiers, Src1RC:$src1,
                     Src2Mod:$src2_modifiers, Src2RC:$src2,
                     clampmod0:$clamp, omod0:$omod),
                !if (HasClamp,
                  (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                       Src1Mod:$src1_modifiers, Src1RC:$src1,
                       Src2Mod:$src2_modifiers, Src2RC:$src2,
                       clampmod0:$clamp),
                  (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                       Src1Mod:$src1_modifiers, Src1RC:$src1,
                       Src2Mod:$src2_modifiers, Src2RC:$src2))),
              // VOP3 with modifiers except src2
              !if (HasOMod,
                (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                     Src1Mod:$src1_modifiers, Src1RC:$src1,
                     Src2RC:$src2, clampmod0:$clamp, omod0:$omod),
                !if (HasClamp,
                  (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                       Src1Mod:$src1_modifiers, Src1RC:$src1,
                       Src2RC:$src2, clampmod0:$clamp),
                  (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                       Src1Mod:$src1_modifiers, Src1RC:$src1,
                       Src2RC:$src2))))
          /* else */,
            // VOP3 without modifiers
            !if (HasClamp,
              (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2, clampmod0:$clamp),
              (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2))
          /* endif */ ))));
}
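
// Illustrative sketch only (the record name below is hypothetical and unused
// elsewhere): instantiating getIns64 the way VOPProfile does for a plain
// two-source f32 operation with source modifiers and omod. The expected
// result is:
//   (ins FP32InputMods:$src0_modifiers, VSrc_f32:$src0,
//        FP32InputMods:$src1_modifiers, VSrc_f32:$src1,
//        clampmod0:$clamp, omod0:$omod)
def Example_Ins64_F32_F32_F32 {
  dag ret = getIns64<VSrc_f32, VSrc_f32, VSrc_f32, 2 /*NumSrcArgs*/,
                     0 /*HasClamp*/, 1 /*HasModifiers*/, 1 /*HasSrc2Mods*/,
                     1 /*HasOMod*/, FP32InputMods, FP32InputMods,
                     FP32InputMods>.ret;
}
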
class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,
                     RegisterOperand Src2RC, int NumSrcArgs,
                     bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
                     Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel,
                     bit IsVOP3P> {
  // getIns64 handles clamp and omod. There is an implicit mutual exclusion
  // between vop3p and omod.
  dag base = getIns64 <Src0RC, Src1RC, Src2RC, NumSrcArgs,
                       HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
                       Src0Mod, Src1Mod, Src2Mod>.ret;
  dag opsel = (ins op_sel0:$op_sel);
  dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi);
  dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi));

  dag ret = !con(base,
                 !if(HasOpSel, opsel, (ins)),
                 !if(IsVOP3P, vop3pFields, (ins)));
}

class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
                   RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel,
                   Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
  dag ret = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
                           HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/,
                           0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod,
                           HasOpSel, 1/*IsVOP3P*/>.ret;
}

class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC,
                       RegisterOperand Src2RC, int NumSrcArgs,
                       bit HasClamp, bit HasOMod,
                       Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
  dag ret = getInsVOP3Base<Src0RC, Src1RC,
                           Src2RC, NumSrcArgs,
                           HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/, HasOMod,
                           Src0Mod, Src1Mod, Src2Mod, 1/*HasOpSel*/, 0>.ret;
}

class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
                     RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
                     Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld> {

  dag ret = !if(!eq(NumSrcArgs, 0),
                // VOP1 without input operands (V_NOP)
                (ins ),
                !con(
                  !if(HasOld, (ins OldRC:$old), (ins)),
                  !if (!eq(NumSrcArgs, 1),
                    !if (HasModifiers,
                      // VOP1_DPP with modifiers
                      (ins Src0Mod:$src0_modifiers, Src0RC:$src0)
                    /* else */,
                      // VOP1_DPP without modifiers
                      (ins Src0RC:$src0)
                    /* endif */),
                    !if (!eq(NumSrcArgs, 2),
                      !if (HasModifiers,
                        // VOP2_DPP with modifiers
                        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                             Src1Mod:$src1_modifiers, Src1RC:$src1)
                      /* else */,
                        // VOP2_DPP without modifiers
                        (ins Src0RC:$src0, Src1RC:$src1)
                      )
                    /* NumSrcArgs == 3, VOP3 */,
                      !if (HasModifiers,
                        // VOP3_DPP with modifiers
                        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                             Src1Mod:$src1_modifiers, Src1RC:$src1,
                             Src2Mod:$src2_modifiers, Src2RC:$src2)
                      /* else */,
                        // VOP3_DPP without modifiers
                        (ins Src0RC:$src0, Src1RC:$src1,
                             Src2RC:$src2)
                      )
                    )
                  )
                )
            );
}

class getInsDPP <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
                 RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
                 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
  dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
                               HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret,
                 (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                      bank_mask:$bank_mask, bound_ctrl:$bound_ctrl));
}

class getInsDPP16 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
                   RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
                   Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
  dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
                           HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret,
                 (ins FI:$fi));
}

class getInsDPP8 <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
                  RegisterClass Src2RC, int NumSrcArgs, bit HasModifiers,
                  Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
  dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
                               HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret,
                 (ins dpp8:$dpp8, FI:$fi));
}

class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld> {
  dag old = ( ins OldRC:$old );
  dag base = VOP3Base;
  dag ret = !con(
    !if(!and(HasOld, !ne(NumSrcArgs, 0)), old, (ins)),
    base
  );
}

class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
  dag ret = !con(getInsVOP3DPPBase<VOP3Base, OldRC, NumSrcArgs, HasOld>.ret,
                 (ins dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                      bank_mask:$bank_mask, bound_ctrl:$bound_ctrl));
}

class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
  dag ret = !con(getInsVOP3DPP<VOP3Base, OldRC, NumSrcArgs, HasOld>.ret,
                 (ins FI:$fi));
}

class getInsVOP3DPP8<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> {
  dag ret = !con(getInsVOP3DPPBase<VOP3Base, OldRC, NumSrcArgs, HasOld>.ret,
                 (ins dpp8:$dpp8, FI:$fi));
}

// Ins for SDWA
class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs,
                  bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod,
                  ValueType DstVT> {

  dag ret = !if(!eq(NumSrcArgs, 0),
               // VOP1 without input operands (V_NOP)
               (ins),
               !if(!eq(NumSrcArgs, 1),
                 // VOP1
                 !if(!not(HasSDWAOMod),
                   // VOP1_SDWA without omod
                   (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                        clampmod:$clamp,
                        dst_sel:$dst_sel, dst_unused:$dst_unused,
                        src0_sel:$src0_sel),
                   // VOP1_SDWA with omod
                   (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                        clampmod:$clamp, omod:$omod,
                        dst_sel:$dst_sel, dst_unused:$dst_unused,
                        src0_sel:$src0_sel)),
                 !if(!eq(NumSrcArgs, 2),
                   !if(!eq(DstVT.Size, 1),
                     // VOPC_SDWA
                     (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                          Src1Mod:$src1_modifiers, Src1RC:$src1,
                          clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
                     // VOP2_SDWA
                     !if(!not(HasSDWAOMod),
                       // VOP2_SDWA without omod
                       (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                            Src1Mod:$src1_modifiers, Src1RC:$src1,
                            clampmod:$clamp,
                            dst_sel:$dst_sel, dst_unused:$dst_unused,
                            src0_sel:$src0_sel, src1_sel:$src1_sel),
                       // VOP2_SDWA with omod
                       (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                            Src1Mod:$src1_modifiers, Src1RC:$src1,
                            clampmod:$clamp, omod:$omod,
                            dst_sel:$dst_sel, dst_unused:$dst_unused,
                            src0_sel:$src0_sel, src1_sel:$src1_sel))),
                   (ins)/* endif */)));
}

// Outs for DPP
class getOutsDPP <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> {
  dag ret = !if(HasDst,
                !if(!eq(DstVT.Size, 1),
                    (outs), // no dst for VOPC, we use "vcc"-token as dst in SDWA VOPC instructions
                    (outs DstRCDPP:$vdst)),
                (outs)); // V_NOP
}

// Outs for SDWA
class getOutsSDWA <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA> {
  dag ret = !if(HasDst,
                !if(!eq(DstVT.Size, 1),
                    (outs DstRCSDWA:$sdst),
                    (outs DstRCSDWA:$vdst)),
                (outs)); // V_NOP
}
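
// Illustrative sketch only (hypothetical record name): the SDWA input list
// built for a single-source f16 operation with an output modifier, i.e.
// NumSrcArgs = 1 and HasSDWAOMod = 1. Expected result:
//   (ins FP16SDWAInputMods:$src0_modifiers, SDWASrc_f16:$src0,
//        clampmod:$clamp, omod:$omod, dst_sel:$dst_sel,
//        dst_unused:$dst_unused, src0_sel:$src0_sel)
def Example_InsSDWA_F16 {
  dag ret = getInsSDWA<SDWASrc_f16, SDWASrc_f16, 1 /*NumSrcArgs*/,
                       1 /*HasSDWAOMod*/, FP16SDWAInputMods,
                       FP16SDWAInputMods, f16>.ret;
}
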
// Returns the assembly string for the inputs and outputs of a VOP[12C]
// instruction. This does not add the _e32 suffix, so it can be reused
// by getAsm64.
class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
  string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
  string src0 = ", $src0";
  string src1 = ", $src1";
  string src2 = ", $src2";
  string ret = !if(HasDst, dst, "") #
               !if(!eq(NumSrcArgs, 1), src0, "") #
               !if(!eq(NumSrcArgs, 2), src0#src1, "") #
               !if(!eq(NumSrcArgs, 3), src0#src1#src2, "");
}

class getAsmVOPDPart <int NumSrcArgs, string XorY> {
  string dst = "$vdst" # XorY;
  string src0 = ", $src0" # XorY;
  string src1 = ", $vsrc1" # XorY;
  string ret = dst #
               !if(!ge(NumSrcArgs, 1), src0, "") #
               !if(!ge(NumSrcArgs, 2), src1, "");
}

// Returns the assembly string for the inputs and outputs of a VOP3
// instruction.
class getAsm64 <bit HasDst, int NumSrcArgs, bit HasIntClamp, bit HasModifiers,
                bit HasOMod, ValueType DstVT = i32> {
  string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
  string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
  string src1 = !if(!eq(NumSrcArgs, 1), "",
                   !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
                                           " $src1_modifiers,"));
  string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
  string iclamp = !if(HasIntClamp, "$clamp", "");
  string ret =
    !if(!not(HasModifiers),
        getAsm32<HasDst, NumSrcArgs, DstVT>.ret # iclamp,
        dst#", "#src0#src1#src2#"$clamp"#!if(HasOMod, "$omod", ""));
}
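
// Illustrative sketch only (hypothetical record name): the e32 and e64
// operand strings produced for a two-source f32 profile such as v_add_f32.
def Example_AsmStrings_F32 {
  string Asm32 = getAsm32<1, 2, f32>.ret;
  // expected: "$vdst, $src0, $src1"
  string Asm64 = getAsm64<1, 2, 0 /*HasIntClamp*/, 1 /*HasModifiers*/,
                          1 /*HasOMod*/, f32>.ret;
  // expected: "$vdst, $src0_modifiers, $src1_modifiers$clamp$omod"
}
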
// Returns the assembly string for the inputs and outputs of a VOP3P
// instruction.
class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
                   bit HasClamp, bit HasOpSel> {
  string dst = "$vdst";
  string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
  string src1 = !if(!eq(NumSrcArgs, 1), "",
                   !if(!eq(NumSrcArgs, 2), " $src1",
                                           " $src1,"));
  string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");

  string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
  string clamp = !if(HasClamp, "$clamp", "");
  string opsel = !if(HasOpSel, "$op_sel$op_sel_hi", "");

  // Each modifier is printed as an array of bits for each operand, so
  // all operands are printed as part of src0_modifiers.
  string ret = dst#", "#src0#src1#src2#opsel#mods#clamp;
}

class getAsmVOP3OpSel <int NumSrcArgs,
                       bit HasClamp,
                       bit Src0HasMods,
                       bit Src1HasMods,
                       bit Src2HasMods> {
  string dst = "$vdst";

  string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
  string isrc1 = !if(!eq(NumSrcArgs, 1), "",
                    !if(!eq(NumSrcArgs, 2), " $src1",
                                            " $src1,"));
  string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", "");

  string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
  string fsrc1 = !if(!eq(NumSrcArgs, 1), "",
                    !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
                                            " $src1_modifiers,"));
  string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");

  string src0 = !if(Src0HasMods, fsrc0, isrc0);
  string src1 = !if(Src1HasMods, fsrc1, isrc1);
  string src2 = !if(Src2HasMods, fsrc2, isrc2);

  string clamp = !if(HasClamp, "$clamp", "");
  string omod = "";
  string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp#omod;
}

class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
  string dst = !if(HasDst,
                   !if(!eq(DstVT.Size, 1),
                       "$sdst",
                       "$vdst"),
                   ""); // use $sdst for VOPC
  string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
  string src1 = !if(!eq(NumSrcArgs, 1), "",
                   !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
                                           " $src1_modifiers,"));
  string args = !if(!not(HasModifiers),
                    getAsm32<0, NumSrcArgs, DstVT>.ret,
                    ", "#src0#src1);
  string ret = dst#args#" $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
}

class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
  string ret = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret#"$fi";
}

class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32>
  : getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT> {
  let ret = dst#args#" $dpp8$fi";
}

class getAsmVOP3DPPBase <int NumSrcArgs, bit HasDst, bit HasClamp,
                         bit HasOpSel, bit HasOMod, bit IsVOP3P,
                         bit HasModifiers, bit Src0HasMods,
                         bit Src1HasMods, bit Src2HasMods, ValueType DstVT = i32> {
  string dst = !if(HasDst,
                   !if(!eq(DstVT.Size, 1),
                       "$sdst",
                       "$vdst"),
                   ""); // use $sdst for VOPC
  string src0nomods = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
  string src1nomods = !if(!eq(NumSrcArgs, 1), "",
                         !if(!eq(NumSrcArgs, 2), " $src1",
                                                 " $src1,"));
  string src2nomods = !if(!eq(NumSrcArgs, 3), " $src2", "");

  string src0mods = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
  string src1mods = !if(!eq(NumSrcArgs, 1), "",
                       !if(!eq(NumSrcArgs, 2), " $src1_modifiers",
                                               " $src1_modifiers,"));
  string src2mods = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");

  string src0 = !if(Src0HasMods, src0mods, src0nomods);
  string src1 = !if(Src1HasMods, src1mods, src1nomods);
  string src2 = !if(Src2HasMods, src2mods, src2nomods);
  string opsel = !if(HasOpSel, "$op_sel", "");
  string 3PMods = !if(IsVOP3P,
                      !if(HasOpSel, "$op_sel_hi", "")
                      #!if(HasModifiers, "$neg_lo$neg_hi", ""),
                      "");
  string clamp = !if(HasClamp, "$clamp", "");
  string omod = !if(HasOMod, "$omod", "");

  string ret = dst#", "#src0#src1#src2#opsel#3PMods#clamp#omod;

}

class getAsmVOP3DPP<string base> {
  string ret = base # " $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
}

class getAsmVOP3DPP16<string base> {
  string ret = getAsmVOP3DPP<base>.ret # "$fi";
}

class getAsmVOP3DPP8<string base> {
  string ret = base # " $dpp8$fi";
}


class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
  string dst = !if(HasDst,
                   !if(!eq(DstVT.Size, 1),
                       " vcc", // use vcc token as dst for VOPC instructions
                       "$vdst"),
                   "");
  string src0 = "$src0_modifiers";
  string src1 = "$src1_modifiers";
  string args = !if(!eq(NumSrcArgs, 0),
                    "",
                    !if(!eq(NumSrcArgs, 1),
                        ", "#src0#"$clamp",
                        ", "#src0#", "#src1#"$clamp"
                    )
                );
  string sdwa = !if(!eq(NumSrcArgs, 0),
                    "",
                    !if(!eq(NumSrcArgs, 1),
                        " $dst_sel $dst_unused $src0_sel",
                        !if(!eq(DstVT.Size, 1),
                            " $src0_sel $src1_sel", // No dst_sel and dst_unused for VOPC
                            " $dst_sel $dst_unused $src0_sel $src1_sel"
                        )
                    )
                );
  string ret = dst#args#sdwa;
}

class getAsmSDWA9 <bit HasDst, bit HasOMod, int NumSrcArgs,
                   ValueType DstVT = i32> {
  string dst = !if(HasDst,
                   !if(!eq(DstVT.Size, 1),
                       "$sdst", // VOPC
                       "$vdst"), // VOP1/2
                   "");
  string src0 = "$src0_modifiers";
  string src1 = "$src1_modifiers";
  string out_mods = !if(!not(HasOMod), "$clamp", "$clamp$omod");
  string args = !if(!eq(NumSrcArgs, 0), "",
                    !if(!eq(NumSrcArgs, 1),
                        ", "#src0,
                        ", "#src0#", "#src1
                    )
                );
  string sdwa = !if(!eq(NumSrcArgs, 0), "",
                    !if(!eq(NumSrcArgs, 1),
                        out_mods#" $dst_sel $dst_unused $src0_sel",
                        !if(!eq(DstVT.Size, 1),
                            " $src0_sel $src1_sel", // No dst_sel, dst_unused and output modifiers for VOPC
                            out_mods#" $dst_sel $dst_unused $src0_sel $src1_sel"
                        )
                    )
                );
  string ret = dst#args#sdwa;
}

class getHas64BitOps <int NumSrcArgs, ValueType DstVT, ValueType Src0VT,
                      ValueType Src1VT> {
  bit ret = !if(!eq(NumSrcArgs, 3),
                0,
                !if(!eq(DstVT.Size, 64),
                    1,
                    !if(!eq(Src0VT.Size, 64),
                        1,
                        !if(!eq(Src1VT.Size, 64),
                            1,
                            0
                        )
                    )
                )
            );
}

class getHasSDWA <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                  ValueType Src1VT = i32> {
  bit ret = !if(!eq(NumSrcArgs, 3),
                0, // NumSrcArgs == 3 - No SDWA for VOP3
                !if(!eq(DstVT.Size, 64),
                    0, // 64-bit dst - No SDWA for 64-bit operands
                    !if(!eq(Src0VT.Size, 64),
                        0, // 64-bit src0
                        !if(!eq(Src1VT.Size, 64),
                            0, // 64-bit src1
                            1
                        )
                    )
                )
            );
}

class getHasDPP <int NumSrcArgs> {
  bit ret = !if(!eq(NumSrcArgs, 3),
                0, // NumSrcArgs == 3 - No DPP for VOP3
                1);
}

class getHasExt32BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                         ValueType Src1VT = i32> {
  bit ret = !and(getHasDPP<NumSrcArgs>.ret,
                 !not(getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret));
}

class getHasExt64BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                         ValueType Src1VT = i32> {
  bit ret = !and(getHasDPP<NumSrcArgs>.ret,
                 getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
}

// Checks whether an instruction supports DPP or SDWA at all.
class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                 ValueType Src1VT = i32> {
  bit ret = !or(getHasDPP<NumSrcArgs>.ret,
                getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
}
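
// Illustrative sketch only (hypothetical record name): how the predicates
// above resolve for a 64-bit versus a 32-bit two-source profile.
def Example_HasExtBits {
  bit SdwaFor64BitDst  = getHasSDWA<2, i64, i64, i32>.ret;        // 0
  bit Dpp64For64BitDst = getHasExt64BitDPP<2, i64, i64, i32>.ret; // 1
  bit SdwaForF32       = getHasSDWA<2, f32, f32, f32>.ret;        // 1
  bit Dpp32ForF32      = getHasExt32BitDPP<2, f32, f32, f32>.ret; // 1
}
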
// Return an AGPR+VGPR operand class for the given VGPR register class.
class getLdStRegisterOperand<RegisterClass RC> {
  RegisterOperand ret =
    !if(!eq(RC.Size, 32), AVLdSt_32,
      !if(!eq(RC.Size, 64), AVLdSt_64,
        !if(!eq(RC.Size, 96), AVLdSt_96,
          !if(!eq(RC.Size, 128), AVLdSt_128,
            !if(!eq(RC.Size, 160), AVLdSt_160,
              RegisterOperand<VReg_1> // invalid register
    )))));
}

class BitOr<bit a, bit b> {
  bit ret = !if(a, 1, !if(b, 1, 0));
}

class BitAnd<bit a, bit b> {
  bit ret = !if(a, !if(b, 1, 0), 0);
}

class getHasVOP3DPP <ValueType DstVT = i32, ValueType Src0VT = i32,
                     ValueType Src1VT = i32, ValueType Src2VT = i32> {
  bit ret = !if(!eq(DstVT.Size, 64),
                0, // 64-bit dst - No DPP for 64-bit operands
                !if(!eq(Src0VT.Size, 64),
                    0, // 64-bit src0
                    !if(!eq(Src1VT.Size, 64),
                        0, // 64-bit src1
                        !if(!eq(Src2VT.Size, 64),
                            0, // 64-bit src2
                            1
                        )
                    )
                )
            );
}


def PatGenMode {
  int NoPattern = 0;
  int Pattern = 1;
}

class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
                  bit _EnableClamp = 0> {

  field list<ValueType> ArgVT = _ArgVT;
  field bit EnableF32SrcMods = _EnableF32SrcMods;
  field bit EnableClamp = _EnableClamp;

  field ValueType DstVT = ArgVT[0];
  field ValueType Src0VT = ArgVT[1];
  field ValueType Src1VT = ArgVT[2];
  field ValueType Src2VT = ArgVT[3];
  field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret;
  field RegisterOperand DstRC64 = DstRC;
  field RegisterOperand DstRCDPP = getVALUDstForVT<DstVT>.ret;
  field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret;
  field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT>.ret;
  field RegisterOperand Src1RC32 = RegisterOperand<getVregSrcForVT<Src1VT>.ret>;
  field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret;
  field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
  field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
  field RegisterClass Src0DPP = getVregSrcForVT<Src0VT>.ret;
  field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
  field RegisterClass Src2DPP = getVregSrcForVT<Src2VT>.ret;
  field RegisterOperand Src0VOP3DPP = VGPRSrc_32;
  field RegisterOperand Src1VOP3DPP = VGPRSrc_32;
  field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret;
  field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
  field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
  field Operand Src0Mod = getSrcMod<Src0VT, EnableF32SrcMods>.ret;
  field Operand Src1Mod = getSrcMod<Src1VT, EnableF32SrcMods>.ret;
  field Operand Src2Mod = getSrcMod<Src2VT, EnableF32SrcMods>.ret;
  field Operand Src0ModDPP = getSrcModDPP<Src0VT>.ret;
  field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret;
  field Operand Src2ModDPP = getSrcModDPP<Src2VT>.ret;
  field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, EnableF32SrcMods>.ret;
  field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
  field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;


  field bit HasDst = !ne(DstVT.Value, untyped.Value);
  field bit HasDst32 = HasDst;
  field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case
  field bit EmitDstSel = EmitDst;
  field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
  field bit HasSrc0 = !ne(Src0VT.Value, untyped.Value);
  field bit HasSrc1 = !ne(Src1VT.Value, untyped.Value);
  field bit HasSrc2 = !ne(Src2VT.Value, untyped.Value);

  // HasSrc*FloatMods affects the SDWA encoding. We ignore EnableF32SrcMods.
  field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
  field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret;
  field bit HasSrc2FloatMods = isFloatType<Src2VT>.ret;

  // HasSrc*IntMods affects the SDWA encoding. We ignore EnableF32SrcMods.
  field bit HasSrc0IntMods = isIntType<Src0VT>.ret;
  field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
  field bit HasSrc2IntMods = isIntType<Src2VT>.ret;

  field bit HasClamp = !or(isModifierType<Src0VT>.ret, EnableClamp);
  field bit HasSDWAClamp = EmitDst;
  field bit HasFPClamp = !and(isFloatType<DstVT>.ret, HasClamp);
  field bit HasIntClamp = !if(isFloatType<DstVT>.ret, 0, HasClamp);
  field bit HasClampLo = HasClamp;
  field bit HasClampHi = !and(isPackedType<DstVT>.ret, HasClamp);
  field bit HasHigh = 0;

  field bit IsPacked = isPackedType<Src0VT>.ret;
  field bit HasOpSel = IsPacked;
  field bit HasOMod = !if(HasOpSel, 0, isFloatType<DstVT>.ret);
  field bit HasSDWAOMod = isFloatType<DstVT>.ret;

  field bit HasModifiers = !or(isModifierType<Src0VT>.ret,
                               isModifierType<Src1VT>.ret,
                               isModifierType<Src2VT>.ret,
                               HasOMod,
                               EnableF32SrcMods);

  field bit HasSrc0Mods = HasModifiers;
  field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0);
  field bit HasSrc2Mods = !if(HasModifiers, !or(HasSrc2FloatMods, HasSrc2IntMods), 0);

  field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  field bit HasExtVOP3DPP = getHasVOP3DPP<DstVT, Src0VT, Src1VT, Src2VT>.ret;
  field bit HasExtDPP = !if(!or(getHasDPP<NumSrcArgs>.ret,
                                HasExtVOP3DPP), 1, 0);
  field bit HasExt32BitDPP = getHasExt32BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  field bit HasExt64BitDPP = getHasExt64BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  field bit HasExtSDWA = getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
  field bit HasExtSDWA9 = HasExtSDWA;
  field int NeedPatGen = PatGenMode.NoPattern;

  field bit IsMAI = 0;
  field bit IsVOP3P = 0;
  field bit IsDOT = 0;
  field bit IsSingle = 0;
  field bit IsWMMA = 0;

  field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
  field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
  field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods);

  field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs));

  // VOP3b instructions are a special case with a second explicit
  // output. This is manually overridden for them.
  field dag Outs32 = Outs;
  field dag Outs64 = !if(HasDst,(outs DstRC64:$vdst),(outs));
  field dag OutsDPP = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret;
  field dag OutsDPP8 = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret;
  field dag OutsVOP3DPP = OutsDPP;
  field dag OutsVOP3DPP8 = OutsDPP8;
  field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret;

  field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
  field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
                             HasIntClamp, HasModifiers, HasSrc2Mods,
                             HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
  field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
                                   NumSrcArgs, HasClamp, HasOpSel,
                                   Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
  field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
                                           NumSrcArgs, HasClamp, HasOMod,
                                           getOpSelMod<Src0VT>.ret,
                                           getOpSelMod<Src1VT>.ret,
                                           getOpSelMod<Src2VT>.ret>.ret;
  field dag InsDPP = !if(HasExtDPP,
                         getInsDPP<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs,
                                   HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret,
                         (ins));
  field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs,
                                   HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
  field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, Src2DPP,
                                 NumSrcArgs, HasModifiers,
                                 Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
  field dag InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP,
    Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
    Src0ModDPP, Src1ModDPP, Src2ModVOP3DPP, HasOpSel, IsVOP3P>.ret;
  field dag InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, DstRCDPP, NumSrcArgs>.ret;
  field dag InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, DstRCDPP, NumSrcArgs>.ret;
  field dag InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, DstRCDPP, NumSrcArgs>.ret;
  field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
                                 HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
                                 DstVT>.ret;
  field dag InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X);
  // It is a slight misnomer to use the deferred f32 operand type for non-float
  // operands, but this operand type will only be used if the other dual
  // component is FMAAK or FMAMK
  field dag InsVOPDXDeferred = (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X, VGPR_32:$vsrc1X);
  field dag InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y);
  field dag InsVOPDYDeferred = (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y, VGPR_32:$vsrc1Y);


  field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
  field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasIntClamp, HasModifiers, HasOMod, DstVT>.ret;
  field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp, HasOpSel>.ret;
  field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
                                              HasClamp,
                                              HasSrc0FloatMods,
                                              HasSrc1FloatMods,
                                              HasSrc2FloatMods>.ret;
  field string AsmDPP = !if(HasExtDPP,
                            getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, "");
  field string AsmDPP16 = getAsmDPP16<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
  // The DPP8 encoding has no fields for modifiers; this is enforced by
  // passing 0 for HasModifiers so the asm string omits the modifier operands.
  field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret;
  field string AsmVOP3DPPBase = getAsmVOP3DPPBase<NumSrcArgs, HasDst, HasClamp,
   HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, HasModifiers,
   HasModifiers, DstVT>.ret;
  field string AsmVOP3DPP = getAsmVOP3DPP<AsmVOP3DPPBase>.ret;
  field string AsmVOP3DPP16 = getAsmVOP3DPP16<AsmVOP3DPPBase>.ret;
  field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3DPPBase>.ret;
  field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret;
  field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret;
  field string AsmVOPDX = getAsmVOPDPart<NumSrcArgs, "X">.ret;
  field string AsmVOPDY = getAsmVOPDPart<NumSrcArgs, "Y">.ret;
  field string TieRegDPP = "$old";
}

class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
  let HasExt = 0;
  let HasExtDPP = 0;
  let HasExtVOP3DPP = 0;
  let HasExt32BitDPP = 0;
  let HasExt64BitDPP = 0;
  let HasExtSDWA = 0;
  let HasExtSDWA9 = 0;
}

class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.ArgVT> {
  let NeedPatGen = mode;
}

def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>;
def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>;
def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>;
def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>;

def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>;
def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>;
def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], 0, /*EnableClamp=*/1>;

def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;

def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>;
def VOP_I32_I16 : VOPProfile <[i32, i16, untyped, untyped]>;

def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;

def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>;
def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>;

def VOP_F16_V2F16_V2F16_F16 : VOPProfile <[f16, v2f16, v2f16, f16]>;
def VOP_I16_V2I16_V2I16_I16 : VOPProfile <[i16, v2i16, v2i16, i16]>;
def VOP_F32_V2I16_V2I16_F32 : VOPProfile <[f32, v2i16, v2i16, f32]>;

def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>;

def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;

def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
def VOP_F64_F32 : VOPProfile <[f64, f32, untyped, untyped]>;
def VOP_F64_F64 : VOPProfile <[f64, f64, untyped, untyped]>;
def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>;
def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>;
def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>;
def VOP_I64_I64 : VOPProfile <[i64, i64, untyped, untyped]>;

def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>;
def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], 0, /*EnableClamp=*/1>;
def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;

def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;

def VOP_F16_F32_F16_F32 : VOPProfile <[f16, f32, f16, f32]>;
def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>;
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>;

def VOP_F32_V2F16_V2F16_F32 : VOPProfile <[f32, v2f16, v2f16, f32]>;
def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>;

def VOP_V4F32_F32_F32_V4F32 : VOPProfile <[v4f32, f32, f32, v4f32]>;
def VOP_V16F32_F32_F32_V16F32 : VOPProfile <[v16f32, f32, f32, v16f32]>;
def VOP_V32F32_F32_F32_V32F32 : VOPProfile <[v32f32, f32, f32, v32f32]>;
def VOP_V4F32_V4F16_V4F16_V4F32 : VOPProfile <[v4f32, v4f16, v4f16, v4f32]>;
def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile <[v16f32, v4f16, v4f16, v16f32]>;
def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32f32, v4f16, v4f16, v32f32]>;
def VOP_V4F32_V2I16_V2I16_V4F32 : VOPProfile <[v4f32, v2i16, v2i16, v4f32]>;
def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile <[v16f32, v2i16, v2i16, v16f32]>;
def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32f32, v2i16, v2i16, v32f32]>;
def VOP_V4I32_I32_I32_V4I32 : VOPProfile <[v4i32, i32, i32, v4i32]>;
def VOP_V16I32_I32_I32_V16I32 : VOPProfile <[v16i32, i32, i32, v16i32]>;
def VOP_V32I32_I32_I32_V32I32 : VOPProfile <[v32i32, i32, i32, v32i32]>;

def VOP_V4F64_F64_F64_V4F64 : VOPProfile <[v4f64, f64, f64, v4f64]>;
def VOP_V1F64_F64_F64_V1F64 : VOPProfile <[v1f64, f64, f64, v1f64]>;

def VOP_V2F32_V2F32_V2F32_V2F32 : VOPProfile <[v2f32, v2f32, v2f32, v2f32]>;
def VOP_V2F32_V2F32_V2F32 : VOPProfile <[v2f32, v2f32, v2f32, untyped]>;
def VOP_V2I32_V2I32_V2I32 : VOPProfile <[v2i32, v2i32, v2i32, untyped]>;
def VOP_V4F32_V4I16_V4I16_V4F32 : VOPProfile <[v4f32, v4i16, v4i16, v4f32]>;
def VOP_V16F32_V4I16_V4I16_V16F32 : VOPProfile <[v16f32, v4i16, v4i16, v16f32]>;
def VOP_V32F32_V4I16_V4I16_V32F32 : VOPProfile <[v32f32, v4i16, v4i16, v32f32]>;

def VOP_V4I32_I64_I64_V4I32 : VOPProfile <[v4i32, i64, i64, v4i32]>;
def VOP_V16I32_I64_I64_V16I32 : VOPProfile <[v16i32, i64, i64, v16i32]>;
def VOP_V4F32_V2F32_V2F32_V4F32 : VOPProfile <[v4f32, v2f32, v2f32, v4f32]>;
def VOP_V16F32_V2F32_V2F32_V16F32 : VOPProfile <[v16f32, v2f32, v2f32, v16f32]>;
def VOP_V4F32_I64_I64_V4F32 : VOPProfile <[v4f32, i64, i64, v4f32]>;
def VOP_V16F32_I64_I64_V16F32 : VOPProfile <[v16f32, i64, i64, v16f32]>;

def VOP_V4F32_V4F16_V8F16_I32 : VOPProfile <[v4f32, v4f16, v8f16, i32]>;
def VOP_V16F32_V4F16_V8F16_I32 : VOPProfile <[v16f32, v4f16, v8f16, i32]>;
def VOP_V4F32_V4I16_V8I16_I32 : VOPProfile <[v4f32, v4i16, v8i16, i32]>;
def VOP_V16F32_V4I16_V8I16_I32 : VOPProfile <[v16f32, v4i16, v8i16, i32]>;
def VOP_V4I32_V2I32_V4I32_I32 : VOPProfile <[v4i32, v2i32, v4i32, i32]>;
def VOP_V16I32_V2I32_V4I32_I32 : VOPProfile <[v16i32, v2i32, v4i32, i32]>;
def VOP_V4F32_V2I32_V4I32_I32 : VOPProfile <[v4f32, v2i32, v4i32, i32]>;
def VOP_V16F32_V2I32_V4I32_I32 : VOPProfile <[v16f32, v2i32, v4i32, i32]>;

class Commutable_REV <string revOp, bit isOrig> {
  string RevOp = revOp;
  bit IsOrig = isOrig;
}

class AtomicNoRet <string noRetOp, bit isRet> {
  string NoRetOp = noRetOp;
  bit IsRet = isRet;
}

//===----------------------------------------------------------------------===//
// Interpolation opcodes
//===----------------------------------------------------------------------===//

class VINTRPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVINTRPDst">;

class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
  VINTRPCommon <outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {
  let isPseudo = 1;
  let isCodeGenOnly = 1;
}

// FIXME-GFX10: WIP.
class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins,
                      string asm, int encodingFamily> :
  VINTRPCommon <outs, ins, asm, []>,
  VINTRPe <op>,
  SIMCInstr<opName, encodingFamily> {
}

class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
                      string asm> :
  VINTRPCommon <outs, ins, asm, []>,
  VINTRPe_vi <op>,
  SIMCInstr<opName, SIEncodingFamily.VI> {
  let AssemblerPredicate = VIAssemblerPredicate;
  let DecoderNamespace = "GFX8";
}

// FIXME-GFX10: WIP.
multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
                     list<dag> pattern = []> {
  def "" : VINTRP_Pseudo <NAME, outs, ins, pattern>;

  let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
    def _si : VINTRP_Real_si <op, NAME, outs, ins, asm, SIEncodingFamily.SI>;
  } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

  def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>;

  let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
    def _gfx10 : VINTRP_Real_si<op, NAME, outs, ins, asm, SIEncodingFamily.GFX10>;
  } // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"
}

//===----------------------------------------------------------------------===//
// Vector instruction mappings
//===----------------------------------------------------------------------===//

// Maps an opcode in e32 form to its e64 equivalent
def getVOPe64 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["Size", "VOP3"];
  let KeyCol = ["4", "0"];
  let ValueCols = [["8", "1"]];
}

// Maps an opcode in e64 form to its e32 equivalent
def getVOPe32 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["Size", "VOP3"];
  let KeyCol = ["8", "1"];
  let ValueCols = [["4", "0"]];
}

// Maps ordinary instructions to their SDWA counterparts
def getSDWAOp : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["Default"];
  let ValueCols = [["SDWA"]];
}

// Maps SDWA instructions to their ordinary counterparts
def getBasicFromSDWAOp : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["SDWA"];
  let ValueCols = [["Default"]];
}

// Maps ordinary instructions to their DPP counterparts
def getDPPOp32 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["Default"];
  let ValueCols = [["DPP"]];
}

def getDPPOp64 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["VOP3"];
  let ValueCols = [["VOP3_DPP"]];
}

// Maps a commuted opcode to its original version
def getCommuteOrig : InstrMapping {
  let FilterClass = "Commutable_REV";
  let RowFields = ["RevOp"];
  let ColFields = ["IsOrig"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Maps an original opcode to its commuted version
def getCommuteRev : InstrMapping {
  let FilterClass = "Commutable_REV";
  let RowFields = ["RevOp"];
  let ColFields = ["IsOrig"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}
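
// Sketch of how these tables are populated (the v_foo opcode names below are
// hypothetical): a reversed-operand pseudo tagged Commutable_REV<"v_foo_e64", 0>
// and its original form tagged Commutable_REV<"v_foo_e64", 1> share the RevOp
// key, so getCommuteOrig maps V_FOO_REV_e64 -> V_FOO_e64 and getCommuteRev maps
// V_FOO_e64 -> V_FOO_REV_e64.
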
def getMCOpcodeGen : InstrMapping {
  let FilterClass = "SIMCInstr";
  let RowFields = ["PseudoInstr"];
  let ColFields = ["Subtarget"];
  let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
  // These columns must be kept in sync with the SIEncodingFamily enumeration.
  let ValueCols = [[!cast<string>(SIEncodingFamily.SI)],
                   [!cast<string>(SIEncodingFamily.VI)],
                   [!cast<string>(SIEncodingFamily.SDWA)],
                   [!cast<string>(SIEncodingFamily.SDWA9)],
                   // GFX80 encoding is added to work around a multiple matching
                   // issue for buffer instructions with unpacked d16 data. This
                   // does not actually change the encoding, and thus may be
                   // removed later.
                   [!cast<string>(SIEncodingFamily.GFX80)],
                   [!cast<string>(SIEncodingFamily.GFX9)],
                   [!cast<string>(SIEncodingFamily.GFX10)],
                   [!cast<string>(SIEncodingFamily.SDWA10)],
                   [!cast<string>(SIEncodingFamily.GFX90A)],
                   [!cast<string>(SIEncodingFamily.GFX940)],
                   [!cast<string>(SIEncodingFamily.GFX11)]];
}

// Get equivalent SOPK instruction.
def getSOPKOp : InstrMapping {
  let FilterClass = "SOPKInstTable";
  let RowFields = ["BaseCmpOp"];
  let ColFields = ["IsSOPK"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

def getAddr64Inst : InstrMapping {
  let FilterClass = "MUBUFAddr64Table";
  let RowFields = ["OpName"];
  let ColFields = ["IsAddr64"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

def getIfAddr64Inst : InstrMapping {
  let FilterClass = "MUBUFAddr64Table";
  let RowFields = ["OpName"];
  let ColFields = ["IsAddr64"];
  let KeyCol = ["1"];
  let ValueCols = [["1"]];
}

// Maps an atomic opcode to its returnless version.
def getAtomicNoRetOp : InstrMapping {
  let FilterClass = "AtomicNoRet";
  let RowFields = ["NoRetOp"];
  let ColFields = ["IsRet"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Maps a GLOBAL to its SADDR form.
def getGlobalSaddrOp : InstrMapping {
  let FilterClass = "GlobalSaddrTable";
  let RowFields = ["SaddrOp"];
  let ColFields = ["IsSaddr"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Maps a GLOBAL SADDR to its VADDR form.
def getGlobalVaddrOp : InstrMapping {
  let FilterClass = "GlobalSaddrTable";
  let RowFields = ["SaddrOp"];
  let ColFields = ["IsSaddr"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}
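
// Sketch of how the GLOBAL mappings are fed (the opcode and string names here
// are hypothetical): a VADDR-form pseudo tagged GlobalSaddrTable<0, "global_load_dword">
// and its SADDR form tagged GlobalSaddrTable<1, "global_load_dword"> share the
// SaddrOp key, so getGlobalSaddrOp translates the VADDR opcode to the SADDR one
// and getGlobalVaddrOp performs the reverse lookup.
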
// Maps a v_cmpx opcode with sdst to the opcode without sdst.
def getVCMPXNoSDstOp : InstrMapping {
  let FilterClass = "VCMPXNoSDstTable";
  let RowFields = ["NoSDstOp"];
  let ColFields = ["HasSDst"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Maps a SOPP to a SOPP with S_NOP.
def getSOPPWithRelaxation : InstrMapping {
  let FilterClass = "SOPPRelaxTable";
  let RowFields = ["KeyName"];
  let ColFields = ["IsRelaxed"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Maps flat scratch opcodes by addressing modes
def getFlatScratchInstSTfromSS : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SS"];
  let ValueCols = [["ST"]];
}

def getFlatScratchInstSSfromSV : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SV"];
  let ValueCols = [["SS"]];
}

def getFlatScratchInstSVfromSVS : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SVS"];
  let ValueCols = [["SV"]];
}

def getFlatScratchInstSVfromSS : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SS"];
  let ValueCols = [["SV"]];
}

def getMFMAEarlyClobberOp : InstrMapping {
  let FilterClass = "MFMATable";
  let RowFields = ["FMAOp"];
  let ColFields = ["IsMac"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Maps a v_cmp instruction to its v_cmpx equivalent.
def getVCMPXOpFromVCMP : InstrMapping {
  let FilterClass = "VCMPVCMPXTable";
  let RowFields = ["VCMPOp"];
  let ColFields = ["IsVCMPX"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

def VOPDComponentTable : GenericTable {
  let FilterClass = "VOPD_Component";
  let CppTypeName = "VOPDComponentInfo";
  let Fields = ["BaseVOP", "VOPDOp", "CanBeVOPDX"];
  let PrimaryKey = ["BaseVOP"];
  let PrimaryKeyName = "getVOPDComponentHelper";
}

def VOPDPairs : GenericTable {
  let FilterClass = "VOPD_Base";
  let CppTypeName = "VOPDInfo";
  let Fields = ["Opcode", "OpX", "OpY"];
  let PrimaryKey = ["Opcode"];
  let PrimaryKeyName = "getVOPDOpcodeHelper";
}

def getVOPDInfoFromComponentOpcodes : SearchIndex {
  let Table = VOPDPairs;
  let Key = ["OpX", "OpY"];
}

include "SIInstructions.td"

include "DSInstructions.td"
include "MIMGInstructions.td"