//===-- VOP1Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP1 Classes
//===----------------------------------------------------------------------===//

// 32-bit VOP1 encoding: src0 in bits 8-0, the opcode in bits 16-9, vdst in
// bits 24-17 and the constant 0x3f in bits 31-25.
class VOP1e <bits<8> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;

  // src0 is left unset ('?') when the profile has no src0 operand.
  let Inst{8-0}   = !if(P.HasSrc0, src0{8-0}, ?);
  let Inst{16-9}  = op;
  // vdst is encoded as 0 unless the profile asks for it to be emitted.
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; //encoding
}

// SDWA form of the VOP1 encoding: same layout as VOP1e, but the src0 field
// holds the constant 0xf9.
class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

// Same low-dword layout as VOP1_SDWAe, layered on VOP_SDWA9Ae.
class VOP1_SDWA9Ae <bits<8> op, VOPProfile P> : VOP_SDWA9Ae <P> {
  bits<8> vdst;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

// Pseudo instruction for the 32-bit VOP1 form. The record gets an "_e32"
// suffix unless VOP1Only is set, in which case no suffix is used.
class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1Only = 0> :
  VOP_Pseudo <opName, !if(VOP1Only, "", "_e32"), P, P.Outs32, P.Ins32, "", pattern> {

  let AsmOperands = P.Asm32;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  // The mode register is read whenever the destination or src0 type is FP.
  let ReadsModeReg = !or(P.DstVT.isFP, P.Src0VT.isFP);

  let mayRaiseFPException = ReadsModeReg;

  let VOP1 = 1;
  let VALU = 1;
  // EXEC is always an implicit use; MODE is added when ReadsModeReg is set.
  // Note that a surrounding top-level 'let Uses = ...' replaces this list.
  let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);

  let AsmVariantName = AMDGPUAsmVariants.Default;
}

// Real (encodable) instruction derived from a VOP1_Pseudo for one encoding
// family. Operand lists and the flags below are copied from the pseudo.
class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemonic > :
  VOP_Real <ps>,
  InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let VALU = 1;
  let VOP1 = 1;
  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  // (Constraints and DisableEncoding used to be assigned twice in this body
  // from the same source; each is now assigned once.)
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let OtherPredicates      = ps.OtherPredicates;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let AsmVariantName       = ps.AsmVariantName;
  let Constraints          = ps.Constraints;
  let DisableEncoding      = ps.DisableEncoding;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses                 = ps.Uses;
  let Defs                 = ps.Defs;
  let SchedRW              = ps.SchedRW;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let TRANS                = ps.TRANS;
}

// VOP1_Real whose encoding family, assembler predicate and decoder namespace
// all come from a GFXGen record.
class VOP1_Real_Gen <VOP1_Pseudo ps, GFXGen Gen, string real_name = ps.Mnemonic> :
  VOP1_Real <ps, Gen.Subtarget, real_name> {
  let AssemblerPredicate = Gen.AssemblerPredicate;
  let DecoderNamespace = Gen.DecoderNamespace;
}

// SDWA pseudo for VOP1; only the asm match converter differs from the base.
class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP1";
}

// DPP pseudo for VOP1; adds nothing to VOP_DPP_Pseudo.
class VOP1_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_DPP_Pseudo <OpName, P, pattern> {
}

// Builds the selection pattern list for a one-source operation:
//  - with input modifiers: src0 is matched through VOP3Mods,
//  - otherwise with omod:  src0 is matched through VOP3OMods,
//  - otherwise:            src0 is matched as a plain Src0RC32 operand.
class getVOP1Pat <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret =
    !if(P.HasModifiers,
        [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods P.Src0VT:$src0, i32:$src0_modifiers))))],
        !if(P.HasOMod,
            [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0,
                                                  i1:$clamp, i32:$omod))))],
            [(set P.DstVT:$vdst, (node (P.Src0VT P.Src0RC32:$src0)))]
        )
    );
}

// Defines the pseudo variants of one VOP1 operation: _e32 and _e64 always,
// _sdwa / _dpp / _e64_dpp when the profile enables them, plus mnemonic
// aliases that map the suffixed spellings back to the bare mnemonic.
// A VOPDOp other than -1 additionally makes _e32 a VOPD_Component.
multiclass VOP1Inst <string opName, VOPProfile P,
                     SDPatternOperator node = null_frag, int VOPDOp = -1> {
  // We only want to set this on the basic, non-SDWA or DPP forms.
  defvar should_mov_imm = !or(!eq(opName, "v_mov_b32"),
                              !eq(opName, "v_mov_b64"));

  let isMoveImm = should_mov_imm in {
    if !eq(VOPDOp, -1) then
      def _e32 : VOP1_Pseudo <opName, P>;
    else
      // Only for V_MOV_B32
      def _e32 : VOP1_Pseudo <opName, P>, VOPD_Component<VOPDOp, opName>;
    def _e64 : VOP3InstBase <opName, P, node>;
  }

  if P.HasExtSDWA then
    def _sdwa : VOP1_SDWA_Pseudo <opName, P>;

  if P.HasExtDPP then
    def _dpp : VOP1_DPP_Pseudo <opName, P>;

  let SubtargetPredicate = isGFX11Plus in {
    if P.HasExtVOP3DPP then
      def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
  } // End SubtargetPredicate = isGFX11Plus

  def : LetDummies, AMDGPUMnemonicAlias<opName#"_e32", opName>;
  def : LetDummies, AMDGPUMnemonicAlias<opName#"_e64", opName>;

  if P.HasExtSDWA then
    def : LetDummies, AMDGPUMnemonicAlias<opName#"_sdwa", opName>;

  if P.HasExtDPP then
    def : LetDummies, AMDGPUMnemonicAlias<opName#"_dpp", opName, AMDGPUAsmVariants.DPP>;
}

// Instantiates VOP1Inst three times with explicitly supplied profiles: the
// plain form, a "_t16" form and a "_fake16" form, each under its own
// OtherPredicates list.
multiclass VOP1Inst_t16_with_profiles<string opName,
                                      VOPProfile P,
                                      VOPProfile P_t16,
                                      VOPProfile P_fake16,
                                      SDPatternOperator node = null_frag> {
  let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in {
    defm NAME : VOP1Inst<opName, P, node>;
  }
  let OtherPredicates = [UseRealTrue16Insts] in {
    defm _t16 : VOP1Inst<opName#"_t16", P_t16, node>;
  }
  let OtherPredicates = [UseFakeTrue16Insts] in {
    defm _fake16 : VOP1Inst<opName#"_fake16", P_fake16, node>;
  }
}

// Convenience wrapper that derives the _t16 / _fake16 profiles from P via
// VOPProfile_True16 and VOPProfile_Fake16.
multiclass VOP1Inst_t16<string opName, VOPProfile P,
                        SDPatternOperator node = null_frag> :
  VOP1Inst_t16_with_profiles<opName, P, VOPProfile_True16<P>, VOPProfile_Fake16<P>, node>;

// Special profile for instructions which have clamp
// and output modifiers (but have no input modifiers)
class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
  VOPProfile<[dstVt, srcVt, untyped, untyped]> {

  let Ins64 = (ins Src0RC64:$src0, Clamp:$clamp, omod:$omod);
  let InsVOP3Base = (ins Src0VOP3DPP:$src0, Clamp:$clamp, omod:$omod);
  let AsmVOP3Base = "$vdst, $src0$clamp$omod";

  let HasModifiers = 0;
  let HasClamp = 1;
}

// Same overrides as VOPProfileI2F, applied on top of VOPProfile_Fake16
// (despite the "True16" in the name).
class VOPProfileI2F_True16<ValueType dstVt, ValueType srcVt> :
  VOPProfile_Fake16<VOPProfile<[dstVt, srcVt, untyped, untyped]>> {

  let Ins64 = (ins Src0RC64:$src0, Clamp:$clamp, omod:$omod);
  let InsVOP3Base = (ins Src0VOP3DPP:$src0, Clamp:$clamp, omod:$omod);
  let AsmVOP3Base = "$vdst, $src0$clamp$omod";

  let HasModifiers = 0;
  let HasClamp = 1;
}

def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;
def VOP1_F16_I16_t16 : VOPProfileI2F_True16 <f16, i16>;

// Fully untyped profile used by v_nop; the VOP3 DPP extension is disabled.
def VOP_NOP_PROFILE : VOPProfile <[untyped, untyped, untyped, untyped]>{
  let HasExtVOP3DPP = 0;
}

// OMod clears exceptions when set. OMod was always an operand, but it is
// now explicitly set.
// Two-operand-typed profile (dst, src0) with the output modifier enabled.
class VOP_SPECIAL_OMOD_PROF<ValueType dstVt, ValueType srcVt> :
  VOPProfile<[dstVt, srcVt, untyped, untyped]> {

  let HasOMod = 1;
}
def VOP_I32_F32_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f32>;
def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f64>;
def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i16, f16>;
def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_Fake16<VOP_I16_F16> {
  let HasOMod = 1;
}

//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//

defm V_NOP : VOP1Inst <"v_nop", VOP_NOP_PROFILE>;

// i32 -> i32 profile for v_mov_b32 that also supplies the VOPD X/Y operand
// lists (regular and deferred variants).
def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
  let InsVOPDX = (ins Src0RC32:$src0X);
  let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X);
  let InsVOPDY = (ins Src0RC32:$src0Y);
  let InsVOPDYDeferred = (ins VSrc_f32_Deferred:$src0Y);
}

let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
  // 0x8 is passed as VOPDOp, which makes the _e32 form a VOPD component.
  defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;

  let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in {
    defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
  }
} // End isReMaterializable = 1, isAsCheapAsAMove = 1

// Profile for v_readfirstlane_b32: the result lives in SReg_32 and src0 is
// taken from VRegOrLdsSrc_32.
def VOP_READFIRSTLANE : VOPProfile <[i32, i32, untyped, untyped]> {
  let DstRC = RegisterOperand<SReg_32>;
  let Src0RC32 = VRegOrLdsSrc_32;
  let Asm32 = " $vdst, $src0";
}

// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: There is VOP3 encoding also
def V_READFIRSTLANE_B32 : VOP1_Pseudo <"v_readfirstlane_b32", VOP_READFIRSTLANE,
                                       [], /*VOP1Only=*/ 1> {
  let isConvergent = 1;
  let IsInvalidSingleUseConsumer = 1;
}

// Select int_amdgcn_readfirstlane to V_READFIRSTLANE_B32 for every type
// listed in Reg32Types.
foreach vt = Reg32Types.types in {
  def : GCNPat<(vt (int_amdgcn_readfirstlane (vt VRegOrLdsSrc_32:$src0))),
        (V_READFIRSTLANE_B32 (vt VRegOrLdsSrc_32:$src0))
  >;
}

let isReMaterializable = 1 in {

let SchedRW = [WriteDoubleCvt] in {
  // OMod clears exceptions when set in this instruction
  defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64_SPECIAL_OMOD, fp_to_sint>;

  let mayRaiseFPException = 0 in {
    defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
  }

  defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
  defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, any_fpextend>;
  // OMod clears exceptions when set in this instruction
  defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64_SPECIAL_OMOD, fp_to_uint>;

  let mayRaiseFPException = 0 in {
    defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
  }
} // End SchedRW = [WriteDoubleCvt]

let SchedRW = [WriteFloatCvt] in {

  // XXX: Does this really not raise exceptions? The manual claims the
  // 16-bit ones can.
  let mayRaiseFPException = 0 in {
    defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
    defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
  }

  // OMod clears exceptions when set in these 2 instructions
  defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_uint>;
  defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_sint>;

  // The f32 -> f16 conversions opt out of the enclosing isReMaterializable.
  let FPDPRounding = 1, isReMaterializable = 0 in {
    let OtherPredicates = [NotHasTrue16BitInsts] in {
      defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>;
    }
    let OtherPredicates = [HasTrue16BitInsts] in {
      defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_Fake16<VOP_F16_F32>, any_fpround>;
    }
  } // End FPDPRounding = 1, isReMaterializable = 0

  let OtherPredicates = [NotHasTrue16BitInsts] in {
    defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
  }
  let OtherPredicates = [HasTrue16BitInsts] in {
    defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;
  }

  let ReadsModeReg = 0, mayRaiseFPException = 0 in {
    defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
    defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
    defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
  } // End ReadsModeReg = 0, mayRaiseFPException = 0
} // End SchedRW = [WriteFloatCvt]

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
  defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
  defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
  defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
  defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;
} // End ReadsModeReg = 0, mayRaiseFPException = 0

defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>;
defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, froundeven>;
defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;

let TRANS = 1, SchedRW = [WriteTrans32] in {
  defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, AMDGPUexp>;
  defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, AMDGPUlog>;
  defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
  defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32, AMDGPUrcp_iflag>;
  defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
  defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, int_amdgcn_sqrt>;
} // End TRANS = 1, SchedRW = [WriteTrans32]

let TRANS = 1, SchedRW = [WriteTrans64] in {
  defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>;
  defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>;
  defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, int_amdgcn_sqrt>;
} // End TRANS = 1, SchedRW = [WriteTrans64]

let TRANS = 1, SchedRW = [WriteTrans32] in {
  defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>;
  defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>;
} // End TRANS = 1, SchedRW = [WriteTrans32]

defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>;
defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, DivergentUnaryFrag<bitreverse>>;
defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32, AMDGPUffbh_u32>;
defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32, AMDGPUffbl_b32>;
defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>;

let SchedRW = [WriteDoubleAdd] in {
  defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64_SPECIAL_OMOD, int_amdgcn_frexp_exp>;
  defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>;
  let FPDPRounding = 1 in {
    defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>;
  } // End FPDPRounding = 1
} // End SchedRW = [WriteDoubleAdd]

defm V_FREXP_EXP_I32_F32 : VOP1Inst <"v_frexp_exp_i32_f32", VOP_I32_F32, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F32 : VOP1Inst <"v_frexp_mant_f32", VOP_F32_F32, int_amdgcn_frexp_mant>;

} // End isReMaterializable = 1

defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;

// Restrict src0 to be VGPR
def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> {
  let Src0RC32 = VRegSrc_32;
  let Src0RC64 = VRegSrc_32;
  let IsInvalidSingleUseConsumer = 1;
}

// Special case because there are no true output operands.  Hack vdst
// to be a src operand. The custom inserter must add a tied implicit
// def and use of the super register since there seems to be no way to
// add an implicit def of a virtual register in tablegen.
class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, untyped]> {
  let Src0RC32 = VOPDstOperand<VGPR_32>;
  let Src0RC64 = VOPDstOperand<VGPR_32>;

  // No real outputs: $vdst appears in the input lists instead.
  let Outs = (outs);
  let Ins32 = (ins Src0RC32:$vdst, Src1RC:$src0);
  let Ins64 = (ins Src0RC64:$vdst, Src1RC:$src0);
  let Asm32 = getAsm32<1, 1>.ret;

  let OutsSDWA = (outs Src0RC32:$vdst);
  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Clamp:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel);
  let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;

  let OutsDPP = (outs Src0RC32:$vdst);
  let InsDPP16 = (ins Src0RC32:$old, Src0RC32:$src0,
                      dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                      DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl, Dpp16FI:$fi);
  let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;
  let InsDPP8 = (ins Src0RC32:$old, Src0RC32:$src0, dpp8:$dpp8, Dpp8FI:$fi);
  let AsmDPP8 = getAsmDPP8<1, 1, 0>.ret;

  let OutsVOP3DPP = (outs Src0RC64:$vdst);
  let InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
  let InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
  let InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;

  let AsmVOP3Base =
    getAsmVOP3Base<NumSrcArgs, 1 /* HasDst */, HasClamp,
                   HasOpSel, HasOMod, IsVOP3P, HasModifiers,
                   HasModifiers, HasModifiers, HasModifiers>.ret;

  let HasDst = 0;
  let EmitDst = 1; // force vdst emission
}

let IsInvalidSingleUseProducer = 1 in {
  def VOP_MOVRELD : VOP_MOVREL<VSrc_b32>;
  def VOP_MOVRELSD : VOP_MOVREL<VRegSrc_32> {
    let IsInvalidSingleUseConsumer = 1;
  }
}

// EXEC is listed explicitly next to M0 because this top-level 'let Uses'
// replaces the list assigned inside VOP1_Pseudo.
let SubtargetPredicate = HasMovrel, Uses = [M0, EXEC] in {
  // v_movreld_b32 is a special case because the destination output
  // register is really a source. It isn't actually read (but may be
  // written), and is only to provide the base register to start
  // indexing from. Tablegen seems to not let you define an implicit
  // virtual register output for the super register being written into,
  // so this must have an implicit def of the register added to it.
  defm V_MOVRELD_B32 : VOP1Inst <"v_movreld_b32", VOP_MOVRELD>;
  defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_MOVRELS>;
  defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_MOVRELSD>;
} // End SubtargetPredicate = HasMovrel, Uses = [M0, EXEC]

let isReMaterializable = 1 in {
let SubtargetPredicate = isGFX6GFX7 in {
  let TRANS = 1, SchedRW = [WriteTrans32] in {
    defm V_LOG_CLAMP_F32 :
      VOP1Inst<"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
    defm V_RCP_CLAMP_F32 :
      VOP1Inst<"v_rcp_clamp_f32", VOP_F32_F32>;
    defm V_RCP_LEGACY_F32 :
      VOP1Inst<"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
    defm V_RSQ_CLAMP_F32 :
      VOP1Inst<"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
    defm V_RSQ_LEGACY_F32 :
      VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, int_amdgcn_rsq_legacy>;
  } // End TRANS = 1, SchedRW = [WriteTrans32]

  // NOTE(review): unlike the other WriteTrans64 group in this file, TRANS is
  // not set here - confirm whether that is intentional.
  let SchedRW = [WriteTrans64] in {
    defm V_RCP_CLAMP_F64 :
      VOP1Inst<"v_rcp_clamp_f64", VOP_F64_F64>;
    defm V_RSQ_CLAMP_F64 :
      VOP1Inst<"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>;
  } // End SchedRW = [WriteTrans64]
} // End SubtargetPredicate = isGFX6GFX7

let SubtargetPredicate = isGFX7GFX8GFX9 in {
  let TRANS = 1, SchedRW = [WriteTrans32] in {
    defm V_LOG_LEGACY_F32 : VOP1Inst<"v_log_legacy_f32", VOP_F32_F32>;
    defm V_EXP_LEGACY_F32 : VOP1Inst<"v_exp_legacy_f32", VOP_F32_F32>;
  } // End TRANS = 1, SchedRW = [WriteTrans32]
} // End SubtargetPredicate = isGFX7GFX8GFX9

let SubtargetPredicate = isGFX7Plus in {
  let SchedRW = [WriteDoubleAdd] in {
    defm V_TRUNC_F64 : VOP1Inst<"v_trunc_f64", VOP_F64_F64, ftrunc>;
    defm V_CEIL_F64 : VOP1Inst<"v_ceil_f64", VOP_F64_F64, fceil>;
    defm V_RNDNE_F64 : VOP1Inst<"v_rndne_f64", VOP_F64_F64, froundeven>;
    defm V_FLOOR_F64 : VOP1Inst<"v_floor_f64", VOP_F64_F64, ffloor>;
  } // End SchedRW = [WriteDoubleAdd]
} // End SubtargetPredicate = isGFX7Plus
} // End isReMaterializable = 1

// 16-bit operations. Several come in two flavours selected by
// OtherPredicates: a plain one and a "_t16" one built on a *_t16 profile.
let FPDPRounding = 1 in {
  let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
    defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
    defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
  }
  let OtherPredicates = [HasTrue16BitInsts] in {
    defm V_CVT_F16_U16_t16 : VOP1Inst <"v_cvt_f16_u16_t16", VOP1_F16_I16_t16, uint_to_fp>;
    defm V_CVT_F16_I16_t16 : VOP1Inst <"v_cvt_f16_i16_t16", VOP1_F16_I16_t16, sint_to_fp>;
  }
} // End FPDPRounding = 1
// OMod clears exceptions when set in these two instructions
let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
  defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_uint>;
  defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_sint>;
}
let OtherPredicates = [HasTrue16BitInsts] in {
  defm V_CVT_U16_F16_t16 : VOP1Inst <"v_cvt_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_uint>;
  defm V_CVT_I16_F16_t16 : VOP1Inst <"v_cvt_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_sint>;
}
let TRANS = 1, SchedRW = [WriteTrans32] in {
  defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
  defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
  defm V_RSQ_F16 : VOP1Inst_t16 <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>;
  defm V_LOG_F16 : VOP1Inst_t16 <"v_log_f16", VOP_F16_F16, AMDGPUlogf16>;
  defm V_EXP_F16 : VOP1Inst_t16 <"v_exp_f16", VOP_F16_F16, AMDGPUexpf16>;
  defm V_SIN_F16 : VOP1Inst_t16 <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
  defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
} // End TRANS = 1, SchedRW = [WriteTrans32]
defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
  defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16_SPECIAL_OMOD, int_amdgcn_frexp_exp>;
}
let OtherPredicates = [HasTrue16BitInsts] in {
  defm V_FREXP_EXP_I16_F16_t16 : VOP1Inst <"v_frexp_exp_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, int_amdgcn_frexp_exp>;
}
defm V_FLOOR_F16 : VOP1Inst_t16 <"v_floor_f16", VOP_F16_F16, ffloor>;
defm V_CEIL_F16 : VOP1Inst_t16 <"v_ceil_f16", VOP_F16_F16, fceil>;
defm V_TRUNC_F16 : VOP1Inst_t16 <"v_trunc_f16", VOP_F16_F16, ftrunc>;
defm V_RNDNE_F16 : VOP1Inst_t16 <"v_rndne_f16", VOP_F16_F16, froundeven>;
let FPDPRounding = 1 in {
  defm V_FRACT_F16 : VOP1Inst_t16 <"v_fract_f16", VOP_F16_F16, AMDGPUfract>;
} // End FPDPRounding = 1

// f16 <-> f32 conversions whose half value is carried as an i16, selected to
// the plain or the _t16 conversion instruction depending on the predicate.
let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
  def : GCNPat<
    (f32 (f16_to_fp i16:$src)),
    (V_CVT_F32_F16_e32 $src)
  >;
  def : GCNPat<
    (i16 (AMDGPUfp_to_f16 f32:$src)),
    (V_CVT_F16_F32_e32 $src)
  >;
}
let OtherPredicates = [HasTrue16BitInsts] in {
  def : GCNPat<
    (f32 (f16_to_fp i16:$src)),
    (V_CVT_F32_F16_t16_e32 $src)
  >;
  def : GCNPat<
    (i16 (AMDGPUfp_to_f16 f32:$src)),
    (V_CVT_F16_F32_t16_e32 $src)
  >;
}

// Profile for the swap instructions: two outputs and two inputs, of which
// only $vdst and $src0 appear in the assembly string.
def VOP_SWAP_I32 : VOPProfile<[i32, i32, untyped, untyped]> {
  let Outs32 = (outs VGPR_32:$vdst, VRegSrc_32:$vdst1);
  let Ins32 = (ins VRegSrc_32:$src0, VGPR_32:$src1);
  let Asm32 = " $vdst, $src0";
}

let SubtargetPredicate = isGFX9Plus in {
  def V_SWAP_B32 : VOP1_Pseudo<"v_swap_b32", VOP_SWAP_I32, [], /*VOP1Only=*/ 1> {
    // Each output is tied to the opposite input; the tied duplicates are
    // excluded from the encoding.
    let Constraints = "$vdst = $src1, $vdst1 = $src0";
    let DisableEncoding = "$vdst1,$src1";
    let SchedRW = [Write64Bit, Write64Bit];
    let IsInvalidSingleUseConsumer = 1;
  }

  let isReMaterializable = 1 in {
    defm V_SAT_PK_U8_I16 : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>;
  }

  let mayRaiseFPException = 0 in {
    let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
      defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16_SPECIAL_OMOD>;
      defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16_SPECIAL_OMOD>;
    }
    let OtherPredicates = [HasTrue16BitInsts] in {
      defm V_CVT_NORM_I16_F16_t16 : VOP1Inst<"v_cvt_norm_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>;
      defm V_CVT_NORM_U16_F16_t16 : VOP1Inst<"v_cvt_norm_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>;
    }
  } // End mayRaiseFPException = 0
} // End SubtargetPredicate = isGFX9Plus

let SubtargetPredicate = isGFX9Only in {
  defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
} // End SubtargetPredicate = isGFX9Only

// Profile for the FP8/BF8 -> f32 conversions: enables the DPP and SDWA
// extensions and uses an SDWA operand list / asm string without dst_sel.
class VOPProfile_Base_CVT_F32_F8<ValueType vt> : VOPProfileI2F <vt, i32> {
  let HasExtDPP = 1;
  let HasExtSDWA = 1;
  let HasExtSDWA9 = 1;
  let HasExt = 1;
  let DstRCSDWA = getVALUDstForVT<vt>.ret;
  let InsSDWA = (ins Bin32SDWAInputMods:$src0_modifiers, Src0SDWA:$src0,
                     Clamp:$clamp, omod:$omod, src0_sel:$src0_sel);
  let AsmSDWA = "$vdst, $src0_modifiers$clamp$omod $src0_sel"; // No dst_sel
  let AsmSDWA9 = AsmSDWA;
  let EmitDstSel = 0;
}

def VOPProfileCVT_F32_F8 : VOPProfile_Base_CVT_F32_F8 <f32>;
def VOPProfileCVT_PK_F32_F8 : VOPProfile_Base_CVT_F32_F8 <v2f32>;

let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0,
    SchedRW = [WriteFloatCvt] in {
  defm V_CVT_F32_FP8 : VOP1Inst<"v_cvt_f32_fp8", VOPProfileCVT_F32_F8>;
  defm V_CVT_F32_BF8 : VOP1Inst<"v_cvt_f32_bf8", VOPProfileCVT_F32_F8>;
  defm V_CVT_PK_F32_FP8 : VOP1Inst<"v_cvt_pk_f32_fp8", VOPProfileCVT_PK_F32_F8>;
  defm V_CVT_PK_F32_BF8 : VOP1Inst<"v_cvt_pk_f32_bf8", VOPProfileCVT_PK_F32_F8>;
}

// Selects (node $src, index) to the SDWA form, passing 'index' as the last
// (src0_sel) operand.
class Cvt_F32_F8_Pat<SDPatternOperator node, int index,
    VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
    (f32 (node i32:$src, index)),
    (inst_sdwa 0, $src, 0, 0, index)
>;

let SubtargetPredicate = isGFX9Only in {
// Index 0: use the SDWA form when HasCvtFP8VOP1Bug holds, otherwise the
// plain e32 form.
let OtherPredicates = [HasCvtFP8VOP1Bug] in {
  def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
               (V_CVT_F32_FP8_sdwa 0, $src, 0, 0, 0)>;
  def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
               (V_CVT_F32_BF8_sdwa 0, $src, 0, 0, 0)>;
}

let OtherPredicates = [HasNoCvtFP8VOP1Bug] in {
  def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
               (V_CVT_F32_FP8_e32 $src)>;
  def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
               (V_CVT_F32_BF8_e32 $src)>;
}

// Indices 1..3 always go through the SDWA form.
foreach Index = [1, 2, 3] in {
  def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index, V_CVT_F32_FP8_sdwa>;
  def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index, V_CVT_F32_BF8_sdwa>;
}
} // End SubtargetPredicate = isGFX9Only

// Packed variant: a non-zero index selects the SDWA form with SDWA.WORD_1,
// a zero index selects the plain e32 form.
class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
    VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
    (v2f32 (node i32:$src, index)),
    !if (index,
         (inst_sdwa 0, $src, 0, 0, SDWA.WORD_1),
         (inst_e32 $src))
>;

let SubtargetPredicate = isGFX9Only in {
  foreach Index = [0, -1] in {
    def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index,
                            V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>;
    def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index,
                            V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>;
  }
}


// Similar to VOPProfile_Base_CVT_F32_F8, but for VOP3 instructions.
def VOPProfile_Base_CVT_PK_F32_F8_OpSel : VOPProfileI2F <v2f32, i32> {
  let HasOpSel = 1;
  let HasExtVOP3DPP = 0;
}

// Profile for the byte-select FP8 conversions: no clamp/omod/modifiers, and
// a ByteSel:$byte_sel operand appended to the VOP3 operand lists.
class VOPProfile_Base_CVT_F_F8_ByteSel<ValueType DstVT> : VOPProfile<[DstVT, i32, untyped, untyped]> {
  let IsFP8SrcByteSel = 1;
  let HasOpSel = 0;
  let HasExtDPP = 1;
  let HasExtVOP3DPP = 1;
  let HasExtSDWA = 0;
  let HasClamp = 0;
  let HasOMod = 0;
  let HasModifiers = 0;

  defvar bytesel = (ins ByteSel:$byte_sel);
  let Ins64 = !con(getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
                            HasClamp, HasModifiers, HasSrc2Mods,
                            HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret,
                   bytesel);
  let InsVOP3Base = !con(getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, Src2VOP3DPP,
                                        NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods,
                                        HasOMod, Src0ModVOP3DPP, Src1ModVOP3DPP,
                                        Src2ModVOP3DPP, HasOpSel>.ret,
                         bytesel);
}

let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts],
    mayRaiseFPException = 0, SchedRW = [WriteFloatCvt] in {
  defm V_CVT_F32_FP8_OP_SEL    : VOP1Inst<"v_cvt_f32_fp8_op_sel", VOPProfile_Base_CVT_F_F8_ByteSel<f32>>;
  defm V_CVT_F32_BF8_OP_SEL    : VOP1Inst<"v_cvt_f32_bf8_op_sel", VOPProfile_Base_CVT_F_F8_ByteSel<f32>>;
  defm V_CVT_PK_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_fp8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>;
  defm V_CVT_PK_F32_BF8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_bf8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>;
}

// Selects (node $src0, timm byte_sel) to 'inst', forwarding the immediate
// through as_i32timm.
class Cvt_F_F8_Pat_ByteSel<SDPatternOperator node, VOP3_Pseudo inst> : GCNPat<
    (node i32:$src0, timm:$byte_sel),
    (inst $src0, (as_i32timm $byte_sel))
>;

let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts] in {
  def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f32_fp8, V_CVT_F32_FP8_OP_SEL_e64>;
  def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f32_bf8, V_CVT_F32_BF8_OP_SEL_e64>;
}

// Packed variant: a non-zero index selects the e64 form with
// SRCMODS.OP_SEL_0 as the source modifier, a zero index the plain e32 form.
class Cvt_PK_F32_F8_Pat_OpSel<SDPatternOperator node, int index,
    VOP1_Pseudo inst_e32, VOP3_Pseudo inst_e64> : GCNPat<
    (v2f32 (node i32:$src, index)),
    !if (index,
         (inst_e64 SRCMODS.OP_SEL_0, $src, 0, 0, SRCMODS.NONE),
         (inst_e32 $src))
>;

let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts] in {
  foreach Index = [0, -1] in {
    def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_fp8, Index,
                                  V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_OP_SEL_e64>;
    def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_bf8, Index,
                                  V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_OP_SEL_e64>;
  }
}

let SubtargetPredicate = isGFX10Plus in {
  defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NO_EXT<VOP_NONE>>;

  // A top-level 'let Uses' replaces the [EXEC] list assigned inside
  // VOP1_Pseudo, so EXEC has to be repeated here next to M0 (as the HasMovrel
  // group does). With only [M0] these VALU instructions would lose their
  // implicit EXEC read.
  let Uses = [M0, EXEC] in {
    defm V_MOVRELSD_2_B32 :
      VOP1Inst<"v_movrelsd_2_b32", VOP_MOVRELSD>;

    def V_SWAPREL_B32 : VOP1_Pseudo<"v_swaprel_b32", VOP_SWAP_I32, [], /*VOP1Only=*/ 1> {
      // Each output is tied to the opposite input; the tied duplicates are
      // excluded from the encoding.
      let Constraints = "$vdst = $src1, $vdst1 = $src0";
      let DisableEncoding = "$vdst1,$src1";
      let SchedRW = [Write64Bit, Write64Bit];
      let IsInvalidSingleUseConsumer = 1;
      let IsInvalidSingleUseProducer = 1;
    }
  } // End Uses = [M0, EXEC]
} // End SubtargetPredicate = isGFX10Plus

// i32 -> i32 profile with no extensions whose destination is an AGPR_32 and
// whose source is ARegSrc_32.
def VOPProfileAccMov : VOP_NO_EXT<VOP_I32_I32> {
  let DstRC = RegisterOperand<AGPR_32>;
  let Src0RC32 = ARegSrc_32;
  let Asm32 = " $vdst, $src0";
}

def V_ACCVGPR_MOV_B32 : VOP1_Pseudo<"v_accvgpr_mov_b32", VOPProfileAccMov, [], /*VOP1Only=*/ 1> {
  let SubtargetPredicate = isGFX90APlus;
  let isReMaterializable = 1;
  let isAsCheapAsAMove = 1;
}

let SubtargetPredicate = isGFX11Plus in {
  // Restrict src0 to be VGPR
  def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS,
                                      [], /*VOP1Only=*/ 1> {
    let IsInvalidSingleUseConsumer = 1;
    let IsInvalidSingleUseProducer = 1;
  }
  defm V_MOV_B16_t16    : VOP1Inst<"v_mov_b16_t16", VOPProfile_True16<VOP_I16_I16>>;
  defm V_NOT_B16        : VOP1Inst_t16<"v_not_b16", VOP_I16_I16>;
  defm V_CVT_I32_I16    : VOP1Inst_t16<"v_cvt_i32_i16", VOP_I32_I16>;
  defm V_CVT_U32_U16    : VOP1Inst_t16<"v_cvt_u32_u16", VOP_I32_I16>;
} // End SubtargetPredicate = isGFX11Plus

// Select int_amdgcn_permlane64 to V_PERMLANE64_B32 for every type listed in
// Reg32Types.
foreach vt = Reg32Types.types in {
  def : GCNPat<(int_amdgcn_permlane64 (vt VRegSrc_32:$src0)),
        (vt (V_PERMLANE64_B32 (vt VRegSrc_32:$src0)))
  >;
}

//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//

// DPP encoding of a VOP1 instruction. Scheduling/side-effect properties and
// predicates are copied from the DPP pseudo; the low dword has the VOP1
// layout with the src0 field fixed to 0xfa.
class VOP1_DPP<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 = 0> :
    VOP_DPP<ps.OpName, p, isDPP16> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;
  let TRANS = ps.TRANS;
  let SubtargetPredicate = ps.SubtargetPredicate;
  let OtherPredicates = ps.OtherPredicates;

  bits<8> vdst;
  let Inst{8-0}   = 0xfa;
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f;
}

// DPP16 real instruction for a given encoding family.
class VOP1_DPP16<bits<8> op, VOP1_DPP_Pseudo ps, int subtarget, VOPProfile p = ps.Pfl> :
    VOP1_DPP<op, ps, p, 1>,
    SIMCInstr <ps.PseudoInstr, subtarget> {
  let AssemblerPredicate = HasDPP16;
}

// VOP1_DPP16 whose encoding family, assembler predicate and decoder
// namespace come from a GFXGen record.
class VOP1_DPP16_Gen<bits<8> op, VOP1_DPP_Pseudo ps, GFXGen Gen, VOPProfile p = ps.Pfl> :
    VOP1_DPP16 <op, ps, Gen.Subtarget, p> {
  let AssemblerPredicate = Gen.AssemblerPredicate;
  let DecoderNamespace = Gen.DecoderNamespace;
}

// DPP8 encoding of a VOP1 instruction, built from the e32 pseudo. The src0
// field carries 'fi'; the remaining fields follow the VOP1 layout.
class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
    VOP_DPP8<ps.OpName, p> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;
  // Keep the TRANS flag in sync with the pseudo, as VOP1_Real and VOP1_DPP do.
  let TRANS = ps.TRANS;
  let SubtargetPredicate = ps.SubtargetPredicate;
  let OtherPredicates = ps.OtherPredicates;

  bits<8> vdst;
  let Inst{8-0}   = fi;
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f;
}

// DPP8 real whose assembler predicate and decoder namespace come from a
// GFXGen record.
class VOP1_DPP8_Gen<bits<8> op, VOP1_Pseudo ps, GFXGen Gen, VOPProfile p = ps.Pfl> :
    VOP1_DPP8<op, ps, p> {
  let AssemblerPredicate = Gen.AssemblerPredicate;
  let DecoderNamespace = Gen.DecoderNamespace;
}

//===----------------------------------------------------------------------===//
// GFX11, GFX12
//===----------------------------------------------------------------------===//

// Real for a pseudo that exists only in VOP1 form; the pseudo is looked up
// under NAME with no "_e32" suffix.
multiclass VOP1Only_Real<GFXGen Gen, bits<9> op> {
  defvar ps = !cast<VOP1_Pseudo>(NAME);
  let IsSingle = 1 in
    def Gen.Suffix :
      VOP1_Real_Gen<ps, Gen>,
      VOP1e<op{7-0}, ps.Pfl>;
}

// 32-bit (e32) real built from the pseudo named opName#"_e32".
multiclass VOP1_Real_e32<GFXGen Gen, bits<9> op, string opName = NAME> {
  defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
  def _e32#Gen.Suffix :
    VOP1_Real_Gen<ps, Gen>,
    VOP1e<op{7-0}, ps.Pfl>;
}

// e32 real with an overridden mnemonic. The decoder namespace gets a
// "_FAKE16" suffix unless the profile is marked IsRealTrue16.
multiclass VOP1_Real_e32_with_name<GFXGen Gen, bits<9> op, string opName,
                                   string asmName> {
  defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
  let AsmString = asmName # ps.AsmOperands,
      DecoderNamespace = Gen.DecoderNamespace #
                         !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
    defm NAME : VOP1_Real_e32<Gen, op, opName>;
  }
}

// 64-bit (e64) real; the VOP3 opcode is {0, 1, 1, op{6-0}}.
multiclass VOP1_Real_e64<GFXGen Gen, bits<9> op> {
  defvar ps = !cast<VOP3_Pseudo>(NAME#"_e64");
  def _e64#Gen.Suffix :
    VOP3_Real_Gen<ps, Gen>,
    VOP3e_gfx11_gfx12<{0, 1, 1, op{6-0}}, ps.Pfl>;
}

// DPP16 real built from the pseudo named opName#"_dpp".
// (An unused `defvar ps` lookup of the "_e32" pseudo was removed here.)
multiclass VOP1_Real_dpp<GFXGen Gen, bits<9> op, string opName = NAME> {
  def _dpp#Gen.Suffix :
    VOP1_DPP16_Gen<op{7-0}, !cast<VOP1_DPP_Pseudo>(opName#"_dpp"), Gen>;
}

// DPP16 real with an overridden mnemonic; same "_FAKE16" namespace rule as
// VOP1_Real_e32_with_name.
multiclass VOP1_Real_dpp_with_name<GFXGen Gen, bits<9> op, string opName,
                                   string asmName> {
  defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
  let AsmString = asmName # ps.Pfl.AsmDPP16,
      DecoderNamespace = Gen.DecoderNamespace #
                         !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
    defm NAME : VOP1_Real_dpp<Gen, op, opName>;
  }
}

// DPP8 real; VOP1_DPP8_Gen is handed the "_e32" pseudo.
multiclass VOP1_Real_dpp8<GFXGen Gen, bits<9> op, string opName = NAME> {
  defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
  def _dpp8#Gen.Suffix : VOP1_DPP8_Gen<op{7-0}, ps, Gen>;
}

// DPP8 real with an overridden mnemonic; same "_FAKE16" namespace rule as
// VOP1_Real_e32_with_name.
multiclass VOP1_Real_dpp8_with_name<GFXGen Gen, bits<9> op, string opName,
                                    string asmName> {
  defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
  let AsmString = asmName # ps.Pfl.AsmDPP8,
      DecoderNamespace = Gen.DecoderNamespace #
                         !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
    defm NAME : VOP1_Real_dpp8<Gen, op, opName>;
  }
}

// VOP3 "real triple" for a VOP1 opcode, using opcode {0, 1, 1, op{6-0}}.
multiclass VOP1_Realtriple_e64<GFXGen Gen, bits<9> op> {
  defm NAME : VOP3_Realtriple<Gen, {0, 1, 1, op{6-0}}, /*isSingle=*/ 0, NAME>;
}

multiclass VOP1_Realtriple_e64_with_name<GFXGen Gen, bits<9> op, string opName,
                                         string asmName> {
  defm NAME : VOP3_Realtriple_with_name<Gen, {0, 1, 1, op{6-0}}, opName,
                                        asmName>;
}

// e32 + VOP3 real triple + DPP16 + DPP8.
multiclass VOP1_Real_FULL<GFXGen Gen, bits<9> op> :
  VOP1_Real_e32<Gen, op>, VOP1_Realtriple_e64<Gen, op>,
  VOP1_Real_dpp<Gen, op>, VOP1_Real_dpp8<Gen, op>;

// Renamed e32/DPP16/DPP8 reals for GFX11 with no VOP3 forms, plus a mnemonic
// alias from the pseudo's mnemonic to asmName.
multiclass VOP1_Real_NO_VOP3_with_name_gfx11<bits<9> op, string opName,
                                             string asmName> {
  defm NAME : VOP1_Real_e32_with_name<GFX11Gen, op, opName, asmName>,
              VOP1_Real_dpp_with_name<GFX11Gen, op, opName, asmName>,
              VOP1_Real_dpp8_with_name<GFX11Gen, op, opName, asmName>;
  defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
  def gfx11_alias : AMDGPUMnemonicAlias<ps.Mnemonic, asmName> {
    let AssemblerPredicate = isGFX11Plus;
  }
}

// GFX12 variant of the above; it defines no mnemonic alias.
multiclass VOP1_Real_NO_VOP3_with_name_gfx12<bits<9> op, string opName,
                                             string asmName> {
  defm NAME : VOP1_Real_e32_with_name<GFX12Gen, op, opName, asmName>,
              VOP1_Real_dpp_with_name<GFX12Gen, op, opName, asmName>,
              VOP1_Real_dpp8_with_name<GFX12Gen, op, opName, asmName>;
}

// Renamed e32 + DPP16 + DPP8 + VOP3 real triple.
multiclass VOP1_Real_FULL_with_name<GFXGen Gen, bits<9> op, string opName,
                                    string asmName> :
  VOP1_Real_e32_with_name<Gen, op, opName, asmName>,
  VOP1_Real_dpp_with_name<Gen, op, opName, asmName>,
  VOP1_Real_dpp8_with_name<Gen, op, opName, asmName>,
  VOP1_Realtriple_e64_with_name<Gen, op, opName, asmName>;

// e32 + e64 only.
multiclass VOP1_Real_NO_DPP<GFXGen Gen, bits<9> op> :
  VOP1_Real_e32<Gen, op>, VOP1_Real_e64<Gen, op>;

// Renamed full set for both GFX11 and GFX12; opName defaults to NAME.
multiclass VOP1_Real_FULL_t16_gfx11_gfx12<bits<9> op, string asmName,
                                          string opName = NAME> :
  VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
  VOP1_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;

// Renamed full set for both GFX11 and GFX12 with an explicit opName.
multiclass VOP1_Real_FULL_with_name_gfx11_gfx12<bits<9> op, string opName,
                                                string asmName> :
  VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
  VOP1_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;

multiclass VOP1Only_Real_gfx11_gfx12<bits<9> op> :
  VOP1Only_Real<GFX11Gen, op>, VOP1Only_Real<GFX12Gen, op>;

multiclass VOP1_Real_FULL_gfx11_gfx12<bits<9> op> :
  VOP1_Real_FULL<GFX11Gen, op>, VOP1_Real_FULL<GFX12Gen, op>;

// Renamed e32 real plus a renamed VOP3 real, with no DPP forms.
multiclass VOP1_Real_NO_DPP_OP_SEL_with_name<GFXGen Gen, bits<9> op,
                                             string opName, string asmName> :
  VOP1_Real_e32_with_name<Gen, op, opName, asmName>,
  VOP3_Real_with_name<Gen, {0, 1, 1, op{6-0}}, opName, asmName>;


defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name<GFX12Gen, 0x06c, "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">;
defm V_CVT_F32_BF8 : VOP1_Real_FULL_with_name<GFX12Gen, 0x06d, "V_CVT_F32_BF8_OP_SEL", "v_cvt_f32_bf8">;

// Define VOP1 instructions using the pseudo instruction with its old profile and
// VOP3 using the OpSel profile for the pseudo instruction.
defm V_CVT_PK_F32_FP8 :
  VOP1_Real_e32_with_name<GFX12Gen, 0x06e, "V_CVT_PK_F32_FP8", "v_cvt_pk_f32_fp8">;
defm V_CVT_PK_F32_FP8 :
  VOP3_Real_with_name<GFX12Gen, 0x1ee, "V_CVT_PK_F32_FP8_OP_SEL", "v_cvt_pk_f32_fp8">;

defm V_CVT_PK_F32_BF8 :
  VOP1_Real_e32_with_name<GFX12Gen, 0x06f, "V_CVT_PK_F32_BF8", "v_cvt_pk_f32_bf8">;
defm V_CVT_PK_F32_BF8 :
  VOP3_Real_with_name<GFX12Gen, 0x1ef, "V_CVT_PK_F32_BF8_OP_SEL", "v_cvt_pk_f32_bf8">;

// Reals whose assembly mnemonic differs from the pseudo they are built from:
// the first string names the pseudo, the second is the mnemonic.
defm V_CVT_NEAREST_I32_F32 :
  VOP1_Real_FULL_with_name_gfx11_gfx12<0x00c, "V_CVT_RPI_I32_F32", "v_cvt_nearest_i32_f32">;
defm V_CVT_FLOOR_I32_F32 :
  VOP1_Real_FULL_with_name_gfx11_gfx12<0x00d, "V_CVT_FLR_I32_F32", "v_cvt_floor_i32_f32">;
defm V_CLZ_I32_U32 :
  VOP1_Real_FULL_with_name_gfx11_gfx12<0x039, "V_FFBH_U32", "v_clz_i32_u32">;
defm V_CTZ_I32_B32 :
  VOP1_Real_FULL_with_name_gfx11_gfx12<0x03a, "V_FFBL_B32", "v_ctz_i32_b32">;
defm V_CLS_I32 :
  VOP1_Real_FULL_with_name_gfx11_gfx12<0x03b, "V_FFBH_I32", "v_cls_i32">;

defm V_PERMLANE64_B32     : VOP1Only_Real_gfx11_gfx12<0x067>;
defm V_MOV_B16_t16        : VOP1_Real_FULL_t16_gfx11_gfx12<0x01c, "v_mov_b16">;
defm V_NOT_B16_fake16     : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16">;
defm V_CVT_I32_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">;
defm V_CVT_U32_U16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">;

// 16-bit operations. Where both a _t16 and a _fake16 pseudo are listed, the
// two share one opcode and one mnemonic.
defm V_CVT_F16_U16_t16       : VOP1_Real_FULL_t16_gfx11_gfx12<0x050, "v_cvt_f16_u16">;
defm V_CVT_F16_I16_t16       : VOP1_Real_FULL_t16_gfx11_gfx12<0x051, "v_cvt_f16_i16">;
defm V_CVT_U16_F16_t16       : VOP1_Real_FULL_t16_gfx11_gfx12<0x052, "v_cvt_u16_f16">;
defm V_CVT_I16_F16_t16       : VOP1_Real_FULL_t16_gfx11_gfx12<0x053, "v_cvt_i16_f16">;
defm V_RCP_F16_t16           : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
defm V_RCP_F16_fake16        : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
defm V_SQRT_F16_t16          : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">;
defm V_SQRT_F16_fake16       : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">;
defm V_RSQ_F16_t16           : VOP1_Real_FULL_t16_gfx11_gfx12<0x056, "v_rsq_f16">;
defm V_RSQ_F16_fake16        : VOP1_Real_FULL_t16_gfx11_gfx12<0x056, "v_rsq_f16">;
defm V_LOG_F16_t16           : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">;
defm V_LOG_F16_fake16        : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">;
defm V_EXP_F16_t16           : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
defm V_EXP_F16_fake16        : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
defm V_FREXP_MANT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;
defm V_FLOOR_F16_t16         : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
defm V_FLOOR_F16_fake16      : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
defm V_CEIL_F16_t16          : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
defm V_CEIL_F16_fake16       : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
defm V_TRUNC_F16_fake16      : VOP1_Real_FULL_t16_gfx11_gfx12<0x05d, "v_trunc_f16">;
defm V_RNDNE_F16_fake16      : VOP1_Real_FULL_t16_gfx11_gfx12<0x05e, "v_rndne_f16">;
defm V_FRACT_F16_fake16      : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f16">;
defm V_SIN_F16_fake16        : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">;
defm V_COS_F16_fake16        : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">;
defm V_SAT_PK_U8_I16_fake16  : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">;
defm V_CVT_NORM_I16_F16_t16  : VOP1_Real_FULL_t16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">;
defm V_CVT_NORM_U16_F16_t16  : VOP1_Real_FULL_t16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">;

defm V_CVT_F16_F32_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">;
defm V_CVT_F32_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">;

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
  // Real for a pseudo that exists only in VOP1 form (looked up under NAME).
  multiclass VOP1Only_Real_gfx10<bits<9> op> {
    defvar ps = !cast<VOP1_Pseudo>(NAME);
    def _gfx10 :
      VOP1_Real<ps, SIEncodingFamily.GFX10>,
      VOP1e<op{7-0}, ps.Pfl>;
  }
  // 32-bit encoding.
  multiclass VOP1_Real_e32_gfx10<bits<9> op> {
    defvar ps = !cast<VOP1_Pseudo>(NAME#"_e32");
    def _e32_gfx10 :
      VOP1_Real<ps, SIEncodingFamily.GFX10>,
      VOP1e<op{7-0}, ps.Pfl>;
  }
  // 64-bit encoding; the VOP3 opcode is {0, 1, 1, op{6-0}}.
  multiclass VOP1_Real_e64_gfx10<bits<9> op> {
    defvar ps = !cast<VOP3_Pseudo>(NAME#"_e64");
    def _e64_gfx10 :
      VOP3_Real<ps, SIEncodingFamily.GFX10>,
      VOP3e_gfx10<{0, 1, 1, op{6-0}}, ps.Pfl>;
  }
  // SDWA encoding, defined only when the e32 profile has HasExtSDWA9.
  multiclass VOP1_Real_sdwa_gfx10<bits<9> op> {
    defvar ps32 = !cast<VOP1_Pseudo>(NAME#"_e32");
    if ps32.Pfl.HasExtSDWA9 then
    def _sdwa_gfx10 :
      VOP_SDWA10_Real<!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP1_SDWA9Ae<op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
  }
  // DPP16 encoding, defined only when the e32 profile has HasExt32BitDPP.
  multiclass VOP1_Real_dpp_gfx10<bits<9> op> {
    defvar ps32 = !cast<VOP1_Pseudo>(NAME#"_e32");
    if ps32.Pfl.HasExt32BitDPP then
    def _dpp_gfx10 :
      VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp"),
                 SIEncodingFamily.GFX10>;
  }
  // DPP8 encoding, defined only when the e32 profile has HasExt32BitDPP.
  multiclass VOP1_Real_dpp8_gfx10<bits<9> op> {
    defvar ps32 = !cast<VOP1_Pseudo>(NAME#"_e32");
    if ps32.Pfl.HasExt32BitDPP then
    def _dpp8_gfx10 : VOP1_DPP8<op{7-0}, ps32>;
  }
} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"

// Every GFX10 encoding of one opcode: e32, e64, SDWA, DPP16 and DPP8.
multiclass VOP1_Real_gfx10<bits<9> op> :
  VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>,
  VOP1_Real_sdwa_gfx10<op>, VOP1_Real_dpp_gfx10<op>,
  VOP1_Real_dpp8_gfx10<op>;

multiclass VOP1_Real_gfx10_FULL_gfx11_gfx12<bits<9> op> :
  VOP1_Real_gfx10<op>,
  VOP1_Real_FULL<GFX11Gen, op>,
  VOP1_Real_FULL<GFX12Gen, op>;

multiclass VOP1_Real_gfx10_NO_DPP_gfx11_gfx12<bits<9> op> :
  VOP1_Real_gfx10<op>,
  VOP1_Real_NO_DPP<GFX11Gen, op>,
  VOP1_Real_NO_DPP<GFX12Gen, op>;

multiclass VOP1Only_Real_gfx10_gfx11_gfx12<bits<9> op> :
  VOP1Only_Real_gfx10<op>,
  VOP1Only_Real<GFX11Gen, op>,
  VOP1Only_Real<GFX12Gen, op>;

defm V_PIPEFLUSH         : VOP1_Real_gfx10_NO_DPP_gfx11_gfx12<0x01b>;
defm V_MOVRELSD_2_B32    : VOP1_Real_gfx10_FULL_gfx11_gfx12<0x048>;
defm V_CVT_F16_U16       : VOP1_Real_gfx10<0x050>;
defm V_CVT_F16_I16       : VOP1_Real_gfx10<0x051>;
defm V_CVT_U16_F16       : VOP1_Real_gfx10<0x052>;
defm V_CVT_I16_F16       : VOP1_Real_gfx10<0x053>;
defm V_RCP_F16           : VOP1_Real_gfx10<0x054>;
defm V_SQRT_F16          : VOP1_Real_gfx10<0x055>;
defm V_RSQ_F16           : VOP1_Real_gfx10<0x056>;
defm V_LOG_F16           : VOP1_Real_gfx10<0x057>;
defm V_EXP_F16           : VOP1_Real_gfx10<0x058>;
defm V_FREXP_MANT_F16    : VOP1_Real_gfx10<0x059>;
defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10<0x05a>;
defm V_FLOOR_F16         : VOP1_Real_gfx10<0x05b>;
defm V_CEIL_F16          : VOP1_Real_gfx10<0x05c>;
defm V_TRUNC_F16         : VOP1_Real_gfx10<0x05d>;
defm V_RNDNE_F16         : VOP1_Real_gfx10<0x05e>;
defm V_FRACT_F16         : VOP1_Real_gfx10<0x05f>;
defm V_SIN_F16           : VOP1_Real_gfx10<0x060>;
defm V_COS_F16           : VOP1_Real_gfx10<0x061>;
defm V_SAT_PK_U8_I16     : VOP1_Real_gfx10<0x062>;
defm V_CVT_NORM_I16_F16  : VOP1_Real_gfx10<0x063>;
defm V_CVT_NORM_U16_F16  : VOP1_Real_gfx10<0x064>;

defm V_SWAP_B32          : VOP1Only_Real_gfx10_gfx11_gfx12<0x065>;
defm V_SWAPREL_B32       : VOP1Only_Real_gfx10_gfx11_gfx12<0x068>;

//===----------------------------------------------------------------------===//
// GFX7, GFX10, GFX11, GFX12
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
  multiclass VOP1_Real_e32_gfx7<bits<9> op> {
    defvar ps = !cast<VOP1_Pseudo>(NAME#"_e32");
    def _e32_gfx7 :
      VOP1_Real<ps, SIEncodingFamily.SI>,
      VOP1e<op{7-0}, ps.Pfl>;
  }
  // The VOP3 opcode here is {1, 1, op{6-0}}.
  multiclass VOP1_Real_e64_gfx7<bits<9> op> {
    defvar ps = !cast<VOP3_Pseudo>(NAME#"_e64");
    def _e64_gfx7 :
      VOP3_Real<ps, SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, ps.Pfl>;
  }
} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"

multiclass VOP1_Real_gfx7<bits<9> op> :
  VOP1_Real_e32_gfx7<op>, VOP1_Real_e64_gfx7<op>;

multiclass VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<bits<9> op> :
  VOP1_Real_gfx7<op>, VOP1_Real_gfx10<op>, VOP1_Real_NO_DPP<GFX11Gen, op>,
  VOP1_Real_NO_DPP<GFX12Gen, op>;

defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>;
defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>;

defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x017>;
defm V_CEIL_F64  : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x018>;
defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x019>;
defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x01a>;

//===----------------------------------------------------------------------===//
// GFX6, GFX7, GFX10, GFX11, GFX12
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  multiclass VOP1_Real_e32_gfx6_gfx7<bits<9> op> {
    defvar ps = !cast<VOP1_Pseudo>(NAME#"_e32");
    def _e32_gfx6_gfx7 :
      VOP1_Real<ps, SIEncodingFamily.SI>,
      VOP1e<op{7-0}, ps.Pfl>;
  }
  multiclass VOP1_Real_e64_gfx6_gfx7<bits<9> op> {
    defvar ps = !cast<VOP3_Pseudo>(NAME#"_e64");
    def _e64_gfx6_gfx7 :
      VOP3_Real<ps, SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, ps.Pfl>;
  }
  multiclass VOP1Only_Real_gfx6_gfx7<bits<9> op> {
    defvar ps = !cast<VOP1_Pseudo>(NAME);
    def _gfx6_gfx7 :
      VOP1_Real<ps, SIEncodingFamily.SI>,
      VOP1e<op{7-0}, ps.Pfl>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

multiclass VOP1_Real_gfx6_gfx7<bits<9> op> :
  VOP1_Real_e32_gfx6_gfx7<op>, VOP1_Real_e64_gfx6_gfx7<op>;

multiclass VOP1_Real_gfx6_gfx7_gfx10<bits<9> op> :
  VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10<op>;

multiclass VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<bits<9> op> :
  VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_FULL<GFX11Gen, op>,
  VOP1_Real_FULL<GFX12Gen, op>;

multiclass VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<bits<9> op> :
  VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_NO_DPP<GFX11Gen, op>,
  VOP1_Real_NO_DPP<GFX12Gen, op>;

multiclass VOP1Only_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<9> op> :
  VOP1Only_Real_gfx6_gfx7<op>, VOP1Only_Real_gfx10_gfx11_gfx12<op>;

defm V_LOG_CLAMP_F32  : VOP1_Real_gfx6_gfx7<0x026>;
defm V_RCP_CLAMP_F32  : VOP1_Real_gfx6_gfx7<0x028>;
defm V_RCP_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x029>;
defm V_RSQ_CLAMP_F32  : VOP1_Real_gfx6_gfx7<0x02c>;
defm V_RSQ_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x02d>;
defm V_RCP_CLAMP_F64  : VOP1_Real_gfx6_gfx7<0x030>;
defm V_RSQ_CLAMP_F64  : VOP1_Real_gfx6_gfx7<0x032>;

defm V_NOP               : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x000>;
defm V_MOV_B32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x001>;
defm V_READFIRSTLANE_B32 : VOP1Only_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x002>;
defm V_CVT_I32_F64       : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x003>;
defm V_CVT_F64_I32       : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x004>;
defm V_CVT_F32_I32       : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x005>;
defm V_CVT_F32_U32       : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x006>;
defm V_CVT_U32_F32       : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x007>;
defm V_CVT_I32_F32       : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x008>;
defm V_CVT_F16_F32       : VOP1_Real_gfx6_gfx7_gfx10<0x00a>;
defm V_CVT_F32_F16       : VOP1_Real_gfx6_gfx7_gfx10<0x00b>;
defm V_CVT_RPI_I32_F32   : VOP1_Real_gfx6_gfx7_gfx10<0x00c>;
defm V_CVT_FLR_I32_F32   : VOP1_Real_gfx6_gfx7_gfx10<0x00d>;
defm V_CVT_OFF_F32_I4    : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x00e>;
defm V_CVT_F32_F64       : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x00f>;
defm V_CVT_F64_F32       : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x010>;
defm V_CVT_F32_UBYTE0    : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x011>;
defm V_CVT_F32_UBYTE1    : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x012>;
defm V_CVT_F32_UBYTE2    : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x013>;
defm V_CVT_F32_UBYTE3    : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x014>;
defm V_CVT_U32_F64       : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x015>;
defm V_CVT_F64_U32       : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x016>;
defm V_FRACT_F32         : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x020>;
defm V_TRUNC_F32         : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x021>;
defm V_CEIL_F32          : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x022>;
defm V_RNDNE_F32         : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x023>;
defm V_FLOOR_F32         : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x024>;
defm V_EXP_F32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x025>;
defm V_LOG_F32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x027>;
defm V_RCP_F32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x02a>;
defm V_RCP_IFLAG_F32     : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x02b>;
defm V_RSQ_F32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x02e>;
defm V_RCP_F64           : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x02f>;
defm V_RSQ_F64           : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x031>;
defm V_SQRT_F32          : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x033>;
defm V_SQRT_F64          : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x034>;
defm V_SIN_F32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x035>;
defm V_COS_F32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x036>;
defm V_NOT_B32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x037>;
defm V_BFREV_B32         : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x038>;
// These three are defined for GFX6/7/10 only in this list.
defm V_FFBH_U32          : VOP1_Real_gfx6_gfx7_gfx10<0x039>;
defm V_FFBL_B32          : VOP1_Real_gfx6_gfx7_gfx10<0x03a>;
defm V_FFBH_I32          : VOP1_Real_gfx6_gfx7_gfx10<0x03b>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03c>;
defm V_FREXP_MANT_F64    : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03d>;
defm V_FRACT_F64         : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03e>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x03f>;
defm V_FREXP_MANT_F32    : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x040>;
defm V_CLREXCP           : VOP1_Real_gfx6_gfx7_gfx10<0x041>;
defm V_MOVRELD_B32       : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x042>;
defm V_MOVRELS_B32       : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x043>;
defm V_MOVRELSD_B32      : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x044>;

//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//

// First encoding word of a VOP1 DPP instruction. Only `op` and the profile
// are used in this body; `ps` merely supplies the default profile.
class VOP1_DPPe <bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
  VOP_DPPe <P> {
  bits<8> vdst;
  let Inst{31-25} = 0x3f; //encoding
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{16-9} = op;
  let Inst{8-0} = 0xfa; // dpp
}

let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
  // Real for a pseudo that exists only in VOP1 form (looked up under NAME).
  multiclass VOP1Only_Real_vi <bits<10> op> {
    defvar ps = !cast<VOP1_Pseudo>(NAME);
    def _vi :
      VOP1_Real<ps, SIEncodingFamily.VI>,
      VOP1e<op{7-0}, ps.Pfl>;
  }

  // e32 and e64 reals; the VOP3 opcode is the VOP1 opcode plus 0x140.
  multiclass VOP1_Real_e32e64_vi <bits<10> op> {
    defvar ps32 = !cast<VOP1_Pseudo>(NAME#"_e32");
    defvar ps64 = !cast<VOP3_Pseudo>(NAME#"_e64");
    def _e32_vi :
      VOP1_Real<ps32, SIEncodingFamily.VI>,
      VOP1e<op{7-0}, ps32.Pfl>;
    def _e64_vi :
      VOP3_Real<ps64, SIEncodingFamily.VI>,
      VOP3e_vi <!add(0x140, op), ps64.Pfl>;
  }
}

// e32/e64 reals plus whichever of the SDWA, SDWA9 and DPP forms the e32
// profile enables.
multiclass VOP1_Real_vi <bits<10> op> {
  defvar ps32 = !cast<VOP1_Pseudo>(NAME#"_e32");

  defm NAME : VOP1_Real_e32e64_vi <op>;

  if ps32.Pfl.HasExtSDWA then
  def _sdwa_vi :
    VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  if ps32.Pfl.HasExtSDWA9 then
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  if ps32.Pfl.HasExtDPP then
  def _dpp_vi :
    VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
    VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
}

defm V_NOP               : VOP1_Real_vi <0x0>;
defm V_MOV_B32           : VOP1_Real_vi <0x1>;
defm V_READFIRSTLANE_B32 : VOP1Only_Real_vi <0x2>;
defm V_CVT_I32_F64       : VOP1_Real_vi <0x3>;
defm V_CVT_F64_I32       : VOP1_Real_vi <0x4>;
defm V_CVT_F32_I32       : VOP1_Real_vi <0x5>;
defm V_CVT_F32_U32       : VOP1_Real_vi <0x6>;
defm V_CVT_U32_F32       : VOP1_Real_vi <0x7>;
defm V_CVT_I32_F32       : VOP1_Real_vi <0x8>;
defm V_CVT_F16_F32       : VOP1_Real_vi <0xa>;
defm V_CVT_F32_F16       : VOP1_Real_vi <0xb>;
defm V_CVT_RPI_I32_F32   : VOP1_Real_vi <0xc>;
defm V_CVT_FLR_I32_F32   : VOP1_Real_vi <0xd>;
defm V_CVT_OFF_F32_I4    : VOP1_Real_vi <0xe>;
defm V_CVT_F32_F64       : VOP1_Real_vi <0xf>;
defm V_CVT_F64_F32       : VOP1_Real_vi <0x10>;
defm V_CVT_F32_UBYTE0    : VOP1_Real_vi <0x11>;
defm V_CVT_F32_UBYTE1    : VOP1_Real_vi <0x12>;
defm V_CVT_F32_UBYTE2    : VOP1_Real_vi <0x13>;
defm V_CVT_F32_UBYTE3    : VOP1_Real_vi <0x14>;
defm V_CVT_U32_F64       : VOP1_Real_vi <0x15>;
defm V_CVT_F64_U32       : VOP1_Real_vi <0x16>;
defm V_FRACT_F32         : VOP1_Real_vi <0x1b>;
defm V_TRUNC_F32         : VOP1_Real_vi <0x1c>;
defm V_CEIL_F32          : VOP1_Real_vi <0x1d>;
defm V_RNDNE_F32         : VOP1_Real_vi <0x1e>;
defm V_FLOOR_F32         : VOP1_Real_vi <0x1f>;
defm V_EXP_F32           : VOP1_Real_vi <0x20>;
defm V_LOG_F32           : VOP1_Real_vi <0x21>;
defm V_RCP_F32           : VOP1_Real_vi <0x22>;
defm V_RCP_IFLAG_F32     : VOP1_Real_vi <0x23>;
defm V_RSQ_F32           : VOP1_Real_vi <0x24>;
defm V_RCP_F64           : VOP1_Real_vi <0x25>;
defm V_RSQ_F64           : VOP1_Real_vi <0x26>;
defm V_SQRT_F32          : VOP1_Real_vi <0x27>;
defm V_SQRT_F64          : VOP1_Real_vi <0x28>;
defm V_SIN_F32           : VOP1_Real_vi <0x29>;
defm V_COS_F32           : VOP1_Real_vi <0x2a>;
defm V_NOT_B32           : VOP1_Real_vi <0x2b>;
defm V_BFREV_B32         : VOP1_Real_vi <0x2c>;
defm V_FFBH_U32          : VOP1_Real_vi <0x2d>;
defm V_FFBL_B32          : VOP1_Real_vi <0x2e>;
defm V_FFBH_I32          : VOP1_Real_vi <0x2f>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_vi <0x30>;
defm V_FREXP_MANT_F64    : VOP1_Real_vi <0x31>;
defm V_FRACT_F64         : VOP1_Real_vi <0x32>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_vi <0x33>;
defm V_FREXP_MANT_F32    : VOP1_Real_vi <0x34>;
defm V_CLREXCP           : VOP1_Real_vi <0x35>;
// The movrel instructions get only the e32/e64 reals.
defm V_MOVRELD_B32       : VOP1_Real_e32e64_vi <0x36>;
defm V_MOVRELS_B32       : VOP1_Real_e32e64_vi <0x37>;
defm V_MOVRELSD_B32      : VOP1_Real_e32e64_vi <0x38>;
defm V_TRUNC_F64         : VOP1_Real_vi <0x17>;
defm V_CEIL_F64          : VOP1_Real_vi <0x18>;
defm V_FLOOR_F64         : VOP1_Real_vi <0x1A>;
defm V_RNDNE_F64         : VOP1_Real_vi <0x19>;
defm V_LOG_LEGACY_F32    : VOP1_Real_vi <0x4c>;
defm V_EXP_LEGACY_F32    : VOP1_Real_vi <0x4b>;
defm V_CVT_F16_U16       : VOP1_Real_vi <0x39>;
defm V_CVT_F16_I16       : VOP1_Real_vi <0x3a>;
defm V_CVT_U16_F16       : VOP1_Real_vi <0x3b>;
defm V_CVT_I16_F16       : VOP1_Real_vi <0x3c>;
defm V_RCP_F16           : VOP1_Real_vi <0x3d>;
defm V_SQRT_F16          : VOP1_Real_vi <0x3e>;
defm V_RSQ_F16           : VOP1_Real_vi <0x3f>;
defm V_LOG_F16           : VOP1_Real_vi <0x40>;
defm V_EXP_F16           : VOP1_Real_vi <0x41>;
defm V_FREXP_MANT_F16    : VOP1_Real_vi <0x42>;
defm V_FREXP_EXP_I16_F16 : VOP1_Real_vi <0x43>;
defm V_FLOOR_F16         : VOP1_Real_vi <0x44>;
defm V_CEIL_F16          : VOP1_Real_vi <0x45>;
defm V_TRUNC_F16         : VOP1_Real_vi <0x46>;
defm V_RNDNE_F16         : VOP1_Real_vi <0x47>;
defm V_FRACT_F16         : VOP1_Real_vi <0x48>;
defm V_SIN_F16           : VOP1_Real_vi <0x49>;
defm V_COS_F16           : VOP1_Real_vi <0x4a>;
defm V_SWAP_B32          : VOP1Only_Real_vi <0x51>;

defm V_SAT_PK_U8_I16     : VOP1_Real_vi<0x4f>;
defm V_CVT_NORM_I16_F16  : VOP1_Real_vi<0x4d>;
defm V_CVT_NORM_U16_F16  : VOP1_Real_vi<0x4e>;

defm V_ACCVGPR_MOV_B32   : VOP1Only_Real_vi<0x52>;

let VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [EXEC, M0],
    Size = V_MOV_B32_e32.Size in {

// Copy of v_mov_b32 with $vdst as a use operand for use with VGPR
// indexing mode. vdst can't be treated as a def for codegen purposes,
// and an implicit use and def of the super register should be added.
def V_MOV_B32_indirect_write :
  VPseudoInstSI<
    (outs),
    (ins getVALUDstForVT<i32>.ret:$vdst, getVOPSrc0ForVT<i32, 0>.ret:$src0)>,
  PseudoInstExpansion<
    (V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
                      getVOPSrc0ForVT<i32, 0>.ret:$src0)>;

// Copy of v_mov_b32 for use with VGPR indexing mode. An implicit use of the
// super register should be added.
def V_MOV_B32_indirect_read :
  VPseudoInstSI<
    (outs getVALUDstForVT<i32>.ret:$vdst),
    (ins getVOPSrc0ForVT<i32, 0>.ret:$src0)>,
  PseudoInstExpansion<
    (V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
                      getVOPSrc0ForVT<i32, 0>.ret:$src0)>;

} // End VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [EXEC, M0],
  //     Size = V_MOV_B32_e32.Size

let OtherPredicates = [isGFX8Plus] in {

// int_amdgcn_mov_dpp: $src is passed for both leading operands of
// V_MOV_B32_dpp.
def : GCNPat <
  (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask,
                           timm:$bank_mask, timm:$bound_ctrl)),
  (V_MOV_B32_dpp VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp_ctrl),
                 (as_i32timm $row_mask), (as_i32timm $bank_mask),
                 (as_i1timm $bound_ctrl))
>;

// int_amdgcn_update_dpp for every type in Reg32Types.types; here $old is
// passed as the first operand.
foreach vt = Reg32Types.types in
def : GCNPat <
  (vt (int_amdgcn_update_dpp vt:$old, vt:$src, timm:$dpp_ctrl,
                             timm:$row_mask, timm:$bank_mask,
                             timm:$bound_ctrl)),
  (V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src, (as_i32timm $dpp_ctrl),
                 (as_i32timm $row_mask), (as_i32timm $bank_mask),
                 (as_i1timm $bound_ctrl))
>;

// anyext / trunc between i16 and i32 are plain copies.
def : GCNPat<
  (i32 (anyext i16:$src)),
  (COPY $src)
>;

// anyext i16 -> i64: copy into sub0 and move 0 into sub1.
def : GCNPat<
  (i64 (anyext i16:$src)),
  (REG_SEQUENCE VReg_64,
    (i32 (COPY $src)), sub0,
    (V_MOV_B32_e32 (i32 0)), sub1)
>;

def : GCNPat<
  (i16 (trunc i32:$src)),
  (COPY $src)
>;

// trunc i64 -> i16 takes the sub0 subregister.
def : GCNPat <
  (i16 (trunc i64:$src)),
  (EXTRACT_SUBREG $src, sub0)
>;

} // End OtherPredicates = [isGFX8Plus]

//===----------------------------------------------------------------------===//
// GFX9
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
  // e32/e64 reals plus the SDWA9 and DPP forms that the e32 profile enables.
  multiclass VOP1_Real_gfx9 <bits<10> op> {
    defvar ps32 = !cast<VOP1_Pseudo>(NAME#"_e32");

    defm NAME : VOP1_Real_e32e64_vi <op>;

    if ps32.Pfl.HasExtSDWA9 then
    def _sdwa_gfx9 :
      VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

    if ps32.Pfl.HasExtDPP then
    def _dpp_gfx9 :
      VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
      VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
  }

  // Same as VOP1_Real_gfx9, except that the SDWA real sets Inst{42-40} to the
  // constant 6.
  multiclass VOP1_Real_NoDstSel_SDWA_gfx9 <bits<10> op> {
    defvar ps32 = !cast<VOP1_Pseudo>(NAME#"_e32");

    defm NAME : VOP1_Real_e32e64_vi <op>;

    if ps32.Pfl.HasExtSDWA9 then
    def _sdwa_gfx9 :
      VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
        let Inst{42-40} = 6;
      }

    if ps32.Pfl.HasExtDPP then
    def _dpp_gfx9 :
      VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
      VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
  }
}

defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;

let AssemblerPredicate = isGFX940Plus in
defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>;

defm V_CVT_F32_FP8    : VOP1_Real_NoDstSel_SDWA_gfx9<0x54>;
defm V_CVT_F32_BF8    : VOP1_Real_NoDstSel_SDWA_gfx9<0x55>;
defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>;
defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>;

// Selects int_amdgcn_mov_dpp8 to the given DPP8 mov instruction under the
// given predicate. $src is passed for both leading operands, and the final
// operand is DPP8Mode.FI_0.
class MovDPP8Pattern<Predicate Pred, Instruction Inst> : GCNPat <
  (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
  (Inst VGPR_32:$src, VGPR_32:$src,
        (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))> {
  let OtherPredicates = [Pred];
}

def : MovDPP8Pattern<isGFX10Only, V_MOV_B32_dpp8_gfx10>;
def : MovDPP8Pattern<isGFX11Only, V_MOV_B32_dpp8_gfx11>;
def : MovDPP8Pattern<isGFX12Only, V_MOV_B32_dpp8_gfx12>;