//===-- VOP1Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP1 Classes
//===----------------------------------------------------------------------===//

// 32-bit VOP1 encoding. Bit layout, low to high: src0 (9 bits, left unset when
// the profile has no src0), opcode (8 bits), vdst (8 bits, zero unless the
// profile emits a destination), and the fixed 7-bit pattern 0x3f.
class VOP1e <bits<8> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;

  let Inst{8-0} = !if(P.HasSrc0, src0{8-0}, ?);
  let Inst{16-9} = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; //encoding
}

// Same opcode/vdst/0x3f layout as VOP1e, but bits 8-0 hold the constant 0xf9.
class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;

  let Inst{8-0} = 0xf9; // sdwa
  let Inst{16-9} = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

// Identical bit assignments to VOP1_SDWAe, layered on the VOP_SDWA9Ae base.
class VOP1_SDWA9Ae <bits<8> op, VOPProfile P> : VOP_SDWA9Ae <P> {
  bits<8> vdst;

  let Inst{8-0} = 0xf9; // sdwa
  let Inst{16-9} = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

// Pseudo instruction for the 32-bit VOP1 form, built from the profile's
// Outs32/Ins32/Asm32. When VOP1Only is set, the suffix argument handed to
// VOP_Pseudo is empty instead of "_e32".
class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1Only = 0> :
  VOP_Pseudo <opName, !if(VOP1Only, "", "_e32"), P, P.Outs32, P.Ins32, "", pattern> {

  let AsmOperands = P.Asm32;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  // Set when either the destination or src0 value type is floating point.
  let ReadsModeReg = !or(P.DstVT.isFP, P.Src0VT.isFP);

  let mayRaiseFPException = ReadsModeReg;

  let VOP1 = 1;
  let VALU = 1;
  // MODE is listed as an implicit use only when ReadsModeReg is set.
  let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);

  let AsmVariantName = AMDGPUAsmVariants.Default;
}

// Real (non-pseudo) instruction derived from a VOP1_Pseudo. Operand lists and
// the flags below are copied from the pseudo; the asm string is real_name
// followed by the pseudo's AsmOperands.
class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemonic > :
  VOP_Real <ps>,
  InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let VALU = 1;
  let VOP1 = 1;
  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  // (Constraints and DisableEncoding were previously assigned twice with the
  // same value; each is now assigned once.)
  let SubtargetPredicate = ps.SubtargetPredicate;
  let OtherPredicates = ps.OtherPredicates;
  let AsmMatchConverter = ps.AsmMatchConverter;
  let AsmVariantName = ps.AsmVariantName;
  let Constraints = ps.Constraints;
  let DisableEncoding = ps.DisableEncoding;
  let TSFlags = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses = ps.Uses;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let mayLoad = ps.mayLoad;
  let mayStore = ps.mayStore;
  let TRANS = ps.TRANS;
}

// VOP1_Real whose encoding family, assembler predicate and decoder namespace
// are all taken from a GFXGen record.
class VOP1_Real_Gen <VOP1_Pseudo ps, GFXGen Gen, string real_name = ps.Mnemonic> :
  VOP1_Real <ps, Gen.Subtarget, real_name> {
  let AssemblerPredicate = Gen.AssemblerPredicate;
  let DecoderNamespace = Gen.DecoderNamespace;
}

// SDWA pseudo for VOP1; selects the "cvtSdwaVOP1" asm match converter.
class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP1";
}

// DPP pseudo for VOP1; adds nothing to VOP_DPP_Pseudo beyond a distinct type.
class VOP1_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_DPP_Pseudo <OpName, P, pattern> {
}

// Builds a one-element selection pattern list for `node` applied to src0.
// The operand is wrapped in VOP3Mods when the profile has modifiers, in
// VOP3OMods when it only has omod, and used directly otherwise.
class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret =
    !if(P.HasModifiers,
        [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods P.Src0VT:$src0, i32:$src0_modifiers))))],
        !if(P.HasOMod,
            [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0,
                                                  i1:$clamp, i32:$omod))))],
            [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]
        )
    );
}

// Defines the pseudos for one VOP1 opcode: _e32 and _e64 always, and _sdwa,
// _dpp and _e64_dpp when the profile enables the corresponding extension.
// Also registers mnemonic aliases so the suffixed spellings map to opName.
// A VOPDOp other than -1 additionally makes the _e32 pseudo a VOPD_Component.
multiclass VOP1Inst <string opName, VOPProfile P,
                     SDPatternOperator node = null_frag, int VOPDOp = -1> {
  // We only want to set this on the basic, non-SDWA or DPP forms.
  defvar should_mov_imm = !or(!eq(opName, "v_mov_b32"),
                              !eq(opName, "v_mov_b64"));

  let isMoveImm = should_mov_imm in {
    if !eq(VOPDOp, -1) then
      def _e32 : VOP1_Pseudo <opName, P>;
    else
      // Only for V_MOV_B32
      def _e32 : VOP1_Pseudo <opName, P>, VOPD_Component<VOPDOp, opName>;
    def _e64 : VOP3InstBase <opName, P, node>;
  }

  if P.HasExtSDWA then
    def _sdwa : VOP1_SDWA_Pseudo <opName, P>;

  if P.HasExtDPP then
    def _dpp : VOP1_DPP_Pseudo <opName, P>;

  let SubtargetPredicate = isGFX11Plus in {
    if P.HasExtVOP3DPP then
      def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
  } // End SubtargetPredicate = isGFX11Plus

  def : MnemonicAlias<opName#"_e32", opName>, LetDummies;
  def : MnemonicAlias<opName#"_e64", opName>, LetDummies;

  if P.HasExtSDWA then
    def : MnemonicAlias<opName#"_sdwa", opName>, LetDummies;

  if P.HasExtDPP then
    def : MnemonicAlias<opName#"_dpp", opName, AMDGPUAsmVariants.DPP>, LetDummies;
}

// Instantiates VOP1Inst three times under mutually distinct predicates:
// the plain form, a "_t16" form using VOPProfile_True16<P>, and a "_fake16"
// form using VOPProfile_Fake16<P>.
multiclass VOP1Inst_t16<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag> {
  let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in {
    defm NAME : VOP1Inst<opName, P, node>;
  }
  let OtherPredicates = [UseRealTrue16Insts] in {
    defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_True16<P>, node>;
  }
  let OtherPredicates = [UseFakeTrue16Insts] in {
    defm _fake16 : VOP1Inst<opName#"_fake16", VOPProfile_Fake16<P>, node>;
  }
}

// Special profile for instructions which have clamp
// and output modifiers (but have no input modifiers)
class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
  VOPProfile<[dstVt, srcVt, untyped, untyped]> {

  let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
  let InsVOP3Base = (ins Src0VOP3DPP:$src0, clampmod:$clamp, omod:$omod);
  let AsmVOP3Base = "$vdst, $src0$clamp$omod";

  let HasModifiers = 0;
  let HasClamp = 1;
}

// Same overrides as VOPProfileI2F, applied on top of VOPProfile_Fake16.
class VOPProfileI2F_True16<ValueType dstVt, ValueType srcVt> :
  VOPProfile_Fake16<VOPProfile<[dstVt, srcVt, untyped, untyped]>> {

  let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
  let InsVOP3Base = (ins Src0VOP3DPP:$src0, clampmod:$clamp, omod:$omod);
  let AsmVOP3Base = "$vdst, $src0$clamp$omod";

  let HasModifiers = 0;
  let HasClamp = 1;
}

def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;
def VOP1_F16_I16_t16 : VOPProfileI2F_True16 <f16, i16>;

// Operand-less profile used by v_nop; the VOP3 DPP extension is disabled.
def VOP_NOP_PROFILE : VOPProfile <[untyped, untyped, untyped, untyped]>{
  let HasExtVOP3DPP = 0;
}

// OMod clears exceptions when set. OMod was always an operand, but it's
// now explicitly set.
class VOP_SPECIAL_OMOD_PROF<ValueType dstVt, ValueType srcVt> :
  VOPProfile<[dstVt, srcVt, untyped, untyped]> {

  let HasOMod = 1;
}
def VOP_I32_F32_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f32>;
def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f64>;
def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i16, f16>;
def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_Fake16<VOP_I16_F16> {
  let HasOMod = 1;
}

//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//

let VOPAsmPrefer32Bit = 1 in {
defm V_NOP : VOP1Inst <"v_nop", VOP_NOP_PROFILE>;
}

// i32 -> i32 profile for v_mov_b32 that also supplies the VOPD X/Y input
// operand lists (regular and "Deferred" variants).
def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
  let InsVOPDX = (ins Src0RC32:$src0X);
  let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X);
  let InsVOPDY = (ins Src0RC32:$src0Y);
  let InsVOPDYDeferred = (ins VSrc_f32_Deferred:$src0Y);
}

let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
// 0x8 is passed as VOPDOp, so the _e32 pseudo is also a VOPD_Component.
defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;

let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
} // End isReMaterializable = 1, isAsCheapAsAMove = 1

// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: Make profile for this, there is VOP3 encoding also
def V_READFIRSTLANE_B32 :
  InstSI <(outs SReg_32:$vdst),
    (ins VRegOrLdsSrc_32:$src0),
    "v_readfirstlane_b32 $vdst, $src0",
    [(set i32:$vdst, (int_amdgcn_readfirstlane (i32 VRegOrLdsSrc_32:$src0)))]>,
  Enc32 {

  let isCodeGenOnly = 0;
  let UseNamedOperandTable = 1;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  let VOP1 = 1;
  let VALU = 1;
  let Uses = [EXEC];
  let isConvergent = 1;

  bits<8> vdst;
  bits<9> src0;

  // Hand-written VOP1 layout (this def does not go through VOP1e);
  // the opcode field is the constant 0x2.
  let Inst{8-0} = src0;
  let Inst{16-9} = 0x2;
  let Inst{24-17} = vdst;
  let Inst{31-25} = 0x3f; //encoding
}

let isReMaterializable = 1 in {
let SchedRW = [WriteDoubleCvt] in {
// OMod clears exceptions when set in this instruction
defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64_SPECIAL_OMOD, fp_to_sint>;

let mayRaiseFPException = 0 in {
defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
}

defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, any_fpextend>;
// OMod clears exceptions when set in this instruction
defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64_SPECIAL_OMOD, fp_to_uint>;

let mayRaiseFPException = 0 in {
defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
}

} // End SchedRW = [WriteDoubleCvt]

let SchedRW = [WriteFloatCvt] in {

// XXX: Does this really not raise exceptions? The manual claims the
// 16-bit ones can.
let mayRaiseFPException = 0 in {
defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
}

// OMod clears exceptions when set in these 2 instructions
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_sint>;
let FPDPRounding = 1, isReMaterializable = 0 in {
  let OtherPredicates = [NotHasTrue16BitInsts] in
  defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>;
  let OtherPredicates = [HasTrue16BitInsts] in
  defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_Fake16<VOP_F16_F32>, any_fpround>;
} // End FPDPRounding = 1, isReMaterializable = 0

let OtherPredicates = [NotHasTrue16BitInsts] in
defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
let OtherPredicates = [HasTrue16BitInsts] in
defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
} // End ReadsModeReg = 0, mayRaiseFPException = 0
} // End SchedRW = [WriteFloatCvt]

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;
} // End ReadsModeReg = 0, mayRaiseFPException = 0

defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>;
defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, froundeven>;
defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;

let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, AMDGPUexp>;
defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, AMDGPUlog>;
defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32, AMDGPUrcp_iflag>;
defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, int_amdgcn_sqrt>;
} // End TRANS = 1, SchedRW = [WriteTrans32]

let TRANS = 1, SchedRW = [WriteTrans64] in {
defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>;
defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>;
defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, int_amdgcn_sqrt>;
} // End TRANS = 1, SchedRW = [WriteTrans64]

let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>;
defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>;
} // End TRANS = 1, SchedRW = [WriteTrans32]

defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>;
defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, DivergentUnaryFrag<bitreverse>>;
defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32, AMDGPUffbh_u32>;
defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32, AMDGPUffbl_b32>;
defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>;

let SchedRW = [WriteDoubleAdd] in {
defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64_SPECIAL_OMOD, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>;
let FPDPRounding = 1 in {
defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>;
} // End FPDPRounding = 1
} // End SchedRW = [WriteDoubleAdd]

defm V_FREXP_EXP_I32_F32 : VOP1Inst <"v_frexp_exp_i32_f32", VOP_I32_F32, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F32 : VOP1Inst <"v_frexp_mant_f32", VOP_F32_F32, int_amdgcn_frexp_mant>;
} // End isReMaterializable = 1

let VOPAsmPrefer32Bit = 1 in {
defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
}

// Restrict src0 to be VGPR
def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> {
  let Src0RC32 = VRegSrc_32;
  let Src0RC64 = VRegSrc_32;
}

// Special case because there are no true output operands. Hack vdst
// to be a src operand. The custom inserter must add a tied implicit
// def and use of the super register since there seems to be no way to
// add an implicit def of a virtual register in tablegen.
// Profile for the movrel forms whose destination is passed as an input:
// Outs is empty and $vdst appears as the first operand of Ins32/Ins64.
// HasDst is cleared while EmitDst is set so the vdst field is still encoded.
// Src1RC selects the register operand class used for $src0.
class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, untyped]> {
  let Src0RC32 = VOPDstOperand<VGPR_32>;
  let Src0RC64 = VOPDstOperand<VGPR_32>;

  let Outs = (outs);
  let Ins32 = (ins Src0RC32:$vdst, Src1RC:$src0);
  let Ins64 = (ins Src0RC64:$vdst, Src1RC:$src0);
  let Asm32 = getAsm32<1, 1>.ret;

  // Unlike the 32/64-bit forms above, the SDWA and DPP variants below do
  // declare $vdst as an output.
  let OutsSDWA = (outs Src0RC32:$vdst);
  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel);
  let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;

  let OutsDPP = (outs Src0RC32:$vdst);
  let InsDPP16 = (ins Src0RC32:$old, Src0RC32:$src0,
                      dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                      bank_mask:$bank_mask, bound_ctrl:$bound_ctrl, FI:$fi);
  let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;
  let InsDPP8 = (ins Src0RC32:$old, Src0RC32:$src0, dpp8:$dpp8, FI:$fi);
  let AsmDPP8 = getAsmDPP8<1, 1, 0>.ret;

  let OutsVOP3DPP = (outs Src0RC64:$vdst);
  let InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
  let InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
  let InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;

  // The asm string is built as if a destination were present (literal 1 for
  // HasDst) even though HasDst is cleared below.
  let AsmVOP3Base =
    getAsmVOP3Base<NumSrcArgs, 1 /* HasDst */, HasClamp,
                   HasOpSel, HasOMod, IsVOP3P, HasModifiers,
                   HasModifiers, HasModifiers, HasModifiers>.ret;

  let HasDst = 0;
  let EmitDst = 1; // force vdst emission
}

def VOP_MOVRELD : VOP_MOVREL<VSrc_b32>;
def VOP_MOVRELSD : VOP_MOVREL<VRegSrc_32>;

let SubtargetPredicate = HasMovrel, Uses = [M0, EXEC] in {
 // v_movreld_b32 is a special case because the destination output
 // register is really a source. It isn't actually read (but may be
 // written), and is only to provide the base register to start
 // indexing from. Tablegen seems to not let you define an implicit
 // virtual register output for the super register being written into,
 // so this must have an implicit def of the register added to it.
defm V_MOVRELD_B32 : VOP1Inst <"v_movreld_b32", VOP_MOVRELD>;
defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_MOVRELS>;
defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_MOVRELSD>;
} // End SubtargetPredicate = HasMovrel, Uses = [M0, EXEC]

let isReMaterializable = 1 in {
let SubtargetPredicate = isGFX6GFX7 in {
  let TRANS = 1, SchedRW = [WriteTrans32] in {
    defm V_LOG_CLAMP_F32 :
      VOP1Inst<"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
    defm V_RCP_CLAMP_F32 :
      VOP1Inst<"v_rcp_clamp_f32", VOP_F32_F32>;
    defm V_RCP_LEGACY_F32 :
      VOP1Inst<"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
    defm V_RSQ_CLAMP_F32 :
      VOP1Inst<"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
    defm V_RSQ_LEGACY_F32 :
      VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, int_amdgcn_rsq_legacy>;
  } // End TRANS = 1, SchedRW = [WriteTrans32]

  // NOTE(review): this group sets only SchedRW, whereas the V_RCP_F64 /
  // V_RSQ_F64 group elsewhere in this file also sets TRANS = 1 - confirm
  // the difference is intended.
  let SchedRW = [WriteTrans64] in {
    defm V_RCP_CLAMP_F64 :
      VOP1Inst<"v_rcp_clamp_f64", VOP_F64_F64>;
    defm V_RSQ_CLAMP_F64 :
      VOP1Inst<"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>;
  } // End SchedRW = [WriteTrans64]
} // End SubtargetPredicate = isGFX6GFX7

let SubtargetPredicate = isGFX7GFX8GFX9 in {
  let TRANS = 1, SchedRW = [WriteTrans32] in {
    defm V_LOG_LEGACY_F32 : VOP1Inst<"v_log_legacy_f32", VOP_F32_F32>;
    defm V_EXP_LEGACY_F32 : VOP1Inst<"v_exp_legacy_f32", VOP_F32_F32>;
  } // End TRANS = 1, SchedRW = [WriteTrans32]
} // End SubtargetPredicate = isGFX7GFX8GFX9

let SubtargetPredicate = isGFX7Plus in {
  let SchedRW = [WriteDoubleAdd] in {
    defm V_TRUNC_F64 : VOP1Inst<"v_trunc_f64", VOP_F64_F64, ftrunc>;
    defm V_CEIL_F64 : VOP1Inst<"v_ceil_f64", VOP_F64_F64, fceil>;
    defm V_RNDNE_F64 : VOP1Inst<"v_rndne_f64", VOP_F64_F64, froundeven>;
    defm V_FLOOR_F64 : VOP1Inst<"v_floor_f64", VOP_F64_F64, ffloor>;
  } // End SchedRW = [WriteDoubleAdd]
} // End SubtargetPredicate = isGFX7Plus
} // End isReMaterializable = 1

// 16-bit instructions. Where a plain VOP1Inst is used, the non-true16 and
// "_t16" variants are spelled out as separate defms under complementary
// predicates; VOP1Inst_t16 generates its variants from a single defm.
let FPDPRounding = 1 in {
let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
}
let OtherPredicates = [HasTrue16BitInsts] in {
defm V_CVT_F16_U16_t16 : VOP1Inst <"v_cvt_f16_u16_t16", VOP1_F16_I16_t16, uint_to_fp>;
defm V_CVT_F16_I16_t16 : VOP1Inst <"v_cvt_f16_i16_t16", VOP1_F16_I16_t16, sint_to_fp>;
}
} // End FPDPRounding = 1
// OMod clears exceptions when set in these two instructions
let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_uint>;
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_sint>;
}
let OtherPredicates = [HasTrue16BitInsts] in {
defm V_CVT_U16_F16_t16 : VOP1Inst <"v_cvt_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_uint>;
defm V_CVT_I16_F16_t16 : VOP1Inst <"v_cvt_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_sint>;
}
let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
defm V_RSQ_F16 : VOP1Inst_t16 <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>;
defm V_LOG_F16 : VOP1Inst_t16 <"v_log_f16", VOP_F16_F16, AMDGPUlogf16>;
defm V_EXP_F16 : VOP1Inst_t16 <"v_exp_f16", VOP_F16_F16, AMDGPUexpf16>;
defm V_SIN_F16 : VOP1Inst_t16 <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
} // End TRANS = 1, SchedRW = [WriteTrans32]
defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16_SPECIAL_OMOD, int_amdgcn_frexp_exp>;
}
let OtherPredicates = [HasTrue16BitInsts] in {
defm V_FREXP_EXP_I16_F16_t16 : VOP1Inst <"v_frexp_exp_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, int_amdgcn_frexp_exp>;
}
defm V_FLOOR_F16 : VOP1Inst_t16 <"v_floor_f16", VOP_F16_F16, ffloor>;
defm V_CEIL_F16 : VOP1Inst_t16 <"v_ceil_f16", VOP_F16_F16, fceil>;
defm V_TRUNC_F16 : VOP1Inst_t16 <"v_trunc_f16", VOP_F16_F16, ftrunc>;
defm V_RNDNE_F16 : VOP1Inst_t16 <"v_rndne_f16", VOP_F16_F16, froundeven>;
let FPDPRounding = 1 in {
defm V_FRACT_F16 : VOP1Inst_t16 <"v_fract_f16", VOP_F16_F16, AMDGPUfract>;
} // End FPDPRounding = 1

// Select f16_to_fp / AMDGPUfp_to_f16 on i16 values to the e32 f16<->f32
// conversions; the second group picks the "_t16" instructions instead.
let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
def : GCNPat<
    (f32 (f16_to_fp i16:$src)),
    (V_CVT_F32_F16_e32 $src)
>;
def : GCNPat<
    (i16 (AMDGPUfp_to_f16 f32:$src)),
    (V_CVT_F16_F32_e32 $src)
>;
}
let OtherPredicates = [HasTrue16BitInsts] in {
def : GCNPat<
    (f32 (f16_to_fp i16:$src)),
    (V_CVT_F32_F16_t16_e32 $src)
>;
def : GCNPat<
    (i16 (AMDGPUfp_to_f16 f32:$src)),
    (V_CVT_F16_F32_t16_e32 $src)
>;
}

// Two-output, two-input profile used by the swap pseudos. Only $vdst and
// $src0 appear in the asm string.
def VOP_SWAP_I32 : VOPProfile<[i32, i32, untyped, untyped]> {
  let Outs32 = (outs VGPR_32:$vdst, VRegSrc_32:$vdst1);
  let Ins32 = (ins VRegSrc_32:$src0, VGPR_32:$src1);
  let Asm32 = " $vdst, $src0";
}

let SubtargetPredicate = isGFX9Plus in {
  // Each output is tied to the opposite input; the tied $vdst1/$src1 operands
  // are excluded from encoding.
  def V_SWAP_B32 : VOP1_Pseudo<"v_swap_b32", VOP_SWAP_I32, [], 1> {
    let Constraints = "$vdst = $src1, $vdst1 = $src0";
    let DisableEncoding = "$vdst1,$src1";
    let SchedRW = [Write64Bit, Write64Bit];
  }

  let isReMaterializable = 1 in
  defm V_SAT_PK_U8_I16 : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>;

  let mayRaiseFPException = 0 in {
    let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
      defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16_SPECIAL_OMOD>;
      defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16_SPECIAL_OMOD>;
    }
    let OtherPredicates = [HasTrue16BitInsts] in {
      defm V_CVT_NORM_I16_F16_t16 : VOP1Inst<"v_cvt_norm_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>;
      defm V_CVT_NORM_U16_F16_t16 : VOP1Inst<"v_cvt_norm_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>;
    }
  } // End mayRaiseFPException = 0
} // End SubtargetPredicate = isGFX9Plus

let SubtargetPredicate = isGFX9Only in {
  defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
} // End SubtargetPredicate = isGFX9Only

// Profile for the FP8/BF8 -> f32 conversions. Enables the DPP and SDWA
// extensions and defines an SDWA operand list without dst_sel/dst_unused:
// (src0_modifiers, src0, clamp, omod, src0_sel).
class VOPProfile_Base_CVT_F32_F8<ValueType vt> : VOPProfileI2F <vt, i32> {
  let HasExtDPP = 1;
  let HasExtSDWA = 1;
  let HasExtSDWA9 = 1;
  let HasExt = 1;
  let DstRCSDWA = getVALUDstForVT<vt>.ret;
  let InsSDWA = (ins Bin32SDWAInputMods:$src0_modifiers, Src0SDWA:$src0,
                     clampmod:$clamp, omod:$omod, src0_sel:$src0_sel);
  let AsmSDWA = "$vdst, $src0_modifiers$clamp$omod $src0_sel"; // No dst_sel
  let AsmSDWA9 = AsmSDWA;
  let EmitDstSel = 0;
}

def VOPProfileCVT_F32_F8 : VOPProfile_Base_CVT_F32_F8 <f32>;
def VOPProfileCVT_PK_F32_F8 : VOPProfile_Base_CVT_F32_F8 <v2f32>;

let SubtargetPredicate = HasFP8ConversionInsts, mayRaiseFPException = 0,
    SchedRW = [WriteFloatCvt] in {
  defm V_CVT_F32_FP8 : VOP1Inst<"v_cvt_f32_fp8", VOPProfileCVT_F32_F8>;
  defm V_CVT_F32_BF8 : VOP1Inst<"v_cvt_f32_bf8", VOPProfileCVT_F32_F8>;
  defm V_CVT_PK_F32_FP8 : VOP1Inst<"v_cvt_pk_f32_fp8", VOPProfileCVT_PK_F32_F8>;
  defm V_CVT_PK_F32_BF8 : VOP1Inst<"v_cvt_pk_f32_bf8", VOPProfileCVT_PK_F32_F8>;
}

// Selects `node` with a constant second operand `index` to the SDWA
// instruction, passing `index` in the last (src0_sel) operand position and
// zero for the modifier, clamp and omod operands.
class Cvt_F32_F8_Pat<SDPatternOperator node, int index,
    VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
    (f32 (node i32:$src, index)),
    (inst_sdwa 0, $src, 0, 0, index)
>;

let SubtargetPredicate = isGFX9Only in {
// Index 0 is written out by hand so it can choose between the SDWA and e32
// forms depending on the HasCvtFP8VOP1Bug / HasNoCvtFP8VOP1Bug predicates.
let OtherPredicates = [HasCvtFP8VOP1Bug] in {
  def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
               (V_CVT_F32_FP8_sdwa 0, $src, 0, 0, 0)>;
  def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
               (V_CVT_F32_BF8_sdwa 0, $src, 0, 0, 0)>;
}

let OtherPredicates = [HasNoCvtFP8VOP1Bug] in {
  def : GCNPat<(f32 (int_amdgcn_cvt_f32_fp8 i32:$src, 0)),
               (V_CVT_F32_FP8_e32 $src)>;
  def : GCNPat<(f32 (int_amdgcn_cvt_f32_bf8 i32:$src, 0)),
               (V_CVT_F32_BF8_e32 $src)>;
}

// Indices 1-3 always go through the SDWA form.
foreach Index = [1, 2, 3] in {
  def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index, V_CVT_F32_FP8_sdwa>;
  def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index, V_CVT_F32_BF8_sdwa>;
}
} // End SubtargetPredicate = isGFX9Only

// Packed variant: a non-zero index selects the SDWA form with SDWA.WORD_1 in
// the last operand; index 0 selects the e32 form.
class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
    VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
    (v2f32 (node i32:$src, index)),
    !if (index,
         (inst_sdwa 0, $src, 0, 0, SDWA.WORD_1),
         (inst_e32 $src))
>;

let SubtargetPredicate = isGFX9Only in {
  // The two matched constants are 0 and -1.
  foreach Index = [0, -1] in {
    def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index,
                            V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>;
    def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index,
                            V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>;
  }
} // End SubtargetPredicate = isGFX9Only


// Similar to VOPProfile_Base_CVT_F32_F8, but for VOP3 instructions.
// Packed FP8 -> v2f32 profile with op_sel enabled and the VOP3 DPP extension
// disabled.
def VOPProfile_Base_CVT_PK_F32_F8_OpSel : VOPProfileI2F <v2f32, i32> {
  let HasOpSel = 1;
  let HasExtVOP3DPP = 0;
}

// Scalar FP8 -> f32 profile with op_sel and source modifiers enabled, DPP and
// VOP3 DPP extensions enabled, and clamp/omod disabled.
def VOPProfile_Base_CVT_F32_F8_OpSel : VOPProfile<[f32, i32, untyped, untyped]> {
  let HasOpSel = 1;
  let HasExtDPP = 1;
  let HasExtVOP3DPP = 1;
  let IsFP8 = 1;
  let HasClamp = 0;
  let HasOMod = 0;
  let HasModifiers = 1;
  let Src1VOP3DPP = Src1RC64;
}

let SubtargetPredicate = isGFX12Plus, mayRaiseFPException = 0,
    SchedRW = [WriteFloatCvt] in {
  defm V_CVT_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_f32_fp8_op_sel", VOPProfile_Base_CVT_F32_F8_OpSel>;
  defm V_CVT_F32_BF8_OP_SEL : VOP1Inst<"v_cvt_f32_bf8_op_sel", VOPProfile_Base_CVT_F32_F8_OpSel>;
  defm V_CVT_PK_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_fp8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>;
  defm V_CVT_PK_F32_BF8_OP_SEL : VOP1Inst<"v_cvt_pk_f32_bf8_op_sel", VOPProfile_Base_CVT_PK_F32_F8_OpSel>;
}

// Selects `node` with a constant 2-bit `index`. Index 0 uses the e32
// instruction. Any other index uses the e64 instruction, whose first operand
// is built from the index bits: bit 0 contributes SRCMODS.OP_SEL_0 and bit 1
// contributes SRCMODS.OP_SEL_1 (both OR-ed together when both bits are set).
class Cvt_F32_F8_Pat_OpSel<SDPatternOperator node, bits<2> index,
    VOP1_Pseudo inst_e32, VOP3_Pseudo inst_e64> : GCNPat<
    (f32 (node i32:$src, index)),
    !if (index,
         (inst_e64 !if(index{0},
                       !if(index{1}, !or(SRCMODS.OP_SEL_0, SRCMODS.OP_SEL_1),
                                     SRCMODS.OP_SEL_0),
                       !if(index{1}, SRCMODS.OP_SEL_1, 0)),
                   $src, 0),
         (inst_e32 $src))
>;

let SubtargetPredicate = isGFX12Plus in {
  foreach Index = [0, 1, 2, 3] in {
    def : Cvt_F32_F8_Pat_OpSel<int_amdgcn_cvt_f32_fp8, Index,
                               V_CVT_F32_FP8_e32, V_CVT_F32_FP8_OP_SEL_e64>;
    def : Cvt_F32_F8_Pat_OpSel<int_amdgcn_cvt_f32_bf8, Index,
                               V_CVT_F32_BF8_e32, V_CVT_F32_BF8_OP_SEL_e64>;
  }
}

// Packed variant: a non-zero index selects the e64 instruction with
// SRCMODS.OP_SEL_0 as its first operand; index 0 selects the e32 instruction.
class Cvt_PK_F32_F8_Pat_OpSel<SDPatternOperator node, int index,
    VOP1_Pseudo inst_e32, VOP3_Pseudo inst_e64> : GCNPat<
    (v2f32 (node i32:$src, index)),
    !if (index,
         (inst_e64 SRCMODS.OP_SEL_0, $src, 0, 0, SRCMODS.NONE),
         (inst_e32 $src))
>;

let SubtargetPredicate = isGFX12Plus in {
  // The two matched constants are 0 and -1.
  foreach Index = [0, -1] in {
    def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_fp8, Index,
                                  V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_OP_SEL_e64>;
    def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_bf8, Index,
                                  V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_OP_SEL_e64>;
  }
}

let SubtargetPredicate = isGFX10Plus in {
  defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NO_EXT<VOP_NONE>>;

  // NOTE(review): the HasMovrel group elsewhere in this file lists
  // Uses = [M0, EXEC], while this scope lists only [M0] - confirm whether
  // EXEC is intentionally left out for these two instructions.
  let Uses = [M0] in {
    defm V_MOVRELSD_2_B32 :
      VOP1Inst<"v_movrelsd_2_b32", VOP_MOVRELSD>;

    // Same tied-operand setup as V_SWAP_B32.
    def V_SWAPREL_B32 : VOP1_Pseudo<"v_swaprel_b32", VOP_SWAP_I32, [], 1> {
      let Constraints = "$vdst = $src1, $vdst1 = $src0";
      let DisableEncoding = "$vdst1,$src1";
      let SchedRW = [Write64Bit, Write64Bit];
    }
  } // End Uses = [M0]
} // End SubtargetPredicate = isGFX10Plus

// i32 -> i32 profile with the destination in AGPR_32 and src0 restricted to
// ARegSrc_32.
def VOPProfileAccMov : VOP_NO_EXT<VOP_I32_I32> {
  let DstRC = RegisterOperand<AGPR_32>;
  let Src0RC32 = ARegSrc_32;
  let Asm32 = " $vdst, $src0";
}

// VOP1-only pseudo (no "_e32" suffix), available under isGFX90APlus.
def V_ACCVGPR_MOV_B32 : VOP1_Pseudo<"v_accvgpr_mov_b32", VOPProfileAccMov, [], 1> {
  let SubtargetPredicate = isGFX90APlus;
  let isReMaterializable = 1;
  let isAsCheapAsAMove = 1;
}

let SubtargetPredicate = isGFX11Plus in {
  // Restrict src0 to be VGPR
  def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS,
                                      getVOP1Pat64<int_amdgcn_permlane64,
                                                   VOP_MOVRELS>.ret,
                                      /*VOP1Only=*/ 1>;
  defm V_MOV_B16_t16 : VOP1Inst<"v_mov_b16_t16", VOPProfile_True16<VOP_I16_I16>>;
  defm V_NOT_B16 : VOP1Inst_t16<"v_not_b16", VOP_I16_I16>;
  defm V_CVT_I32_I16 : VOP1Inst_t16<"v_cvt_i32_i16", VOP_I32_I16>;
  defm V_CVT_U32_U16 : VOP1Inst_t16<"v_cvt_u32_u16", VOP_I32_I16>;
} // End SubtargetPredicate = isGFX11Plus

//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//

// DPP encoding of a VOP1 instruction: bits 8-0 hold the constant 0xfa, the
// other dword fields follow the VOP1e layout. Scheduling, implicit
// register and predicate information is copied from the DPP pseudo.
class VOP1_DPP<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 = 0> :
    VOP_DPP<ps.OpName, p, isDPP16> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;
  let TRANS = ps.TRANS;
  let OtherPredicates = ps.OtherPredicates;

  bits<8> vdst;
  let Inst{8-0} = 0xfa;
  let Inst{16-9} = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f;
}

// DPP16 real instruction for a given encoding family, gated on HasDPP16.
class VOP1_DPP16<bits<8> op, VOP1_DPP_Pseudo ps, int subtarget, VOPProfile p = ps.Pfl> :
    VOP1_DPP<op, ps, p, 1>,
    SIMCInstr <ps.PseudoInstr, subtarget> {
  let AssemblerPredicate = HasDPP16;
  let SubtargetPredicate = HasDPP16;
}

// VOP1_DPP16 parameterized by a GFXGen record; the assembler predicate is
// replaced by the generation's and the decoder namespace is prefixed "DPP".
class VOP1_DPP16_Gen<bits<8> op, VOP1_DPP_Pseudo ps, GFXGen Gen, VOPProfile p = ps.Pfl> :
    VOP1_DPP16 <op, ps, Gen.Subtarget, p> {
  let AssemblerPredicate = Gen.AssemblerPredicate;
  let DecoderNamespace = "DPP"#Gen.DecoderNamespace;
}

// DPP8 encoding of a VOP1 instruction, built from the e32 pseudo. Bits 8-0
// are taken from `fi`.
// NOTE(review): unlike VOP1_DPP, TRANS is not copied from the pseudo here -
// confirm that is intended.
class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
    VOP_DPP8<ps.OpName, p> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;
  let OtherPredicates = ps.OtherPredicates;

  bits<8> vdst;
  let Inst{8-0} = fi;
  let Inst{16-9} = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f;
}

// VOP1_DPP8 parameterized by a GFXGen record; decoder namespace is prefixed
// "DPP8".
class VOP1_DPP8_Gen<bits<8> op, VOP1_Pseudo ps, GFXGen Gen, VOPProfile p = ps.Pfl> :
    VOP1_DPP8<op, ps, p> {
  let AssemblerPredicate = Gen.AssemblerPredicate;
  let DecoderNamespace = "DPP8"#Gen.DecoderNamespace;
}

//===----------------------------------------------------------------------===//
// GFX11, GFX12
//===----------------------------------------------------------------------===//

// All multiclasses below take a 9-bit `op`. The 32-bit encodings use
// op{7-0}; the VOP3 opcode is formed as {0, 1, 1, op{6-0}}.

// Real instruction for a pseudo defined with VOP1Only = 1 (record named NAME
// with no "_e32" suffix).
multiclass VOP1Only_Real<GFXGen Gen, bits<9> op> {
  let IsSingle = 1 in
    def Gen.Suffix :
      VOP1_Real_Gen<!cast<VOP1_Pseudo>(NAME), Gen>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
}

// Real e32 instruction for the pseudo named opName#"_e32".
multiclass VOP1_Real_e32<GFXGen Gen, bits<9> op, string opName = NAME> {
  defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
  def _e32#Gen.Suffix :
    VOP1_Real_Gen<ps, Gen>,
    VOP1e<op{7-0}, ps.Pfl>;
}

// As VOP1_Real_e32, but with the asm mnemonic replaced by asmName. The
// decoder namespace gets a "_FAKE16" suffix unless the profile is real true16.
multiclass VOP1_Real_e32_with_name<GFXGen Gen, bits<9> op, string opName,
                                   string asmName> {
  defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
  let AsmString = asmName # ps.AsmOperands,
      DecoderNamespace = Gen.DecoderNamespace #
                         !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
    defm NAME : VOP1_Real_e32<Gen, op, opName>;
  }
}

// Real e64 (VOP3) instruction for the pseudo named NAME#"_e64".
multiclass VOP1_Real_e64<GFXGen Gen, bits<9> op> {
  def _e64#Gen.Suffix :
    VOP3_Real_Gen<!cast<VOP3_Pseudo>(NAME#"_e64"), Gen>,
    VOP3e_gfx11_gfx12<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}

// Real DPP16 instruction for the pseudo named opName#"_dpp".
// (An unused `defvar ps` that looked up opName#"_e32" was removed; nothing in
// this multiclass referenced it.)
multiclass VOP1_Real_dpp<GFXGen Gen, bits<9> op, string opName = NAME> {
  def _dpp#Gen.Suffix : VOP1_DPP16_Gen<op{7-0}, !cast<VOP1_DPP_Pseudo>(opName#"_dpp"), Gen>;
}

// As VOP1_Real_dpp, with the asm mnemonic replaced by asmName and the profile's
// AsmDPP16 operand string.
multiclass VOP1_Real_dpp_with_name<GFXGen Gen, bits<9> op, string opName,
                                   string asmName> {
  defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
  let AsmString = asmName # ps.Pfl.AsmDPP16,
      DecoderNamespace = "DPP" # Gen.DecoderNamespace #
                         !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
    defm NAME : VOP1_Real_dpp<Gen, op, opName>;
  }
}

// Real DPP8 instruction built from the pseudo named opName#"_e32".
multiclass VOP1_Real_dpp8<GFXGen Gen, bits<9> op, string opName = NAME> {
  defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
  def _dpp8#Gen.Suffix : VOP1_DPP8_Gen<op{7-0}, ps, Gen>;
}

// As VOP1_Real_dpp8, with the asm mnemonic replaced by asmName and the
// profile's AsmDPP8 operand string.
multiclass VOP1_Real_dpp8_with_name<GFXGen Gen, bits<9> op, string opName,
                                    string asmName> {
  defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
  let AsmString = asmName # ps.Pfl.AsmDPP8,
      DecoderNamespace = "DPP8" # Gen.DecoderNamespace #
                         !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
    defm NAME : VOP1_Real_dpp8<Gen, op, opName>;
  }
}

// Forwards to VOP3_Realtriple with the VOP3 opcode derived from `op`.
multiclass VOP1_Realtriple_e64<GFXGen Gen, bits<9> op> {
  defm NAME : VOP3_Realtriple<Gen, {0, 1, 1, op{6-0}}, /*isSingle=*/ 0, NAME>;
}

// Forwards to VOP3_Realtriple_with_name with the VOP3 opcode derived from `op`.
multiclass VOP1_Realtriple_e64_with_name<GFXGen Gen, bits<9> op, string opName,
                                         string asmName> {
  defm NAME : VOP3_Realtriple_with_name<Gen, {0, 1, 1, op{6-0}}, opName,
                                        asmName>;
}

// e32 + e64 triple + dpp + dpp8 for one generation.
multiclass VOP1_Real_FULL<GFXGen Gen, bits<9> op> :
  VOP1_Real_e32<Gen, op>, VOP1_Realtriple_e64<Gen, op>,
  VOP1_Real_dpp<Gen, op>, VOP1_Real_dpp8<Gen, op>;

// Renamed e32/dpp/dpp8 reals for GFX11 (no VOP3 forms), plus an alias from the
// pseudo's mnemonic to asmName under isGFX11Plus.
multiclass VOP1_Real_NO_VOP3_with_name_gfx11<bits<9> op, string opName,
                                             string asmName> {
  defm NAME : VOP1_Real_e32_with_name<GFX11Gen, op, opName, asmName>,
              VOP1_Real_dpp_with_name<GFX11Gen, op, opName, asmName>,
              VOP1_Real_dpp8_with_name<GFX11Gen, op, opName, asmName>;
  defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
  def gfx11_alias : MnemonicAlias<ps.Mnemonic, asmName>,
                    Requires<[isGFX11Plus]>;
}

// Renamed e32/dpp/dpp8 reals for GFX12 (no VOP3 forms, no alias).
multiclass VOP1_Real_NO_VOP3_with_name_gfx12<bits<9> op, string opName,
                                             string asmName> {
  defm NAME : VOP1_Real_e32_with_name<GFX12Gen, op, opName, asmName>,
              VOP1_Real_dpp_with_name<GFX12Gen, op, opName, asmName>,
              VOP1_Real_dpp8_with_name<GFX12Gen, op, opName, asmName>;
}

// Renamed e32 + dpp + dpp8 + e64 triple for one generation.
multiclass VOP1_Real_FULL_with_name<GFXGen Gen, bits<9> op, string opName,
                                    string asmName> :
  VOP1_Real_e32_with_name<Gen, op, opName, asmName>,
  VOP1_Real_dpp_with_name<Gen, op, opName, asmName>,
  VOP1_Real_dpp8_with_name<Gen, op, opName, asmName>,
  VOP1_Realtriple_e64_with_name<Gen, op, opName, asmName>;

// e32 + plain e64 only.
multiclass VOP1_Real_NO_DPP<GFXGen Gen, bits<9> op> :
  VOP1_Real_e32<Gen, op>, VOP1_Real_e64<Gen, op>;

// Convenience wrappers instantiating the above for both GFX11 and GFX12.
// Note the parameter order here is (op, asmName, opName = NAME).
multiclass VOP1_Real_FULL_t16_gfx11_gfx12<bits<9> op, string asmName,
                                          string opName = NAME> :
  VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
  VOP1_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;

multiclass VOP1_Real_FULL_with_name_gfx11_gfx12<bits<9> op, string opName,
                                                string asmName> :
  VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
  VOP1_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;

multiclass VOP1Only_Real_gfx11_gfx12<bits<9> op> :
  VOP1Only_Real<GFX11Gen, op>, VOP1Only_Real<GFX12Gen, op>;

multiclass VOP1_Real_FULL_gfx11_gfx12<bits<9> op> :
  VOP1_Real_FULL<GFX11Gen, op>, VOP1_Real_FULL<GFX12Gen, op>;

// Renamed e32 plus a renamed plain VOP3 real (no DPP forms).
multiclass VOP1_Real_NO_DPP_OP_SEL_with_name<GFXGen Gen, bits<9> op,
                                             string opName, string asmName> :
  VOP1_Real_e32_with_name<Gen, op, opName, asmName>,
  VOP3_Real_with_name<Gen, {0, 1, 1, op{6-0}}, opName, asmName>;


// Define VOP1 instructions using the pseudo instruction with its old profile and
// VOP3 using the OpSel profile for the pseudo instruction.
defm V_CVT_F32_FP8 : VOP1_Real_NO_VOP3_with_name_gfx12<0x06c, "V_CVT_F32_FP8", "v_cvt_f32_fp8">;
defm V_CVT_F32_FP8 : VOP1_Realtriple_e64_with_name<GFX12Gen, 0x06c, "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">;

defm V_CVT_F32_BF8 : VOP1_Real_NO_VOP3_with_name_gfx12<0x06d, "V_CVT_F32_BF8", "v_cvt_f32_bf8">;
defm V_CVT_F32_BF8 : VOP1_Realtriple_e64_with_name<GFX12Gen, 0x06d, "V_CVT_F32_BF8_OP_SEL", "v_cvt_f32_bf8">;

// The packed forms call VOP3_Real_with_name directly, so the VOP3 opcode is
// given in full (0x1ee / 0x1ef) rather than derived from the 9-bit op.
defm V_CVT_PK_F32_FP8 : VOP1_Real_e32_with_name<GFX12Gen, 0x06e, "V_CVT_PK_F32_FP8", "v_cvt_pk_f32_fp8">;
defm V_CVT_PK_F32_FP8 : VOP3_Real_with_name<GFX12Gen, 0x1ee, "V_CVT_PK_F32_FP8_OP_SEL", "v_cvt_pk_f32_fp8">;

defm V_CVT_PK_F32_BF8 : VOP1_Real_e32_with_name<GFX12Gen, 0x06f, "V_CVT_PK_F32_BF8", "v_cvt_pk_f32_bf8">;
defm V_CVT_PK_F32_BF8 : VOP3_Real_with_name<GFX12Gen, 0x1ef, "V_CVT_PK_F32_BF8_OP_SEL", "v_cvt_pk_f32_bf8">;

defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x00c,
  "V_CVT_RPI_I32_F32", "v_cvt_nearest_i32_f32">;
defm V_CVT_FLOOR_I32_F32 :
VOP1_Real_FULL_with_name_gfx11_gfx12<0x00d, 942 "V_CVT_FLR_I32_F32", "v_cvt_floor_i32_f32">; 943defm V_CLZ_I32_U32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x039, 944 "V_FFBH_U32", "v_clz_i32_u32">; 945defm V_CTZ_I32_B32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03a, 946 "V_FFBL_B32", "v_ctz_i32_b32">; 947defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03b, 948 "V_FFBH_I32", "v_cls_i32">; 949defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11_gfx12<0x067>; 950defm V_MOV_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x01c, "v_mov_b16">; 951defm V_NOT_B16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16">; 952defm V_CVT_I32_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">; 953defm V_CVT_U32_U16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">; 954 955defm V_CVT_F16_U16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x050, "v_cvt_f16_u16">; 956defm V_CVT_F16_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x051, "v_cvt_f16_i16">; 957defm V_CVT_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x052, "v_cvt_u16_f16">; 958defm V_CVT_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x053, "v_cvt_i16_f16">; 959defm V_RCP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">; 960defm V_SQRT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">; 961defm V_RSQ_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x056, "v_rsq_f16">; 962defm V_LOG_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">; 963defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">; 964defm V_FREXP_MANT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">; 965defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">; 966defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">; 967defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">; 968defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">; 969defm V_CEIL_F16_fake16 : 
VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">; 970defm V_TRUNC_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05d, "v_trunc_f16">; 971defm V_RNDNE_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05e, "v_rndne_f16">; 972defm V_FRACT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f16">; 973defm V_SIN_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">; 974defm V_COS_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">; 975defm V_SAT_PK_U8_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">; 976defm V_CVT_NORM_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">; 977defm V_CVT_NORM_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">; 978 979defm V_CVT_F16_F32_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00a, "v_cvt_f16_f32">; 980defm V_CVT_F32_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x00b, "v_cvt_f32_f16">; 981 982//===----------------------------------------------------------------------===// 983// GFX10. 
//===----------------------------------------------------------------------===//

// GFX10 real-instruction multiclasses. Every record defined inside this `let`
// gets AssemblerPredicate = isGFX10Only and DecoderNamespace = "GFX10" unless
// the def overrides the namespace itself (the sdwa/dpp/dpp8 variants do).
let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
  // Real for a pseudo named exactly NAME (no "_e32" suffix in the lookup).
  multiclass VOP1Only_Real_gfx10<bits<9> op> {
    def _gfx10 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.GFX10>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
  }
  // 32-bit encoding of NAME#"_e32".
  multiclass VOP1_Real_e32_gfx10<bits<9> op> {
    def _e32_gfx10 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
  }
  // VOP3 encoding of NAME#"_e64"; the opcode is {0, 1, 1, op{6-0}}.
  multiclass VOP1_Real_e64_gfx10<bits<9> op> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
  // SDWA form, defined only when the e32 profile has HasExtSDWA9.
  multiclass VOP1_Real_sdwa_gfx10<bits<9> op> {
    if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
    def _sdwa_gfx10 :
      VOP_SDWA10_Real<!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP1_SDWA9Ae<op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
      let DecoderNamespace = "SDWA10";
    }
  }
  // DPP16 form, defined only when the e32 profile has HasExt32BitDPP.
  // NOTE(review): this DPP record is placed in the "SDWA10" decoder namespace
  // (the dpp8 variant below uses "DPP8") -- confirm this is intentional.
  multiclass VOP1_Real_dpp_gfx10<bits<9> op> {
    if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
    def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10> {
      let DecoderNamespace = "SDWA10";
    }
  }
  // DPP8 form, built from the "_e32" pseudo; same HasExt32BitDPP guard.
  multiclass VOP1_Real_dpp8_gfx10<bits<9> op> {
    if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
    def _dpp8_gfx10 : VOP1_DPP8<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")> {
      let DecoderNamespace = "DPP8";
    }
  }
} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"

// All GFX10 forms: e32, e64, sdwa, dpp and dpp8.
multiclass VOP1_Real_gfx10<bits<9> op> :
  VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>,
  VOP1_Real_sdwa_gfx10<op>, VOP1_Real_dpp_gfx10<op>,
  VOP1_Real_dpp8_gfx10<op>;

// The combinators below reuse one opcode across GFX10, GFX11 and GFX12.
// "FULL" adds VOP1_Real_FULL for GFX11/GFX12; "NO_DPP" adds VOP1_Real_NO_DPP.
multiclass VOP1_Real_gfx10_FULL_gfx11_gfx12<bits<9> op> :
  VOP1_Real_gfx10<op>,
  VOP1_Real_FULL<GFX11Gen, op>,
  VOP1_Real_FULL<GFX12Gen, op>;

multiclass VOP1_Real_gfx10_NO_DPP_gfx11_gfx12<bits<9> op> :
  VOP1_Real_gfx10<op>,
  VOP1_Real_NO_DPP<GFX11Gen, op>,
  VOP1_Real_NO_DPP<GFX12Gen, op>;

multiclass VOP1Only_Real_gfx10_gfx11_gfx12<bits<9> op> :
  VOP1Only_Real_gfx10<op>,
  VOP1Only_Real<GFX11Gen, op>,
  VOP1Only_Real<GFX12Gen, op>;

defm V_PIPEFLUSH : VOP1_Real_gfx10_NO_DPP_gfx11_gfx12<0x01b>;
defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10_FULL_gfx11_gfx12<0x048>;
// The entries through V_CVT_NORM_U16_F16 are instantiated for GFX10 only here.
defm V_CVT_F16_U16 : VOP1_Real_gfx10<0x050>;
defm V_CVT_F16_I16 : VOP1_Real_gfx10<0x051>;
defm V_CVT_U16_F16 : VOP1_Real_gfx10<0x052>;
defm V_CVT_I16_F16 : VOP1_Real_gfx10<0x053>;
defm V_RCP_F16 : VOP1_Real_gfx10<0x054>;
defm V_SQRT_F16 : VOP1_Real_gfx10<0x055>;
defm V_RSQ_F16 : VOP1_Real_gfx10<0x056>;
defm V_LOG_F16 : VOP1_Real_gfx10<0x057>;
defm V_EXP_F16 : VOP1_Real_gfx10<0x058>;
defm V_FREXP_MANT_F16 : VOP1_Real_gfx10<0x059>;
defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10<0x05a>;
defm V_FLOOR_F16 : VOP1_Real_gfx10<0x05b>;
defm V_CEIL_F16 : VOP1_Real_gfx10<0x05c>;
defm V_TRUNC_F16 : VOP1_Real_gfx10<0x05d>;
defm V_RNDNE_F16 : VOP1_Real_gfx10<0x05e>;
defm V_FRACT_F16 : VOP1_Real_gfx10<0x05f>;
defm V_SIN_F16 : VOP1_Real_gfx10<0x060>;
defm V_COS_F16 : VOP1_Real_gfx10<0x061>;
defm V_SAT_PK_U8_I16 : VOP1_Real_gfx10<0x062>;
defm V_CVT_NORM_I16_F16 : VOP1_Real_gfx10<0x063>;
defm V_CVT_NORM_U16_F16 : VOP1_Real_gfx10<0x064>;

defm V_SWAP_B32 : VOP1Only_Real_gfx10_gfx11_gfx12<0x065>;
defm V_SWAPREL_B32 : VOP1Only_Real_gfx10_gfx11_gfx12<0x068>;

//===----------------------------------------------------------------------===//
// GFX7, GFX10, GFX11, GFX12
//===----------------------------------------------------------------------===//

// GFX7-only reals. They use the SI encoding family with the isGFX7Only
// predicate and the "GFX7" decoder namespace. The VOP3 opcode is the 9-bit
// value {1, 1, op{6-0}}.
let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
  multiclass VOP1_Real_e32_gfx7<bits<9> op> {
    def _e32_gfx7 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
  }
  multiclass VOP1_Real_e64_gfx7<bits<9> op> {
    def _e64_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"

// e32 + e64 for GFX7.
multiclass VOP1_Real_gfx7<bits<9> op> :
  VOP1_Real_e32_gfx7<op>, VOP1_Real_e64_gfx7<op>;

// Same opcode on GFX7 and GFX10 (all forms) and GFX11/GFX12 (e32 + e64 only).
multiclass VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<bits<9> op> :
  VOP1_Real_gfx7<op>, VOP1_Real_gfx10<op>, VOP1_Real_NO_DPP<GFX11Gen, op>,
  VOP1_Real_NO_DPP<GFX12Gen, op>;

defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>;
defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>;

defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x017>;
defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x018>;
defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x019>;
defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x01a>;

//===----------------------------------------------------------------------===//
// GFX6, GFX7, GFX10, GFX11, GFX12
//===----------------------------------------------------------------------===//

// Reals shared by GFX6 and GFX7. These have the same structure as the
// GFX7-only pair above, but with the isGFX6GFX7 predicate and the "GFX6GFX7"
// decoder namespace.
let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  multiclass VOP1_Real_e32_gfx6_gfx7<bits<9> op> {
    def _e32_gfx6_gfx7 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
  }
  multiclass VOP1_Real_e64_gfx6_gfx7<bits<9> op> {
    def _e64_gfx6_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

// e32 + e64 for GFX6/GFX7.
multiclass VOP1_Real_gfx6_gfx7<bits<9> op> :
  VOP1_Real_e32_gfx6_gfx7<op>, VOP1_Real_e64_gfx6_gfx7<op>;

// GFX6/GFX7 plus all GFX10 forms.
multiclass VOP1_Real_gfx6_gfx7_gfx10<bits<9> op> :
  VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10<op>;

// GFX6/GFX7/GFX10 plus the full set of GFX11/GFX12 forms.
multiclass VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<bits<9> op> :
  VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_FULL<GFX11Gen, op>,
  VOP1_Real_FULL<GFX12Gen, op>;

// GFX6/GFX7/GFX10 plus e32 + e64 only on GFX11/GFX12.
multiclass VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<bits<9> op> :
  VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_NO_DPP<GFX11Gen, op>,
  VOP1_Real_NO_DPP<GFX12Gen, op>;

// Instructions instantiated for GFX6/GFX7 only.
defm V_LOG_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x026>;
defm V_RCP_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x028>;
defm V_RCP_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x029>;
defm V_RSQ_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x02c>;
defm V_RSQ_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x02d>;
defm V_RCP_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x030>;
defm V_RSQ_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x032>;

// Instructions sharing one opcode across GFX6/GFX7/GFX10 and, for the
// *_gfx11_gfx12 multiclasses, GFX11/GFX12 as well.
defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x000>;
defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x001>;
defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x003>;
defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x004>;
defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x005>;
defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x006>;
defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x007>;
defm V_CVT_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x008>;
defm V_CVT_F16_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00a>;
defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10<0x00b>;
defm V_CVT_RPI_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00c>;
defm V_CVT_FLR_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00d>;
defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x00e>;
defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x00f>;
defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x010>;
defm V_CVT_F32_UBYTE0 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x011>;
defm V_CVT_F32_UBYTE1 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x012>;
defm V_CVT_F32_UBYTE2 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x013>;
defm V_CVT_F32_UBYTE3 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x014>;
defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x015>;
defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x016>;
defm V_FRACT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x020>;
defm V_TRUNC_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x021>;
defm V_CEIL_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x022>;
defm V_RNDNE_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x023>;
defm V_FLOOR_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x024>;
defm V_EXP_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x025>;
defm V_LOG_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x027>;
defm V_RCP_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x02a>;
defm V_RCP_IFLAG_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x02b>;
defm V_RSQ_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x02e>;
defm V_RCP_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x02f>;
defm V_RSQ_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x031>;
defm V_SQRT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x033>;
defm V_SQRT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x034>;
defm V_SIN_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x035>;
defm V_COS_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x036>;
defm V_NOT_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x037>;
defm V_BFREV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x038>;
defm V_FFBH_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x039>;
defm V_FFBL_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x03a>;
defm V_FFBH_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x03b>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03c>;
defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03d>;
defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11_gfx12<0x03e>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x03f>;
defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x040>;
defm V_CLREXCP : VOP1_Real_gfx6_gfx7_gfx10<0x041>;
defm V_MOVRELD_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x042>;
defm V_MOVRELS_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x043>;
defm V_MOVRELSD_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11_gfx12<0x044>;

//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//

// First-dword encoding for the GFX8/GFX9 DPP form. The field layout matches
// VOP1e, with the Inst{8-0} slot fixed to 0xfa.
class VOP1_DPPe <bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
  VOP_DPPe <P> {
  bits<8> vdst;
  let Inst{8-0} = 0xfa; // dpp
  let Inst{16-9} = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; //encoding
}

// The VI multiclasses take a 10-bit `op`. The 32-bit encodings use op{7-0},
// and the VOP3 encoding uses 0x140 + op.

// Real for a pseudo named exactly NAME (no "_e32" suffix in the lookup).
multiclass VOP1Only_Real_vi <bits<10> op> {
  let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
    def _vi :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.VI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
  }
}

// e32 + e64 reals for GFX8/GFX9.
multiclass VOP1_Real_e32e64_vi <bits<10> op> {
  let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
    def _e32_vi :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
    def _e64_vi :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
      VOP3e_vi <!add(0x140, op), !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
}

// e32 + e64, plus each extended form that the e32 profile enables:
// _sdwa_vi (HasExtSDWA), _sdwa_gfx9 (HasExtSDWA9) and _dpp_vi (HasExtDPP).
multiclass VOP1_Real_vi <bits<10> op> {
  defm NAME : VOP1_Real_e32e64_vi <op>;

  if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA then
  def _sdwa_vi :
    VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
  def _dpp_vi :
    VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
    VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
}

// GFX8/GFX9 opcode table. The entries are not in strictly ascending opcode
// order (see the F64 rounding and *_LEGACY_F32 entries), and the V_MOVREL*
// entries use VOP1_Real_e32e64_vi, so they get no SDWA/DPP forms here.
defm V_NOP : VOP1_Real_vi <0x0>;
defm V_MOV_B32 : VOP1_Real_vi <0x1>;
defm V_CVT_I32_F64 : VOP1_Real_vi <0x3>;
defm V_CVT_F64_I32 : VOP1_Real_vi <0x4>;
defm V_CVT_F32_I32 : VOP1_Real_vi <0x5>;
defm V_CVT_F32_U32 : VOP1_Real_vi <0x6>;
defm V_CVT_U32_F32 : VOP1_Real_vi <0x7>;
defm V_CVT_I32_F32 : VOP1_Real_vi <0x8>;
defm V_CVT_F16_F32 : VOP1_Real_vi <0xa>;
defm V_CVT_F32_F16 : VOP1_Real_vi <0xb>;
defm V_CVT_RPI_I32_F32 : VOP1_Real_vi <0xc>;
defm V_CVT_FLR_I32_F32 : VOP1_Real_vi <0xd>;
defm V_CVT_OFF_F32_I4 : VOP1_Real_vi <0xe>;
defm V_CVT_F32_F64 : VOP1_Real_vi <0xf>;
defm V_CVT_F64_F32 : VOP1_Real_vi <0x10>;
defm V_CVT_F32_UBYTE0 : VOP1_Real_vi <0x11>;
defm V_CVT_F32_UBYTE1 : VOP1_Real_vi <0x12>;
defm V_CVT_F32_UBYTE2 : VOP1_Real_vi <0x13>;
defm V_CVT_F32_UBYTE3 : VOP1_Real_vi <0x14>;
defm V_CVT_U32_F64 : VOP1_Real_vi <0x15>;
defm V_CVT_F64_U32 : VOP1_Real_vi <0x16>;
defm V_FRACT_F32 : VOP1_Real_vi <0x1b>;
defm V_TRUNC_F32 : VOP1_Real_vi <0x1c>;
defm V_CEIL_F32 : VOP1_Real_vi <0x1d>;
defm V_RNDNE_F32 : VOP1_Real_vi <0x1e>;
defm V_FLOOR_F32 : VOP1_Real_vi <0x1f>;
defm V_EXP_F32 : VOP1_Real_vi <0x20>;
defm V_LOG_F32 : VOP1_Real_vi <0x21>;
defm V_RCP_F32 : VOP1_Real_vi <0x22>;
defm V_RCP_IFLAG_F32 : VOP1_Real_vi <0x23>;
defm V_RSQ_F32 : VOP1_Real_vi <0x24>;
defm V_RCP_F64 : VOP1_Real_vi <0x25>;
defm V_RSQ_F64 : VOP1_Real_vi <0x26>;
defm V_SQRT_F32 : VOP1_Real_vi <0x27>;
defm V_SQRT_F64 : VOP1_Real_vi <0x28>;
defm V_SIN_F32 : VOP1_Real_vi <0x29>;
defm V_COS_F32 : VOP1_Real_vi <0x2a>;
defm V_NOT_B32 : VOP1_Real_vi <0x2b>;
defm V_BFREV_B32 : VOP1_Real_vi <0x2c>;
defm V_FFBH_U32 : VOP1_Real_vi <0x2d>;
defm V_FFBL_B32 : VOP1_Real_vi <0x2e>;
defm V_FFBH_I32 : VOP1_Real_vi <0x2f>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_vi <0x30>;
defm V_FREXP_MANT_F64 : VOP1_Real_vi <0x31>;
defm V_FRACT_F64 : VOP1_Real_vi <0x32>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_vi <0x33>;
defm V_FREXP_MANT_F32 : VOP1_Real_vi <0x34>;
defm V_CLREXCP : VOP1_Real_vi <0x35>;
defm V_MOVRELD_B32 : VOP1_Real_e32e64_vi <0x36>;
defm V_MOVRELS_B32 : VOP1_Real_e32e64_vi <0x37>;
defm V_MOVRELSD_B32 : VOP1_Real_e32e64_vi <0x38>;
defm V_TRUNC_F64 : VOP1_Real_vi <0x17>;
defm V_CEIL_F64 : VOP1_Real_vi <0x18>;
defm V_FLOOR_F64 : VOP1_Real_vi <0x1A>;
defm V_RNDNE_F64 : VOP1_Real_vi <0x19>;
defm V_LOG_LEGACY_F32 : VOP1_Real_vi <0x4c>;
defm V_EXP_LEGACY_F32 : VOP1_Real_vi <0x4b>;
defm V_CVT_F16_U16 : VOP1_Real_vi <0x39>;
defm V_CVT_F16_I16 : VOP1_Real_vi <0x3a>;
defm V_CVT_U16_F16 : VOP1_Real_vi <0x3b>;
defm V_CVT_I16_F16 : VOP1_Real_vi <0x3c>;
defm V_RCP_F16 : VOP1_Real_vi <0x3d>;
defm V_SQRT_F16 : VOP1_Real_vi <0x3e>;
defm V_RSQ_F16 : VOP1_Real_vi <0x3f>;
defm V_LOG_F16 : VOP1_Real_vi <0x40>;
defm V_EXP_F16 : VOP1_Real_vi <0x41>;
defm V_FREXP_MANT_F16 : VOP1_Real_vi <0x42>;
defm V_FREXP_EXP_I16_F16 : VOP1_Real_vi <0x43>;
defm V_FLOOR_F16 : VOP1_Real_vi <0x44>;
defm V_CEIL_F16 : VOP1_Real_vi <0x45>;
defm V_TRUNC_F16 : VOP1_Real_vi <0x46>;
defm V_RNDNE_F16 : VOP1_Real_vi <0x47>;
defm V_FRACT_F16 : VOP1_Real_vi <0x48>;
defm V_SIN_F16 : VOP1_Real_vi <0x49>;
defm V_COS_F16 : VOP1_Real_vi <0x4a>;
defm V_SWAP_B32 : VOP1Only_Real_vi <0x51>;

defm V_SAT_PK_U8_I16 : VOP1_Real_vi<0x4f>;
defm V_CVT_NORM_I16_F16 : VOP1_Real_vi<0x4d>;
defm V_CVT_NORM_U16_F16 : VOP1_Real_vi<0x4e>;

defm V_ACCVGPR_MOV_B32 : VOP1Only_Real_vi<0x52>;

// Pseudos for VGPR indexing mode. Both expand to V_MOV_B32_e32_vi and take
// their Size from V_MOV_B32_e32.
let VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [EXEC, M0], Size = V_MOV_B32_e32.Size in {

// Copy of v_mov_b32 with $vdst as a use operand for use with VGPR
// indexing mode. vdst can't be treated as a def for codegen purposes,
// and an implicit use and def of the super register should be added.
def V_MOV_B32_indirect_write : VPseudoInstSI<(outs),
  (ins getVALUDstForVT<i32>.ret:$vdst, getVOPSrc0ForVT<i32, 0>.ret:$src0)>,
  PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
                                        getVOPSrc0ForVT<i32, 0>.ret:$src0)>;

// Copy of v_mov_b32 for use with VGPR indexing mode. An implicit use of the
// super register should be added.
def V_MOV_B32_indirect_read : VPseudoInstSI<
  (outs getVALUDstForVT<i32>.ret:$vdst),
  (ins getVOPSrc0ForVT<i32, 0>.ret:$src0)>,
  PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
                                        getVOPSrc0ForVT<i32, 0>.ret:$src0)>;

} // End VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [EXEC, M0], Size = V_MOV_B32_e32.Size

let OtherPredicates = [isGFX8Plus] in {

// int_amdgcn_mov_dpp selects to V_MOV_B32_dpp with $src passed as both the
// first and the second register operand.
def : GCNPat <
  (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask,
                           timm:$bank_mask, timm:$bound_ctrl)),
  (V_MOV_B32_dpp VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp_ctrl),
                 (as_i32timm $row_mask), (as_i32timm $bank_mask),
                 (as_i1timm $bound_ctrl))
>;

// int_amdgcn_update_dpp selects to V_MOV_B32_dpp with $old as the first
// register operand and $src as the second. It is parameterized over the value
// type and instantiated below for i32, f32, v2i16 and v2f16.
class UpdateDPPPat<ValueType vt> : GCNPat <
  (vt (int_amdgcn_update_dpp vt:$old, vt:$src, timm:$dpp_ctrl,
                             timm:$row_mask, timm:$bank_mask,
                             timm:$bound_ctrl)),
  (V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src, (as_i32timm $dpp_ctrl),
                 (as_i32timm $row_mask), (as_i32timm $bank_mask),
                 (as_i1timm $bound_ctrl))
>;

def : UpdateDPPPat<i32>;
def : UpdateDPPPat<f32>;
def : UpdateDPPPat<v2i16>;
def : UpdateDPPPat<v2f16>;

} // End OtherPredicates = [isGFX8Plus]

// i16 <-> i32/i64 anyext and trunc patterns.
let OtherPredicates = [isGFX8Plus] in {
// anyext i16 -> i32 is a plain COPY.
def : GCNPat<
  (i32 (anyext i16:$src)),
  (COPY $src)
>;

// anyext i16 -> i64: sub0 is a COPY of the source, and sub1 is set to 0 by a
// V_MOV_B32_e32.
def : GCNPat<
  (i64 (anyext i16:$src)),
  (REG_SEQUENCE VReg_64,
    (i32 (COPY $src)), sub0,
    (V_MOV_B32_e32 (i32 0)), sub1)
>;

// trunc i32 -> i16 is a plain COPY.
def : GCNPat<
  (i16 (trunc i32:$src)),
  (COPY $src)
>;

// trunc i64 -> i16 extracts sub0.
def : GCNPat <
  (i16 (trunc i64:$src)),
  (EXTRACT_SUBREG $src, sub0)
>;

} // End OtherPredicates = [isGFX8Plus]

//===----------------------------------------------------------------------===//
// GFX9
//===----------------------------------------------------------------------===//

// GFX9-only variant of VOP1_Real_vi. It reuses VOP1_Real_e32e64_vi for e32/e64
// under an isGFX9Only / "GFX9" `let`, then adds _sdwa_gfx9 (HasExtSDWA9) and
// _dpp_gfx9 (HasExtDPP, GFX9 encoding family). There is no _sdwa_vi form.
multiclass VOP1_Real_gfx9 <bits<10> op> {
  let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
    defm NAME : VOP1_Real_e32e64_vi <op>;
  }

  if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
  def _dpp_gfx9 :
    VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
    VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;

}

// Same as VOP1_Real_gfx9, except that the SDWA record fixes Inst{42-40} to 6.
// Going by the multiclass name, those bits are presumably the SDWA dst_sel
// field -- TODO confirm against VOP_SDWA9Ae.
multiclass VOP1_Real_NoDstSel_SDWA_gfx9 <bits<10> op> {
  let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
    defm NAME : VOP1_Real_e32e64_vi <op>;
  }

  if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
      let Inst{42-40} = 6;
    }

  if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
  def _dpp_gfx9 :
    VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
    VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
}

defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;

// V_MOV_B64 is instantiated under an isGFX940Plus assembler predicate.
let AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9" in
defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>;

// FP8/BF8 conversions, additionally guarded by HasFP8ConversionInsts.
let OtherPredicates = [HasFP8ConversionInsts] in {
defm V_CVT_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x54>;
defm V_CVT_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x55>;
defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>;
defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>;
}

//===----------------------------------------------------------------------===//
// GFX10
//===----------------------------------------------------------------------===//

// int_amdgcn_mov_dpp8 selects to the generation-specific V_MOV_B32_dpp8 real,
// with $src passed as both register operands and DPP8Mode.FI_0 as the last
// operand. The GFX11 and GFX12 patterns below differ only in the predicate and
// the target record.
let OtherPredicates = [isGFX10Only] in {
def : GCNPat <
  (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
  (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, VGPR_32:$src,
                        (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX10Only]

//===----------------------------------------------------------------------===//
// GFX11
//===----------------------------------------------------------------------===//

let OtherPredicates = [isGFX11Only] in {
def : GCNPat <
  (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
  (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, VGPR_32:$src,
                        (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX11Only]

//===----------------------------------------------------------------------===//
// GFX12
//===----------------------------------------------------------------------===//

let OtherPredicates = [isGFX12Only] in {
def : GCNPat <
  (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
  (V_MOV_B32_dpp8_gfx12 VGPR_32:$src, VGPR_32:$src,
                        (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX12Only]