1//===-- VOP1Instructions.td - Vector Instruction Definitions --------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9//===----------------------------------------------------------------------===// 10// VOP1 Classes 11//===----------------------------------------------------------------------===// 12 13class VOP1e <bits<8> op, VOPProfile P> : Enc32 { 14 bits<8> vdst; 15 bits<9> src0; 16 17 let Inst{8-0} = !if(P.HasSrc0, src0{8-0}, ?); 18 let Inst{16-9} = op; 19 let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); 20 let Inst{31-25} = 0x3f; //encoding 21} 22 23class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> { 24 bits<8> vdst; 25 26 let Inst{8-0} = 0xf9; // sdwa 27 let Inst{16-9} = op; 28 let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); 29 let Inst{31-25} = 0x3f; // encoding 30} 31 32class VOP1_SDWA9Ae <bits<8> op, VOPProfile P> : VOP_SDWA9Ae <P> { 33 bits<8> vdst; 34 35 let Inst{8-0} = 0xf9; // sdwa 36 let Inst{16-9} = op; 37 let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); 38 let Inst{31-25} = 0x3f; // encoding 39} 40 41class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1Only = 0> : 42 VOP_Pseudo <opName, !if(VOP1Only, "", "_e32"), P, P.Outs32, P.Ins32, "", pattern> { 43 44 let AsmOperands = P.Asm32; 45 46 let Size = 4; 47 let mayLoad = 0; 48 let mayStore = 0; 49 let hasSideEffects = 0; 50 51 let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret); 52 53 let mayRaiseFPException = ReadsModeReg; 54 55 let VOP1 = 1; 56 let VALU = 1; 57 let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]); 58 59 let AsmVariantName = AMDGPUAsmVariants.Default; 60} 61 62class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemonic > : 63 VOP_Real <ps>, 64 InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []>, 65 SIMCInstr <ps.PseudoInstr, EncodingFamily> { 66 67 let VALU = 1; 68 let VOP1 = 1; 69 let isPseudo = 0; 70 let isCodeGenOnly = 0; 71 72 let Constraints = ps.Constraints; 73 let DisableEncoding = ps.DisableEncoding; 74 75 // copy relevant pseudo op flags 76 let SubtargetPredicate = ps.SubtargetPredicate; 77 let AsmMatchConverter = ps.AsmMatchConverter; 78 let AsmVariantName = ps.AsmVariantName; 79 let Constraints = ps.Constraints; 80 let DisableEncoding = ps.DisableEncoding; 81 let TSFlags = ps.TSFlags; 82 let UseNamedOperandTable = ps.UseNamedOperandTable; 83 let Uses = ps.Uses; 84 let Defs = ps.Defs; 85 let SchedRW = ps.SchedRW; 86 let mayLoad = ps.mayLoad; 87 let mayStore = ps.mayStore; 88 let TRANS = ps.TRANS; 89} 90 91class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : 92 VOP_SDWA_Pseudo <OpName, P, pattern> { 93 let AsmMatchConverter = "cvtSdwaVOP1"; 94} 95 96class VOP1_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : 97 VOP_DPP_Pseudo <OpName, P, pattern> { 98} 99 100class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies { 101 list<dag> ret = 102 !if(P.HasModifiers, 103 [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods P.Src0VT:$src0, i32:$src0_modifiers))))], 104 !if(P.HasOMod, 105 [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0, 106 i1:$clamp, i32:$omod))))], 107 [(set P.DstVT:$vdst, (node P.Src0VT:$src0))] 108 ) 109 ); 110} 111 112multiclass VOP1Inst <string opName, VOPProfile P, 113 SDPatternOperator node = null_frag, int VOPDOp = -1> { 114 // We only want to set this on the basic, non-SDWA or DPP forms. 115 defvar should_mov_imm = !or(!eq(opName, "v_mov_b32"), 116 !eq(opName, "v_mov_b64")); 117 118 let isMoveImm = should_mov_imm in { 119 if !eq(VOPDOp, -1) then 120 def _e32 : VOP1_Pseudo <opName, P>; 121 else 122 // Only for V_MOV_B32 123 def _e32 : VOP1_Pseudo <opName, P>, VOPD_Component<VOPDOp, opName>; 124 def _e64 : VOP3InstBase <opName, P, node>; 125 } 126 127 if P.HasExtSDWA then 128 def _sdwa : VOP1_SDWA_Pseudo <opName, P>; 129 130 if P.HasExtDPP then 131 def _dpp : VOP1_DPP_Pseudo <opName, P>; 132 133 let SubtargetPredicate = isGFX11Plus in { 134 if P.HasExtVOP3DPP then 135 def _e64_dpp : VOP3_DPP_Pseudo <opName, P>; 136 } // End SubtargetPredicate = isGFX11Plus 137 138 def : MnemonicAlias<opName#"_e32", opName>, LetDummies; 139 def : MnemonicAlias<opName#"_e64", opName>, LetDummies; 140 141 if P.HasExtSDWA then 142 def : MnemonicAlias<opName#"_sdwa", opName>, LetDummies; 143 144 if P.HasExtDPP then 145 def : MnemonicAlias<opName#"_dpp", opName, AMDGPUAsmVariants.DPP>, LetDummies; 146} 147 148multiclass VOP1Inst_t16<string opName, 149 VOPProfile P, 150 SDPatternOperator node = null_frag> { 151 let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in { 152 defm NAME : VOP1Inst<opName, P, node>; 153 } 154 let OtherPredicates = [HasTrue16BitInsts] in { 155 defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_True16<P>, node>; 156 } 157} 158 159// Special profile for instructions which have clamp 160// and output modifiers (but have no input modifiers) 161class VOPProfileI2F<ValueType dstVt, ValueType srcVt> : 162 VOPProfile<[dstVt, srcVt, untyped, untyped]> { 163 164 let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod); 165 let InsVOP3Base = (ins Src0VOP3DPP:$src0, clampmod:$clamp, omod:$omod); 166 let AsmVOP3Base = "$vdst, $src0$clamp$omod"; 167 168 let HasModifiers = 0; 169 let HasClamp = 1; 170} 171 172class VOPProfileI2F_True16<ValueType dstVt, ValueType srcVt> : 173 VOPProfile_True16<VOPProfile<[dstVt, srcVt, untyped, untyped]>> { 174 175 let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod); 176 let InsVOP3Base = (ins Src0VOP3DPP:$src0, clampmod:$clamp, omod:$omod); 177 let AsmVOP3Base = "$vdst, $src0$clamp$omod"; 178 179 let HasModifiers = 0; 180 let HasClamp = 1; 181} 182 183def VOP1_F64_I32 : VOPProfileI2F <f64, i32>; 184def VOP1_F32_I32 : VOPProfileI2F <f32, i32>; 185def VOP1_F16_I16 : VOPProfileI2F <f16, i16>; 186def VOP1_F16_I16_t16 : VOPProfileI2F_True16 <f16, i16>; 187 188def VOP_NOP_PROFILE : VOPProfile <[untyped, untyped, untyped, untyped]>{ 189 let HasExtVOP3DPP = 0; 190} 191 192// OMod clears exceptions when set. OMod was always an operand, but its 193// now explicitly set. 194class VOP_SPECIAL_OMOD_PROF<ValueType dstVt, ValueType srcVt> : 195 VOPProfile<[dstVt, srcVt, untyped, untyped]> { 196 197 let HasOMod = 1; 198} 199def VOP_I32_F32_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f32>; 200def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f64>; 201def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i16, f16>; 202def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_True16<VOP_I16_F16> { 203 let HasOMod = 1; 204} 205 206//===----------------------------------------------------------------------===// 207// VOP1 Instructions 208//===----------------------------------------------------------------------===// 209 210let VOPAsmPrefer32Bit = 1 in { 211defm V_NOP : VOP1Inst <"v_nop", VOP_NOP_PROFILE>; 212} 213 214def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> { 215 let InsVOPDX = (ins Src0RC32:$src0X); 216 let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X); 217 let InsVOPDY = (ins Src0RC32:$src0Y); 218 let InsVOPDYDeferred = (ins VSrc_f32_Deferred:$src0Y); 219} 220 221let isReMaterializable = 1, isAsCheapAsAMove = 1 in { 222defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>; 223 224let SubtargetPredicate = isGFX940Plus in 225defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>; 226} // End isMoveImm = 1 227 228// FIXME: Specify SchedRW for READFIRSTLANE_B32 229// TODO: Make profile for this, there is VOP3 encoding also 230def V_READFIRSTLANE_B32 : 231 InstSI <(outs SReg_32:$vdst), 232 (ins VRegOrLdsSrc_32:$src0), 233 "v_readfirstlane_b32 $vdst, $src0", 234 [(set i32:$vdst, (int_amdgcn_readfirstlane (i32 VRegOrLdsSrc_32:$src0)))]>, 235 Enc32 { 236 237 let isCodeGenOnly = 0; 238 let UseNamedOperandTable = 1; 239 240 let Size = 4; 241 let mayLoad = 0; 242 let mayStore = 0; 243 let hasSideEffects = 0; 244 245 let VOP1 = 1; 246 let VALU = 1; 247 let Uses = [EXEC]; 248 let isConvergent = 1; 249 250 bits<8> vdst; 251 bits<9> src0; 252 253 let Inst{8-0} = src0; 254 let Inst{16-9} = 0x2; 255 let Inst{24-17} = vdst; 256 let Inst{31-25} = 0x3f; //encoding 257} 258 259let isReMaterializable = 1 in { 260let SchedRW = [WriteDoubleCvt] in { 261// OMod clears exceptions when set in this instruction 262defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64_SPECIAL_OMOD, fp_to_sint>; 263 264let mayRaiseFPException = 0 in { 265defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>; 266} 267 268defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>; 269defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, any_fpextend>; 270// OMod clears exceptions when set in this instruction 271defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64_SPECIAL_OMOD, fp_to_uint>; 272 273let mayRaiseFPException = 0 in { 274defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>; 275} 276 277} // End SchedRW = [WriteDoubleCvt] 278 279let SchedRW = [WriteFloatCvt] in { 280 281// XXX: Does this really not raise exceptions? The manual claims the 282// 16-bit ones can. 283let mayRaiseFPException = 0 in { 284defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>; 285defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>; 286} 287 288// OMod clears exceptions when set in these 2 instructions 289defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_uint>; 290defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_sint>; 291let FPDPRounding = 1, isReMaterializable = 0 in { 292 let OtherPredicates = [NotHasTrue16BitInsts] in 293 defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>; 294 let OtherPredicates = [HasTrue16BitInsts] in 295 defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_True16<VOP_F16_F32>, any_fpround>; 296} // End FPDPRounding = 1, isReMaterializable = 0 297 298let OtherPredicates = [NotHasTrue16BitInsts] in 299defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>; 300let OtherPredicates = [HasTrue16BitInsts] in 301defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_True16<VOP_F32_F16>, any_fpextend>; 302 303let ReadsModeReg = 0, mayRaiseFPException = 0 in { 304defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>; 305defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>; 306defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>; 307} // End ReadsModeReg = 0, mayRaiseFPException = 0 308} // End SchedRW = [WriteFloatCvt] 309 310let ReadsModeReg = 0, mayRaiseFPException = 0 in { 311defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>; 312defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>; 313defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>; 314defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>; 315} // ReadsModeReg = 0, mayRaiseFPException = 0 316 317defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>; 318defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>; 319defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>; 320defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>; 321defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>; 322 323let TRANS = 1, SchedRW = [WriteTrans32] in { 324defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, AMDGPUexp>; 325defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, AMDGPUlog>; 326defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>; 327defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32, AMDGPUrcp_iflag>; 328defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>; 329defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, any_amdgcn_sqrt>; 330} // End TRANS = 1, SchedRW = [WriteTrans32] 331 332let TRANS = 1, SchedRW = [WriteTrans64] in { 333defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>; 334defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>; 335defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, int_amdgcn_sqrt>; 336} // End TRANS = 1, SchedRW = [WriteTrans64] 337 338let TRANS = 1, SchedRW = [WriteTrans32] in { 339defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>; 340defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>; 341} // End TRANS = 1, SchedRW = [WriteTrans32] 342 343defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>; 344defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, DivergentUnaryFrag<bitreverse>>; 345defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32, AMDGPUffbh_u32>; 346defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32, AMDGPUffbl_b32>; 347defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>; 348 349let SchedRW = [WriteDoubleAdd] in { 350defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64_SPECIAL_OMOD, int_amdgcn_frexp_exp>; 351defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>; 352let FPDPRounding = 1 in { 353defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>; 354} // End FPDPRounding = 1 355} // End SchedRW = [WriteDoubleAdd] 356 357defm V_FREXP_EXP_I32_F32 : VOP1Inst <"v_frexp_exp_i32_f32", VOP_I32_F32, int_amdgcn_frexp_exp>; 358defm V_FREXP_MANT_F32 : VOP1Inst <"v_frexp_mant_f32", VOP_F32_F32, int_amdgcn_frexp_mant>; 359} // End isReMaterializable = 1 360 361let VOPAsmPrefer32Bit = 1 in { 362defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>; 363} 364 365// Restrict src0 to be VGPR 366def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> { 367 let Src0RC32 = VRegSrc_32; 368 let Src0RC64 = VRegSrc_32; 369} 370 371// Special case because there are no true output operands. Hack vdst 372// to be a src operand. The custom inserter must add a tied implicit 373// def and use of the super register since there seems to be no way to 374// add an implicit def of a virtual register in tablegen. 375class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, untyped]> { 376 let Src0RC32 = VOPDstOperand<VGPR_32>; 377 let Src0RC64 = VOPDstOperand<VGPR_32>; 378 379 let Outs = (outs); 380 let Ins32 = (ins Src0RC32:$vdst, Src1RC:$src0); 381 let Ins64 = (ins Src0RC64:$vdst, Src1RC:$src0); 382 let Asm32 = getAsm32<1, 1>.ret; 383 384 let OutsSDWA = (outs Src0RC32:$vdst); 385 let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, 386 clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, 387 src0_sel:$src0_sel); 388 let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret; 389 390 let OutsDPP = (outs Src0RC32:$vdst); 391 let InsDPP16 = (ins Src0RC32:$old, Src0RC32:$src0, 392 dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, 393 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl, FI:$fi); 394 let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret; 395 let InsDPP8 = (ins Src0RC32:$old, Src0RC32:$src0, dpp8:$dpp8, FI:$fi); 396 let AsmDPP8 = getAsmDPP8<1, 1, 0>.ret; 397 398 let OutsVOP3DPP = (outs Src0RC64:$vdst); 399 let InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, Src0RC64, NumSrcArgs>.ret; 400 let InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, Src0RC64, NumSrcArgs>.ret; 401 let InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, Src0RC64, NumSrcArgs>.ret; 402 403 let AsmVOP3Base = 404 getAsmVOP3Base<NumSrcArgs, 1 /* HasDst */, HasClamp, 405 HasOpSel, HasOMod, IsVOP3P, HasModifiers, 406 HasModifiers, HasModifiers, HasModifiers>.ret; 407 408 let HasDst = 0; 409 let EmitDst = 1; // force vdst emission 410} 411 412def VOP_MOVRELD : VOP_MOVREL<VSrc_b32>; 413def VOP_MOVRELSD : VOP_MOVREL<VRegSrc_32>; 414 415let SubtargetPredicate = HasMovrel, Uses = [M0, EXEC] in { 416 // v_movreld_b32 is a special case because the destination output 417 // register is really a source. It isn't actually read (but may be 418 // written), and is only to provide the base register to start 419 // indexing from. Tablegen seems to not let you define an implicit 420 // virtual register output for the super register being written into, 421 // so this must have an implicit def of the register added to it. 422defm V_MOVRELD_B32 : VOP1Inst <"v_movreld_b32", VOP_MOVRELD>; 423defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_MOVRELS>; 424defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_MOVRELSD>; 425} // End Uses = [M0, EXEC] 426 427let isReMaterializable = 1 in { 428let SubtargetPredicate = isGFX6GFX7 in { 429 let TRANS = 1, SchedRW = [WriteTrans32] in { 430 defm V_LOG_CLAMP_F32 : 431 VOP1Inst<"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>; 432 defm V_RCP_CLAMP_F32 : 433 VOP1Inst<"v_rcp_clamp_f32", VOP_F32_F32>; 434 defm V_RCP_LEGACY_F32 : 435 VOP1Inst<"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>; 436 defm V_RSQ_CLAMP_F32 : 437 VOP1Inst<"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>; 438 defm V_RSQ_LEGACY_F32 : 439 VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, int_amdgcn_rsq_legacy>; 440 } // End TRANS = 1, SchedRW = [WriteTrans32] 441 442 let SchedRW = [WriteTrans64] in { 443 defm V_RCP_CLAMP_F64 : 444 VOP1Inst<"v_rcp_clamp_f64", VOP_F64_F64>; 445 defm V_RSQ_CLAMP_F64 : 446 VOP1Inst<"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>; 447 } // End SchedRW = [WriteTrans64] 448} // End SubtargetPredicate = isGFX6GFX7 449 450let SubtargetPredicate = isGFX7GFX8GFX9 in { 451 let TRANS = 1, SchedRW = [WriteTrans32] in { 452 defm V_LOG_LEGACY_F32 : VOP1Inst<"v_log_legacy_f32", VOP_F32_F32>; 453 defm V_EXP_LEGACY_F32 : VOP1Inst<"v_exp_legacy_f32", VOP_F32_F32>; 454 } // End TRANS = 1, SchedRW = [WriteTrans32] 455} // End SubtargetPredicate = isGFX7GFX8GFX9 456 457let SubtargetPredicate = isGFX7Plus in { 458 let SchedRW = [WriteDoubleAdd] in { 459 defm V_TRUNC_F64 : VOP1Inst<"v_trunc_f64", VOP_F64_F64, ftrunc>; 460 defm V_CEIL_F64 : VOP1Inst<"v_ceil_f64", VOP_F64_F64, fceil>; 461 defm V_RNDNE_F64 : VOP1Inst<"v_rndne_f64", VOP_F64_F64, frint>; 462 defm V_FLOOR_F64 : VOP1Inst<"v_floor_f64", VOP_F64_F64, ffloor>; 463 } // End SchedRW = [WriteDoubleAdd] 464} // End SubtargetPredicate = isGFX7Plus 465} // End isReMaterializable = 1 466 467let FPDPRounding = 1 in { 468let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in { 469defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>; 470defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>; 471} 472let OtherPredicates = [HasTrue16BitInsts] in { 473defm V_CVT_F16_U16_t16 : VOP1Inst <"v_cvt_f16_u16_t16", VOP1_F16_I16_t16, uint_to_fp>; 474defm V_CVT_F16_I16_t16 : VOP1Inst <"v_cvt_f16_i16_t16", VOP1_F16_I16_t16, sint_to_fp>; 475} 476} // End FPDPRounding = 1 477// OMod clears exceptions when set in these two instructions 478let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in { 479defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_uint>; 480defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_sint>; 481} 482let OtherPredicates = [HasTrue16BitInsts] in { 483defm V_CVT_U16_F16_t16 : VOP1Inst <"v_cvt_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_uint>; 484defm V_CVT_I16_F16_t16 : VOP1Inst <"v_cvt_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_sint>; 485} 486let TRANS = 1, SchedRW = [WriteTrans32] in { 487defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>; 488defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>; 489defm V_RSQ_F16 : VOP1Inst_t16 <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>; 490defm V_LOG_F16 : VOP1Inst_t16 <"v_log_f16", VOP_F16_F16, AMDGPUlogf16>; 491defm V_EXP_F16 : VOP1Inst_t16 <"v_exp_f16", VOP_F16_F16, AMDGPUexpf16>; 492defm V_SIN_F16 : VOP1Inst_t16 <"v_sin_f16", VOP_F16_F16, AMDGPUsin>; 493defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>; 494} // End TRANS = 1, SchedRW = [WriteTrans32] 495defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>; 496let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in { 497defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16_SPECIAL_OMOD, int_amdgcn_frexp_exp>; 498} 499let OtherPredicates = [HasTrue16BitInsts] in { 500defm V_FREXP_EXP_I16_F16_t16 : VOP1Inst <"v_frexp_exp_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, int_amdgcn_frexp_exp>; 501} 502defm V_FLOOR_F16 : VOP1Inst_t16 <"v_floor_f16", VOP_F16_F16, ffloor>; 503defm V_CEIL_F16 : VOP1Inst_t16 <"v_ceil_f16", VOP_F16_F16, fceil>; 504defm V_TRUNC_F16 : VOP1Inst_t16 <"v_trunc_f16", VOP_F16_F16, ftrunc>; 505defm V_RNDNE_F16 : VOP1Inst_t16 <"v_rndne_f16", VOP_F16_F16, frint>; 506let FPDPRounding = 1 in { 507defm V_FRACT_F16 : VOP1Inst_t16 <"v_fract_f16", VOP_F16_F16, AMDGPUfract>; 508} // End FPDPRounding = 1 509 510let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in { 511def : GCNPat< 512 (f32 (f16_to_fp i16:$src)), 513 (V_CVT_F32_F16_e32 $src) 514>; 515def : GCNPat< 516 (i16 (AMDGPUfp_to_f16 f32:$src)), 517 (V_CVT_F16_F32_e32 $src) 518>; 519} 520let OtherPredicates = [HasTrue16BitInsts] in { 521def : GCNPat< 522 (f32 (f16_to_fp i16:$src)), 523 (V_CVT_F32_F16_t16_e32 $src) 524>; 525def : GCNPat< 526 (i16 (AMDGPUfp_to_f16 f32:$src)), 527 (V_CVT_F16_F32_t16_e32 $src) 528>; 529} 530 531def VOP_SWAP_I32 : VOPProfile<[i32, i32, untyped, untyped]> { 532 let Outs32 = (outs VGPR_32:$vdst, VRegSrc_32:$vdst1); 533 let Ins32 = (ins VRegSrc_32:$src0, VGPR_32:$src1); 534 let Asm32 = " $vdst, $src0"; 535} 536 537let SubtargetPredicate = isGFX9Plus in { 538 def V_SWAP_B32 : VOP1_Pseudo<"v_swap_b32", VOP_SWAP_I32, [], 1> { 539 let Constraints = "$vdst = $src1, $vdst1 = $src0"; 540 let DisableEncoding = "$vdst1,$src1"; 541 let SchedRW = [Write64Bit, Write64Bit]; 542 } 543 544 let isReMaterializable = 1 in 545 defm V_SAT_PK_U8_I16 : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>; 546 547 let mayRaiseFPException = 0 in { 548 let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in { 549 defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16_SPECIAL_OMOD>; 550 defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16_SPECIAL_OMOD>; 551 } 552 let OtherPredicates = [HasTrue16BitInsts] in { 553 defm V_CVT_NORM_I16_F16_t16 : VOP1Inst<"v_cvt_norm_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>; 554 defm V_CVT_NORM_U16_F16_t16 : VOP1Inst<"v_cvt_norm_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>; 555 } 556 } // End mayRaiseFPException = 0 557} // End SubtargetPredicate = isGFX9Plus 558 559let SubtargetPredicate = isGFX9Only in { 560 defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>; 561} // End SubtargetPredicate = isGFX9Only 562 563class VOPProfile_Base_CVT_F32_F8<ValueType vt> : VOPProfileI2F <vt, i32> { 564 let HasExtSDWA = 1; 565 let HasExtSDWA9 = 1; 566 let HasExt = 1; 567 let DstRCSDWA = getVALUDstForVT<vt>.ret; 568 let InsSDWA = (ins Bin32SDWAInputMods:$src0_modifiers, Src0SDWA:$src0, 569 clampmod:$clamp, omod:$omod, src0_sel:$src0_sel); 570 let AsmSDWA = "$vdst, $src0_modifiers$clamp$omod $src0_sel"; // No dst_sel 571 let AsmSDWA9 = AsmSDWA; 572 let EmitDstSel = 0; 573} 574 575def VOPProfileCVT_F32_F8 : VOPProfile_Base_CVT_F32_F8 <f32>; 576def VOPProfileCVT_PK_F32_F8 : VOPProfile_Base_CVT_F32_F8 <v2f32>; 577 578let SubtargetPredicate = HasFP8Insts, mayRaiseFPException = 0, 579 SchedRW = [WriteFloatCvt] in { 580 defm V_CVT_F32_FP8 : VOP1Inst<"v_cvt_f32_fp8", VOPProfileCVT_F32_F8>; 581 defm V_CVT_F32_BF8 : VOP1Inst<"v_cvt_f32_bf8", VOPProfileCVT_F32_F8>; 582 defm V_CVT_PK_F32_FP8 : VOP1Inst<"v_cvt_pk_f32_fp8", VOPProfileCVT_PK_F32_F8>; 583 defm V_CVT_PK_F32_BF8 : VOP1Inst<"v_cvt_pk_f32_bf8", VOPProfileCVT_PK_F32_F8>; 584} 585 586class Cvt_F32_F8_Pat<SDPatternOperator node, int index, 587 VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat< 588 (f32 (node i32:$src, index)), 589 !if (index, 590 (inst_sdwa 0, $src, 0, 0, index), 591 (inst_e32 $src)) 592>; 593 594foreach Index = [0, 1, 2, 3] in { 595 def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index, 596 V_CVT_F32_FP8_e32, V_CVT_F32_FP8_sdwa>; 597 def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index, 598 V_CVT_F32_BF8_e32, V_CVT_F32_BF8_sdwa>; 599} 600 601class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index, 602 VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat< 603 (v2f32 (node i32:$src, index)), 604 !if (index, 605 (inst_sdwa 0, $src, 0, 0, SDWA.WORD_1), 606 (inst_e32 $src)) 607>; 608 609foreach Index = [0, -1] in { 610 def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index, 611 V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>; 612 def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index, 613 V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>; 614} 615 616let SubtargetPredicate = isGFX10Plus in { 617 defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NO_EXT<VOP_NONE>>; 618 619 let Uses = [M0] in { 620 defm V_MOVRELSD_2_B32 : 621 VOP1Inst<"v_movrelsd_2_b32", VOP_MOVRELSD>; 622 623 def V_SWAPREL_B32 : VOP1_Pseudo<"v_swaprel_b32", VOP_SWAP_I32, [], 1> { 624 let Constraints = "$vdst = $src1, $vdst1 = $src0"; 625 let DisableEncoding = "$vdst1,$src1"; 626 let SchedRW = [Write64Bit, Write64Bit]; 627 } 628 } // End Uses = [M0] 629} // End SubtargetPredicate = isGFX10Plus 630 631def VOPProfileAccMov : VOP_NO_EXT<VOP_I32_I32> { 632 let DstRC = RegisterOperand<AGPR_32>; 633 let Src0RC32 = ARegSrc_32; 634 let Asm32 = " $vdst, $src0"; 635} 636 637def V_ACCVGPR_MOV_B32 : VOP1_Pseudo<"v_accvgpr_mov_b32", VOPProfileAccMov, [], 1> { 638 let SubtargetPredicate = isGFX90APlus; 639 let isReMaterializable = 1; 640 let isAsCheapAsAMove = 1; 641} 642 643let SubtargetPredicate = isGFX11Plus in { 644 // Restrict src0 to be VGPR 645 def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS, 646 getVOP1Pat64<int_amdgcn_permlane64, 647 VOP_MOVRELS>.ret, 648 /*VOP1Only=*/ 1>; 649 defm V_NOT_B16 : VOP1Inst_t16<"v_not_b16", VOP_I16_I16>; 650 defm V_CVT_I32_I16 : VOP1Inst_t16<"v_cvt_i32_i16", VOP_I32_I16>; 651 defm V_CVT_U32_U16 : VOP1Inst_t16<"v_cvt_u32_u16", VOP_I32_I16>; 652} // End SubtargetPredicate = isGFX11Plus 653 654//===----------------------------------------------------------------------===// 655// Target-specific instruction encodings. 656//===----------------------------------------------------------------------===// 657 658class VOP1_DPP<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 = 0> : 659 VOP_DPP<ps.OpName, p, isDPP16> { 660 let hasSideEffects = ps.hasSideEffects; 661 let Defs = ps.Defs; 662 let SchedRW = ps.SchedRW; 663 let Uses = ps.Uses; 664 let TRANS = ps.TRANS; 665 666 bits<8> vdst; 667 let Inst{8-0} = 0xfa; 668 let Inst{16-9} = op; 669 let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0); 670 let Inst{31-25} = 0x3f; 671} 672 673class VOP1_DPP16<bits<8> op, VOP1_DPP_Pseudo ps, int subtarget, VOPProfile p = ps.Pfl> : 674 VOP1_DPP<op, ps, p, 1>, 675 SIMCInstr <ps.PseudoInstr, subtarget> { 676 let AssemblerPredicate = HasDPP16; 677 let SubtargetPredicate = HasDPP16; 678} 679 680class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> : 681 VOP_DPP8<ps.OpName, p> { 682 let hasSideEffects = ps.hasSideEffects; 683 let Defs = ps.Defs; 684 let SchedRW = ps.SchedRW; 685 let Uses = ps.Uses; 686 687 bits<8> vdst; 688 let Inst{8-0} = fi; 689 let Inst{16-9} = op; 690 let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0); 691 let Inst{31-25} = 0x3f; 692} 693 694//===----------------------------------------------------------------------===// 695// GFX11. 696//===----------------------------------------------------------------------===// 697 698let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in { 699 multiclass VOP1Only_Real_gfx11<bits<9> op> { 700 let IsSingle = 1 in 701 def _gfx11 : 702 VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.GFX11>, 703 VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>; 704 } 705 multiclass VOP1_Real_e32_gfx11<bits<9> op, string opName = NAME> { 706 defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); 707 def _e32_gfx11 : 708 VOP1_Real<ps, SIEncodingFamily.GFX11>, 709 VOP1e<op{7-0}, ps.Pfl>; 710 } 711 multiclass VOP1_Real_e32_with_name_gfx11<bits<9> op, string opName, 712 string asmName> { 713 defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); 714 let AsmString = asmName # ps.AsmOperands in { 715 defm NAME : VOP1_Real_e32_gfx11<op, opName>; 716 } 717 } 718 multiclass VOP1_Real_e64_gfx11<bits<9> op> { 719 def _e64_gfx11 : 720 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX11>, 721 VOP3e_gfx11<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 722 } 723 multiclass VOP1_Real_dpp_gfx11<bits<9> op, string opName = NAME> { 724 defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); 725 def _dpp_gfx11 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX11> { 726 let DecoderNamespace = "DPPGFX11"; 727 } 728 } 729 multiclass VOP1_Real_dpp_with_name_gfx11<bits<9> op, string opName, 730 string asmName> { 731 defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); 732 let AsmString = asmName # ps.Pfl.AsmDPP16, DecoderNamespace = "DPPGFX11" in { 733 defm NAME : VOP1_Real_dpp_gfx11<op, opName>; 734 } 735 } 736 multiclass VOP1_Real_dpp8_gfx11<bits<9> op, string opName = NAME> { 737 defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); 738 def _dpp8_gfx11 : VOP1_DPP8<op{7-0}, ps> { 739 let DecoderNamespace = "DPP8GFX11"; 740 } 741 } 742 multiclass VOP1_Real_dpp8_with_name_gfx11<bits<9> op, string opName, 743 string asmName> { 744 defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); 745 let AsmString = asmName # ps.Pfl.AsmDPP8, DecoderNamespace = "DPP8GFX11" in { 746 defm NAME : VOP1_Real_dpp8_gfx11<op, opName>; 747 } 748 } 749} // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" 750 751multiclass VOP1_Realtriple_e64_gfx11<bits<9> op> { 752 defm NAME : VOP3_Realtriple_gfx11<{0, 1, 1, op{6-0}}, /*isSingle=*/ 0, NAME>; 753} 754multiclass VOP1_Realtriple_e64_with_name_gfx11<bits<9> op, string opName, 755 string asmName> { 756 defm NAME : VOP3_Realtriple_with_name_gfx11<{0, 1, 1, op{6-0}}, opName, 757 asmName>; 758} 759 760multiclass VOP1_Real_FULL_gfx11<bits<9> op> : 761 VOP1_Real_e32_gfx11<op>, VOP1_Realtriple_e64_gfx11<op>, 762 VOP1_Real_dpp_gfx11<op>, VOP1_Real_dpp8_gfx11<op>; 763 764multiclass VOP1_Real_NO_VOP3_with_name_gfx11<bits<9> op, string opName, 765 string asmName> { 766 defm NAME : VOP1_Real_e32_with_name_gfx11<op, opName, asmName>, 767 VOP1_Real_dpp_with_name_gfx11<op, opName, asmName>, 768 VOP1_Real_dpp8_with_name_gfx11<op, opName, asmName>; 769 defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); 770 def gfx11_alias : MnemonicAlias<ps.Mnemonic, asmName>, 771 Requires<[isGFX11Plus]>; 772} 773 774multiclass VOP1_Real_FULL_with_name_gfx11<bits<9> op, string opName, 775 string asmName> : 776 VOP1_Real_NO_VOP3_with_name_gfx11<op, opName, asmName>, 777 VOP1_Realtriple_e64_with_name_gfx11<op, opName, asmName>; 778 779multiclass VOP1_Real_FULL_t16_gfx11<bits<9> op, string asmName, 780 string opName = NAME> : 781 VOP1_Real_FULL_with_name_gfx11<op, opName, asmName>; 782 783multiclass VOP1_Real_NO_DPP_gfx11<bits<9> op> : 784 VOP1_Real_e32_gfx11<op>, VOP1_Real_e64_gfx11<op>; 785 786defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11<0x00c, 787 "V_CVT_RPI_I32_F32", "v_cvt_nearest_i32_f32">; 788defm V_CVT_FLOOR_I32_F32 : VOP1_Real_FULL_with_name_gfx11<0x00d, 789 "V_CVT_FLR_I32_F32", "v_cvt_floor_i32_f32">; 790defm V_CLZ_I32_U32 : VOP1_Real_FULL_with_name_gfx11<0x039, 791 "V_FFBH_U32", "v_clz_i32_u32">; 792defm V_CTZ_I32_B32 : VOP1_Real_FULL_with_name_gfx11<0x03a, 793 "V_FFBL_B32", "v_ctz_i32_b32">; 794defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11<0x03b, 795 "V_FFBH_I32", "v_cls_i32">; 796defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11<0x067>; 797defm V_NOT_B16_t16 : VOP1_Real_FULL_t16_gfx11<0x069, "v_not_b16">; 798defm V_CVT_I32_I16_t16 : VOP1_Real_FULL_t16_gfx11<0x06a, "v_cvt_i32_i16">; 799defm V_CVT_U32_U16_t16 : VOP1_Real_FULL_t16_gfx11<0x06b, "v_cvt_u32_u16">; 800 801defm V_CVT_F16_U16_t16 : VOP1_Real_FULL_t16_gfx11<0x050, "v_cvt_f16_u16">; 802defm V_CVT_F16_I16_t16 : VOP1_Real_FULL_t16_gfx11<0x051, "v_cvt_f16_i16">; 803defm V_CVT_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x052, "v_cvt_u16_f16">; 804defm V_CVT_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x053, "v_cvt_i16_f16">; 805defm V_RCP_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x054, "v_rcp_f16">; 806defm V_SQRT_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x055, "v_sqrt_f16">; 807defm V_RSQ_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x056, "v_rsq_f16">; 808defm V_LOG_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x057, "v_log_f16">; 809defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x058, "v_exp_f16">; 810defm V_FREXP_MANT_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x059, "v_frexp_mant_f16">; 811defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05a, "v_frexp_exp_i16_f16">; 812defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05b, "v_floor_f16">; 813defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05c, "v_ceil_f16">; 814defm V_TRUNC_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05d, "v_trunc_f16">; 815defm V_RNDNE_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05e, "v_rndne_f16">; 816defm V_FRACT_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05f, "v_fract_f16">; 817defm V_SIN_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x060, "v_sin_f16">; 818defm V_COS_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x061, "v_cos_f16">; 819defm V_SAT_PK_U8_I16_t16 : VOP1_Real_FULL_t16_gfx11<0x062, "v_sat_pk_u8_i16">; 820defm V_CVT_NORM_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x063, "v_cvt_norm_i16_f16">; 821defm V_CVT_NORM_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x064, "v_cvt_norm_u16_f16">; 822 823defm V_CVT_F16_F32_t16 : VOP1_Real_FULL_t16_gfx11<0x00a, "v_cvt_f16_f32">; 824defm V_CVT_F32_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x00b, "v_cvt_f32_f16">; 825 826//===----------------------------------------------------------------------===// 827// GFX10. 828//===----------------------------------------------------------------------===// 829 830let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { 831 multiclass VOP1Only_Real_gfx10<bits<9> op> { 832 def _gfx10 : 833 VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.GFX10>, 834 VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>; 835 } 836 multiclass VOP1_Real_e32_gfx10<bits<9> op> { 837 def _e32_gfx10 : 838 VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>, 839 VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>; 840 } 841 multiclass VOP1_Real_e64_gfx10<bits<9> op> { 842 def _e64_gfx10 : 843 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>, 844 VOP3e_gfx10<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 845 } 846 multiclass VOP1_Real_sdwa_gfx10<bits<9> op> { 847 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then 848 def _sdwa_gfx10 : 849 VOP_SDWA10_Real<!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>, 850 VOP1_SDWA9Ae<op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> { 851 let DecoderNamespace = "SDWA10"; 852 } 853 } 854 multiclass VOP1_Real_dpp_gfx10<bits<9> op> { 855 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then 856 def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10> { 857 let DecoderNamespace = "SDWA10"; 858 } 859 } 860 multiclass VOP1_Real_dpp8_gfx10<bits<9> op> { 861 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then 862 def _dpp8_gfx10 : VOP1_DPP8<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")> { 863 let DecoderNamespace = "DPP8"; 864 } 865 } 866} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" 867 868multiclass VOP1_Real_gfx10<bits<9> op> : 869 VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>, 870 VOP1_Real_sdwa_gfx10<op>, VOP1_Real_dpp_gfx10<op>, 871 VOP1_Real_dpp8_gfx10<op>; 872 873multiclass VOP1_Real_gfx10_FULL_gfx11<bits<9> op> : 874 VOP1_Real_gfx10<op>, VOP1_Real_FULL_gfx11<op>; 875 876multiclass VOP1_Real_gfx10_NO_DPP_gfx11<bits<9> op> : 877 VOP1_Real_gfx10<op>, VOP1_Real_NO_DPP_gfx11<op>; 878 879multiclass VOP1Only_Real_gfx10_gfx11<bits<9> op> : 880 VOP1Only_Real_gfx10<op>, VOP1Only_Real_gfx11<op>; 881 882defm V_PIPEFLUSH : VOP1_Real_gfx10_NO_DPP_gfx11<0x01b>; 883defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10_FULL_gfx11<0x048>; 884defm V_CVT_F16_U16 : VOP1_Real_gfx10<0x050>; 885defm V_CVT_F16_I16 : VOP1_Real_gfx10<0x051>; 886defm V_CVT_U16_F16 : VOP1_Real_gfx10<0x052>; 887defm V_CVT_I16_F16 : VOP1_Real_gfx10<0x053>; 888defm V_RCP_F16 : VOP1_Real_gfx10<0x054>; 889defm V_SQRT_F16 : VOP1_Real_gfx10<0x055>; 890defm V_RSQ_F16 : VOP1_Real_gfx10<0x056>; 891defm V_LOG_F16 : VOP1_Real_gfx10<0x057>; 892defm V_EXP_F16 : VOP1_Real_gfx10<0x058>; 893defm V_FREXP_MANT_F16 : VOP1_Real_gfx10<0x059>; 894defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10<0x05a>; 895defm V_FLOOR_F16 : VOP1_Real_gfx10<0x05b>; 896defm V_CEIL_F16 : VOP1_Real_gfx10<0x05c>; 897defm V_TRUNC_F16 : VOP1_Real_gfx10<0x05d>; 898defm V_RNDNE_F16 : VOP1_Real_gfx10<0x05e>; 899defm V_FRACT_F16 : VOP1_Real_gfx10<0x05f>; 900defm V_SIN_F16 : VOP1_Real_gfx10<0x060>; 901defm V_COS_F16 : VOP1_Real_gfx10<0x061>; 902defm V_SAT_PK_U8_I16 : VOP1_Real_gfx10<0x062>; 903defm V_CVT_NORM_I16_F16 : VOP1_Real_gfx10<0x063>; 904defm V_CVT_NORM_U16_F16 : VOP1_Real_gfx10<0x064>; 905 906defm V_SWAP_B32 : VOP1Only_Real_gfx10_gfx11<0x065>; 907defm V_SWAPREL_B32 : VOP1Only_Real_gfx10_gfx11<0x068>; 908 909//===----------------------------------------------------------------------===// 910// GFX7, GFX10. 911//===----------------------------------------------------------------------===// 912 913let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in { 914 multiclass VOP1_Real_e32_gfx7<bits<9> op> { 915 def _e32_gfx7 : 916 VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>, 917 VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>; 918 } 919 multiclass VOP1_Real_e64_gfx7<bits<9> op> { 920 def _e64_gfx7 : 921 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>, 922 VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 923 } 924} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" 925 926multiclass VOP1_Real_gfx7<bits<9> op> : 927 VOP1_Real_e32_gfx7<op>, VOP1_Real_e64_gfx7<op>; 928 929multiclass VOP1_Real_gfx7_gfx10<bits<9> op> : 930 VOP1_Real_gfx7<op>, VOP1_Real_gfx10<op>; 931 932multiclass VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<bits<9> op> : 933 VOP1_Real_gfx7_gfx10<op>, VOP1_Real_NO_DPP_gfx11<op>; 934 935defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>; 936defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>; 937 938defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x017>; 939defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x018>; 940defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x019>; 941defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x01a>; 942 943//===----------------------------------------------------------------------===// 944// GFX6, GFX7, GFX10, GFX11. 945//===----------------------------------------------------------------------===// 946 947let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in { 948 multiclass VOP1_Real_e32_gfx6_gfx7<bits<9> op> { 949 def _e32_gfx6_gfx7 : 950 VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>, 951 VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>; 952 } 953 multiclass VOP1_Real_e64_gfx6_gfx7<bits<9> op> { 954 def _e64_gfx6_gfx7 : 955 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>, 956 VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 957 } 958} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" 959 960multiclass VOP1_Real_gfx6_gfx7<bits<9> op> : 961 VOP1_Real_e32_gfx6_gfx7<op>, VOP1_Real_e64_gfx6_gfx7<op>; 962 963multiclass VOP1_Real_gfx6_gfx7_gfx10<bits<9> op> : 964 VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10<op>; 965 966multiclass VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<bits<9> op> : 967 VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_FULL_gfx11<op>; 968 969multiclass VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<bits<9> op> : 970 VOP1_Real_gfx6_gfx7_gfx10<op>, VOP1_Real_NO_DPP_gfx11<op>; 971 972defm V_LOG_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x026>; 973defm V_RCP_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x028>; 974defm V_RCP_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x029>; 975defm V_RSQ_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x02c>; 976defm V_RSQ_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x02d>; 977defm V_RCP_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x030>; 978defm V_RSQ_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x032>; 979 980defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x000>; 981defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x001>; 982defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x003>; 983defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x004>; 984defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x005>; 985defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x006>; 986defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x007>; 987defm V_CVT_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x008>; 988defm V_CVT_F16_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00a>; 989defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10<0x00b>; 990defm V_CVT_RPI_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00c>; 991defm V_CVT_FLR_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00d>; 992defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x00e>; 993defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x00f>; 994defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x010>; 995defm V_CVT_F32_UBYTE0 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x011>; 996defm V_CVT_F32_UBYTE1 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x012>; 997defm V_CVT_F32_UBYTE2 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x013>; 998defm V_CVT_F32_UBYTE3 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x014>; 999defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x015>; 1000defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x016>; 1001defm V_FRACT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x020>; 1002defm V_TRUNC_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x021>; 1003defm V_CEIL_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x022>; 1004defm V_RNDNE_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x023>; 1005defm V_FLOOR_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x024>; 1006defm V_EXP_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x025>; 1007defm V_LOG_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x027>; 1008defm V_RCP_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x02a>; 1009defm V_RCP_IFLAG_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x02b>; 1010defm V_RSQ_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x02e>; 1011defm V_RCP_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x02f>; 1012defm V_RSQ_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x031>; 1013defm V_SQRT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x033>; 1014defm V_SQRT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x034>; 1015defm V_SIN_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x035>; 1016defm V_COS_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x036>; 1017defm V_NOT_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x037>; 1018defm V_BFREV_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x038>; 1019defm V_FFBH_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x039>; 1020defm V_FFBL_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x03a>; 1021defm V_FFBH_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x03b>; 1022defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x03c>; 1023defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x03d>; 1024defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x03e>; 1025defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x03f>; 1026defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x040>; 1027defm V_CLREXCP : VOP1_Real_gfx6_gfx7_gfx10<0x041>; 1028defm V_MOVRELD_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x042>; 1029defm V_MOVRELS_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x043>; 1030defm V_MOVRELSD_B32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x044>; 1031 1032//===----------------------------------------------------------------------===// 1033// GFX8, GFX9 (VI). 1034//===----------------------------------------------------------------------===// 1035 1036class VOP1_DPPe <bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile P = ps.Pfl> : 1037 VOP_DPPe <P> { 1038 bits<8> vdst; 1039 let Inst{8-0} = 0xfa; // dpp 1040 let Inst{16-9} = op; 1041 let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); 1042 let Inst{31-25} = 0x3f; //encoding 1043} 1044 1045multiclass VOP1Only_Real_vi <bits<10> op> { 1046 let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in { 1047 def _vi : 1048 VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.VI>, 1049 VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>; 1050 } 1051} 1052 1053multiclass VOP1_Real_e32e64_vi <bits<10> op> { 1054 let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in { 1055 def _e32_vi : 1056 VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>, 1057 VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>; 1058 def _e64_vi : 1059 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>, 1060 VOP3e_vi <!add(0x140, op), !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 1061 } 1062} 1063 1064multiclass VOP1_Real_vi <bits<10> op> { 1065 defm NAME : VOP1_Real_e32e64_vi <op>; 1066 1067 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA then 1068 def _sdwa_vi : 1069 VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>, 1070 VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; 1071 1072 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then 1073 def _sdwa_gfx9 : 1074 VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>, 1075 VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; 1076 1077 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then 1078 def _dpp_vi : 1079 VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>, 1080 VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>; 1081} 1082 1083defm V_NOP : VOP1_Real_vi <0x0>; 1084defm V_MOV_B32 : VOP1_Real_vi <0x1>; 1085defm V_CVT_I32_F64 : VOP1_Real_vi <0x3>; 1086defm V_CVT_F64_I32 : VOP1_Real_vi <0x4>; 1087defm V_CVT_F32_I32 : VOP1_Real_vi <0x5>; 1088defm V_CVT_F32_U32 : VOP1_Real_vi <0x6>; 1089defm V_CVT_U32_F32 : VOP1_Real_vi <0x7>; 1090defm V_CVT_I32_F32 : VOP1_Real_vi <0x8>; 1091defm V_CVT_F16_F32 : VOP1_Real_vi <0xa>; 1092defm V_CVT_F32_F16 : VOP1_Real_vi <0xb>; 1093defm V_CVT_RPI_I32_F32 : VOP1_Real_vi <0xc>; 1094defm V_CVT_FLR_I32_F32 : VOP1_Real_vi <0xd>; 1095defm V_CVT_OFF_F32_I4 : VOP1_Real_vi <0xe>; 1096defm V_CVT_F32_F64 : VOP1_Real_vi <0xf>; 1097defm V_CVT_F64_F32 : VOP1_Real_vi <0x10>; 1098defm V_CVT_F32_UBYTE0 : VOP1_Real_vi <0x11>; 1099defm V_CVT_F32_UBYTE1 : VOP1_Real_vi <0x12>; 1100defm V_CVT_F32_UBYTE2 : VOP1_Real_vi <0x13>; 1101defm V_CVT_F32_UBYTE3 : VOP1_Real_vi <0x14>; 1102defm V_CVT_U32_F64 : VOP1_Real_vi <0x15>; 1103defm V_CVT_F64_U32 : VOP1_Real_vi <0x16>; 1104defm V_FRACT_F32 : VOP1_Real_vi <0x1b>; 1105defm V_TRUNC_F32 : VOP1_Real_vi <0x1c>; 1106defm V_CEIL_F32 : VOP1_Real_vi <0x1d>; 1107defm V_RNDNE_F32 : VOP1_Real_vi <0x1e>; 1108defm V_FLOOR_F32 : VOP1_Real_vi <0x1f>; 1109defm V_EXP_F32 : VOP1_Real_vi <0x20>; 1110defm V_LOG_F32 : VOP1_Real_vi <0x21>; 1111defm V_RCP_F32 : VOP1_Real_vi <0x22>; 1112defm V_RCP_IFLAG_F32 : VOP1_Real_vi <0x23>; 1113defm V_RSQ_F32 : VOP1_Real_vi <0x24>; 1114defm V_RCP_F64 : VOP1_Real_vi <0x25>; 1115defm V_RSQ_F64 : VOP1_Real_vi <0x26>; 1116defm V_SQRT_F32 : VOP1_Real_vi <0x27>; 1117defm V_SQRT_F64 : VOP1_Real_vi <0x28>; 1118defm V_SIN_F32 : VOP1_Real_vi <0x29>; 1119defm V_COS_F32 : VOP1_Real_vi <0x2a>; 1120defm V_NOT_B32 : VOP1_Real_vi <0x2b>; 1121defm V_BFREV_B32 : VOP1_Real_vi <0x2c>; 1122defm V_FFBH_U32 : VOP1_Real_vi <0x2d>; 1123defm V_FFBL_B32 : VOP1_Real_vi <0x2e>; 1124defm V_FFBH_I32 : VOP1_Real_vi <0x2f>; 1125defm V_FREXP_EXP_I32_F64 : VOP1_Real_vi <0x30>; 1126defm V_FREXP_MANT_F64 : VOP1_Real_vi <0x31>; 1127defm V_FRACT_F64 : VOP1_Real_vi <0x32>; 1128defm V_FREXP_EXP_I32_F32 : VOP1_Real_vi <0x33>; 1129defm V_FREXP_MANT_F32 : VOP1_Real_vi <0x34>; 1130defm V_CLREXCP : VOP1_Real_vi <0x35>; 1131defm V_MOVRELD_B32 : VOP1_Real_e32e64_vi <0x36>; 1132defm V_MOVRELS_B32 : VOP1_Real_e32e64_vi <0x37>; 1133defm V_MOVRELSD_B32 : VOP1_Real_e32e64_vi <0x38>; 1134defm V_TRUNC_F64 : VOP1_Real_vi <0x17>; 1135defm V_CEIL_F64 : VOP1_Real_vi <0x18>; 1136defm V_FLOOR_F64 : VOP1_Real_vi <0x1A>; 1137defm V_RNDNE_F64 : VOP1_Real_vi <0x19>; 1138defm V_LOG_LEGACY_F32 : VOP1_Real_vi <0x4c>; 1139defm V_EXP_LEGACY_F32 : VOP1_Real_vi <0x4b>; 1140defm V_CVT_F16_U16 : VOP1_Real_vi <0x39>; 1141defm V_CVT_F16_I16 : VOP1_Real_vi <0x3a>; 1142defm V_CVT_U16_F16 : VOP1_Real_vi <0x3b>; 1143defm V_CVT_I16_F16 : VOP1_Real_vi <0x3c>; 1144defm V_RCP_F16 : VOP1_Real_vi <0x3d>; 1145defm V_SQRT_F16 : VOP1_Real_vi <0x3e>; 1146defm V_RSQ_F16 : VOP1_Real_vi <0x3f>; 1147defm V_LOG_F16 : VOP1_Real_vi <0x40>; 1148defm V_EXP_F16 : VOP1_Real_vi <0x41>; 1149defm V_FREXP_MANT_F16 : VOP1_Real_vi <0x42>; 1150defm V_FREXP_EXP_I16_F16 : VOP1_Real_vi <0x43>; 1151defm V_FLOOR_F16 : VOP1_Real_vi <0x44>; 1152defm V_CEIL_F16 : VOP1_Real_vi <0x45>; 1153defm V_TRUNC_F16 : VOP1_Real_vi <0x46>; 1154defm V_RNDNE_F16 : VOP1_Real_vi <0x47>; 1155defm V_FRACT_F16 : VOP1_Real_vi <0x48>; 1156defm V_SIN_F16 : VOP1_Real_vi <0x49>; 1157defm V_COS_F16 : VOP1_Real_vi <0x4a>; 1158defm V_SWAP_B32 : VOP1Only_Real_vi <0x51>; 1159 1160defm V_SAT_PK_U8_I16 : VOP1_Real_vi<0x4f>; 1161defm V_CVT_NORM_I16_F16 : VOP1_Real_vi<0x4d>; 1162defm V_CVT_NORM_U16_F16 : VOP1_Real_vi<0x4e>; 1163 1164defm V_ACCVGPR_MOV_B32 : VOP1Only_Real_vi<0x52>; 1165 1166let VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [EXEC, M0] in { 1167 1168// Copy of v_mov_b32 with $vdst as a use operand for use with VGPR 1169// indexing mode. vdst can't be treated as a def for codegen purposes, 1170// and an implicit use and def of the super register should be added. 1171def V_MOV_B32_indirect_write : VPseudoInstSI<(outs), 1172 (ins getVALUDstForVT<i32>.ret:$vdst, getVOPSrc0ForVT<i32, 0>.ret:$src0)>, 1173 PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst, 1174 getVOPSrc0ForVT<i32, 0>.ret:$src0)>; 1175 1176// Copy of v_mov_b32 for use with VGPR indexing mode. An implicit use of the 1177// super register should be added. 1178def V_MOV_B32_indirect_read : VPseudoInstSI< 1179 (outs getVALUDstForVT<i32>.ret:$vdst), 1180 (ins getVOPSrc0ForVT<i32, 0>.ret:$src0)>, 1181 PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst, 1182 getVOPSrc0ForVT<i32, 0>.ret:$src0)>; 1183 1184} // End VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [M0] 1185 1186let OtherPredicates = [isGFX8Plus] in { 1187 1188def : GCNPat < 1189 (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask, 1190 timm:$bank_mask, timm:$bound_ctrl)), 1191 (V_MOV_B32_dpp VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp_ctrl), 1192 (as_i32timm $row_mask), (as_i32timm $bank_mask), 1193 (as_i1timm $bound_ctrl)) 1194>; 1195 1196def : GCNPat < 1197 (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl, 1198 timm:$row_mask, timm:$bank_mask, 1199 timm:$bound_ctrl)), 1200 (V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src, (as_i32timm $dpp_ctrl), 1201 (as_i32timm $row_mask), (as_i32timm $bank_mask), 1202 (as_i1timm $bound_ctrl)) 1203>; 1204 1205} // End OtherPredicates = [isGFX8Plus] 1206 1207let OtherPredicates = [isGFX8Plus] in { 1208def : GCNPat< 1209 (i32 (anyext i16:$src)), 1210 (COPY $src) 1211>; 1212 1213def : GCNPat< 1214 (i64 (anyext i16:$src)), 1215 (REG_SEQUENCE VReg_64, 1216 (i32 (COPY $src)), sub0, 1217 (V_MOV_B32_e32 (i32 0)), sub1) 1218>; 1219 1220def : GCNPat< 1221 (i16 (trunc i32:$src)), 1222 (COPY $src) 1223>; 1224 1225def : GCNPat < 1226 (i16 (trunc i64:$src)), 1227 (EXTRACT_SUBREG $src, sub0) 1228>; 1229 1230} // End OtherPredicates = [isGFX8Plus] 1231 1232//===----------------------------------------------------------------------===// 1233// GFX9 1234//===----------------------------------------------------------------------===// 1235 1236multiclass VOP1_Real_gfx9 <bits<10> op> { 1237 let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in { 1238 defm NAME : VOP1_Real_e32e64_vi <op>; 1239 } 1240 1241 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then 1242 def _sdwa_gfx9 : 1243 VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>, 1244 VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; 1245 1246 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then 1247 def _dpp_gfx9 : 1248 VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>, 1249 VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>; 1250 1251} 1252 1253multiclass VOP1_Real_NoDstSel_SDWA_gfx9 <bits<10> op> { 1254 let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in { 1255 defm NAME : VOP1_Real_e32e64_vi <op>; 1256 } 1257 1258 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then 1259 def _sdwa_gfx9 : 1260 VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>, 1261 VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> { 1262 let Inst{42-40} = 6; 1263 } 1264 1265 if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then 1266 def _dpp_gfx9 : 1267 VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>, 1268 VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>; 1269} 1270 1271defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>; 1272 1273let AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9" in 1274defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>; 1275 1276let OtherPredicates = [HasFP8Insts] in { 1277defm V_CVT_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x54>; 1278defm V_CVT_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x55>; 1279defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>; 1280defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>; 1281} 1282 1283//===----------------------------------------------------------------------===// 1284// GFX10 1285//===----------------------------------------------------------------------===// 1286 1287let OtherPredicates = [isGFX10Only] in { 1288def : GCNPat < 1289 (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)), 1290 (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, VGPR_32:$src, 1291 (as_i32timm $dpp8), (i32 DPP8Mode.FI_0)) 1292>; 1293} // End OtherPredicates = [isGFX10Only] 1294 1295//===----------------------------------------------------------------------===// 1296// GFX11 1297//===----------------------------------------------------------------------===// 1298 1299let OtherPredicates = [isGFX11Only] in { 1300def : GCNPat < 1301 (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)), 1302 (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, VGPR_32:$src, 1303 (as_i32timm $dpp8), (i32 DPP8Mode.FI_0)) 1304>; 1305} // End OtherPredicates = [isGFX11Only] 1306