//===-- VOP1Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP1 Classes
//===----------------------------------------------------------------------===//

// 32-bit VOP1 encoding: src0 in bits 8-0, the opcode in bits 16-9, vdst in
// bits 24-17 and the fixed value 0x3f in bits 31-25.
class VOP1e <bits<8> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;

  let Inst{8-0}   = !if(P.HasSrc0, src0{8-0}, ?);
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; //encoding
}

// VOP1 encoding for the SDWA form: the src0 field is fixed to 0xf9.
class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

// Same low-dword layout as VOP1_SDWAe, on top of the VOP_SDWA9Ae base.
class VOP1_SDWA9Ae <bits<8> op, VOPProfile P> : VOP_SDWA9Ae <P> {
  bits<8> vdst;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

// Pseudo for the 32-bit VOP1 form. The "_e32" suffix is omitted when VOP1Only
// is set.
class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1Only = 0> :
  VOP_Pseudo <opName, !if(VOP1Only, "", "_e32"), P, P.Outs32, P.Ins32, "", pattern> {

  let AsmOperands = P.Asm32;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  // The mode register is read whenever the destination or src0 is a
  // floating-point type.
  let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret);

  let mayRaiseFPException = ReadsModeReg;

  let VOP1 = 1;
  let VALU = 1;
  let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);

  let AsmVariantName = AMDGPUAsmVariants.Default;
}

// Real (encodable) VOP1 instruction derived from a VOP1_Pseudo for a given
// encoding family.
class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemonic > :
  VOP_Real <ps>,
  InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let VALU = 1;
  let VOP1 = 1;
  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let AsmVariantName       = ps.AsmVariantName;
  let Constraints          = ps.Constraints;
  let DisableEncoding      = ps.DisableEncoding;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses                 = ps.Uses;
  let Defs                 = ps.Defs;
  let SchedRW              = ps.SchedRW;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let TRANS                = ps.TRANS;
}

// SDWA pseudo for VOP1; selects the "cvtSdwaVOP1" asm match converter.
class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP1";
}

// DPP pseudo for VOP1.
class VOP1_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_DPP_Pseudo <OpName, P, pattern> {
}

// Builds the selection pattern list for a single-source node. The pattern
// shape depends on whether the profile has source modifiers, only omod/clamp,
// or neither.
class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret =
    !if(P.HasModifiers,
        [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods P.Src0VT:$src0, i32:$src0_modifiers))))],
        !if(P.HasOMod,
            [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0,
                                                  i1:$clamp, i32:$omod))))],
            [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]
        )
    );
}

// Defines the pseudos for one VOP1 instruction: _e32 and _e64 always, and
// _sdwa, _dpp and _e64_dpp when the profile enables the extension. Also
// defines mnemonic aliases mapping the suffixed names back to opName.
multiclass VOP1Inst <string opName, VOPProfile P,
                     SDPatternOperator node = null_frag, int VOPDOp = -1> {
  // We only want to set this on the basic, non-SDWA or DPP forms.
  defvar should_mov_imm = !or(!eq(opName, "v_mov_b32"),
                              !eq(opName, "v_mov_b64"));

  let isMoveImm = should_mov_imm in {
    if !eq(VOPDOp, -1) then
      def _e32 : VOP1_Pseudo <opName, P>;
    else
      // Only for V_MOV_B32
      def _e32 : VOP1_Pseudo <opName, P>, VOPD_Component<VOPDOp, "v_mov_b32">;
    def _e64 : VOP3InstBase <opName, P, node>;
  }

  foreach _ = BoolToList<P.HasExtSDWA>.ret in
    def _sdwa : VOP1_SDWA_Pseudo <opName, P>;

  foreach _ = BoolToList<P.HasExtDPP>.ret in
    def _dpp : VOP1_DPP_Pseudo <opName, P>;

  let SubtargetPredicate = isGFX11Plus in {
    foreach _ = BoolToList<P.HasExtVOP3DPP>.ret in
      def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
  } // End SubtargetPredicate = isGFX11Plus

  def : MnemonicAlias<opName#"_e32", opName>, LetDummies;
  def : MnemonicAlias<opName#"_e64", opName>, LetDummies;

  foreach _ = BoolToList<P.HasExtSDWA>.ret in
    def : MnemonicAlias<opName#"_sdwa", opName>, LetDummies;

  foreach _ = BoolToList<P.HasExtDPP>.ret in
    def : MnemonicAlias<opName#"_dpp", opName, AMDGPUAsmVariants.DPP>, LetDummies;
}

// Defines both variants of a 16-bit instruction: the regular one (predicated
// on NotHasTrue16BitInsts and Has16BitInsts) and a "_t16" one using
// VOPProfile_True16<P> (predicated on HasTrue16BitInsts).
multiclass VOP1Inst_t16<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag> {
  let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in {
    defm NAME : VOP1Inst<opName, P, node>;
  }
  let OtherPredicates = [HasTrue16BitInsts] in {
    defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_True16<P>, node>;
  }
}

// Special profile for instructions which have clamp
// and output modifiers (but have no input modifiers)
class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
  VOPProfile<[dstVt, srcVt, untyped, untyped]> {

  let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
  let InsVOP3Base = (ins Src0VOP3DPP:$src0, clampmod:$clamp, omod:$omod);
  let AsmVOP3Base = "$vdst, $src0$clamp$omod";

  let HasModifiers = 0;
  let HasClamp = 1;
}

// Same overrides as VOPProfileI2F, applied on top of VOPProfile_True16.
class VOPProfileI2F_True16<ValueType dstVt, ValueType srcVt> :
  VOPProfile_True16<VOPProfile<[dstVt, srcVt, untyped, untyped]>> {

  let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
  let InsVOP3Base = (ins Src0VOP3DPP:$src0, clampmod:$clamp, omod:$omod);
  let AsmVOP3Base = "$vdst, $src0$clamp$omod";

  let HasModifiers = 0;
  let HasClamp = 1;
}

def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;
def VOP1_F16_I16_t16 : VOPProfileI2F_True16 <f16, i16>;

def VOP_NOP_PROFILE : VOPProfile <[untyped, untyped, untyped, untyped]>{
  let HasExtVOP3DPP = 0;
}

// OMod clears exceptions when set. OMod was always an operand, but it's
// now explicitly set.
class VOP_SPECIAL_OMOD_PROF<ValueType dstVt, ValueType srcVt> :
  VOPProfile<[dstVt, srcVt, untyped, untyped]> {

  let HasOMod = 1;
}
def VOP_I32_F32_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f32>;
def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f64>;
def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i16, f16>;
def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_True16<VOP_I16_F16> {
  let HasOMod = 1;
}

//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//

let VOPAsmPrefer32Bit = 1 in {
defm V_NOP : VOP1Inst <"v_nop", VOP_NOP_PROFILE>;
}

def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
  let InsVOPDX = (ins Src0RC32:$src0X);
  let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X);
  let InsVOPDY = (ins Src0RC32:$src0Y);
  let InsVOPDYDeferred = (ins VSrc_f32_Deferred:$src0Y);
}

// isMoveImm is not set here; VOP1Inst sets it for v_mov_b32 / v_mov_b64.
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;

let SubtargetPredicate = isGFX940Plus in
defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
} // End isReMaterializable = 1, isAsCheapAsAMove = 1

// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: Make profile for this, there is VOP3 encoding also
def V_READFIRSTLANE_B32 :
  InstSI <(outs SReg_32:$vdst),
    (ins VRegOrLds_32:$src0),
    "v_readfirstlane_b32 $vdst, $src0",
    [(set i32:$vdst, (int_amdgcn_readfirstlane (i32 VRegOrLds_32:$src0)))]>,
  Enc32 {

  let isCodeGenOnly = 0;
  let UseNamedOperandTable = 1;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  let VOP1 = 1;
  let VALU = 1;
  let Uses = [EXEC];
  let isConvergent = 1;

  bits<8> vdst;
  bits<9> src0;

  let Inst{8-0} = src0;
  let Inst{16-9} = 0x2;
  let Inst{24-17} = vdst;
  let Inst{31-25} = 0x3f; //encoding
}

let isReMaterializable = 1 in {
let SchedRW = [WriteDoubleCvt] in {
// OMod clears exceptions when set in this instruction
defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64_SPECIAL_OMOD, fp_to_sint>;

let mayRaiseFPException = 0 in {
defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
}

defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
// OMod clears exceptions when set in this instruction
defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64_SPECIAL_OMOD, fp_to_uint>;

let mayRaiseFPException = 0 in {
defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
}

} // End SchedRW = [WriteDoubleCvt]

let SchedRW = [WriteFloatCvt] in {

// XXX: Does this really not raise exceptions? The manual claims the
// 16-bit ones can.
let mayRaiseFPException = 0 in {
defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
}

// OMod clears exceptions when set in these 2 instructions
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_sint>;
let FPDPRounding = 1, isReMaterializable = 0 in {
  let OtherPredicates = [NotHasTrue16BitInsts] in
  defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
  let OtherPredicates = [HasTrue16BitInsts] in
  defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_True16<VOP_F16_F32>, fpround>;
} // End FPDPRounding = 1, isReMaterializable = 0

let OtherPredicates = [NotHasTrue16BitInsts] in
defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
let OtherPredicates = [HasTrue16BitInsts] in
defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_True16<VOP_F32_F16>, fpextend>;

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
} // End ReadsModeReg = 0, mayRaiseFPException = 0
} // End SchedRW = [WriteFloatCvt]

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;
} // End ReadsModeReg = 0, mayRaiseFPException = 0

defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>;
defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>;
defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;

let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>;
defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>;
defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32, AMDGPUrcp_iflag>;
defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, any_amdgcn_sqrt>;
} // End TRANS = 1, SchedRW = [WriteTrans32]

let TRANS = 1, SchedRW = [WriteTrans64] in {
defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>;
defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>;
defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, any_amdgcn_sqrt>;
} // End TRANS = 1, SchedRW = [WriteTrans64]

let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>;
defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>;
} // End TRANS = 1, SchedRW = [WriteTrans32]

defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>;
defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, DivergentUnaryFrag<bitreverse>>;
defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32, AMDGPUffbh_u32>;
defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32, AMDGPUffbl_b32>;
defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>;

let SchedRW = [WriteDoubleAdd] in {
defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64_SPECIAL_OMOD, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>;
let FPDPRounding = 1 in {
defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>;
} // End FPDPRounding = 1
} // End SchedRW = [WriteDoubleAdd]

defm V_FREXP_EXP_I32_F32 : VOP1Inst <"v_frexp_exp_i32_f32", VOP_I32_F32, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F32 : VOP1Inst <"v_frexp_mant_f32", VOP_F32_F32, int_amdgcn_frexp_mant>;
} // End isReMaterializable = 1

let VOPAsmPrefer32Bit = 1 in {
defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
}

// Restrict src0 to be VGPR
def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> {
  let Src0RC32 = VRegSrc_32;
  let Src0RC64 = VRegSrc_32;
}

// Special case because there are no true output operands. Hack vdst
// to be a src operand. The custom inserter must add a tied implicit
// def and use of the super register since there seems to be no way to
// add an implicit def of a virtual register in tablegen.
class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, untyped]> {
  let Src0RC32 = VOPDstOperand<VGPR_32>;
  let Src0RC64 = VOPDstOperand<VGPR_32>;

  // No real outputs: $vdst is listed among the inputs instead.
  let Outs = (outs);
  let Ins32 = (ins Src0RC32:$vdst, Src1RC:$src0);
  let Ins64 = (ins Src0RC64:$vdst, Src1RC:$src0);
  let Asm32 = getAsm32<1, 1>.ret;

  let OutsSDWA = (outs Src0RC32:$vdst);
  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel);
  let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;

  let OutsDPP = (outs Src0RC32:$vdst);
  let InsDPP16 = (ins Src0RC32:$old, Src0RC32:$src0,
                      dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                      bank_mask:$bank_mask, bound_ctrl:$bound_ctrl, FI:$fi);
  let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;
  let InsDPP8 = (ins Src0RC32:$old, Src0RC32:$src0, dpp8:$dpp8, FI:$fi);
  let AsmDPP8 = getAsmDPP8<1, 1, 0>.ret;

  let OutsVOP3DPP = (outs Src0RC64:$vdst);
  let InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
  let InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;
  let InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, Src0RC64, NumSrcArgs>.ret;

  let AsmVOP3Base =
    getAsmVOP3Base<NumSrcArgs, 1 /* HasDst */, HasClamp,
                   HasOpSel, HasOMod, IsVOP3P, HasModifiers,
                   HasModifiers, HasModifiers, HasModifiers>.ret;

  let HasDst = 0;
  let EmitDst = 1; // force vdst emission
}

def VOP_MOVRELD : VOP_MOVREL<VSrc_b32>;
def VOP_MOVRELSD : VOP_MOVREL<VRegSrc_32>;

let SubtargetPredicate = HasMovrel, Uses = [M0, EXEC] in {
 // v_movreld_b32 is a special case because the destination output
 // register is really a source. It isn't actually read (but may be
 // written), and is only to provide the base register to start
 // indexing from. Tablegen seems to not let you define an implicit
 // virtual register output for the super register being written into,
 // so this must have an implicit def of the register added to it.
defm V_MOVRELD_B32 : VOP1Inst <"v_movreld_b32", VOP_MOVRELD>;
defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_MOVRELS>;
defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_MOVRELSD>;
} // End SubtargetPredicate = HasMovrel, Uses = [M0, EXEC]

let isReMaterializable = 1 in {
let SubtargetPredicate = isGFX6GFX7 in {
  let TRANS = 1, SchedRW = [WriteTrans32] in {
    defm V_LOG_CLAMP_F32 :
      VOP1Inst<"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
    defm V_RCP_CLAMP_F32 :
      VOP1Inst<"v_rcp_clamp_f32", VOP_F32_F32>;
    defm V_RCP_LEGACY_F32 :
      VOP1Inst<"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
    defm V_RSQ_CLAMP_F32 :
      VOP1Inst<"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
    defm V_RSQ_LEGACY_F32 :
      VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, int_amdgcn_rsq_legacy>;
  } // End TRANS = 1, SchedRW = [WriteTrans32]

  // NOTE(review): unlike the other WriteTrans64 group in this file, TRANS is
  // not set here - confirm whether that is intentional.
  let SchedRW = [WriteTrans64] in {
    defm V_RCP_CLAMP_F64 :
      VOP1Inst<"v_rcp_clamp_f64", VOP_F64_F64>;
    defm V_RSQ_CLAMP_F64 :
      VOP1Inst<"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>;
  } // End SchedRW = [WriteTrans64]
} // End SubtargetPredicate = isGFX6GFX7

let SubtargetPredicate = isGFX7GFX8GFX9 in {
  let TRANS = 1, SchedRW = [WriteTrans32] in {
    defm V_LOG_LEGACY_F32 : VOP1Inst<"v_log_legacy_f32", VOP_F32_F32>;
    defm V_EXP_LEGACY_F32 : VOP1Inst<"v_exp_legacy_f32", VOP_F32_F32>;
  } // End TRANS = 1, SchedRW = [WriteTrans32]
} // End SubtargetPredicate = isGFX7GFX8GFX9

let SubtargetPredicate = isGFX7Plus in {
  let SchedRW = [WriteDoubleAdd] in {
    defm V_TRUNC_F64 : VOP1Inst<"v_trunc_f64", VOP_F64_F64, ftrunc>;
    defm V_CEIL_F64 : VOP1Inst<"v_ceil_f64", VOP_F64_F64, fceil>;
    defm V_RNDNE_F64 : VOP1Inst<"v_rndne_f64", VOP_F64_F64, frint>;
    defm V_FLOOR_F64 : VOP1Inst<"v_floor_f64", VOP_F64_F64, ffloor>;
  } // End SchedRW = [WriteDoubleAdd]
} // End SubtargetPredicate = isGFX7Plus
} // End isReMaterializable = 1

let FPDPRounding = 1 in {
let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
}
let OtherPredicates = [HasTrue16BitInsts] in {
defm V_CVT_F16_U16_t16 : VOP1Inst <"v_cvt_f16_u16_t16", VOP1_F16_I16_t16, uint_to_fp>;
defm V_CVT_F16_I16_t16 : VOP1Inst <"v_cvt_f16_i16_t16", VOP1_F16_I16_t16, sint_to_fp>;
}
} // End FPDPRounding = 1
// OMod clears exceptions when set in these two instructions
let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_uint>;
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_sint>;
}
let OtherPredicates = [HasTrue16BitInsts] in {
defm V_CVT_U16_F16_t16 : VOP1Inst <"v_cvt_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_uint>;
defm V_CVT_I16_F16_t16 : VOP1Inst <"v_cvt_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, fp_to_sint>;
}
let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
defm V_RSQ_F16 : VOP1Inst_t16 <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>;
defm V_LOG_F16 : VOP1Inst_t16 <"v_log_f16", VOP_F16_F16, flog2>;
defm V_EXP_F16 : VOP1Inst_t16 <"v_exp_f16", VOP_F16_F16, fexp2>;
defm V_SIN_F16 : VOP1Inst_t16 <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
} // End TRANS = 1, SchedRW = [WriteTrans32]
defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16_SPECIAL_OMOD, int_amdgcn_frexp_exp>;
}
let OtherPredicates = [HasTrue16BitInsts] in {
defm V_FREXP_EXP_I16_F16_t16 : VOP1Inst <"v_frexp_exp_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16, int_amdgcn_frexp_exp>;
}
defm V_FLOOR_F16 : VOP1Inst_t16 <"v_floor_f16", VOP_F16_F16, ffloor>;
defm V_CEIL_F16 : VOP1Inst_t16 <"v_ceil_f16", VOP_F16_F16, fceil>;
defm V_TRUNC_F16 : VOP1Inst_t16 <"v_trunc_f16", VOP_F16_F16, ftrunc>;
defm V_RNDNE_F16 : VOP1Inst_t16 <"v_rndne_f16", VOP_F16_F16, frint>;
let FPDPRounding = 1 in {
defm V_FRACT_F16 : VOP1Inst_t16 <"v_fract_f16", VOP_F16_F16, AMDGPUfract>;
} // End FPDPRounding = 1

let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
def : GCNPat<
    (f32 (f16_to_fp i16:$src)),
    (V_CVT_F32_F16_e32 $src)
>;
def : GCNPat<
    (i16 (AMDGPUfp_to_f16 f32:$src)),
    (V_CVT_F16_F32_e32 $src)
>;
}
let OtherPredicates = [HasTrue16BitInsts] in {
def : GCNPat<
    (f32 (f16_to_fp i16:$src)),
    (V_CVT_F32_F16_t16_e32 $src)
>;
def : GCNPat<
    (i16 (AMDGPUfp_to_f16 f32:$src)),
    (V_CVT_F16_F32_t16_e32 $src)
>;
}

// Profile for the swap instructions: two VGPR outputs and two VGPR inputs,
// with no 64-bit (VOP3) form.
def VOP_SWAP_I32 : VOPProfile<[i32, i32, i32, untyped]> {
  let Outs32 = (outs VGPR_32:$vdst, VGPR_32:$vdst1);
  let Ins32 = (ins VGPR_32:$src0, VGPR_32:$src1);
  let Outs64 = Outs32;
  let Asm32 = " $vdst, $src0";
  let Asm64 = "";
  let Ins64 = (ins);
}

let SubtargetPredicate = isGFX9Plus in {
  def V_SWAP_B32 : VOP1_Pseudo<"v_swap_b32", VOP_SWAP_I32, [], 1> {
    let Constraints = "$vdst = $src1, $vdst1 = $src0";
    let DisableEncoding = "$vdst1,$src1";
    let SchedRW = [Write64Bit, Write64Bit];
  }

  let isReMaterializable = 1 in
  defm V_SAT_PK_U8_I16 : VOP1Inst_t16<"v_sat_pk_u8_i16", VOP_I16_I32>;

  let mayRaiseFPException = 0 in {
    let OtherPredicates = [Has16BitInsts, NotHasTrue16BitInsts] in {
      defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16_SPECIAL_OMOD>;
      defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16_SPECIAL_OMOD>;
    }
    let OtherPredicates = [HasTrue16BitInsts] in {
      defm V_CVT_NORM_I16_F16_t16 : VOP1Inst<"v_cvt_norm_i16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>;
      defm V_CVT_NORM_U16_F16_t16 : VOP1Inst<"v_cvt_norm_u16_f16_t16", VOP_I16_F16_SPECIAL_OMOD_t16>;
    }
  } // End mayRaiseFPException = 0
} // End SubtargetPredicate = isGFX9Plus

let SubtargetPredicate = isGFX9Only in {
  defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
} // End SubtargetPredicate = isGFX9Only

// Profile for the FP8/BF8 to F32 conversions: an I2F profile with SDWA
// enabled and no dst_sel operand.
class VOPProfile_Base_CVT_F32_F8<ValueType vt> : VOPProfileI2F <vt, i32> {
  let HasExtSDWA = 1;
  let HasExtSDWA9 = 1;
  let HasExt = 1;
  let DstRCSDWA = getVALUDstForVT<vt>.ret;
  let InsSDWA = (ins Bin32SDWAInputMods:$src0_modifiers, Src0SDWA:$src0,
                     clampmod:$clamp, omod:$omod, src0_sel:$src0_sel);
  let AsmSDWA = "$vdst, $src0_modifiers$clamp$omod $src0_sel"; // No dst_sel
  let AsmSDWA9 = AsmSDWA;
  let EmitDstSel = 0;
}

def VOPProfileCVT_F32_F8 : VOPProfile_Base_CVT_F32_F8 <f32>;
def VOPProfileCVT_PK_F32_F8 : VOPProfile_Base_CVT_F32_F8 <v2f32>;

let SubtargetPredicate = HasFP8Insts, mayRaiseFPException = 0,
    SchedRW = [WriteFloatCvt] in {
  defm V_CVT_F32_FP8 : VOP1Inst<"v_cvt_f32_fp8", VOPProfileCVT_F32_F8>;
  defm V_CVT_F32_BF8 : VOP1Inst<"v_cvt_f32_bf8", VOPProfileCVT_F32_F8>;
  defm V_CVT_PK_F32_FP8 : VOP1Inst<"v_cvt_pk_f32_fp8", VOPProfileCVT_PK_F32_F8>;
  defm V_CVT_PK_F32_BF8 : VOP1Inst<"v_cvt_pk_f32_bf8", VOPProfileCVT_PK_F32_F8>;
}

// Selects the e32 form when index is 0; otherwise selects the SDWA form with
// index passed as the last operand.
class Cvt_F32_F8_Pat<SDPatternOperator node, int index,
    VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
    (f32 (node i32:$src, index)),
    !if (index,
         (inst_sdwa 0, $src, 0, 0, index),
         (inst_e32 $src))
>;

foreach Index = [0, 1, 2, 3] in {
  def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index,
                       V_CVT_F32_FP8_e32, V_CVT_F32_FP8_sdwa>;
  def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index,
                       V_CVT_F32_BF8_e32, V_CVT_F32_BF8_sdwa>;
}

// Packed variant: selects the e32 form when index is 0; otherwise selects the
// SDWA form with SDWA.WORD_1 as the last operand.
class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
    VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
    (v2f32 (node i32:$src, index)),
    !if (index,
         (inst_sdwa 0, $src, 0, 0, SDWA.WORD_1),
         (inst_e32 $src))
>;

foreach Index = [0, -1] in {
  def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index,
                          V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>;
  def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index,
                          V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>;
}

let SubtargetPredicate = isGFX10Plus in {
  defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NO_EXT<VOP_NONE>>;

  let Uses = [M0] in {
    defm V_MOVRELSD_2_B32 :
      VOP1Inst<"v_movrelsd_2_b32", VOP_MOVRELSD>;

    def V_SWAPREL_B32 : VOP1_Pseudo<"v_swaprel_b32", VOP_SWAP_I32, [], 1> {
      let Constraints = "$vdst = $src1, $vdst1 = $src0";
      let DisableEncoding = "$vdst1,$src1";
      let SchedRW = [Write64Bit, Write64Bit];
    }
  } // End Uses = [M0]
} // End SubtargetPredicate = isGFX10Plus

// Profile for v_accvgpr_mov_b32: destination and src0 are both AGPR_32.
def VOPProfileAccMov : VOP_NO_EXT<VOP_I32_I32> {
  let DstRC = RegisterOperand<AGPR_32>;
  let Src0RC32 = RegisterOperand<AGPR_32>;
  let Asm32 = " $vdst, $src0";
}

def V_ACCVGPR_MOV_B32 : VOP1_Pseudo<"v_accvgpr_mov_b32", VOPProfileAccMov, [], 1> {
  let SubtargetPredicate = isGFX90APlus;
  let isReMaterializable = 1;
  let isAsCheapAsAMove = 1;
}

let SubtargetPredicate = isGFX11Plus in {
  // Restrict src0 to be VGPR
  def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS,
                                      getVOP1Pat64<int_amdgcn_permlane64,
                                                   VOP_MOVRELS>.ret,
                                      /*VOP1Only=*/ 1>;
  defm V_NOT_B16 : VOP1Inst_t16<"v_not_b16", VOP_I16_I16>;
  defm V_CVT_I32_I16 : VOP1Inst_t16<"v_cvt_i32_i16", VOP_I32_I16>;
  defm V_CVT_U32_U16 : VOP1Inst_t16<"v_cvt_u32_u16", VOP_I32_I16>;
} // End SubtargetPredicate = isGFX11Plus

//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//

// VOP1 DPP encoding: the src0 field is fixed to 0xfa. Selected flags are
// copied from the pseudo.
class VOP1_DPP<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 = 0> :
    VOP_DPP<ps.OpName, p, isDPP16> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;
  let TRANS = ps.TRANS;

  bits<8> vdst;
  let Inst{8-0}   = 0xfa;
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f;
}

// DPP16 real instruction for a given subtarget encoding family; requires
// HasDPP16.
class VOP1_DPP16<bits<8> op, VOP1_DPP_Pseudo ps, int subtarget, VOPProfile p = ps.Pfl> :
    VOP1_DPP<op, ps, p, 1>,
    SIMCInstr <ps.PseudoInstr, subtarget> {
  let AssemblerPredicate = HasDPP16;
  let SubtargetPredicate = HasDPP16;
}

// VOP1 DPP8 encoding: the src0 field carries `fi`. Copies the same flags from
// the pseudo as VOP1_DPP does, including TRANS.
class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
    VOP_DPP8<ps.OpName, p> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;
  let TRANS = ps.TRANS;

  bits<8> vdst;
  let Inst{8-0}   = fi;
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f;
}

//===----------------------------------------------------------------------===//
// GFX11.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
  // Real instruction for a pseudo that has only the VOP1 form (no suffix).
  multiclass VOP1Only_Real_gfx11<bits<9> op> {
    let IsSingle = 1 in
      def _gfx11 :
        VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.GFX11>,
        VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
  }
  multiclass VOP1_Real_e32_gfx11<bits<9> op, string opName = NAME> {
    defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
    def _e32_gfx11 :
      VOP1_Real<ps, SIEncodingFamily.GFX11>,
      VOP1e<op{7-0}, ps.Pfl>;
  }
  // As VOP1_Real_e32_gfx11, but with the asm mnemonic replaced by asmName.
  multiclass VOP1_Real_e32_with_name_gfx11<bits<9> op, string opName,
                                           string asmName> {
    defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
    let AsmString = asmName # ps.AsmOperands in {
      defm NAME : VOP1_Real_e32_gfx11<op, opName>;
    }
  }
  multiclass VOP1_Real_e64_gfx11<bits<9> op> {
    def _e64_gfx11 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX11>,
      VOP3e_gfx11<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
  multiclass VOP1_Real_dpp_gfx11<bits<9> op, string opName = NAME> {
    def _dpp_gfx11 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX11> {
      let DecoderNamespace = "DPPGFX11";
    }
  }
  multiclass VOP1_Real_dpp_with_name_gfx11<bits<9> op, string opName,
                                           string asmName> {
    defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
    let AsmString = asmName # ps.Pfl.AsmDPP16, DecoderNamespace = "DPPGFX11" in {
      defm NAME : VOP1_Real_dpp_gfx11<op, opName>;
    }
  }
  multiclass VOP1_Real_dpp8_gfx11<bits<9> op, string opName = NAME> {
    defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
    def _dpp8_gfx11 : VOP1_DPP8<op{7-0}, ps> {
      let DecoderNamespace = "DPP8GFX11";
    }
  }
  multiclass VOP1_Real_dpp8_with_name_gfx11<bits<9> op, string opName,
                                            string asmName> {
    defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
    let AsmString = asmName # ps.Pfl.AsmDPP8, DecoderNamespace = "DPP8GFX11" in {
      defm NAME : VOP1_Real_dpp8_gfx11<op, opName>;
    }
  }
} // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11"

multiclass VOP1_Realtriple_e64_gfx11<bits<9> op> {
  defm NAME : VOP3_Realtriple_gfx11<{0, 1, 1, op{6-0}}, /*isSingle=*/ 0, NAME>;
}
multiclass VOP1_Realtriple_e64_with_name_gfx11<bits<9> op, string opName,
                                               string asmName> {
  defm NAME : VOP3_Realtriple_with_name_gfx11<{0, 1, 1, op{6-0}}, opName,
                                              asmName>;
}

// e32 + e64 (with its DPP forms) + DPP16 + DPP8.
multiclass VOP1_Real_FULL_gfx11<bits<9> op> :
  VOP1_Real_e32_gfx11<op>, VOP1_Realtriple_e64_gfx11<op>,
  VOP1_Real_dpp_gfx11<op>, VOP1_Real_dpp8_gfx11<op>;

// Renamed e32 + DPP16 + DPP8 reals, plus a GFX11+ mnemonic alias from the
// pseudo's mnemonic to asmName.
multiclass VOP1_Real_NO_VOP3_with_name_gfx11<bits<9> op, string opName,
                                             string asmName> {
  defm NAME : VOP1_Real_e32_with_name_gfx11<op, opName, asmName>,
              VOP1_Real_dpp_with_name_gfx11<op, opName, asmName>,
              VOP1_Real_dpp8_with_name_gfx11<op, opName, asmName>;
  defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
  def gfx11_alias : MnemonicAlias<ps.Mnemonic, asmName>,
                    Requires<[isGFX11Plus]>;
}

multiclass VOP1_Real_FULL_with_name_gfx11<bits<9> op, string opName,
                                          string asmName> :
  VOP1_Real_NO_VOP3_with_name_gfx11<op, opName, asmName>,
  VOP1_Realtriple_e64_with_name_gfx11<op, opName, asmName>;

// Convenience wrapper for "_t16" pseudos: opName defaults to NAME and asmName
// supplies the mnemonic.
multiclass VOP1_Real_FULL_t16_gfx11<bits<9> op, string asmName,
                                    string opName = NAME> :
  VOP1_Real_FULL_with_name_gfx11<op, opName, asmName>;

multiclass VOP1_Real_NO_DPP_gfx11<bits<9> op> :
  VOP1_Real_e32_gfx11<op>, VOP1_Real_e64_gfx11<op>;

defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11<0x00c,
  "V_CVT_RPI_I32_F32", "v_cvt_nearest_i32_f32">;
defm V_CVT_FLOOR_I32_F32 : VOP1_Real_FULL_with_name_gfx11<0x00d,
  "V_CVT_FLR_I32_F32", "v_cvt_floor_i32_f32">;
defm V_CLZ_I32_U32 : VOP1_Real_FULL_with_name_gfx11<0x039,
  "V_FFBH_U32", "v_clz_i32_u32">;
defm V_CTZ_I32_B32 : VOP1_Real_FULL_with_name_gfx11<0x03a,
  "V_FFBL_B32", "v_ctz_i32_b32">;
defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11<0x03b,
  "V_FFBH_I32", "v_cls_i32">;
defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11<0x067>;
defm V_NOT_B16_t16 : VOP1_Real_FULL_t16_gfx11<0x069, "v_not_b16">;
defm V_CVT_I32_I16_t16 : VOP1_Real_FULL_t16_gfx11<0x06a, "v_cvt_i32_i16">;
defm V_CVT_U32_U16_t16 : VOP1_Real_FULL_t16_gfx11<0x06b, "v_cvt_u32_u16">;

defm V_CVT_F16_U16_t16 : VOP1_Real_FULL_t16_gfx11<0x050, "v_cvt_f16_u16">;
defm V_CVT_F16_I16_t16 : VOP1_Real_FULL_t16_gfx11<0x051, "v_cvt_f16_i16">;
defm V_CVT_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x052, "v_cvt_u16_f16">;
defm V_CVT_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x053, "v_cvt_i16_f16">;
defm V_RCP_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x054, "v_rcp_f16">;
defm V_SQRT_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x055, "v_sqrt_f16">;
defm V_RSQ_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x056, "v_rsq_f16">;
defm V_LOG_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x057, "v_log_f16">;
defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x058, "v_exp_f16">;
defm V_FREXP_MANT_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x059, "v_frexp_mant_f16">;
defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05a, "v_frexp_exp_i16_f16">;
defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05b, "v_floor_f16">;
defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05c, "v_ceil_f16">;
defm V_TRUNC_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05d, "v_trunc_f16">;
defm V_RNDNE_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05e, "v_rndne_f16">;
defm V_FRACT_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x05f, "v_fract_f16">;
defm V_SIN_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x060, "v_sin_f16">;
defm V_COS_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x061, "v_cos_f16">;
defm V_SAT_PK_U8_I16_t16 : VOP1_Real_FULL_t16_gfx11<0x062, "v_sat_pk_u8_i16">;
defm V_CVT_NORM_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x063, "v_cvt_norm_i16_f16">;
defm V_CVT_NORM_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x064, "v_cvt_norm_u16_f16">;

defm V_CVT_F16_F32_t16 : VOP1_Real_FULL_t16_gfx11<0x00a, "v_cvt_f16_f32">;
defm V_CVT_F32_F16_t16 : VOP1_Real_FULL_t16_gfx11<0x00b, "v_cvt_f32_f16">;

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
  multiclass VOP1Only_Real_gfx10<bits<9> op> {
    def _gfx10 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.GFX10>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
  }
  multiclass VOP1_Real_e32_gfx10<bits<9> op> {
    def _e32_gfx10 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
  }
  multiclass VOP1_Real_e64_gfx10<bits<9> op> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
  // Only emitted when the e32 pseudo's profile has HasExtSDWA9 set.
  multiclass VOP1_Real_sdwa_gfx10<bits<9> op> {
    foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx10 :
      VOP_SDWA10_Real<!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP1_SDWA9Ae<op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
      let DecoderNamespace = "SDWA10";
    }
  }
  // Only emitted when the e32 pseudo's profile has HasExt32BitDPP set.
  multiclass VOP1_Real_dpp_gfx10<bits<9> op> {
    foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP>.ret in
    def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10> {
      // NOTE(review): the DPP form is placed in the "SDWA10" decoder
      // namespace - confirm this is intended.
      let DecoderNamespace = "SDWA10";
    }
  }
  multiclass VOP1_Real_dpp8_gfx10<bits<9> op> {
    foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP>.ret in
    def _dpp8_gfx10 : VOP1_DPP8<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")> {
      let DecoderNamespace = "DPP8";
    }
  }
} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"

multiclass VOP1_Real_gfx10<bits<9> op> :
  VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>,
  VOP1_Real_sdwa_gfx10<op>, VOP1_Real_dpp_gfx10<op>,
  VOP1_Real_dpp8_gfx10<op>;

multiclass VOP1_Real_gfx10_FULL_gfx11<bits<9> op> :
  VOP1_Real_gfx10<op>, VOP1_Real_FULL_gfx11<op>;

multiclass VOP1_Real_gfx10_NO_DPP_gfx11<bits<9> op> :
  VOP1_Real_gfx10<op>, VOP1_Real_NO_DPP_gfx11<op>;

multiclass VOP1Only_Real_gfx10_gfx11<bits<9> op> :
  VOP1Only_Real_gfx10<op>, VOP1Only_Real_gfx11<op>;

defm V_PIPEFLUSH : VOP1_Real_gfx10_NO_DPP_gfx11<0x01b>;
defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10_FULL_gfx11<0x048>;
defm V_CVT_F16_U16 : VOP1_Real_gfx10<0x050>;
defm V_CVT_F16_I16 : VOP1_Real_gfx10<0x051>;
defm V_CVT_U16_F16 : VOP1_Real_gfx10<0x052>;
defm V_CVT_I16_F16 : VOP1_Real_gfx10<0x053>;
defm V_RCP_F16 : VOP1_Real_gfx10<0x054>;
defm V_SQRT_F16 : VOP1_Real_gfx10<0x055>;
defm V_RSQ_F16 : VOP1_Real_gfx10<0x056>;
defm V_LOG_F16 : VOP1_Real_gfx10<0x057>;
defm V_EXP_F16 : VOP1_Real_gfx10<0x058>;
defm V_FREXP_MANT_F16 : VOP1_Real_gfx10<0x059>;
defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10<0x05a>;
defm V_FLOOR_F16 : VOP1_Real_gfx10<0x05b>;
defm V_CEIL_F16 : VOP1_Real_gfx10<0x05c>;
defm V_TRUNC_F16 : VOP1_Real_gfx10<0x05d>;
defm V_RNDNE_F16 : VOP1_Real_gfx10<0x05e>;
defm V_FRACT_F16 : VOP1_Real_gfx10<0x05f>;
defm V_SIN_F16 : VOP1_Real_gfx10<0x060>;
defm V_COS_F16 : VOP1_Real_gfx10<0x061>;
defm V_SAT_PK_U8_I16 : VOP1_Real_gfx10<0x062>;
defm V_CVT_NORM_I16_F16 : VOP1_Real_gfx10<0x063>;
defm V_CVT_NORM_U16_F16 : VOP1_Real_gfx10<0x064>;

defm V_SWAP_B32 : VOP1Only_Real_gfx10_gfx11<0x065>;
defm V_SWAPREL_B32 : VOP1Only_Real_gfx10_gfx11<0x068>;

//===----------------------------------------------------------------------===//
// GFX7, GFX10.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
  // 32-bit VOP1 encoding for GFX7-only opcodes (SI encoding family); uses
  // the low eight bits of op.
  multiclass VOP1_Real_e32_gfx7<bits<9> op> {
    def _e32_gfx7 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
  }

  // VOP3 encoding; the opcode is {1, 1, op{6-0}}.
  multiclass VOP1_Real_e64_gfx7<bits<9> op> {
    def _e64_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"

// e32 + e64 reals for GFX7.
multiclass VOP1_Real_gfx7<bits<9> op> :
  VOP1_Real_e32_gfx7<op>,
  VOP1_Real_e64_gfx7<op>;

// Combinations for opcodes whose encoding value is shared across targets.
multiclass VOP1_Real_gfx7_gfx10<bits<9> op> :
  VOP1_Real_gfx7<op>,
  VOP1_Real_gfx10<op>;

multiclass VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<bits<9> op> :
  VOP1_Real_gfx7_gfx10<op>,
  VOP1_Real_NO_DPP_gfx11<op>;

defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>;
defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>;

defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x017>;
defm V_CEIL_F64  : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x018>;
defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x019>;
defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x01a>;

//===----------------------------------------------------------------------===//
// GFX6, GFX7, GFX10, GFX11.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  // 32-bit VOP1 encoding shared by GFX6 and GFX7 (SI encoding family); uses
  // the low eight bits of op.
  multiclass VOP1_Real_e32_gfx6_gfx7<bits<9> op> {
    def _e32_gfx6_gfx7 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
  }

  // VOP3 encoding; the opcode is {1, 1, op{6-0}}.
  multiclass VOP1_Real_e64_gfx6_gfx7<bits<9> op> {
    def _e64_gfx6_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

// e32 + e64 reals for GFX6/GFX7.
multiclass VOP1_Real_gfx6_gfx7<bits<9> op> :
  VOP1_Real_e32_gfx6_gfx7<op>,
  VOP1_Real_e64_gfx6_gfx7<op>;

// Combinations for opcodes whose encoding value is shared across targets.
multiclass VOP1_Real_gfx6_gfx7_gfx10<bits<9> op> :
  VOP1_Real_gfx6_gfx7<op>,
  VOP1_Real_gfx10<op>;

multiclass VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<bits<9> op> :
  VOP1_Real_gfx6_gfx7_gfx10<op>,
  VOP1_Real_FULL_gfx11<op>;

multiclass VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<bits<9> op> :
  VOP1_Real_gfx6_gfx7_gfx10<op>,
  VOP1_Real_NO_DPP_gfx11<op>;

// Opcodes that only get GFX6/GFX7 reals here.
defm V_LOG_CLAMP_F32  : VOP1_Real_gfx6_gfx7<0x026>;
defm V_RCP_CLAMP_F32  : VOP1_Real_gfx6_gfx7<0x028>;
defm V_RCP_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x029>;
defm V_RSQ_CLAMP_F32  : VOP1_Real_gfx6_gfx7<0x02c>;
defm V_RSQ_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x02d>;
defm V_RCP_CLAMP_F64  : VOP1_Real_gfx6_gfx7<0x030>;
defm V_RSQ_CLAMP_F64  : VOP1_Real_gfx6_gfx7<0x032>;

// Opcodes with the same encoding value on GFX6/GFX7 and GFX10; the multiclass
// chosen for each entry decides which GFX11 forms (if any) are added as well.
defm V_NOP               : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x000>;
defm V_MOV_B32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x001>;
defm V_CVT_I32_F64       : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x003>;
defm V_CVT_F64_I32       : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x004>;
defm V_CVT_F32_I32       : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x005>;
defm V_CVT_F32_U32       : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x006>;
defm V_CVT_U32_F32       : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x007>;
defm V_CVT_I32_F32       : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x008>;
defm V_CVT_F16_F32       : VOP1_Real_gfx6_gfx7_gfx10<0x00a>;
defm V_CVT_F32_F16       : VOP1_Real_gfx6_gfx7_gfx10<0x00b>;
defm V_CVT_RPI_I32_F32   : VOP1_Real_gfx6_gfx7_gfx10<0x00c>;
defm V_CVT_FLR_I32_F32   : VOP1_Real_gfx6_gfx7_gfx10<0x00d>;
defm V_CVT_OFF_F32_I4    : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x00e>;
defm V_CVT_F32_F64       : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x00f>;
defm V_CVT_F64_F32       : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x010>;
defm V_CVT_F32_UBYTE0    : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x011>;
defm V_CVT_F32_UBYTE1    : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x012>;
defm V_CVT_F32_UBYTE2    : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x013>;
defm V_CVT_F32_UBYTE3    : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x014>;
defm V_CVT_U32_F64       : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x015>;
defm V_CVT_F64_U32       : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x016>;
defm V_FRACT_F32         : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x020>;
defm V_TRUNC_F32         : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x021>;
defm V_CEIL_F32          : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x022>;
defm V_RNDNE_F32         : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x023>;
defm V_FLOOR_F32         : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x024>;
defm V_EXP_F32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x025>;
defm V_LOG_F32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x027>;
defm V_RCP_F32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x02a>;
defm V_RCP_IFLAG_F32     : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x02b>;
defm V_RSQ_F32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x02e>;
defm V_RCP_F64           : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x02f>;
defm V_RSQ_F64           : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x031>;
defm V_SQRT_F32          : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x033>;
defm V_SQRT_F64          : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x034>;
defm V_SIN_F32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x035>;
defm V_COS_F32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x036>;
defm V_NOT_B32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x037>;
defm V_BFREV_B32         : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x038>;
defm V_FFBH_U32          : VOP1_Real_gfx6_gfx7_gfx10<0x039>;
defm V_FFBL_B32          : VOP1_Real_gfx6_gfx7_gfx10<0x03a>;
defm V_FFBH_I32          : VOP1_Real_gfx6_gfx7_gfx10<0x03b>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x03c>;
defm V_FREXP_MANT_F64    : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x03d>;
defm V_FRACT_F64         : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x03e>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x03f>;
defm V_FREXP_MANT_F32    : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x040>;
defm V_CLREXCP           : VOP1_Real_gfx6_gfx7_gfx10<0x041>;
defm V_MOVRELD_B32       : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x042>;
defm V_MOVRELS_B32       : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x043>;
defm V_MOVRELSD_B32      : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x044>;

//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//

// First dword of a VOP1 instruction in DPP form: src0 is fixed to 0xfa, the
// opcode goes in bits 16-9, vdst in bits 24-17 (0 when the profile does not
// emit a destination) and bits 31-25 hold the constant 0x3f.
class VOP1_DPPe <bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
  VOP_DPPe <P> {
  bits<8> vdst;
  let Inst{8-0}   = 0xfa; // dpp
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

// Real for an instruction that only has a VOP1 form: the pseudo is looked up
// under NAME itself, without an "_e32" suffix.
multiclass VOP1Only_Real_vi <bits<10> op> {
  let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
    def _vi :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.VI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
  }
}

// e32 and e64 reals. The VOP3 opcode of the e64 form is op + 0x140.
multiclass VOP1_Real_e32e64_vi <bits<10> op> {
  let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
    def _e32_vi :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
    def _e64_vi :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
      VOP3e_vi <!add(0x140, op), !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
}

// e32/e64 reals plus whichever of the SDWA (VI), SDWA9 (GFX9) and DPP reals
// the e32 pseudo's profile enables.
multiclass VOP1_Real_vi <bits<10> op> {
  defm NAME : VOP1_Real_e32e64_vi <op>;

  if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA then
  def _sdwa_vi :
    VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
  def _dpp_vi :
    VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
    VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
}

defm V_NOP               : VOP1_Real_vi <0x0>;
defm V_MOV_B32           : VOP1_Real_vi <0x1>;
defm V_CVT_I32_F64       : VOP1_Real_vi <0x3>;
defm V_CVT_F64_I32       : VOP1_Real_vi <0x4>;
defm V_CVT_F32_I32       : VOP1_Real_vi <0x5>;
defm V_CVT_F32_U32       : VOP1_Real_vi <0x6>;
defm V_CVT_U32_F32       : VOP1_Real_vi <0x7>;
defm V_CVT_I32_F32       : VOP1_Real_vi <0x8>;
defm V_CVT_F16_F32       : VOP1_Real_vi <0xa>;
defm V_CVT_F32_F16       : VOP1_Real_vi <0xb>;
defm V_CVT_RPI_I32_F32   : VOP1_Real_vi <0xc>;
defm V_CVT_FLR_I32_F32   : VOP1_Real_vi <0xd>;
defm V_CVT_OFF_F32_I4    : VOP1_Real_vi <0xe>;
defm V_CVT_F32_F64       : VOP1_Real_vi <0xf>;
defm V_CVT_F64_F32       : VOP1_Real_vi <0x10>;
defm V_CVT_F32_UBYTE0    : VOP1_Real_vi <0x11>;
defm V_CVT_F32_UBYTE1    : VOP1_Real_vi <0x12>;
defm V_CVT_F32_UBYTE2    : VOP1_Real_vi <0x13>;
defm V_CVT_F32_UBYTE3    : VOP1_Real_vi <0x14>;
defm V_CVT_U32_F64       : VOP1_Real_vi <0x15>;
defm V_CVT_F64_U32       : VOP1_Real_vi <0x16>;
defm V_FRACT_F32         : VOP1_Real_vi <0x1b>;
defm V_TRUNC_F32         : VOP1_Real_vi <0x1c>;
defm V_CEIL_F32          : VOP1_Real_vi <0x1d>;
defm V_RNDNE_F32         : VOP1_Real_vi <0x1e>;
defm V_FLOOR_F32         : VOP1_Real_vi <0x1f>;
defm V_EXP_F32           : VOP1_Real_vi <0x20>;
defm V_LOG_F32           : VOP1_Real_vi <0x21>;
defm V_RCP_F32           : VOP1_Real_vi <0x22>;
defm V_RCP_IFLAG_F32     : VOP1_Real_vi <0x23>;
defm V_RSQ_F32           : VOP1_Real_vi <0x24>;
defm V_RCP_F64           : VOP1_Real_vi <0x25>;
defm V_RSQ_F64           : VOP1_Real_vi <0x26>;
defm V_SQRT_F32          : VOP1_Real_vi <0x27>;
defm V_SQRT_F64          : VOP1_Real_vi <0x28>;
defm V_SIN_F32           : VOP1_Real_vi <0x29>;
defm V_COS_F32           : VOP1_Real_vi <0x2a>;
defm V_NOT_B32           : VOP1_Real_vi <0x2b>;
defm V_BFREV_B32         : VOP1_Real_vi <0x2c>;
defm V_FFBH_U32          : VOP1_Real_vi <0x2d>;
defm V_FFBL_B32          : VOP1_Real_vi <0x2e>;
defm V_FFBH_I32          : VOP1_Real_vi <0x2f>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_vi <0x30>;
defm V_FREXP_MANT_F64    : VOP1_Real_vi <0x31>;
defm V_FRACT_F64         : VOP1_Real_vi <0x32>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_vi <0x33>;
defm V_FREXP_MANT_F32    : VOP1_Real_vi <0x34>;
defm V_CLREXCP           : VOP1_Real_vi <0x35>;
// The three V_MOVREL* opcodes get e32/e64 reals only (no SDWA/DPP forms).
defm V_MOVRELD_B32       : VOP1_Real_e32e64_vi <0x36>;
defm V_MOVRELS_B32       : VOP1_Real_e32e64_vi <0x37>;
defm V_MOVRELSD_B32      : VOP1_Real_e32e64_vi <0x38>;
defm V_TRUNC_F64         : VOP1_Real_vi <0x17>;
defm V_CEIL_F64          : VOP1_Real_vi <0x18>;
defm V_FLOOR_F64         : VOP1_Real_vi <0x1a>;
defm V_RNDNE_F64         : VOP1_Real_vi <0x19>;
defm V_LOG_LEGACY_F32    : VOP1_Real_vi <0x4c>;
defm V_EXP_LEGACY_F32    : VOP1_Real_vi <0x4b>;
defm V_CVT_F16_U16       : VOP1_Real_vi <0x39>;
defm V_CVT_F16_I16       : VOP1_Real_vi <0x3a>;
defm V_CVT_U16_F16       : VOP1_Real_vi <0x3b>;
defm V_CVT_I16_F16       : VOP1_Real_vi <0x3c>;
defm V_RCP_F16           : VOP1_Real_vi <0x3d>;
defm V_SQRT_F16          : VOP1_Real_vi <0x3e>;
defm V_RSQ_F16           : VOP1_Real_vi <0x3f>;
defm V_LOG_F16           : VOP1_Real_vi <0x40>;
defm V_EXP_F16           : VOP1_Real_vi <0x41>;
defm V_FREXP_MANT_F16    : VOP1_Real_vi <0x42>;
defm V_FREXP_EXP_I16_F16 : VOP1_Real_vi <0x43>;
defm V_FLOOR_F16         : VOP1_Real_vi <0x44>;
defm V_CEIL_F16          : VOP1_Real_vi <0x45>;
defm V_TRUNC_F16         : VOP1_Real_vi <0x46>;
defm V_RNDNE_F16         : VOP1_Real_vi <0x47>;
defm V_FRACT_F16         : VOP1_Real_vi <0x48>;
defm V_SIN_F16           : VOP1_Real_vi <0x49>;
defm V_COS_F16           : VOP1_Real_vi <0x4a>;
defm V_SWAP_B32          : VOP1Only_Real_vi <0x51>;

defm V_SAT_PK_U8_I16     : VOP1_Real_vi<0x4f>;
defm V_CVT_NORM_I16_F16  : VOP1_Real_vi<0x4d>;
defm V_CVT_NORM_U16_F16  : VOP1_Real_vi<0x4e>;

defm V_ACCVGPR_MOV_B32   : VOP1Only_Real_vi<0x52>;

let VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [EXEC, M0] in {

// v_mov_b32 variant for VGPR indexing mode in which $vdst is listed among the
// inputs: it cannot be modelled as a def for codegen, so an implicit use and
// def of the super register should be added.
def V_MOV_B32_indirect_write : VPseudoInstSI<
  (outs),
  (ins getVALUDstForVT<i32>.ret:$vdst, getVOPSrc0ForVT<i32, 0>.ret:$src0)>,
  PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
                                        getVOPSrc0ForVT<i32, 0>.ret:$src0)>;

// v_mov_b32 variant for VGPR indexing mode with a regular $vdst output. An
// implicit use of the super register should be added.
def V_MOV_B32_indirect_read : VPseudoInstSI<
  (outs getVALUDstForVT<i32>.ret:$vdst),
  (ins getVOPSrc0ForVT<i32, 0>.ret:$src0)>,
  PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
                                        getVOPSrc0ForVT<i32, 0>.ret:$src0)>;

} // End VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [EXEC, M0]

let OtherPredicates = [isGFX8Plus] in {

// llvm.amdgcn.mov.dpp: $src is passed as both the first and the second
// operand of V_MOV_B32_dpp.
def : GCNPat <
  (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask,
                           timm:$bank_mask, timm:$bound_ctrl)),
  (V_MOV_B32_dpp VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp_ctrl),
                 (as_i32timm $row_mask), (as_i32timm $bank_mask),
                 (as_i1timm $bound_ctrl))
>;

// llvm.amdgcn.update.dpp: like the pattern above, but the first operand is
// the separate $old value.
def : GCNPat <
  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl,
                              timm:$row_mask, timm:$bank_mask,
                              timm:$bound_ctrl)),
  (V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src, (as_i32timm $dpp_ctrl),
                 (as_i32timm $row_mask), (as_i32timm $bank_mask),
                 (as_i1timm $bound_ctrl))
>;

// i16 -> i32 anyext is a plain copy.
def : GCNPat<
  (i32 (anyext i16:$src)),
  (COPY $src)
>;

// i16 -> i64 anyext: copy into sub0 and materialize 0 in sub1.
def : GCNPat<
  (i64 (anyext i16:$src)),
  (REG_SEQUENCE VReg_64,
    (i32 (COPY $src)), sub0,
    (V_MOV_B32_e32 (i32 0)), sub1)
>;

// i32 -> i16 trunc is a plain copy.
def : GCNPat<
  (i16 (trunc i32:$src)),
  (COPY $src)
>;

// i64 -> i16 trunc takes the sub0 subregister.
def : GCNPat <
  (i16 (trunc i64:$src)),
  (EXTRACT_SUBREG $src, sub0)
>;

} // End OtherPredicates = [isGFX8Plus]

//===----------------------------------------------------------------------===//
// GFX9
//===----------------------------------------------------------------------===//

// GFX9-only reals: the e32/e64 definitions come from VOP1_Real_e32e64_vi,
// instantiated under AssemblerPredicate = isGFX9Only and
// DecoderNamespace = "GFX9"; SDWA9 and DPP reals are added when the e32
// pseudo's profile enables them.
multiclass VOP1_Real_gfx9 <bits<10> op> {
  let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
    defm NAME : VOP1_Real_e32e64_vi <op>;
  }

  if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
  def _dpp_gfx9 :
    VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
    VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;

}

// Identical to VOP1_Real_gfx9 except that the SDWA9 real hard-codes
// Inst{42-40} to 6.
// NOTE(review): going by the multiclass name this presumably pins the SDWA
// dst_sel field -- confirm against the VOP_SDWA9Ae encoding.
multiclass VOP1_Real_NoDstSel_SDWA_gfx9 <bits<10> op> {
  let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
    defm NAME : VOP1_Real_e32e64_vi <op>;
  }

  if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
      let Inst{42-40} = 6;
    }

  if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
  def _dpp_gfx9 :
    VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
    VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
}

defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;

// V_MOV_B64 is wrapped in an outer let that sets
// AssemblerPredicate = isGFX940Plus.
let AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9" in
defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>;

let OtherPredicates = [HasFP8Insts] in {
defm V_CVT_F32_FP8    : VOP1_Real_NoDstSel_SDWA_gfx9<0x54>;
defm V_CVT_F32_BF8    : VOP1_Real_NoDstSel_SDWA_gfx9<0x55>;
defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>;
defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>;
} // End OtherPredicates = [HasFP8Insts]

//===----------------------------------------------------------------------===//
// GFX10
//===----------------------------------------------------------------------===//

let OtherPredicates = [isGFX10Only] in {
// llvm.amdgcn.mov.dpp8 on GFX10: $src is passed as both the first and the
// second operand; the last operand is DPP8Mode.FI_0.
def : GCNPat <
  (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
  (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, VGPR_32:$src,
                        (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX10Only]

//===----------------------------------------------------------------------===//
// GFX11
//===----------------------------------------------------------------------===//

let OtherPredicates = [isGFX11Only] in {
// Same selection as the GFX10 pattern, targeting the GFX11 real.
def : GCNPat <
  (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
  (V_MOV_B32_dpp8_gfx11 VGPR_32:$src, VGPR_32:$src,
                        (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX11Only]