//===-- VOP1Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP1 Classes
//===----------------------------------------------------------------------===//

// 32-bit VOP1 encoding. Bits 8-0 hold src0 (left unset when the profile has no
// src0), bits 16-9 the opcode, bits 24-17 vdst (zero unless the profile emits
// a destination) and bits 31-25 the constant 0x3f.
class VOP1e <bits<8> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;

  let Inst{8-0}   = !if(P.HasSrc0, src0{8-0}, ?);
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; //encoding
}

// SDWA form of the VOP1 encoding: bits 8-0 carry the constant 0xf9 instead of
// src0; the remaining fields are laid out as in VOP1e.
class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

// Same field layout as VOP1_SDWAe, on top of the VOP_SDWA9Ae base encoding.
class VOP1_SDWA9Ae <bits<8> op, VOPProfile P> : VOP_SDWA9Ae <P> {
  bits<8> vdst;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

// Pseudo instruction for the 32-bit form. When VOP1Only is set, "" is passed
// to VOP_Pseudo in place of "_e32". ReadsModeReg, and with it
// mayRaiseFPException and the implicit MODE use, is set when either the
// destination or the src0 type is a floating-point type.
class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1Only = 0> :
  VOP_Pseudo <opName, !if(VOP1Only, "", "_e32"), P, P.Outs32, P.Ins32, "", pattern> {

  let AsmOperands = P.Asm32;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret);

  let mayRaiseFPException = ReadsModeReg;

  let VOP1 = 1;
  let VALU = 1;
  let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);

  let AsmVariantName = AMDGPUAsmVariants.Default;
}

// Real (non-pseudo) VOP1 instruction derived from the pseudo `ps` for the
// given encoding family. The asm string is real_name followed by the pseudo's
// operand string.
class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemonic > :
  VOP_Real <ps>,
  InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let VALU = 1;
  let VOP1 = 1;
  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  // (Constraints and DisableEncoding used to be assigned twice with the same
  // value; each is now assigned once here.)
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let AsmVariantName       = ps.AsmVariantName;
  let Constraints          = ps.Constraints;
  let DisableEncoding      = ps.DisableEncoding;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses                 = ps.Uses;
  let Defs                 = ps.Defs;
  let SchedRW              = ps.SchedRW;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let TRANS                = ps.TRANS;
}

// SDWA pseudo for VOP1; only selects the "cvtSdwaVOP1" asm match converter.
class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP1";
}

// DPP pseudo for VOP1; adds nothing on top of VOP_DPP_Pseudo.
class VOP1_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_DPP_Pseudo <OpName, P, pattern> {
}

// Picks the selection pattern for `node`: VOP3Mods on src0 when the profile
// has modifiers, otherwise VOP3OMods when it has omod, otherwise the bare node.
class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret =
    !if(P.HasModifiers,
        [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods P.Src0VT:$src0, i32:$src0_modifiers))))],
        !if(P.HasOMod,
            [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0,
                                                  i1:$clamp, i32:$omod))))],
            [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]
        )
    );
}

// Defines the _e32 and _e64 pseudos for one VOP1 instruction, the optional
// _sdwa, _dpp and _e64_dpp pseudos gated by the profile's HasExtSDWA,
// HasExtDPP and HasExtVOP3DPP bits, and mnemonic aliases that map the
// suffixed spellings back to opName. A VOPDOp other than -1 additionally makes
// the _e32 pseudo a VOPD_Component.
multiclass VOP1Inst <string opName, VOPProfile P,
                     SDPatternOperator node = null_frag, int VOPDOp = -1> {
  // We only want to set this on the basic, non-SDWA or DPP forms.
  defvar should_mov_imm = !or(!eq(opName, "v_mov_b32"),
                              !eq(opName, "v_mov_b64"));

  let isMoveImm = should_mov_imm in {
    if !eq(VOPDOp, -1) then
      def _e32 : VOP1_Pseudo <opName, P>;
    else
      // Only for V_MOV_B32
      def _e32 : VOP1_Pseudo <opName, P>, VOPD_Component<VOPDOp, "v_mov_b32">;
    def _e64 : VOP3InstBase <opName, P, node>;
  }

  foreach _ = BoolToList<P.HasExtSDWA>.ret in
    def _sdwa : VOP1_SDWA_Pseudo <opName, P>;

  foreach _ = BoolToList<P.HasExtDPP>.ret in
    def _dpp : VOP1_DPP_Pseudo <opName, P>;

  let SubtargetPredicate = isGFX11Plus in {
    foreach _ = BoolToList<P.HasExtVOP3DPP>.ret in
      def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
  } // End SubtargetPredicate = isGFX11Plus

  def : MnemonicAlias<opName#"_e32", opName>, LetDummies;
  def : MnemonicAlias<opName#"_e64", opName>, LetDummies;

  foreach _ = BoolToList<P.HasExtSDWA>.ret in
    def : MnemonicAlias<opName#"_sdwa", opName>, LetDummies;

  foreach _ = BoolToList<P.HasExtDPP>.ret in
    def : MnemonicAlias<opName#"_dpp", opName>, LetDummies;
}

// Special profile for instructions which have clamp
// and output modifiers (but have no input modifiers)
class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
  VOPProfile<[dstVt, srcVt, untyped, untyped]> {

  let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
  let InsVOP3Base = (ins Src0DPP:$src0, clampmod:$clamp, omod:$omod);
  let Asm64 = "$vdst, $src0$clamp$omod";
  let AsmVOP3DPPBase = Asm64;

  let HasModifiers = 0;
  let HasClamp = 1;
}

def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;

// Profile with no typed operands; the VOP3 DPP extension is disabled.
def VOP_NOP_PROFILE : VOPProfile <[untyped, untyped, untyped, untyped]>{
  let HasExtVOP3DPP = 0;
}

// OMod clears exceptions when set. OMod was always an operand, but it is
// now explicitly set.
// Profile that sets HasOMod = 1 on top of the default VOPProfile for the given
// destination and source types.
class VOP_SPECIAL_OMOD_PROF<ValueType dstVt, ValueType srcVt> :
  VOPProfile<[dstVt, srcVt, untyped, untyped]> {

  let HasOMod = 1;
}
def VOP_I32_F32_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f32>;
def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f64>;
def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i16, f16>;

//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//

let VOPAsmPrefer32Bit = 1 in {
defm V_NOP : VOP1Inst <"v_nop", VOP_NOP_PROFILE>;
}

// i32 -> i32 profile for V_MOV_B32 that also supplies the VOPD X/Y operand
// lists (plain and deferred variants).
def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
  let InsVOPDX = (ins Src0RC32:$src0X);
  let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X);
  let InsVOPDY = (ins Src0RC32:$src0Y);
  let InsVOPDYDeferred = (ins VSrc_f32_Deferred:$src0Y);
}

let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;

let SubtargetPredicate = isGFX940Plus in
defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
} // End isReMaterializable = 1, isAsCheapAsAMove = 1

// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: Make profile for this, there is VOP3 encoding also
// Defined by hand rather than through VOP1Inst: the destination register
// class is SReg_32 and the encoding fields (opcode 0x2) are spelled out below.
def V_READFIRSTLANE_B32 :
  InstSI <(outs SReg_32:$vdst),
    (ins VRegOrLds_32:$src0),
    "v_readfirstlane_b32 $vdst, $src0",
    [(set i32:$vdst, (int_amdgcn_readfirstlane (i32 VRegOrLds_32:$src0)))]>,
  Enc32 {

  let isCodeGenOnly = 0;
  let UseNamedOperandTable = 1;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  let VOP1 = 1;
  let VALU = 1;
  let Uses = [EXEC];
  let isConvergent = 1;

  bits<8> vdst;
  bits<9> src0;

  let Inst{8-0} = src0;
  let Inst{16-9} = 0x2;
  let Inst{24-17} = vdst;
  let Inst{31-25} = 0x3f; //encoding
}

let isReMaterializable = 1 in {
let SchedRW = [WriteDoubleCvt] in {
// OMod clears exceptions when set in this instruction
defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64_SPECIAL_OMOD, fp_to_sint>;

let mayRaiseFPException = 0 in {
defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
}

defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
// OMod clears exceptions when set in this instruction
defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64_SPECIAL_OMOD, fp_to_uint>;

let mayRaiseFPException = 0 in {
defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
}

} // End SchedRW = [WriteDoubleCvt]

let SchedRW = [WriteFloatCvt] in {

// XXX: Does this really not raise exceptions? The manual claims the
// 16-bit ones can.
let mayRaiseFPException = 0 in {
defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
}

// OMod clears exceptions when set in these 2 instructions
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_sint>;
let FPDPRounding = 1, isReMaterializable = 0 in {
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
} // End FPDPRounding = 1, isReMaterializable = 0

defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
} // End ReadsModeReg = 0, mayRaiseFPException = 0
} // End SchedRW = [WriteFloatCvt]

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;
} // End ReadsModeReg = 0, mayRaiseFPException = 0

defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>;
defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>;
defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;

let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>;
defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>;
defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32, AMDGPUrcp_iflag>;
defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, any_amdgcn_sqrt>;
} // End TRANS = 1, SchedRW = [WriteTrans32]

let TRANS = 1, SchedRW = [WriteTrans64] in {
defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>;
defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>;
defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, any_amdgcn_sqrt>;
} // End TRANS = 1, SchedRW = [WriteTrans64]

let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>;
defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>;
} // End TRANS = 1, SchedRW = [WriteTrans32]

defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>;
defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, DivergentUnaryFrag<bitreverse>>;
defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32, AMDGPUffbh_u32>;
defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32, AMDGPUffbl_b32>;
defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>;

let SchedRW = [WriteDoubleAdd] in {
defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64_SPECIAL_OMOD, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>;
let FPDPRounding = 1 in {
defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>;
} // End FPDPRounding = 1
} // End SchedRW = [WriteDoubleAdd]

defm V_FREXP_EXP_I32_F32 : VOP1Inst <"v_frexp_exp_i32_f32", VOP_I32_F32, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F32 : VOP1Inst <"v_frexp_mant_f32", VOP_F32_F32, int_amdgcn_frexp_mant>;
} // End isReMaterializable = 1

let VOPAsmPrefer32Bit = 1 in {
defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
}

// Restrict src0 to be VGPR
def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> {
  let Src0RC32 = VRegSrc_32;
  let Src0RC64 = VRegSrc_32;
}

// Special case because there are no true output operands. Hack vdst
// to be a src operand. The custom inserter must add a tied implicit
// def and use of the super register since there seems to be no way to
// add an implicit def of a virtual register in tablegen.
// Profile for the movrel instructions that take $vdst as an input operand:
// Outs is empty, $vdst is listed first in Ins32/Ins64, and EmitDst is forced
// on so that vdst is still encoded although HasDst is 0.
class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, untyped]> {
  let Src0RC32 = VOPDstOperand<VGPR_32>;
  let Src0RC64 = VOPDstOperand<VGPR_32>;

  let Outs = (outs);
  let Ins32 = (ins Src0RC32:$vdst, Src1RC:$src0);
  let Ins64 = (ins Src0RC64:$vdst, Src1RC:$src0);
  let Asm32 = getAsm32<1, 1>.ret;
  let Asm64 = getAsm64<1, 1, 0, 0, 1>.ret;

  let OutsSDWA = (outs Src0RC32:$vdst);
  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel);
  let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;

  let OutsDPP = (outs Src0RC32:$vdst);
  let InsDPP16 = (ins Src0RC32:$old, Src0RC32:$src0,
                      dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                      bank_mask:$bank_mask, bound_ctrl:$bound_ctrl, FI:$fi);
  let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;

  let OutsDPP8 = (outs Src0RC32:$vdst);
  let InsDPP8 = (ins Src0RC32:$old, Src0RC32:$src0, dpp8:$dpp8, FI:$fi);
  let AsmDPP8 = getAsmDPP8<1, 1, 0>.ret;

  let HasDst = 0;
  let EmitDst = 1; // force vdst emission
}

def VOP_MOVRELD : VOP_MOVREL<VSrc_b32>;
def VOP_MOVRELSD : VOP_MOVREL<VRegSrc_32>;

let SubtargetPredicate = HasMovrel, Uses = [M0, EXEC] in {
 // v_movreld_b32 is a special case because the destination output
 // register is really a source. It isn't actually read (but may be
 // written), and is only to provide the base register to start
 // indexing from. Tablegen seems to not let you define an implicit
 // virtual register output for the super register being written into,
 // so this must have an implicit def of the register added to it.
defm V_MOVRELD_B32 : VOP1Inst <"v_movreld_b32", VOP_MOVRELD>;
defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_MOVRELS>;
defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_MOVRELSD>;
} // End SubtargetPredicate = HasMovrel, Uses = [M0, EXEC]

let isReMaterializable = 1 in {
let SubtargetPredicate = isGFX6GFX7 in {
  let TRANS = 1, SchedRW = [WriteTrans32] in {
    defm V_LOG_CLAMP_F32 :
      VOP1Inst<"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
    defm V_RCP_CLAMP_F32 :
      VOP1Inst<"v_rcp_clamp_f32", VOP_F32_F32>;
    defm V_RCP_LEGACY_F32 :
      VOP1Inst<"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
    defm V_RSQ_CLAMP_F32 :
      VOP1Inst<"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
    defm V_RSQ_LEGACY_F32 :
      VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, int_amdgcn_rsq_legacy>;
  } // End TRANS = 1, SchedRW = [WriteTrans32]

  let SchedRW = [WriteTrans64] in {
    defm V_RCP_CLAMP_F64 :
      VOP1Inst<"v_rcp_clamp_f64", VOP_F64_F64>;
    defm V_RSQ_CLAMP_F64 :
      VOP1Inst<"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>;
  } // End SchedRW = [WriteTrans64]
} // End SubtargetPredicate = isGFX6GFX7

let SubtargetPredicate = isGFX7GFX8GFX9 in {
  let TRANS = 1, SchedRW = [WriteTrans32] in {
    defm V_LOG_LEGACY_F32 : VOP1Inst<"v_log_legacy_f32", VOP_F32_F32>;
    defm V_EXP_LEGACY_F32 : VOP1Inst<"v_exp_legacy_f32", VOP_F32_F32>;
  } // End TRANS = 1, SchedRW = [WriteTrans32]
} // End SubtargetPredicate = isGFX7GFX8GFX9

let SubtargetPredicate = isGFX7Plus in {
  let SchedRW = [WriteDoubleAdd] in {
    defm V_TRUNC_F64 : VOP1Inst<"v_trunc_f64", VOP_F64_F64, ftrunc>;
    defm V_CEIL_F64 : VOP1Inst<"v_ceil_f64", VOP_F64_F64, fceil>;
    defm V_RNDNE_F64 : VOP1Inst<"v_rndne_f64", VOP_F64_F64, frint>;
    defm V_FLOOR_F64 : VOP1Inst<"v_floor_f64", VOP_F64_F64, ffloor>;
  } // End SchedRW = [WriteDoubleAdd]
} // End SubtargetPredicate = isGFX7Plus
} // End isReMaterializable = 1

let SubtargetPredicate = Has16BitInsts in {

let FPDPRounding = 1 in {
defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
} // End FPDPRounding = 1
// OMod clears exceptions when set in these two instructions
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_uint>;
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_sint>;
let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>;
defm V_LOG_F16 : VOP1Inst <"v_log_f16", VOP_F16_F16, flog2>;
defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16, fexp2>;
defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
} // End TRANS = 1, SchedRW = [WriteTrans32]
defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16_SPECIAL_OMOD, int_amdgcn_frexp_exp>;
defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>;
defm V_CEIL_F16 : VOP1Inst <"v_ceil_f16", VOP_F16_F16, fceil>;
defm V_TRUNC_F16 : VOP1Inst <"v_trunc_f16", VOP_F16_F16, ftrunc>;
defm V_RNDNE_F16 : VOP1Inst <"v_rndne_f16", VOP_F16_F16, frint>;
let FPDPRounding = 1 in {
defm V_FRACT_F16 : VOP1Inst <"v_fract_f16", VOP_F16_F16, AMDGPUfract>;
} // End FPDPRounding = 1

} // End SubtargetPredicate = Has16BitInsts

let OtherPredicates = [Has16BitInsts] in {

def : GCNPat<
    (f32 (f16_to_fp i16:$src)),
    (V_CVT_F32_F16_e32 $src)
>;

def : GCNPat<
    (i16 (AMDGPUfp_to_f16 f32:$src)),
    (V_CVT_F16_F32_e32 $src)
>;

} // End OtherPredicates = [Has16BitInsts]

// Profile for the swap instructions: two VGPR outputs and two VGPR inputs in
// the 32-bit form; the 64-bit form has no inputs and an empty asm string.
def VOP_SWAP_I32 : VOPProfile<[i32, i32, i32, untyped]> {
  let Outs32 = (outs VGPR_32:$vdst, VGPR_32:$vdst1);
  let Ins32 = (ins VGPR_32:$src0, VGPR_32:$src1);
  let Outs64 = Outs32;
  let Asm32 = " $vdst, $src0";
  let Asm64 = "";
  let Ins64 = (ins);
}

let SubtargetPredicate = isGFX9Plus in {
  def V_SWAP_B32 : VOP1_Pseudo<"v_swap_b32", VOP_SWAP_I32, [], 1> {
    // Each output is tied to the opposite input; only $vdst and $src0 are
    // encoded.
    let Constraints = "$vdst = $src1, $vdst1 = $src0";
    let DisableEncoding = "$vdst1,$src1";
    let SchedRW = [Write64Bit, Write64Bit];
  }

  let isReMaterializable = 1 in
  defm V_SAT_PK_U8_I16 : VOP1Inst<"v_sat_pk_u8_i16", VOP_I32_I32>;

  let mayRaiseFPException = 0 in {
    defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16_SPECIAL_OMOD>;
    defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16_SPECIAL_OMOD>;
  } // End mayRaiseFPException = 0
} // End SubtargetPredicate = isGFX9Plus

let SubtargetPredicate = isGFX9Only in {
  defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
} // End SubtargetPredicate = isGFX9Only

// Profile for the FP8/BF8 -> f32 conversions. Enables the SDWA forms and
// gives them an operand list with src0_sel but without dst_sel/dst_unused.
class VOPProfile_Base_CVT_F32_F8<ValueType vt> : VOPProfileI2F <vt, i32> {
  let HasExtSDWA = 1;
  let HasExtSDWA9 = 1;
  let HasExt = 1;
  let DstRCSDWA = getVALUDstForVT<vt>.ret;
  let InsSDWA = (ins Bin32SDWAInputMods:$src0_modifiers, Src0SDWA:$src0,
                     clampmod:$clamp, omod:$omod, src0_sel:$src0_sel);
  let AsmSDWA = "$vdst, $src0_modifiers$clamp$omod $src0_sel"; // No dst_sel
  let AsmSDWA9 = AsmSDWA;
  let EmitDstSel = 0;
}

def VOPProfileCVT_F32_F8 : VOPProfile_Base_CVT_F32_F8 <f32>;
def VOPProfileCVT_PK_F32_F8 : VOPProfile_Base_CVT_F32_F8 <v2f32>;

let SubtargetPredicate = HasFP8Insts, mayRaiseFPException = 0,
    SchedRW = [WriteFloatCvt] in {
  defm V_CVT_F32_FP8 : VOP1Inst<"v_cvt_f32_fp8", VOPProfileCVT_F32_F8>;
  defm V_CVT_F32_BF8 : VOP1Inst<"v_cvt_f32_bf8", VOPProfileCVT_F32_F8>;
  defm V_CVT_PK_F32_FP8 : VOP1Inst<"v_cvt_pk_f32_fp8", VOPProfileCVT_PK_F32_F8>;
  defm V_CVT_PK_F32_BF8 : VOP1Inst<"v_cvt_pk_f32_bf8", VOPProfileCVT_PK_F32_F8>;
} // End SubtargetPredicate = HasFP8Insts, mayRaiseFPException = 0,
  //     SchedRW = [WriteFloatCvt]

// Selects (node $src, index). Index 0 uses the plain e32 form; any other
// index uses the SDWA form with modifiers, clamp and omod set to 0 and the
// index passed as the last operand (src0_sel in the InsSDWA list above).
class Cvt_F32_F8_Pat<SDPatternOperator node, int index,
    VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
    (f32 (node i32:$src, index)),
    !if (index,
         (inst_sdwa 0, $src, 0, 0, index),
         (inst_e32 $src))
>;

foreach Index = [0, 1, 2, 3] in {
  def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_fp8, Index,
                       V_CVT_F32_FP8_e32, V_CVT_F32_FP8_sdwa>;
  def : Cvt_F32_F8_Pat<int_amdgcn_cvt_f32_bf8, Index,
                       V_CVT_F32_BF8_e32, V_CVT_F32_BF8_sdwa>;
}

// Packed variant of the pattern above: a non-zero index always selects the
// SDWA form with SDWA.WORD_1 as the last operand.
class Cvt_PK_F32_F8_Pat<SDPatternOperator node, int index,
    VOP1_Pseudo inst_e32, VOP1_SDWA_Pseudo inst_sdwa> : GCNPat<
    (v2f32 (node i32:$src, index)),
    !if (index,
         (inst_sdwa 0, $src, 0, 0, SDWA.WORD_1),
         (inst_e32 $src))
>;

foreach Index = [0, -1] in {
  def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_fp8, Index,
                          V_CVT_PK_F32_FP8_e32, V_CVT_PK_F32_FP8_sdwa>;
  def : Cvt_PK_F32_F8_Pat<int_amdgcn_cvt_pk_f32_bf8, Index,
                          V_CVT_PK_F32_BF8_e32, V_CVT_PK_F32_BF8_sdwa>;
}

let SubtargetPredicate = isGFX10Plus in {
  defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NO_EXT<VOP_NONE>>;

  // NOTE(review): the HasMovrel group above uses Uses = [M0, EXEC]; this group
  // lists only M0 -- confirm that leaving out EXEC is intentional.
  let Uses = [M0] in {
    defm V_MOVRELSD_2_B32 :
      VOP1Inst<"v_movrelsd_2_b32", VOP_MOVRELSD>;

    def V_SWAPREL_B32 : VOP1_Pseudo<"v_swaprel_b32", VOP_SWAP_I32, [], 1> {
      let Constraints = "$vdst = $src1, $vdst1 = $src0";
      let DisableEncoding = "$vdst1,$src1";
      let SchedRW = [Write64Bit, Write64Bit];
    }
  } // End Uses = [M0]
} // End SubtargetPredicate = isGFX10Plus

// i32 -> i32 profile whose destination and src0 are AGPR_32 register operands.
def VOPProfileAccMov : VOP_NO_EXT<VOP_I32_I32> {
  let DstRC = RegisterOperand<AGPR_32>;
  let Src0RC32 = RegisterOperand<AGPR_32>;
  let Asm32 = " $vdst, $src0";
}

def V_ACCVGPR_MOV_B32 : VOP1_Pseudo<"v_accvgpr_mov_b32", VOPProfileAccMov, [], 1> {
  let SubtargetPredicate = isGFX90APlus;
  let isReMaterializable = 1;
  let isAsCheapAsAMove = 1;
}

let SubtargetPredicate = isGFX11Plus in {
  // Restrict src0 to be VGPR
  def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS,
                                      getVOP1Pat64<int_amdgcn_permlane64,
                                                   VOP_MOVRELS>.ret,
                                      /*VOP1Only=*/ 1>;
  defm V_NOT_B16 : VOP1Inst<"v_not_b16", VOP_I16_I16>;
  defm V_CVT_I32_I16 : VOP1Inst<"v_cvt_i32_i16", VOP_I32_I16>;
  // 32-bit result from a 16-bit source, so this takes the same profile as
  // V_CVT_I32_I16 (it previously used VOP_I16_I16).
  defm V_CVT_U32_U16 : VOP1Inst<"v_cvt_u32_u16", VOP_I32_I16>;
} // End SubtargetPredicate = isGFX11Plus

//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//

// DPP form of the VOP1 encoding: bits 8-0 carry the constant 0xfa; the
// scheduling and flag fields listed below are copied from the DPP pseudo.
class VOP1_DPP<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 = 0> :
    VOP_DPP<ps.OpName, p, isDPP16> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;
  let TRANS = ps.TRANS;

  bits<8> vdst;
  let Inst{8-0}   = 0xfa;
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f;
}

// DPP16 real instruction for the given subtarget encoding family; requires
// HasDPP16 both for assembly and for selection.
class VOP1_DPP16<bits<8> op, VOP1_DPP_Pseudo ps, int subtarget, VOPProfile p = ps.Pfl> :
    VOP1_DPP<op, ps, p, 1>,
    SIMCInstr <ps.PseudoInstr, subtarget> {
  let AssemblerPredicate = HasDPP16;
  let SubtargetPredicate = HasDPP16;
}

// DPP8 form of the VOP1 encoding: bits 8-0 are taken from `fi`. Built from the
// _e32 pseudo rather than from a DPP pseudo.
class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
    VOP_DPP8<ps.OpName, p> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;
  // Copied as in VOP1_DPP and VOP1_Real so the DPP8 form keeps the pseudo's
  // TRANS bit instead of falling back to the default.
  let TRANS = ps.TRANS;

  bits<8> vdst;
  let Inst{8-0}   = fi;
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f;
}

//===----------------------------------------------------------------------===//
// GFX11.
//===----------------------------------------------------------------------===//

// Real-instruction multiclasses for GFX11. The *_with_name_* variants take the
// pseudo's record name (opName) separately from the mnemonic to print
// (asmName) and add a MnemonicAlias from the pseudo's mnemonic to asmName.
let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
  multiclass VOP1Only_Real_gfx11<bits<9> op> {
    let IsSingle = 1 in
      def _gfx11 :
        VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.GFX11>,
        VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
  }
  multiclass VOP1_Real_e32_gfx11<bits<9> op, string opName = NAME> {
    defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
    def _e32_gfx11 :
      VOP1_Real<ps, SIEncodingFamily.GFX11>,
      VOP1e<op{7-0}, ps.Pfl>;
  }
  multiclass VOP1_Real_e32_with_name_gfx11<bits<9> op, string opName,
                                           string asmName> {
    defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
    let AsmString = asmName # ps.AsmOperands in {
      defm NAME : VOP1_Real_e32_gfx11<op, opName>,
                  MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Plus]>;
    }
  }
  multiclass VOP1_Real_e64_gfx11<bits<9> op> {
    def _e64_gfx11 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX11>,
      VOP3e_gfx11<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
  // The unused `defvar ps` that used to sit here was dropped; only the _dpp
  // pseudo is looked up.
  multiclass VOP1_Real_dpp_gfx11<bits<9> op, string opName = NAME> {
    def _dpp_gfx11 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX11> {
      let DecoderNamespace = "DPPGFX11";
    }
  }
  multiclass VOP1_Real_dpp_with_name_gfx11<bits<9> op, string opName,
                                           string asmName> {
    defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
    let AsmString = asmName # ps.Pfl.AsmDPP16, DecoderNamespace = "DPPGFX11" in {
      defm NAME : VOP1_Real_dpp_gfx11<op, opName>,
                  MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Plus]>;
    }
  }
  multiclass VOP1_Real_dpp8_gfx11<bits<9> op, string opName = NAME> {
    defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
    def _dpp8_gfx11 : VOP1_DPP8<op{7-0}, ps> {
      let DecoderNamespace = "DPP8GFX11";
    }
  }
  multiclass VOP1_Real_dpp8_with_name_gfx11<bits<9> op, string opName,
                                            string asmName> {
    defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
    let AsmString = asmName # ps.Pfl.AsmDPP8, DecoderNamespace = "DPP8GFX11" in {
      defm NAME : VOP1_Real_dpp8_gfx11<op, opName>,
                  MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Plus]>;
    }
  }
} // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11"

// The VOP3 opcode is formed from the bits {0, 1, 1} followed by the low seven
// bits of the VOP1 opcode.
multiclass VOP1_Realtriple_e64_gfx11<bits<9> op> {
  defm NAME : VOP3_Realtriple_gfx11<{0, 1, 1, op{6-0}}, /*isSingle=*/ 0, NAME>;
}
multiclass VOP1_Realtriple_e64_with_name_gfx11<bits<9> op, string opName,
                                               string asmName> {
  defm NAME : VOP3_Realtriple_with_name_gfx11<{0, 1, 1, op{6-0}}, opName,
                                              asmName>;
}

multiclass VOP1_Real_FULL_gfx11<bits<9> op> :
  VOP1_Real_e32_gfx11<op>, VOP1_Realtriple_e64_gfx11<op>,
  VOP1_Real_dpp_gfx11<op>, VOP1_Real_dpp8_gfx11<op>;

multiclass VOP1_Real_NO_VOP3_with_name_gfx11<bits<9> op, string opName,
                                             string asmName> :
  VOP1_Real_e32_with_name_gfx11<op, opName, asmName>,
  VOP1_Real_dpp_with_name_gfx11<op, opName, asmName>,
  VOP1_Real_dpp8_with_name_gfx11<op, opName, asmName>;

multiclass VOP1_Real_FULL_with_name_gfx11<bits<9> op, string opName,
                                          string asmName> :
  VOP1_Real_NO_VOP3_with_name_gfx11<op, opName, asmName>,
  VOP1_Realtriple_e64_with_name_gfx11<op, opName, asmName>;

multiclass VOP1_Real_NO_DPP_gfx11<bits<9> op> :
  VOP1_Real_e32_gfx11<op>, VOP1_Real_e64_gfx11<op>;

defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11<0x00c,
  "V_CVT_RPI_I32_F32", "v_cvt_nearest_i32_f32">;
defm V_CVT_FLOOR_I32_F32 : VOP1_Real_FULL_with_name_gfx11<0x00d,
  "V_CVT_FLR_I32_F32", "v_cvt_floor_i32_f32">;
defm V_CLZ_I32_U32 : VOP1_Real_FULL_with_name_gfx11<0x039,
  "V_FFBH_U32", "v_clz_i32_u32">;
defm V_CTZ_I32_B32 : VOP1_Real_FULL_with_name_gfx11<0x03a,
  "V_FFBL_B32", "v_ctz_i32_b32">;
defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11<0x03b,
  "V_FFBH_I32", "v_cls_i32">;
defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11<0x067>;
defm V_NOT_B16 : VOP1_Real_FULL_gfx11<0x069>;
defm V_CVT_I32_I16 : VOP1_Real_FULL_gfx11<0x06a>;
defm V_CVT_U32_U16 : VOP1_Real_FULL_gfx11<0x06b>;

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

// Real-instruction multiclasses for GFX10. The SDWA, DPP and DPP8 reals are
// only emitted when the _e32 pseudo's profile enables the matching extension.
let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
  multiclass VOP1Only_Real_gfx10<bits<9> op> {
    def _gfx10 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.GFX10>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
  }
  multiclass VOP1_Real_e32_gfx10<bits<9> op> {
    def _e32_gfx10 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
  }
  multiclass VOP1_Real_e64_gfx10<bits<9> op> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
  multiclass VOP1_Real_sdwa_gfx10<bits<9> op> {
    foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx10 :
      VOP_SDWA10_Real<!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP1_SDWA9Ae<op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
      let DecoderNamespace = "SDWA10";
    }
  }
  multiclass VOP1_Real_dpp_gfx10<bits<9> op> {
    foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP>.ret in
    def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10> {
      // NOTE(review): the DPP16 real shares the "SDWA10" decoder namespace
      // with the SDWA real above -- confirm this is intentional.
      let DecoderNamespace = "SDWA10";
    }
  }
  multiclass VOP1_Real_dpp8_gfx10<bits<9> op> {
    foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP>.ret in
    def _dpp8_gfx10 : VOP1_DPP8<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")> {
      let DecoderNamespace = "DPP8";
    }
  }
} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"

multiclass VOP1_Real_gfx10<bits<9> op> :
  VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>,
  VOP1_Real_sdwa_gfx10<op>, VOP1_Real_dpp_gfx10<op>,
  VOP1_Real_dpp8_gfx10<op>;

multiclass VOP1_Real_gfx10_FULL_gfx11<bits<9> op> :
  VOP1_Real_gfx10<op>, VOP1_Real_FULL_gfx11<op>;

multiclass VOP1_Real_gfx10_NO_DPP_gfx11<bits<9> op> :
  VOP1_Real_gfx10<op>, VOP1_Real_NO_DPP_gfx11<op>;

multiclass VOP1Only_Real_gfx10_gfx11<bits<9> op> :
  VOP1Only_Real_gfx10<op>, VOP1Only_Real_gfx11<op>;

defm V_PIPEFLUSH : VOP1_Real_gfx10_NO_DPP_gfx11<0x01b>;
defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10_FULL_gfx11<0x048>;
defm V_CVT_F16_U16 : VOP1_Real_gfx10_FULL_gfx11<0x050>;
defm V_CVT_F16_I16 : VOP1_Real_gfx10_FULL_gfx11<0x051>;
defm V_CVT_U16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x052>;
defm V_CVT_I16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x053>;
defm V_RCP_F16 : VOP1_Real_gfx10_FULL_gfx11<0x054>;
defm V_SQRT_F16 : VOP1_Real_gfx10_FULL_gfx11<0x055>;
defm V_RSQ_F16 : VOP1_Real_gfx10_FULL_gfx11<0x056>;
defm V_LOG_F16 : VOP1_Real_gfx10_FULL_gfx11<0x057>;
defm V_EXP_F16 : VOP1_Real_gfx10_FULL_gfx11<0x058>;
defm V_FREXP_MANT_F16 : VOP1_Real_gfx10_FULL_gfx11<0x059>;
defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05a>;
defm V_FLOOR_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05b>;
defm V_CEIL_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05c>;
defm V_TRUNC_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05d>;
defm V_RNDNE_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05e>;
defm V_FRACT_F16 : VOP1_Real_gfx10_FULL_gfx11<0x05f>;
defm V_SIN_F16 : VOP1_Real_gfx10_FULL_gfx11<0x060>;
defm V_COS_F16 : VOP1_Real_gfx10_FULL_gfx11<0x061>;
defm V_SAT_PK_U8_I16 : VOP1_Real_gfx10_FULL_gfx11<0x062>;
defm V_CVT_NORM_I16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x063>;
defm V_CVT_NORM_U16_F16 : VOP1_Real_gfx10_FULL_gfx11<0x064>;

defm V_SWAP_B32 : VOP1Only_Real_gfx10_gfx11<0x065>;
defm V_SWAPREL_B32 : VOP1Only_Real_gfx10_gfx11<0x068>;

//===----------------------------------------------------------------------===//
// GFX7, GFX10, GFX11.
//===----------------------------------------------------------------------===//

// Real-instruction multiclasses for GFX7-only encodings; they use the SI
// encoding family. The VOP3 opcode is {1, 1} followed by the low seven bits of
// the VOP1 opcode.
let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
  multiclass VOP1_Real_e32_gfx7<bits<9> op> {
    def _e32_gfx7 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
  }
  multiclass VOP1_Real_e64_gfx7<bits<9> op> {
    def _e64_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"

multiclass VOP1_Real_gfx7<bits<9> op> :
  VOP1_Real_e32_gfx7<op>, VOP1_Real_e64_gfx7<op>;

multiclass VOP1_Real_gfx7_gfx10<bits<9> op> :
  VOP1_Real_gfx7<op>, VOP1_Real_gfx10<op>;

multiclass VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<bits<9> op> :
  VOP1_Real_gfx7_gfx10<op>, VOP1_Real_NO_DPP_gfx11<op>;

defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>;
defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>;

defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x017>;
defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x018>;
defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x019>;
defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10_NO_DPP_gfx11<0x01a>;

//===----------------------------------------------------------------------===//
// GFX6, GFX7, GFX10, GFX11.
//===----------------------------------------------------------------------===//

// Real forms shared by GFX6 and GFX7. Both multiclasses look up the pseudo by
// name (NAME plus "_e32" / "_e64") and emit one def with a "_gfx6_gfx7" suffix.
let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  // 32-bit VOP1 encoding; only the low 8 bits of op are used.
  multiclass VOP1_Real_e32_gfx6_gfx7<bits<9> op> {
    defvar ps = !cast<VOP1_Pseudo>(NAME#"_e32");
    def _e32_gfx6_gfx7 : VOP1_Real<ps, SIEncodingFamily.SI>,
                         VOP1e<op{7-0}, ps.Pfl>;
  }

  // VOP3 encoding; the VOP3 opcode is op{6-0} prefixed with the bits 1, 1.
  multiclass VOP1_Real_e64_gfx6_gfx7<bits<9> op> {
    defvar ps = !cast<VOP3_Pseudo>(NAME#"_e64");
    def _e64_gfx6_gfx7 : VOP3_Real<ps, SIEncodingFamily.SI>,
                         VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, ps.Pfl>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

// e32 + e64 for GFX6/GFX7.
multiclass VOP1_Real_gfx6_gfx7<bits<9> op> :
  VOP1_Real_e32_gfx6_gfx7<op>,
  VOP1_Real_e64_gfx6_gfx7<op>;

// Same opcode on GFX6/GFX7 and GFX10.
multiclass VOP1_Real_gfx6_gfx7_gfx10<bits<9> op> :
  VOP1_Real_gfx6_gfx7<op>,
  VOP1_Real_gfx10<op>;

// Same opcode on GFX6/GFX7, GFX10 and GFX11 (FULL GFX11 variant).
multiclass VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<bits<9> op> :
  VOP1_Real_gfx6_gfx7_gfx10<op>,
  VOP1_Real_FULL_gfx11<op>;

// Same opcode on GFX6/GFX7, GFX10 and GFX11 (NO_DPP GFX11 variant).
multiclass VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<bits<9> op> :
  VOP1_Real_gfx6_gfx7_gfx10<op>,
  VOP1_Real_NO_DPP_gfx11<op>;

// Instructions encoded for GFX6/GFX7 only.
defm V_LOG_CLAMP_F32     : VOP1_Real_gfx6_gfx7<0x026>;
defm V_RCP_CLAMP_F32     : VOP1_Real_gfx6_gfx7<0x028>;
defm V_RCP_LEGACY_F32    : VOP1_Real_gfx6_gfx7<0x029>;
defm V_RSQ_CLAMP_F32     : VOP1_Real_gfx6_gfx7<0x02c>;
defm V_RSQ_LEGACY_F32    : VOP1_Real_gfx6_gfx7<0x02d>;
defm V_RCP_CLAMP_F64     : VOP1_Real_gfx6_gfx7<0x030>;
defm V_RSQ_CLAMP_F64     : VOP1_Real_gfx6_gfx7<0x032>;

// Instructions sharing one opcode across GFX6/GFX7, GFX10 and (for the
// *_gfx11 multiclasses) GFX11.
defm V_NOP               : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x000>;
defm V_MOV_B32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x001>;
defm V_CVT_I32_F64       : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x003>;
defm V_CVT_F64_I32       : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x004>;
defm V_CVT_F32_I32       : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x005>;
defm V_CVT_F32_U32       : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x006>;
defm V_CVT_U32_F32       : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x007>;
defm V_CVT_I32_F32       : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x008>;
defm V_CVT_F16_F32       : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x00a>;
defm V_CVT_F32_F16       : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x00b>;
defm V_CVT_RPI_I32_F32   : VOP1_Real_gfx6_gfx7_gfx10<0x00c>;
defm V_CVT_FLR_I32_F32   : VOP1_Real_gfx6_gfx7_gfx10<0x00d>;
defm V_CVT_OFF_F32_I4    : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x00e>;
defm V_CVT_F32_F64       : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x00f>;
defm V_CVT_F64_F32       : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x010>;
defm V_CVT_F32_UBYTE0    : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x011>;
defm V_CVT_F32_UBYTE1    : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x012>;
defm V_CVT_F32_UBYTE2    : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x013>;
defm V_CVT_F32_UBYTE3    : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x014>;
defm V_CVT_U32_F64       : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x015>;
defm V_CVT_F64_U32       : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x016>;
defm V_FRACT_F32         : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x020>;
defm V_TRUNC_F32         : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x021>;
defm V_CEIL_F32          : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x022>;
defm V_RNDNE_F32         : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x023>;
defm V_FLOOR_F32         : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x024>;
defm V_EXP_F32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x025>;
defm V_LOG_F32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x027>;
defm V_RCP_F32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x02a>;
defm V_RCP_IFLAG_F32     : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x02b>;
defm V_RSQ_F32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x02e>;
defm V_RCP_F64           : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x02f>;
defm V_RSQ_F64           : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x031>;
defm V_SQRT_F32          : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x033>;
defm V_SQRT_F64          : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x034>;
defm V_SIN_F32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x035>;
defm V_COS_F32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x036>;
defm V_NOT_B32           : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x037>;
defm V_BFREV_B32         : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x038>;
defm V_FFBH_U32          : VOP1_Real_gfx6_gfx7_gfx10<0x039>;
defm V_FFBL_B32          : VOP1_Real_gfx6_gfx7_gfx10<0x03a>;
defm V_FFBH_I32          : VOP1_Real_gfx6_gfx7_gfx10<0x03b>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x03c>;
defm V_FREXP_MANT_F64    : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x03d>;
defm V_FRACT_F64         : VOP1_Real_gfx6_gfx7_gfx10_NO_DPP_gfx11<0x03e>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x03f>;
defm V_FREXP_MANT_F32    : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x040>;
defm V_CLREXCP           : VOP1_Real_gfx6_gfx7_gfx10<0x041>;
defm V_MOVRELD_B32       : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x042>;
defm V_MOVRELS_B32       : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x043>;
defm V_MOVRELSD_B32      : VOP1_Real_gfx6_gfx7_gfx10_FULL_gfx11<0x044>;

//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//

// Low 32 bits of a VOP1 instruction in DPP form. The remaining bits come from
// VOP_DPPe, which is defined outside this file.
class VOP1_DPPe <bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
  VOP_DPPe <P> {
  bits<8> vdst;

  let Inst{31-25} = 0x3f; // encoding
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{16-9}  = op;
  let Inst{8-0}   = 0xfa; // dpp
}

// Real form of an instruction whose pseudo is named NAME itself (no "_e32"
// suffix); emits a single "_vi" def.
multiclass VOP1Only_Real_vi <bits<10> op> {
  defvar ps = !cast<VOP1_Pseudo>(NAME);
  let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
    def _vi : VOP1_Real<ps, SIEncodingFamily.VI>,
              VOP1e<op{7-0}, ps.Pfl>;
  }
}

// e32 and e64 real forms. The VOP3 opcode is the VOP1 opcode plus 0x140.
multiclass VOP1_Real_e32e64_vi <bits<10> op> {
  defvar ps32 = !cast<VOP1_Pseudo>(NAME#"_e32");
  defvar ps64 = !cast<VOP3_Pseudo>(NAME#"_e64");
  let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
    def _e32_vi : VOP1_Real<ps32, SIEncodingFamily.VI>,
                  VOP1e<op{7-0}, ps32.Pfl>;
    def _e64_vi : VOP3_Real<ps64, SIEncodingFamily.VI>,
                  VOP3e_vi <!add(0x140, op), ps64.Pfl>;
  }
}

// e32/e64 plus the optional SDWA (VI), SDWA9 (GFX9) and DPP forms. Each
// optional form is guarded by the matching flag of the _e32 profile.
// NOTE(review): BoolToList presumably yields an empty list when the flag is
// clear, so the def is skipped in that case - confirm against its definition.
multiclass VOP1_Real_vi <bits<10> op> {
  defvar ps = !cast<VOP1_Pseudo>(NAME#"_e32");

  defm NAME : VOP1_Real_e32e64_vi <op>;

  foreach _ = BoolToList<ps.Pfl.HasExtSDWA>.ret in
  def _sdwa_vi :
    VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  foreach _ = BoolToList<ps.Pfl.HasExtSDWA9>.ret in
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  foreach _ = BoolToList<ps.Pfl.HasExtDPP>.ret in
  def _dpp_vi :
    VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
    VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
}

defm V_NOP               : VOP1_Real_vi <0x0>;
defm V_MOV_B32           : VOP1_Real_vi <0x1>;
defm V_CVT_I32_F64       : VOP1_Real_vi <0x3>;
defm V_CVT_F64_I32       : VOP1_Real_vi <0x4>;
defm V_CVT_F32_I32       : VOP1_Real_vi <0x5>;
defm V_CVT_F32_U32       : VOP1_Real_vi <0x6>;
defm V_CVT_U32_F32       : VOP1_Real_vi <0x7>;
defm V_CVT_I32_F32       : VOP1_Real_vi <0x8>;
defm V_CVT_F16_F32       : VOP1_Real_vi <0xa>;
defm V_CVT_F32_F16       : VOP1_Real_vi <0xb>;
defm V_CVT_RPI_I32_F32   : VOP1_Real_vi <0xc>;
defm V_CVT_FLR_I32_F32   : VOP1_Real_vi <0xd>;
defm V_CVT_OFF_F32_I4    : VOP1_Real_vi <0xe>;
defm V_CVT_F32_F64       : VOP1_Real_vi <0xf>;
defm V_CVT_F64_F32       : VOP1_Real_vi <0x10>;
defm V_CVT_F32_UBYTE0    : VOP1_Real_vi <0x11>;
defm V_CVT_F32_UBYTE1    : VOP1_Real_vi <0x12>;
defm V_CVT_F32_UBYTE2    : VOP1_Real_vi <0x13>;
defm V_CVT_F32_UBYTE3    : VOP1_Real_vi <0x14>;
defm V_CVT_U32_F64       : VOP1_Real_vi <0x15>;
defm V_CVT_F64_U32       : VOP1_Real_vi <0x16>;
defm V_FRACT_F32         : VOP1_Real_vi <0x1b>;
defm V_TRUNC_F32         : VOP1_Real_vi <0x1c>;
defm V_CEIL_F32          : VOP1_Real_vi <0x1d>;
defm V_RNDNE_F32         : VOP1_Real_vi <0x1e>;
defm V_FLOOR_F32         : VOP1_Real_vi <0x1f>;
defm V_EXP_F32           : VOP1_Real_vi <0x20>;
defm V_LOG_F32           : VOP1_Real_vi <0x21>;
defm V_RCP_F32           : VOP1_Real_vi <0x22>;
defm V_RCP_IFLAG_F32     : VOP1_Real_vi <0x23>;
defm V_RSQ_F32           : VOP1_Real_vi <0x24>;
defm V_RCP_F64           : VOP1_Real_vi <0x25>;
defm V_RSQ_F64           : VOP1_Real_vi <0x26>;
defm V_SQRT_F32          : VOP1_Real_vi <0x27>;
defm V_SQRT_F64          : VOP1_Real_vi <0x28>;
defm V_SIN_F32           : VOP1_Real_vi <0x29>;
defm V_COS_F32           : VOP1_Real_vi <0x2a>;
defm V_NOT_B32           : VOP1_Real_vi <0x2b>;
defm V_BFREV_B32         : VOP1_Real_vi <0x2c>;
defm V_FFBH_U32          : VOP1_Real_vi <0x2d>;
defm V_FFBL_B32          : VOP1_Real_vi <0x2e>;
defm V_FFBH_I32          : VOP1_Real_vi <0x2f>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_vi <0x30>;
defm V_FREXP_MANT_F64    : VOP1_Real_vi <0x31>;
defm V_FRACT_F64         : VOP1_Real_vi <0x32>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_vi <0x33>;
defm V_FREXP_MANT_F32    : VOP1_Real_vi <0x34>;
defm V_CLREXCP           : VOP1_Real_vi <0x35>;
// The MOVREL instructions get only the e32/e64 forms (no SDWA/DPP defs).
defm V_MOVRELD_B32       : VOP1_Real_e32e64_vi <0x36>;
defm V_MOVRELS_B32       : VOP1_Real_e32e64_vi <0x37>;
defm V_MOVRELSD_B32      : VOP1_Real_e32e64_vi <0x38>;
defm V_TRUNC_F64         : VOP1_Real_vi <0x17>;
defm V_CEIL_F64          : VOP1_Real_vi <0x18>;
defm V_FLOOR_F64         : VOP1_Real_vi <0x1A>;
defm V_RNDNE_F64         : VOP1_Real_vi <0x19>;
defm V_LOG_LEGACY_F32    : VOP1_Real_vi <0x4c>;
defm V_EXP_LEGACY_F32    : VOP1_Real_vi <0x4b>;
defm V_CVT_F16_U16       : VOP1_Real_vi <0x39>;
defm V_CVT_F16_I16       : VOP1_Real_vi <0x3a>;
defm V_CVT_U16_F16       : VOP1_Real_vi <0x3b>;
defm V_CVT_I16_F16       : VOP1_Real_vi <0x3c>;
defm V_RCP_F16           : VOP1_Real_vi <0x3d>;
defm V_SQRT_F16          : VOP1_Real_vi <0x3e>;
defm V_RSQ_F16           : VOP1_Real_vi <0x3f>;
defm V_LOG_F16           : VOP1_Real_vi <0x40>;
defm V_EXP_F16           : VOP1_Real_vi <0x41>;
defm V_FREXP_MANT_F16    : VOP1_Real_vi <0x42>;
defm V_FREXP_EXP_I16_F16 : VOP1_Real_vi <0x43>;
defm V_FLOOR_F16         : VOP1_Real_vi <0x44>;
defm V_CEIL_F16          : VOP1_Real_vi <0x45>;
defm V_TRUNC_F16         : VOP1_Real_vi <0x46>;
defm V_RNDNE_F16         : VOP1_Real_vi <0x47>;
defm V_FRACT_F16         : VOP1_Real_vi <0x48>;
defm V_SIN_F16           : VOP1_Real_vi <0x49>;
defm V_COS_F16           : VOP1_Real_vi <0x4a>;
defm V_SWAP_B32          : VOP1Only_Real_vi <0x51>;

defm V_SAT_PK_U8_I16     : VOP1_Real_vi<0x4f>;
defm V_CVT_NORM_I16_F16  : VOP1_Real_vi<0x4d>;
defm V_CVT_NORM_U16_F16  : VOP1_Real_vi<0x4e>;

defm V_ACCVGPR_MOV_B32   : VOP1Only_Real_vi<0x52>;

let VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [EXEC, M0] in {

// v_mov_b32 variant for VGPR indexing mode in which $vdst is listed as an
// input (use) operand: it cannot be modelled as a def for codegen. An
// implicit use and def of the super register should be added.
def V_MOV_B32_indirect_write : VPseudoInstSI<
  (outs),
  (ins getVALUDstForVT<i32>.ret:$vdst, getVOPSrc0ForVT<i32>.ret:$src0)>,
  PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
                                        getVOPSrc0ForVT<i32>.ret:$src0)>;

// v_mov_b32 variant for VGPR indexing mode with a regular $vdst def. An
// implicit use of the super register should be added.
def V_MOV_B32_indirect_read : VPseudoInstSI<
  (outs getVALUDstForVT<i32>.ret:$vdst),
  (ins getVOPSrc0ForVT<i32>.ret:$src0)>,
  PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
                                        getVOPSrc0ForVT<i32>.ret:$src0)>;

} // End VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [EXEC, M0]

let OtherPredicates = [isGFX8Plus] in {

// llvm.amdgcn.mov.dpp: select V_MOV_B32_dpp with $src passed as both the
// first and the second register operand.
def : GCNPat <
  (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask,
                           timm:$bank_mask, timm:$bound_ctrl)),
  (V_MOV_B32_dpp VGPR_32:$src, VGPR_32:$src,
                 (as_i32timm $dpp_ctrl), (as_i32timm $row_mask),
                 (as_i32timm $bank_mask), (as_i1timm $bound_ctrl))
>;

// llvm.amdgcn.update.dpp: same selection, but $old is passed as the first
// register operand.
def : GCNPat <
  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl,
                              timm:$row_mask, timm:$bank_mask,
                              timm:$bound_ctrl)),
  (V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src,
                 (as_i32timm $dpp_ctrl), (as_i32timm $row_mask),
                 (as_i32timm $bank_mask), (as_i1timm $bound_ctrl))
>;

// i16 <-> i32/i64 any-extension and truncation are selected as plain copies,
// a REG_SEQUENCE with a zero high half, or a sub0 extraction.
def : GCNPat <
  (i32 (anyext i16:$src)),
  (COPY $src)
>;

def : GCNPat <
  (i64 (anyext i16:$src)),
  (REG_SEQUENCE VReg_64,
                (i32 (COPY $src)), sub0,
                (V_MOV_B32_e32 (i32 0)), sub1)
>;

def : GCNPat <
  (i16 (trunc i32:$src)),
  (COPY $src)
>;

def : GCNPat <
  (i16 (trunc i64:$src)),
  (EXTRACT_SUBREG $src, sub0)
>;

} // End OtherPredicates = [isGFX8Plus]

//===----------------------------------------------------------------------===//
// GFX9
//===----------------------------------------------------------------------===//

// GFX9-only instruction: e32/e64 forms (reusing the VI multiclass under a
// GFX9 predicate and decoder namespace) plus optional SDWA9 and DPP forms,
// each guarded by the matching flag of the _e32 profile.
multiclass VOP1_Real_gfx9 <bits<10> op> {
  defvar ps = !cast<VOP1_Pseudo>(NAME#"_e32");

  let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
    defm NAME : VOP1_Real_e32e64_vi <op>;
  }

  foreach _ = BoolToList<ps.Pfl.HasExtSDWA9>.ret in
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  foreach _ = BoolToList<ps.Pfl.HasExtDPP>.ret in
  def _dpp_gfx9 :
    VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
    VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
}

// Same as VOP1_Real_gfx9, except that the SDWA def hard-codes encoding bits
// 42-40 to 6.
// NOTE(review): going by the multiclass name these bits are presumably the
// SDWA dst_sel field - confirm against the SDWA encoding definition.
multiclass VOP1_Real_NoDstSel_SDWA_gfx9 <bits<10> op> {
  defvar ps = !cast<VOP1_Pseudo>(NAME#"_e32");

  let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
    defm NAME : VOP1_Real_e32e64_vi <op>;
  }

  foreach _ = BoolToList<ps.Pfl.HasExtSDWA9>.ret in
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
    let Inst{42-40} = 6;
  }

  foreach _ = BoolToList<ps.Pfl.HasExtDPP>.ret in
  def _dpp_gfx9 :
    VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
    VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
}

defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;

// V_MOV_B64 is instantiated under the isGFX940Plus assembler predicate.
let AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX9" in
defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>;

// FP8/BF8 conversions, additionally gated on HasFP8Insts.
let OtherPredicates = [HasFP8Insts] in {
defm V_CVT_F32_FP8    : VOP1_Real_NoDstSel_SDWA_gfx9<0x54>;
defm V_CVT_F32_BF8    : VOP1_Real_NoDstSel_SDWA_gfx9<0x55>;
defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>;
defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>;
}

//===----------------------------------------------------------------------===//
// GFX10
//===----------------------------------------------------------------------===//

// Selection pattern for llvm.amdgcn.mov.dpp8: emits the given DPP8 v_mov_b32
// real instruction with $src passed as both register operands, the dpp8
// immediate, and DPP8Mode.FI_0 as the last operand. The instruction is a
// parameter because each generation has its own real def.
class VOP1_MovDPP8Pat<Instruction inst> : GCNPat <
  (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
  (inst VGPR_32:$src, VGPR_32:$src,
        (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;

let OtherPredicates = [isGFX10Only] in {
def : VOP1_MovDPP8Pat<V_MOV_B32_dpp8_gfx10>;
} // End OtherPredicates = [isGFX10Only]

//===----------------------------------------------------------------------===//
// GFX11
//===----------------------------------------------------------------------===//

let OtherPredicates = [isGFX11Only] in {
def : VOP1_MovDPP8Pat<V_MOV_B32_dpp8_gfx11>;
} // End OtherPredicates = [isGFX11Only]