//===-- VOP1Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP1 Classes
//===----------------------------------------------------------------------===//

class VOP1e <bits<8> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;

  let Inst{8-0}   = !if(P.HasSrc0, src0{8-0}, ?);
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

class VOP1_SDWA9Ae <bits<8> op, VOPProfile P> : VOP_SDWA9Ae <P> {
  bits<8> vdst;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1Only = 0> :
  VOP_Pseudo <opName, !if(VOP1Only, "", "_e32"), P, P.Outs32, P.Ins32, "", pattern> {

  let AsmOperands = P.Asm32;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret);

  let mayRaiseFPException = ReadsModeReg;

  let VOP1 = 1;
  let VALU = 1;
  let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);

  let AsmVariantName = AMDGPUAsmVariants.Default;
}

class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let AsmVariantName       = ps.AsmVariantName;
  let Constraints          = ps.Constraints;
  let DisableEncoding      = ps.DisableEncoding;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses                 = ps.Uses;
  let Defs                 = ps.Defs;
}

class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP1";
}

class VOP1_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_DPP_Pseudo <OpName, P, pattern> {
}

class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret =
    !if(P.HasModifiers,
        [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods P.Src0VT:$src0, i32:$src0_modifiers))))],
        !if(P.HasOMod,
            [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0,
                                                  i1:$clamp, i32:$omod))))],
            [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]
        )
    );
}

multiclass VOP1Inst <string opName, VOPProfile P,
                     SDPatternOperator node = null_frag> {
  // We only want to set this on the basic, non-SDWA or DPP forms.
  defvar should_mov_imm = !eq(opName, "v_mov_b32");

  let isMoveImm = should_mov_imm in {
    def _e32 : VOP1_Pseudo <opName, P>;
    def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>;
  }

  foreach _ = BoolToList<P.HasExtSDWA>.ret in
    def _sdwa : VOP1_SDWA_Pseudo <opName, P>;

  foreach _ = BoolToList<P.HasExtDPP>.ret in
    def _dpp : VOP1_DPP_Pseudo <opName, P>;

  def : MnemonicAlias<opName#"_e32", opName>, LetDummies;
  def : MnemonicAlias<opName#"_e64", opName>, LetDummies;

  foreach _ = BoolToList<P.HasExtSDWA>.ret in
    def : MnemonicAlias<opName#"_sdwa", opName>, LetDummies;

  foreach _ = BoolToList<P.HasExtDPP>.ret in
    def : MnemonicAlias<opName#"_dpp", opName>, LetDummies;
}

// Special profile for instructions which have clamp
// and output modifiers (but have no input modifiers)
class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
  VOPProfile<[dstVt, srcVt, untyped, untyped]> {

  let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
  let Asm64 = "$vdst, $src0$clamp$omod";

  let HasModifiers = 0;
  let HasClamp = 1;
  let HasOMod = 1;
}

def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;

//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//

let VOPAsmPrefer32Bit = 1 in {
defm V_NOP : VOP1Inst <"v_nop", VOP_NONE>;
}

let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>;
} // End isReMaterializable = 1, isAsCheapAsAMove = 1

// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: Make profile for this, there is VOP3 encoding also
def V_READFIRSTLANE_B32 :
  InstSI <(outs SReg_32:$vdst),
          (ins VRegOrLds_32:$src0),
          "v_readfirstlane_b32 $vdst, $src0",
          [(set i32:$vdst, (int_amdgcn_readfirstlane (i32 VRegOrLds_32:$src0)))]>,
  Enc32 {

  let isCodeGenOnly = 0;
  let UseNamedOperandTable = 1;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  let VOP1 = 1;
  let VALU = 1;
  let Uses = [EXEC];
  let isConvergent = 1;

  bits<8> vdst;
  bits<9> src0;

  let Inst{8-0}   = src0;
  let Inst{16-9}  = 0x2;
  let Inst{24-17} = vdst;
  let Inst{31-25} = 0x3f; // encoding
}

let SchedRW = [WriteDoubleCvt] in {
defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>;

let mayRaiseFPException = 0 in {
defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
}

defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;

let mayRaiseFPException = 0 in {
defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
}

} // End SchedRW = [WriteDoubleCvt]

let SchedRW = [WriteFloatCvt] in {

// XXX: Does this really not raise exceptions? The manual claims the
// 16-bit ones can.
let mayRaiseFPException = 0 in {
defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
}

defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
let FPDPRounding = 1 in {
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
} // End FPDPRounding = 1

defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
} // End ReadsModeReg = 0, mayRaiseFPException = 0
} // End SchedRW = [WriteFloatCvt]

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;
} // End ReadsModeReg = 0, mayRaiseFPException = 0

defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>;
defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>;
defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;

let SchedRW = [WriteTrans32] in {
defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>;
defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>;
defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32, AMDGPUrcp_iflag>;
defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, any_amdgcn_sqrt>;
} // End SchedRW = [WriteTrans32]

let SchedRW = [WriteTrans64] in {
defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>;
defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>;
defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, any_amdgcn_sqrt>;
} // End SchedRW = [WriteTrans64]

let SchedRW = [WriteTrans32] in {
defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>;
defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>;
} // End SchedRW = [WriteTrans32]

defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>;
defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, bitreverse>;
defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32, AMDGPUffbh_u32>;
defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32, AMDGPUffbl_b32>;
defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>;

let SchedRW = [WriteDoubleAdd] in {
defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>;
let FPDPRounding = 1 in {
defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>;
} // End FPDPRounding = 1
} // End SchedRW = [WriteDoubleAdd]

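// Note: v_frexp_mant_* and v_frexp_exp_* together decompose a value into
// mantissa and exponent (x = mant * 2^exp) and map directly onto the
// llvm.amdgcn.frexp.mant / llvm.amdgcn.frexp.exp intrinsics used above and
// in the f32 variants below.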
defm V_FREXP_EXP_I32_F32 : VOP1Inst <"v_frexp_exp_i32_f32", VOP_I32_F32, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F32 : VOP1Inst <"v_frexp_mant_f32", VOP_F32_F32, int_amdgcn_frexp_mant>;

let VOPAsmPrefer32Bit = 1 in {
defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
}

// Restrict src0 to be VGPR
def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> {
  let Src0RC32 = VRegSrc_32;
  let Src0RC64 = VRegSrc_32;
}

// Special case because there are no true output operands. Hack vdst
// to be a src operand. The custom inserter must add a tied implicit
// def and use of the super register since there seems to be no way to
// add an implicit def of a virtual register in tablegen.
class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, untyped]> {
  let Src0RC32 = VOPDstOperand<VGPR_32>;
  let Src0RC64 = VOPDstOperand<VGPR_32>;

  let Outs = (outs);
  let Ins32 = (ins Src0RC32:$vdst, Src1RC:$src0);
  let Ins64 = (ins Src0RC64:$vdst, Src1RC:$src0);
  let Asm32 = getAsm32<1, 1>.ret;
  let Asm64 = getAsm64<1, 1, 0, 0, 1>.ret;

  let OutsSDWA = (outs Src0RC32:$vdst);
  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                 clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
                 src0_sel:$src0_sel);
  let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;

  let OutsDPP = (outs Src0RC32:$vdst);
  let InsDPP16 = (ins Src0RC32:$old, Src0RC32:$src0,
                  dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                  bank_mask:$bank_mask, bound_ctrl:$bound_ctrl, FI:$fi);
  let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;

  let OutsDPP8 = (outs Src0RC32:$vdst);
  let InsDPP8 = (ins Src0RC32:$old, Src0RC32:$src0, dpp8:$dpp8, FI:$fi);
  let AsmDPP8 = getAsmDPP8<1, 1, 0>.ret;

  let HasDst = 0;
  let EmitDst = 1; // force vdst emission
}

def VOP_MOVRELD : VOP_MOVREL<VSrc_b32>;
def VOP_MOVRELSD : VOP_MOVREL<VRegSrc_32>;

let SubtargetPredicate = HasMovrel, Uses = [M0, EXEC] in {
// v_movreld_b32 is a special case because the destination output
// register is really a source. It isn't actually read (but may be
// written), and is only to provide the base register to start
// indexing from. Tablegen seems to not let you define an implicit
// virtual register output for the super register being written into,
// so this must have an implicit def of the register added to it.
defm V_MOVRELD_B32 : VOP1Inst <"v_movreld_b32", VOP_MOVRELD>;
defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_MOVRELS>;
defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_MOVRELSD>;
} // End SubtargetPredicate = HasMovrel, Uses = [M0, EXEC]

defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>;

let SubtargetPredicate = isGFX6GFX7 in {
  let SchedRW = [WriteTrans32] in {
    defm V_LOG_CLAMP_F32 :
      VOP1Inst<"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
    defm V_RCP_CLAMP_F32 :
      VOP1Inst<"v_rcp_clamp_f32", VOP_F32_F32>;
    defm V_RCP_LEGACY_F32 :
      VOP1Inst<"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
    defm V_RSQ_CLAMP_F32 :
      VOP1Inst<"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
    defm V_RSQ_LEGACY_F32 :
      VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, int_amdgcn_rsq_legacy>;
  } // End SchedRW = [WriteTrans32]

  let SchedRW = [WriteDouble] in {
    defm V_RCP_CLAMP_F64 :
      VOP1Inst<"v_rcp_clamp_f64", VOP_F64_F64>;
    defm V_RSQ_CLAMP_F64 :
      VOP1Inst<"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>;
  } // End SchedRW = [WriteDouble]
} // End SubtargetPredicate = isGFX6GFX7

let SubtargetPredicate = isGFX7GFX8GFX9 in {
  let SchedRW = [WriteTrans32] in {
    defm V_LOG_LEGACY_F32 : VOP1Inst<"v_log_legacy_f32", VOP_F32_F32>;
    defm V_EXP_LEGACY_F32 : VOP1Inst<"v_exp_legacy_f32", VOP_F32_F32>;
  } // End SchedRW = [WriteTrans32]
} // End SubtargetPredicate = isGFX7GFX8GFX9

let SubtargetPredicate = isGFX7Plus in {
  let SchedRW = [WriteDoubleAdd] in {
    defm V_TRUNC_F64 : VOP1Inst<"v_trunc_f64", VOP_F64_F64, ftrunc>;
    defm V_CEIL_F64 : VOP1Inst<"v_ceil_f64", VOP_F64_F64, fceil>;
    defm V_RNDNE_F64 : VOP1Inst<"v_rndne_f64", VOP_F64_F64, frint>;
    defm V_FLOOR_F64 : VOP1Inst<"v_floor_f64", VOP_F64_F64, ffloor>;
  } // End SchedRW = [WriteDoubleAdd]
} // End SubtargetPredicate = isGFX7Plus

let SubtargetPredicate = Has16BitInsts in {

let FPDPRounding = 1 in {
defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
} // End FPDPRounding = 1
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>;
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>;
let SchedRW = [WriteTrans32] in {
defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>;
defm V_LOG_F16 : VOP1Inst <"v_log_f16", VOP_F16_F16, flog2>;
defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16, fexp2>;
defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
} // End SchedRW = [WriteTrans32]
defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16, int_amdgcn_frexp_exp>;
defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>;
defm V_CEIL_F16 : VOP1Inst <"v_ceil_f16", VOP_F16_F16, fceil>;
defm V_TRUNC_F16 : VOP1Inst <"v_trunc_f16", VOP_F16_F16, ftrunc>;
defm V_RNDNE_F16 : VOP1Inst <"v_rndne_f16", VOP_F16_F16, frint>;
let FPDPRounding = 1 in {
defm V_FRACT_F16 : VOP1Inst <"v_fract_f16", VOP_F16_F16, AMDGPUfract>;
} // End FPDPRounding = 1

} // End SubtargetPredicate = Has16BitInsts

let OtherPredicates = [Has16BitInsts] in {

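// Select the generic f16 <-> f32 conversion nodes directly to the VOP1
// convert instructions when 16-bit instructions are available.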
def : GCNPat<
  (f32 (f16_to_fp i16:$src)),
  (V_CVT_F32_F16_e32 $src)
>;

def : GCNPat<
  (i16 (AMDGPUfp_to_f16 f32:$src)),
  (V_CVT_F16_F32_e32 $src)
>;

} // End OtherPredicates = [Has16BitInsts]

def VOP_SWAP_I32 : VOPProfile<[i32, i32, i32, untyped]> {
  let Outs32 = (outs VGPR_32:$vdst, VGPR_32:$vdst1);
  let Ins32 = (ins VGPR_32:$src0, VGPR_32:$src1);
  let Outs64 = Outs32;
  let Asm32 = " $vdst, $src0";
  let Asm64 = "";
  let Ins64 = (ins);
}

let SubtargetPredicate = isGFX9Plus in {
  def V_SWAP_B32 : VOP1_Pseudo<"v_swap_b32", VOP_SWAP_I32, [], 1> {
    let Constraints = "$vdst = $src1, $vdst1 = $src0";
    let DisableEncoding = "$vdst1,$src1";
    let SchedRW = [Write64Bit, Write64Bit];
  }

  defm V_SAT_PK_U8_I16 : VOP1Inst<"v_sat_pk_u8_i16", VOP_I32_I32>;

  let mayRaiseFPException = 0 in {
    defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16>;
    defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16>;
  } // End mayRaiseFPException = 0
} // End SubtargetPredicate = isGFX9Plus

let SubtargetPredicate = isGFX9Only in {
  defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
} // End SubtargetPredicate = isGFX9Only

let SubtargetPredicate = isGFX10Plus in {
  defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NONE>;

  let Uses = [M0] in {
    defm V_MOVRELSD_2_B32 :
      VOP1Inst<"v_movrelsd_2_b32", VOP_MOVRELSD>;

    def V_SWAPREL_B32 : VOP1_Pseudo<"v_swaprel_b32", VOP_SWAP_I32, [], 1> {
      let Constraints = "$vdst = $src1, $vdst1 = $src0";
      let DisableEncoding = "$vdst1,$src1";
      let SchedRW = [Write64Bit, Write64Bit];
    }
  } // End Uses = [M0]
} // End SubtargetPredicate = isGFX10Plus

//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//

class VOP1_DPP<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 = 0> :
    VOP_DPP<ps.OpName, p, isDPP16> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;

  bits<8> vdst;
  let Inst{8-0}   = 0xfa;
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f;
}

class VOP1_DPP16<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl> :
    VOP1_DPP<op, ps, p, 1>,
    SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX10> {
  let AssemblerPredicate = HasDPP16;
  let SubtargetPredicate = HasDPP16;
}

class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
    VOP_DPP8<ps.OpName, p> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;

  bits<8> vdst;
  let Inst{8-0}   = fi;
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f;

  let AssemblerPredicate = HasDPP8;
  let SubtargetPredicate = HasDPP8;
}

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

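// The *_Real_gfx10 multiclasses below attach the concrete GFX10 MC encodings
// to each pseudo: e32, e64 (VOP3), SDWA, DPP16 and DPP8 forms, with the SDWA
// and DPP variants emitted only when the profile advertises the extension.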
let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
  multiclass VOP1Only_Real_gfx10<bits<9> op> {
    def _gfx10 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.GFX10>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
  }
  multiclass VOP1_Real_e32_gfx10<bits<9> op> {
    def _e32_gfx10 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
  }
  multiclass VOP1_Real_e64_gfx10<bits<9> op> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
  multiclass VOP1_Real_sdwa_gfx10<bits<9> op> {
    foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
      def _sdwa_gfx10 :
        VOP_SDWA10_Real<!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
        VOP1_SDWA9Ae<op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
        let DecoderNamespace = "SDWA10";
      }
  }
  multiclass VOP1_Real_dpp_gfx10<bits<9> op> {
    foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
      def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")> {
        let DecoderNamespace = "SDWA10";
      }
  }
  multiclass VOP1_Real_dpp8_gfx10<bits<9> op> {
    foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
      def _dpp8_gfx10 : VOP1_DPP8<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")> {
        let DecoderNamespace = "DPP8";
      }
  }
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"

multiclass VOP1_Real_gfx10<bits<9> op> :
  VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>,
  VOP1_Real_sdwa_gfx10<op>, VOP1_Real_dpp_gfx10<op>,
  VOP1_Real_dpp8_gfx10<op>;

defm V_PIPEFLUSH : VOP1_Real_gfx10<0x01b>;
defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10<0x048>;
defm V_CVT_F16_U16 : VOP1_Real_gfx10<0x050>;
defm V_CVT_F16_I16 : VOP1_Real_gfx10<0x051>;
defm V_CVT_U16_F16 : VOP1_Real_gfx10<0x052>;
defm V_CVT_I16_F16 : VOP1_Real_gfx10<0x053>;
defm V_RCP_F16 : VOP1_Real_gfx10<0x054>;
defm V_SQRT_F16 : VOP1_Real_gfx10<0x055>;
defm V_RSQ_F16 : VOP1_Real_gfx10<0x056>;
defm V_LOG_F16 : VOP1_Real_gfx10<0x057>;
defm V_EXP_F16 : VOP1_Real_gfx10<0x058>;
defm V_FREXP_MANT_F16 : VOP1_Real_gfx10<0x059>;
defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10<0x05a>;
defm V_FLOOR_F16 : VOP1_Real_gfx10<0x05b>;
defm V_CEIL_F16 : VOP1_Real_gfx10<0x05c>;
defm V_TRUNC_F16 : VOP1_Real_gfx10<0x05d>;
defm V_RNDNE_F16 : VOP1_Real_gfx10<0x05e>;
defm V_FRACT_F16 : VOP1_Real_gfx10<0x05f>;
defm V_SIN_F16 : VOP1_Real_gfx10<0x060>;
defm V_COS_F16 : VOP1_Real_gfx10<0x061>;
defm V_SAT_PK_U8_I16 : VOP1_Real_gfx10<0x062>;
defm V_CVT_NORM_I16_F16 : VOP1_Real_gfx10<0x063>;
defm V_CVT_NORM_U16_F16 : VOP1_Real_gfx10<0x064>;

defm V_SWAP_B32 : VOP1Only_Real_gfx10<0x065>;
defm V_SWAPREL_B32 : VOP1Only_Real_gfx10<0x068>;

//===----------------------------------------------------------------------===//
// GFX7, GFX10.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
  multiclass VOP1_Real_e32_gfx7<bits<9> op> {
    def _e32_gfx7 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
  }
  multiclass VOP1_Real_e64_gfx7<bits<9> op> {
    def _e64_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"

multiclass VOP1_Real_gfx7<bits<9> op> :
  VOP1_Real_e32_gfx7<op>, VOP1_Real_e64_gfx7<op>;

multiclass VOP1_Real_gfx7_gfx10<bits<9> op> :
  VOP1_Real_gfx7<op>, VOP1_Real_gfx10<op>;

defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>;
defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>;

defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10<0x017>;
defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10<0x018>;
defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10<0x019>;
defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10<0x01a>;

//===----------------------------------------------------------------------===//
// GFX6, GFX7, GFX10.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  multiclass VOP1_Real_e32_gfx6_gfx7<bits<9> op> {
    def _e32_gfx6_gfx7 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
  }
  multiclass VOP1_Real_e64_gfx6_gfx7<bits<9> op> {
    def _e64_gfx6_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

multiclass VOP1_Real_gfx6_gfx7<bits<9> op> :
  VOP1_Real_e32_gfx6_gfx7<op>, VOP1_Real_e64_gfx6_gfx7<op>;

multiclass VOP1_Real_gfx6_gfx7_gfx10<bits<9> op> :
  VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10<op>;

defm V_LOG_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x026>;
defm V_RCP_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x028>;
defm V_RCP_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x029>;
defm V_RSQ_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x02c>;
defm V_RSQ_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x02d>;
defm V_RCP_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x030>;
defm V_RSQ_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x032>;

defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10<0x000>;
defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x001>;
defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x003>;
defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x004>;
defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x005>;
defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x006>;
defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x007>;
defm V_CVT_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x008>;
defm V_MOV_FED_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x009>;
defm V_CVT_F16_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00a>;
defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10<0x00b>;
defm V_CVT_RPI_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00c>;
defm V_CVT_FLR_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00d>;
defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10<0x00e>;
defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x00f>;
defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x010>;
defm V_CVT_F32_UBYTE0 : VOP1_Real_gfx6_gfx7_gfx10<0x011>;
defm V_CVT_F32_UBYTE1 : VOP1_Real_gfx6_gfx7_gfx10<0x012>;
defm V_CVT_F32_UBYTE2 : VOP1_Real_gfx6_gfx7_gfx10<0x013>;
defm V_CVT_F32_UBYTE3 : VOP1_Real_gfx6_gfx7_gfx10<0x014>;
defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x015>;
defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x016>;
defm V_FRACT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x020>;
defm V_TRUNC_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x021>;
defm V_CEIL_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x022>;
defm V_RNDNE_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x023>;
defm V_FLOOR_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x024>;
defm V_EXP_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x025>;
defm V_LOG_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x027>;
defm V_RCP_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02a>;
defm V_RCP_IFLAG_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02b>;
defm V_RSQ_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02e>;
defm V_RCP_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x02f>;
defm V_RSQ_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x031>;
defm V_SQRT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x033>;
defm V_SQRT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x034>;
defm V_SIN_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x035>;
defm V_COS_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x036>;
defm V_NOT_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x037>;
defm V_BFREV_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x038>;
defm V_FFBH_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x039>;
defm V_FFBL_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x03a>;
defm V_FFBH_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x03b>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03c>;
defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03d>;
defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03e>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x03f>;
defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x040>;
defm V_CLREXCP : VOP1_Real_gfx6_gfx7_gfx10<0x041>;
defm V_MOVRELD_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x042>;
defm V_MOVRELS_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x043>;
defm V_MOVRELSD_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x044>;

//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//

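// The VI-family real encodings below cover GFX8 and GFX9: VOP1_Real_vi emits
// the e32/e64 forms plus, where the profile allows it, the VI SDWA, GFX9 SDWA
// and DPP encodings.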
class VOP1_DPPe <bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
  VOP_DPPe <P> {
  bits<8> vdst;
  let Inst{8-0}   = 0xfa; // dpp
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

multiclass VOP1Only_Real_vi <bits<10> op> {
  let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
    def _vi :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.VI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
  }
}

multiclass VOP1_Real_e32e64_vi <bits<10> op> {
  let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
    def _e32_vi :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
    def _e64_vi :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
      VOP3e_vi <!add(0x140, op), !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
}

multiclass VOP1_Real_vi <bits<10> op> {
  defm NAME : VOP1_Real_e32e64_vi <op>;

  foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA>.ret in
    def _sdwa_vi :
      VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx9 :
      VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_vi :
      VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
      VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
}

defm V_NOP : VOP1_Real_vi <0x0>;
defm V_MOV_B32 : VOP1_Real_vi <0x1>;
defm V_CVT_I32_F64 : VOP1_Real_vi <0x3>;
defm V_CVT_F64_I32 : VOP1_Real_vi <0x4>;
defm V_CVT_F32_I32 : VOP1_Real_vi <0x5>;
defm V_CVT_F32_U32 : VOP1_Real_vi <0x6>;
defm V_CVT_U32_F32 : VOP1_Real_vi <0x7>;
defm V_CVT_I32_F32 : VOP1_Real_vi <0x8>;
defm V_MOV_FED_B32 : VOP1_Real_vi <0x9>;
defm V_CVT_F16_F32 : VOP1_Real_vi <0xa>;
defm V_CVT_F32_F16 : VOP1_Real_vi <0xb>;
defm V_CVT_RPI_I32_F32 : VOP1_Real_vi <0xc>;
defm V_CVT_FLR_I32_F32 : VOP1_Real_vi <0xd>;
defm V_CVT_OFF_F32_I4 : VOP1_Real_vi <0xe>;
defm V_CVT_F32_F64 : VOP1_Real_vi <0xf>;
defm V_CVT_F64_F32 : VOP1_Real_vi <0x10>;
defm V_CVT_F32_UBYTE0 : VOP1_Real_vi <0x11>;
defm V_CVT_F32_UBYTE1 : VOP1_Real_vi <0x12>;
defm V_CVT_F32_UBYTE2 : VOP1_Real_vi <0x13>;
defm V_CVT_F32_UBYTE3 : VOP1_Real_vi <0x14>;
defm V_CVT_U32_F64 : VOP1_Real_vi <0x15>;
defm V_CVT_F64_U32 : VOP1_Real_vi <0x16>;
defm V_FRACT_F32 : VOP1_Real_vi <0x1b>;
defm V_TRUNC_F32 : VOP1_Real_vi <0x1c>;
defm V_CEIL_F32 : VOP1_Real_vi <0x1d>;
defm V_RNDNE_F32 : VOP1_Real_vi <0x1e>;
defm V_FLOOR_F32 : VOP1_Real_vi <0x1f>;
defm V_EXP_F32 : VOP1_Real_vi <0x20>;
defm V_LOG_F32 : VOP1_Real_vi <0x21>;
defm V_RCP_F32 : VOP1_Real_vi <0x22>;
defm V_RCP_IFLAG_F32 : VOP1_Real_vi <0x23>;
defm V_RSQ_F32 : VOP1_Real_vi <0x24>;
defm V_RCP_F64 : VOP1_Real_vi <0x25>;
defm V_RSQ_F64 : VOP1_Real_vi <0x26>;
defm V_SQRT_F32 : VOP1_Real_vi <0x27>;
defm V_SQRT_F64 : VOP1_Real_vi <0x28>;
defm V_SIN_F32 : VOP1_Real_vi <0x29>;
defm V_COS_F32 : VOP1_Real_vi <0x2a>;
defm V_NOT_B32 : VOP1_Real_vi <0x2b>;
defm V_BFREV_B32 : VOP1_Real_vi <0x2c>;
defm V_FFBH_U32 : VOP1_Real_vi <0x2d>;
defm V_FFBL_B32 : VOP1_Real_vi <0x2e>;
defm V_FFBH_I32 : VOP1_Real_vi <0x2f>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_vi <0x30>;
defm V_FREXP_MANT_F64 : VOP1_Real_vi <0x31>;
defm V_FRACT_F64 : VOP1_Real_vi <0x32>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_vi <0x33>;
defm V_FREXP_MANT_F32 : VOP1_Real_vi <0x34>;
defm V_CLREXCP : VOP1_Real_vi <0x35>;
defm V_MOVRELD_B32 : VOP1_Real_e32e64_vi <0x36>;
defm V_MOVRELS_B32 : VOP1_Real_e32e64_vi <0x37>;
defm V_MOVRELSD_B32 : VOP1_Real_e32e64_vi <0x38>;
defm V_TRUNC_F64 : VOP1_Real_vi <0x17>;
defm V_CEIL_F64 : VOP1_Real_vi <0x18>;
defm V_FLOOR_F64 : VOP1_Real_vi <0x1A>;
defm V_RNDNE_F64 : VOP1_Real_vi <0x19>;
defm V_LOG_LEGACY_F32 : VOP1_Real_vi <0x4c>;
defm V_EXP_LEGACY_F32 : VOP1_Real_vi <0x4b>;
defm V_CVT_F16_U16 : VOP1_Real_vi <0x39>;
defm V_CVT_F16_I16 : VOP1_Real_vi <0x3a>;
defm V_CVT_U16_F16 : VOP1_Real_vi <0x3b>;
defm V_CVT_I16_F16 : VOP1_Real_vi <0x3c>;
defm V_RCP_F16 : VOP1_Real_vi <0x3d>;
defm V_SQRT_F16 : VOP1_Real_vi <0x3e>;
defm V_RSQ_F16 : VOP1_Real_vi <0x3f>;
defm V_LOG_F16 : VOP1_Real_vi <0x40>;
defm V_EXP_F16 : VOP1_Real_vi <0x41>;
defm V_FREXP_MANT_F16 : VOP1_Real_vi <0x42>;
defm V_FREXP_EXP_I16_F16 : VOP1_Real_vi <0x43>;
defm V_FLOOR_F16 : VOP1_Real_vi <0x44>;
defm V_CEIL_F16 : VOP1_Real_vi <0x45>;
defm V_TRUNC_F16 : VOP1_Real_vi <0x46>;
defm V_RNDNE_F16 : VOP1_Real_vi <0x47>;
defm V_FRACT_F16 : VOP1_Real_vi <0x48>;
defm V_SIN_F16 : VOP1_Real_vi <0x49>;
defm V_COS_F16 : VOP1_Real_vi <0x4a>;
defm V_SWAP_B32 : VOP1Only_Real_vi <0x51>;

defm V_SAT_PK_U8_I16 : VOP1_Real_vi <0x4f>;
defm V_CVT_NORM_I16_F16 : VOP1_Real_vi <0x4d>;
defm V_CVT_NORM_U16_F16 : VOP1_Real_vi <0x4e>;

// Copy of v_mov_b32 with $vdst as a use operand for use with VGPR
// indexing mode. vdst can't be treated as a def for codegen purposes,
// and an implicit use and def of the super register should be added.
def V_MOV_B32_indirect : VPseudoInstSI<(outs),
  (ins getVALUDstForVT<i32>.ret:$vdst, getVOPSrc0ForVT<i32>.ret:$src0)>,
  PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
                                        getVOPSrc0ForVT<i32>.ret:$src0)> {
  let VOP1 = 1;
  let SubtargetPredicate = isGFX8GFX9;
}

let OtherPredicates = [isGFX8Plus] in {

def : GCNPat <
  (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask,
                           timm:$bank_mask, timm:$bound_ctrl)),
  (V_MOV_B32_dpp VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp_ctrl),
                 (as_i32timm $row_mask), (as_i32timm $bank_mask),
                 (as_i1timm $bound_ctrl))
>;

def : GCNPat <
  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl,
                              timm:$row_mask, timm:$bank_mask,
                              timm:$bound_ctrl)),
  (V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src, (as_i32timm $dpp_ctrl),
                 (as_i32timm $row_mask), (as_i32timm $bank_mask),
                 (as_i1timm $bound_ctrl))
>;

} // End OtherPredicates = [isGFX8Plus]

let OtherPredicates = [isGFX8Plus] in {
def : GCNPat<
  (i32 (anyext i16:$src)),
  (COPY $src)
>;

def : GCNPat<
  (i64 (anyext i16:$src)),
  (REG_SEQUENCE VReg_64,
    (i32 (COPY $src)), sub0,
    (V_MOV_B32_e32 (i32 0)), sub1)
>;

def : GCNPat<
  (i16 (trunc i32:$src)),
  (COPY $src)
>;

def : GCNPat <
  (i16 (trunc i64:$src)),
  (EXTRACT_SUBREG $src, sub0)
>;

} // End OtherPredicates = [isGFX8Plus]

//===----------------------------------------------------------------------===//
// GFX9
//===----------------------------------------------------------------------===//

multiclass VOP1_Real_gfx9 <bits<10> op> {
  let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
    defm NAME : VOP1_Real_e32e64_vi <op>;
  }

  foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx9 :
      VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_gfx9 :
      VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
      VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;

}

defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;

//===----------------------------------------------------------------------===//
// GFX10
//===----------------------------------------------------------------------===//

let OtherPredicates = [isGFX10Plus] in {
def : GCNPat <
  (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
  (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, VGPR_32:$src,
                        (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX10Plus]