//===-- VOP1Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP1 Classes
//===----------------------------------------------------------------------===//

// Plain 32-bit VOP1 encoding: src0 in bits 8:0, opcode in bits 16:9,
// vdst in bits 24:17, and the fixed VOP1 encoding marker 0x3f in bits 31:25.
class VOP1e <bits<8> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;

  let Inst{8-0}   = !if(P.HasSrc0, src0{8-0}, ?);
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; //encoding
}

// SDWA form: the src0 field is replaced by the fixed SDWA marker 0xf9;
// the actual operand description lives in the extra SDWA dword (VOP_SDWAe).
class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

// GFX9-style SDWA form; identical first dword, different second dword
// (VOP_SDWA9Ae).
class VOP1_SDWA9Ae <bits<8> op, VOPProfile P> : VOP_SDWA9Ae <P> {
  bits<8> vdst;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

// Target-independent pseudo for a VOP1 instruction. When VOP1Only is set the
// pseudo keeps the bare mnemonic (no "_e32" suffix) because no e64 form exists.
class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1Only = 0> :
  VOP_Pseudo <opName, !if(VOP1Only, "", "_e32"), P, P.Outs32, P.Ins32, "", pattern> {

  let AsmOperands = P.Asm32;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  // FP ops read the rounding/denorm state from the MODE register.
  let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret);

  let mayRaiseFPException = ReadsModeReg;

  let VOP1 = 1;
  let VALU = 1;
  let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);

  let AsmVariantName = AMDGPUAsmVariants.Default;
}

// Real (encodable) VOP1 instruction for a specific encoding family; copies
// all relevant flags from the pseudo it implements.
class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  let Constraints     = ps.Constraints;
  let DisableEncoding = ps.DisableEncoding;

  // copy relevant pseudo op flags
  // NOTE(review): Constraints and DisableEncoding are assigned again below;
  // the repeated lets are redundant but harmless.
  let SubtargetPredicate = ps.SubtargetPredicate;
  let AsmMatchConverter  = ps.AsmMatchConverter;
  let AsmVariantName     = ps.AsmVariantName;
  let Constraints        = ps.Constraints;
  let DisableEncoding    = ps.DisableEncoding;
  let TSFlags            = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses               = ps.Uses;
  let Defs               = ps.Defs;
}

// SDWA pseudo; uses the VOP1-specific assembler match converter.
class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP1";
}

// DPP pseudo; no VOP1-specific adjustments needed.
class VOP1_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_DPP_Pseudo <OpName, P, pattern> {
}

// Selection pattern for the e64 (VOP3) form: chooses between a pattern with
// source modifiers, one with only clamp/omod, or a bare one, based on the
// profile's flags.
class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret =
    !if(P.HasModifiers,
        [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods P.Src0VT:$src0, i32:$src0_modifiers))))],
        !if(P.HasOMod,
            [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0,
                                                  i1:$clamp, i32:$omod))))],
            [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]
        )
    );
}

// Standard VOP1 instruction bundle: e32 and e64 pseudos, plus SDWA/DPP forms
// when the profile supports them, plus bare-mnemonic aliases for each form.
multiclass VOP1Inst <string opName, VOPProfile P,
                     SDPatternOperator node = null_frag> {
  // We only want to set this on the basic, non-SDWA or DPP forms.
  defvar should_mov_imm = !eq(opName, "v_mov_b32");

  let isMoveImm = should_mov_imm in {
    def _e32 : VOP1_Pseudo <opName, P>;
    def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>;
  }

  foreach _ = BoolToList<P.HasExtSDWA>.ret in
    def _sdwa : VOP1_SDWA_Pseudo <opName, P>;

  foreach _ = BoolToList<P.HasExtDPP>.ret in
    def _dpp : VOP1_DPP_Pseudo <opName, P>;

  def : MnemonicAlias<opName#"_e32", opName>, LetDummies;
  def : MnemonicAlias<opName#"_e64", opName>, LetDummies;

  foreach _ = BoolToList<P.HasExtSDWA>.ret in
    def : MnemonicAlias<opName#"_sdwa", opName>, LetDummies;

  foreach _ = BoolToList<P.HasExtDPP>.ret in
    def : MnemonicAlias<opName#"_dpp", opName>, LetDummies;
}

// Special profile for instructions which have clamp
// and output modifiers (but have no input modifiers)
class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
  VOPProfile<[dstVt, srcVt, untyped, untyped]> {

  let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
  let Asm64 = "$vdst, $src0$clamp$omod";

  let HasModifiers = 0;
  let HasClamp = 1;
}

def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;

//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//

let VOPAsmPrefer32Bit = 1 in {
defm V_NOP : VOP1Inst <"v_nop", VOP_NONE>;
} // End VOPAsmPrefer32Bit = 1

let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>;
} // End isReMaterializable = 1, isAsCheapAsAMove = 1

// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: Make profile for this, there is VOP3 encoding also
// Hand-rolled instruction (no VOP1Inst) because the destination is an SGPR;
// encoded directly with VOP1 opcode 0x2.
def V_READFIRSTLANE_B32 :
  InstSI <(outs SReg_32:$vdst),
    (ins VRegOrLds_32:$src0),
    "v_readfirstlane_b32 $vdst, $src0",
    [(set i32:$vdst, (int_amdgcn_readfirstlane (i32 VRegOrLds_32:$src0)))]>,
  Enc32 {

  let isCodeGenOnly = 0;
  let UseNamedOperandTable = 1;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  let VOP1 = 1;
  let VALU = 1;
  let Uses = [EXEC];
  // Result depends on the values in all active lanes, not just this one.
  let isConvergent = 1;

  bits<8> vdst;
  bits<9> src0;

  let Inst{8-0}   = src0;
  let Inst{16-9}  = 0x2;
  let Inst{24-17} = vdst;
  let Inst{31-25} = 0x3f; //encoding
}

let SchedRW = [WriteDoubleCvt] in {
defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>;

let mayRaiseFPException = 0 in {
defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
}

defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;

let mayRaiseFPException = 0 in {
defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
}

} // End SchedRW = [WriteDoubleCvt]

let SchedRW = [WriteFloatCvt] in {

// XXX: Does this really not raise exceptions? The manual claims the
// 16-bit ones can.
let mayRaiseFPException = 0 in {
defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
}

defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
let FPDPRounding = 1 in {
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
} // End FPDPRounding = 1

defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
} // End ReadsModeReg = 0, mayRaiseFPException = 0
} // End SchedRW = [WriteFloatCvt]

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;
} // End ReadsModeReg = 0, mayRaiseFPException = 0

defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>;
defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>;
defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;

let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>;
defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>;
defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32, AMDGPUrcp_iflag>;
defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, any_amdgcn_sqrt>;
} // End TRANS = 1, SchedRW = [WriteTrans32]

let TRANS = 1, SchedRW = [WriteTrans64] in {
defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>;
defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>;
defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, any_amdgcn_sqrt>;
} // End TRANS = 1, SchedRW = [WriteTrans64]

let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>;
defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>;
} // End TRANS = 1, SchedRW = [WriteTrans32]

defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>;
defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, bitreverse>;
defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32, AMDGPUffbh_u32>;
defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32, AMDGPUffbl_b32>;
defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>;

let SchedRW = [WriteDoubleAdd] in {
defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>;
let FPDPRounding = 1 in {
defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>;
} // End FPDPRounding = 1
} // End SchedRW = [WriteDoubleAdd]

defm V_FREXP_EXP_I32_F32 : VOP1Inst <"v_frexp_exp_i32_f32", VOP_I32_F32, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F32 : VOP1Inst <"v_frexp_mant_f32", VOP_F32_F32, int_amdgcn_frexp_mant>;

let VOPAsmPrefer32Bit = 1 in {
defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
} // End VOPAsmPrefer32Bit = 1

// Restrict src0 to be VGPR
def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> {
  let Src0RC32 = VRegSrc_32;
  let Src0RC64 = VRegSrc_32;
}

// Special case because there are no true output operands.  Hack vdst
// to be a src operand. The custom inserter must add a tied implicit
// def and use of the super register since there seems to be no way to
// add an implicit def of a virtual register in tablegen.
class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, untyped]> {
  let Src0RC32 = VOPDstOperand<VGPR_32>;
  let Src0RC64 = VOPDstOperand<VGPR_32>;

  let Outs = (outs);
  let Ins32 = (ins Src0RC32:$vdst, Src1RC:$src0);
  let Ins64 = (ins Src0RC64:$vdst, Src1RC:$src0);
  let Asm32 = getAsm32<1, 1>.ret;
  let Asm64 = getAsm64<1, 1, 0, 0, 1>.ret;

  let OutsSDWA = (outs Src0RC32:$vdst);
  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel);
  let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;

  let OutsDPP = (outs Src0RC32:$vdst);
  let InsDPP16 = (ins Src0RC32:$old, Src0RC32:$src0,
                      dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                      bank_mask:$bank_mask, bound_ctrl:$bound_ctrl, FI:$fi);
  let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;

  let OutsDPP8 = (outs Src0RC32:$vdst);
  let InsDPP8 = (ins Src0RC32:$old, Src0RC32:$src0, dpp8:$dpp8, FI:$fi);
  let AsmDPP8 = getAsmDPP8<1, 1, 0>.ret;

  let HasDst = 0;
  let EmitDst = 1; // force vdst emission
}

def VOP_MOVRELD : VOP_MOVREL<VSrc_b32>;
def VOP_MOVRELSD : VOP_MOVREL<VRegSrc_32>;

let SubtargetPredicate = HasMovrel, Uses = [M0, EXEC] in {
// v_movreld_b32 is a special case because the destination output
// register is really a source. It isn't actually read (but may be
// written), and is only to provide the base register to start
// indexing from. Tablegen seems to not let you define an implicit
// virtual register output for the super register being written into,
// so this must have an implicit def of the register added to it.
defm V_MOVRELD_B32 : VOP1Inst <"v_movreld_b32", VOP_MOVRELD>;
defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_MOVRELS>;
defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_MOVRELSD>;
} // End SubtargetPredicate = HasMovrel, Uses = [M0, EXEC]

let SubtargetPredicate = isGFX6GFX7 in {
  let TRANS = 1, SchedRW = [WriteTrans32] in {
    defm V_LOG_CLAMP_F32 :
      VOP1Inst<"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
    defm V_RCP_CLAMP_F32 :
      VOP1Inst<"v_rcp_clamp_f32", VOP_F32_F32>;
    defm V_RCP_LEGACY_F32 :
      VOP1Inst<"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
    defm V_RSQ_CLAMP_F32 :
      VOP1Inst<"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
    defm V_RSQ_LEGACY_F32 :
      VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, int_amdgcn_rsq_legacy>;
  } // End TRANS = 1, SchedRW = [WriteTrans32]

  let SchedRW = [WriteDouble] in {
    defm V_RCP_CLAMP_F64 :
      VOP1Inst<"v_rcp_clamp_f64", VOP_F64_F64>;
    defm V_RSQ_CLAMP_F64 :
      VOP1Inst<"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>;
  } // End SchedRW = [WriteDouble]
} // End SubtargetPredicate = isGFX6GFX7

let SubtargetPredicate = isGFX7GFX8GFX9 in {
  let TRANS = 1, SchedRW = [WriteTrans32] in {
    defm V_LOG_LEGACY_F32 : VOP1Inst<"v_log_legacy_f32", VOP_F32_F32>;
    defm V_EXP_LEGACY_F32 : VOP1Inst<"v_exp_legacy_f32", VOP_F32_F32>;
  } // End TRANS = 1, SchedRW = [WriteTrans32]
} // End SubtargetPredicate = isGFX7GFX8GFX9

let SubtargetPredicate = isGFX7Plus in {
  let SchedRW = [WriteDoubleAdd] in {
    defm V_TRUNC_F64 : VOP1Inst<"v_trunc_f64", VOP_F64_F64, ftrunc>;
    defm V_CEIL_F64 : VOP1Inst<"v_ceil_f64", VOP_F64_F64, fceil>;
    defm V_RNDNE_F64 : VOP1Inst<"v_rndne_f64", VOP_F64_F64, frint>;
    defm V_FLOOR_F64 : VOP1Inst<"v_floor_f64", VOP_F64_F64, ffloor>;
  } // End SchedRW = [WriteDoubleAdd]
} // End SubtargetPredicate = isGFX7Plus

let SubtargetPredicate = Has16BitInsts in {

let FPDPRounding = 1 in {
defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
} // End FPDPRounding = 1
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>;
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>;
let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>;
defm V_LOG_F16 : VOP1Inst <"v_log_f16", VOP_F16_F16, flog2>;
defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16, fexp2>;
defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
} // End TRANS = 1, SchedRW = [WriteTrans32]
defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16, int_amdgcn_frexp_exp>;
defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>;
defm V_CEIL_F16 : VOP1Inst <"v_ceil_f16", VOP_F16_F16, fceil>;
defm V_TRUNC_F16 : VOP1Inst <"v_trunc_f16", VOP_F16_F16, ftrunc>;
defm V_RNDNE_F16 : VOP1Inst <"v_rndne_f16", VOP_F16_F16, frint>;
let FPDPRounding = 1 in {
defm V_FRACT_F16 : VOP1Inst <"v_fract_f16", VOP_F16_F16, AMDGPUfract>;
} // End FPDPRounding = 1

} // End SubtargetPredicate = Has16BitInsts

let OtherPredicates = [Has16BitInsts] in {

def : GCNPat<
  (f32 (f16_to_fp i16:$src)),
  (V_CVT_F32_F16_e32 $src)
>;

def : GCNPat<
  (i16 (AMDGPUfp_to_f16 f32:$src)),
  (V_CVT_F16_F32_e32 $src)
>;

} // End OtherPredicates = [Has16BitInsts]

// Two tied operand pairs ($vdst/$src1, $vdst1/$src0); only the first
// destination and source appear in the assembly string.
def VOP_SWAP_I32 : VOPProfile<[i32, i32, i32, untyped]> {
  let Outs32 = (outs VGPR_32:$vdst, VGPR_32:$vdst1);
  let Ins32 = (ins VGPR_32:$src0, VGPR_32:$src1);
  let Outs64 = Outs32;
  let Asm32 = " $vdst, $src0";
  let Asm64 = "";
  let Ins64 = (ins);
}

let SubtargetPredicate = isGFX9Plus in {
  def V_SWAP_B32 : VOP1_Pseudo<"v_swap_b32", VOP_SWAP_I32, [], 1> {
    let Constraints = "$vdst = $src1, $vdst1 = $src0";
    let DisableEncoding = "$vdst1,$src1";
    let SchedRW = [Write64Bit, Write64Bit];
  }

  defm V_SAT_PK_U8_I16 : VOP1Inst<"v_sat_pk_u8_i16", VOP_I32_I32>;

  let mayRaiseFPException = 0 in {
    defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16>;
    defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16>;
  } // End mayRaiseFPException = 0
} // End SubtargetPredicate = isGFX9Plus

let SubtargetPredicate = isGFX9Only in {
  defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
} // End SubtargetPredicate = isGFX9Only

let SubtargetPredicate = isGFX10Plus in {
  defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NONE>;

  let Uses = [M0] in {
    defm V_MOVRELSD_2_B32 :
      VOP1Inst<"v_movrelsd_2_b32", VOP_MOVRELSD>;

    def V_SWAPREL_B32 : VOP1_Pseudo<"v_swaprel_b32", VOP_SWAP_I32, [], 1> {
      let Constraints = "$vdst = $src1, $vdst1 = $src0";
      let DisableEncoding = "$vdst1,$src1";
      let SchedRW = [Write64Bit, Write64Bit];
    }
  } // End Uses = [M0]
} // End SubtargetPredicate = isGFX10Plus

//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//

// DPP (data-parallel primitives) encoding: the src0 field carries the fixed
// DPP marker 0xfa (dpp16) and the real operands live in the DPP dword.
class VOP1_DPP<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 = 0> :
  VOP_DPP<ps.OpName, p, isDPP16> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;

  bits<8> vdst;
  let Inst{8-0}   = 0xfa;
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f;
}

// GFX10 dpp16 real instruction.
class VOP1_DPP16<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl> :
  VOP1_DPP<op, ps, p, 1>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX10> {
  let AssemblerPredicate = HasDPP16;
  let SubtargetPredicate = HasDPP16;
}

// GFX10 dpp8 real instruction; bits 8:0 hold the fi (fetch-invalid) field
// rather than a fixed marker.
class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
  VOP_DPP8<ps.OpName, p> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;

  bits<8> vdst;
  let Inst{8-0}   = fi;
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f;

  let AssemblerPredicate = HasDPP8;
  let SubtargetPredicate = HasDPP8;
}

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
  // Real encoding for pseudos that only have the VOP1 form (no "_e32" suffix).
  multiclass VOP1Only_Real_gfx10<bits<9> op> {
    def _gfx10 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.GFX10>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
  }
  multiclass VOP1_Real_e32_gfx10<bits<9> op> {
    def _e32_gfx10 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
  }
  multiclass VOP1_Real_e64_gfx10<bits<9> op> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
  // SDWA/DPP reals are only emitted when the e32 profile supports the
  // extension (the foreach is empty otherwise).
  multiclass VOP1_Real_sdwa_gfx10<bits<9> op> {
    foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
      def _sdwa_gfx10 :
        VOP_SDWA10_Real<!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
        VOP1_SDWA9Ae<op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
        let DecoderNamespace = "SDWA10";
      }
  }
  multiclass VOP1_Real_dpp_gfx10<bits<9> op> {
    foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
      def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")> {
        let DecoderNamespace = "SDWA10";
      }
  }
  multiclass VOP1_Real_dpp8_gfx10<bits<9> op> {
    foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
      def _dpp8_gfx10 : VOP1_DPP8<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")> {
        let DecoderNamespace = "DPP8";
      }
  }
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"

// All GFX10 encodings for a standard VOP1 instruction.
multiclass VOP1_Real_gfx10<bits<9> op> :
  VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>,
  VOP1_Real_sdwa_gfx10<op>, VOP1_Real_dpp_gfx10<op>,
  VOP1_Real_dpp8_gfx10<op>;

defm V_PIPEFLUSH : VOP1_Real_gfx10<0x01b>;
defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10<0x048>;
defm V_CVT_F16_U16 : VOP1_Real_gfx10<0x050>;
defm V_CVT_F16_I16 : VOP1_Real_gfx10<0x051>;
defm V_CVT_U16_F16 : VOP1_Real_gfx10<0x052>;
defm V_CVT_I16_F16 : VOP1_Real_gfx10<0x053>;
defm V_RCP_F16 : VOP1_Real_gfx10<0x054>;
defm V_SQRT_F16 : VOP1_Real_gfx10<0x055>;
defm V_RSQ_F16 : VOP1_Real_gfx10<0x056>;
defm V_LOG_F16 : VOP1_Real_gfx10<0x057>;
defm V_EXP_F16 : VOP1_Real_gfx10<0x058>;
defm V_FREXP_MANT_F16 : VOP1_Real_gfx10<0x059>;
defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10<0x05a>;
defm V_FLOOR_F16 : VOP1_Real_gfx10<0x05b>;
defm V_CEIL_F16 : VOP1_Real_gfx10<0x05c>;
defm V_TRUNC_F16 : VOP1_Real_gfx10<0x05d>;
defm V_RNDNE_F16 : VOP1_Real_gfx10<0x05e>;
defm V_FRACT_F16 : VOP1_Real_gfx10<0x05f>;
defm V_SIN_F16 : VOP1_Real_gfx10<0x060>;
defm V_COS_F16 : VOP1_Real_gfx10<0x061>;
defm V_SAT_PK_U8_I16 : VOP1_Real_gfx10<0x062>;
defm V_CVT_NORM_I16_F16 : VOP1_Real_gfx10<0x063>;
defm V_CVT_NORM_U16_F16 : VOP1_Real_gfx10<0x064>;

defm V_SWAP_B32 : VOP1Only_Real_gfx10<0x065>;
defm V_SWAPREL_B32 : VOP1Only_Real_gfx10<0x068>;

//===----------------------------------------------------------------------===//
// GFX7, GFX10.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
  multiclass VOP1_Real_e32_gfx7<bits<9> op> {
    def _e32_gfx7 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
  }
  multiclass VOP1_Real_e64_gfx7<bits<9> op> {
    def _e64_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"

multiclass VOP1_Real_gfx7<bits<9> op> :
  VOP1_Real_e32_gfx7<op>, VOP1_Real_e64_gfx7<op>;

// Instructions present on GFX7 and GFX10 but absent on GFX8/GFX9.
multiclass VOP1_Real_gfx7_gfx10<bits<9> op> :
  VOP1_Real_gfx7<op>, VOP1_Real_gfx10<op>;

defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>;
defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>;

defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10<0x017>;
defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10<0x018>;
defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10<0x019>;
defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10<0x01a>;

//===----------------------------------------------------------------------===//
// GFX6, GFX7, GFX10.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  multiclass VOP1_Real_e32_gfx6_gfx7<bits<9> op> {
    def _e32_gfx6_gfx7 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
  }
  multiclass VOP1_Real_e64_gfx6_gfx7<bits<9> op> {
    def _e64_gfx6_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

multiclass VOP1_Real_gfx6_gfx7<bits<9> op> :
  VOP1_Real_e32_gfx6_gfx7<op>, VOP1_Real_e64_gfx6_gfx7<op>;

// Instructions that kept the same VOP1 opcode from GFX6/GFX7 through GFX10.
multiclass VOP1_Real_gfx6_gfx7_gfx10<bits<9> op> :
  VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10<op>;

defm V_LOG_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x026>;
defm V_RCP_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x028>;
defm V_RCP_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x029>;
defm V_RSQ_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x02c>;
defm V_RSQ_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x02d>;
defm V_RCP_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x030>;
defm V_RSQ_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x032>;

defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10<0x000>;
defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x001>;
defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x003>;
defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x004>;
defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x005>;
defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x006>;
defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x007>;
defm V_CVT_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x008>;
defm V_CVT_F16_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00a>;
defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10<0x00b>;
defm V_CVT_RPI_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00c>;
defm V_CVT_FLR_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00d>;
defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10<0x00e>;
defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x00f>;
defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x010>;
defm V_CVT_F32_UBYTE0 : VOP1_Real_gfx6_gfx7_gfx10<0x011>;
defm V_CVT_F32_UBYTE1 : VOP1_Real_gfx6_gfx7_gfx10<0x012>;
defm V_CVT_F32_UBYTE2 : VOP1_Real_gfx6_gfx7_gfx10<0x013>;
defm V_CVT_F32_UBYTE3 : VOP1_Real_gfx6_gfx7_gfx10<0x014>;
defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x015>;
defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x016>;
defm V_FRACT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x020>;
defm V_TRUNC_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x021>;
defm V_CEIL_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x022>;
defm V_RNDNE_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x023>;
defm V_FLOOR_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x024>;
defm V_EXP_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x025>;
defm V_LOG_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x027>;
defm V_RCP_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02a>;
defm V_RCP_IFLAG_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02b>;
defm V_RSQ_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02e>;
defm V_RCP_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x02f>;
defm V_RSQ_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x031>;
defm V_SQRT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x033>;
defm V_SQRT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x034>;
defm V_SIN_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x035>;
defm V_COS_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x036>;
defm V_NOT_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x037>;
defm V_BFREV_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x038>;
defm V_FFBH_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x039>;
defm V_FFBL_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x03a>;
defm V_FFBH_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x03b>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03c>;
defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03d>;
defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03e>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x03f>;
defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x040>;
defm V_CLREXCP : VOP1_Real_gfx6_gfx7_gfx10<0x041>;
defm V_MOVRELD_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x042>;
defm V_MOVRELS_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x043>;
defm V_MOVRELSD_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x044>;

//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//

// VI DPP encoding: fixed 0xfa marker in the src0 field (see VOP_DPPe for the
// DPP dword).
class VOP1_DPPe <bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
  VOP_DPPe <P> {
  bits<8> vdst;
  let Inst{8-0}   = 0xfa; // dpp
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; //encoding
}

multiclass VOP1Only_Real_vi <bits<10> op> {
  let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
    def _vi :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.VI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
  }
}

multiclass VOP1_Real_e32e64_vi <bits<10> op> {
  let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
    def _e32_vi :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
    def _e64_vi :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
      // VI VOP3 opcodes for VOP1 instructions start at 0x140.
      VOP3e_vi <!add(0x140, op), !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
}

// All VI encodings: e32/e64 plus SDWA (VI and GFX9 variants) and DPP where
// the e32 profile supports the extension.
multiclass VOP1_Real_vi <bits<10> op> {
  defm NAME : VOP1_Real_e32e64_vi <op>;

  foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA>.ret in
    def _sdwa_vi :
      VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx9 :
      VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_vi :
      VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
      VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
}

defm V_NOP : VOP1_Real_vi <0x0>;
defm V_MOV_B32 : VOP1_Real_vi <0x1>;
defm V_CVT_I32_F64 : VOP1_Real_vi <0x3>;
defm V_CVT_F64_I32 : VOP1_Real_vi <0x4>;
defm V_CVT_F32_I32 : VOP1_Real_vi <0x5>;
defm V_CVT_F32_U32 : VOP1_Real_vi <0x6>;
defm V_CVT_U32_F32 : VOP1_Real_vi <0x7>;
defm V_CVT_I32_F32 : VOP1_Real_vi <0x8>;
defm V_CVT_F16_F32 : VOP1_Real_vi <0xa>;
defm V_CVT_F32_F16 : VOP1_Real_vi <0xb>;
defm V_CVT_RPI_I32_F32 : VOP1_Real_vi <0xc>;
defm V_CVT_FLR_I32_F32 : VOP1_Real_vi <0xd>;
defm V_CVT_OFF_F32_I4 : VOP1_Real_vi <0xe>;
defm V_CVT_F32_F64 : VOP1_Real_vi <0xf>;
defm V_CVT_F64_F32 : VOP1_Real_vi <0x10>;
defm V_CVT_F32_UBYTE0 : VOP1_Real_vi <0x11>;
defm V_CVT_F32_UBYTE1 : VOP1_Real_vi <0x12>;
defm V_CVT_F32_UBYTE2 : VOP1_Real_vi <0x13>;
defm V_CVT_F32_UBYTE3 : VOP1_Real_vi <0x14>;
defm V_CVT_U32_F64 : VOP1_Real_vi <0x15>;
defm V_CVT_F64_U32 : VOP1_Real_vi <0x16>;
defm V_FRACT_F32 : VOP1_Real_vi <0x1b>;
defm V_TRUNC_F32 : VOP1_Real_vi <0x1c>;
defm V_CEIL_F32 : VOP1_Real_vi <0x1d>;
defm V_RNDNE_F32 : VOP1_Real_vi <0x1e>;
defm V_FLOOR_F32 : VOP1_Real_vi <0x1f>;
defm V_EXP_F32 : VOP1_Real_vi <0x20>;
defm V_LOG_F32 : VOP1_Real_vi <0x21>;
defm V_RCP_F32 : VOP1_Real_vi <0x22>;
defm V_RCP_IFLAG_F32 : VOP1_Real_vi <0x23>;
defm V_RSQ_F32 : VOP1_Real_vi <0x24>;
defm V_RCP_F64 : VOP1_Real_vi <0x25>;
defm V_RSQ_F64 : VOP1_Real_vi <0x26>;
defm V_SQRT_F32 : VOP1_Real_vi <0x27>;
defm V_SQRT_F64 : VOP1_Real_vi <0x28>;
defm V_SIN_F32 : VOP1_Real_vi <0x29>;
defm V_COS_F32 : VOP1_Real_vi <0x2a>;
defm V_NOT_B32 : VOP1_Real_vi <0x2b>;
defm V_BFREV_B32 : VOP1_Real_vi <0x2c>;
defm V_FFBH_U32 : VOP1_Real_vi <0x2d>;
defm V_FFBL_B32 : VOP1_Real_vi <0x2e>;
defm V_FFBH_I32 : VOP1_Real_vi <0x2f>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_vi <0x30>;
defm V_FREXP_MANT_F64 : VOP1_Real_vi <0x31>;
defm V_FRACT_F64 : VOP1_Real_vi <0x32>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_vi <0x33>;
defm V_FREXP_MANT_F32 : VOP1_Real_vi <0x34>;
defm V_CLREXCP : VOP1_Real_vi <0x35>;
// The movrel instructions have no SDWA/DPP forms on VI; only e32/e64.
defm V_MOVRELD_B32 : VOP1_Real_e32e64_vi <0x36>;
defm V_MOVRELS_B32 : VOP1_Real_e32e64_vi <0x37>;
defm V_MOVRELSD_B32 : VOP1_Real_e32e64_vi <0x38>;
defm V_TRUNC_F64 : VOP1_Real_vi <0x17>;
defm V_CEIL_F64 : VOP1_Real_vi <0x18>;
defm V_FLOOR_F64 : VOP1_Real_vi <0x1A>;
defm V_RNDNE_F64 : VOP1_Real_vi <0x19>;
defm V_LOG_LEGACY_F32 : VOP1_Real_vi <0x4c>;
defm V_EXP_LEGACY_F32 : VOP1_Real_vi <0x4b>;
defm V_CVT_F16_U16 : VOP1_Real_vi <0x39>;
defm V_CVT_F16_I16 : VOP1_Real_vi <0x3a>;
defm V_CVT_U16_F16 : VOP1_Real_vi <0x3b>;
defm V_CVT_I16_F16 : VOP1_Real_vi <0x3c>;
defm V_RCP_F16 : VOP1_Real_vi <0x3d>;
defm V_SQRT_F16 : VOP1_Real_vi <0x3e>;
defm V_RSQ_F16 : VOP1_Real_vi <0x3f>;
defm V_LOG_F16 : VOP1_Real_vi <0x40>;
defm V_EXP_F16 : VOP1_Real_vi <0x41>;
defm V_FREXP_MANT_F16 : VOP1_Real_vi <0x42>;
defm V_FREXP_EXP_I16_F16 : VOP1_Real_vi <0x43>;
defm V_FLOOR_F16 : VOP1_Real_vi <0x44>;
defm V_CEIL_F16 : VOP1_Real_vi <0x45>;
defm V_TRUNC_F16 : VOP1_Real_vi <0x46>;
defm V_RNDNE_F16 : VOP1_Real_vi <0x47>;
defm V_FRACT_F16 : VOP1_Real_vi <0x48>;
defm V_SIN_F16 : VOP1_Real_vi <0x49>;
defm V_COS_F16 : VOP1_Real_vi <0x4a>;
defm V_SWAP_B32 : VOP1Only_Real_vi <0x51>;

defm V_SAT_PK_U8_I16 : VOP1_Real_vi<0x4f>;
defm V_CVT_NORM_I16_F16 : VOP1_Real_vi<0x4d>;
defm V_CVT_NORM_U16_F16 : VOP1_Real_vi<0x4e>;

// Copy of v_mov_b32 with $vdst as a use operand for use with VGPR
// indexing mode. vdst can't be treated as a def for codegen purposes,
// and an implicit use and def of the super register should be added.
// Pseudo with no outs: $vdst appears in the ins list, so it is read rather
// than defined. Expands to the GFX8/GFX9 real v_mov_b32_e32.
def V_MOV_B32_indirect : VPseudoInstSI<(outs),
  (ins getVALUDstForVT<i32>.ret:$vdst, getVOPSrc0ForVT<i32>.ret:$src0)>,
  PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
                                        getVOPSrc0ForVT<i32>.ret:$src0)> {
  let VOP1 = 1;
  let SubtargetPredicate = isGFX8GFX9;
}

let OtherPredicates = [isGFX8Plus] in {

// int_amdgcn_mov_dpp has no separate "old" value, so $src is passed twice:
// once as the old (pass-through) operand and once as the moved source.
def : GCNPat <
  (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask,
                           timm:$bank_mask, timm:$bound_ctrl)),
  (V_MOV_B32_dpp VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp_ctrl),
                 (as_i32timm $row_mask), (as_i32timm $bank_mask),
                 (as_i1timm $bound_ctrl))
>;

// int_amdgcn_update_dpp supplies an explicit $old value, which becomes the
// pass-through operand of V_MOV_B32_dpp.
def : GCNPat <
  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl,
                              timm:$row_mask, timm:$bank_mask,
                              timm:$bound_ctrl)),
  (V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src, (as_i32timm $dpp_ctrl),
                 (as_i32timm $row_mask), (as_i32timm $bank_mask),
                 (as_i1timm $bound_ctrl))
>;

} // End OtherPredicates = [isGFX8Plus]

// i16 <-> i32/i64 conversions lower to plain register copies (no conversion
// instruction is emitted); the i64 cases additionally build/extract the
// 64-bit pair, with the high half zeroed on extension.
let OtherPredicates = [isGFX8Plus] in {
def : GCNPat<
  (i32 (anyext i16:$src)),
  (COPY $src)
>;

def : GCNPat<
  (i64 (anyext i16:$src)),
  (REG_SEQUENCE VReg_64,
                (i32 (COPY $src)), sub0,
                (V_MOV_B32_e32 (i32 0)), sub1)
>;

def : GCNPat<
  (i16 (trunc i32:$src)),
  (COPY $src)
>;

def : GCNPat <
  (i16 (trunc i64:$src)),
  (EXTRACT_SUBREG $src, sub0)
>;

} // End OtherPredicates = [isGFX8Plus]

//===----------------------------------------------------------------------===//
// GFX9
//===----------------------------------------------------------------------===//

// GFX9-only real encodings: reuses the VI e32/e64 encodings under the GFX9
// assembler predicate and decoder namespace, then conditionally adds the
// SDWA9 and DPP variants when the e32 profile supports them (the foreach
// over BoolToList is a 0/1-element loop, i.e. a conditional def).
multiclass VOP1_Real_gfx9 <bits<10> op> {
  let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
    defm NAME : VOP1_Real_e32e64_vi <op>;
  }

  foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx9 :
      VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_gfx9 :
      VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
      VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;

}

defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;

//===----------------------------------------------------------------------===//
// GFX10
//===----------------------------------------------------------------------===//

let OtherPredicates = [isGFX10Plus] in {
// As with int_amdgcn_mov_dpp above, $src doubles as the pass-through
// operand; the FI operand is hardwired to DPP8Mode.FI_0.
def : GCNPat <
  (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
  (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, VGPR_32:$src,
                        (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX10Plus]