//===-- VOP1Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP1 Classes
//===----------------------------------------------------------------------===//

// 32-bit VOP1 encoding: src0 in bits 8-0, the 8-bit opcode in bits 16-9,
// vdst in bits 24-17 and the fixed value 0x3f in bits 31-25.
class VOP1e <bits<8> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;

  // src0 bits are left unset ('?') when the profile has no src0.
  let Inst{8-0}   = !if(P.HasSrc0, src0{8-0}, ?);
  let Inst{16-9}  = op;
  // vdst is only encoded when the profile asks for it; otherwise zero.
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; //encoding
}

// SDWA form of the VOP1 encoding: bits 8-0 hold the constant 0xf9 instead of
// src0; the remaining fields come from VOP_SDWAe.
class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

// Same low-dword layout as VOP1_SDWAe, but layered on VOP_SDWA9Ae.
class VOP1_SDWA9Ae <bits<8> op, VOPProfile P> : VOP_SDWA9Ae <P> {
  bits<8> vdst;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

// Pseudo instruction for the 32-bit VOP1 form. The "_e32" suffix is dropped
// when VOP1Only is set.
class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1Only = 0> :
  VOP_Pseudo <opName, !if(VOP1Only, "", "_e32"), P, P.Outs32, P.Ins32, "", pattern> {

  let AsmOperands = P.Asm32;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  let VOP1 = 1;
  let VALU = 1;
  let Uses = [EXEC];

  let AsmVariantName = AMDGPUAsmVariants.Default;
}

// Encodable ("real") counterpart of a VOP1_Pseudo for one encoding family.
// Operand lists, assembly string and the flags listed below are copied from
// the pseudo.
class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let AsmVariantName       = ps.AsmVariantName;
  let Constraints          = ps.Constraints;
  let DisableEncoding      = ps.DisableEncoding;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses                 = ps.Uses;
  let Defs                 = ps.Defs;
}

class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP1";
}

class VOP1_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_DPP_Pseudo <OpName, P, pattern> {
}

// Builds the selection pattern used for the 64-bit (_e64) form of a VOP1
// instruction. Three shapes, chosen from the profile:
//   * P.HasModifiers: source wrapped in VOP3Mods0
//   * else P.HasOMod: source wrapped in VOP3OMods
//   * otherwise:      plain (node src0)
class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret =
    !if(P.HasModifiers,
        [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
                                              i32:$src0_modifiers,
                                              i1:$clamp, i32:$omod))))],
        !if(P.HasOMod,
            [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0,
                                                  i1:$clamp, i32:$omod))))],
            [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]
        )
    );
}

// Defines the pseudo instructions for one VOP1 opcode: _e32 and _e64 always,
// _sdwa / _dpp only when the profile enables them, plus mnemonic aliases that
// map each suffixed spelling back to the plain opName.
multiclass VOP1Inst <string opName, VOPProfile P,
                     SDPatternOperator node = null_frag> {
  def _e32 : VOP1_Pseudo <opName, P>;
  def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>;

  foreach _ = BoolToList<P.HasExtSDWA>.ret in
    def _sdwa : VOP1_SDWA_Pseudo <opName, P>;

  foreach _ = BoolToList<P.HasExtDPP>.ret in
    def _dpp : VOP1_DPP_Pseudo <opName, P>;

  def : MnemonicAlias<opName#"_e32", opName>, LetDummies;
  def : MnemonicAlias<opName#"_e64", opName>, LetDummies;

  foreach _ = BoolToList<P.HasExtSDWA>.ret in
    def : MnemonicAlias<opName#"_sdwa", opName>, LetDummies;

  foreach _ = BoolToList<P.HasExtDPP>.ret in
    def : MnemonicAlias<opName#"_dpp", opName>, LetDummies;
}

// Special profile for instructions which have clamp
// and output modifiers (but have no input modifiers)
class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
  VOPProfile<[dstVt, srcVt, untyped, untyped]> {

  let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
  let Asm64 = "$vdst, $src0$clamp$omod";

  let HasModifiers = 0;
  let HasClamp = 1;
  let HasOMod = 1;
}

def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;

//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//

let VOPAsmPrefer32Bit = 1 in {
defm V_NOP : VOP1Inst <"v_nop", VOP_NONE>;
} // End VOPAsmPrefer32Bit = 1

let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>;
} // End isMoveImm = 1

// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: Make profile for this, there is VOP3 encoding also
def V_READFIRSTLANE_B32 :
  InstSI <(outs SReg_32:$vdst),
    (ins VRegOrLds_32:$src0),
    "v_readfirstlane_b32 $vdst, $src0",
    [(set i32:$vdst, (int_amdgcn_readfirstlane (i32 VRegOrLds_32:$src0)))]>,
  Enc32 {

  let isCodeGenOnly = 0;
  let UseNamedOperandTable = 1;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  let VOP1 = 1;
  let VALU = 1;
  let Uses = [EXEC];
  let isConvergent = 1;

  bits<8> vdst;
  bits<9> src0;

  // Hand-written VOP1 encoding with opcode 0x2.
  let Inst{8-0}   = src0;
  let Inst{16-9}  = 0x2;
  let Inst{24-17} = vdst;
  let Inst{31-25} = 0x3f; //encoding
}

let SchedRW = [WriteDoubleCvt] in {
defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>;
defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;
defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
} // End SchedRW = [WriteDoubleCvt]

let SchedRW = [WriteQuarterRate32] in {
defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
let FPDPRounding = 1 in {
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
} // End FPDPRounding = 1
defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
} // End SchedRW = [WriteQuarterRate32]

defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;

defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>;
defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>;
defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;

let SchedRW = [WriteQuarterRate32] in {
defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>;
defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>;
defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32, AMDGPUrcp_iflag>;
defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, fsqrt>;
} // End SchedRW = [WriteQuarterRate32]

let SchedRW = [WriteDouble] in {
defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>;
defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>;
defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, fsqrt>;
} // End SchedRW = [WriteDouble]

let SchedRW = [WriteQuarterRate32] in {
defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>;
defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>;
} // End SchedRW = [WriteQuarterRate32]

defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>;
defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, bitreverse>;
defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32, AMDGPUffbh_u32>;
defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32>;
defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>;

let SchedRW = [WriteDoubleAdd] in {
defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>;
let FPDPRounding = 1 in {
defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>;
} // End FPDPRounding = 1
} // End SchedRW = [WriteDoubleAdd]

defm V_FREXP_EXP_I32_F32 : VOP1Inst <"v_frexp_exp_i32_f32", VOP_I32_F32, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F32 : VOP1Inst <"v_frexp_mant_f32", VOP_F32_F32, int_amdgcn_frexp_mant>;

let VOPAsmPrefer32Bit = 1 in {
defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
} // End VOPAsmPrefer32Bit = 1

// Restrict src0 to be VGPR
def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> {
  let Src0RC32 = VRegSrc_32;
  let Src0RC64 = VRegSrc_32;
}

// Special case because there are no true output operands. Hack vdst
// to be a src operand. The custom inserter must add a tied implicit
// def and use of the super register since there seems to be no way to
// add an implicit def of a virtual register in tablegen.
class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, untyped]> {
  let Src0RC32 = VOPDstOperand<VGPR_32>;
  let Src0RC64 = VOPDstOperand<VGPR_32>;

  let Outs = (outs);
  let Ins32 = (ins Src0RC32:$vdst, Src1RC:$src0);
  let Ins64 = (ins Src0RC64:$vdst, Src1RC:$src0);
  let Asm32 = getAsm32<1, 1>.ret;
  let Asm64 = getAsm64<1, 1, 0, 0, 1>.ret;

  let OutsSDWA = (outs Src0RC32:$vdst);
  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel);
  let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;

  let OutsDPP = (outs Src0RC32:$vdst);
  let InsDPP16 = (ins Src0RC32:$old, Src0RC32:$src0,
                      dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                      bank_mask:$bank_mask, bound_ctrl:$bound_ctrl, FI:$fi);
  let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;

  let OutsDPP8 = (outs Src0RC32:$vdst);
  let InsDPP8 = (ins Src0RC32:$old, Src0RC32:$src0, dpp8:$dpp8, FI:$fi);
  let AsmDPP8 = getAsmDPP8<1, 1, 0>.ret;

  let HasDst = 0;
  let EmitDst = 1; // force vdst emission
}

def VOP_MOVRELD : VOP_MOVREL<VSrc_b32>;
def VOP_MOVRELSD : VOP_MOVREL<VRegSrc_32>;

let SubtargetPredicate = HasMovrel, Uses = [M0, EXEC] in {
 // v_movreld_b32 is a special case because the destination output
 // register is really a source. It isn't actually read (but may be
 // written), and is only to provide the base register to start
 // indexing from. Tablegen seems to not let you define an implicit
 // virtual register output for the super register being written into,
 // so this must have an implicit def of the register added to it.
defm V_MOVRELD_B32 : VOP1Inst <"v_movreld_b32", VOP_MOVRELD>;
defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_MOVRELS>;
defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_MOVRELSD>;
} // End SubtargetPredicate = HasMovrel, Uses = [M0, EXEC]

defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>;

let SubtargetPredicate = isGFX6GFX7 in {
  let SchedRW = [WriteQuarterRate32] in {
    defm V_LOG_CLAMP_F32 :
      VOP1Inst<"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
    defm V_RCP_CLAMP_F32 :
      VOP1Inst<"v_rcp_clamp_f32", VOP_F32_F32>;
    defm V_RCP_LEGACY_F32 :
      VOP1Inst<"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
    defm V_RSQ_CLAMP_F32 :
      VOP1Inst<"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
    defm V_RSQ_LEGACY_F32 :
      VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy>;
  } // End SchedRW = [WriteQuarterRate32]

  let SchedRW = [WriteDouble] in {
    defm V_RCP_CLAMP_F64 :
      VOP1Inst<"v_rcp_clamp_f64", VOP_F64_F64>;
    defm V_RSQ_CLAMP_F64 :
      VOP1Inst<"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>;
  } // End SchedRW = [WriteDouble]
} // End SubtargetPredicate = isGFX6GFX7

let SubtargetPredicate = isGFX7GFX8GFX9 in {
  let SchedRW = [WriteQuarterRate32] in {
    defm V_LOG_LEGACY_F32 : VOP1Inst<"v_log_legacy_f32", VOP_F32_F32>;
    defm V_EXP_LEGACY_F32 : VOP1Inst<"v_exp_legacy_f32", VOP_F32_F32>;
  } // End SchedRW = [WriteQuarterRate32]
} // End SubtargetPredicate = isGFX7GFX8GFX9

let SubtargetPredicate = isGFX7Plus in {
  let SchedRW = [WriteDoubleAdd] in {
    defm V_TRUNC_F64 : VOP1Inst<"v_trunc_f64", VOP_F64_F64, ftrunc>;
    defm V_CEIL_F64 : VOP1Inst<"v_ceil_f64", VOP_F64_F64, fceil>;
    defm V_RNDNE_F64 : VOP1Inst<"v_rndne_f64", VOP_F64_F64, frint>;
    defm V_FLOOR_F64 : VOP1Inst<"v_floor_f64", VOP_F64_F64, ffloor>;
  } // End SchedRW = [WriteDoubleAdd]
} // End SubtargetPredicate = isGFX7Plus

let SubtargetPredicate = Has16BitInsts in {

let FPDPRounding = 1 in {
defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
} // End FPDPRounding = 1
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>;
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>;
let SchedRW = [WriteQuarterRate32] in {
defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16, fsqrt>;
defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>;
defm V_LOG_F16 : VOP1Inst <"v_log_f16", VOP_F16_F16, flog2>;
defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16, fexp2>;
defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
} // End SchedRW = [WriteQuarterRate32]
defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16, int_amdgcn_frexp_exp>;
defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>;
defm V_CEIL_F16 : VOP1Inst <"v_ceil_f16", VOP_F16_F16, fceil>;
defm V_TRUNC_F16 : VOP1Inst <"v_trunc_f16", VOP_F16_F16, ftrunc>;
defm V_RNDNE_F16 : VOP1Inst <"v_rndne_f16", VOP_F16_F16, frint>;
let FPDPRounding = 1 in {
defm V_FRACT_F16 : VOP1Inst <"v_fract_f16", VOP_F16_F16, AMDGPUfract>;
} // End FPDPRounding = 1

} // End SubtargetPredicate = Has16BitInsts

let OtherPredicates = [Has16BitInsts] in {

def : GCNPat<
    (f32 (f16_to_fp i16:$src)),
    (V_CVT_F32_F16_e32 $src)
>;

def : GCNPat<
    (i16 (AMDGPUfp_to_f16 f32:$src)),
    (V_CVT_F16_F32_e32 $src)
>;

} // End OtherPredicates = [Has16BitInsts]

// Profile shared by the swap instructions below: two VGPR results and two
// VGPR sources, with no 64-bit form (empty Ins64 / Asm64).
def VOP_SWAP_I32 : VOPProfile<[i32, i32, i32, untyped]> {
  let Outs32 = (outs VGPR_32:$vdst, VGPR_32:$vdst1);
  let Ins32 = (ins VGPR_32:$src0, VGPR_32:$src1);
  let Outs64 = Outs32;
  let Asm32 = " $vdst, $src0";
  let Asm64 = "";
  let Ins64 = (ins);
}

let SubtargetPredicate = isGFX9Plus in {
  def V_SWAP_B32 : VOP1_Pseudo<"v_swap_b32", VOP_SWAP_I32, [], 1> {
    // Each result is tied to the opposite source; the tied duplicates are
    // kept out of the encoding.
    let Constraints = "$vdst = $src1, $vdst1 = $src0";
    let DisableEncoding = "$vdst1,$src1";
    let SchedRW = [Write64Bit, Write64Bit];
  }

  defm V_SAT_PK_U8_I16 : VOP1Inst<"v_sat_pk_u8_i16", VOP_I32_I32>;
  defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16>;
  defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16>;
} // End SubtargetPredicate = isGFX9Plus

let SubtargetPredicate = isGFX9Only in {
  defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
} // End SubtargetPredicate = isGFX9Only

let SubtargetPredicate = isGFX10Plus in {
  defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NONE>;

  // NOTE(review): this Uses list names only M0, whereas the HasMovrel scope
  // earlier in this file lists [M0, EXEC]. Confirm that leaving EXEC out here
  // is intended.
  let Uses = [M0] in {
    defm V_MOVRELSD_2_B32 :
      VOP1Inst<"v_movrelsd_2_b32", VOP_MOVRELSD>;

    def V_SWAPREL_B32 : VOP1_Pseudo<"v_swaprel_b32", VOP_SWAP_I32, [], 1> {
      let Constraints = "$vdst = $src1, $vdst1 = $src0";
      let DisableEncoding = "$vdst1,$src1";
      let SchedRW = [Write64Bit, Write64Bit];
    }
  } // End Uses = [M0]
} // End SubtargetPredicate = isGFX10Plus

//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//

// Shared base for the DPP real encodings in this file. The low dword carries
// the constant 0xfa in the src0 slot, then the opcode, the optional vdst and
// the 0x3f VOP1 marker. Scheduling and register-use data come from the pseudo.
class VOP1_DPP<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 = 0> :
    VOP_DPP<ps.OpName, p, isDPP16> {
  bits<8> vdst;

  // Encoding fields.
  let Inst{8-0}   = 0xfa;
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f;

  // Properties inherited from the pseudo instruction.
  let Uses           = ps.Uses;
  let Defs           = ps.Defs;
  let SchedRW        = ps.SchedRW;
  let hasSideEffects = ps.hasSideEffects;
}

// DPP16 real instruction in the GFX10 encoding family. The assembler
// predicate becomes DisableInst when the profile has no extensions.
class VOP1_DPP16<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl> :
    VOP1_DPP<op, ps, p, 1>,
    SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX10> {
  let SubtargetPredicate = HasDPP16;
  let AssemblerPredicate = !if(p.HasExt, HasDPP16, DisableInst);
}

// DPP8 real instruction, built from the _e32 pseudo. Bits 8-0 are taken from
// the fi operand rather than being a fixed constant.
class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
    VOP_DPP8<ps.OpName, p> {
  bits<8> vdst;

  // Encoding fields.
  let Inst{8-0}   = fi;
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f;

  // Properties inherited from the pseudo instruction.
  let Uses           = ps.Uses;
  let Defs           = ps.Defs;
  let SchedRW        = ps.SchedRW;
  let hasSideEffects = ps.hasSideEffects;

  let SubtargetPredicate = HasDPP8;
  let AssemblerPredicate = !if(p.HasExt, HasDPP8, DisableInst);
}

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

// Per-encoding helper multiclasses. Each one looks up the pseudo by NAME plus
// a suffix and emits the matching "_gfx10" real instruction. Only the low
// bits of the 9-bit op argument are used by each encoding.
let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
  // For instructions that only exist in the VOP1 form (pseudo has no suffix).
  multiclass VOP1Only_Real_gfx10<bits<9> op> {
    def _gfx10 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.GFX10>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
  }

  multiclass VOP1_Real_e32_gfx10<bits<9> op> {
    def _e32_gfx10 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
  }

  // The VOP3 opcode is the bit pattern 0,1,1 followed by the low 7 op bits.
  multiclass VOP1_Real_e64_gfx10<bits<9> op> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }

  // Emitted only when the profile sets HasExtSDWA9.
  multiclass VOP1_Real_sdwa_gfx10<bits<9> op> {
    foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
      def _sdwa_gfx10 :
        VOP_SDWA10_Real<!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
        VOP1_SDWA9Ae<op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
        let DecoderNamespace = "SDWA10";
      }
  }

  // Emitted only when the profile sets HasExtDPP.
  // NOTE(review): the DPP16 variant is placed in the "SDWA10" decoder
  // namespace, the same one the SDWA variant uses; confirm this is intended.
  multiclass VOP1_Real_dpp_gfx10<bits<9> op> {
    foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
      def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")> {
        let DecoderNamespace = "SDWA10";
      }
  }

  // Emitted only when the profile sets HasExtDPP; built from the _e32 pseudo.
  multiclass VOP1_Real_dpp8_gfx10<bits<9> op> {
    foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
      def _dpp8_gfx10 : VOP1_DPP8<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")> {
        let DecoderNamespace = "DPP8";
      }
  }
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"

// All GFX10 encodings of one opcode: e32, e64, sdwa, dpp and dpp8.
multiclass VOP1_Real_gfx10<bits<9> op> :
  VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>,
  VOP1_Real_sdwa_gfx10<op>, VOP1_Real_dpp_gfx10<op>,
  VOP1_Real_dpp8_gfx10<op>;

defm V_PIPEFLUSH         : VOP1_Real_gfx10<0x01b>;
defm V_MOVRELSD_2_B32    : VOP1_Real_gfx10<0x048>;
defm V_CVT_F16_U16       : VOP1_Real_gfx10<0x050>;
defm V_CVT_F16_I16       : VOP1_Real_gfx10<0x051>;
defm V_CVT_U16_F16       : VOP1_Real_gfx10<0x052>;
defm V_CVT_I16_F16       : VOP1_Real_gfx10<0x053>;
defm V_RCP_F16           : VOP1_Real_gfx10<0x054>;
defm V_SQRT_F16          : VOP1_Real_gfx10<0x055>;
defm V_RSQ_F16           : VOP1_Real_gfx10<0x056>;
defm V_LOG_F16           : VOP1_Real_gfx10<0x057>;
defm V_EXP_F16           : VOP1_Real_gfx10<0x058>;
defm V_FREXP_MANT_F16    : VOP1_Real_gfx10<0x059>;
defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10<0x05a>;
defm V_FLOOR_F16         : VOP1_Real_gfx10<0x05b>;
defm V_CEIL_F16          : VOP1_Real_gfx10<0x05c>;
defm V_TRUNC_F16         : VOP1_Real_gfx10<0x05d>;
defm V_RNDNE_F16         : VOP1_Real_gfx10<0x05e>;
defm V_FRACT_F16         : VOP1_Real_gfx10<0x05f>;
defm V_SIN_F16           : VOP1_Real_gfx10<0x060>;
defm V_COS_F16           : VOP1_Real_gfx10<0x061>;
defm V_SAT_PK_U8_I16     : VOP1_Real_gfx10<0x062>;
defm V_CVT_NORM_I16_F16  : VOP1_Real_gfx10<0x063>;
defm V_CVT_NORM_U16_F16  : VOP1_Real_gfx10<0x064>;

defm V_SWAP_B32          : VOP1Only_Real_gfx10<0x065>;
defm V_SWAPREL_B32       : VOP1Only_Real_gfx10<0x068>;

//===----------------------------------------------------------------------===//
// GFX7, GFX10.
//===----------------------------------------------------------------------===//

// Real instructions that are assembled only for GFX7. They use the SI
// encoding family and live in the "GFX7" decoder namespace.
let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
  multiclass VOP1_Real_e32_gfx7<bits<9> op> {
    def _e32_gfx7 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
  }

  // The VOP3 opcode is the bit pattern 1,1 followed by the low 7 op bits.
  multiclass VOP1_Real_e64_gfx7<bits<9> op> {
    def _e64_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"

// Both GFX7 encodings (e32 and e64) of one opcode.
multiclass VOP1_Real_gfx7<bits<9> op> :
  VOP1_Real_e32_gfx7<op>, VOP1_Real_e64_gfx7<op>;

// GFX7 encodings plus every GFX10 encoding, sharing one opcode value.
multiclass VOP1_Real_gfx7_gfx10<bits<9> op> :
  VOP1_Real_gfx7<op>, VOP1_Real_gfx10<op>;

defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>;
defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>;

defm V_TRUNC_F64      : VOP1_Real_gfx7_gfx10<0x017>;
defm V_CEIL_F64       : VOP1_Real_gfx7_gfx10<0x018>;
defm V_RNDNE_F64      : VOP1_Real_gfx7_gfx10<0x019>;
defm V_FLOOR_F64      : VOP1_Real_gfx7_gfx10<0x01a>;

//===----------------------------------------------------------------------===//
// GFX6, GFX7, GFX10.
//===----------------------------------------------------------------------===//

// Real instructions assembled for GFX6 and GFX7. They use the SI encoding
// family and live in the "GFX6GFX7" decoder namespace.
let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  multiclass VOP1_Real_e32_gfx6_gfx7<bits<9> op> {
    def _e32_gfx6_gfx7 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
  }

  // The VOP3 opcode is the bit pattern 1,1 followed by the low 7 op bits.
  multiclass VOP1_Real_e64_gfx6_gfx7<bits<9> op> {
    def _e64_gfx6_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

// Both GFX6/GFX7 encodings (e32 and e64) of one opcode.
multiclass VOP1_Real_gfx6_gfx7<bits<9> op> :
  VOP1_Real_e32_gfx6_gfx7<op>, VOP1_Real_e64_gfx6_gfx7<op>;

// GFX6/GFX7 encodings plus every GFX10 encoding, sharing one opcode value.
multiclass VOP1_Real_gfx6_gfx7_gfx10<bits<9> op> :
  VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10<op>;

defm V_LOG_CLAMP_F32     : VOP1_Real_gfx6_gfx7<0x026>;
defm V_RCP_CLAMP_F32     : VOP1_Real_gfx6_gfx7<0x028>;
defm V_RCP_LEGACY_F32    : VOP1_Real_gfx6_gfx7<0x029>;
defm V_RSQ_CLAMP_F32     : VOP1_Real_gfx6_gfx7<0x02c>;
defm V_RSQ_LEGACY_F32    : VOP1_Real_gfx6_gfx7<0x02d>;
defm V_RCP_CLAMP_F64     : VOP1_Real_gfx6_gfx7<0x030>;
defm V_RSQ_CLAMP_F64     : VOP1_Real_gfx6_gfx7<0x032>;

defm V_NOP               : VOP1_Real_gfx6_gfx7_gfx10<0x000>;
defm V_MOV_B32           : VOP1_Real_gfx6_gfx7_gfx10<0x001>;
defm V_CVT_I32_F64       : VOP1_Real_gfx6_gfx7_gfx10<0x003>;
defm V_CVT_F64_I32       : VOP1_Real_gfx6_gfx7_gfx10<0x004>;
defm V_CVT_F32_I32       : VOP1_Real_gfx6_gfx7_gfx10<0x005>;
defm V_CVT_F32_U32       : VOP1_Real_gfx6_gfx7_gfx10<0x006>;
defm V_CVT_U32_F32       : VOP1_Real_gfx6_gfx7_gfx10<0x007>;
defm V_CVT_I32_F32       : VOP1_Real_gfx6_gfx7_gfx10<0x008>;
defm V_MOV_FED_B32       : VOP1_Real_gfx6_gfx7_gfx10<0x009>;
defm V_CVT_F16_F32       : VOP1_Real_gfx6_gfx7_gfx10<0x00a>;
defm V_CVT_F32_F16       : VOP1_Real_gfx6_gfx7_gfx10<0x00b>;
defm V_CVT_RPI_I32_F32   : VOP1_Real_gfx6_gfx7_gfx10<0x00c>;
defm V_CVT_FLR_I32_F32   : VOP1_Real_gfx6_gfx7_gfx10<0x00d>;
defm V_CVT_OFF_F32_I4    : VOP1_Real_gfx6_gfx7_gfx10<0x00e>;
defm V_CVT_F32_F64       : VOP1_Real_gfx6_gfx7_gfx10<0x00f>;
defm V_CVT_F64_F32       : VOP1_Real_gfx6_gfx7_gfx10<0x010>;
defm V_CVT_F32_UBYTE0    : VOP1_Real_gfx6_gfx7_gfx10<0x011>;
defm V_CVT_F32_UBYTE1    : VOP1_Real_gfx6_gfx7_gfx10<0x012>;
defm V_CVT_F32_UBYTE2    : VOP1_Real_gfx6_gfx7_gfx10<0x013>;
defm V_CVT_F32_UBYTE3    : VOP1_Real_gfx6_gfx7_gfx10<0x014>;
defm V_CVT_U32_F64       : VOP1_Real_gfx6_gfx7_gfx10<0x015>;
defm V_CVT_F64_U32       : VOP1_Real_gfx6_gfx7_gfx10<0x016>;
defm V_FRACT_F32         : VOP1_Real_gfx6_gfx7_gfx10<0x020>;
defm V_TRUNC_F32         : VOP1_Real_gfx6_gfx7_gfx10<0x021>;
defm V_CEIL_F32          : VOP1_Real_gfx6_gfx7_gfx10<0x022>;
defm V_RNDNE_F32         : VOP1_Real_gfx6_gfx7_gfx10<0x023>;
defm V_FLOOR_F32         : VOP1_Real_gfx6_gfx7_gfx10<0x024>;
defm V_EXP_F32           : VOP1_Real_gfx6_gfx7_gfx10<0x025>;
defm V_LOG_F32           : VOP1_Real_gfx6_gfx7_gfx10<0x027>;
defm V_RCP_F32           : VOP1_Real_gfx6_gfx7_gfx10<0x02a>;
defm V_RCP_IFLAG_F32     : VOP1_Real_gfx6_gfx7_gfx10<0x02b>;
defm V_RSQ_F32           : VOP1_Real_gfx6_gfx7_gfx10<0x02e>;
defm V_RCP_F64           : VOP1_Real_gfx6_gfx7_gfx10<0x02f>;
defm V_RSQ_F64           : VOP1_Real_gfx6_gfx7_gfx10<0x031>;
defm V_SQRT_F32          : VOP1_Real_gfx6_gfx7_gfx10<0x033>;
defm V_SQRT_F64          : VOP1_Real_gfx6_gfx7_gfx10<0x034>;
defm V_SIN_F32           : VOP1_Real_gfx6_gfx7_gfx10<0x035>;
defm V_COS_F32           : VOP1_Real_gfx6_gfx7_gfx10<0x036>;
defm V_NOT_B32           : VOP1_Real_gfx6_gfx7_gfx10<0x037>;
defm V_BFREV_B32         : VOP1_Real_gfx6_gfx7_gfx10<0x038>;
defm V_FFBH_U32          : VOP1_Real_gfx6_gfx7_gfx10<0x039>;
defm V_FFBL_B32          : VOP1_Real_gfx6_gfx7_gfx10<0x03a>;
defm V_FFBH_I32          : VOP1_Real_gfx6_gfx7_gfx10<0x03b>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03c>;
defm V_FREXP_MANT_F64    : VOP1_Real_gfx6_gfx7_gfx10<0x03d>;
defm V_FRACT_F64         : VOP1_Real_gfx6_gfx7_gfx10<0x03e>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x03f>;
defm V_FREXP_MANT_F32    : VOP1_Real_gfx6_gfx7_gfx10<0x040>;
defm V_CLREXCP           : VOP1_Real_gfx6_gfx7_gfx10<0x041>;
defm V_MOVRELD_B32       : VOP1_Real_gfx6_gfx7_gfx10<0x042>;
defm V_MOVRELS_B32       : VOP1_Real_gfx6_gfx7_gfx10<0x043>;
defm V_MOVRELSD_B32      : VOP1_Real_gfx6_gfx7_gfx10<0x044>;

//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//

// DPP encoding used by the VI and GFX9 real instructions below: bits 8-0 hold
// the constant 0xfa, followed by the opcode, the optional vdst and 0x3f.
class VOP1_DPPe <bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
    VOP_DPPe <P> {
  bits<8> vdst;

  let Inst{8-0}   = 0xfa; // dpp
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; //encoding
}

// For instructions that only exist in the VOP1 form (pseudo has no suffix).
multiclass VOP1Only_Real_vi <bits<10> op> {
  let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
    def _vi :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.VI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
  }
}

// e32 and e64 real instructions; the VOP3 opcode is op + 0x140.
multiclass VOP1_Real_e32e64_vi <bits<10> op> {
  let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
    def _e32_vi :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
    def _e64_vi :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
      VOP3e_vi <!add(0x140, op), !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
}

// Full set for one opcode: e32/e64 always; the sdwa (VI), sdwa (GFX9) and dpp
// variants are each gated on the corresponding profile flag.
multiclass VOP1_Real_vi <bits<10> op> {
  defm NAME : VOP1_Real_e32e64_vi <op>;

  foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA>.ret in
    def _sdwa_vi :
      VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx9 :
      VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_vi :
      VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
      VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
}

defm V_NOP               : VOP1_Real_vi <0x0>;
defm V_MOV_B32           : VOP1_Real_vi <0x1>;
defm V_CVT_I32_F64       : VOP1_Real_vi <0x3>;
defm V_CVT_F64_I32       : VOP1_Real_vi <0x4>;
defm V_CVT_F32_I32       : VOP1_Real_vi <0x5>;
defm V_CVT_F32_U32       : VOP1_Real_vi <0x6>;
defm V_CVT_U32_F32       : VOP1_Real_vi <0x7>;
defm V_CVT_I32_F32       : VOP1_Real_vi <0x8>;
defm V_MOV_FED_B32       : VOP1_Real_vi <0x9>;
defm V_CVT_F16_F32       : VOP1_Real_vi <0xa>;
defm V_CVT_F32_F16       : VOP1_Real_vi <0xb>;
defm V_CVT_RPI_I32_F32   : VOP1_Real_vi <0xc>;
defm V_CVT_FLR_I32_F32   : VOP1_Real_vi <0xd>;
defm V_CVT_OFF_F32_I4    : VOP1_Real_vi <0xe>;
defm V_CVT_F32_F64       : VOP1_Real_vi <0xf>;
defm V_CVT_F64_F32       : VOP1_Real_vi <0x10>;
defm V_CVT_F32_UBYTE0    : VOP1_Real_vi <0x11>;
defm V_CVT_F32_UBYTE1    : VOP1_Real_vi <0x12>;
defm V_CVT_F32_UBYTE2    : VOP1_Real_vi <0x13>;
defm V_CVT_F32_UBYTE3    : VOP1_Real_vi <0x14>;
defm V_CVT_U32_F64       : VOP1_Real_vi <0x15>;
defm V_CVT_F64_U32       : VOP1_Real_vi <0x16>;
defm V_FRACT_F32         : VOP1_Real_vi <0x1b>;
defm V_TRUNC_F32         : VOP1_Real_vi <0x1c>;
defm V_CEIL_F32          : VOP1_Real_vi <0x1d>;
defm V_RNDNE_F32         : VOP1_Real_vi <0x1e>;
defm V_FLOOR_F32         : VOP1_Real_vi <0x1f>;
defm V_EXP_F32           : VOP1_Real_vi <0x20>;
defm V_LOG_F32           : VOP1_Real_vi <0x21>;
defm V_RCP_F32           : VOP1_Real_vi <0x22>;
defm V_RCP_IFLAG_F32     : VOP1_Real_vi <0x23>;
defm V_RSQ_F32           : VOP1_Real_vi <0x24>;
defm V_RCP_F64           : VOP1_Real_vi <0x25>;
defm V_RSQ_F64           : VOP1_Real_vi <0x26>;
defm V_SQRT_F32          : VOP1_Real_vi <0x27>;
defm V_SQRT_F64          : VOP1_Real_vi <0x28>;
defm V_SIN_F32           : VOP1_Real_vi <0x29>;
defm V_COS_F32           : VOP1_Real_vi <0x2a>;
defm V_NOT_B32           : VOP1_Real_vi <0x2b>;
defm V_BFREV_B32         : VOP1_Real_vi <0x2c>;
defm V_FFBH_U32          : VOP1_Real_vi <0x2d>;
defm V_FFBL_B32          : VOP1_Real_vi <0x2e>;
defm V_FFBH_I32          : VOP1_Real_vi <0x2f>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_vi <0x30>;
defm V_FREXP_MANT_F64    : VOP1_Real_vi <0x31>;
defm V_FRACT_F64         : VOP1_Real_vi <0x32>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_vi <0x33>;
defm V_FREXP_MANT_F32    : VOP1_Real_vi <0x34>;
defm V_CLREXCP           : VOP1_Real_vi <0x35>;
defm V_MOVRELD_B32       : VOP1_Real_e32e64_vi <0x36>;
defm V_MOVRELS_B32       : VOP1_Real_e32e64_vi <0x37>;
defm V_MOVRELSD_B32      : VOP1_Real_e32e64_vi <0x38>;
defm V_TRUNC_F64         : VOP1_Real_vi <0x17>;
defm V_CEIL_F64          : VOP1_Real_vi <0x18>;
defm V_FLOOR_F64         : VOP1_Real_vi <0x1A>;
defm V_RNDNE_F64         : VOP1_Real_vi <0x19>;
defm V_LOG_LEGACY_F32    : VOP1_Real_vi <0x4c>;
defm V_EXP_LEGACY_F32    : VOP1_Real_vi <0x4b>;
defm V_CVT_F16_U16       : VOP1_Real_vi <0x39>;
defm V_CVT_F16_I16       : VOP1_Real_vi <0x3a>;
defm V_CVT_U16_F16       : VOP1_Real_vi <0x3b>;
defm V_CVT_I16_F16       : VOP1_Real_vi <0x3c>;
defm V_RCP_F16           : VOP1_Real_vi <0x3d>;
defm V_SQRT_F16          : VOP1_Real_vi <0x3e>;
defm V_RSQ_F16           : VOP1_Real_vi <0x3f>;
defm V_LOG_F16           : VOP1_Real_vi <0x40>;
defm V_EXP_F16           : VOP1_Real_vi <0x41>;
defm V_FREXP_MANT_F16    : VOP1_Real_vi <0x42>;
defm V_FREXP_EXP_I16_F16 : VOP1_Real_vi <0x43>;
defm V_FLOOR_F16         : VOP1_Real_vi <0x44>;
defm V_CEIL_F16          : VOP1_Real_vi <0x45>;
defm V_TRUNC_F16         : VOP1_Real_vi <0x46>;
defm V_RNDNE_F16         : VOP1_Real_vi <0x47>;
defm V_FRACT_F16         : VOP1_Real_vi <0x48>;
defm V_SIN_F16           : VOP1_Real_vi <0x49>;
defm V_COS_F16           : VOP1_Real_vi <0x4a>;
defm V_SWAP_B32          : VOP1Only_Real_vi <0x51>;

defm V_SAT_PK_U8_I16     : VOP1_Real_vi<0x4f>;
defm V_CVT_NORM_I16_F16  : VOP1_Real_vi<0x4d>;
defm V_CVT_NORM_U16_F16  : VOP1_Real_vi<0x4e>;

// Copy of v_mov_b32 with $vdst as a use operand for use with VGPR
// indexing mode. vdst can't be treated as a def for codegen purposes,
// and an implicit use and def of the super register should be added.
def V_MOV_B32_indirect : VPseudoInstSI<(outs),
  (ins getVALUDstForVT<i32>.ret:$vdst, getVOPSrc0ForVT<i32>.ret:$src0)>,
  PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
                                        getVOPSrc0ForVT<i32>.ret:$src0)> {
  let SubtargetPredicate = isGFX8GFX9;
  let VOP1 = 1;
}

// This is a pseudo variant of the v_movreld_b32 instruction in which the
// vector operand appears only twice, once as def and once as use. Using this
// pseudo avoids problems with the Two Address instructions pass.
class V_MOVRELD_B32_pseudo<RegisterClass rc> : VPseudoInstSI <
  (outs rc:$vdst),
  (ins rc:$vsrc, VSrc_b32:$val, i32imm:$offset)> {
  let SubtargetPredicate = HasMovrel;

  let VOP1 = 1;
  let Uses = [M0, EXEC];

  // The vector source is tied to the result.
  let Constraints = "$vsrc = $vdst";
}

// One pseudo per vector register class.
def V_MOVRELD_B32_V1  : V_MOVRELD_B32_pseudo<VGPR_32>;
def V_MOVRELD_B32_V2  : V_MOVRELD_B32_pseudo<VReg_64>;
def V_MOVRELD_B32_V4  : V_MOVRELD_B32_pseudo<VReg_128>;
def V_MOVRELD_B32_V8  : V_MOVRELD_B32_pseudo<VReg_256>;
def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo<VReg_512>;

let OtherPredicates = [isGFX8Plus] in {

// mov_dpp: the source is passed as both the old value and the source operand.
def : GCNPat <
  (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask,
                           timm:$bound_ctrl)),
  (V_MOV_B32_dpp $src, $src, (as_i32imm $dpp_ctrl),
                 (as_i32imm $row_mask), (as_i32imm $bank_mask),
                 (as_i1imm $bound_ctrl))
>;

// update_dpp: same instruction, with a separate $old operand.
def : GCNPat <
  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl, timm:$row_mask,
                              timm:$bank_mask, timm:$bound_ctrl)),
  (V_MOV_B32_dpp $old, $src, (as_i32imm $dpp_ctrl),
                 (as_i32imm $row_mask), (as_i32imm $bank_mask),
                 (as_i1imm $bound_ctrl))
>;

} // End OtherPredicates = [isGFX8Plus]

let OtherPredicates = [isGFX8Plus] in {
// i16 -> i32 anyext is a plain copy.
def : GCNPat<
  (i32 (anyext i16:$src)),
  (COPY $src)
>;

// i16 -> i64 anyext: copy into sub0 and put a zero in sub1.
def : GCNPat<
   (i64 (anyext i16:$src)),
   (REG_SEQUENCE VReg_64,
     (i32 (COPY $src)), sub0,
     (V_MOV_B32_e32 (i32 0)), sub1)
>;

// i32 -> i16 trunc is a plain copy.
def : GCNPat<
  (i16 (trunc i32:$src)),
  (COPY $src)
>;

// i64 -> i16 trunc takes the sub0 half.
def : GCNPat <
  (i16 (trunc i64:$src)),
  (EXTRACT_SUBREG $src, sub0)
>;

} // End OtherPredicates = [isGFX8Plus]

//===----------------------------------------------------------------------===//
// GFX9
//===----------------------------------------------------------------------===//

// GFX9 real instructions for one opcode: the e32/e64 definitions wrapped in a
// GFX9-only predicate and decoder namespace, plus the sdwa and dpp variants
// gated on the corresponding profile flag.
multiclass VOP1_Real_gfx9 <bits<10> op> {
  let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
    defm NAME : VOP1_Real_e32e64_vi <op>;
  }

  foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx9 :
      VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_gfx9 :
      VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
      VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;

}

defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;

//===----------------------------------------------------------------------===//
// GFX10
//===----------------------------------------------------------------------===//

let OtherPredicates = [isGFX10Plus] in {
// mov_dpp8: the source is passed twice and the last operand is fixed to
// DPP8Mode.FI_0.
def : GCNPat <
  (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
  (V_MOV_B32_dpp8_gfx10 $src, $src, (as_i32imm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX10Plus]