//===-- VOP1Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP1 Classes
//===----------------------------------------------------------------------===//

class VOP1e <bits<8> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;

  let Inst{8-0}   = !if(P.HasSrc0, src0{8-0}, ?);
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

class VOP1_SDWA9Ae <bits<8> op, VOPProfile P> : VOP_SDWA9Ae <P> {
  bits<8> vdst;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1Only = 0> :
  VOP_Pseudo <opName, !if(VOP1Only, "", "_e32"), P, P.Outs32, P.Ins32, "", pattern> {

  let AsmOperands = P.Asm32;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  let VOP1 = 1;
  let VALU = 1;
  let Uses = [EXEC];

  let AsmVariantName = AMDGPUAsmVariants.Default;
}

class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  let Constraints     = ps.Constraints;
  let DisableEncoding = ps.DisableEncoding;

  // copy relevant pseudo op flags
  let SubtargetPredicate = ps.SubtargetPredicate;
  let AsmMatchConverter  = ps.AsmMatchConverter;
  let AsmVariantName     = ps.AsmVariantName;
  let Constraints        = ps.Constraints;
  let DisableEncoding    = ps.DisableEncoding;
  let TSFlags            = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses                 = ps.Uses;
  let Defs                 = ps.Defs;
}

class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP1";
}

class VOP1_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_DPP_Pseudo <OpName, P, pattern> {
}

class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret =
    !if(P.HasModifiers,
        [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
                                              i32:$src0_modifiers,
                                              i1:$clamp, i32:$omod))))],
        !if(P.HasOMod,
            [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0,
                                                  i1:$clamp, i32:$omod))))],
            [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]
        )
    );
}

multiclass VOP1Inst <string opName, VOPProfile P,
                     SDPatternOperator node = null_frag> {
  def _e32 : VOP1_Pseudo <opName, P>;
  def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>;
  def _sdwa : VOP1_SDWA_Pseudo <opName, P>;
  foreach _ = BoolToList<P.HasExtDPP>.ret in
    def _dpp : VOP1_DPP_Pseudo <opName, P>;
}
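// Illustrative note (naming only, no new definitions): each
// "defm FOO : VOP1Inst<...>" below expands, via the multiclass above, into
// the pseudos FOO_e32 (VOP1 encoding), FOO_e64 (VOP3 encoding), FOO_sdwa
// and, when the profile supports DPP, FOO_dpp.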
// Special profile for instructions which have clamp
// and output modifiers (but have no input modifiers)
class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
  VOPProfile<[dstVt, srcVt, untyped, untyped]> {

  let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
  let Asm64 = "$vdst, $src0$clamp$omod";

  let HasModifiers = 0;
  let HasClamp = 1;
  let HasOMod = 1;
}

def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;

//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//

let VOPAsmPrefer32Bit = 1 in {
defm V_NOP : VOP1Inst <"v_nop", VOP_NONE>;
}

let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>;
} // End isMoveImm = 1

// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: Make profile for this, there is VOP3 encoding also
def V_READFIRSTLANE_B32 :
  InstSI <(outs SReg_32:$vdst),
    (ins VRegOrLds_32:$src0),
    "v_readfirstlane_b32 $vdst, $src0",
    [(set i32:$vdst, (int_amdgcn_readfirstlane i32:$src0))]>,
  Enc32 {

  let isCodeGenOnly = 0;
  let UseNamedOperandTable = 1;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  let VOP1 = 1;
  let VALU = 1;
  let Uses = [EXEC];
  let isConvergent = 1;

  bits<8> vdst;
  bits<9> src0;

  let Inst{8-0}   = src0;
  let Inst{16-9}  = 0x2;
  let Inst{24-17} = vdst;
  let Inst{31-25} = 0x3f; // encoding
}
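// For illustration only: the hand-written encoding above mirrors the VOP1e
// field layout with opcode 0x2, except that the destination is an SGPR, so
// the generic VOP1 profiles are not used (see the TODO above). Typical
// assembly would look like "v_readfirstlane_b32 s0, v0" (register names
// chosen arbitrarily here).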
let SchedRW = [WriteDoubleCvt] in {
defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>;
defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;
defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
} // End SchedRW = [WriteDoubleCvt]

let SchedRW = [WriteQuarterRate32] in {
defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
let FPDPRounding = 1 in {
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
} // End FPDPRounding = 1
defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
} // End SchedRW = [WriteQuarterRate32]

defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;

defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>;
defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>;
defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;

let SchedRW = [WriteQuarterRate32] in {
defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>;
defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>;
defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32, AMDGPUrcp_iflag>;
defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, fsqrt>;
} // End SchedRW = [WriteQuarterRate32]

let SchedRW = [WriteDouble] in {
defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>;
defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>;
} // End SchedRW = [WriteDouble]

let SchedRW = [WriteDouble] in {
defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, fsqrt>;
} // End SchedRW = [WriteDouble]

let SchedRW = [WriteQuarterRate32] in {
defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>;
defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>;
} // End SchedRW = [WriteQuarterRate32]

defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>;
defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32>;
defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32>;
defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32>;
defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32>;

let SchedRW = [WriteDoubleAdd] in {
defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>;
let FPDPRounding = 1 in {
defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>;
} // End FPDPRounding = 1
} // End SchedRW = [WriteDoubleAdd]

defm V_FREXP_EXP_I32_F32 : VOP1Inst <"v_frexp_exp_i32_f32", VOP_I32_F32, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F32 : VOP1Inst <"v_frexp_mant_f32", VOP_F32_F32, int_amdgcn_frexp_mant>;

let VOPAsmPrefer32Bit = 1 in {
defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
}

// Restrict src0 to be VGPR
def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> {
  let Src0RC32 = VRegSrc_32;
  let Src0RC64 = VRegSrc_32;

  let HasExt = 0;
  let HasExtDPP = 0;
  let HasExtSDWA = 0;
  let HasExtSDWA9 = 0;
}
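// Rough rationale (informational comment only): the v_movrels* instructions
// below read their source from a VGPR selected relative to M0, so the source
// has to live in a VGPR; this profile therefore forbids SGPR and inline
// constant sources and disables the SDWA/DPP extended variants.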
// Special case because there are no true output operands.  Hack vdst
// to be a src operand. The custom inserter must add a tied implicit
// def and use of the super register since there seems to be no way to
// add an implicit def of a virtual register in tablegen.
def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
  let Src0RC32 = VOPDstOperand<VGPR_32>;
  let Src0RC64 = VOPDstOperand<VGPR_32>;

  let Outs = (outs);
  let Ins32 = (ins Src0RC32:$vdst, VSrc_b32:$src0);
  let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0);
  let InsDPP = (ins DstRC:$vdst, DstRC:$old, Src0RC32:$src0,
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));

  let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel);

  let Asm32 = getAsm32<1, 1>.ret;
  let Asm64 = getAsm64<1, 1, 0, 0, 1>.ret;
  let AsmDPP = getAsmDPP<1, 1, 0>.ret;
  let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;
  let AsmSDWA = getAsmSDWA<1, 1>.ret;
  let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;

  let HasExt = 0;
  let HasExtDPP = 0;
  let HasExtSDWA = 0;
  let HasExtSDWA9 = 0;

  let HasDst = 0;
  let EmitDst = 1; // force vdst emission
}

let SubtargetPredicate = HasMovrel, Uses = [M0, EXEC] in {
// v_movreld_b32 is a special case because the destination output
// register is really a source. It isn't actually read (but may be
// written), and is only to provide the base register to start
// indexing from. Tablegen seems to not let you define an implicit
// virtual register output for the super register being written into,
// so this must have an implicit def of the register added to it.
defm V_MOVRELD_B32 : VOP1Inst <"v_movreld_b32", VOP_MOVRELD>;
defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_I32_VI32_NO_EXT>;
defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_NO_EXT<VOP_I32_I32>>;
} // End SubtargetPredicate = HasMovrel, Uses = [M0, EXEC]

defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>;

let SubtargetPredicate = isGFX6GFX7 in {
  let SchedRW = [WriteQuarterRate32] in {
    defm V_LOG_CLAMP_F32 :
      VOP1Inst<"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
    defm V_RCP_CLAMP_F32 :
      VOP1Inst<"v_rcp_clamp_f32", VOP_F32_F32>;
    defm V_RCP_LEGACY_F32 :
      VOP1Inst<"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
    defm V_RSQ_CLAMP_F32 :
      VOP1Inst<"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
    defm V_RSQ_LEGACY_F32 :
      VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy>;
  } // End SchedRW = [WriteQuarterRate32]

  let SchedRW = [WriteDouble] in {
    defm V_RCP_CLAMP_F64 :
      VOP1Inst<"v_rcp_clamp_f64", VOP_F64_F64>;
    defm V_RSQ_CLAMP_F64 :
      VOP1Inst<"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>;
  } // End SchedRW = [WriteDouble]
} // End SubtargetPredicate = isGFX6GFX7

let SubtargetPredicate = isGFX7GFX8GFX9 in {
  let SchedRW = [WriteQuarterRate32] in {
    defm V_LOG_LEGACY_F32 : VOP1Inst<"v_log_legacy_f32", VOP_F32_F32>;
    defm V_EXP_LEGACY_F32 : VOP1Inst<"v_exp_legacy_f32", VOP_F32_F32>;
  } // End SchedRW = [WriteQuarterRate32]
} // End SubtargetPredicate = isGFX7GFX8GFX9

let SubtargetPredicate = isGFX7Plus in {
  let SchedRW = [WriteDoubleAdd] in {
    defm V_TRUNC_F64 : VOP1Inst<"v_trunc_f64", VOP_F64_F64, ftrunc>;
    defm V_CEIL_F64 : VOP1Inst<"v_ceil_f64", VOP_F64_F64, fceil>;
    defm V_RNDNE_F64 : VOP1Inst<"v_rndne_f64", VOP_F64_F64, frint>;
    defm V_FLOOR_F64 : VOP1Inst<"v_floor_f64", VOP_F64_F64, ffloor>;
  } // End SchedRW = [WriteDoubleAdd]
} // End SubtargetPredicate = isGFX7Plus
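// Informational note (not load-bearing): the f64 trunc/ceil/rndne/floor
// instructions above are gated on isGFX7Plus, so on GFX6 there is no VOP1
// instruction for them and those operations are presumably expanded
// elsewhere in the backend.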
let SubtargetPredicate = Has16BitInsts in {

let FPDPRounding = 1 in {
defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
} // End FPDPRounding = 1
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>;
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>;
let SchedRW = [WriteQuarterRate32] in {
defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16, fsqrt>;
defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>;
defm V_LOG_F16 : VOP1Inst <"v_log_f16", VOP_F16_F16, flog2>;
defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16, fexp2>;
defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
} // End SchedRW = [WriteQuarterRate32]
defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16, int_amdgcn_frexp_exp>;
defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>;
defm V_CEIL_F16 : VOP1Inst <"v_ceil_f16", VOP_F16_F16, fceil>;
defm V_TRUNC_F16 : VOP1Inst <"v_trunc_f16", VOP_F16_F16, ftrunc>;
defm V_RNDNE_F16 : VOP1Inst <"v_rndne_f16", VOP_F16_F16, frint>;
let FPDPRounding = 1 in {
defm V_FRACT_F16 : VOP1Inst <"v_fract_f16", VOP_F16_F16, AMDGPUfract>;
} // End FPDPRounding = 1

} // End SubtargetPredicate = Has16BitInsts

let OtherPredicates = [Has16BitInsts] in {

def : GCNPat<
  (f32 (f16_to_fp i16:$src)),
  (V_CVT_F32_F16_e32 $src)
>;

def : GCNPat<
  (i16 (AMDGPUfp_to_f16 f32:$src)),
  (V_CVT_F16_F32_e32 $src)
>;

} // End OtherPredicates = [Has16BitInsts]

def VOP_SWAP_I32 : VOPProfile<[i32, i32, i32, untyped]> {
  let Outs32 = (outs VGPR_32:$vdst, VGPR_32:$vdst1);
  let Ins32 = (ins VGPR_32:$src0, VGPR_32:$src1);
  let Outs64 = Outs32;
  let Asm32 = " $vdst, $src0";
  let Asm64 = "";
  let Ins64 = (ins);
}

let SubtargetPredicate = isGFX9Plus in {
  def V_SWAP_B32 : VOP1_Pseudo<"v_swap_b32", VOP_SWAP_I32, [], 1> {
    let Constraints = "$vdst = $src1, $vdst1 = $src0";
    let DisableEncoding = "$vdst1,$src1";
    let SchedRW = [Write64Bit, Write64Bit];
  }

  defm V_SAT_PK_U8_I16 : VOP1Inst<"v_sat_pk_u8_i16", VOP_I32_I32>;
  defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16>;
  defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16>;
} // End SubtargetPredicate = isGFX9Plus

let SubtargetPredicate = isGFX9Only in {
  defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
} // End SubtargetPredicate = isGFX9Only

let SubtargetPredicate = isGFX10Plus in {
  defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NONE>;

  let Uses = [M0] in {
    // FIXME-GFX10: Should V_MOVRELSD_2_B32 be VOP_NO_EXT?
    defm V_MOVRELSD_2_B32 :
      VOP1Inst<"v_movrelsd_2_b32", VOP_NO_EXT<VOP_I32_I32>>;

    def V_SWAPREL_B32 : VOP1_Pseudo<"v_swaprel_b32", VOP_SWAP_I32, [], 1> {
      let Constraints = "$vdst = $src1, $vdst1 = $src0";
      let DisableEncoding = "$vdst1,$src1";
      let SchedRW = [Write64Bit, Write64Bit];
    }
  } // End Uses = [M0]
} // End SubtargetPredicate = isGFX10Plus
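// Organizational note (descriptive only): everything above defines
// target-independent *_Pseudo instructions; the sections that follow pair
// each pseudo with per-subtarget "real" encodings (VOP1_Real / VOP3_Real /
// SDWA / DPP), keyed by SIEncodingFamily, so the MC layer can emit the byte
// encoding appropriate for the selected subtarget.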
//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//

class VOP1_DPP<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 = 0> :
    VOP_DPP<ps.OpName, p, isDPP16> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;

  bits<8> vdst;
  let Inst{8-0}   = 0xfa;
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f;
}

class VOP1_DPP16<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
    VOP1_DPP<op, ps, p, 1> {
  let AssemblerPredicate = !if(p.HasExt, HasDPP16, DisableInst);
  let SubtargetPredicate = HasDPP16;
}

class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
    VOP_DPP8<ps.OpName, p> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;

  bits<8> vdst;
  let Inst{8-0}   = fi;
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f;

  let AssemblerPredicate = !if(p.HasExt, HasDPP8, DisableInst);
  let SubtargetPredicate = HasDPP8;
}

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
  multiclass VOP1Only_Real_gfx10<bits<9> op> {
    def _gfx10 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.GFX10>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
  }
  multiclass VOP1_Real_e32_gfx10<bits<9> op> {
    def _e32_gfx10 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
  }
  multiclass VOP1_Real_e64_gfx10<bits<9> op> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
  multiclass VOP1_Real_sdwa_gfx10<bits<9> op> {
    def _sdwa_gfx10 :
      VOP_SDWA10_Real<!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP1_SDWA9Ae<op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
      let DecoderNamespace = "SDWA10";
    }
  }
  multiclass VOP1_Real_dpp_gfx10<bits<9> op> {
    def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")> {
      let DecoderNamespace = "SDWA10";
    }
  }
  multiclass VOP1_Real_dpp8_gfx10<bits<9> op> {
    def _dpp8_gfx10 : VOP1_DPP8<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")> {
      let DecoderNamespace = "DPP8";
    }
  }
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"

multiclass VOP1_Real_gfx10_no_dpp<bits<9> op> :
  VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>,
  VOP1_Real_sdwa_gfx10<op>;

multiclass VOP1_Real_gfx10_no_dpp8<bits<9> op> :
  VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>,
  VOP1_Real_sdwa_gfx10<op>, VOP1_Real_dpp_gfx10<op>;

multiclass VOP1_Real_gfx10<bits<9> op> :
  VOP1_Real_gfx10_no_dpp8<op>, VOP1_Real_dpp8_gfx10<op>;
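// For illustration (the names are derived mechanically, not new definitions):
// a line such as "defm V_RCP_F16 : VOP1_Real_gfx10<0x054>;" below is expected
// to instantiate V_RCP_F16_e32_gfx10, V_RCP_F16_e64_gfx10, V_RCP_F16_sdwa_gfx10,
// V_RCP_F16_dpp_gfx10 and V_RCP_F16_dpp8_gfx10 reals bound to the pseudos
// defined earlier in this file.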
defm V_PIPEFLUSH : VOP1_Real_gfx10<0x01b>;
defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10<0x048>;
defm V_CVT_F16_U16 : VOP1_Real_gfx10<0x050>;
defm V_CVT_F16_I16 : VOP1_Real_gfx10<0x051>;
defm V_CVT_U16_F16 : VOP1_Real_gfx10<0x052>;
defm V_CVT_I16_F16 : VOP1_Real_gfx10<0x053>;
defm V_RCP_F16 : VOP1_Real_gfx10<0x054>;
defm V_SQRT_F16 : VOP1_Real_gfx10<0x055>;
defm V_RSQ_F16 : VOP1_Real_gfx10<0x056>;
defm V_LOG_F16 : VOP1_Real_gfx10<0x057>;
defm V_EXP_F16 : VOP1_Real_gfx10<0x058>;
defm V_FREXP_MANT_F16 : VOP1_Real_gfx10<0x059>;
defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10<0x05a>;
defm V_FLOOR_F16 : VOP1_Real_gfx10<0x05b>;
defm V_CEIL_F16 : VOP1_Real_gfx10<0x05c>;
defm V_TRUNC_F16 : VOP1_Real_gfx10<0x05d>;
defm V_RNDNE_F16 : VOP1_Real_gfx10<0x05e>;
defm V_FRACT_F16 : VOP1_Real_gfx10<0x05f>;
defm V_SIN_F16 : VOP1_Real_gfx10<0x060>;
defm V_COS_F16 : VOP1_Real_gfx10<0x061>;
defm V_SAT_PK_U8_I16 : VOP1_Real_gfx10<0x062>;
defm V_CVT_NORM_I16_F16 : VOP1_Real_gfx10<0x063>;
defm V_CVT_NORM_U16_F16 : VOP1_Real_gfx10<0x064>;

defm V_SWAP_B32 : VOP1Only_Real_gfx10<0x065>;
defm V_SWAPREL_B32 : VOP1Only_Real_gfx10<0x068>;

//===----------------------------------------------------------------------===//
// GFX7, GFX10.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
  multiclass VOP1_Real_e32_gfx7<bits<9> op> {
    def _e32_gfx7 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
  }
  multiclass VOP1_Real_e64_gfx7<bits<9> op> {
    def _e64_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"

multiclass VOP1_Real_gfx7<bits<9> op> :
  VOP1_Real_e32_gfx7<op>, VOP1_Real_e64_gfx7<op>;

multiclass VOP1_Real_gfx7_gfx10<bits<9> op> :
  VOP1_Real_gfx7<op>, VOP1_Real_gfx10<op>;

defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>;
defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>;

defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10<0x017>;
defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10<0x018>;
defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10<0x019>;
defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10<0x01a>;
//===----------------------------------------------------------------------===//
// GFX6, GFX7, GFX10.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  multiclass VOP1_Real_e32_gfx6_gfx7<bits<9> op> {
    def _e32_gfx6_gfx7 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
  }
  multiclass VOP1_Real_e64_gfx6_gfx7<bits<9> op> {
    def _e64_gfx6_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

multiclass VOP1_Real_gfx6_gfx7<bits<9> op> :
  VOP1_Real_e32_gfx6_gfx7<op>, VOP1_Real_e64_gfx6_gfx7<op>;

multiclass VOP1_Real_gfx6_gfx7_gfx10<bits<9> op> :
  VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10<op>;

multiclass VOP1_Real_gfx6_gfx7_gfx10_no_dpp8<bits<9> op> :
  VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10_no_dpp8<op>;

multiclass VOP1_Real_gfx6_gfx7_gfx10_no_dpp<bits<9> op> :
  VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10_no_dpp<op>;
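// Encoding note (derived from the literal opcode prefixes used above): for
// the _e64 (VOP3) forms, the opcode is built as {1, 1, op{6-0}} on GFX6/GFX7
// and {0, 1, 1, op{6-0}} on GFX10, i.e. 0x180 plus the low seven bits of the
// VOP1 opcode, whereas the VI/GFX9 section below uses !add(0x140, op).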
defm V_LOG_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x026>;
defm V_RCP_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x028>;
defm V_RCP_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x029>;
defm V_RSQ_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x02c>;
defm V_RSQ_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x02d>;
defm V_RCP_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x030>;
defm V_RSQ_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x032>;

defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10<0x000>;
defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x001>;
defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x003>;
defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x004>;
defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x005>;
defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x006>;
defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x007>;
defm V_CVT_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x008>;
defm V_MOV_FED_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x009>;
defm V_CVT_F16_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00a>;
defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10<0x00b>;
defm V_CVT_RPI_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00c>;
defm V_CVT_FLR_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00d>;
defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10<0x00e>;
defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x00f>;
defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x010>;
defm V_CVT_F32_UBYTE0 : VOP1_Real_gfx6_gfx7_gfx10<0x011>;
defm V_CVT_F32_UBYTE1 : VOP1_Real_gfx6_gfx7_gfx10<0x012>;
defm V_CVT_F32_UBYTE2 : VOP1_Real_gfx6_gfx7_gfx10<0x013>;
defm V_CVT_F32_UBYTE3 : VOP1_Real_gfx6_gfx7_gfx10<0x014>;
defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x015>;
defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x016>;
defm V_FRACT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x020>;
defm V_TRUNC_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x021>;
defm V_CEIL_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x022>;
defm V_RNDNE_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x023>;
defm V_FLOOR_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x024>;
defm V_EXP_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x025>;
defm V_LOG_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x027>;
defm V_RCP_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02a>;
defm V_RCP_IFLAG_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02b>;
defm V_RSQ_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02e>;
defm V_RCP_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x02f>;
defm V_RSQ_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x031>;
defm V_SQRT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x033>;
defm V_SQRT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x034>;
defm V_SIN_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x035>;
defm V_COS_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x036>;
defm V_NOT_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x037>;
defm V_BFREV_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x038>;
defm V_FFBH_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x039>;
defm V_FFBL_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x03a>;
defm V_FFBH_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x03b>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03c>;
defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03d>;
defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03e>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x03f>;
defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x040>;
defm V_CLREXCP : VOP1_Real_gfx6_gfx7_gfx10<0x041>;
defm V_MOVRELD_B32 : VOP1_Real_gfx6_gfx7_gfx10_no_dpp<0x042>;
defm V_MOVRELS_B32 : VOP1_Real_gfx6_gfx7_gfx10_no_dpp8<0x043>;
defm V_MOVRELSD_B32 : VOP1_Real_gfx6_gfx7_gfx10_no_dpp8<0x044>;

//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//

class VOP1_DPPe <bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
  VOP_DPPe <P> {
  bits<8> vdst;
  let Inst{8-0}   = 0xfa; // dpp
  let Inst{16-9}  = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

multiclass VOP1Only_Real_vi <bits<10> op> {
  let AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8" in {
    def _vi :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.VI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
  }
}

multiclass VOP1_Real_e32e64_vi <bits<10> op> {
  let AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8" in {
    def _e32_vi :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
    def _e64_vi :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
      VOP3e_vi <!add(0x140, op), !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
}

multiclass VOP1_Real_vi <bits<10> op> {
  defm NAME : VOP1_Real_e32e64_vi <op>;

  def _sdwa_vi :
    VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_vi :
      VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
      VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
}
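// Descriptive note: VOP1_Real_vi above emits two SDWA reals per opcode,
// _sdwa_vi (the original GFX8 SDWA encoding, VOP1_SDWAe) and _sdwa_gfx9
// (the revised GFX9 SDWA encoding, VOP1_SDWA9Ae); the applicable one is
// presumably gated by the predicates carried by VOP_SDWA_Real and
// VOP_SDWA9_Real.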
defm V_NOP : VOP1_Real_vi <0x0>;
defm V_MOV_B32 : VOP1_Real_vi <0x1>;
defm V_CVT_I32_F64 : VOP1_Real_vi <0x3>;
defm V_CVT_F64_I32 : VOP1_Real_vi <0x4>;
defm V_CVT_F32_I32 : VOP1_Real_vi <0x5>;
defm V_CVT_F32_U32 : VOP1_Real_vi <0x6>;
defm V_CVT_U32_F32 : VOP1_Real_vi <0x7>;
defm V_CVT_I32_F32 : VOP1_Real_vi <0x8>;
defm V_MOV_FED_B32 : VOP1_Real_vi <0x9>;
defm V_CVT_F16_F32 : VOP1_Real_vi <0xa>;
defm V_CVT_F32_F16 : VOP1_Real_vi <0xb>;
defm V_CVT_RPI_I32_F32 : VOP1_Real_vi <0xc>;
defm V_CVT_FLR_I32_F32 : VOP1_Real_vi <0xd>;
defm V_CVT_OFF_F32_I4 : VOP1_Real_vi <0xe>;
defm V_CVT_F32_F64 : VOP1_Real_vi <0xf>;
defm V_CVT_F64_F32 : VOP1_Real_vi <0x10>;
defm V_CVT_F32_UBYTE0 : VOP1_Real_vi <0x11>;
defm V_CVT_F32_UBYTE1 : VOP1_Real_vi <0x12>;
defm V_CVT_F32_UBYTE2 : VOP1_Real_vi <0x13>;
defm V_CVT_F32_UBYTE3 : VOP1_Real_vi <0x14>;
defm V_CVT_U32_F64 : VOP1_Real_vi <0x15>;
defm V_CVT_F64_U32 : VOP1_Real_vi <0x16>;
defm V_FRACT_F32 : VOP1_Real_vi <0x1b>;
defm V_TRUNC_F32 : VOP1_Real_vi <0x1c>;
defm V_CEIL_F32 : VOP1_Real_vi <0x1d>;
defm V_RNDNE_F32 : VOP1_Real_vi <0x1e>;
defm V_FLOOR_F32 : VOP1_Real_vi <0x1f>;
defm V_EXP_F32 : VOP1_Real_vi <0x20>;
defm V_LOG_F32 : VOP1_Real_vi <0x21>;
defm V_RCP_F32 : VOP1_Real_vi <0x22>;
defm V_RCP_IFLAG_F32 : VOP1_Real_vi <0x23>;
defm V_RSQ_F32 : VOP1_Real_vi <0x24>;
defm V_RCP_F64 : VOP1_Real_vi <0x25>;
defm V_RSQ_F64 : VOP1_Real_vi <0x26>;
defm V_SQRT_F32 : VOP1_Real_vi <0x27>;
defm V_SQRT_F64 : VOP1_Real_vi <0x28>;
defm V_SIN_F32 : VOP1_Real_vi <0x29>;
defm V_COS_F32 : VOP1_Real_vi <0x2a>;
defm V_NOT_B32 : VOP1_Real_vi <0x2b>;
defm V_BFREV_B32 : VOP1_Real_vi <0x2c>;
defm V_FFBH_U32 : VOP1_Real_vi <0x2d>;
defm V_FFBL_B32 : VOP1_Real_vi <0x2e>;
defm V_FFBH_I32 : VOP1_Real_vi <0x2f>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_vi <0x30>;
defm V_FREXP_MANT_F64 : VOP1_Real_vi <0x31>;
defm V_FRACT_F64 : VOP1_Real_vi <0x32>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_vi <0x33>;
defm V_FREXP_MANT_F32 : VOP1_Real_vi <0x34>;
defm V_CLREXCP : VOP1_Real_vi <0x35>;
defm V_MOVRELD_B32 : VOP1_Real_e32e64_vi <0x36>;
defm V_MOVRELS_B32 : VOP1_Real_e32e64_vi <0x37>;
defm V_MOVRELSD_B32 : VOP1_Real_e32e64_vi <0x38>;
defm V_TRUNC_F64 : VOP1_Real_vi <0x17>;
defm V_CEIL_F64 : VOP1_Real_vi <0x18>;
defm V_FLOOR_F64 : VOP1_Real_vi <0x1A>;
defm V_RNDNE_F64 : VOP1_Real_vi <0x19>;
defm V_LOG_LEGACY_F32 : VOP1_Real_vi <0x4c>;
defm V_EXP_LEGACY_F32 : VOP1_Real_vi <0x4b>;
defm V_CVT_F16_U16 : VOP1_Real_vi <0x39>;
defm V_CVT_F16_I16 : VOP1_Real_vi <0x3a>;
defm V_CVT_U16_F16 : VOP1_Real_vi <0x3b>;
defm V_CVT_I16_F16 : VOP1_Real_vi <0x3c>;
defm V_RCP_F16 : VOP1_Real_vi <0x3d>;
defm V_SQRT_F16 : VOP1_Real_vi <0x3e>;
defm V_RSQ_F16 : VOP1_Real_vi <0x3f>;
defm V_LOG_F16 : VOP1_Real_vi <0x40>;
defm V_EXP_F16 : VOP1_Real_vi <0x41>;
defm V_FREXP_MANT_F16 : VOP1_Real_vi <0x42>;
defm V_FREXP_EXP_I16_F16 : VOP1_Real_vi <0x43>;
defm V_FLOOR_F16 : VOP1_Real_vi <0x44>;
defm V_CEIL_F16 : VOP1_Real_vi <0x45>;
defm V_TRUNC_F16 : VOP1_Real_vi <0x46>;
defm V_RNDNE_F16 : VOP1_Real_vi <0x47>;
defm V_FRACT_F16 : VOP1_Real_vi <0x48>;
defm V_SIN_F16 : VOP1_Real_vi <0x49>;
defm V_COS_F16 : VOP1_Real_vi <0x4a>;
defm V_SWAP_B32 : VOP1Only_Real_vi <0x51>;

defm V_SAT_PK_U8_I16 : VOP1_Real_vi<0x4f>;
defm V_CVT_NORM_I16_F16 : VOP1_Real_vi<0x4d>;
defm V_CVT_NORM_U16_F16 : VOP1_Real_vi<0x4e>;

// Copy of v_mov_b32 with $vdst as a use operand for use with VGPR
// indexing mode. vdst can't be treated as a def for codegen purposes,
// and an implicit use and def of the super register should be added.
def V_MOV_B32_indirect : VPseudoInstSI<(outs),
  (ins getVALUDstForVT<i32>.ret:$vdst, getVOPSrc0ForVT<i32>.ret:$src0)>,
  PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
                                        getVOPSrc0ForVT<i32>.ret:$src0)> {
  let VOP1 = 1;
  let SubtargetPredicate = isGFX8GFX9;
}
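// Descriptive note: per the PseudoInstExpansion above, V_MOV_B32_indirect is
// printed and encoded as an ordinary v_mov_b32_e32; it exists as a separate
// pseudo only so that $vdst can be modelled as a use while GFX8/GFX9 VGPR
// indexing mode redirects the destination register actually written.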
// This is a pseudo variant of the v_movreld_b32 instruction in which the
// vector operand appears only twice, once as def and once as use. Using this
// pseudo avoids problems with the Two Address instructions pass.
class V_MOVRELD_B32_pseudo<RegisterClass rc> : VPseudoInstSI <
  (outs rc:$vdst),
  (ins rc:$vsrc, VSrc_b32:$val, i32imm:$offset)> {
  let VOP1 = 1;

  let Constraints = "$vsrc = $vdst";
  let Uses = [M0, EXEC];

  let SubtargetPredicate = HasMovrel;
}

def V_MOVRELD_B32_V1 : V_MOVRELD_B32_pseudo<VGPR_32>;
def V_MOVRELD_B32_V2 : V_MOVRELD_B32_pseudo<VReg_64>;
def V_MOVRELD_B32_V4 : V_MOVRELD_B32_pseudo<VReg_128>;
def V_MOVRELD_B32_V8 : V_MOVRELD_B32_pseudo<VReg_256>;
def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo<VReg_512>;

let OtherPredicates = [isGFX8GFX9] in {

def : GCNPat <
  (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
                           imm:$bound_ctrl)),
  (V_MOV_B32_dpp $src, $src, (as_i32imm $dpp_ctrl),
                 (as_i32imm $row_mask), (as_i32imm $bank_mask),
                 (as_i1imm $bound_ctrl))
>;

def : GCNPat <
  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
                              imm:$bank_mask, imm:$bound_ctrl)),
  (V_MOV_B32_dpp $old, $src, (as_i32imm $dpp_ctrl),
                 (as_i32imm $row_mask), (as_i32imm $bank_mask),
                 (as_i1imm $bound_ctrl))
>;

} // End OtherPredicates = [isGFX8GFX9]

let OtherPredicates = [isGFX8Plus] in {
def : GCNPat<
  (i32 (anyext i16:$src)),
  (COPY $src)
>;

def : GCNPat<
  (i64 (anyext i16:$src)),
  (REG_SEQUENCE VReg_64,
    (i32 (COPY $src)), sub0,
    (V_MOV_B32_e32 (i32 0)), sub1)
>;

def : GCNPat<
  (i16 (trunc i32:$src)),
  (COPY $src)
>;

def : GCNPat <
  (i16 (trunc i64:$src)),
  (EXTRACT_SUBREG $src, sub0)
>;

} // End OtherPredicates = [isGFX8Plus]

//===----------------------------------------------------------------------===//
// GFX9
//===----------------------------------------------------------------------===//

multiclass VOP1_Real_gfx9 <bits<10> op> {
  let AssemblerPredicates = [isGFX9Only], DecoderNamespace = "GFX9" in {
    defm NAME : VOP1_Real_e32e64_vi <op>;
  }

  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_gfx9 :
      VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
      VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
}

defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;

//===----------------------------------------------------------------------===//
// GFX10
//===----------------------------------------------------------------------===//

let OtherPredicates = [isGFX10Plus] in {
def : GCNPat <
  (i32 (int_amdgcn_mov_dpp8 i32:$src, imm:$dpp8)),
  (V_MOV_B32_dpp8_gfx10 $src, $src, (as_i32imm $dpp8), (i32 DPP8Mode.FI_0))
>;

def : GCNPat <
  (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
                           imm:$bound_ctrl)),
  (V_MOV_B32_dpp_gfx10 $src, $src, (as_i32imm $dpp_ctrl),
                       (as_i32imm $row_mask), (as_i32imm $bank_mask),
                       (as_i1imm $bound_ctrl), (i32 0))
>;

def : GCNPat <
  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
                              imm:$bank_mask, imm:$bound_ctrl)),
  (V_MOV_B32_dpp_gfx10 $old, $src, (as_i32imm $dpp_ctrl),
                       (as_i32imm $row_mask), (as_i32imm $bank_mask),
                       (as_i1imm $bound_ctrl), (i32 0))
>;
} // End OtherPredicates = [isGFX10Plus]
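// Descriptive note on the DPP patterns above: int_amdgcn_mov_dpp is selected
// with $src passed as both the "old" and "src" operands of V_MOV_B32_dpp,
// whereas int_amdgcn_update_dpp supplies an explicit $old value for lanes the
// DPP control does not write; the trailing (i32 0) on the GFX10 forms fills
// the extra $fi operand introduced by the GFX10 DPP encoding.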