//===-- VOP1Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP1 Classes
//===----------------------------------------------------------------------===//

class VOP1e <bits<8> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;

  let Inst{8-0} = !if(P.HasSrc0, src0{8-0}, ?);
  let Inst{16-9} = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;

  let Inst{8-0} = 0xf9; // sdwa
  let Inst{16-9} = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

class VOP1_SDWA9Ae <bits<8> op, VOPProfile P> : VOP_SDWA9Ae <P> {
  bits<8> vdst;

  let Inst{8-0} = 0xf9; // sdwa
  let Inst{16-9} = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1Only = 0> :
  VOP_Pseudo <opName, !if(VOP1Only, "", "_e32"), P, P.Outs32, P.Ins32, "", pattern> {

  let AsmOperands = P.Asm32;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret);

  let mayRaiseFPException = ReadsModeReg;

  let VOP1 = 1;
  let VALU = 1;
  let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);

  let AsmVariantName = AMDGPUAsmVariants.Default;
}

class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily> :
  VOP_Real <ps>,
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let VALU = 1;
  let VOP1 = 1;
  let isPseudo = 0;
  let isCodeGenOnly = 0;

  let Constraints = ps.Constraints;
  let DisableEncoding = ps.DisableEncoding;

  // copy relevant pseudo op flags
  let SubtargetPredicate = ps.SubtargetPredicate;
  let AsmMatchConverter = ps.AsmMatchConverter;
  let AsmVariantName = ps.AsmVariantName;
  let Constraints = ps.Constraints;
  let DisableEncoding = ps.DisableEncoding;
  let TSFlags = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses = ps.Uses;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let mayLoad = ps.mayLoad;
  let mayStore = ps.mayStore;
  let TRANS = ps.TRANS;
}

class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP1";
}

class VOP1_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_DPP_Pseudo <OpName, P, pattern> {
}

class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret =
    !if(P.HasModifiers,
        [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods P.Src0VT:$src0, i32:$src0_modifiers))))],
        !if(P.HasOMod,
            [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0,
                                                  i1:$clamp, i32:$omod))))],
            [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]
        )
    );
}
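
// Rough sketch of what a single VOP1Inst invocation expands to: for example,
//   defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>;
// defines the V_CEIL_F32_e32 (VOP1) and V_CEIL_F32_e64 (VOP3) pseudos, adds
// _sdwa / _dpp pseudos when the profile sets HasExtSDWA / HasExtDPP, and
// registers mnemonic aliases so the bare "v_ceil_f32" mnemonic matches each
// suffixed form.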
multiclass VOP1Inst <string opName, VOPProfile P,
                     SDPatternOperator node = null_frag> {
  // We only want to set this on the basic, non-SDWA or DPP forms.
  defvar should_mov_imm = !eq(opName, "v_mov_b32");

  let isMoveImm = should_mov_imm in {
    def _e32 : VOP1_Pseudo <opName, P>;
    def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>;
  }

  foreach _ = BoolToList<P.HasExtSDWA>.ret in
    def _sdwa : VOP1_SDWA_Pseudo <opName, P>;

  foreach _ = BoolToList<P.HasExtDPP>.ret in
    def _dpp : VOP1_DPP_Pseudo <opName, P>;

  def : MnemonicAlias<opName#"_e32", opName>, LetDummies;
  def : MnemonicAlias<opName#"_e64", opName>, LetDummies;

  foreach _ = BoolToList<P.HasExtSDWA>.ret in
    def : MnemonicAlias<opName#"_sdwa", opName>, LetDummies;

  foreach _ = BoolToList<P.HasExtDPP>.ret in
    def : MnemonicAlias<opName#"_dpp", opName>, LetDummies;
}

// Special profile for instructions which have clamp
// and output modifiers (but have no input modifiers)
class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
  VOPProfile<[dstVt, srcVt, untyped, untyped]> {

  let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
  let Asm64 = "$vdst, $src0$clamp$omod";

  let HasModifiers = 0;
  let HasClamp = 1;
}

def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;

class VOP_SPECIAL_OMOD_PROF<ValueType dstVt, ValueType srcVt> :
  VOPProfile<[dstVt, srcVt, untyped, untyped]> {

  let HasOMod = 1;
}
def VOP_I32_F32_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f32>;
def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f64>;
def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i16, f16>;
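
// Illustrative note: instructions built on the VOPProfileI2F profiles accept
// clamp and omod (but no abs/neg source modifiers) in their 64-bit encoding,
// roughly "v_cvt_f64_i32_e64 v[0:1], v0 clamp", while the *_SPECIAL_OMOD
// profiles expose only omod; per the comments below, setting OMod on those
// conversions is used to clear exceptions.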

//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//

let VOPAsmPrefer32Bit = 1 in {
defm V_NOP : VOP1Inst <"v_nop", VOP_NONE>;
}

let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>;
} // End isReMaterializable = 1, isAsCheapAsAMove = 1

// FIXME: Specify SchedRW for READFIRSTLANE_B32
// TODO: Make profile for this, there is VOP3 encoding also
def V_READFIRSTLANE_B32 :
  InstSI <(outs SReg_32:$vdst),
    (ins VRegOrLds_32:$src0),
    "v_readfirstlane_b32 $vdst, $src0",
    [(set i32:$vdst, (int_amdgcn_readfirstlane (i32 VRegOrLds_32:$src0)))]>,
  Enc32 {

  let isCodeGenOnly = 0;
  let UseNamedOperandTable = 1;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  let VOP1 = 1;
  let VALU = 1;
  let Uses = [EXEC];
  let isConvergent = 1;

  bits<8> vdst;
  bits<9> src0;

  let Inst{8-0} = src0;
  let Inst{16-9} = 0x2;
  let Inst{24-17} = vdst;
  let Inst{31-25} = 0x3f; // encoding
}

let isReMaterializable = 1 in {
let SchedRW = [WriteDoubleCvt] in {
// OMod clears exceptions when set in this instruction
defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64_SPECIAL_OMOD, fp_to_sint>;

let mayRaiseFPException = 0 in {
defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
}

defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
// OMod clears exceptions when set in this instruction
defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64_SPECIAL_OMOD, fp_to_uint>;

let mayRaiseFPException = 0 in {
defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
}

} // End SchedRW = [WriteDoubleCvt]

let SchedRW = [WriteFloatCvt] in {

// XXX: Does this really not raise exceptions? The manual claims the
// 16-bit ones can.
let mayRaiseFPException = 0 in {
defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
}

// OMod clears exceptions when set in these two instructions
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_sint>;
let FPDPRounding = 1, isReMaterializable = 0 in {
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
} // End FPDPRounding = 1, isReMaterializable = 0

defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
} // End ReadsModeReg = 0, mayRaiseFPException = 0
} // End SchedRW = [WriteFloatCvt]

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;
} // End ReadsModeReg = 0, mayRaiseFPException = 0

defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>;
defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>;
defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;

let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>;
defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>;
defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32, AMDGPUrcp_iflag>;
defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, any_amdgcn_sqrt>;
} // End TRANS = 1, SchedRW = [WriteTrans32]

let TRANS = 1, SchedRW = [WriteTrans64] in {
defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>;
defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>;
defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, any_amdgcn_sqrt>;
} // End TRANS = 1, SchedRW = [WriteTrans64]

let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>;
defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>;
} // End TRANS = 1, SchedRW = [WriteTrans32]

defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>;
defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, bitreverse>;
defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32, AMDGPUffbh_u32>;
defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32, AMDGPUffbl_b32>;
defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>;

let SchedRW = [WriteDoubleAdd] in {
defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64_SPECIAL_OMOD, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>;
let FPDPRounding = 1 in {
defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>;
} // End FPDPRounding = 1
} // End SchedRW = [WriteDoubleAdd]

defm V_FREXP_EXP_I32_F32 : VOP1Inst <"v_frexp_exp_i32_f32", VOP_I32_F32, int_amdgcn_frexp_exp>;
defm V_FREXP_MANT_F32 : VOP1Inst <"v_frexp_mant_f32", VOP_F32_F32, int_amdgcn_frexp_mant>;
} // End isReMaterializable = 1

let VOPAsmPrefer32Bit = 1 in {
defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
}

// Restrict src0 to be VGPR
def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> {
  let Src0RC32 = VRegSrc_32;
  let Src0RC64 = VRegSrc_32;
}

// Special case because there are no true output operands. Hack vdst
// to be a src operand. The custom inserter must add a tied implicit
// def and use of the super register since there seems to be no way to
// add an implicit def of a virtual register in tablegen.
class VOP_MOVREL<RegisterOperand Src1RC> : VOPProfile<[untyped, i32, untyped, untyped]> {
  let Src0RC32 = VOPDstOperand<VGPR_32>;
  let Src0RC64 = VOPDstOperand<VGPR_32>;

  let Outs = (outs);
  let Ins32 = (ins Src0RC32:$vdst, Src1RC:$src0);
  let Ins64 = (ins Src0RC64:$vdst, Src1RC:$src0);
  let Asm32 = getAsm32<1, 1>.ret;
  let Asm64 = getAsm64<1, 1, 0, 0, 1>.ret;

  let OutsSDWA = (outs Src0RC32:$vdst);
  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel);
  let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret;

  let OutsDPP = (outs Src0RC32:$vdst);
  let InsDPP16 = (ins Src0RC32:$old, Src0RC32:$src0,
                      dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                      bank_mask:$bank_mask, bound_ctrl:$bound_ctrl, FI:$fi);
  let AsmDPP16 = getAsmDPP16<1, 1, 0>.ret;

  let OutsDPP8 = (outs Src0RC32:$vdst);
  let InsDPP8 = (ins Src0RC32:$old, Src0RC32:$src0, dpp8:$dpp8, FI:$fi);
  let AsmDPP8 = getAsmDPP8<1, 1, 0>.ret;

  let HasDst = 0;
  let EmitDst = 1; // force vdst emission
}

def VOP_MOVRELD : VOP_MOVREL<VSrc_b32>;
def VOP_MOVRELSD : VOP_MOVREL<VRegSrc_32>;
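
// Rough usage sketch (illustrative; see the comments below for how these are
// modelled): M0 supplies the relative index for all three movrel forms, e.g.
//   s_mov_b32 m0, 2
//   v_movrels_b32 v0, v1      (reads  v0 <- VGPR[1 + M0])
//   v_movreld_b32 v1, v0      (writes VGPR[1 + M0] <- v0)
// which is why the "destination" of v_movreld_b32 is really just a base
// register and why these pseudos list M0 in their Uses.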

let SubtargetPredicate = HasMovrel, Uses = [M0, EXEC] in {
 // v_movreld_b32 is a special case because the destination output
 // register is really a source. It isn't actually read (but may be
 // written), and is only to provide the base register to start
 // indexing from. Tablegen seems to not let you define an implicit
 // virtual register output for the super register being written into,
 // so this must have an implicit def of the register added to it.
defm V_MOVRELD_B32 : VOP1Inst <"v_movreld_b32", VOP_MOVRELD>;
defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_MOVRELS>;
defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_MOVRELSD>;
} // End SubtargetPredicate = HasMovrel, Uses = [M0, EXEC]

let isReMaterializable = 1 in {
let SubtargetPredicate = isGFX6GFX7 in {
  let TRANS = 1, SchedRW = [WriteTrans32] in {
    defm V_LOG_CLAMP_F32 :
      VOP1Inst<"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
    defm V_RCP_CLAMP_F32 :
      VOP1Inst<"v_rcp_clamp_f32", VOP_F32_F32>;
    defm V_RCP_LEGACY_F32 :
      VOP1Inst<"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
    defm V_RSQ_CLAMP_F32 :
      VOP1Inst<"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
    defm V_RSQ_LEGACY_F32 :
      VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, int_amdgcn_rsq_legacy>;
  } // End TRANS = 1, SchedRW = [WriteTrans32]

  let SchedRW = [WriteTrans64] in {
    defm V_RCP_CLAMP_F64 :
      VOP1Inst<"v_rcp_clamp_f64", VOP_F64_F64>;
    defm V_RSQ_CLAMP_F64 :
      VOP1Inst<"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>;
  } // End SchedRW = [WriteTrans64]
} // End SubtargetPredicate = isGFX6GFX7

let SubtargetPredicate = isGFX7GFX8GFX9 in {
  let TRANS = 1, SchedRW = [WriteTrans32] in {
    defm V_LOG_LEGACY_F32 : VOP1Inst<"v_log_legacy_f32", VOP_F32_F32>;
    defm V_EXP_LEGACY_F32 : VOP1Inst<"v_exp_legacy_f32", VOP_F32_F32>;
  } // End TRANS = 1, SchedRW = [WriteTrans32]
} // End SubtargetPredicate = isGFX7GFX8GFX9

let SubtargetPredicate = isGFX7Plus in {
  let SchedRW = [WriteDoubleAdd] in {
    defm V_TRUNC_F64 : VOP1Inst<"v_trunc_f64", VOP_F64_F64, ftrunc>;
    defm V_CEIL_F64 : VOP1Inst<"v_ceil_f64", VOP_F64_F64, fceil>;
    defm V_RNDNE_F64 : VOP1Inst<"v_rndne_f64", VOP_F64_F64, frint>;
    defm V_FLOOR_F64 : VOP1Inst<"v_floor_f64", VOP_F64_F64, ffloor>;
  } // End SchedRW = [WriteDoubleAdd]
} // End SubtargetPredicate = isGFX7Plus
} // End isReMaterializable = 1

let SubtargetPredicate = Has16BitInsts in {

let FPDPRounding = 1 in {
defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
} // End FPDPRounding = 1
// OMod clears exceptions when set in these two instructions
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_uint>;
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16_SPECIAL_OMOD, fp_to_sint>;
let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>;
defm V_LOG_F16 : VOP1Inst <"v_log_f16", VOP_F16_F16, flog2>;
defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16, fexp2>;
defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
} // End TRANS = 1, SchedRW = [WriteTrans32]
defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16_SPECIAL_OMOD, int_amdgcn_frexp_exp>;
defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>;
defm V_CEIL_F16 : VOP1Inst <"v_ceil_f16", VOP_F16_F16, fceil>;
defm V_TRUNC_F16 : VOP1Inst <"v_trunc_f16", VOP_F16_F16, ftrunc>;
defm V_RNDNE_F16 : VOP1Inst <"v_rndne_f16", VOP_F16_F16, frint>;
let FPDPRounding = 1 in {
defm V_FRACT_F16 : VOP1Inst <"v_fract_f16", VOP_F16_F16, AMDGPUfract>;
} // End FPDPRounding = 1

}

let OtherPredicates = [Has16BitInsts] in {

def : GCNPat<
  (f32 (f16_to_fp i16:$src)),
  (V_CVT_F32_F16_e32 $src)
>;

def : GCNPat<
  (i16 (AMDGPUfp_to_f16 f32:$src)),
  (V_CVT_F16_F32_e32 $src)
>;

}

def VOP_SWAP_I32 : VOPProfile<[i32, i32, i32, untyped]> {
  let Outs32 = (outs VGPR_32:$vdst, VGPR_32:$vdst1);
  let Ins32 = (ins VGPR_32:$src0, VGPR_32:$src1);
  let Outs64 = Outs32;
  let Asm32 = " $vdst, $src0";
  let Asm64 = "";
  let Ins64 = (ins);
}

let SubtargetPredicate = isGFX9Plus in {
  def V_SWAP_B32 : VOP1_Pseudo<"v_swap_b32", VOP_SWAP_I32, [], 1> {
    let Constraints = "$vdst = $src1, $vdst1 = $src0";
    let DisableEncoding = "$vdst1,$src1";
    let SchedRW = [Write64Bit, Write64Bit];
  }

  let isReMaterializable = 1 in
  defm V_SAT_PK_U8_I16 : VOP1Inst<"v_sat_pk_u8_i16", VOP_I32_I32>;

  let mayRaiseFPException = 0 in {
    defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16_SPECIAL_OMOD>;
    defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16_SPECIAL_OMOD>;
  } // End mayRaiseFPException = 0
} // End SubtargetPredicate = isGFX9Plus

let SubtargetPredicate = isGFX9Only in {
  defm V_SCREEN_PARTITION_4SE_B32 : VOP1Inst <"v_screen_partition_4se_b32", VOP_I32_I32>;
} // End SubtargetPredicate = isGFX9Only

let SubtargetPredicate = isGFX10Plus in {
  defm V_PIPEFLUSH : VOP1Inst<"v_pipeflush", VOP_NONE>;

  let Uses = [M0] in {
    defm V_MOVRELSD_2_B32 :
      VOP1Inst<"v_movrelsd_2_b32", VOP_MOVRELSD>;

    def V_SWAPREL_B32 : VOP1_Pseudo<"v_swaprel_b32", VOP_SWAP_I32, [], 1> {
      let Constraints = "$vdst = $src1, $vdst1 = $src0";
      let DisableEncoding = "$vdst1,$src1";
      let SchedRW = [Write64Bit, Write64Bit];
    }
  } // End Uses = [M0]
} // End SubtargetPredicate = isGFX10Plus

def VOPProfileAccMov : VOP_NO_EXT<VOP_I32_I32> {
  let DstRC = RegisterOperand<AGPR_32>;
  let Src0RC32 = RegisterOperand<AGPR_32>;
  let Asm32 = " $vdst, $src0";
}

def V_ACCVGPR_MOV_B32 : VOP1_Pseudo<"v_accvgpr_mov_b32", VOPProfileAccMov, [], 1> {
  let SubtargetPredicate = isGFX90APlus;
  let isReMaterializable = 1;
  let isAsCheapAsAMove = 1;
}

//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//
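
// For reference, the VOP1 (e32) real encodings below all share the 32-bit
// word defined by VOP1e at the top of this file:
//   Inst{31-25} = 0x3f (VOP1), Inst{24-17} = vdst, Inst{16-9} = op,
//   Inst{8-0} = src0
// As a rough worked example, "v_mov_b32_e32 v2, v1" (op 0x1, vdst = 2,
// src0 = VGPR1 = 0x101) assembles to the word 0x7e040301. The DPP and SDWA
// forms keep this layout but place a magic value (0xfa or 0xf9) in the src0
// field and carry the real source operand in an extra dword.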

class VOP1_DPP<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP16 = 0> :
  VOP_DPP<ps.OpName, p, isDPP16> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;
  let TRANS = ps.TRANS;

  bits<8> vdst;
  let Inst{8-0} = 0xfa;
  let Inst{16-9} = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f;
}

class VOP1_DPP16<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl> :
  VOP1_DPP<op, ps, p, 1>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX10> {
  let AssemblerPredicate = HasDPP16;
  let SubtargetPredicate = HasDPP16;
}

class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
  VOP_DPP8<ps.OpName, p> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;

  bits<8> vdst;
  let Inst{8-0} = fi;
  let Inst{16-9} = op;
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f;
}

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
  multiclass VOP1Only_Real_gfx10<bits<9> op> {
    def _gfx10 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.GFX10>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
  }
  multiclass VOP1_Real_e32_gfx10<bits<9> op> {
    def _e32_gfx10 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
  }
  multiclass VOP1_Real_e64_gfx10<bits<9> op> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
  multiclass VOP1_Real_sdwa_gfx10<bits<9> op> {
    foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx10 :
      VOP_SDWA10_Real<!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP1_SDWA9Ae<op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
      let DecoderNamespace = "SDWA10";
    }
  }
  multiclass VOP1_Real_dpp_gfx10<bits<9> op> {
    foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")> {
      let DecoderNamespace = "SDWA10";
    }
  }
  multiclass VOP1_Real_dpp8_gfx10<bits<9> op> {
    foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp8_gfx10 : VOP1_DPP8<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")> {
      let DecoderNamespace = "DPP8";
    }
  }
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"

multiclass VOP1_Real_gfx10<bits<9> op> :
  VOP1_Real_e32_gfx10<op>, VOP1_Real_e64_gfx10<op>,
  VOP1_Real_sdwa_gfx10<op>, VOP1_Real_dpp_gfx10<op>,
  VOP1_Real_dpp8_gfx10<op>;
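
// As a rough illustration of what follows: each defm here, e.g.
//   defm V_CEIL_F16 : VOP1_Real_gfx10<0x05c>;
// instantiates _e32_gfx10 and _e64_gfx10 real encodings for the pseudos
// defined earlier in this file, plus _sdwa_gfx10, _dpp_gfx10 and _dpp8_gfx10
// records when the instruction's profile enables those extensions.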
defm V_PIPEFLUSH : VOP1_Real_gfx10<0x01b>;
defm V_MOVRELSD_2_B32 : VOP1_Real_gfx10<0x048>;
defm V_CVT_F16_U16 : VOP1_Real_gfx10<0x050>;
defm V_CVT_F16_I16 : VOP1_Real_gfx10<0x051>;
defm V_CVT_U16_F16 : VOP1_Real_gfx10<0x052>;
defm V_CVT_I16_F16 : VOP1_Real_gfx10<0x053>;
defm V_RCP_F16 : VOP1_Real_gfx10<0x054>;
defm V_SQRT_F16 : VOP1_Real_gfx10<0x055>;
defm V_RSQ_F16 : VOP1_Real_gfx10<0x056>;
defm V_LOG_F16 : VOP1_Real_gfx10<0x057>;
defm V_EXP_F16 : VOP1_Real_gfx10<0x058>;
defm V_FREXP_MANT_F16 : VOP1_Real_gfx10<0x059>;
defm V_FREXP_EXP_I16_F16 : VOP1_Real_gfx10<0x05a>;
defm V_FLOOR_F16 : VOP1_Real_gfx10<0x05b>;
defm V_CEIL_F16 : VOP1_Real_gfx10<0x05c>;
defm V_TRUNC_F16 : VOP1_Real_gfx10<0x05d>;
defm V_RNDNE_F16 : VOP1_Real_gfx10<0x05e>;
defm V_FRACT_F16 : VOP1_Real_gfx10<0x05f>;
defm V_SIN_F16 : VOP1_Real_gfx10<0x060>;
defm V_COS_F16 : VOP1_Real_gfx10<0x061>;
defm V_SAT_PK_U8_I16 : VOP1_Real_gfx10<0x062>;
defm V_CVT_NORM_I16_F16 : VOP1_Real_gfx10<0x063>;
defm V_CVT_NORM_U16_F16 : VOP1_Real_gfx10<0x064>;

defm V_SWAP_B32 : VOP1Only_Real_gfx10<0x065>;
defm V_SWAPREL_B32 : VOP1Only_Real_gfx10<0x068>;

//===----------------------------------------------------------------------===//
// GFX7, GFX10.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
  multiclass VOP1_Real_e32_gfx7<bits<9> op> {
    def _e32_gfx7 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
  }
  multiclass VOP1_Real_e64_gfx7<bits<9> op> {
    def _e64_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"

multiclass VOP1_Real_gfx7<bits<9> op> :
  VOP1_Real_e32_gfx7<op>, VOP1_Real_e64_gfx7<op>;

multiclass VOP1_Real_gfx7_gfx10<bits<9> op> :
  VOP1_Real_gfx7<op>, VOP1_Real_gfx10<op>;

defm V_LOG_LEGACY_F32 : VOP1_Real_gfx7<0x045>;
defm V_EXP_LEGACY_F32 : VOP1_Real_gfx7<0x046>;

defm V_TRUNC_F64 : VOP1_Real_gfx7_gfx10<0x017>;
defm V_CEIL_F64 : VOP1_Real_gfx7_gfx10<0x018>;
defm V_RNDNE_F64 : VOP1_Real_gfx7_gfx10<0x019>;
defm V_FLOOR_F64 : VOP1_Real_gfx7_gfx10<0x01a>;

//===----------------------------------------------------------------------===//
// GFX6, GFX7, GFX10.
//===----------------------------------------------------------------------===//
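
// Note on the e64 opcodes used by these real definitions (and the GFX7-only
// ones above): VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, ...> forms the 9-bit VOP3
// opcode as 0x180 + the VOP1 opcode, so e.g. the e64 form of v_mov_b32
// (op 0x001) lands at VOP3 opcode 0x181; the GFX8/GFX9 definitions further
// down use !add(0x140, op) instead.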

let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  multiclass VOP1_Real_e32_gfx6_gfx7<bits<9> op> {
    def _e32_gfx6_gfx7 :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
  }
  multiclass VOP1_Real_e64_gfx6_gfx7<bits<9> op> {
    def _e64_gfx6_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

multiclass VOP1_Real_gfx6_gfx7<bits<9> op> :
  VOP1_Real_e32_gfx6_gfx7<op>, VOP1_Real_e64_gfx6_gfx7<op>;

multiclass VOP1_Real_gfx6_gfx7_gfx10<bits<9> op> :
  VOP1_Real_gfx6_gfx7<op>, VOP1_Real_gfx10<op>;

defm V_LOG_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x026>;
defm V_RCP_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x028>;
defm V_RCP_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x029>;
defm V_RSQ_CLAMP_F32 : VOP1_Real_gfx6_gfx7<0x02c>;
defm V_RSQ_LEGACY_F32 : VOP1_Real_gfx6_gfx7<0x02d>;
defm V_RCP_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x030>;
defm V_RSQ_CLAMP_F64 : VOP1_Real_gfx6_gfx7<0x032>;

defm V_NOP : VOP1_Real_gfx6_gfx7_gfx10<0x000>;
defm V_MOV_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x001>;
defm V_CVT_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x003>;
defm V_CVT_F64_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x004>;
defm V_CVT_F32_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x005>;
defm V_CVT_F32_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x006>;
defm V_CVT_U32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x007>;
defm V_CVT_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x008>;
defm V_CVT_F16_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00a>;
defm V_CVT_F32_F16 : VOP1_Real_gfx6_gfx7_gfx10<0x00b>;
defm V_CVT_RPI_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00c>;
defm V_CVT_FLR_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x00d>;
defm V_CVT_OFF_F32_I4 : VOP1_Real_gfx6_gfx7_gfx10<0x00e>;
defm V_CVT_F32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x00f>;
defm V_CVT_F64_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x010>;
defm V_CVT_F32_UBYTE0 : VOP1_Real_gfx6_gfx7_gfx10<0x011>;
defm V_CVT_F32_UBYTE1 : VOP1_Real_gfx6_gfx7_gfx10<0x012>;
defm V_CVT_F32_UBYTE2 : VOP1_Real_gfx6_gfx7_gfx10<0x013>;
defm V_CVT_F32_UBYTE3 : VOP1_Real_gfx6_gfx7_gfx10<0x014>;
defm V_CVT_U32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x015>;
defm V_CVT_F64_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x016>;
defm V_FRACT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x020>;
defm V_TRUNC_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x021>;
defm V_CEIL_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x022>;
defm V_RNDNE_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x023>;
defm V_FLOOR_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x024>;
defm V_EXP_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x025>;
defm V_LOG_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x027>;
defm V_RCP_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02a>;
defm V_RCP_IFLAG_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02b>;
defm V_RSQ_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x02e>;
defm V_RCP_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x02f>;
defm V_RSQ_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x031>;
defm V_SQRT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x033>;
defm V_SQRT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x034>;
defm V_SIN_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x035>;
defm V_COS_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x036>;
defm V_NOT_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x037>;
defm V_BFREV_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x038>;
defm V_FFBH_U32 : VOP1_Real_gfx6_gfx7_gfx10<0x039>;
defm V_FFBL_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x03a>;
defm V_FFBH_I32 : VOP1_Real_gfx6_gfx7_gfx10<0x03b>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03c>;
defm V_FREXP_MANT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03d>;
defm V_FRACT_F64 : VOP1_Real_gfx6_gfx7_gfx10<0x03e>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x03f>;
defm V_FREXP_MANT_F32 : VOP1_Real_gfx6_gfx7_gfx10<0x040>;
defm V_CLREXCP : VOP1_Real_gfx6_gfx7_gfx10<0x041>;
defm V_MOVRELD_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x042>;
defm V_MOVRELS_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x043>;
defm V_MOVRELSD_B32 : VOP1_Real_gfx6_gfx7_gfx10<0x044>;

//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//

class VOP1_DPPe <bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
  VOP_DPPe <P> {
  bits<8> vdst;
  let Inst{8-0} = 0xfa; // dpp
  let Inst{16-9} = op;
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{31-25} = 0x3f; // encoding
}

multiclass VOP1Only_Real_vi <bits<10> op> {
  let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
    def _vi :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.VI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
  }
}

multiclass VOP1_Real_e32e64_vi <bits<10> op> {
  let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {
    def _e32_vi :
      VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
      VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
    def _e64_vi :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
      VOP3e_vi <!add(0x140, op), !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
}

multiclass VOP1_Real_vi <bits<10> op> {
  defm NAME : VOP1_Real_e32e64_vi <op>;

  foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA>.ret in
    def _sdwa_vi :
      VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx9 :
      VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_vi :
      VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
      VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
}

defm V_NOP : VOP1_Real_vi <0x0>;
defm V_MOV_B32 : VOP1_Real_vi <0x1>;
defm V_CVT_I32_F64 : VOP1_Real_vi <0x3>;
defm V_CVT_F64_I32 : VOP1_Real_vi <0x4>;
defm V_CVT_F32_I32 : VOP1_Real_vi <0x5>;
defm V_CVT_F32_U32 : VOP1_Real_vi <0x6>;
defm V_CVT_U32_F32 : VOP1_Real_vi <0x7>;
defm V_CVT_I32_F32 : VOP1_Real_vi <0x8>;
defm V_CVT_F16_F32 : VOP1_Real_vi <0xa>;
defm V_CVT_F32_F16 : VOP1_Real_vi <0xb>;
defm V_CVT_RPI_I32_F32 : VOP1_Real_vi <0xc>;
defm V_CVT_FLR_I32_F32 : VOP1_Real_vi <0xd>;
defm V_CVT_OFF_F32_I4 : VOP1_Real_vi <0xe>;
defm V_CVT_F32_F64 : VOP1_Real_vi <0xf>;
defm V_CVT_F64_F32 : VOP1_Real_vi <0x10>;
defm V_CVT_F32_UBYTE0 : VOP1_Real_vi <0x11>;
defm V_CVT_F32_UBYTE1 : VOP1_Real_vi <0x12>;
defm V_CVT_F32_UBYTE2 : VOP1_Real_vi <0x13>;
defm V_CVT_F32_UBYTE3 : VOP1_Real_vi <0x14>;
defm V_CVT_U32_F64 : VOP1_Real_vi <0x15>;
defm V_CVT_F64_U32 : VOP1_Real_vi <0x16>;
defm V_FRACT_F32 : VOP1_Real_vi <0x1b>;
defm V_TRUNC_F32 : VOP1_Real_vi <0x1c>;
defm V_CEIL_F32 : VOP1_Real_vi <0x1d>;
defm V_RNDNE_F32 : VOP1_Real_vi <0x1e>;
defm V_FLOOR_F32 : VOP1_Real_vi <0x1f>;
defm V_EXP_F32 : VOP1_Real_vi <0x20>;
defm V_LOG_F32 : VOP1_Real_vi <0x21>;
defm V_RCP_F32 : VOP1_Real_vi <0x22>;
defm V_RCP_IFLAG_F32 : VOP1_Real_vi <0x23>;
defm V_RSQ_F32 : VOP1_Real_vi <0x24>;
defm V_RCP_F64 : VOP1_Real_vi <0x25>;
defm V_RSQ_F64 : VOP1_Real_vi <0x26>;
defm V_SQRT_F32 : VOP1_Real_vi <0x27>;
defm V_SQRT_F64 : VOP1_Real_vi <0x28>;
defm V_SIN_F32 : VOP1_Real_vi <0x29>;
defm V_COS_F32 : VOP1_Real_vi <0x2a>;
defm V_NOT_B32 : VOP1_Real_vi <0x2b>;
defm V_BFREV_B32 : VOP1_Real_vi <0x2c>;
defm V_FFBH_U32 : VOP1_Real_vi <0x2d>;
defm V_FFBL_B32 : VOP1_Real_vi <0x2e>;
defm V_FFBH_I32 : VOP1_Real_vi <0x2f>;
defm V_FREXP_EXP_I32_F64 : VOP1_Real_vi <0x30>;
defm V_FREXP_MANT_F64 : VOP1_Real_vi <0x31>;
defm V_FRACT_F64 : VOP1_Real_vi <0x32>;
defm V_FREXP_EXP_I32_F32 : VOP1_Real_vi <0x33>;
defm V_FREXP_MANT_F32 : VOP1_Real_vi <0x34>;
defm V_CLREXCP : VOP1_Real_vi <0x35>;
defm V_MOVRELD_B32 : VOP1_Real_e32e64_vi <0x36>;
defm V_MOVRELS_B32 : VOP1_Real_e32e64_vi <0x37>;
defm V_MOVRELSD_B32 : VOP1_Real_e32e64_vi <0x38>;
defm V_TRUNC_F64 : VOP1_Real_vi <0x17>;
defm V_CEIL_F64 : VOP1_Real_vi <0x18>;
defm V_FLOOR_F64 : VOP1_Real_vi <0x1A>;
defm V_RNDNE_F64 : VOP1_Real_vi <0x19>;
defm V_LOG_LEGACY_F32 : VOP1_Real_vi <0x4c>;
defm V_EXP_LEGACY_F32 : VOP1_Real_vi <0x4b>;
defm V_CVT_F16_U16 : VOP1_Real_vi <0x39>;
defm V_CVT_F16_I16 : VOP1_Real_vi <0x3a>;
defm V_CVT_U16_F16 : VOP1_Real_vi <0x3b>;
defm V_CVT_I16_F16 : VOP1_Real_vi <0x3c>;
defm V_RCP_F16 : VOP1_Real_vi <0x3d>;
defm V_SQRT_F16 : VOP1_Real_vi <0x3e>;
defm V_RSQ_F16 : VOP1_Real_vi <0x3f>;
defm V_LOG_F16 : VOP1_Real_vi <0x40>;
defm V_EXP_F16 : VOP1_Real_vi <0x41>;
defm V_FREXP_MANT_F16 : VOP1_Real_vi <0x42>;
defm V_FREXP_EXP_I16_F16 : VOP1_Real_vi <0x43>;
defm V_FLOOR_F16 : VOP1_Real_vi <0x44>;
defm V_CEIL_F16 : VOP1_Real_vi <0x45>;
defm V_TRUNC_F16 : VOP1_Real_vi <0x46>;
defm V_RNDNE_F16 : VOP1_Real_vi <0x47>;
defm V_FRACT_F16 : VOP1_Real_vi <0x48>;
defm V_SIN_F16 : VOP1_Real_vi <0x49>;
defm V_COS_F16 : VOP1_Real_vi <0x4a>;
defm V_SWAP_B32 : VOP1Only_Real_vi <0x51>;

defm V_SAT_PK_U8_I16 : VOP1_Real_vi<0x4f>;
defm V_CVT_NORM_I16_F16 : VOP1_Real_vi<0x4d>;
defm V_CVT_NORM_U16_F16 : VOP1_Real_vi<0x4e>;

defm V_ACCVGPR_MOV_B32 : VOP1Only_Real_vi<0x52>;

let VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [EXEC, M0] in {

// Copy of v_mov_b32 with $vdst as a use operand for use with VGPR
// indexing mode. vdst can't be treated as a def for codegen purposes,
// and an implicit use and def of the super register should be added.
def V_MOV_B32_indirect_write : VPseudoInstSI<(outs),
  (ins getVALUDstForVT<i32>.ret:$vdst, getVOPSrc0ForVT<i32>.ret:$src0)>,
  PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
                       getVOPSrc0ForVT<i32>.ret:$src0)>;

// Copy of v_mov_b32 for use with VGPR indexing mode. An implicit use of the
// super register should be added.
def V_MOV_B32_indirect_read : VPseudoInstSI<
  (outs getVALUDstForVT<i32>.ret:$vdst),
  (ins getVOPSrc0ForVT<i32>.ret:$src0)>,
  PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
                       getVOPSrc0ForVT<i32>.ret:$src0)>;

} // End VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [EXEC, M0]

let OtherPredicates = [isGFX8Plus] in {

def : GCNPat <
  (i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask,
                           timm:$bank_mask, timm:$bound_ctrl)),
  (V_MOV_B32_dpp VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp_ctrl),
                 (as_i32timm $row_mask), (as_i32timm $bank_mask),
                 (as_i1timm $bound_ctrl))
>;

def : GCNPat <
  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl,
                              timm:$row_mask, timm:$bank_mask,
                              timm:$bound_ctrl)),
  (V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src, (as_i32timm $dpp_ctrl),
                 (as_i32timm $row_mask), (as_i32timm $bank_mask),
                 (as_i1timm $bound_ctrl))
>;

} // End OtherPredicates = [isGFX8Plus]

let OtherPredicates = [isGFX8Plus] in {
def : GCNPat<
  (i32 (anyext i16:$src)),
  (COPY $src)
>;

def : GCNPat<
  (i64 (anyext i16:$src)),
  (REG_SEQUENCE VReg_64,
    (i32 (COPY $src)), sub0,
    (V_MOV_B32_e32 (i32 0)), sub1)
>;

def : GCNPat<
  (i16 (trunc i32:$src)),
  (COPY $src)
>;

def : GCNPat <
  (i16 (trunc i64:$src)),
  (EXTRACT_SUBREG $src, sub0)
>;

} // End OtherPredicates = [isGFX8Plus]

//===----------------------------------------------------------------------===//
// GFX9
//===----------------------------------------------------------------------===//

multiclass VOP1_Real_gfx9 <bits<10> op> {
  let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {
    defm NAME : VOP1_Real_e32e64_vi <op>;
  }

  foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx9 :
      VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;

  foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_gfx9 :
      VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
      VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;

}

defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;

//===----------------------------------------------------------------------===//
// GFX10
//===----------------------------------------------------------------------===//

let OtherPredicates = [isGFX10Plus] in {
def : GCNPat <
  (i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
  (V_MOV_B32_dpp8_gfx10 VGPR_32:$src, VGPR_32:$src,
                        (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))
>;
} // End OtherPredicates = [isGFX10Plus]