//===-- VOP3Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Special case for v_div_fmas_{f32|f64}, since it seems to be the
// only VOP instruction that implicitly reads VCC.
//
// Both profiles share the same 64-bit asm string; the f32 variant additionally
// turns off its DPP forms.
let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod" in {
def VOP_F32_F32_F32_F32_VCC : VOPProfile<[f32, f32, f32, f32]> {
  let Outs64 = (outs DstRC.RegClass:$vdst);
  let HasExtVOP3DPP = 0;
  let HasExtDPP = 0;
  let IsSingle = 1;
}
def VOP_F64_F64_F64_F64_VCC : VOPProfile<[f64, f64, f64, f64]> {
  let Outs64 = (outs DstRC.RegClass:$vdst);
  let IsSingle = 1;
}
} // End Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod"

// Profile for three-source instructions that produce a second, scalar
// destination ($sdst) next to the vector result. DPP forms are disabled.
class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
  let Asm64 = "$vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod";
  let IsSingle = 1;
  let HasExtVOP3DPP = 0;
  let HasExtDPP = 0;
}

def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32>;
def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile<f64>;

// Integer counterpart of VOP3b_Profile: i64 result plus $sdst, no source
// modifiers in the asm string, clamp allowed.
def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> {
  let HasClamp = 1;

  let IsSingle = 1;
  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
  let Asm64 = "$vdst, $sdst, $src0, $src1, $src2$clamp";
}

// VOP3_Profile wrapper with both DPP forms switched off (used by the
// v_mul_{lo,hi}_{u32,i32} definitions below).
class V_MUL_PROF<VOPProfile P> : VOP3_Profile<P> {
  let HasExtVOP3DPP = 0;
  let HasExtDPP = 0;
}

// Same DPP restriction for v_div_fixup_f32.
def DIV_FIXUP_F32_PROF : VOP3_Profile<VOP_F32_F32_F32_F32> {
  let HasExtVOP3DPP = 0;
  let HasExtDPP = 0;
}

//===----------------------------------------------------------------------===//
// VOP3 INTERP
//===----------------------------------------------------------------------===//

// Base class for the VOP3-encoded interpolation pseudos: uses the dedicated
// asm-matcher converter and never raises an FP exception.
class VOP3Interp<string OpName, VOPProfile P, list<dag> pattern = []> :
                 VOP3_Pseudo<OpName, P, pattern> {
  let AsmMatchConverter = "cvtVOP3Interp";
  let mayRaiseFPException = 0;
}

def VOP3_INTERP : VOPProfile<[f32, f32, i32, untyped]> {
  let Src0Mod = FPVRegInputMods;
  let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
                   InterpAttr:$attr, InterpAttrChan:$attrchan,
                   Clamp0:$clamp, omod0:$omod);

  let Asm64 = "$vdst, $src0_modifiers, $attr$attrchan$clamp$omod";
}

def VOP3_INTERP_MOV : VOPProfile<[f32, i32, i32, untyped]> {
  let Ins64 = (ins InterpSlot:$src0,
                   InterpAttr:$attr, InterpAttrChan:$attrchan,
                   Clamp0:$clamp, omod0:$omod);

  let Asm64 = "$vdst, $src0, $attr$attrchan$clamp$omod";

  let HasClamp = 1;
  let HasSrc0Mods = 0;
}

// Builds the asm operand string for the 16-bit interp instructions.
// ", $src2_modifiers" is emitted only when HasSrc2 is set and "$omod" only
// when HasOMod is set.
class getInterp16Asm <bit HasSrc2, bit HasOMod> {
  string src2 = !if(HasSrc2, ", $src2_modifiers", "");
  string omod = !if(HasOMod, "$omod", "");
  string ret =
    " $vdst, $src0_modifiers, $attr$attrchan"#src2#"$high$clamp"#omod;
}

// Builds the matching input operand list for the 16-bit interp instructions.
// NOTE(review): without src2 the list always carries omod0:$omod, whatever
// HasOMod says, while getInterp16Asm prints "$omod" only when HasOMod is set.
// Confirm that no src2-less profile is instantiated with HasOMod = 0.
class getInterp16Ins <bit HasSrc2, bit HasOMod,
                      Operand Src0Mod, Operand Src2Mod> {
  dag ret = !if(HasSrc2,
                !if(HasOMod,
                    (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
                         InterpAttr:$attr, InterpAttrChan:$attrchan,
                         Src2Mod:$src2_modifiers, VRegSrc_32:$src2,
                         highmod:$high, Clamp0:$clamp, omod0:$omod),
                    (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
                         InterpAttr:$attr, InterpAttrChan:$attrchan,
                         Src2Mod:$src2_modifiers, VRegSrc_32:$src2,
                         highmod:$high, Clamp0:$clamp)
                ),
                (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
                     InterpAttr:$attr, InterpAttrChan:$attrchan,
                     highmod:$high, Clamp0:$clamp, omod0:$omod)
            );
}

// Profile for the 16-bit interp instructions. Output modifiers are available
// only when the destination type is not f16.
class VOP3_INTERP16 <list<ValueType> ArgVT> : VOPProfile<ArgVT> {
  let IsSingle = 1;
  let HasOMod = !ne(DstVT.Value, f16.Value);
  let HasHigh = 1;

  let Src0Mod = FPVRegInputMods;
  let Src2Mod = FPVRegInputMods;

  let Outs64 = (outs DstRC.RegClass:$vdst);
  let Ins64 = getInterp16Ins<HasSrc2, HasOMod, Src0Mod, Src2Mod>.ret;
  let Asm64 = getInterp16Asm<HasSrc2, HasOMod>.ret;
}

//===----------------------------------------------------------------------===//
// VOP3 Instructions
//===----------------------------------------------------------------------===//

let isCommutable = 1 in {

let isReMaterializable = 1 in {
let mayRaiseFPException = 0 in {
let SubtargetPredicate = HasMadMacF32Insts in {
defm V_MAD_LEGACY_F32 : VOP3Inst <"v_mad_legacy_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
defm V_MAD_F32 : VOP3Inst <"v_mad_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, any_fmad>;
} // End SubtargetPredicate = HasMadMacF32Insts

let SubtargetPredicate = HasFmaLegacy32 in
defm V_FMA_LEGACY_F32 : VOP3Inst <"v_fma_legacy_f32",
                                 VOP3_Profile<VOP_F32_F32_F32_F32>,
                                 int_amdgcn_fma_legacy>;
} // End mayRaiseFPException = 0

defm V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
defm V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, any_fma>;
defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;

let SchedRW = [WriteDoubleAdd] in {
let FPDPRounding = 1 in {
defm V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, any_fma>;
let SubtargetPredicate = isNotGFX12Plus in {
defm V_ADD_F64 : VOP3Inst <"v_add_f64", VOP3_Profile<VOP_F64_F64_F64>, any_fadd>;
defm V_MUL_F64 : VOP3Inst <"v_mul_f64", VOP3_Profile<VOP_F64_F64_F64>, any_fmul>;
} // End SubtargetPredicate = isNotGFX12Plus
} // End FPDPRounding = 1
let SubtargetPredicate = isNotGFX12Plus in {
defm V_MIN_F64 : VOP3Inst <"v_min_f64", VOP3_Profile<VOP_F64_F64_F64>, fminnum_like>;
defm V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaxnum_like>;
} // End SubtargetPredicate = isNotGFX12Plus
} // End SchedRW = [WriteDoubleAdd]

let SchedRW = [WriteIntMul], IsInvalidSingleUseConsumer = 1 in {
defm V_MUL_LO_U32 : VOP3Inst <"v_mul_lo_u32", V_MUL_PROF<VOP_I32_I32_I32>, DivergentBinFrag<mul>>;
defm V_MUL_HI_U32 : VOP3Inst <"v_mul_hi_u32", V_MUL_PROF<VOP_I32_I32_I32>, mulhu>;
defm V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", V_MUL_PROF<VOP_I32_I32_I32>>;
defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF<VOP_I32_I32_I32>, mulhs>;
} // End SchedRW = [WriteIntMul], IsInvalidSingleUseConsumer = 1

let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fminimum>>;
defm V_MAXIMUM_F32 : VOP3Inst <"v_maximum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fmaximum>>;
defm V_MINIMUM_F16 : VOP3Inst <"v_minimum_f16", VOP3_Profile<VOP_F16_F16_F16>, DivergentBinFrag<fminimum>>;
defm V_MAXIMUM_F16 : VOP3Inst <"v_maximum_f16", VOP3_Profile<VOP_F16_F16_F16>, DivergentBinFrag<fmaximum>>;

let SchedRW = [WriteDoubleAdd] in {
defm V_MINIMUM_F64 : VOP3Inst <"v_minimum_f64", VOP3_Profile<VOP_F64_F64_F64>, fminimum>;
defm V_MAXIMUM_F64 : VOP3Inst <"v_maximum_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaximum>;
} // End SchedRW = [WriteDoubleAdd]
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0

} // End isReMaterializable = 1

let Uses = [MODE, VCC, EXEC] in {
// v_div_fmas_f32:
//   result = src0 * src1 + src2
//   if (vcc)
//     result *= 2^32
//
let SchedRW = [WriteFloatFMA] in
defm V_DIV_FMAS_F32 : VOP3Inst_Pseudo_Wrapper <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC, []>;
// v_div_fmas_f64:
//   result = src0 * src1 + src2
//   if (vcc)
//     result *= 2^64
//
let SchedRW = [WriteDouble], FPDPRounding = 1 in
defm V_DIV_FMAS_F64 : VOP3Inst_Pseudo_Wrapper <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC, []>;
} // End Uses = [MODE, VCC, EXEC]

} // End isCommutable = 1

let isReMaterializable = 1 in {
let mayRaiseFPException = 0 in {
defm V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubeid>;
defm V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubesc>;
defm V_CUBETC_F32 : VOP3Inst <"v_cubetc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubetc>;
defm V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubema>;
} // End mayRaiseFPException = 0

defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>;
defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfi>;
defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, fshr>;
defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>;

// XXX - No FPException seems suspect but manual doesn't say it does
//
// NOTE: the definitions down to V_LDEXP_F64 still sit inside a
// `let isReMaterializable = 1 in {` scope; its closing brace is below.
let mayRaiseFPException = 0 in {
  let isCommutable = 1 in {
    defm V_MIN3_I32 : VOP3Inst <"v_min3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmin3>;
    defm V_MIN3_U32 : VOP3Inst <"v_min3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumin3>;
    defm V_MAX3_I32 : VOP3Inst <"v_max3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmax3>;
    defm V_MAX3_U32 : VOP3Inst <"v_max3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumax3>;
    defm V_MED3_I32 : VOP3Inst <"v_med3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmed3>;
    defm V_MED3_U32 : VOP3Inst <"v_med3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumed3>;
  } // End isCommutable = 1
  defm V_MIN3_F32 : VOP3Inst <"v_min3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmin3>;
  defm V_MAX3_F32 : VOP3Inst <"v_max3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmax3>;
  defm V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmed3>;
} // End mayRaiseFPException = 0

let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
  defm V_MINIMUM3_F32 : VOP3Inst <"v_minimum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfminimum3>;
  defm V_MAXIMUM3_F32 : VOP3Inst <"v_maximum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmaximum3>;
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0

let isCommutable = 1 in {
  defm V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
  defm V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
  defm V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
  defm V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
} // End isCommutable = 1
defm V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_I32>, int_amdgcn_cvt_pk_u8_f32>;

defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", DIV_FIXUP_F32_PROF, AMDGPUdiv_fixup>;

let SchedRW = [WriteDoubleAdd], FPDPRounding = 1 in {
  defm V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, AMDGPUdiv_fixup>;
  defm V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, any_fldexp>;
} // End SchedRW = [WriteDoubleAdd], FPDPRounding = 1
} // End isReMaterializable = 1


let mayRaiseFPException = 0 in { // Seems suspicious but manual doesn't say it does.
  let SchedRW = [WriteFloatFMA, WriteSALU] in
  defm V_DIV_SCALE_F32 : VOP3Inst_Pseudo_Wrapper <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32> ;

  // Double precision division pre-scale.
  let SchedRW = [WriteDouble, WriteSALU], FPDPRounding = 1 in
  defm V_DIV_SCALE_F64 : VOP3Inst_Pseudo_Wrapper <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64>;
} // End mayRaiseFPException = 0

let isReMaterializable = 1 in
defm V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;

let Constraints = "@earlyclobber $vdst", IsInvalidSingleUseConsumer = 1 in {
defm V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
} // End Constraints = "@earlyclobber $vdst", IsInvalidSingleUseConsumer = 1


let isReMaterializable = 1 in {
let SchedRW = [WriteDouble] in {
defm V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I32>, int_amdgcn_trig_preop>;
} // End SchedRW = [WriteDouble]

let SchedRW = [Write64Bit] in {
  let SubtargetPredicate = isGFX6GFX7 in {
    defm V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_I64_I64_I32>, cshl_64>;
    defm V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_I64_I64_I32>, csrl_64>;
    defm V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_I64_I64_I32>, csra_64>;
  } // End SubtargetPredicate = isGFX6GFX7

  let IsInvalidSingleUseConsumer = 1 in {
    let SubtargetPredicate = isGFX8Plus in {
      defm V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>, clshr_rev_64>;
      defm V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>, cashr_rev_64>;
    } // End SubtargetPredicate = isGFX8Plus

    let SubtargetPredicate = isGFX8GFX9GFX10GFX11 in {
      defm V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>, clshl_rev_64>;
    } // End SubtargetPredicate = isGFX8GFX9GFX10GFX11
  } // End IsInvalidSingleUseConsumer = 1
} // End SchedRW = [Write64Bit]
} // End isReMaterializable = 1

// Divergent i16 -> i32 sign extension: signed bitfield extract with the
// operands 0 and 0x10.
def : GCNPat<
  (i32 (DivergentUnaryFrag<sext> i16:$src)),
  (i32 (V_BFE_I32_e64 i16:$src, (i32 0), (i32 0x10)))
>;

let isReMaterializable = 1 in {
let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
defm V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
} // End SubtargetPredicate = isGFX6GFX7GFX10Plus

let SchedRW = [Write32Bit] in {
let SubtargetPredicate = isGFX8Plus in {
defm V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUperm>;
} // End SubtargetPredicate = isGFX8Plus
} // End SchedRW = [Write32Bit]
} // End isReMaterializable = 1

// v_mqsad_u32_u8 takes no source modifiers.
def VOPProfileMQSAD : VOP3_Profile<VOP_V4I32_I64_I32_V4I32, VOP3_CLAMP> {
  let HasModifiers = 0;
}

let SubtargetPredicate = isGFX7Plus, IsInvalidSingleUseConsumer = 1 in {
let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in {
defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>;
} // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32]
} // End SubtargetPredicate = isGFX7Plus, IsInvalidSingleUseConsumer = 1

let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU], IsInvalidSingleUseConsumer = 1 in {
  let SubtargetPredicate = isGFX7Plus, OtherPredicates = [HasNotMADIntraFwdBug] in {
    defm V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
    defm V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
  } // End SubtargetPredicate = isGFX7Plus, OtherPredicates = [HasNotMADIntraFwdBug]
  // Same mnemonics for GFX11 parts with the MAD intra-forwarding bug; these
  // variants additionally mark $vdst as early-clobber.
  let SubtargetPredicate = isGFX11Only, OtherPredicates = [HasMADIntraFwdBug],
      Constraints = "@earlyclobber $vdst" in {
    defm V_MAD_U64_U32_gfx11 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
    defm V_MAD_I64_I32_gfx11 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
  } // End SubtargetPredicate = isGFX11Only, OtherPredicates = [HasMADIntraFwdBug], Constraints = "@earlyclobber $vdst"
} // End isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU], IsInvalidSingleUseConsumer = 1


let FPDPRounding = 1 in {
  let Predicates = [Has16BitInsts, isGFX8Only] in {
    defm V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUdiv_fixup>;
    defm V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, any_fma>;
  } // End Predicates = [Has16BitInsts, isGFX8Only]

  let renamedInGFX9 = 1, SubtargetPredicate = isGFX9Plus in {
    defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9",
                                          VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUdiv_fixup>;
    defm V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, any_fma>;
  } // End renamedInGFX9 = 1, SubtargetPredicate = isGFX9Plus
} // End FPDPRounding = 1

let SubtargetPredicate = Has16BitInsts, isCommutable = 1 in {

let renamedInGFX9 = 1 in {
  defm V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
  defm V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
  let FPDPRounding = 1 in {
    defm V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, any_fmad>;
    let Uses = [MODE, M0, EXEC] in {
      // For some reason the intrinsic operands are in a different order
      // from the instruction operands.
      let OtherPredicates = [isNotGFX90APlus] in
      def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>,
             [(set f16:$vdst,
                   (int_amdgcn_interp_p2_f16 (VOP3Mods f32:$src2, i32:$src2_modifiers),
                                             (VOP3Mods f32:$src0, i32:$src0_modifiers),
                                             (i32 timm:$attrchan),
                                             (i32 timm:$attr),
                                             (i1 timm:$high),
                                             M0))]>;
    } // End Uses = [MODE, M0, EXEC]
  } // End FPDPRounding = 1
} // End renamedInGFX9 = 1

let SubtargetPredicate = isGFX9Only, FPDPRounding = 1 in {
  defm V_MAD_F16_gfx9 : VOP3Inst <"v_mad_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>> ;
} // End SubtargetPredicate = isGFX9Only, FPDPRounding = 1

let SubtargetPredicate = isGFX9Plus in {
defm V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
defm V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
let OtherPredicates = [isNotGFX90APlus] in
def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>;
} // End SubtargetPredicate = isGFX9Plus

// This predicate should only apply to the selection pattern. The
// instruction still exists and should decode on subtargets with
// other bank counts.
let OtherPredicates = [isNotGFX90APlus, has32BankLDS], Uses = [MODE, M0, EXEC], FPDPRounding = 1 in {
def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>,
       [(set f32:$vdst, (int_amdgcn_interp_p1_f16 (VOP3Mods f32:$src0, i32:$src0_modifiers),
                                                  (i32 timm:$attrchan),
                                                  (i32 timm:$attr),
                                                  (i1 timm:$high), M0))]>;
} // End OtherPredicates = [isNotGFX90APlus, has32BankLDS], Uses = [MODE, M0, EXEC], FPDPRounding = 1

let OtherPredicates = [isNotGFX90APlus], Uses = [MODE, M0, EXEC], FPDPRounding = 1 in {
def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>>;
} // End OtherPredicates = [isNotGFX90APlus], Uses = [MODE, M0, EXEC], FPDPRounding = 1

} // End SubtargetPredicate = Has16BitInsts, isCommutable = 1

// Divergent i16 -> i64 sign extension. The low half (sub0) is the signed
// bitfield extract of $src; the high half (sub1) is that same value shifted
// right arithmetically by 0x1f.
def : GCNPat<
  (i64 (DivergentUnaryFrag<sext> i16:$src)),
  (REG_SEQUENCE VReg_64,
    (i32 (V_BFE_I32_e64 $src, (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10)))), sub0,
    (i32 (COPY_TO_REGCLASS
      (V_ASHRREV_I32_e32 (S_MOV_B32 (i32 0x1f)), (i32 (V_BFE_I32_e64 $src, (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10))))
      ), VGPR_32)), sub1)
>;

let SubtargetPredicate = isGFX8Plus, Uses = [MODE, M0, EXEC], OtherPredicates = [isNotGFX90APlus] in {
def V_INTERP_P1_F32_e64 : VOP3Interp <"v_interp_p1_f32", VOP3_INTERP>;
def V_INTERP_P2_F32_e64 : VOP3Interp <"v_interp_p2_f32", VOP3_INTERP>;
def V_INTERP_MOV_F32_e64 : VOP3Interp <"v_interp_mov_f32", VOP3_INTERP_MOV>;
} // End SubtargetPredicate = isGFX8Plus, Uses = [MODE, M0, EXEC], OtherPredicates = [isNotGFX90APlus]

// Note: 16-bit instructions produce a 0 result in the high 16-bits
// on GFX8 and GFX9 and preserve high 16 bits on GFX10+
//
// Selects `inst` directly for a zero-extended three-operand i16 operation,
// so no separate zero-extension is emitted.
multiclass Arithmetic_i16_0Hi_TernaryPats <SDPatternOperator op, Instruction inst> {
  def : GCNPat<
    (i32 (zext (op i16:$src0, i16:$src1, i16:$src2))),
    (inst VSrc_b16:$src0, VSrc_b16:$src1, VSrc_b16:$src2)
  >;
}

let Predicates = [Has16BitInsts, isGFX8GFX9] in {
defm : Arithmetic_i16_0Hi_TernaryPats<imad, V_MAD_U16_e64>;
} // End Predicates = [Has16BitInsts, isGFX8GFX9]

let Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9] in {

// FIXME: Should be able to just pass imad to the instruction
// definition pattern, but the implied clamp input interferes.
multiclass Ternary_i16_Pats <SDPatternOperator op, Instruction inst> {
  // The trailing (i1 0) fills the extra instruction operand (the clamp input
  // mentioned in the FIXME preceding this multiclass).
  def : GCNPat <
    (op i16:$src0, i16:$src1, i16:$src2),
    (inst i16:$src0, i16:$src1, i16:$src2, (i1 0))
  >;
}

defm: Ternary_i16_Pats<imad, V_MAD_U16_e64>;

} // End Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9]


// Matches op2(op1(src0, src1), src2) on i16 and selects `inst` with every
// source modifier and the destination clamp set to NONE.
class Ternary_i16_Pats_gfx9<SDPatternOperator op1, SDPatternOperator op2,
                            Instruction inst> : GCNPat <
  (op2 (op1 i16:$src0, i16:$src1), i16:$src2),
  (inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
>;

let Predicates = [Has16BitInsts, isGFX10Plus] in {
def: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9_e64>;
} // End Predicates = [Has16BitInsts, isGFX10Plus]

// SelectionDAG fragment for fusing op2(op1(x, y), z) into one three-operand
// VALU instruction. The inner op1 must have a single use, the result must be
// divergent, and the operands must fit within the constant bus limit.
class ThreeOpFragSDAG<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
  (ops node:$x, node:$y, node:$z),
  // When the inner operation is used multiple times, selecting 3-op
  // instructions may still be beneficial -- if the other users can be
  // combined similarly. Let's be conservative for now.
  (op2 (HasOneUseBinOp<op1> node:$x, node:$y), node:$z),
  [{
    // Only use VALU ops when the result is divergent.
    if (!N->isDivergent())
      return false;

    // Check constant bus limitations.
    //
    // Note: Use !isDivergent as a conservative proxy for whether the value
    //       is in an SGPR (uniform values can end up in VGPRs as well).
    unsigned ConstantBusUses = 0;
    for (unsigned i = 0; i < 3; ++i) {
      if (!Operands[i]->isDivergent() &&
          !isInlineImmediate(Operands[i].getNode())) {
        ConstantBusUses++;
        // This uses AMDGPU::V_ADD3_U32_e64, but all three operand instructions
        // have the same constant bus limit.
        if (ConstantBusUses > Subtarget->getConstantBusLimit(AMDGPU::V_ADD3_U32_e64))
          return false;
      }
    }

    return true;
  }]> {
  let PredicateCodeUsesOperands = 1;
}

// ThreeOpFragSDAG plus a GlobalISel predicate: counts the operands assigned
// to the SGPR register bank and rejects the match once the count exceeds the
// constant bus limit.
class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : ThreeOpFragSDAG<op1, op2> {
  // The divergence predicate is irrelevant in GlobalISel, as we have
  // proper register bank checks. We just need to verify the constant
  // bus restriction when all the sources are considered.
  //
  // FIXME: With unlucky SGPR operands, we could penalize code by
  //        blocking folding SGPR->VGPR copies later.
  // FIXME: There's no register bank verifier
  let GISelPredicateCode = [{
    const int ConstantBusLimit = Subtarget->getConstantBusLimit(AMDGPU::V_ADD3_U32_e64);
    int ConstantBusUses = 0;
    for (unsigned i = 0; i < 3; ++i) {
      const RegisterBank *RegBank = RBI.getRegBank(Operands[i]->getReg(), MRI, TRI);
      if (RegBank->getID() == AMDGPU::SGPRRegBankID) {
        if (++ConstantBusUses > ConstantBusLimit)
          return false;
      }
    }
    return true;
  }];
}

// Left shift whose shift amount is a constant no greater than 4 (unsigned
// compare). The GlobalISel predicate also accepts a constant reached through
// a copy.
def shl_0_to_4 : PatFrag<
  (ops node:$src0, node:$src1), (shl node:$src0, node:$src1),
  [{
    if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
      return C->getZExtValue() <= 4;
    }
    return false;
  }]> {
  let GISelPredicateCode = [{
    int64_t Imm = 0;
    if (!mi_match(MI.getOperand(2).getReg(), MRI, m_ICst(Imm)) &&
        !mi_match(MI.getOperand(2).getReg(), MRI, m_Copy(m_ICst(Imm))))
      return false;
    return (uint64_t)Imm <= 4;
  }];
}

523fcaf7f86SDimitry Andricdef VOP3_CVT_PK_F8_F32_Profile : VOP3_Profile<VOP_I32_F32_F32, VOP3_OPSEL> { 524fcaf7f86SDimitry Andric let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, 525fcaf7f86SDimitry Andric FP32InputMods:$src1_modifiers, Src1RC64:$src1, 526fcaf7f86SDimitry
Andric VGPR_32:$vdst_in, op_sel0:$op_sel); 527b3edf446SDimitry Andric let InsVOP3DPP = (ins VGPR_32:$old, 528b3edf446SDimitry Andric FP32InputMods:$src0_modifiers, Src0VOP3DPP:$src0, 529b3edf446SDimitry Andric FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1, 530b3edf446SDimitry Andric VGPR_32:$vdst_in, op_sel0:$op_sel, 531*0fca6ea1SDimitry Andric dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, 532*0fca6ea1SDimitry Andric DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl); 533b3edf446SDimitry Andric 534b3edf446SDimitry Andric let InsVOP3DPP16 = (ins VGPR_32:$old, 535b3edf446SDimitry Andric FP32InputMods:$src0_modifiers, Src0VOP3DPP:$src0, 536b3edf446SDimitry Andric FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1, 537b3edf446SDimitry Andric VGPR_32:$vdst_in, op_sel0:$op_sel, 538*0fca6ea1SDimitry Andric dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, 539*0fca6ea1SDimitry Andric DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl, Dpp16FI:$fi); 540b3edf446SDimitry Andric let InsVOP3DPP8 = (ins VGPR_32:$old, 541b3edf446SDimitry Andric FP32InputMods:$src0_modifiers, Src0VOP3DPP:$src0, 542b3edf446SDimitry Andric FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1, 543*0fca6ea1SDimitry Andric VGPR_32:$vdst_in, op_sel0:$op_sel, dpp8:$dpp8, Dpp8FI:$fi); 544b3edf446SDimitry Andric 545fcaf7f86SDimitry Andric let HasClamp = 0; 546b3edf446SDimitry Andric let HasExtVOP3DPP = 1; 547fcaf7f86SDimitry Andric} 548fcaf7f86SDimitry Andric 549fcaf7f86SDimitry Andricdef VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>, 550fcaf7f86SDimitry Andric VOP3_OPSEL> { 551fcaf7f86SDimitry Andric let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0, 552fcaf7f86SDimitry Andric FP32InputMods:$src1_modifiers, Src1RC64:$src1, 553fcaf7f86SDimitry Andric FP32InputMods:$src2_modifiers, VGPR_32:$src2, 554fcaf7f86SDimitry Andric op_sel0:$op_sel); 555b3edf446SDimitry Andric let InsVOP3DPP16 = (ins VGPR_32:$old, 556b3edf446SDimitry Andric FP32InputMods:$src0_modifiers, 
Src0VOP3DPP:$src0, 557b3edf446SDimitry Andric FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1, 558b3edf446SDimitry Andric FP32InputMods:$src2_modifiers, VGPR_32:$src2, 559*0fca6ea1SDimitry Andric op_sel0:$op_sel, dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, 560*0fca6ea1SDimitry Andric DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl, Dpp16FI:$fi); 561b3edf446SDimitry Andric let InsVOP3DPP8 = (ins VGPR_32:$old, 562b3edf446SDimitry Andric FP32InputMods:$src0_modifiers, Src0VOP3DPP:$src0, 563b3edf446SDimitry Andric FP32InputMods:$src1_modifiers, Src1VOP3DPP:$src1, 564b3edf446SDimitry Andric FP32InputMods:$src2_modifiers, VGPR_32:$src2, 565*0fca6ea1SDimitry Andric op_sel0:$op_sel, dpp8:$dpp8, Dpp8FI:$fi); 566fcaf7f86SDimitry Andric let HasClamp = 0; 567fcaf7f86SDimitry Andric let HasSrc2 = 0; 568fcaf7f86SDimitry Andric let HasSrc2Mods = 1; 569b3edf446SDimitry Andric let HasExtVOP3DPP = 1; 570b3edf446SDimitry Andric let HasOpSel = 1; 571fcaf7f86SDimitry Andric let AsmVOP3OpSel = !subst(", $src2_modifiers", "", 572bdd1243dSDimitry Andric getAsmVOP3OpSel<3, HasClamp, HasOMod, 573fcaf7f86SDimitry Andric HasSrc0FloatMods, HasSrc1FloatMods, 574fcaf7f86SDimitry Andric HasSrc2FloatMods>.ret); 575b3edf446SDimitry Andric let AsmVOP3DPP16 = !subst(", $src2_modifiers", "", 576b3edf446SDimitry Andric getAsmVOP3DPP16<getAsmVOP3Base<3, 1, HasClamp, 1, 577b3edf446SDimitry Andric HasOMod, 0, 1, HasSrc0FloatMods, 578b3edf446SDimitry Andric HasSrc1FloatMods, 579b3edf446SDimitry Andric HasSrc2FloatMods>.ret>.ret); 580b3edf446SDimitry Andric let AsmVOP3DPP8 = !subst(", $src2_modifiers", "", 581b3edf446SDimitry Andric getAsmVOP3DPP8<getAsmVOP3Base<3, 1, HasClamp, 1, 582b3edf446SDimitry Andric HasOMod, 0, 1, HasSrc0FloatMods, 583b3edf446SDimitry Andric HasSrc1FloatMods, 584b3edf446SDimitry Andric HasSrc2FloatMods>.ret>.ret); 585fcaf7f86SDimitry Andric} 586fcaf7f86SDimitry Andric 587*0fca6ea1SDimitry Andricclass VOP3_CVT_SR_F8_ByteSel_Profile<ValueType SrcVT> : 588*0fca6ea1SDimitry Andric 
VOP3_Profile<VOPProfile<[i32, SrcVT, i32, untyped]>> { 589*0fca6ea1SDimitry Andric let IsFP8DstByteSel = 1; 590*0fca6ea1SDimitry Andric let HasClamp = 0; 591*0fca6ea1SDimitry Andric defvar bytesel = (ins VGPR_32:$vdst_in, ByteSel:$byte_sel); 592*0fca6ea1SDimitry Andric let Ins64 = !con(getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs, 593*0fca6ea1SDimitry Andric HasClamp, HasModifiers, HasSrc2Mods, 594*0fca6ea1SDimitry Andric HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret, 595*0fca6ea1SDimitry Andric bytesel); 596*0fca6ea1SDimitry Andric let InsVOP3Base = !con( 597*0fca6ea1SDimitry Andric getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, 598*0fca6ea1SDimitry Andric Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod, 599*0fca6ea1SDimitry Andric Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret, 600*0fca6ea1SDimitry Andric bytesel); 601*0fca6ea1SDimitry Andric} 602*0fca6ea1SDimitry Andric 6035f757f3fSDimitry Andricdef IsPow2Plus1: PatLeaf<(i32 imm), [{ 6045f757f3fSDimitry Andric uint32_t V = N->getZExtValue(); 6055f757f3fSDimitry Andric return isPowerOf2_32(V - 1); 6065f757f3fSDimitry Andric}]>; 6075f757f3fSDimitry Andric 6085f757f3fSDimitry Andricdef Log2_32: SDNodeXForm<imm, [{ 6095f757f3fSDimitry Andric uint32_t V = N->getZExtValue(); 6105f757f3fSDimitry Andric return CurDAG->getTargetConstant(Log2_32(V - 1), SDLoc(N), MVT::i32); 6115f757f3fSDimitry Andric}]>; 6125f757f3fSDimitry Andric 6130b57cec5SDimitry Andriclet SubtargetPredicate = isGFX9Plus in { 614fe6060f1SDimitry Andriclet isCommutable = 1, isReMaterializable = 1 in { 615e8d8bef9SDimitry Andric defm V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>; 616e8d8bef9SDimitry Andric defm V_AND_OR_B32 : VOP3Inst <"v_and_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>; 617e8d8bef9SDimitry Andric defm V_OR3_B32 : VOP3Inst <"v_or3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>; 618e8d8bef9SDimitry Andric defm V_XAD_U32 : VOP3Inst <"v_xad_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>; 
619fe6060f1SDimitry Andric defm V_ADD_I32 : VOP3Inst <"v_add_i32", VOP3_Profile<VOP_I32_I32_I32_ARITH>>; 620fe6060f1SDimitry Andric defm V_ADD_LSHL_U32 : VOP3Inst <"v_add_lshl_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>; 621fe6060f1SDimitry Andric} // End isCommutable = 1, isReMaterializable = 1 622fe6060f1SDimitry Andric// TODO src0 contains the opsel bit for dst, so if we commute, need to mask and swap this 623fe6060f1SDimitry Andric// to the new src0. 624e8d8bef9SDimitry Andricdefm V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmed3>; 625e8d8bef9SDimitry Andricdefm V_MED3_I16 : VOP3Inst <"v_med3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmed3>; 626e8d8bef9SDimitry Andricdefm V_MED3_U16 : VOP3Inst <"v_med3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumed3>; 6270b57cec5SDimitry Andric 628e8d8bef9SDimitry Andricdefm V_MIN3_F16 : VOP3Inst <"v_min3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmin3>; 629e8d8bef9SDimitry Andricdefm V_MIN3_I16 : VOP3Inst <"v_min3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmin3>; 630e8d8bef9SDimitry Andricdefm V_MIN3_U16 : VOP3Inst <"v_min3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumin3>; 6310b57cec5SDimitry Andric 632e8d8bef9SDimitry Andricdefm V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmax3>; 633e8d8bef9SDimitry Andricdefm V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmax3>; 634e8d8bef9SDimitry Andricdefm V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumax3>; 6350b57cec5SDimitry Andric 6365f757f3fSDimitry Andriclet SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in { 6375f757f3fSDimitry Andric defm V_MINIMUM3_F16 : VOP3Inst <"v_minimum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfminimum3>; 6385f757f3fSDimitry Andric defm V_MAXIMUM3_F16 : VOP3Inst 
<"v_maximum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmaximum3>; 6395f757f3fSDimitry Andric} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 6405f757f3fSDimitry Andric 641e8d8bef9SDimitry Andricdefm V_ADD_I16 : VOP3Inst <"v_add_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>; 642e8d8bef9SDimitry Andricdefm V_SUB_I16 : VOP3Inst <"v_sub_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>; 6430b57cec5SDimitry Andric 644e8d8bef9SDimitry Andricdefm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>; 645e8d8bef9SDimitry Andricdefm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>; 6460b57cec5SDimitry Andric 647e8d8bef9SDimitry Andricdefm V_CVT_PKNORM_I16_F16 : VOP3Inst <"v_cvt_pknorm_i16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>; 648e8d8bef9SDimitry Andricdefm V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>; 6490b57cec5SDimitry Andric 650fe6060f1SDimitry Andricdefm V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>; 651fe6060f1SDimitry Andric 652fe6060f1SDimitry Andriclet isReMaterializable = 1 in { 653e8d8bef9SDimitry Andricdefm V_SUB_I32 : VOP3Inst <"v_sub_i32", VOP3_Profile<VOP_I32_I32_I32_ARITH>>; 654fe6060f1SDimitry Andricdefm V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>; 655fe6060f1SDimitry Andricdefm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>; 656fe6060f1SDimitry Andric} // End isReMaterializable = 1 6570b57cec5SDimitry Andric 65881ad6265SDimitry Andric// V_LSHL_ADD_U64: D0.u64 = (S0.u64 << S1.u[2:0]) + S2.u64 65981ad6265SDimitry Andric// src0 is shifted left by 0-4 (use “0” to get ADD_U64). 
66081ad6265SDimitry Andriclet SubtargetPredicate = isGFX940Plus in 66181ad6265SDimitry Andricdefm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", VOP3_Profile<VOP_I64_I64_I32_I64>>; 6620b57cec5SDimitry Andric 663*0fca6ea1SDimitry Andriclet OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0, 664fcaf7f86SDimitry Andric SchedRW = [WriteFloatCvt] in { 665fcaf7f86SDimitry Andric let Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in" in { 666fcaf7f86SDimitry Andric defm V_CVT_PK_FP8_F32 : VOP3Inst<"v_cvt_pk_fp8_f32", VOP3_CVT_PK_F8_F32_Profile>; 667fcaf7f86SDimitry Andric defm V_CVT_PK_BF8_F32 : VOP3Inst<"v_cvt_pk_bf8_f32", VOP3_CVT_PK_F8_F32_Profile>; 668*0fca6ea1SDimitry Andric 669*0fca6ea1SDimitry Andric let SubtargetPredicate = isGFX12Plus in { 670*0fca6ea1SDimitry Andric defm V_CVT_SR_FP8_F32_gfx12 : VOP3Inst<"v_cvt_sr_fp8_f32_gfx12", VOP3_CVT_SR_F8_ByteSel_Profile<f32>>; 671*0fca6ea1SDimitry Andric defm V_CVT_SR_BF8_F32_gfx12 : VOP3Inst<"v_cvt_sr_bf8_f32_gfx12", VOP3_CVT_SR_F8_ByteSel_Profile<f32>>; 672*0fca6ea1SDimitry Andric } 673fcaf7f86SDimitry Andric } 674fcaf7f86SDimitry Andric 675fcaf7f86SDimitry Andric // These instructions have non-standard use of op_sel. In particular they are 676fcaf7f86SDimitry Andric // using op_sel bits 2 and 3 while only having two sources. Therefore dummy 677fcaf7f86SDimitry Andric // src2 is used to hold the op_sel value. 
678*0fca6ea1SDimitry Andric let Constraints = "$vdst = $src2", DisableEncoding = "$src2", SubtargetPredicate = isGFX940Plus in { 679fcaf7f86SDimitry Andric defm V_CVT_SR_FP8_F32 : VOP3Inst<"v_cvt_sr_fp8_f32", VOP3_CVT_SR_F8_F32_Profile>; 680fcaf7f86SDimitry Andric defm V_CVT_SR_BF8_F32 : VOP3Inst<"v_cvt_sr_bf8_f32", VOP3_CVT_SR_F8_F32_Profile>; 681fcaf7f86SDimitry Andric } 682fcaf7f86SDimitry Andric} 683fcaf7f86SDimitry Andric 684fcaf7f86SDimitry Andricclass Cvt_PK_F8_F32_Pat<SDPatternOperator node, int index, VOP3_Pseudo inst> : GCNPat< 685fcaf7f86SDimitry Andric (i32 (node f32:$src0, f32:$src1, i32:$old, index)), 686b3edf446SDimitry Andric (inst !if(index, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1, $old, 0) 687fcaf7f86SDimitry Andric>; 688fcaf7f86SDimitry Andric 689fcaf7f86SDimitry Andricclass Cvt_SR_F8_F32_Pat<SDPatternOperator node, bits<2> index, VOP3_Pseudo inst> : GCNPat< 690fcaf7f86SDimitry Andric (i32 (node f32:$src0, i32:$src1, i32:$old, index)), 691fcaf7f86SDimitry Andric (inst !if(index{1}, SRCMODS.DST_OP_SEL, 0), $src0, 0, $src1, 692b3edf446SDimitry Andric !if(index{0}, SRCMODS.OP_SEL_0, 0), $old, 0) 693fcaf7f86SDimitry Andric>; 694fcaf7f86SDimitry Andric 695*0fca6ea1SDimitry Andricclass Cvt_SR_F8_ByteSel_Pat<SDPatternOperator node, VOP3_Pseudo inst, ValueType SrcVT> : GCNPat< 696*0fca6ea1SDimitry Andric (i32 (node (VOP3Mods SrcVT:$src0, i32:$src0_modifiers), (VOP3Mods i32:$src1, i32:$src1_modifiers), 697*0fca6ea1SDimitry Andric i32:$old, timm:$byte_sel)), 698*0fca6ea1SDimitry Andric (inst $src0_modifiers, $src0, $src1_modifiers, $src1, $old, (as_i32timm $byte_sel)) 699*0fca6ea1SDimitry Andric>; 700*0fca6ea1SDimitry Andric 701*0fca6ea1SDimitry Andriclet OtherPredicates = [HasFP8ConversionInsts] in { 702fcaf7f86SDimitry Andricforeach Index = [0, -1] in { 703fcaf7f86SDimitry Andric def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_fp8_f32, Index, V_CVT_PK_FP8_F32_e64>; 704fcaf7f86SDimitry Andric def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_bf8_f32, Index, 
V_CVT_PK_BF8_F32_e64>; 705fcaf7f86SDimitry Andric} 706fcaf7f86SDimitry Andric 707*0fca6ea1SDimitry Andriclet SubtargetPredicate = isGFX940Plus in { 708fcaf7f86SDimitry Andric foreach Index = [0, 1, 2, 3] in { 709fcaf7f86SDimitry Andric def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_fp8_f32, Index, V_CVT_SR_FP8_F32_e64>; 710fcaf7f86SDimitry Andric def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_bf8_f32, Index, V_CVT_SR_BF8_F32_e64>; 711fcaf7f86SDimitry Andric } 712*0fca6ea1SDimitry Andric} 713*0fca6ea1SDimitry Andric 714*0fca6ea1SDimitry Andriclet SubtargetPredicate = isGFX12Plus in { 715*0fca6ea1SDimitry Andric def : Cvt_SR_F8_ByteSel_Pat<int_amdgcn_cvt_sr_fp8_f32, V_CVT_SR_FP8_F32_gfx12_e64, f32>; 716*0fca6ea1SDimitry Andric def : Cvt_SR_F8_ByteSel_Pat<int_amdgcn_cvt_sr_bf8_f32, V_CVT_SR_BF8_F32_gfx12_e64, f32>; 717*0fca6ea1SDimitry Andric} 718*0fca6ea1SDimitry Andric} 719fcaf7f86SDimitry Andric 7200b57cec5SDimitry Andricclass ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat < 7210b57cec5SDimitry Andric // This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions. 
7220b57cec5SDimitry Andric (ThreeOpFrag<op1, op2> i32:$src0, i32:$src1, i32:$src2), 7235ffd83dbSDimitry Andric (inst VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2) 7240b57cec5SDimitry Andric>; 7250b57cec5SDimitry Andric 726349cc55cSDimitry Andricdef : ThreeOp_i32_Pats<cshl_32, add, V_LSHL_ADD_U32_e64>; 727349cc55cSDimitry Andricdef : ThreeOp_i32_Pats<add, cshl_32, V_ADD_LSHL_U32_e64>; 728e8d8bef9SDimitry Andricdef : ThreeOp_i32_Pats<add, add, V_ADD3_U32_e64>; 729bdd1243dSDimitry Andricdef : ThreeOp_i32_Pats<ptradd, ptradd, V_ADD3_U32_e64>; 730349cc55cSDimitry Andricdef : ThreeOp_i32_Pats<cshl_32, or, V_LSHL_OR_B32_e64>; 731e8d8bef9SDimitry Andricdef : ThreeOp_i32_Pats<and, or, V_AND_OR_B32_e64>; 732e8d8bef9SDimitry Andricdef : ThreeOp_i32_Pats<or, or, V_OR3_B32_e64>; 733e8d8bef9SDimitry Andricdef : ThreeOp_i32_Pats<xor, add, V_XAD_U32_e64>; 7340b57cec5SDimitry Andric 7355f757f3fSDimitry Andricdef : GCNPat< 7365f757f3fSDimitry Andric (DivergentBinFrag<mul> i32:$src0, IsPow2Plus1:$src1), 7375f757f3fSDimitry Andric (V_LSHL_ADD_U32_e64 i32:$src0, (i32 (Log2_32 imm:$src1)), i32:$src0)>; 7385f757f3fSDimitry Andric 73981ad6265SDimitry Andriclet SubtargetPredicate = isGFX940Plus in 74081ad6265SDimitry Andricdef : GCNPat< 74181ad6265SDimitry Andric (ThreeOpFrag<shl_0_to_4, add> i64:$src0, i32:$src1, i64:$src2), 74281ad6265SDimitry Andric (V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2) 74381ad6265SDimitry Andric>; 74481ad6265SDimitry Andric 745e8d8bef9SDimitry Andricdef : VOPBinOpClampPat<saddsat, V_ADD_I32_e64, i32>; 746e8d8bef9SDimitry Andricdef : VOPBinOpClampPat<ssubsat, V_SUB_I32_e64, i32>; 747e8d8bef9SDimitry Andric 748bdd1243dSDimitry Andricdef : GCNPat<(DivergentBinFrag<or> (or_oneuse i64:$src0, i64:$src1), i64:$src2), 749349cc55cSDimitry Andric (REG_SEQUENCE VReg_64, 750349cc55cSDimitry Andric (V_OR3_B32_e64 (i32 (EXTRACT_SUBREG $src0, sub0)), 751349cc55cSDimitry Andric (i32 (EXTRACT_SUBREG $src1, sub0)), 752349cc55cSDimitry Andric (i32 
(EXTRACT_SUBREG $src2, sub0))), sub0, 753349cc55cSDimitry Andric (V_OR3_B32_e64 (i32 (EXTRACT_SUBREG $src0, sub1)), 754349cc55cSDimitry Andric (i32 (EXTRACT_SUBREG $src1, sub1)), 755349cc55cSDimitry Andric (i32 (EXTRACT_SUBREG $src2, sub1))), sub1)>; 756e8d8bef9SDimitry Andric 757e8d8bef9SDimitry Andric// FIXME: Probably should hardcode clamp bit in pseudo and avoid this. 758e8d8bef9SDimitry Andricclass OpSelBinOpClampPat<SDPatternOperator node, 759e8d8bef9SDimitry Andric Instruction inst> : GCNPat< 760e8d8bef9SDimitry Andric (node (i16 (VOP3OpSel i16:$src0, i32:$src0_modifiers)), 761e8d8bef9SDimitry Andric (i16 (VOP3OpSel i16:$src1, i32:$src1_modifiers))), 762e8d8bef9SDimitry Andric (inst $src0_modifiers, $src0, $src1_modifiers, $src1, DSTCLAMP.ENABLE, 0) 763e8d8bef9SDimitry Andric>; 764e8d8bef9SDimitry Andric 765e8d8bef9SDimitry Andricdef : OpSelBinOpClampPat<saddsat, V_ADD_I16_e64>; 766e8d8bef9SDimitry Andricdef : OpSelBinOpClampPat<ssubsat, V_SUB_I16_e64>; 7670b57cec5SDimitry Andric} // End SubtargetPredicate = isGFX9Plus 7680b57cec5SDimitry Andric 76981ad6265SDimitry Andricmulticlass IMAD32_Pats <VOP3_Pseudo inst> { 77081ad6265SDimitry Andric def : GCNPat < 77181ad6265SDimitry Andric (ThreeOpFrag<mul, add> i32:$src0, i32:$src1, i32:$src2), 772*0fca6ea1SDimitry Andric (EXTRACT_SUBREG (inst i32:$src0, i32:$src1, 77381ad6265SDimitry Andric (REG_SEQUENCE SReg_64, // Use scalar and let it be legalized 77481ad6265SDimitry Andric $src2, sub0, 77581ad6265SDimitry Andric (i32 (IMPLICIT_DEF)), sub1), 77681ad6265SDimitry Andric 0 /* clamp */), 77781ad6265SDimitry Andric sub0) 77881ad6265SDimitry Andric >; 779*0fca6ea1SDimitry Andric 780*0fca6ea1SDimitry Andric // GISel-specific pattern that avoids creating a SGPR->VGPR copy if 781*0fca6ea1SDimitry Andric // $src2 is a VGPR. 
782*0fca6ea1SDimitry Andric def : GCNPat < 783*0fca6ea1SDimitry Andric (ThreeOpFrag<mul, add> i32:$src0, i32:$src1, VGPR_32:$src2), 784*0fca6ea1SDimitry Andric (EXTRACT_SUBREG (inst i32:$src0, i32:$src1, 785*0fca6ea1SDimitry Andric (REG_SEQUENCE VReg_64, 786*0fca6ea1SDimitry Andric $src2, sub0, 787*0fca6ea1SDimitry Andric (i32 (IMPLICIT_DEF)), sub1), 788*0fca6ea1SDimitry Andric 0 /* clamp */), 789*0fca6ea1SDimitry Andric sub0) 790*0fca6ea1SDimitry Andric >; 791*0fca6ea1SDimitry Andric 79281ad6265SDimitry Andric // Immediate src2 in the pattern above will not fold because it would be partially 79381ad6265SDimitry Andric // undef. Hence define specialized pattern for this case. 79481ad6265SDimitry Andric def : GCNPat < 795*0fca6ea1SDimitry Andric (ThreeOpFrag<mul, add> i32:$src0, i32:$src1, (i32 imm:$src2)), 796*0fca6ea1SDimitry Andric (EXTRACT_SUBREG (inst i32:$src0, i32:$src1, (i64 (as_i64imm $src2)), 0 /* clamp */), sub0) 79781ad6265SDimitry Andric >; 79881ad6265SDimitry Andric} 79981ad6265SDimitry Andric 8005f757f3fSDimitry Andric// Handle cases where amdgpu-codegenprepare-mul24 made a mul24 instead of a normal mul. 8015f757f3fSDimitry Andric// We need to separate this because otherwise OtherPredicates would be overridden. 
8025f757f3fSDimitry Andricclass IMAD32_Mul24_Pat<VOP3_Pseudo inst>: GCNPat < 8035f757f3fSDimitry Andric (i64 (add (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)), i64:$src2)), 8045f757f3fSDimitry Andric (inst $src0, $src1, $src2, 0 /* clamp */) 8055f757f3fSDimitry Andric >; 8065f757f3fSDimitry Andric 807bdd1243dSDimitry Andric// exclude pre-GFX9 where it was slow 8085f757f3fSDimitry Andriclet OtherPredicates = [HasNotMADIntraFwdBug], SubtargetPredicate = isGFX9Plus in { 80981ad6265SDimitry Andric defm : IMAD32_Pats<V_MAD_U64_U32_e64>; 8105f757f3fSDimitry Andric def : IMAD32_Mul24_Pat<V_MAD_U64_U32_e64>; 8115f757f3fSDimitry Andric} 8125f757f3fSDimitry Andriclet OtherPredicates = [HasMADIntraFwdBug], SubtargetPredicate = isGFX11Only in { 81381ad6265SDimitry Andric defm : IMAD32_Pats<V_MAD_U64_U32_gfx11_e64>; 8145f757f3fSDimitry Andric def : IMAD32_Mul24_Pat<V_MAD_U64_U32_gfx11_e64>; 8155f757f3fSDimitry Andric} 81681ad6265SDimitry Andric 8170b57cec5SDimitry Andricdef VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3_OPSEL> { 8180b57cec5SDimitry Andric let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0, 819bdd1243dSDimitry Andric IntOpSelMods:$src1_modifiers, SSrc_b32:$src1, 820bdd1243dSDimitry Andric IntOpSelMods:$src2_modifiers, SSrc_b32:$src2, 821e8d8bef9SDimitry Andric VGPR_32:$vdst_in, op_sel0:$op_sel); 8220b57cec5SDimitry Andric let HasClamp = 0; 82381ad6265SDimitry Andric let HasExtVOP3DPP = 0; 82481ad6265SDimitry Andric let HasExtDPP = 0; 8250b57cec5SDimitry Andric} 8260b57cec5SDimitry Andric 8275f757f3fSDimitry Andricdef VOP3_PERMLANE_VAR_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, untyped]>, VOP3_OPSEL> { 8285f757f3fSDimitry Andric let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0, 8295f757f3fSDimitry Andric IntOpSelMods:$src1_modifiers, VRegSrc_32:$src1, 8305f757f3fSDimitry Andric VGPR_32:$vdst_in, op_sel0:$op_sel); 8315f757f3fSDimitry Andric let HasClamp = 0; 8325f757f3fSDimitry Andric 
let HasExtVOP3DPP = 0; 8335f757f3fSDimitry Andric let HasExtDPP = 0; 8345f757f3fSDimitry Andric} 8355f757f3fSDimitry Andric 83606c3fb27SDimitry Andricdef opsel_i1timm : SDNodeXForm<timm, [{ 83706c3fb27SDimitry Andric return CurDAG->getTargetConstant( 83806c3fb27SDimitry Andric N->getZExtValue() ? SISrcMods::OP_SEL_0 : SISrcMods::NONE, 83906c3fb27SDimitry Andric SDLoc(N), MVT::i32); 84006c3fb27SDimitry Andric}]>; 84106c3fb27SDimitry Andricdef gi_opsel_i1timm : GICustomOperandRenderer<"renderOpSelTImm">, 84206c3fb27SDimitry Andric GISDNodeXFormEquiv<opsel_i1timm>; 84306c3fb27SDimitry Andric 8445ffd83dbSDimitry Andricclass PermlanePat<SDPatternOperator permlane, 845*0fca6ea1SDimitry Andric Instruction inst, ValueType vt> : GCNPat< 846*0fca6ea1SDimitry Andric (vt (permlane vt:$vdst_in, vt:$src0, i32:$src1, i32:$src2, 847*0fca6ea1SDimitry Andric timm:$fi, timm:$bc)), 84806c3fb27SDimitry Andric (inst (opsel_i1timm $fi), VGPR_32:$src0, (opsel_i1timm $bc), 8495ffd83dbSDimitry Andric SCSrc_b32:$src1, 0, SCSrc_b32:$src2, VGPR_32:$vdst_in) 8505ffd83dbSDimitry Andric>; 8515ffd83dbSDimitry Andric 8525f757f3fSDimitry Andricclass PermlaneVarPat<SDPatternOperator permlane, 8535f757f3fSDimitry Andric Instruction inst> : GCNPat< 8545f757f3fSDimitry Andric (permlane i32:$vdst_in, i32:$src0, i32:$src1, 8555f757f3fSDimitry Andric timm:$fi, timm:$bc), 8565f757f3fSDimitry Andric (inst (opsel_i1timm $fi), VGPR_32:$src0, (opsel_i1timm $bc), 8575f757f3fSDimitry Andric VGPR_32:$src1, VGPR_32:$vdst_in) 8585f757f3fSDimitry Andric>; 8595ffd83dbSDimitry Andric 8600b57cec5SDimitry Andriclet SubtargetPredicate = isGFX10Plus in { 861fe6060f1SDimitry Andric let isCommutable = 1, isReMaterializable = 1 in { 862e8d8bef9SDimitry Andric defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>; 863fe6060f1SDimitry Andric } // End isCommutable = 1, isReMaterializable = 1 864e8d8bef9SDimitry Andric def : ThreeOp_i32_Pats<xor, xor, V_XOR3_B32_e64>; 8650b57cec5SDimitry Andric 
866*0fca6ea1SDimitry Andric let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in", IsInvalidSingleUseConsumer = 1, IsInvalidSingleUseProducer = 1 in { 867e8d8bef9SDimitry Andric defm V_PERMLANE16_B32 : VOP3Inst<"v_permlane16_b32", VOP3_PERMLANE_Profile>; 868e8d8bef9SDimitry Andric defm V_PERMLANEX16_B32 : VOP3Inst<"v_permlanex16_b32", VOP3_PERMLANE_Profile>; 869*0fca6ea1SDimitry Andric } // End $vdst = $vdst_in, DisableEncoding $vdst_in, IsInvalidSingleUseConsumer = 1, IsInvalidSingleUseProducer = 1 8700b57cec5SDimitry Andric 871*0fca6ea1SDimitry Andric foreach vt = Reg32Types.types in { 872*0fca6ea1SDimitry Andric def : PermlanePat<int_amdgcn_permlane16, V_PERMLANE16_B32_e64, vt>; 873*0fca6ea1SDimitry Andric def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64, vt>; 874*0fca6ea1SDimitry Andric } 8755ffd83dbSDimitry Andric 87681ad6265SDimitry Andric defm V_ADD_NC_U16 : VOP3Inst <"v_add_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, add>; 87781ad6265SDimitry Andric defm V_SUB_NC_U16 : VOP3Inst <"v_sub_nc_u16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>, sub>; 87881ad6265SDimitry Andric 87981ad6265SDimitry Andric def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_e64>; 88081ad6265SDimitry Andric def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_e64>; 88181ad6265SDimitry Andric 88281ad6265SDimitry Andric // Undo sub x, c -> add x, -c canonicalization since c is more likely 88381ad6265SDimitry Andric // an inline immediate than -c. 
88481ad6265SDimitry Andric def : GCNPat< 88581ad6265SDimitry Andric (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)), 88681ad6265SDimitry Andric (V_SUB_NC_U16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0) 88781ad6265SDimitry Andric >; 88881ad6265SDimitry Andric 8890b57cec5SDimitry Andric} // End SubtargetPredicate = isGFX10Plus 8900b57cec5SDimitry Andric 8915f757f3fSDimitry Andriclet SubtargetPredicate = isGFX12Plus in { 8925f757f3fSDimitry Andric let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in { 8935f757f3fSDimitry Andric defm V_PERMLANE16_VAR_B32 : VOP3Inst<"v_permlane16_var_b32", VOP3_PERMLANE_VAR_Profile>; 8945f757f3fSDimitry Andric defm V_PERMLANEX16_VAR_B32 : VOP3Inst<"v_permlanex16_var_b32", VOP3_PERMLANE_VAR_Profile>; 8955f757f3fSDimitry Andric } // End $vdst = $vdst_in, DisableEncoding $vdst_in 8965f757f3fSDimitry Andric 8975f757f3fSDimitry Andric def : PermlaneVarPat<int_amdgcn_permlane16_var, V_PERMLANE16_VAR_B32_e64>; 8985f757f3fSDimitry Andric def : PermlaneVarPat<int_amdgcn_permlanex16_var, V_PERMLANEX16_VAR_B32_e64>; 8995f757f3fSDimitry Andric 9005f757f3fSDimitry Andric} // End SubtargetPredicate = isGFX12Plus 9015f757f3fSDimitry Andric 9025ffd83dbSDimitry Andricclass DivFmasPat<ValueType vt, Instruction inst, Register CondReg> : GCNPat< 9035ffd83dbSDimitry Andric (AMDGPUdiv_fmas (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)), 9045ffd83dbSDimitry Andric (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), 9055ffd83dbSDimitry Andric (vt (VOP3Mods vt:$src2, i32:$src2_modifiers)), 9065ffd83dbSDimitry Andric (i1 CondReg)), 9075ffd83dbSDimitry Andric (inst $src0_modifiers, $src0, $src1_modifiers, $src1, $src2_modifiers, $src2) 9085ffd83dbSDimitry Andric>; 9095ffd83dbSDimitry Andric 9105ffd83dbSDimitry Andriclet WaveSizePredicate = isWave64 in { 911e8d8bef9SDimitry Andricdef : DivFmasPat<f32, V_DIV_FMAS_F32_e64, VCC>; 912e8d8bef9SDimitry Andricdef : DivFmasPat<f64, V_DIV_FMAS_F64_e64, VCC>; 9135ffd83dbSDimitry Andric} 
// Wave32: the condition is VCC_LO.
let WaveSizePredicate = isWave32 in {
def : DivFmasPat<f32, V_DIV_FMAS_F32_e64, VCC_LO>;
def : DivFmasPat<f64, V_DIV_FMAS_F64_e64, VCC_LO>;
} // End WaveSizePredicate = isWave32

// VOP3 profile with the VOP3_OPSEL feature set and with clamp and omod
// disabled; used by the v_dot2_* definitions below.
class VOP3_DOT_Profile<VOPProfile P> : VOP3_Profile<P, VOP3_OPSEL> {
  let HasClamp = 0;
  let HasOMod = 0;
}

let SubtargetPredicate = isGFX11Plus in {
  defm V_MAXMIN_F32 : VOP3Inst<"v_maxmin_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
  defm V_MINMAX_F32 : VOP3Inst<"v_minmax_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
  defm V_MAXMIN_F16 : VOP3Inst<"v_maxmin_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
  defm V_MINMAX_F16 : VOP3Inst<"v_minmax_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
  defm V_MAXMIN_U32 : VOP3Inst<"v_maxmin_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
  defm V_MINMAX_U32 : VOP3Inst<"v_minmax_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
  defm V_MAXMIN_I32 : VOP3Inst<"v_maxmin_i32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
  defm V_MINMAX_I32 : VOP3Inst<"v_minmax_i32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
  defm V_CVT_PK_I16_F32 : VOP3Inst<"v_cvt_pk_i16_f32", VOP3_Profile<VOP_V2I16_F32_F32>>;
  defm V_CVT_PK_U16_F32 : VOP3Inst<"v_cvt_pk_u16_f32", VOP3_Profile<VOP_V2I16_F32_F32>>;
} // End SubtargetPredicate = isGFX11Plus

// These are defined with ReadsModeReg = 0; the f16 forms use VOP3_OPSEL.
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
  defm V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
  defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
  defm V_MAXIMUMMINIMUM_F16 : VOP3Inst<"v_maximumminimum_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
  defm V_MINIMUMMAXIMUM_F16 : VOP3Inst<"v_minimummaximum_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0

let OtherPredicates = [HasDot9Insts], IsDOT=1 in {
  defm V_DOT2_F16_F16 : VOP3Inst<"v_dot2_f16_f16", VOP3_DOT_Profile<VOP_F16_V2F16_V2F16_F16>, int_amdgcn_fdot2_f16_f16>;
  defm V_DOT2_BF16_BF16 : VOP3Inst<"v_dot2_bf16_bf16", VOP3_DOT_Profile<VOP_BF16_V2BF16_V2BF16_BF16>, int_amdgcn_fdot2_bf16_bf16>;
} // End OtherPredicates = [HasDot9Insts], IsDOT=1

// Profile for the single-source v_s_* instructions below.
//   Dst   - register class of the destination (wrapped in VOPDstOperand).
//   SrcOp - register operand used for src0 in the 64-bit encoding.
//   dstVt - destination value type.
//   srcVt - source value type; defaults to dstVt.
// src1 and src2 are untyped; omod and modifiers are enabled.
class VOP_Pseudo_Scalar<RegisterClass Dst, RegisterOperand SrcOp,
                        ValueType dstVt, ValueType srcVt = dstVt>
    : VOPProfile<[dstVt, srcVt, untyped, untyped]> {
  let DstRC = VOPDstOperand<Dst>;
  let Src0RC64 = SrcOp;

  let HasOMod = 1;
  let HasModifiers = 1;
}

def VOP_Pseudo_Scalar_F32 : VOP_Pseudo_Scalar<SReg_32_XEXEC, SSrc_f32, f32>;
// The F16 profile has an f32 destination type and an f16 source type.
def VOP_Pseudo_Scalar_F16 : VOP_Pseudo_Scalar<SReg_32_XEXEC, SSrc_f16, f32, f16>;

// Only the F32 forms are given a selection node here; the F16 forms are
// selected through PseudoScalarPatF16 below.
let SubtargetPredicate = HasPseudoScalarTrans, TRANS = 1,
    isReMaterializable = 1, SchedRW = [WritePseudoScalarTrans] in {
  defm V_S_EXP_F32 : VOP3PseudoScalarInst<"v_s_exp_f32", VOP_Pseudo_Scalar_F32, AMDGPUexp>;
  defm V_S_EXP_F16 : VOP3PseudoScalarInst<"v_s_exp_f16", VOP_Pseudo_Scalar_F16>;
  defm V_S_LOG_F32 : VOP3PseudoScalarInst<"v_s_log_f32", VOP_Pseudo_Scalar_F32, AMDGPUlog>;
  defm V_S_LOG_F16 : VOP3PseudoScalarInst<"v_s_log_f16", VOP_Pseudo_Scalar_F16>;
  defm V_S_RCP_F32 : VOP3PseudoScalarInst<"v_s_rcp_f32", VOP_Pseudo_Scalar_F32, AMDGPUrcp>;
  defm V_S_RCP_F16 : VOP3PseudoScalarInst<"v_s_rcp_f16", VOP_Pseudo_Scalar_F16>;
  defm V_S_RSQ_F32 : VOP3PseudoScalarInst<"v_s_rsq_f32", VOP_Pseudo_Scalar_F32, AMDGPUrsq>;
  defm V_S_RSQ_F16 : VOP3PseudoScalarInst<"v_s_rsq_f16", VOP_Pseudo_Scalar_F16>;
  defm V_S_SQRT_F32 : VOP3PseudoScalarInst<"v_s_sqrt_f32", VOP_Pseudo_Scalar_F32, any_amdgcn_sqrt>;
  defm V_S_SQRT_F16 : VOP3PseudoScalarInst<"v_s_sqrt_f16", VOP_Pseudo_Scalar_F16>;
} // End SubtargetPredicate = HasPseudoScalarTrans, TRANS = 1, ...

// Selects a uniform f16 unary `node` (source matched through VOP3Mods0) to
// `inst`. The instruction result is typed f32, so it is wrapped in a
// COPY_TO_REGCLASS to SReg_32_XEXEC typed as f16.
class PseudoScalarPatF16<SDPatternOperator node, VOP3_Pseudo inst> : GCNPat <
  (f16 (UniformUnaryFrag<node> (f16 (VOP3Mods0 f16:$src0, i32:$src0_modifiers,
                                               i1:$clamp, i32:$omod)))),
  (f16 (COPY_TO_REGCLASS (f32 (inst i32:$src0_modifiers, f16:$src0, i1:$clamp,
                                    i32:$omod)),
                         SReg_32_XEXEC))
>;

let SubtargetPredicate = HasPseudoScalarTrans in {
  def : PseudoScalarPatF16<AMDGPUexpf16, V_S_EXP_F16_e64>;
  def : PseudoScalarPatF16<AMDGPUlogf16, V_S_LOG_F16_e64>;
  def : PseudoScalarPatF16<AMDGPUrcp, V_S_RCP_F16_e64>;
  def : PseudoScalarPatF16<AMDGPUrsq, V_S_RSQ_F16_e64>;
  def : PseudoScalarPatF16<any_amdgcn_sqrt, V_S_SQRT_F16_e64>;
} // End SubtargetPredicate = HasPseudoScalarTrans

//===----------------------------------------------------------------------===//
// Integer Clamp Patterns
//===----------------------------------------------------------------------===//

// Builds the source pattern (P.DstVT (node src0[, src1[, src2]])) for a
// profile with 1, 2 or 3 source arguments; `ret` picks the variant that
// matches P.NumSrcArgs (anything other than 3 or 2 falls back to one source).
class getClampPat<VOPProfile P, SDPatternOperator node> {
  dag ret3 = (P.DstVT (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2));
  dag ret2 = (P.DstVT (node P.Src0VT:$src0, P.Src1VT:$src1));
  dag ret1 = (P.DstVT (node P.Src0VT:$src0));
  dag ret = !if(!eq(P.NumSrcArgs, 3), ret3,
            !if(!eq(P.NumSrcArgs, 2), ret2,
            ret1));
}

// Builds the matching result dag: `inst` applied to the same sources plus a
// trailing (i1 0) operand (presumably the clamp bit, given the section name).
class getClampRes<VOPProfile P, Instruction inst> {
  dag ret3 = (inst P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, (i1 0));
  dag ret2 = (inst P.Src0VT:$src0, P.Src1VT:$src1, (i1 0));
  dag ret1 = (inst P.Src0VT:$src0, (i1 0));
  dag ret = !if(!eq(P.NumSrcArgs, 3), ret3,
            !if(!eq(P.NumSrcArgs, 2), ret2,
            ret1));
}

// Selects `node` to `inst`, deriving both dags from the instruction's own
// profile (inst.Pfl).
class IntClampPat<VOP3InstBase inst, SDPatternOperator node> : GCNPat<
  getClampPat<inst.Pfl, node>.ret,
  getClampRes<inst.Pfl, inst>.ret
>;

def : IntClampPat<V_MAD_I32_I24_e64, AMDGPUmad_i24>;
def : IntClampPat<V_MAD_U32_U24_e64, AMDGPUmad_u24>;

def : IntClampPat<V_SAD_U8_e64, int_amdgcn_sad_u8>;
def : IntClampPat<V_SAD_HI_U8_e64, int_amdgcn_sad_hi_u8>;
def : IntClampPat<V_SAD_U16_e64, int_amdgcn_sad_u16>;

def : IntClampPat<V_MSAD_U8_e64, int_amdgcn_msad_u8>;
def : IntClampPat<V_MQSAD_PK_U16_U8_e64, int_amdgcn_mqsad_pk_u16_u8>;

def : IntClampPat<V_QSAD_PK_U16_U8_e64, int_amdgcn_qsad_pk_u16_u8>;
def : IntClampPat<V_MQSAD_U32_U8_e64, int_amdgcn_mqsad_u32_u8>;

//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// GFX12.
//===----------------------------------------------------------------------===//

// GFX12 real encodings. The first template argument is the opcode. In the
// *_with_name forms the two strings are presumably the pseudo's record name
// and the assembler mnemonic, by analogy with VOP3_Real_gfx10_with_name later
// in this file - confirm against the helper's definition.
defm V_MIN3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x229, "V_MIN3_F32", "v_min3_num_f32">;
defm V_MAX3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x22a, "V_MAX3_F32", "v_max3_num_f32">;
defm V_MIN3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x22b, "V_MIN3_F16", "v_min3_num_f16">;
defm V_MAX3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x22c, "V_MAX3_F16", "v_max3_num_f16">;
defm V_MINIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22d>;
defm V_MAXIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22e>;
defm V_MINIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12<0x22f>;
defm V_MAXIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12<0x230>;
defm V_MED3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x231, "V_MED3_F32", "v_med3_num_f32">;
defm V_MED3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x232, "V_MED3_F16", "v_med3_num_f16">;
defm V_MINMAX_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x268, "V_MINMAX_F32", "v_minmax_num_f32">;
defm V_MAXMIN_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x269, "V_MAXMIN_F32", "v_maxmin_num_f32">;
defm V_MINMAX_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x26a, "V_MINMAX_F16", "v_minmax_num_f16">;
defm V_MAXMIN_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x26b, "V_MAXMIN_F16", "v_maxmin_num_f16">;
defm V_MINIMUMMAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26c>;
defm V_MAXIMUMMINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26d>;
defm V_MINIMUMMAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x26e>;
defm V_MAXIMUMMINIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x26f>;
defm V_S_EXP_F32 : VOP3Only_Real_Base_gfx12<0x280>;
defm V_S_EXP_F16 : VOP3Only_Real_Base_gfx12<0x281>;
defm V_S_LOG_F32 : VOP3Only_Real_Base_gfx12<0x282>;
defm V_S_LOG_F16 : VOP3Only_Real_Base_gfx12<0x283>;
defm V_S_RCP_F32 : VOP3Only_Real_Base_gfx12<0x284>;
defm V_S_RCP_F16 : VOP3Only_Real_Base_gfx12<0x285>;
defm V_S_RSQ_F32 : VOP3Only_Real_Base_gfx12<0x286>;
defm V_S_RSQ_F16 : VOP3Only_Real_Base_gfx12<0x287>;
defm V_S_SQRT_F32 : VOP3Only_Real_Base_gfx12<0x288>;
defm V_S_SQRT_F16 : VOP3Only_Real_Base_gfx12<0x289>;
defm V_MAD_CO_U64_U32 : VOP3be_Real_with_name_gfx12<0x2fe, "V_MAD_U64_U32", "v_mad_co_u64_u32">;
defm V_MAD_CO_I64_I32 : VOP3be_Real_with_name_gfx12<0x2ff, "V_MAD_I64_I32", "v_mad_co_i64_i32">;
defm V_MINIMUM_F64 : VOP3Only_Real_Base_gfx12<0x341>;
defm V_MAXIMUM_F64 : VOP3Only_Real_Base_gfx12<0x342>;
defm V_MINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x365>;
defm V_MAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x366>;
defm V_MINIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x367>;
defm V_MAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x368>;

defm V_PERMLANE16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x30f>;
defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>;

defm V_CVT_PK_FP8_F32 : VOP3Only_Realtriple_gfx12<0x369>;
defm V_CVT_PK_BF8_F32 : VOP3Only_Realtriple_gfx12<0x36a>;
defm V_CVT_SR_FP8_F32_gfx12 : VOP3_Realtriple_with_name_gfx12<0x36b, "V_CVT_SR_FP8_F32_gfx12", "v_cvt_sr_fp8_f32" >;
defm V_CVT_SR_BF8_F32_gfx12 : VOP3_Realtriple_with_name_gfx12<0x36c, "V_CVT_SR_BF8_F32_gfx12", "v_cvt_sr_bf8_f32">;

//===----------------------------------------------------------------------===//
// GFX11, GFX12
//===----------------------------------------------------------------------===//

// Each multiclass below instantiates the named real-instruction multiclass
// twice, once with GFX11Gen and once with GFX12Gen, forwarding the same
// opcode (and names, where present) to both.
multiclass VOP3_Real_with_name_gfx11_gfx12<bits<10> op, string opName,
                                           string asmName> :
  VOP3_Real_with_name<GFX11Gen, op, opName, asmName>,
  VOP3_Real_with_name<GFX12Gen, op, opName, asmName>;

multiclass VOP3_Realtriple_gfx11_gfx12<bits<10> op> :
  VOP3_Realtriple<GFX11Gen, op>, VOP3_Realtriple<GFX12Gen, op>;

multiclass VOP3_Real_Base_gfx11_gfx12<bits<10> op> :
  VOP3_Real_Base<GFX11Gen, op>, VOP3_Real_Base<GFX12Gen, op>;

multiclass VOP3_Realtriple_with_name_gfx11_gfx12<bits<10> op, string opName,
                                                 string asmName> :
  VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName>,
  VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName>;

multiclass VOP3Dot_Realtriple_gfx11_gfx12<bits<10> op> :
  VOP3Dot_Realtriple<GFX11Gen, op>, VOP3Dot_Realtriple<GFX12Gen, op>;

multiclass VOP3be_Real_gfx11_gfx12<bits<10> op, string opName, string asmName> :
  VOP3be_Real<GFX11Gen, op, opName, asmName>,
  VOP3be_Real<GFX12Gen, op, opName, asmName>;

multiclass VOP3_Real_No_Suffix_gfx11_gfx12<bits<10> op> :
  VOP3_Real_No_Suffix<GFX11Gen, op>, VOP3_Real_No_Suffix<GFX12Gen, op>;

// Real encodings. Entries that use a *_gfx11 helper (rather than
// *_gfx11_gfx12) are instantiated here for GFX11 only.
defm V_FMA_DX9_ZERO_F32 : VOP3_Real_with_name_gfx11_gfx12<0x209, "V_FMA_LEGACY_F32", "v_fma_dx9_zero_f32">;
defm V_MAD_I32_I24 : VOP3_Realtriple_gfx11_gfx12<0x20a>;
defm V_MAD_U32_U24 : VOP3_Realtriple_gfx11_gfx12<0x20b>;
defm V_CUBEID_F32 : VOP3_Realtriple_gfx11_gfx12<0x20c>;
defm V_CUBESC_F32 : VOP3_Realtriple_gfx11_gfx12<0x20d>;
defm V_CUBETC_F32 : VOP3_Realtriple_gfx11_gfx12<0x20e>;
defm V_CUBEMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x20f>;
defm V_BFE_U32 : VOP3_Realtriple_gfx11_gfx12<0x210>;
defm V_BFE_I32 : VOP3_Realtriple_gfx11_gfx12<0x211>;
defm V_BFI_B32 : VOP3_Realtriple_gfx11_gfx12<0x212>;
defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>;
defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>;
defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>;
defm V_ALIGNBIT_B32 : VOP3_Realtriple_gfx11_gfx12<0x216>;
defm V_ALIGNBYTE_B32 : VOP3_Realtriple_gfx11_gfx12<0x217>;
defm V_MULLIT_F32 : VOP3_Realtriple_gfx11_gfx12<0x218>;
defm V_MIN3_F32 : VOP3_Realtriple_gfx11<0x219>;
defm V_MIN3_I32 : VOP3_Realtriple_gfx11_gfx12<0x21a>;
defm V_MIN3_U32 : VOP3_Realtriple_gfx11_gfx12<0x21b>;
defm V_MAX3_F32 : VOP3_Realtriple_gfx11<0x21c>;
defm V_MAX3_I32 : VOP3_Realtriple_gfx11_gfx12<0x21d>;
defm V_MAX3_U32 : VOP3_Realtriple_gfx11_gfx12<0x21e>;
defm V_MED3_F32 : VOP3_Realtriple_gfx11<0x21f>;
defm V_MED3_I32 : VOP3_Realtriple_gfx11_gfx12<0x220>;
defm V_MED3_U32 : VOP3_Realtriple_gfx11_gfx12<0x221>;
defm V_SAD_U8 : VOP3_Realtriple_gfx11_gfx12<0x222>;
defm V_SAD_HI_U8 : VOP3_Realtriple_gfx11_gfx12<0x223>;
defm V_SAD_U16 : VOP3_Realtriple_gfx11_gfx12<0x224>;
defm V_SAD_U32 : VOP3_Realtriple_gfx11_gfx12<0x225>;
defm V_CVT_PK_U8_F32 : VOP3_Realtriple_gfx11_gfx12<0x226>;
defm V_DIV_FIXUP_F32 : VOP3_Real_Base_gfx11_gfx12<0x227>;
defm V_DIV_FIXUP_F64 : VOP3_Real_Base_gfx11_gfx12<0x228>;
defm V_DIV_FMAS_F32 : VOP3_Real_Base_gfx11_gfx12<0x237>;
defm V_DIV_FMAS_F64 : VOP3_Real_Base_gfx11_gfx12<0x238>;
defm V_MSAD_U8 : VOP3_Realtriple_gfx11_gfx12<0x239>;
defm V_QSAD_PK_U16_U8 : VOP3_Real_Base_gfx11_gfx12<0x23a>;
defm V_MQSAD_PK_U16_U8 : VOP3_Real_Base_gfx11_gfx12<0x23b>;
defm V_MQSAD_U32_U8 : VOP3_Real_Base_gfx11_gfx12<0x23d>;
defm V_XOR3_B32 : VOP3_Realtriple_gfx11_gfx12<0x240>;
defm V_MAD_U16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x241, "V_MAD_U16_gfx9", "v_mad_u16">;
defm V_PERM_B32 : VOP3_Realtriple_gfx11_gfx12<0x244>;
defm V_XAD_U32 : VOP3_Realtriple_gfx11_gfx12<0x245>;
defm V_LSHL_ADD_U32 : VOP3_Realtriple_gfx11_gfx12<0x246>;
defm V_ADD_LSHL_U32 : VOP3_Realtriple_gfx11_gfx12<0x247>;
defm V_FMA_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x248, "V_FMA_F16_gfx9", "v_fma_f16">;
defm V_MIN3_F16 : VOP3_Realtriple_gfx11<0x249>;
defm V_MIN3_I16 : VOP3_Realtriple_gfx11_gfx12<0x24a>;
defm V_MIN3_U16 : VOP3_Realtriple_gfx11_gfx12<0x24b>;
defm V_MAX3_F16 : VOP3_Realtriple_gfx11<0x24c>;
defm V_MAX3_I16 : VOP3_Realtriple_gfx11_gfx12<0x24d>;
defm V_MAX3_U16 : VOP3_Realtriple_gfx11_gfx12<0x24e>;
defm V_MED3_F16 : VOP3_Realtriple_gfx11<0x24f>;
defm V_MED3_I16 : VOP3_Realtriple_gfx11_gfx12<0x250>;
defm V_MED3_U16 : VOP3_Realtriple_gfx11_gfx12<0x251>;
defm V_MAD_I16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x253, "V_MAD_I16_gfx9", "v_mad_i16">;
defm V_DIV_FIXUP_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x254, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">;
defm V_ADD3_U32 : VOP3_Realtriple_gfx11_gfx12<0x255>;
defm V_LSHL_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x256>;
defm V_AND_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x257>;
defm V_OR3_B32 : VOP3_Realtriple_gfx11_gfx12<0x258>;
defm V_MAD_U32_U16 : VOP3_Realtriple_gfx11_gfx12<0x259>;
defm V_MAD_I32_I16 : VOP3_Realtriple_gfx11_gfx12<0x25a>;
defm V_PERMLANE16_B32 : VOP3_Real_Base_gfx11_gfx12<0x25b>;
defm V_PERMLANEX16_B32 : VOP3_Real_Base_gfx11_gfx12<0x25c>;
defm V_MAXMIN_F32 : VOP3_Realtriple_gfx11<0x25e>;
defm V_MINMAX_F32 : VOP3_Realtriple_gfx11<0x25f>;
defm V_MAXMIN_F16 : VOP3_Realtriple_gfx11<0x260>;
defm V_MINMAX_F16 : VOP3_Realtriple_gfx11<0x261>;
defm V_MAXMIN_U32 : VOP3_Realtriple_gfx11_gfx12<0x262>;
defm V_MINMAX_U32 : VOP3_Realtriple_gfx11_gfx12<0x263>;
defm V_MAXMIN_I32 : VOP3_Realtriple_gfx11_gfx12<0x264>;
defm V_MINMAX_I32 : VOP3_Realtriple_gfx11_gfx12<0x265>;
defm V_DOT2_F16_F16 : VOP3Dot_Realtriple_gfx11_gfx12<0x266>;
defm V_DOT2_BF16_BF16 : VOP3Dot_Realtriple_gfx11_gfx12<0x267>;
defm V_DIV_SCALE_F32 : VOP3be_Real_gfx11_gfx12<0x2fc, "V_DIV_SCALE_F32", "v_div_scale_f32">;
defm V_DIV_SCALE_F64 : VOP3be_Real_gfx11_gfx12<0x2fd, "V_DIV_SCALE_F64", "v_div_scale_f64">;
defm V_MAD_U64_U32_gfx11 : VOP3be_Real_gfx11<0x2fe, "V_MAD_U64_U32_gfx11", "v_mad_u64_u32">;
defm V_MAD_I64_I32_gfx11 : VOP3be_Real_gfx11<0x2ff, "V_MAD_I64_I32_gfx11", "v_mad_i64_i32">;
defm V_ADD_NC_U16 : VOP3Only_Realtriple_gfx11_gfx12<0x303>;
defm V_SUB_NC_U16 : VOP3Only_Realtriple_gfx11_gfx12<0x304>;
defm V_MUL_LO_U16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x305, "v_mul_lo_u16">;
defm V_CVT_PK_I16_F32 : VOP3_Realtriple_gfx11_gfx12<0x306>;
defm V_CVT_PK_U16_F32 : VOP3_Realtriple_gfx11_gfx12<0x307>;
defm V_MAX_U16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x309, "v_max_u16">;
defm V_MAX_I16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x30a, "v_max_i16">;
defm V_MIN_U16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x30b, "v_min_u16">;
defm V_MIN_I16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x30c, "v_min_i16">;
defm V_ADD_NC_I16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x30d, "V_ADD_I16", "v_add_nc_i16">;
defm V_SUB_NC_I16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x30e, "V_SUB_I16", "v_sub_nc_i16">;
defm V_PACK_B32_F16 : VOP3_Realtriple_gfx11_gfx12<0x311>;
defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x312, "V_CVT_PKNORM_I16_F16" , "v_cvt_pk_norm_i16_f16" >;
defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x313, "V_CVT_PKNORM_U16_F16" , "v_cvt_pk_norm_u16_f16" >;
defm V_SUB_NC_I32 : VOP3_Realtriple_with_name_gfx11_gfx12<0x325, "V_SUB_I32", "v_sub_nc_i32">;
defm V_ADD_NC_I32 : VOP3_Realtriple_with_name_gfx11_gfx12<0x326, "V_ADD_I32", "v_add_nc_i32">;
defm V_ADD_F64 : VOP3_Real_Base_gfx11<0x327>;
defm V_MUL_F64 : VOP3_Real_Base_gfx11<0x328>;
defm V_MIN_F64 : VOP3_Real_Base_gfx11<0x329>;
defm V_MAX_F64 : VOP3_Real_Base_gfx11<0x32a>;
defm V_LDEXP_F64 : VOP3_Real_Base_gfx11_gfx12<0x32b>;
defm V_MUL_LO_U32 : VOP3_Real_Base_gfx11_gfx12<0x32c>;
defm V_MUL_HI_U32 : VOP3_Real_Base_gfx11_gfx12<0x32d>;
defm V_MUL_HI_I32 : VOP3_Real_Base_gfx11_gfx12<0x32e>;
defm V_TRIG_PREOP_F64 : VOP3_Real_Base_gfx11_gfx12<0x32f>;
defm V_LSHLREV_B16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x338, "v_lshlrev_b16">;
defm V_LSHRREV_B16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x339, "v_lshrrev_b16">;
defm V_ASHRREV_I16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x33a, "v_ashrrev_i16">;
defm V_LSHLREV_B64 : VOP3_Real_Base_gfx11<0x33c>;
defm V_LSHRREV_B64 : VOP3_Real_Base_gfx11_gfx12<0x33d>;
defm V_ASHRREV_I64 : VOP3_Real_Base_gfx11_gfx12<0x33e>;
defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx11_gfx12<0x360>; // Pseudo in VOP2
// The real V_WRITELANE_B32 is given an explicit input operand list that
// includes $vdst_in.
let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
  defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx11_gfx12<0x361>; // Pseudo in VOP2
} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)
defm V_AND_B16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x362, "v_and_b16">;
defm V_OR_B16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x363, "v_or_b16">;
defm V_XOR_B16_t16 : VOP3Only_Realtriple_t16_gfx11_gfx12<0x364, "v_xor_b16">;

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

// Multiclasses that emit the `_gfx10` real instruction for a pseudo. All of
// them are assembled only for isGFX10Only and decoded in the "GFX10"
// namespace. `op` is the opcode.
let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
  // Real for the pseudo named NAME#"_e64", using the VOP3e_gfx10 encoding.
  multiclass VOP3_Real_gfx10<bits<10> op> {
    def _gfx10 :
      VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl>;
  }
  // Same, but the pseudo is named NAME with no "_e64" suffix.
  multiclass VOP3_Real_No_Suffix_gfx10<bits<10> op> {
    def _gfx10 :
      VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<op, !cast<VOP_Pseudo>(NAME).Pfl>;
  }
  // Real for the pseudo opName#"_e64", with the assembly string rebuilt from
  // asmName plus the pseudo's operand string; also sets IsSingle.
  multiclass VOP3_Real_gfx10_with_name<bits<10> op, string opName,
                                       string asmName> {
    def _gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<op, !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
        VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName#"_e64");
        let AsmString = asmName # ps.AsmOperands;
        let IsSingle = 1;
      }
  }
  // VOP3be_gfx10 encoding variant.
  multiclass VOP3be_Real_gfx10<bits<10> op> {
    def _gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
      VOP3be_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
  // VOP3Interp_gfx10 encoding variant; the pseudo is named NAME (no suffix).
  multiclass VOP3Interp_Real_gfx10<bits<10> op> {
    def _gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX10>,
      VOP3Interp_gfx10<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
  }
  // VOP3OpSel_gfx10 encoding variant.
  multiclass VOP3OpSel_Real_gfx10<bits<10> op> {
    def _gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
      VOP3OpSel_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
  // VOP3OpSel_gfx10 variant with separate pseudo name and assembly mnemonic.
  // Unlike VOP3_Real_gfx10_with_name, it does not set IsSingle.
  multiclass VOP3OpSel_Real_gfx10_with_name<bits<10> op, string opName,
                                            string asmName> {
    def _gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
      VOP3OpSel_gfx10<op, !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
        VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName#"_e64");
        let AsmString = asmName # ps.AsmOperands;
      }
  }
} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"

let IsInvalidSingleUseConsumer = 1 in {
  defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx10<0x360>;
  let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in), IsInvalidSingleUseProducer = 1 in {
    defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx10<0x361>;
  } // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in), IsInvalidSingleUseProducer = 1
} // End IsInvalidSingleUseConsumer = 1

let SubtargetPredicate = isGFX10Before1030 in {
  defm V_MUL_LO_I32 : VOP3_Real_gfx10<0x16b>;
} // End SubtargetPredicate = isGFX10Before1030
// GFX10 real encodings, instantiated through the *_gfx10 multiclasses defined
// earlier in this section; the template argument is the opcode.
defm V_XOR3_B32 : VOP3_Real_gfx10<0x178>;
defm V_LSHLREV_B64 : VOP3_Real_gfx10<0x2ff>;
defm V_LSHRREV_B64 : VOP3_Real_gfx10<0x300>;
defm V_ASHRREV_I64 : VOP3_Real_gfx10<0x301>;
defm V_PERM_B32 : VOP3_Real_gfx10<0x344>;
defm V_XAD_U32 : VOP3_Real_gfx10<0x345>;
defm V_LSHL_ADD_U32 : VOP3_Real_gfx10<0x346>;
defm V_ADD_LSHL_U32 : VOP3_Real_gfx10<0x347>;
defm V_ADD3_U32 : VOP3_Real_gfx10<0x36d>;
defm V_LSHL_OR_B32 : VOP3_Real_gfx10<0x36f>;
defm V_AND_OR_B32 : VOP3_Real_gfx10<0x371>;
defm V_OR3_B32 : VOP3_Real_gfx10<0x372>;

// TODO-GFX10: add MC tests for v_add/sub_nc_i16
defm V_ADD_NC_I16 :
  VOP3OpSel_Real_gfx10_with_name<0x30d, "V_ADD_I16", "v_add_nc_i16">;
defm V_SUB_NC_I16 :
  VOP3OpSel_Real_gfx10_with_name<0x30e, "V_SUB_I16", "v_sub_nc_i16">;
defm V_SUB_NC_I32 :
  VOP3_Real_gfx10_with_name<0x376, "V_SUB_I32", "v_sub_nc_i32">;
defm V_ADD_NC_I32 :
  VOP3_Real_gfx10_with_name<0x37f, "V_ADD_I32", "v_add_nc_i32">;

defm V_INTERP_P1_F32_e64 : VOP3Interp_Real_gfx10<0x200>;
defm V_INTERP_P2_F32_e64 : VOP3Interp_Real_gfx10<0x201>;
defm V_INTERP_MOV_F32_e64 : VOP3Interp_Real_gfx10<0x202>;

defm V_INTERP_P1LL_F16 : VOP3Interp_Real_gfx10<0x342>;
defm V_INTERP_P1LV_F16 : VOP3Interp_Real_gfx10<0x343>;
defm V_INTERP_P2_F16 : VOP3Interp_Real_gfx10<0x35a>;

// GFX10 real encodings that use the op_sel encoding helpers.
defm V_PACK_B32_F16 : VOP3OpSel_Real_gfx10<0x311>;
defm V_CVT_PKNORM_I16_F16 : VOP3OpSel_Real_gfx10<0x312>;
defm V_CVT_PKNORM_U16_F16 : VOP3OpSel_Real_gfx10<0x313>;

defm V_MIN3_F16 : VOP3OpSel_Real_gfx10<0x351>;
defm V_MIN3_I16 : VOP3OpSel_Real_gfx10<0x352>;
defm V_MIN3_U16 : VOP3OpSel_Real_gfx10<0x353>;
defm V_MAX3_F16 : VOP3OpSel_Real_gfx10<0x354>;
defm V_MAX3_I16 : VOP3OpSel_Real_gfx10<0x355>;
defm V_MAX3_U16 : VOP3OpSel_Real_gfx10<0x356>;
defm V_MED3_F16 : VOP3OpSel_Real_gfx10<0x357>;
defm V_MED3_I16 : VOP3OpSel_Real_gfx10<0x358>;
defm V_MED3_U16 : VOP3OpSel_Real_gfx10<0x359>;
defm V_MAD_U32_U16 : VOP3OpSel_Real_gfx10<0x373>;
defm V_MAD_I32_I16 : VOP3OpSel_Real_gfx10<0x375>;

// These take their operands from the *_gfx9 pseudos while keeping the plain
// assembler mnemonic.
defm V_MAD_U16 :
  VOP3OpSel_Real_gfx10_with_name<0x340, "V_MAD_U16_gfx9", "v_mad_u16">;
defm V_FMA_F16 :
  VOP3OpSel_Real_gfx10_with_name<0x34b, "V_FMA_F16_gfx9", "v_fma_f16">;
defm V_MAD_I16 :
  VOP3OpSel_Real_gfx10_with_name<0x35e, "V_MAD_I16_gfx9", "v_mad_i16">;
defm V_DIV_FIXUP_F16 :
  VOP3OpSel_Real_gfx10_with_name<0x35f, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">;

defm V_ADD_NC_U16 : VOP3OpSel_Real_gfx10<0x303>;
defm V_SUB_NC_U16 : VOP3OpSel_Real_gfx10<0x304>;
// FIXME-GFX10-OPSEL: Need to add "selective" opsel support to some of these
// (they do not support SDWA or DPP).
defm V_MUL_LO_U16      : VOP3_Real_gfx10_with_name<0x305, "V_MUL_LO_U16",  "v_mul_lo_u16">;
defm V_LSHRREV_B16     : VOP3_Real_gfx10_with_name<0x307, "V_LSHRREV_B16", "v_lshrrev_b16">;
defm V_ASHRREV_I16     : VOP3_Real_gfx10_with_name<0x308, "V_ASHRREV_I16", "v_ashrrev_i16">;
defm V_MAX_U16         : VOP3_Real_gfx10_with_name<0x309, "V_MAX_U16",     "v_max_u16">;
defm V_MAX_I16         : VOP3_Real_gfx10_with_name<0x30a, "V_MAX_I16",     "v_max_i16">;
defm V_MIN_U16         : VOP3_Real_gfx10_with_name<0x30b, "V_MIN_U16",     "v_min_u16">;
defm V_MIN_I16         : VOP3_Real_gfx10_with_name<0x30c, "V_MIN_I16",     "v_min_i16">;
defm V_LSHLREV_B16     : VOP3_Real_gfx10_with_name<0x314, "V_LSHLREV_B16", "v_lshlrev_b16">;
defm V_PERMLANE16_B32  : VOP3OpSel_Real_gfx10<0x377>;
defm V_PERMLANEX16_B32 : VOP3OpSel_Real_gfx10<0x378>;

//===----------------------------------------------------------------------===//
// GFX7, GFX10.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7" in {
  // Emits the "_gfx7" real for the pseudo named NAME#"_e64", in the SI encoding
  // family. `op` is declared 10 bits wide, but only bits 8..0 are forwarded to
  // the gfx6/gfx7 encoding class.
  multiclass VOP3_Real_gfx7<bits<10> op> {
    def _gfx7 : VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
                VOP3e_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }

  // Same as VOP3_Real_gfx7, but encoded through VOP3be_gfx6_gfx7.
  multiclass VOP3be_Real_gfx7<bits<10> op> {
    def _gfx7 : VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
                VOP3be_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX7Only, DecoderNamespace = "GFX7"

// Convenience multiclasses: instantiate the gfx7 and the gfx10 real with the
// same opcode.
multiclass VOP3_Real_gfx7_gfx10<bits<10> op> :
  VOP3_Real_gfx7<op>,
  VOP3_Real_gfx10<op>;

multiclass VOP3be_Real_gfx7_gfx10<bits<10> op> :
  VOP3be_Real_gfx7<op>,
  VOP3be_Real_gfx10<op>;

defm V_QSAD_PK_U16_U8 : VOP3_Real_gfx7_gfx10<0x172>;
defm V_MQSAD_U32_U8   : VOP3_Real_gfx7_gfx10<0x175>;
defm V_MAD_U64_U32    : VOP3be_Real_gfx7_gfx10<0x176>;
defm V_MAD_I64_I32    : VOP3be_Real_gfx7_gfx10<0x177>;

//===----------------------------------------------------------------------===//
// GFX6, GFX7, GFX10.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  // Emits the "_gfx6_gfx7" real for the pseudo named NAME#"_e64", in the SI
  // encoding family. Only bits 8..0 of the 10-bit `op` are forwarded to the
  // encoding class.
  multiclass VOP3_Real_gfx6_gfx7<bits<10> op> {
    def _gfx6_gfx7 : VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
                     VOP3e_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }

  // Same as VOP3_Real_gfx6_gfx7, but encoded through VOP3be_gfx6_gfx7.
  multiclass VOP3be_Real_gfx6_gfx7<bits<10> op> {
    def _gfx6_gfx7 : VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
                     VOP3be_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

// Convenience multiclasses: instantiate the gfx6/gfx7 and the gfx10 real with
// the same opcode.
multiclass VOP3_Real_gfx6_gfx7_gfx10<bits<10> op> :
  VOP3_Real_gfx6_gfx7<op>,
  VOP3_Real_gfx10<op>;

multiclass VOP3be_Real_gfx6_gfx7_gfx10<bits<10> op> :
  VOP3be_Real_gfx6_gfx7<op>,
  VOP3be_Real_gfx10<op>;

// Instantiated for gfx6/gfx7 only (no gfx10 real from these defms).
defm V_LSHL_B64        : VOP3_Real_gfx6_gfx7<0x161>;
defm V_LSHR_B64        : VOP3_Real_gfx6_gfx7<0x162>;
defm V_ASHR_I64        : VOP3_Real_gfx6_gfx7<0x163>;
defm V_MUL_LO_I32      : VOP3_Real_gfx6_gfx7<0x16b>;

// Instantiated for gfx6/gfx7 and gfx10 with a shared opcode.
defm V_MAD_LEGACY_F32  : VOP3_Real_gfx6_gfx7_gfx10<0x140>;
defm V_MAD_F32         : VOP3_Real_gfx6_gfx7_gfx10<0x141>;
defm V_MAD_I32_I24     : VOP3_Real_gfx6_gfx7_gfx10<0x142>;
defm V_MAD_U32_U24     : VOP3_Real_gfx6_gfx7_gfx10<0x143>;
defm V_CUBEID_F32      : VOP3_Real_gfx6_gfx7_gfx10<0x144>;
defm V_CUBESC_F32      : VOP3_Real_gfx6_gfx7_gfx10<0x145>;
defm V_CUBETC_F32      : VOP3_Real_gfx6_gfx7_gfx10<0x146>;
defm V_CUBEMA_F32      : VOP3_Real_gfx6_gfx7_gfx10<0x147>;
defm V_BFE_U32         : VOP3_Real_gfx6_gfx7_gfx10<0x148>;
defm V_BFE_I32         : VOP3_Real_gfx6_gfx7_gfx10<0x149>;
defm V_BFI_B32         : VOP3_Real_gfx6_gfx7_gfx10<0x14a>;
defm V_FMA_F32         : VOP3_Real_gfx6_gfx7_gfx10<0x14b>;
defm V_FMA_F64         : VOP3_Real_gfx6_gfx7_gfx10<0x14c>;
defm V_LERP_U8         : VOP3_Real_gfx6_gfx7_gfx10<0x14d>;
defm V_ALIGNBIT_B32    : VOP3_Real_gfx6_gfx7_gfx10<0x14e>;
defm V_ALIGNBYTE_B32   : VOP3_Real_gfx6_gfx7_gfx10<0x14f>;
defm V_MULLIT_F32      : VOP3_Real_gfx6_gfx7_gfx10<0x150>;
defm V_MIN3_F32        : VOP3_Real_gfx6_gfx7_gfx10<0x151>;
defm V_MIN3_I32        : VOP3_Real_gfx6_gfx7_gfx10<0x152>;
defm V_MIN3_U32        : VOP3_Real_gfx6_gfx7_gfx10<0x153>;
defm V_MAX3_F32        : VOP3_Real_gfx6_gfx7_gfx10<0x154>;
defm V_MAX3_I32        : VOP3_Real_gfx6_gfx7_gfx10<0x155>;
defm V_MAX3_U32        : VOP3_Real_gfx6_gfx7_gfx10<0x156>;
defm V_MED3_F32        : VOP3_Real_gfx6_gfx7_gfx10<0x157>;
defm V_MED3_I32        : VOP3_Real_gfx6_gfx7_gfx10<0x158>;
defm V_MED3_U32        : VOP3_Real_gfx6_gfx7_gfx10<0x159>;
defm V_SAD_U8          : VOP3_Real_gfx6_gfx7_gfx10<0x15a>;
defm V_SAD_HI_U8       : VOP3_Real_gfx6_gfx7_gfx10<0x15b>;
defm V_SAD_U16         : VOP3_Real_gfx6_gfx7_gfx10<0x15c>;
defm V_SAD_U32         : VOP3_Real_gfx6_gfx7_gfx10<0x15d>;
defm V_CVT_PK_U8_F32   : VOP3_Real_gfx6_gfx7_gfx10<0x15e>;
defm V_DIV_FIXUP_F32   : VOP3_Real_gfx6_gfx7_gfx10<0x15f>;
defm V_DIV_FIXUP_F64   : VOP3_Real_gfx6_gfx7_gfx10<0x160>;
defm V_ADD_F64         : VOP3_Real_gfx6_gfx7_gfx10<0x164>;
defm V_MUL_F64         : VOP3_Real_gfx6_gfx7_gfx10<0x165>;
defm V_MIN_F64         : VOP3_Real_gfx6_gfx7_gfx10<0x166>;
defm V_MAX_F64         : VOP3_Real_gfx6_gfx7_gfx10<0x167>;
defm V_LDEXP_F64       : VOP3_Real_gfx6_gfx7_gfx10<0x168>;
defm V_MUL_LO_U32      : VOP3_Real_gfx6_gfx7_gfx10<0x169>;
defm V_MUL_HI_U32      : VOP3_Real_gfx6_gfx7_gfx10<0x16a>;
defm V_MUL_HI_I32      : VOP3_Real_gfx6_gfx7_gfx10<0x16c>;
defm V_DIV_FMAS_F32    : VOP3_Real_gfx6_gfx7_gfx10<0x16f>;
defm V_DIV_FMAS_F64    : VOP3_Real_gfx6_gfx7_gfx10<0x170>;
defm V_MSAD_U8         : VOP3_Real_gfx6_gfx7_gfx10<0x171>;
defm V_MQSAD_PK_U16_U8 : VOP3_Real_gfx6_gfx7_gfx10<0x173>;
defm V_TRIG_PREOP_F64  : VOP3_Real_gfx6_gfx7_gfx10<0x174>;
defm V_DIV_SCALE_F32   : VOP3be_Real_gfx6_gfx7_gfx10<0x16d>;
defm V_DIV_SCALE_F64   : VOP3be_Real_gfx6_gfx7_gfx10<0x16e>;

// NB: Same opcode as v_mad_legacy_f32
let DecoderNamespace = "GFX10_B" in
defm V_FMA_LEGACY_F32  : VOP3_Real_gfx10<0x140>;

//===----------------------------------------------------------------------===//
// GFX8, GFX9
// (VI).
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {

// Emits the "_vi" real for the pseudo named NAME#"_e64" in the VI encoding
// family, passing the full 10-bit `op` to VOP3e_vi.
multiclass VOP3_Real_vi<bits<10> op> {
  def _vi :
    VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
    VOP3e_vi <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl>;
}

// Like VOP3_Real_vi, but the pseudo is looked up as NAME with no "_e64"
// suffix appended.
multiclass VOP3_Real_No_Suffix_vi<bits<10> op> {
  def _vi :
    VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
    VOP3e_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
}

// Like VOP3_Real_vi, but encoded through VOP3be_vi.
multiclass VOP3be_Real_vi<bits<10> op> {
  def _vi :
    VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
    VOP3be_vi <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl>;
}

// Like VOP3_Real_vi, but encoded through VOP3OpSel_gfx9.
multiclass VOP3OpSel_Real_gfx9<bits<10> op> {
  def _vi :
    VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
    VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl>;
}

// Like VOP3OpSel_Real_gfx9, but additionally drives encoding bit 13 from
// bit 2 of src2_modifiers.
multiclass VOP3OpSel_Real_gfx9_forced_opsel2<bits<10> op> {
  def _vi :
    VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
    VOP3OpSel_gfx9 <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl> {
      let Inst{13} = src2_modifiers{2}; // op_sel(2)
    }
}

// Interp variant: pseudo looked up as NAME (no suffix), encoded through
// VOP3Interp_vi.
multiclass VOP3Interp_Real_vi<bits<10> op> {
  def _vi :
    VOP3_Real<!cast<VOP_Pseudo>(NAME), SIEncodingFamily.VI>,
    VOP3Interp_vi <op, !cast<VOP_Pseudo>(NAME).Pfl>;
}

} // End AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8"

let AssemblerPredicate = isGFX8Only, DecoderNamespace = "GFX8" in {

// GFX8-only "_vi" real; casts the NAME#"_e64" pseudo to VOP3_Pseudo.
multiclass VOP3_F16_Real_vi<bits<10> op> {
  def _vi :
    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
    VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}

// GFX8-only interp "_vi" real; pseudo looked up as NAME (no suffix).
multiclass VOP3Interp_F16_Real_vi<bits<10> op> {
  def _vi :
    VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
    VOP3Interp_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
}

} // End AssemblerPredicate = isGFX8Only, DecoderNamespace = "GFX8"

let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {

// GFX9-only "_gfx9" real bound to the pseudo OpName#"_e64". The assembly
// string is AsmName followed by that pseudo's AsmOperands.
multiclass VOP3_F16_Real_gfx9<bits<10> op, string OpName, string AsmName> {
  def _gfx9 :
    VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.GFX9>,
    VOP3e_vi <op, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
      VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
      let AsmString = AsmName # ps.AsmOperands;
    }
}

// As above, but bound to the pseudo NAME#"_e64" and encoded through
// VOP3OpSel_gfx9.
multiclass VOP3OpSel_F16_Real_gfx9<bits<10> op, string AsmName> {
  def _gfx9 :
    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
    VOP3OpSel_gfx9 <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
      VOP3_Pseudo ps = !cast<VOP3_Pseudo>(NAME#"_e64");
      let AsmString = AsmName # ps.AsmOperands;
    }
}

// Interp variant: bound to the pseudo OpName (no suffix) and encoded through
// VOP3Interp_vi.
multiclass VOP3Interp_F16_Real_gfx9<bits<10> op, string OpName, string AsmName> {
  def _gfx9 :
    VOP3_Real<!cast<VOP3_Pseudo>(OpName), SIEncodingFamily.GFX9>,
    VOP3Interp_vi <op, !cast<VOP3_Pseudo>(OpName).Pfl> {
      VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName);
      let AsmString = AsmName # ps.AsmOperands;
    }
}

// Generic GFX9-only real bound to NAME#"_e64" (cast to VOP_Pseudo) with an
// overridden assembly name.
multiclass VOP3_Real_gfx9<bits<10> op, string AsmName> {
  def _gfx9 :
    VOP3_Real<!cast<VOP_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
    VOP3e_vi <op, !cast<VOP_Pseudo>(NAME#"_e64").Pfl> {
      VOP_Pseudo ps = !cast<VOP_Pseudo>(NAME#"_e64");
      let AsmString = AsmName # ps.AsmOperands;
    }
}

} // End AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9"

defm V_MAD_U64_U32      : VOP3be_Real_vi <0x1E8>;
defm V_MAD_I64_I32      : VOP3be_Real_vi <0x1E9>;

defm V_MAD_LEGACY_F32   : VOP3_Real_vi <0x1c0>;
defm V_MAD_F32          : VOP3_Real_vi <0x1c1>;
defm V_MAD_I32_I24      : VOP3_Real_vi <0x1c2>;
defm V_MAD_U32_U24      : VOP3_Real_vi <0x1c3>;
defm V_CUBEID_F32       : VOP3_Real_vi <0x1c4>;
defm V_CUBESC_F32       : VOP3_Real_vi <0x1c5>;
defm V_CUBETC_F32       : VOP3_Real_vi <0x1c6>;
defm V_CUBEMA_F32       : VOP3_Real_vi <0x1c7>;
defm V_BFE_U32          : VOP3_Real_vi <0x1c8>;
defm V_BFE_I32          : VOP3_Real_vi <0x1c9>;
defm V_BFI_B32          : VOP3_Real_vi <0x1ca>;
defm V_FMA_F32          : VOP3_Real_vi <0x1cb>;
defm V_FMA_F64          : VOP3_Real_vi <0x1cc>;
defm V_LERP_U8          : VOP3_Real_vi <0x1cd>;
defm V_ALIGNBIT_B32     : VOP3_Real_vi <0x1ce>;
defm V_ALIGNBYTE_B32    : VOP3_Real_vi <0x1cf>;
defm V_MIN3_F32         : VOP3_Real_vi <0x1d0>;
defm V_MIN3_I32         : VOP3_Real_vi <0x1d1>;
defm V_MIN3_U32         : VOP3_Real_vi <0x1d2>;
defm V_MAX3_F32         : VOP3_Real_vi <0x1d3>;
defm V_MAX3_I32         : VOP3_Real_vi <0x1d4>;
defm V_MAX3_U32         : VOP3_Real_vi <0x1d5>;
defm V_MED3_F32         : VOP3_Real_vi <0x1d6>;
defm V_MED3_I32         : VOP3_Real_vi <0x1d7>;
defm V_MED3_U32         : VOP3_Real_vi <0x1d8>;
defm V_SAD_U8           : VOP3_Real_vi <0x1d9>;
defm V_SAD_HI_U8        : VOP3_Real_vi <0x1da>;
defm V_SAD_U16          : VOP3_Real_vi <0x1db>;
defm V_SAD_U32          : VOP3_Real_vi <0x1dc>;
defm V_CVT_PK_U8_F32    : VOP3_Real_vi <0x1dd>;
defm V_DIV_FIXUP_F32    : VOP3_Real_vi <0x1de>;
defm V_DIV_FIXUP_F64    : VOP3_Real_vi <0x1df>;
defm V_DIV_SCALE_F32    : VOP3be_Real_vi <0x1e0>;
defm V_DIV_SCALE_F64    : VOP3be_Real_vi <0x1e1>;
defm V_DIV_FMAS_F32     : VOP3_Real_vi <0x1e2>;
defm V_DIV_FMAS_F64     : VOP3_Real_vi <0x1e3>;
defm V_MSAD_U8          : VOP3_Real_vi <0x1e4>;
defm V_QSAD_PK_U16_U8   : VOP3_Real_vi <0x1e5>;
defm V_MQSAD_PK_U16_U8  : VOP3_Real_vi <0x1e6>;
defm V_MQSAD_U32_U8     : VOP3_Real_vi <0x1e7>;

defm V_PERM_B32         : VOP3_Real_vi <0x1ed>;

// Reals instantiated through the GFX8-only F16 multiclasses.
defm V_MAD_F16          : VOP3_F16_Real_vi <0x1ea>;
defm V_MAD_U16          : VOP3_F16_Real_vi <0x1eb>;
defm V_MAD_I16          : VOP3_F16_Real_vi <0x1ec>;
defm V_FMA_F16          : VOP3_F16_Real_vi <0x1ee>;
defm V_DIV_FIXUP_F16    : VOP3_F16_Real_vi <0x1ef>;
defm V_INTERP_P2_F16    : VOP3Interp_F16_Real_vi <0x276>;

// GFX9 "legacy" reals: same opcodes and pseudos as the GFX8-only entries
// above, printed under a "*_legacy_*" assembly name.
let FPDPRounding = 1 in {
defm V_MAD_LEGACY_F16       : VOP3_F16_Real_gfx9 <0x1ea, "V_MAD_F16",       "v_mad_legacy_f16">;
defm V_FMA_LEGACY_F16       : VOP3_F16_Real_gfx9 <0x1ee, "V_FMA_F16",       "v_fma_legacy_f16">;
defm V_DIV_FIXUP_LEGACY_F16 : VOP3_F16_Real_gfx9 <0x1ef, "V_DIV_FIXUP_F16", "v_div_fixup_legacy_f16">;
defm V_INTERP_P2_LEGACY_F16 : VOP3Interp_F16_Real_gfx9 <0x276, "V_INTERP_P2_F16", "v_interp_p2_legacy_f16">;
} // End FPDPRounding = 1

defm V_MAD_LEGACY_U16       : VOP3_F16_Real_gfx9 <0x1eb, "V_MAD_U16", "v_mad_legacy_u16">;
defm V_MAD_LEGACY_I16       : VOP3_F16_Real_gfx9 <0x1ec, "V_MAD_I16", "v_mad_legacy_i16">;

// GFX9 reals for the "*_gfx9" pseudos, printed under the plain mnemonic.
defm V_MAD_F16_gfx9         : VOP3OpSel_F16_Real_gfx9 <0x203, "v_mad_f16">;
defm V_MAD_U16_gfx9         : VOP3OpSel_F16_Real_gfx9 <0x204, "v_mad_u16">;
defm V_MAD_I16_gfx9         : VOP3OpSel_F16_Real_gfx9 <0x205, "v_mad_i16">;
defm V_FMA_F16_gfx9         : VOP3OpSel_F16_Real_gfx9 <0x206, "v_fma_f16">;
defm V_DIV_FIXUP_F16_gfx9   : VOP3OpSel_F16_Real_gfx9 <0x207, "v_div_fixup_f16">;
defm V_INTERP_P2_F16_gfx9   : VOP3Interp_F16_Real_gfx9 <0x277, "V_INTERP_P2_F16_gfx9", "v_interp_p2_f16">;

defm V_ADD_I32          : VOP3_Real_vi <0x29c>;
defm V_SUB_I32          : VOP3_Real_vi <0x29d>;

defm V_INTERP_P1_F32_e64  : VOP3Interp_Real_vi <0x270>;
defm V_INTERP_P2_F32_e64  : VOP3Interp_Real_vi <0x271>;
defm V_INTERP_MOV_F32_e64 : VOP3Interp_Real_vi <0x272>;

defm V_INTERP_P1LL_F16  : VOP3Interp_Real_vi <0x274>;
defm V_INTERP_P1LV_F16  : VOP3Interp_Real_vi <0x275>;
defm V_ADD_F64          : VOP3_Real_vi <0x280>;
defm V_MUL_F64          : VOP3_Real_vi <0x281>;
defm V_MIN_F64          : VOP3_Real_vi <0x282>;
defm V_MAX_F64          : VOP3_Real_vi <0x283>;
defm V_LDEXP_F64        : VOP3_Real_vi <0x284>;
defm V_MUL_LO_U32       : VOP3_Real_vi <0x285>;

// removed from VI as identical to V_MUL_LO_U32
let isAsmParserOnly = 1 in {
defm V_MUL_LO_I32       : VOP3_Real_vi <0x285>;
}

defm V_MUL_HI_U32       : VOP3_Real_vi <0x286>;
defm V_MUL_HI_I32       : VOP3_Real_vi <0x287>;

defm V_READLANE_B32     : VOP3_Real_No_Suffix_vi <0x289>;
defm V_WRITELANE_B32    : VOP3_Real_No_Suffix_vi <0x28a>;

defm V_LSHLREV_B64      : VOP3_Real_vi <0x28f>;
defm V_LSHRREV_B64      : VOP3_Real_vi <0x290>;
defm V_ASHRREV_I64      : VOP3_Real_vi <0x291>;
defm V_TRIG_PREOP_F64   : VOP3_Real_vi <0x292>;

defm V_LSHL_ADD_U32     : VOP3_Real_vi <0x1fd>;
defm V_ADD_LSHL_U32     : VOP3_Real_vi <0x1fe>;
defm V_ADD3_U32         : VOP3_Real_vi <0x1ff>;
defm V_LSHL_OR_B32      : VOP3_Real_vi <0x200>;
defm V_AND_OR_B32       : VOP3_Real_vi <0x201>;
defm V_OR3_B32          : VOP3_Real_vi <0x202>;
defm V_PACK_B32_F16     : VOP3OpSel_Real_gfx9 <0x2a0>;

defm V_XAD_U32          : VOP3_Real_vi <0x1f3>;

defm V_MIN3_F16         : VOP3OpSel_Real_gfx9 <0x1f4>;
defm V_MIN3_I16         : VOP3OpSel_Real_gfx9 <0x1f5>;
defm V_MIN3_U16         : VOP3OpSel_Real_gfx9 <0x1f6>;

defm V_MAX3_F16         : VOP3OpSel_Real_gfx9 <0x1f7>;
defm V_MAX3_I16         : VOP3OpSel_Real_gfx9 <0x1f8>;
defm V_MAX3_U16         : VOP3OpSel_Real_gfx9 <0x1f9>;

defm V_MED3_F16         : VOP3OpSel_Real_gfx9 <0x1fa>;
defm V_MED3_I16         : VOP3OpSel_Real_gfx9 <0x1fb>;
defm V_MED3_U16         : VOP3OpSel_Real_gfx9 <0x1fc>;

defm V_ADD_I16          : VOP3OpSel_Real_gfx9 <0x29e>;
defm V_SUB_I16            : VOP3OpSel_Real_gfx9 <0x29f>;

defm V_MAD_U32_U16        : VOP3OpSel_Real_gfx9 <0x1f1>;
defm V_MAD_I32_I16        : VOP3OpSel_Real_gfx9 <0x1f2>;

defm V_CVT_PKNORM_I16_F16 : VOP3OpSel_Real_gfx9 <0x299>;
defm V_CVT_PKNORM_U16_F16 : VOP3OpSel_Real_gfx9 <0x29a>;

defm V_LSHL_ADD_U64       : VOP3_Real_vi <0x208>;

// The two V_CVT_SR_* entries use the forced_opsel2 multiclass variant; the
// two V_CVT_PK_* entries use the plain OpSel one.
defm V_CVT_PK_FP8_F32     : VOP3OpSel_Real_gfx9 <0x2a2>;
defm V_CVT_PK_BF8_F32     : VOP3OpSel_Real_gfx9 <0x2a3>;
defm V_CVT_SR_FP8_F32     : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a4>;
defm V_CVT_SR_BF8_F32     : VOP3OpSel_Real_gfx9_forced_opsel2 <0x2a5>;