1//===-- VOP2Instructions.td - Vector Instruction Definitions --------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9//===----------------------------------------------------------------------===// 10// VOP2 Classes 11//===----------------------------------------------------------------------===// 12 13class VOP2e <bits<6> op, VOPProfile P> : Enc32 { 14 bits<8> vdst; 15 bits<9> src0; 16 bits<8> src1; 17 18 let Inst{8-0} = !if(P.HasSrc0, src0, 0); 19 let Inst{16-9} = !if(P.HasSrc1, src1, 0); 20 let Inst{24-17} = !if(P.EmitDst, vdst, 0); 21 let Inst{30-25} = op; 22 let Inst{31} = 0x0; //encoding 23} 24 25class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 { 26 bits<8> vdst; 27 bits<9> src0; 28 bits<8> src1; 29 bits<32> imm; 30 31 let Inst{8-0} = !if(P.HasSrc0, src0, 0); 32 let Inst{16-9} = !if(P.HasSrc1, src1, 0); 33 let Inst{24-17} = !if(P.EmitDst, vdst, 0); 34 let Inst{30-25} = op; 35 let Inst{31} = 0x0; // encoding 36 let Inst{63-32} = imm; 37} 38 39class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> { 40 bits<8> vdst; 41 bits<8> src1; 42 43 let Inst{8-0} = 0xf9; // sdwa 44 let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); 45 let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); 46 let Inst{30-25} = op; 47 let Inst{31} = 0x0; // encoding 48} 49 50class VOP2_SDWA9Ae <bits<6> op, VOPProfile P> : VOP_SDWA9Ae <P> { 51 bits<8> vdst; 52 bits<9> src1; 53 54 let Inst{8-0} = 0xf9; // sdwa 55 let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); 56 let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); 57 let Inst{30-25} = op; 58 let Inst{31} = 0x0; // encoding 59 let Inst{63} = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr 60} 61 62class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suffix = "_e32"> : 63 VOP_Pseudo <opName, suffix, P, P.Outs32, P.Ins32, "", pattern> { 64 65 let AsmOperands = P.Asm32; 66 67 let Size = 4; 68 let mayLoad = 0; 69 let mayStore = 0; 70 let hasSideEffects = 0; 71 72 let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret); 73 74 let mayRaiseFPException = ReadsModeReg; 75 76 let VOP2 = 1; 77 let VALU = 1; 78 let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]); 79 80 let AsmVariantName = AMDGPUAsmVariants.Default; 81} 82 83class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemonic> : 84 VOP_Real <ps>, 85 InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []>, 86 SIMCInstr <ps.PseudoInstr, EncodingFamily> { 87 88 let VALU = 1; 89 let VOP2 = 1; 90 let isPseudo = 0; 91 let isCodeGenOnly = 0; 92 93 let Constraints = ps.Constraints; 94 let DisableEncoding = ps.DisableEncoding; 95 96 // copy relevant pseudo op flags 97 let SubtargetPredicate = ps.SubtargetPredicate; 98 let OtherPredicates = ps.OtherPredicates; 99 let AsmMatchConverter = ps.AsmMatchConverter; 100 let AsmVariantName = ps.AsmVariantName; 101 let Constraints = ps.Constraints; 102 let DisableEncoding = ps.DisableEncoding; 103 let TSFlags = ps.TSFlags; 104 let UseNamedOperandTable = ps.UseNamedOperandTable; 105 let Uses = ps.Uses; 106 let Defs = ps.Defs; 107 let SchedRW = ps.SchedRW; 108 let mayLoad = ps.mayLoad; 109 let mayStore = ps.mayStore; 110} 111 112class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : 113 VOP_SDWA_Pseudo <OpName, P, pattern> { 114 let AsmMatchConverter = "cvtSdwaVOP2"; 115} 116 117class VOP2_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : 118 VOP_DPP_Pseudo <OpName, P, pattern> { 119} 120 121 122class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies { 123 list<dag> ret = !if(P.HasModifiers, 124 [(set P.DstVT:$vdst, 125 (node (P.Src0VT 126 !if(P.HasOMod, 127 (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), 128 (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))), 129 (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], 130 [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]); 131} 132 133multiclass VOP2Inst_e32<string opName, 134 VOPProfile P, 135 SDPatternOperator node = null_frag, 136 string revOp = opName, 137 bit GFX9Renamed = 0> { 138 let renamedInGFX9 = GFX9Renamed in { 139 def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>, 140 Commutable_REV<revOp#"_e32", !eq(revOp, opName)>; 141 } // End renamedInGFX9 = GFX9Renamed 142} 143multiclass 144 VOP2Inst_e32_VOPD<string opName, VOPProfile P, bits<5> VOPDOp, 145 string VOPDName, SDPatternOperator node = null_frag, 146 string revOp = opName, bit GFX9Renamed = 0> { 147 defm NAME : VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>, 148 VOPD_Component<VOPDOp, VOPDName>; 149} 150multiclass VOP2Inst_e64<string opName, 151 VOPProfile P, 152 SDPatternOperator node = null_frag, 153 string revOp = opName, 154 bit GFX9Renamed = 0> { 155 let renamedInGFX9 = GFX9Renamed in { 156 def _e64 : VOP3InstBase <opName, P, node, 1>, 157 Commutable_REV<revOp#"_e64", !eq(revOp, opName)>; 158 159 let SubtargetPredicate = isGFX11Plus in { 160 foreach _ = BoolToList<P.HasExtVOP3DPP>.ret in 161 def _e64_dpp : VOP3_DPP_Pseudo <opName, P>; 162 } // End SubtargetPredicate = isGFX11Plus 163 } // End renamedInGFX9 = GFX9Renamed 164} 165 166multiclass VOP2Inst_sdwa<string opName, 167 VOPProfile P, 168 bit GFX9Renamed = 0> { 169 let renamedInGFX9 = GFX9Renamed in { 170 foreach _ = BoolToList<P.HasExtSDWA>.ret in 171 def _sdwa : VOP2_SDWA_Pseudo <opName, P>; 172 } // End renamedInGFX9 = GFX9Renamed 173} 174 175multiclass VOP2Inst<string opName, 176 VOPProfile P, 177 SDPatternOperator node = null_frag, 178 string revOp = opName, 179 bit GFX9Renamed = 0> : 180 VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>, 181 VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>, 182 VOP2Inst_sdwa<opName, P, GFX9Renamed> { 183 let renamedInGFX9 = GFX9Renamed in { 184 foreach _ = BoolToList<P.HasExtDPP>.ret in 185 def _dpp : VOP2_DPP_Pseudo <opName, P>; 186 } 187} 188 189multiclass VOP2Inst_VOPD<string opName, 190 VOPProfile P, 191 bits<5> VOPDOp, 192 string VOPDName, 193 SDPatternOperator node = null_frag, 194 string revOp = opName, 195 bit GFX9Renamed = 0> : 196 VOP2Inst_e32_VOPD<opName, P, VOPDOp, VOPDName, node, revOp, GFX9Renamed>, 197 VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>, 198 VOP2Inst_sdwa<opName, P, GFX9Renamed> { 199 let renamedInGFX9 = GFX9Renamed in { 200 foreach _ = BoolToList<P.HasExtDPP>.ret in 201 def _dpp : VOP2_DPP_Pseudo <opName, P>; 202 } 203} 204 205multiclass VOP2bInst <string opName, 206 VOPProfile P, 207 SDPatternOperator node = null_frag, 208 string revOp = opName, 209 bit GFX9Renamed = 0, 210 bit useSGPRInput = !eq(P.NumSrcArgs, 3)> { 211 let renamedInGFX9 = GFX9Renamed in { 212 let SchedRW = [Write32Bit, WriteSALU] in { 213 let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in { 214 def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>, 215 Commutable_REV<revOp#"_e32", !eq(revOp, opName)> { 216 let usesCustomInserter = true; 217 } 218 219 foreach _ = BoolToList<P.HasExtSDWA>.ret in 220 def _sdwa : VOP2_SDWA_Pseudo <opName, P> { 221 let AsmMatchConverter = "cvtSdwaVOP2b"; 222 } 223 foreach _ = BoolToList<P.HasExtDPP>.ret in 224 def _dpp : VOP2_DPP_Pseudo <opName, P>; 225 } // End Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] 226 227 def _e64 : VOP3InstBase <opName, P, node, 1>, 228 Commutable_REV<revOp#"_e64", !eq(revOp, opName)>; 229 230 let SubtargetPredicate = isGFX11Plus in { 231 foreach _ = BoolToList<P.HasExtVOP3DPP>.ret in 232 def _e64_dpp : VOP3_DPP_Pseudo <opName, P>; 233 } // End SubtargetPredicate = isGFX11Plus 234 } 235 } 236} 237 238class VOP2bInstAlias <VOP2_Pseudo ps, Instruction inst, 239 string OpName, string opnd> : 240 InstAlias <OpName#" "#!subst("vcc", opnd, ps.Pfl.Asm32), 241 (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0, 242 ps.Pfl.Src1RC32:$src1)>, 243 PredicateControl { 244} 245 246multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> { 247 let WaveSizePredicate = isWave32 in { 248 def : VOP2bInstAlias<ps, inst, OpName, "vcc_lo">; 249 } 250 let WaveSizePredicate = isWave64 in { 251 def : VOP2bInstAlias<ps, inst, OpName, "vcc">; 252 } 253} 254 255multiclass 256 VOP2eInst_Base<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName, 257 SDPatternOperator node, string revOp, bit useSGPRInput> { 258 259 let SchedRW = [Write32Bit] in { 260 let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in { 261 if !eq(VOPDOp, -1) then 262 def _e32 : VOP2_Pseudo <opName, P>, 263 Commutable_REV<revOp#"_e32", !eq(revOp, opName)>; 264 else 265 def _e32 : VOP2_Pseudo <opName, P>, 266 Commutable_REV<revOp#"_e32", !eq(revOp, opName)>, 267 VOPD_Component<VOPDOp, VOPDName>; 268 269 foreach _ = BoolToList<P.HasExtSDWA>.ret in 270 def _sdwa : VOP2_SDWA_Pseudo <opName, P> { 271 let AsmMatchConverter = "cvtSdwaVOP2e"; 272 } 273 274 foreach _ = BoolToList<P.HasExtDPP>.ret in 275 def _dpp : VOP2_DPP_Pseudo <opName, P>; 276 } 277 278 def _e64 : VOP3InstBase <opName, P, node, 1>, 279 Commutable_REV<revOp#"_e64", !eq(revOp, opName)> { 280 let isReMaterializable = 1; 281 } 282 283 let SubtargetPredicate = isGFX11Plus in { 284 foreach _ = BoolToList<P.HasExtVOP3DPP>.ret in 285 def _e64_dpp : VOP3_DPP_Pseudo <opName, P>; 286 } // End SubtargetPredicate = isGFX11Plus 287 } 288} 289 290multiclass 291 VOP2eInst<string opName, VOPProfile P, SDPatternOperator node = null_frag, 292 string revOp = opName, bit useSGPRInput = !eq(P.NumSrcArgs, 3)> 293 : VOP2eInst_Base<opName, P, -1, "", node, revOp, useSGPRInput>; 294 295multiclass 296 VOP2eInst_VOPD<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName, 297 SDPatternOperator node = null_frag, string revOp = opName, 298 bit useSGPRInput = !eq(P.NumSrcArgs, 3)> 299 : VOP2eInst_Base<opName, P, VOPDOp, VOPDName, node, revOp, useSGPRInput>; 300 301class VOP2eInstAlias <VOP2_Pseudo ps, Instruction inst, string opnd = ""> : 302 InstAlias <ps.OpName#" "#ps.Pfl.Asm32#", "#opnd, 303 (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0, 304 ps.Pfl.Src1RC32:$src1)>, PredicateControl; 305 306class VOP2e64InstAlias <VOP3_Pseudo ps, Instruction inst> : 307 InstAlias <ps.OpName#" "#ps.Pfl.Asm64, 308 (inst ps.Pfl.DstRC:$vdst, VOPDstS64orS32:$sdst, 309 ps.Pfl.Src0RC32:$src0, ps.Pfl.Src1RC32:$src1, clampmod:$clamp)>, 310 PredicateControl; 311 312multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> { 313 let WaveSizePredicate = isWave32 in { 314 def : VOP2eInstAlias<ps, inst, "vcc_lo">; 315 } 316 let WaveSizePredicate = isWave64 in { 317 def : VOP2eInstAlias<ps, inst, "vcc">; 318 } 319} 320 321class VOP_MADK_Base<ValueType vt> : VOPProfile <[vt, vt, vt, vt]> { 322 string AsmVOPDXDeferred = ?; 323} 324 325class VOP_MADAK <ValueType vt> : VOP_MADK_Base<vt> { 326 field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm); 327 field dag Ins32 = !if(!eq(vt.Size, 32), 328 (ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm), 329 (ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm)); 330 field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$imm); 331 // Note that both src0X and imm are deferred 332 let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$immDeferred); 333 field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, VGPR_32:$vsrc1Y, ImmOpType:$imm); 334 335 field string Asm32 = "$vdst, $src0, $src1, $imm"; 336 field string AsmVOPDX = "$vdstX, $src0X, $vsrc1X, $imm"; 337 let AsmVOPDXDeferred = "$vdstX, $src0X, $vsrc1X, $immDeferred"; 338 field string AsmVOPDY = "$vdstY, $src0Y, $vsrc1Y, $imm"; 339 field bit HasExt = 0; 340 let IsSingle = 1; 341} 342 343def VOP_MADAK_F16 : VOP_MADAK <f16>; 344def VOP_MADAK_F32 : VOP_MADAK <f32>; 345 346class VOP_MADMK <ValueType vt> : VOP_MADK_Base<vt> { 347 field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm); 348 field dag Ins32 = (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1); 349 field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$imm, VGPR_32:$vsrc1X); 350 let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$immDeferred, VGPR_32:$vsrc1X); 351 field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, ImmOpType:$imm, VGPR_32:$vsrc1Y); 352 353 field string Asm32 = "$vdst, $src0, $imm, $src1"; 354 field string AsmVOPDX = "$vdstX, $src0X, $imm, $vsrc1X"; 355 let AsmVOPDXDeferred = "$vdstX, $src0X, $immDeferred, $vsrc1X"; 356 field string AsmVOPDY = "$vdstY, $src0Y, $imm, $vsrc1Y"; 357 field bit HasExt = 0; 358 let IsSingle = 1; 359} 360 361def VOP_MADMK_F16 : VOP_MADMK <f16>; 362def VOP_MADMK_F32 : VOP_MADMK <f32>; 363 364class getRegisterOperandForVT<ValueType VT> { 365 RegisterOperand ret = RegisterOperand<getVregSrcForVT<VT>.ret>; 366} 367 368// FIXME: Remove src2_modifiers. It isn't used, so is wasting memory 369// and processing time but it makes it easier to convert to mad. 370class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, vt0]> { 371 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT>.ret:$src2); 372 let Ins64 = getIns64<Src0RC64, Src1RC64, getRegisterOperandForVT<Src2VT>.ret, 3, 373 0, HasModifiers, HasModifiers, HasOMod, 374 Src0Mod, Src1Mod, Src2Mod>.ret; 375 let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, 376 Src1ModDPP:$src1_modifiers, Src1DPP:$src1, 377 getVregSrcForVT<Src2VT>.ret:$src2, // stub argument 378 dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, 379 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); 380 let InsDPP16 = !con(InsDPP, (ins FI:$fi)); 381 let InsVOP3Base = getIns64<Src0VOP3DPP, Src1RC64, RegisterOperand<VGPR_32>, 3, 382 0, HasModifiers, HasModifiers, HasOMod, 383 Src0Mod, Src1Mod, Src2Mod>.ret; 384 385 let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, 386 Src1ModDPP:$src1_modifiers, Src1DPP:$src1, 387 getVregSrcForVT<Src2VT>.ret:$src2, // stub argument 388 dpp8:$dpp8, FI:$fi); 389 let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, 390 Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, 391 getVregSrcForVT<Src2VT>.ret:$src2, // stub argument 392 clampmod:$clamp, omod:$omod, 393 dst_sel:$dst_sel, dst_unused:$dst_unused, 394 src0_sel:$src0_sel, src1_sel:$src1_sel); 395 let Asm32 = getAsm32<1, 2, vt0>.ret; 396 let Asm64 = getAsm64<1, 2, 0, HasModifiers, HasOMod, vt0>.ret; 397 let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt0>.ret; 398 let AsmDPP16 = getAsmDPP16<1, 2, HasModifiers, vt0>.ret; 399 let AsmDPP8 = getAsmDPP8<1, 2, 0, vt0>.ret; 400 let AsmSDWA = getAsmSDWA<1, 2, vt0>.ret; 401 let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt0>.ret; 402 let HasSrc2 = 0; 403 let HasSrc2Mods = 0; 404 405 let HasExt = 1; 406 let HasExtDPP = 1; 407 let HasExt32BitDPP = 1; 408 let HasExtSDWA = 1; 409 let HasExtSDWA9 = 0; 410 let TieRegDPP = "$src2"; 411} 412 413def VOP_MAC_F16 : VOP_MAC <f16>; 414def VOP_MAC_F32 : VOP_MAC <f32>; 415let HasExtDPP = 0, HasExt32BitDPP = 0 in 416def VOP_MAC_LEGACY_F32 : VOP_MAC <f32>; 417let HasExtSDWA = 0, HasExt32BitDPP = 0, HasExt64BitDPP = 1 in 418def VOP_MAC_F64 : VOP_MAC <f64>; 419 420class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> { 421 let HasClamp = 0; 422 let HasExtSDWA = 0; 423 let HasOpSel = 0; 424 let IsPacked = 0; 425} 426 427def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> { 428 let Src0ModDPP = FPVRegInputMods; 429 let Src1ModDPP = FPVRegInputMods; 430} 431 432def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32> { 433 let HasExtVOP3DPP = 0; 434 let HasSrc0Mods = 1; 435 let HasSrc1Mods = 1; 436} 437 438// Write out to vcc or arbitrary SGPR. 439def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp=*/1> { 440 let Asm32 = "$vdst, vcc, $src0, $src1"; 441 let Asm64 = "$vdst, $sdst, $src0, $src1$clamp"; 442 let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; 443 let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; 444 let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; 445 let AsmDPP8 = "$vdst, vcc, $src0, $src1 $dpp8$fi"; 446 let AsmDPP16 = AsmDPP#"$fi"; 447 let AsmVOP3DPPBase = Asm64; 448 let InsDPP = (ins DstRCDPP:$old, 449 Src0DPP:$src0, 450 Src1DPP:$src1, 451 dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, 452 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); 453 let InsDPP16 = !con(InsDPP, (ins FI:$fi)); 454 let InsDPP8 = (ins DstRCDPP:$old, 455 Src0DPP:$src0, 456 Src1DPP:$src1, 457 dpp8:$dpp8, FI:$fi); 458 let Outs32 = (outs DstRC:$vdst); 459 let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst); 460 let OutsVOP3DPP = Outs64; 461 let OutsVOP3DPP8 = Outs64; 462} 463 464// Write out to vcc or arbitrary SGPR and read in from vcc or 465// arbitrary SGPR. 466def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*/1> { 467 let HasSrc2Mods = 0; 468 let Asm32 = "$vdst, vcc, $src0, $src1, vcc"; 469 let Asm64 = "$vdst, $sdst, $src0, $src1, $src2$clamp"; 470 let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; 471 let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; 472 let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; 473 let AsmDPP8 = "$vdst, vcc, $src0, $src1, vcc $dpp8$fi"; 474 let AsmDPP16 = AsmDPP#"$fi"; 475 let Outs32 = (outs DstRC:$vdst); 476 let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst); 477 let AsmVOP3DPPBase = Asm64; 478 let OutsVOP3DPP = Outs64; 479 let OutsVOP3DPP8 = Outs64; 480 481 // Suppress src2 implied by type since the 32-bit encoding uses an 482 // implicit VCC use. 483 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); 484 485 let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, 486 Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, 487 clampmod:$clamp, 488 dst_sel:$dst_sel, dst_unused:$dst_unused, 489 src0_sel:$src0_sel, src1_sel:$src1_sel); 490 491 let InsDPP = (ins DstRCDPP:$old, 492 Src0DPP:$src0, 493 Src1DPP:$src1, 494 dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, 495 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); 496 let InsDPP16 = !con(InsDPP, (ins FI:$fi)); 497 let InsDPP8 = (ins DstRCDPP:$old, 498 Src0DPP:$src0, 499 Src1DPP:$src1, 500 dpp8:$dpp8, FI:$fi); 501 502 let HasExt = 1; 503 let HasExtDPP = 1; 504 let HasExt32BitDPP = 1; 505 let HasExtSDWA = 1; 506 let HasExtSDWA9 = 1; 507} 508 509// Read in from vcc or arbitrary SGPR. 510class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT, /*EnableF32SrcMods=*/1> { 511 let Asm32 = "$vdst, $src0, $src1"; 512 let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2"; 513 let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; 514 let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; 515 let AsmDPP = "$vdst, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; 516 let AsmDPP8 = "$vdst, $src0, $src1, vcc $dpp8$fi"; 517 let AsmDPP16 = AsmDPP#"$fi"; 518 let AsmVOP3DPPBase = Asm64; 519 520 let Outs32 = (outs DstRC:$vdst); 521 let Outs64 = (outs DstRC:$vdst); 522 523 // Suppress src2 implied by type since the 32-bit encoding uses an 524 // implicit VCC use. 525 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); 526 527 let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, 528 Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, 529 clampmod:$clamp, 530 dst_sel:$dst_sel, dst_unused:$dst_unused, 531 src0_sel:$src0_sel, src1_sel:$src1_sel); 532 533 let InsDPP = (ins DstRCDPP:$old, 534 Src0ModDPP:$src0_modifiers, Src0DPP:$src0, 535 Src1ModDPP:$src1_modifiers, Src1DPP:$src1, 536 dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, 537 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); 538 let InsDPP16 = !con(InsDPP, (ins FI:$fi)); 539 let InsDPP8 = (ins DstRCDPP:$old, 540 Src0ModDPP:$src0_modifiers, Src0DPP:$src0, 541 Src1ModDPP:$src1_modifiers, Src1DPP:$src1, 542 dpp8:$dpp8, FI:$fi); 543 544 let HasExt = 1; 545 let HasExtDPP = 1; 546 let HasExt32BitDPP = 1; 547 let HasExtSDWA = 1; 548 let HasExtSDWA9 = 1; 549} 550 551def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>; 552def VOP2e_I16_I16_I16_I1 : VOP2e_SGPR<[i16, i16, i16, i1]>; 553 554def VOP_READLANE : VOPProfile<[i32, i32, i32, untyped]> { 555 let Outs32 = (outs SReg_32:$vdst); 556 let Outs64 = Outs32; 557 let Ins32 = (ins VRegOrLds_32:$src0, SCSrc_b32:$src1); 558 let Ins64 = Ins32; 559 let Asm32 = " $vdst, $src0, $src1"; 560 let Asm64 = Asm32; 561 562 let HasExt = 0; 563 let HasExtDPP = 0; 564 let HasExt32BitDPP = 0; 565 let HasExt64BitDPP = 0; 566 let HasExtSDWA = 0; 567 let HasExtSDWA9 = 0; 568} 569 570def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> { 571 let Outs32 = (outs VGPR_32:$vdst); 572 let Outs64 = Outs32; 573 let Ins32 = (ins SCSrc_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in); 574 let Ins64 = Ins32; 575 let Asm32 = " $vdst, $src0, $src1"; 576 let Asm64 = Asm32; 577 let HasSrc2 = 0; 578 let HasSrc2Mods = 0; 579 580 let HasExt = 0; 581 let HasExtDPP = 0; 582 let HasExt32BitDPP = 0; 583 let HasExt64BitDPP = 0; 584 let HasExtSDWA = 0; 585 let HasExtSDWA9 = 0; 586} 587 588//===----------------------------------------------------------------------===// 589// VOP2 Instructions 590//===----------------------------------------------------------------------===// 591 592let SubtargetPredicate = isGFX11Plus in 593defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1>; 594defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">; 595let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in 596def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>; 597 598let isCommutable = 1 in { 599let isReMaterializable = 1 in { 600defm V_ADD_F32 : VOP2Inst_VOPD <"v_add_f32", VOP_F32_F32_F32, 0x4, "v_add_f32", any_fadd>; 601defm V_SUB_F32 : VOP2Inst_VOPD <"v_sub_f32", VOP_F32_F32_F32, 0x5, "v_sub_f32", any_fsub>; 602defm V_SUBREV_F32 : VOP2Inst_VOPD <"v_subrev_f32", VOP_F32_F32_F32, 0x6, "v_subrev_f32", null_frag, "v_sub_f32">; 603defm V_MUL_LEGACY_F32 : VOP2Inst_VOPD <"v_mul_legacy_f32", VOP_F32_F32_F32, 0x7, "v_mul_dx9_zero_f32", AMDGPUfmul_legacy>; 604defm V_MUL_F32 : VOP2Inst_VOPD <"v_mul_f32", VOP_F32_F32_F32, 0x3, "v_mul_f32", any_fmul>; 605defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>; 606defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>; 607defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>; 608defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>; 609defm V_MIN_F32 : VOP2Inst_VOPD <"v_min_f32", VOP_F32_F32_F32, 0xb, "v_min_f32", fminnum_like>; 610defm V_MAX_F32 : VOP2Inst_VOPD <"v_max_f32", VOP_F32_F32_F32, 0xa, "v_max_f32", fmaxnum_like>; 611defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>; 612defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>; 613defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>; 614defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>; 615defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, clshr_rev_32, "v_lshr_b32">; 616defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, cashr_rev_32, "v_ashr_i32">; 617defm V_LSHLREV_B32 : VOP2Inst_VOPD <"v_lshlrev_b32", VOP_I32_I32_I32, 0x11, "v_lshlrev_b32", clshl_rev_32, "v_lshl_b32">; 618defm V_AND_B32 : VOP2Inst_VOPD <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, 0x12, "v_and_b32", and>; 619defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>; 620defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>; 621} // End isReMaterializable = 1 622 623let mayRaiseFPException = 0 in { 624let OtherPredicates = [HasMadMacF32Insts] in { 625let Constraints = "$vdst = $src2", DisableEncoding="$src2", 626 isConvertibleToThreeAddress = 1 in { 627defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>; 628 629let SubtargetPredicate = isGFX6GFX7GFX10 in 630defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_MAC_LEGACY_F32>; 631} // End Constraints = "$vdst = $src2", DisableEncoding="$src2", 632 // isConvertibleToThreeAddress = 1 633 634let isReMaterializable = 1 in 635def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>; 636} // End OtherPredicates = [HasMadMacF32Insts] 637} // End mayRaiseFPException = 0 638 639// No patterns so that the scalar instructions are always selected. 640// The scalar versions will be replaced with vector when needed later. 641defm V_ADD_CO_U32 : VOP2bInst <"v_add_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_co_u32", 1>; 642defm V_SUB_CO_U32 : VOP2bInst <"v_sub_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>; 643defm V_SUBREV_CO_U32 : VOP2bInst <"v_subrev_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>; 644defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32", 1>; 645defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>; 646defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>; 647 648 649let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in { 650defm V_ADD_U32 : VOP2Inst_VOPD <"v_add_u32", VOP_I32_I32_I32_ARITH, 0x10, "v_add_nc_u32", null_frag, "v_add_u32", 1>; 651defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>; 652defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>; 653} 654 655} // End isCommutable = 1 656 657// These are special and do not read the exec mask. 658let isConvergent = 1, Uses = []<Register> in { 659def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE, 660 [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>; 661 662let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in { 663def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, 664 [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]>; 665} // End $vdst = $vdst_in, DisableEncoding $vdst_in 666} // End isConvergent = 1 667 668let isReMaterializable = 1 in { 669defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_I32_I32_I32>; 670defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_I32_I32_I32, add_ctpop>; 671defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>; 672defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>; 673defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp>; 674 675let ReadsModeReg = 0, mayRaiseFPException = 0 in { 676defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>; 677defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_u16_f32>; 678} 679 680defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_V2F16_F32_F32, AMDGPUpkrtz_f16_f32>; 681defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_V2I16_I32_I32, AMDGPUpk_u16_u32>; 682defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_V2I16_I32_I32, AMDGPUpk_i16_i32>; 683 684 685let SubtargetPredicate = isGFX6GFX7 in { 686defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>; 687defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>; 688} // End SubtargetPredicate = isGFX6GFX7 689 690let isCommutable = 1 in { 691let SubtargetPredicate = isGFX6GFX7 in { 692defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, csrl_32>; 693defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, csra_32>; 694defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, cshl_32>; 695} // End SubtargetPredicate = isGFX6GFX7 696} // End isCommutable = 1 697} // End isReMaterializable = 1 698 699defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst" 700 701class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> : 702 GCNPat< 703 (getDivergentFrag<Op>.ret Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1), 704 !if(!cast<Commutable_REV>(Inst).IsOrig, 705 (Inst $src0, $src1), 706 (Inst $src1, $src0) 707 ) 708 >; 709 710class DivergentClampingBinOp<SDPatternOperator Op, VOP_Pseudo Inst> : 711 GCNPat< 712 (getDivergentFrag<Op>.ret Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1), 713 !if(!cast<Commutable_REV>(Inst).IsOrig, 714 (Inst $src0, $src1, 0), 715 (Inst $src1, $src0, 0) 716 ) 717 >; 718 719def : DivergentBinOp<csrl_32, V_LSHRREV_B32_e64>; 720def : DivergentBinOp<csra_32, V_ASHRREV_I32_e64>; 721def : DivergentBinOp<cshl_32, V_LSHLREV_B32_e64>; 722 723let SubtargetPredicate = HasAddNoCarryInsts in { 724 def : DivergentClampingBinOp<add, V_ADD_U32_e64>; 725 def : DivergentClampingBinOp<sub, V_SUB_U32_e64>; 726} 727 728let SubtargetPredicate = isGFX6GFX7GFX8GFX9, Predicates = [isGFX6GFX7GFX8GFX9] in { 729def : DivergentClampingBinOp<add, V_ADD_CO_U32_e64>; 730def : DivergentClampingBinOp<sub, V_SUB_CO_U32_e64>; 731} 732 733def : DivergentBinOp<adde, V_ADDC_U32_e32>; 734def : DivergentBinOp<sube, V_SUBB_U32_e32>; 735 736class divergent_i64_BinOp <SDPatternOperator Op, Instruction Inst> : 737 GCNPat< 738 (getDivergentFrag<Op>.ret i64:$src0, i64:$src1), 739 (REG_SEQUENCE VReg_64, 740 (Inst 741 (i32 (EXTRACT_SUBREG $src0, sub0)), 742 (i32 (EXTRACT_SUBREG $src1, sub0)) 743 ), sub0, 744 (Inst 745 (i32 (EXTRACT_SUBREG $src0, sub1)), 746 (i32 (EXTRACT_SUBREG $src1, sub1)) 747 ), sub1 748 ) 749 >; 750 751def : divergent_i64_BinOp <and, V_AND_B32_e64>; 752def : divergent_i64_BinOp <or, V_OR_B32_e64>; 753def : divergent_i64_BinOp <xor, V_XOR_B32_e64>; 754 755 756let SubtargetPredicate = Has16BitInsts in { 757let isReMaterializable = 1 in { 758let FPDPRounding = 1 in { 759def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">; 760defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>; 761} // End FPDPRounding = 1 762 763defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>; 764defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16, clshr_rev_16>; 765defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16, cashr_rev_16>; 766 767let isCommutable = 1 in { 768let FPDPRounding = 1 in { 769defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16, any_fadd>; 770defm V_SUB_F16 : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16, any_fsub>; 771defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">; 772defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, any_fmul>; 773 774let mayRaiseFPException = 0 in { 775def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">; 776} 777 778} // End FPDPRounding = 1 779 780defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16, mul>; 781defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>; 782defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>; 783defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16, umax>; 784defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16, smax>; 785defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16, umin>; 786defm V_MIN_I16 : VOP2Inst <"v_min_i16", VOP_I16_I16_I16, smin>; 787 788let SubtargetPredicate = isGFX8GFX9 in { 789 defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>; 790 defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16_ARITH, sub>; 791 defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16_ARITH, null_frag, "v_sub_u16">; 792} 793} // End isCommutable = 1 794} // End isReMaterializable = 1 795 796// FIXME: Missing FPDPRounding 797let Constraints = "$vdst = $src2", DisableEncoding="$src2", 798 isConvertibleToThreeAddress = 1, isCommutable = 1 in { 799defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>; 800} 801} // End SubtargetPredicate = Has16BitInsts 802 803let SubtargetPredicate = HasDLInsts in { 804 805let isReMaterializable = 1 in 806defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32, xnor>; 807 808def : GCNPat< 809 (i32 (DivergentUnaryFrag<not> (xor_oneuse i32:$src0, i32:$src1))), 810 (i32 (V_XNOR_B32_e64 $src0, $src1)) 811>; 812 813def : GCNPat< 814 (i32 (DivergentBinFrag<xor_oneuse> (not i32:$src0), i32:$src1)), 815 (i32 (V_XNOR_B32_e64 $src0, $src1)) 816>; 817 818def : GCNPat< 819 (i64 (DivergentUnaryFrag<not> (xor_oneuse i64:$src0, i64:$src1))), 820 (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64 821 (i32 (EXTRACT_SUBREG $src0, sub0)), 822 (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0, 823 (i32 (V_XNOR_B32_e64 824 (i32 (EXTRACT_SUBREG $src0, sub1)), 825 (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1) 826>; 827 828def : GCNPat< 829 (i64 (DivergentBinFrag<xor_oneuse> (not i64:$src0), i64:$src1)), 830 (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64 831 (i32 (EXTRACT_SUBREG $src0, sub0)), 832 (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0, 833 (i32 (V_XNOR_B32_e64 834 (i32 (EXTRACT_SUBREG $src0, sub1)), 835 (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1) 836>; 837 838let Constraints = "$vdst = $src2", 839 DisableEncoding = "$src2", 840 isConvertibleToThreeAddress = 1, 841 isCommutable = 1 in 842defm V_FMAC_F32 : VOP2Inst_VOPD <"v_fmac_f32", VOP_MAC_F32, 0x0, "v_fmac_f32">; 843 844} // End SubtargetPredicate = HasDLInsts 845 846let SubtargetPredicate = HasFmaLegacy32 in { 847 848let Constraints = "$vdst = $src2", 849 DisableEncoding = "$src2", 850 isConvertibleToThreeAddress = 1, 851 isCommutable = 1 in 852defm V_FMAC_LEGACY_F32 : VOP2Inst <"v_fmac_legacy_f32", VOP_MAC_LEGACY_F32>; 853 854} // End SubtargetPredicate = HasFmaLegacy32 855 856let SubtargetPredicate = isGFX90APlus, 857 Constraints = "$vdst = $src2", 858 DisableEncoding="$src2", 859 isConvertibleToThreeAddress = 1, 860 isCommutable = 1, 861 SchedRW = [WriteDoubleAdd] in 862defm V_FMAC_F64 : VOP2Inst <"v_fmac_f64", VOP_MAC_F64>; 863 864let Constraints = "$vdst = $src2", 865 DisableEncoding="$src2", 866 isConvertibleToThreeAddress = 1, 867 isCommutable = 1, 868 IsDOT = 1 in { 869 let SubtargetPredicate = HasDot5Insts in 870 defm V_DOT2C_F32_F16 : VOP2Inst_VOPD<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16, 0xc, "v_dot2acc_f32_f16">; 871 let SubtargetPredicate = HasDot6Insts in 872 defm V_DOT4C_I32_I8 : VOP2Inst<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>; 873 874 let SubtargetPredicate = HasDot4Insts in 875 defm V_DOT2C_I32_I16 : VOP2Inst<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>; 876 let SubtargetPredicate = HasDot3Insts in 877 defm V_DOT8C_I32_I4 : VOP2Inst<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>; 878} 879 880let AddedComplexity = 30 in { 881 def : GCNPat< 882 (f32 (AMDGPUfdot2 v2f16:$src0, v2f16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))), 883 (f32 (V_DOT2C_F32_F16_e32 $src0, $src1, $src2)) 884 > { 885 let SubtargetPredicate = HasDot5Insts; 886 } 887 def : GCNPat< 888 (i32 (int_amdgcn_sdot4 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))), 889 (i32 (V_DOT4C_I32_I8_e32 $src0, $src1, $src2)) 890 > { 891 let SubtargetPredicate = HasDot6Insts; 892 } 893 def : GCNPat< 894 (i32 (int_amdgcn_sdot2 v2i16:$src0, v2i16:$src1, i32:$src2, (i1 DSTCLAMP.NONE))), 895 (i32 (V_DOT2C_I32_I16_e32 $src0, $src1, $src2)) 896 > { 897 let SubtargetPredicate = HasDot4Insts; 898 } 899 def : GCNPat< 900 (i32 (int_amdgcn_sdot8 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))), 901 (i32 (V_DOT8C_I32_I4_e32 $src0, $src1, $src2)) 902 > { 903 let SubtargetPredicate = HasDot3Insts; 904 } 905} // End AddedComplexity = 30 906 907let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1 in { 908def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">, VOPD_Component<0x2, "v_fmamk_f32">; 909 910let isCommutable = 1 in 911def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">; 912} 913 914let SubtargetPredicate = isGFX10Plus in { 915 916let FPDPRounding = 1, isReMaterializable = 1 in { 917def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">; 918 919let isCommutable = 1 in 920def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">; 921} // End FPDPRounding = 1, isReMaterializable = 1 922 923let Constraints = "$vdst = $src2", 924 DisableEncoding="$src2", 925 isConvertibleToThreeAddress = 1, 926 isCommutable = 1 in { 927defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>; 928} 929 930} // End SubtargetPredicate = isGFX10Plus 931 932let SubtargetPredicate = HasPkFmacF16Inst in { 933defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>; 934} // End SubtargetPredicate = HasPkFmacF16Inst 935 936// Note: 16-bit instructions produce a 0 result in the high 16-bits 937// on GFX8 and GFX9 and preserve high 16 bits on GFX10+ 938multiclass Arithmetic_i16_0Hi_Pats <SDPatternOperator op, Instruction inst> { 939 940def : GCNPat< 941 (i32 (zext (op i16:$src0, i16:$src1))), 942 (inst VSrc_b16:$src0, VSrc_b16:$src1) 943>; 944 945def : GCNPat< 946 (i64 (zext (op i16:$src0, i16:$src1))), 947 (REG_SEQUENCE VReg_64, 948 (inst $src0, $src1), sub0, 949 (V_MOV_B32_e32 (i32 0)), sub1) 950>; 951} 952 953class ZExt_i16_i1_Pat <SDNode ext> : GCNPat < 954 (i16 (ext i1:$src)), 955 (V_CNDMASK_B32_e64 (i32 0/*src0mod*/), (i32 0/*src0*/), 956 (i32 0/*src1mod*/), (i32 1/*src1*/), 957 $src) 958>; 959 960foreach vt = [i16, v2i16] in { 961def : GCNPat < 962 (and vt:$src0, vt:$src1), 963 (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) 964>; 965 966def : GCNPat < 967 (or vt:$src0, vt:$src1), 968 (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) 969>; 970 971def : GCNPat < 972 (xor vt:$src0, vt:$src1), 973 (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) 974>; 975} 976 977let Predicates = [Has16BitInsts, isGFX8GFX9] in { 978 979// Undo sub x, c -> add x, -c canonicalization since c is more likely 980// an inline immediate than -c. 981// TODO: Also do for 64-bit. 982def : GCNPat< 983 (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)), 984 (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1) 985>; 986 987def : GCNPat< 988 (i32 (zext (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)))), 989 (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1) 990>; 991 992defm : Arithmetic_i16_0Hi_Pats<add, V_ADD_U16_e64>; 993defm : Arithmetic_i16_0Hi_Pats<mul, V_MUL_LO_U16_e64>; 994defm : Arithmetic_i16_0Hi_Pats<sub, V_SUB_U16_e64>; 995defm : Arithmetic_i16_0Hi_Pats<smin, V_MIN_I16_e64>; 996defm : Arithmetic_i16_0Hi_Pats<smax, V_MAX_I16_e64>; 997defm : Arithmetic_i16_0Hi_Pats<umin, V_MIN_U16_e64>; 998defm : Arithmetic_i16_0Hi_Pats<umax, V_MAX_U16_e64>; 999defm : Arithmetic_i16_0Hi_Pats<clshl_rev_16, V_LSHLREV_B16_e64>; 1000defm : Arithmetic_i16_0Hi_Pats<clshr_rev_16, V_LSHRREV_B16_e64>; 1001defm : Arithmetic_i16_0Hi_Pats<cashr_rev_16, V_ASHRREV_I16_e64>; 1002 1003} // End Predicates = [Has16BitInsts, isGFX8GFX9] 1004 1005let Predicates = [Has16BitInsts] in { 1006 1007def : ZExt_i16_i1_Pat<zext>; 1008def : ZExt_i16_i1_Pat<anyext>; 1009 1010def : GCNPat < 1011 (i16 (sext i1:$src)), 1012 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0), 1013 /*src1mod*/(i32 0), /*src1*/(i32 -1), $src) 1014>; 1015 1016} // End Predicates = [Has16BitInsts] 1017 1018 1019let SubtargetPredicate = HasIntClamp in { 1020// Set clamp bit for saturation. 1021def : VOPBinOpClampPat<uaddsat, V_ADD_CO_U32_e64, i32>; 1022def : VOPBinOpClampPat<usubsat, V_SUB_CO_U32_e64, i32>; 1023} 1024 1025let SubtargetPredicate = HasAddNoCarryInsts, OtherPredicates = [HasIntClamp] in { 1026let AddedComplexity = 1 in { // Prefer over form with carry-out. 1027def : VOPBinOpClampPat<uaddsat, V_ADD_U32_e64, i32>; 1028def : VOPBinOpClampPat<usubsat, V_SUB_U32_e64, i32>; 1029} 1030} 1031 1032let SubtargetPredicate = Has16BitInsts, OtherPredicates = [HasIntClamp] in { 1033def : VOPBinOpClampPat<uaddsat, V_ADD_U16_e64, i16>; 1034def : VOPBinOpClampPat<usubsat, V_SUB_U16_e64, i16>; 1035} 1036 1037let SubtargetPredicate = isGFX11Plus in { 1038 let isCommutable = 1 in { 1039 defm V_AND_B16 : VOP2Inst <"v_and_b16", VOP_I16_I16_I16, and>; 1040 defm V_OR_B16 : VOP2Inst <"v_or_b16", VOP_I16_I16_I16, or>; 1041 defm V_XOR_B16 : VOP2Inst <"v_xor_b16", VOP_I16_I16_I16, xor>; 1042 } // End isCommutable = 1 1043} // End SubtargetPredicate = isGFX11Plus 1044 1045//===----------------------------------------------------------------------===// 1046// DPP Encodings 1047//===----------------------------------------------------------------------===// 1048 1049class VOP2_DPP<bits<6> op, VOP2_DPP_Pseudo ps, 1050 string opName = ps.OpName, VOPProfile p = ps.Pfl, 1051 bit IsDPP16 = 0> : 1052 VOP_DPP<opName, p, IsDPP16> { 1053 let hasSideEffects = ps.hasSideEffects; 1054 let Defs = ps.Defs; 1055 let SchedRW = ps.SchedRW; 1056 let Uses = ps.Uses; 1057 1058 bits<8> vdst; 1059 bits<8> src1; 1060 let Inst{8-0} = 0xfa; 1061 let Inst{16-9} = !if(p.HasSrc1, src1{7-0}, 0); 1062 let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0); 1063 let Inst{30-25} = op; 1064 let Inst{31} = 0x0; 1065} 1066 1067class Base_VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps, 1068 string opName = ps.OpName, VOPProfile p = ps.Pfl> : 1069 VOP2_DPP<op, ps, opName, p, 1> { 1070 let AssemblerPredicate = HasDPP16; 1071 let SubtargetPredicate = HasDPP16; 1072 let OtherPredicates = ps.OtherPredicates; 1073} 1074 1075class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps, int subtarget, 1076 string opName = ps.OpName, VOPProfile p = ps.Pfl> : 1077 Base_VOP2_DPP16<op, ps, opName, p>, 1078 SIMCInstr <ps.PseudoInstr, subtarget>; 1079 1080class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps, 1081 VOPProfile p = ps.Pfl> : 1082 VOP_DPP8<ps.OpName, p> { 1083 let hasSideEffects = ps.hasSideEffects; 1084 let Defs = ps.Defs; 1085 let SchedRW = ps.SchedRW; 1086 let Uses = ps.Uses; 1087 1088 bits<8> vdst; 1089 bits<8> src1; 1090 1091 let Inst{8-0} = fi; 1092 let Inst{16-9} = !if(p.HasSrc1, src1{7-0}, 0); 1093 let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0); 1094 let Inst{30-25} = op; 1095 let Inst{31} = 0x0; 1096 1097 let OtherPredicates = ps.OtherPredicates; 1098} 1099 1100//===----------------------------------------------------------------------===// 1101// GFX11. 1102//===----------------------------------------------------------------------===// 1103 1104let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in { 1105 //===------------------------------- VOP2 -------------------------------===// 1106 multiclass VOP2Only_Real_MADK_gfx11<bits<6> op> { 1107 def _gfx11 : 1108 VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX11>, 1109 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>; 1110 } 1111 multiclass VOP2_Real_e32_gfx11<bits<6> op> { 1112 def _e32_gfx11 : 1113 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX11>, 1114 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>; 1115 } 1116 multiclass VOP2Only_Real_e32_gfx11<bits<6> op> { 1117 let IsSingle = 1 in 1118 defm NAME: VOP2_Real_e32_gfx11<op>; 1119 } 1120 multiclass VOP2_Real_e64_gfx11<bits<6> op> { 1121 def _e64_gfx11 : 1122 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX11>, 1123 VOP3e_gfx11<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 1124 } 1125 multiclass VOP2_Real_dpp_gfx11<bits<6> op> { 1126 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in 1127 def _dpp_gfx11 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX11> { 1128 let DecoderNamespace = "DPPGFX11"; 1129 } 1130 } 1131 multiclass VOP2_Real_dpp8_gfx11<bits<6> op> { 1132 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in 1133 def _dpp8_gfx11 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")> { 1134 let DecoderNamespace = "DPP8GFX11"; 1135 } 1136 } 1137 1138 //===------------------------- VOP2 (with name) -------------------------===// 1139 multiclass VOP2_Real_e32_with_name_gfx11<bits<6> op, string opName, 1140 string asmName, bit single = 0> { 1141 defvar ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1142 def _e32_gfx11 : 1143 VOP2_Real<ps, SIEncodingFamily.GFX11, asmName>, 1144 VOP2e<op{5-0}, ps.Pfl>, 1145 MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Plus]> { 1146 let AsmString = asmName # ps.AsmOperands; 1147 let IsSingle = single; 1148 } 1149 } 1150 multiclass VOP2_Real_e64_with_name_gfx11<bits<6> op, string opName, 1151 string asmName> { 1152 defvar ps = !cast<VOP3_Pseudo>(opName#"_e64"); 1153 def _e64_gfx11 : 1154 VOP3_Real<ps, SIEncodingFamily.GFX11>, 1155 VOP3e_gfx11<{0, 1, 0, 0, op{5-0}}, ps.Pfl>, 1156 MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Plus]> { 1157 let AsmString = asmName # ps.AsmOperands; 1158 } 1159 } 1160 1161 multiclass VOP2_Real_dpp_with_name_gfx11<bits<6> op, string opName, 1162 string asmName> { 1163 defvar ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1164 foreach _ = BoolToList<ps.Pfl.HasExtDPP>.ret in 1165 def _dpp_gfx11 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), 1166 SIEncodingFamily.GFX11> { 1167 let AsmString = asmName # ps.Pfl.AsmDPP16; 1168 let DecoderNamespace = "DPPGFX11"; 1169 } 1170 } 1171 multiclass VOP2_Real_dpp8_with_name_gfx11<bits<6> op, string opName, 1172 string asmName> { 1173 defvar ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1174 foreach _ = BoolToList<ps.Pfl.HasExtDPP>.ret in 1175 def _dpp8_gfx11 : VOP2_DPP8<op, ps> { 1176 let AsmString = asmName # ps.Pfl.AsmDPP8; 1177 let DecoderNamespace = "DPP8GFX11"; 1178 } 1179 } 1180 1181 //===------------------------------ VOP2be ------------------------------===// 1182 multiclass VOP2be_Real_e32_gfx11<bits<6> op, string opName, string asmName> { 1183 defvar ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1184 def _e32_gfx11 : 1185 VOP2_Real<ps, SIEncodingFamily.GFX11>, 1186 VOP2e<op{5-0}, ps.Pfl> { 1187 let AsmString = asmName # !subst(", vcc", "", ps.AsmOperands); 1188 } 1189 } 1190 multiclass VOP2be_Real_dpp_gfx11<bits<6> op, string opName, string asmName> { 1191 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in 1192 def _dpp_gfx11 : 1193 VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX11, asmName> { 1194 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16; 1195 let AsmString = asmName # !subst(", vcc", "", AsmDPP); 1196 let DecoderNamespace = "DPPGFX11"; 1197 } 1198 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in 1199 def _dpp_w32_gfx11 : 1200 Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> { 1201 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16; 1202 let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP); 1203 let isAsmParserOnly = 1; 1204 let WaveSizePredicate = isWave32; 1205 } 1206 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in 1207 def _dpp_w64_gfx11 : 1208 Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> { 1209 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16; 1210 let AsmString = asmName # AsmDPP; 1211 let isAsmParserOnly = 1; 1212 let WaveSizePredicate = isWave64; 1213 } 1214 } 1215 multiclass VOP2be_Real_dpp8_gfx11<bits<6> op, string opName, string asmName> { 1216 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in 1217 def _dpp8_gfx11 : 1218 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> { 1219 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 1220 let AsmString = asmName # !subst(", vcc", "", AsmDPP8); 1221 let DecoderNamespace = "DPP8GFX11"; 1222 } 1223 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in 1224 def _dpp8_w32_gfx11 : 1225 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> { 1226 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 1227 let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8); 1228 let isAsmParserOnly = 1; 1229 let WaveSizePredicate = isWave32; 1230 } 1231 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in 1232 def _dpp8_w64_gfx11 : 1233 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> { 1234 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 1235 let AsmString = asmName # AsmDPP8; 1236 let isAsmParserOnly = 1; 1237 let WaveSizePredicate = isWave64; 1238 } 1239 } 1240 1241} // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" 1242 1243// We don't want to override separate decoderNamespaces within these 1244multiclass VOP2_Realtriple_e64_gfx11<bits<6> op> { 1245 defm NAME : VOP3_Realtriple_gfx11<{0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, NAME> ; 1246} 1247multiclass VOP2_Realtriple_e64_with_name_gfx11<bits<6> op, string opName, 1248 string asmName> { 1249 defm NAME : VOP3_Realtriple_with_name_gfx11<{0, 1, 0, 0, op{5-0}}, opName, asmName> ; 1250} 1251 1252multiclass VOP2be_Real_gfx11<bits<6> op, string opName, string asmName> : 1253 VOP2be_Real_e32_gfx11<op, opName, asmName>, 1254 VOP3be_Realtriple_gfx11<{0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, opName, asmName>, 1255 VOP2be_Real_dpp_gfx11<op, opName, asmName>, 1256 VOP2be_Real_dpp8_gfx11<op, opName, asmName>; 1257 1258// Only for CNDMASK 1259multiclass VOP2e_Real_gfx11<bits<6> op, string opName, string asmName> : 1260 VOP2_Real_e32_gfx11<op>, 1261 VOP2_Realtriple_e64_gfx11<op>, 1262 VOP2be_Real_dpp_gfx11<op, opName, asmName>, 1263 VOP2be_Real_dpp8_gfx11<op, opName, asmName>; 1264 1265multiclass VOP2Only_Real_gfx11<bits<6> op> : 1266 VOP2Only_Real_e32_gfx11<op>, 1267 VOP2_Real_dpp_gfx11<op>, 1268 VOP2_Real_dpp8_gfx11<op>; 1269 1270multiclass VOP2_Real_NO_VOP3_gfx11<bits<6> op> : 1271 VOP2_Real_e32_gfx11<op>, VOP2_Real_dpp_gfx11<op>, VOP2_Real_dpp8_gfx11<op>; 1272 1273multiclass VOP2_Real_FULL_gfx11<bits<6> op> : 1274 VOP2_Realtriple_e64_gfx11<op>, VOP2_Real_NO_VOP3_gfx11<op>; 1275 1276multiclass VOP2_Real_NO_VOP3_with_name_gfx11<bits<6> op, string opName, 1277 string asmName, bit isSingle = 0> : 1278 VOP2_Real_e32_with_name_gfx11<op, opName, asmName, isSingle>, 1279 VOP2_Real_dpp_with_name_gfx11<op, opName, asmName>, 1280 VOP2_Real_dpp8_with_name_gfx11<op, opName, asmName>; 1281 1282multiclass VOP2_Real_FULL_with_name_gfx11<bits<6> op, string opName, 1283 string asmName> : 1284 VOP2_Realtriple_e64_with_name_gfx11<op, opName, asmName>, 1285 VOP2_Real_NO_VOP3_with_name_gfx11<op, opName, asmName>; 1286 1287multiclass VOP2_Real_NO_DPP_gfx11<bits<6> op> : 1288 VOP2_Real_e32_gfx11<op>, VOP2_Real_e64_gfx11<op>; 1289 1290multiclass VOP2_Real_NO_DPP_with_name_gfx11<bits<6> op, string opName, 1291 string asmName> : 1292 VOP2_Real_e32_with_name_gfx11<op, opName, asmName>, 1293 VOP2_Real_e64_with_name_gfx11<op, opName, asmName>; 1294 1295defm V_CNDMASK_B32 : VOP2e_Real_gfx11<0x001, "V_CNDMASK_B32", 1296 "v_cndmask_b32">; 1297defm V_DOT2ACC_F32_F16 : VOP2_Real_NO_VOP3_with_name_gfx11<0x002, 1298 "V_DOT2C_F32_F16", "v_dot2acc_f32_f16", 1>; 1299defm V_FMAC_DX9_ZERO_F32 : VOP2_Real_NO_DPP_with_name_gfx11<0x006, 1300 "V_FMAC_LEGACY_F32", "v_fmac_dx9_zero_f32">; 1301defm V_MUL_DX9_ZERO_F32 : VOP2_Real_FULL_with_name_gfx11<0x007, 1302 "V_MUL_LEGACY_F32", "v_mul_dx9_zero_f32">; 1303defm V_LSHLREV_B32 : VOP2_Real_FULL_gfx11<0x018>; 1304defm V_LSHRREV_B32 : VOP2_Real_FULL_gfx11<0x019>; 1305defm V_ASHRREV_I32 : VOP2_Real_FULL_gfx11<0x01a>; 1306defm V_ADD_CO_CI_U32 : 1307 VOP2be_Real_gfx11<0x020, "V_ADDC_U32", "v_add_co_ci_u32">; 1308defm V_SUB_CO_CI_U32 : 1309 VOP2be_Real_gfx11<0x021, "V_SUBB_U32", "v_sub_co_ci_u32">; 1310defm V_SUBREV_CO_CI_U32 : 1311 VOP2be_Real_gfx11<0x022, "V_SUBBREV_U32", "v_subrev_co_ci_u32">; 1312 1313defm V_CVT_PK_RTZ_F16_F32 : VOP2_Real_FULL_with_name_gfx11<0x02f, 1314 "V_CVT_PKRTZ_F16_F32", "v_cvt_pk_rtz_f16_f32">; 1315defm V_PK_FMAC_F16 : VOP2Only_Real_gfx11<0x03c>; 1316 1317// VOP3 only. 1318defm V_CNDMASK_B16 : VOP3Only_Realtriple_gfx11<0x25d>; 1319defm V_LDEXP_F32 : VOP3Only_Realtriple_gfx11<0x31c>; 1320defm V_BFM_B32 : VOP3Only_Realtriple_gfx11<0x31d>; 1321defm V_BCNT_U32_B32 : VOP3Only_Realtriple_gfx11<0x31e>; 1322defm V_MBCNT_LO_U32_B32 : VOP3Only_Realtriple_gfx11<0x31f>; 1323defm V_MBCNT_HI_U32_B32 : VOP3Only_Realtriple_gfx11<0x320>; 1324defm V_CVT_PKNORM_I16_F32 : VOP3Only_Realtriple_gfx11<0x321>; 1325defm V_CVT_PKNORM_U16_F32 : VOP3Only_Realtriple_gfx11<0x322>; 1326defm V_CVT_PK_U16_U32 : VOP3Only_Realtriple_gfx11<0x323>; 1327defm V_CVT_PK_I16_I32 : VOP3Only_Realtriple_gfx11<0x324>; 1328defm V_ADD_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x300>; 1329defm V_SUB_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x301>; 1330defm V_SUBREV_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x302>; 1331 1332let SubtargetPredicate = isGFX11Plus in { 1333 defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx11>; 1334 1335 defm : VOP2bInstAliases< 1336 V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx11, "v_add_co_ci_u32">; 1337 defm : VOP2bInstAliases< 1338 V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx11, "v_sub_co_ci_u32">; 1339 defm : VOP2bInstAliases< 1340 V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx11, "v_subrev_co_ci_u32">; 1341} // End SubtargetPredicate = isGFX11Plus 1342 1343//===----------------------------------------------------------------------===// 1344// GFX10. 1345//===----------------------------------------------------------------------===// 1346 1347let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { 1348 //===------------------------------- VOP2 -------------------------------===// 1349 multiclass VOP2Only_Real_MADK_gfx10<bits<6> op> { 1350 def _gfx10 : 1351 VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX10>, 1352 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>; 1353 } 1354 multiclass VOP2Only_Real_MADK_gfx10_with_name<bits<6> op, string opName, 1355 string asmName> { 1356 def _gfx10 : 1357 VOP2_Real<!cast<VOP2_Pseudo>(opName), SIEncodingFamily.GFX10>, 1358 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(opName).Pfl> { 1359 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName); 1360 let AsmString = asmName # ps.AsmOperands; 1361 } 1362 } 1363 multiclass VOP2_Real_e32_gfx10<bits<6> op> { 1364 def _e32_gfx10 : 1365 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>, 1366 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>; 1367 } 1368 multiclass VOP2_Real_e64_gfx10<bits<6> op> { 1369 def _e64_gfx10 : 1370 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>, 1371 VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 1372 } 1373 multiclass VOP2_Real_sdwa_gfx10<bits<6> op> { 1374 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in 1375 def _sdwa_gfx10 : 1376 VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>, 1377 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> { 1378 let DecoderNamespace = "SDWA10"; 1379 } 1380 } 1381 multiclass VOP2_Real_dpp_gfx10<bits<6> op> { 1382 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP>.ret in 1383 def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10> { 1384 let DecoderNamespace = "SDWA10"; 1385 } 1386 } 1387 multiclass VOP2_Real_dpp8_gfx10<bits<6> op> { 1388 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP>.ret in 1389 def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")> { 1390 let DecoderNamespace = "DPP8"; 1391 } 1392 } 1393 1394 //===------------------------- VOP2 (with name) -------------------------===// 1395 multiclass VOP2_Real_e32_gfx10_with_name<bits<6> op, string opName, 1396 string asmName> { 1397 def _e32_gfx10 : 1398 VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>, 1399 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> { 1400 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1401 let AsmString = asmName # ps.AsmOperands; 1402 } 1403 } 1404 multiclass VOP2_Real_e64_gfx10_with_name<bits<6> op, string opName, 1405 string asmName> { 1406 def _e64_gfx10 : 1407 VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>, 1408 VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, 1409 !cast<VOP3_Pseudo>(opName#"_e64").Pfl> { 1410 VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName#"_e64"); 1411 let AsmString = asmName # ps.AsmOperands; 1412 } 1413 } 1414 let DecoderNamespace = "SDWA10" in { 1415 multiclass VOP2_Real_sdwa_gfx10_with_name<bits<6> op, string opName, 1416 string asmName> { 1417 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in 1418 def _sdwa_gfx10 : 1419 VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>, 1420 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> { 1421 VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa"); 1422 let AsmString = asmName # ps.AsmOperands; 1423 } 1424 } 1425 multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName, 1426 string asmName> { 1427 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in 1428 def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX10> { 1429 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1430 let AsmString = asmName # ps.Pfl.AsmDPP16; 1431 } 1432 } 1433 multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName, 1434 string asmName> { 1435 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in 1436 def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> { 1437 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1438 let AsmString = asmName # ps.Pfl.AsmDPP8; 1439 let DecoderNamespace = "DPP8"; 1440 } 1441 } 1442 } // End DecoderNamespace = "SDWA10" 1443 1444 //===------------------------------ VOP2be ------------------------------===// 1445 multiclass VOP2be_Real_e32_gfx10<bits<6> op, string opName, string asmName> { 1446 def _e32_gfx10 : 1447 VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>, 1448 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> { 1449 VOP2_Pseudo Ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1450 let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands); 1451 } 1452 } 1453 multiclass VOP2be_Real_e64_gfx10<bits<6> op, string opName, string asmName> { 1454 def _e64_gfx10 : 1455 VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>, 1456 VOP3be_gfx10<{0, 1, 0, 0, op{5-0}}, 1457 !cast<VOP3_Pseudo>(opName#"_e64").Pfl> { 1458 VOP3_Pseudo Ps = !cast<VOP3_Pseudo>(opName#"_e64"); 1459 let AsmString = asmName # Ps.AsmOperands; 1460 } 1461 } 1462 multiclass VOP2be_Real_sdwa_gfx10<bits<6> op, string opName, string asmName> { 1463 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in 1464 def _sdwa_gfx10 : 1465 VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>, 1466 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> { 1467 VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa"); 1468 let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands); 1469 let DecoderNamespace = "SDWA10"; 1470 } 1471 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in 1472 def _sdwa_w32_gfx10 : 1473 Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>, 1474 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> { 1475 VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa"); 1476 let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands); 1477 let isAsmParserOnly = 1; 1478 let DecoderNamespace = "SDWA10"; 1479 let WaveSizePredicate = isWave32; 1480 } 1481 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in 1482 def _sdwa_w64_gfx10 : 1483 Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>, 1484 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> { 1485 VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa"); 1486 let AsmString = asmName # Ps.AsmOperands; 1487 let isAsmParserOnly = 1; 1488 let DecoderNamespace = "SDWA10"; 1489 let WaveSizePredicate = isWave64; 1490 } 1491 } 1492 multiclass VOP2be_Real_dpp_gfx10<bits<6> op, string opName, string asmName> { 1493 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in 1494 def _dpp_gfx10 : 1495 VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX10, asmName> { 1496 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16; 1497 let AsmString = asmName # !subst(", vcc", "", AsmDPP); 1498 let DecoderNamespace = "SDWA10"; 1499 } 1500 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in 1501 def _dpp_w32_gfx10 : 1502 Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> { 1503 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16; 1504 let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP); 1505 let isAsmParserOnly = 1; 1506 let WaveSizePredicate = isWave32; 1507 } 1508 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in 1509 def _dpp_w64_gfx10 : 1510 Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> { 1511 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16; 1512 let AsmString = asmName # AsmDPP; 1513 let isAsmParserOnly = 1; 1514 let WaveSizePredicate = isWave64; 1515 } 1516 } 1517 multiclass VOP2be_Real_dpp8_gfx10<bits<6> op, string opName, string asmName> { 1518 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in 1519 def _dpp8_gfx10 : 1520 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> { 1521 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 1522 let AsmString = asmName # !subst(", vcc", "", AsmDPP8); 1523 let DecoderNamespace = "DPP8"; 1524 } 1525 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in 1526 def _dpp8_w32_gfx10 : 1527 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> { 1528 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 1529 let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8); 1530 let isAsmParserOnly = 1; 1531 let WaveSizePredicate = isWave32; 1532 } 1533 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in 1534 def _dpp8_w64_gfx10 : 1535 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> { 1536 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 1537 let AsmString = asmName # AsmDPP8; 1538 let isAsmParserOnly = 1; 1539 let WaveSizePredicate = isWave64; 1540 } 1541 } 1542 1543 //===----------------------------- VOP3Only -----------------------------===// 1544 multiclass VOP3Only_Real_gfx10<bits<10> op> { 1545 def _e64_gfx10 : 1546 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>, 1547 VOP3e_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> { 1548 let IsSingle = 1; 1549 } 1550 } 1551 1552 //===---------------------------- VOP3beOnly ----------------------------===// 1553 multiclass VOP3beOnly_Real_gfx10<bits<10> op> { 1554 def _e64_gfx10 : 1555 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>, 1556 VOP3be_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> { 1557 let IsSingle = 1; 1558 } 1559 } 1560} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" 1561 1562multiclass VOP2Only_Real_MADK_gfx10_gfx11<bits<6> op> : 1563 VOP2Only_Real_MADK_gfx10<op>, VOP2Only_Real_MADK_gfx11<op>; 1564 1565multiclass VOP2be_Real_gfx10<bits<6> op, string opName, string asmName> : 1566 VOP2be_Real_e32_gfx10<op, opName, asmName>, 1567 VOP2be_Real_e64_gfx10<op, opName, asmName>, 1568 VOP2be_Real_sdwa_gfx10<op, opName, asmName>, 1569 VOP2be_Real_dpp_gfx10<op, opName, asmName>, 1570 VOP2be_Real_dpp8_gfx10<op, opName, asmName>; 1571 1572multiclass VOP2e_Real_gfx10<bits<6> op, string opName, string asmName> : 1573 VOP2_Real_e32_gfx10<op>, 1574 VOP2_Real_e64_gfx10<op>, 1575 VOP2be_Real_sdwa_gfx10<op, opName, asmName>, 1576 VOP2be_Real_dpp_gfx10<op, opName, asmName>, 1577 VOP2be_Real_dpp8_gfx10<op, opName, asmName>; 1578 1579multiclass VOP2_Real_gfx10<bits<6> op> : 1580 VOP2_Real_e32_gfx10<op>, VOP2_Real_e64_gfx10<op>, 1581 VOP2_Real_sdwa_gfx10<op>, VOP2_Real_dpp_gfx10<op>, VOP2_Real_dpp8_gfx10<op>; 1582 1583multiclass VOP2_Real_gfx10_gfx11<bits<6> op> : 1584 VOP2_Real_gfx10<op>, VOP2_Real_FULL_gfx11<op>; 1585 1586multiclass VOP2_Real_with_name_gfx10<bits<6> op, string opName, 1587 string asmName> : 1588 VOP2_Real_e32_gfx10_with_name<op, opName, asmName>, 1589 VOP2_Real_e64_gfx10_with_name<op, opName, asmName>, 1590 VOP2_Real_sdwa_gfx10_with_name<op, opName, asmName>, 1591 VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>, 1592 VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>; 1593 1594multiclass VOP2_Real_with_name_gfx10_gfx11<bits<6> op, string opName, 1595 string asmName> : 1596 VOP2_Real_with_name_gfx10<op, opName, asmName>, 1597 VOP2_Real_FULL_with_name_gfx11<op, opName, asmName>; 1598 1599// NB: Same opcode as v_mac_legacy_f32 1600let DecoderNamespace = "GFX10_B" in 1601defm V_FMAC_LEGACY_F32 : VOP2_Real_gfx10<0x006>; 1602 1603defm V_XNOR_B32 : VOP2_Real_gfx10_gfx11<0x01e>; 1604defm V_FMAC_F32 : VOP2_Real_gfx10_gfx11<0x02b>; 1605defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10_gfx11<0x02c>; 1606defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10_gfx11<0x02d>; 1607defm V_ADD_F16 : VOP2_Real_gfx10_gfx11<0x032>; 1608defm V_SUB_F16 : VOP2_Real_gfx10_gfx11<0x033>; 1609defm V_SUBREV_F16 : VOP2_Real_gfx10_gfx11<0x034>; 1610defm V_MUL_F16 : VOP2_Real_gfx10_gfx11<0x035>; 1611defm V_FMAC_F16 : VOP2_Real_gfx10_gfx11<0x036>; 1612defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10_gfx11<0x037>; 1613defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10_gfx11<0x038>; 1614defm V_MAX_F16 : VOP2_Real_gfx10_gfx11<0x039>; 1615defm V_MIN_F16 : VOP2_Real_gfx10_gfx11<0x03a>; 1616defm V_LDEXP_F16 : VOP2_Real_gfx10_gfx11<0x03b>; 1617 1618let IsSingle = 1 in { 1619 defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>; 1620} 1621 1622// VOP2 no carry-in, carry-out. 1623defm V_ADD_NC_U32 : 1624 VOP2_Real_with_name_gfx10_gfx11<0x025, "V_ADD_U32", "v_add_nc_u32">; 1625defm V_SUB_NC_U32 : 1626 VOP2_Real_with_name_gfx10_gfx11<0x026, "V_SUB_U32", "v_sub_nc_u32">; 1627defm V_SUBREV_NC_U32 : 1628 VOP2_Real_with_name_gfx10_gfx11<0x027, "V_SUBREV_U32", "v_subrev_nc_u32">; 1629 1630// VOP2 carry-in, carry-out. 1631defm V_ADD_CO_CI_U32 : 1632 VOP2be_Real_gfx10<0x028, "V_ADDC_U32", "v_add_co_ci_u32">; 1633defm V_SUB_CO_CI_U32 : 1634 VOP2be_Real_gfx10<0x029, "V_SUBB_U32", "v_sub_co_ci_u32">; 1635defm V_SUBREV_CO_CI_U32 : 1636 VOP2be_Real_gfx10<0x02a, "V_SUBBREV_U32", "v_subrev_co_ci_u32">; 1637 1638defm V_CNDMASK_B32 : 1639 VOP2e_Real_gfx10<0x001, "V_CNDMASK_B32", "v_cndmask_b32">; 1640 1641// VOP3 only. 1642defm V_BFM_B32 : VOP3Only_Real_gfx10<0x363>; 1643defm V_BCNT_U32_B32 : VOP3Only_Real_gfx10<0x364>; 1644defm V_MBCNT_LO_U32_B32 : VOP3Only_Real_gfx10<0x365>; 1645defm V_MBCNT_HI_U32_B32 : VOP3Only_Real_gfx10<0x366>; 1646defm V_LDEXP_F32 : VOP3Only_Real_gfx10<0x362>; 1647defm V_CVT_PKNORM_I16_F32 : VOP3Only_Real_gfx10<0x368>; 1648defm V_CVT_PKNORM_U16_F32 : VOP3Only_Real_gfx10<0x369>; 1649defm V_CVT_PK_U16_U32 : VOP3Only_Real_gfx10<0x36a>; 1650defm V_CVT_PK_I16_I32 : VOP3Only_Real_gfx10<0x36b>; 1651 1652// VOP3 carry-out. 1653defm V_ADD_CO_U32 : VOP3beOnly_Real_gfx10<0x30f>; 1654defm V_SUB_CO_U32 : VOP3beOnly_Real_gfx10<0x310>; 1655defm V_SUBREV_CO_U32 : VOP3beOnly_Real_gfx10<0x319>; 1656 1657let SubtargetPredicate = isGFX10Only in { 1658 defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx10>; 1659 1660 defm : VOP2bInstAliases< 1661 V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx10, "v_add_co_ci_u32">; 1662 defm : VOP2bInstAliases< 1663 V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx10, "v_sub_co_ci_u32">; 1664 defm : VOP2bInstAliases< 1665 V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx10, "v_subrev_co_ci_u32">; 1666} // End SubtargetPredicate = isGFX10Only 1667 1668//===----------------------------------------------------------------------===// 1669// GFX6, GFX7, GFX10, GFX11 1670//===----------------------------------------------------------------------===// 1671 1672class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> : 1673 VOP_DPPe <P> { 1674 bits<8> vdst; 1675 bits<8> src1; 1676 let Inst{8-0} = 0xfa; //dpp 1677 let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); 1678 let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); 1679 let Inst{30-25} = op; 1680 let Inst{31} = 0x0; //encoding 1681} 1682 1683let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in { 1684 multiclass VOP2_Lane_Real_gfx6_gfx7<bits<6> op> { 1685 def _gfx6_gfx7 : 1686 VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>, 1687 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>; 1688 } 1689 multiclass VOP2Only_Real_MADK_gfx6_gfx7<bits<6> op> { 1690 def _gfx6_gfx7 : 1691 VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>, 1692 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>; 1693 } 1694 multiclass VOP2_Real_e32_gfx6_gfx7<bits<6> op, string opName = NAME> { 1695 def _e32_gfx6_gfx7 : 1696 VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.SI>, 1697 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl>; 1698 } 1699 multiclass VOP2_Real_e64_gfx6_gfx7<bits<6> op, string opName = NAME> { 1700 def _e64_gfx6_gfx7 : 1701 VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.SI>, 1702 VOP3e_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(opName#"_e64").Pfl>; 1703 } 1704 multiclass VOP2be_Real_e64_gfx6_gfx7<bits<6> op, string opName = NAME> { 1705 def _e64_gfx6_gfx7 : 1706 VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.SI>, 1707 VOP3be_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(opName#"_e64").Pfl>; 1708 } 1709} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" 1710 1711multiclass VOP2Only_Real_MADK_gfx6_gfx7_gfx10<bits<6> op> : 1712 VOP2Only_Real_MADK_gfx6_gfx7<op>, VOP2Only_Real_MADK_gfx10<op>; 1713 1714multiclass VOP2_Real_gfx6_gfx7<bits<6> op> : 1715 VOP2_Real_e32_gfx6_gfx7<op>, VOP2_Real_e64_gfx6_gfx7<op>; 1716 1717multiclass VOP2_Real_gfx6_gfx7_gfx10<bits<6> op> : 1718 VOP2_Real_gfx6_gfx7<op>, VOP2_Real_gfx10<op>; 1719 1720multiclass VOP2_Real_gfx6_gfx7_gfx10_gfx11<bits<6> op> : 1721 VOP2_Real_gfx6_gfx7_gfx10<op>, VOP2_Real_FULL_gfx11<op>; 1722 1723multiclass VOP2be_Real_gfx6_gfx7<bits<6> op> : 1724 VOP2_Real_e32_gfx6_gfx7<op>, VOP2be_Real_e64_gfx6_gfx7<op>; 1725 1726multiclass VOP2be_Real_gfx6_gfx7_with_name<bits<6> op, 1727 string opName, string asmName> { 1728 defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32"); 1729 defvar ps64 = !cast<VOP3_Pseudo>(opName#"_e64"); 1730 1731 let AsmString = asmName # ps32.AsmOperands in { 1732 defm "" : VOP2_Real_e32_gfx6_gfx7<op, opName>; 1733 } 1734 1735 let AsmString = asmName # ps64.AsmOperands in { 1736 defm "" : VOP2be_Real_e64_gfx6_gfx7<op, opName>; 1737 } 1738} 1739 1740defm V_CNDMASK_B32 : VOP2_Real_gfx6_gfx7<0x000>; 1741defm V_MIN_LEGACY_F32 : VOP2_Real_gfx6_gfx7<0x00d>; 1742defm V_MAX_LEGACY_F32 : VOP2_Real_gfx6_gfx7<0x00e>; 1743defm V_LSHR_B32 : VOP2_Real_gfx6_gfx7<0x015>; 1744defm V_ASHR_I32 : VOP2_Real_gfx6_gfx7<0x017>; 1745defm V_LSHL_B32 : VOP2_Real_gfx6_gfx7<0x019>; 1746defm V_BFM_B32 : VOP2_Real_gfx6_gfx7<0x01e>; 1747defm V_BCNT_U32_B32 : VOP2_Real_gfx6_gfx7<0x022>; 1748defm V_MBCNT_LO_U32_B32 : VOP2_Real_gfx6_gfx7<0x023>; 1749defm V_MBCNT_HI_U32_B32 : VOP2_Real_gfx6_gfx7<0x024>; 1750defm V_LDEXP_F32 : VOP2_Real_gfx6_gfx7<0x02b>; 1751defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_gfx6_gfx7<0x02c>; 1752defm V_CVT_PKNORM_I16_F32 : VOP2_Real_gfx6_gfx7<0x02d>; 1753defm V_CVT_PKNORM_U16_F32 : VOP2_Real_gfx6_gfx7<0x02e>; 1754defm V_CVT_PK_U16_U32 : VOP2_Real_gfx6_gfx7<0x030>; 1755defm V_CVT_PK_I16_I32 : VOP2_Real_gfx6_gfx7<0x031>; 1756 1757// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 where renamed to *_U32 in 1758// VI, but the VI instructions behave the same as the SI versions. 1759defm V_ADD_I32 : VOP2be_Real_gfx6_gfx7_with_name<0x025, "V_ADD_CO_U32", "v_add_i32">; 1760defm V_SUB_I32 : VOP2be_Real_gfx6_gfx7_with_name<0x026, "V_SUB_CO_U32", "v_sub_i32">; 1761defm V_SUBREV_I32 : VOP2be_Real_gfx6_gfx7_with_name<0x027, "V_SUBREV_CO_U32", "v_subrev_i32">; 1762defm V_ADDC_U32 : VOP2be_Real_gfx6_gfx7<0x028>; 1763defm V_SUBB_U32 : VOP2be_Real_gfx6_gfx7<0x029>; 1764defm V_SUBBREV_U32 : VOP2be_Real_gfx6_gfx7<0x02a>; 1765 1766defm V_READLANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x001>; 1767 1768let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in { 1769 defm V_WRITELANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x002>; 1770} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) 1771 1772let SubtargetPredicate = isGFX6GFX7 in { 1773 defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx6_gfx7>; 1774 defm : VOP2eInstAliases<V_ADD_CO_U32_e32, V_ADD_I32_e32_gfx6_gfx7>; 1775 defm : VOP2eInstAliases<V_SUB_CO_U32_e32, V_SUB_I32_e32_gfx6_gfx7>; 1776 defm : VOP2eInstAliases<V_SUBREV_CO_U32_e32, V_SUBREV_I32_e32_gfx6_gfx7>; 1777 1778 def : VOP2e64InstAlias<V_ADD_CO_U32_e64, V_ADD_I32_e64_gfx6_gfx7>; 1779 def : VOP2e64InstAlias<V_SUB_CO_U32_e64, V_SUB_I32_e64_gfx6_gfx7>; 1780 def : VOP2e64InstAlias<V_SUBREV_CO_U32_e64, V_SUBREV_I32_e64_gfx6_gfx7>; 1781} // End SubtargetPredicate = isGFX6GFX7 1782 1783defm V_ADD_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x003>; 1784defm V_SUB_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x004>; 1785defm V_SUBREV_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x005>; 1786defm V_MAC_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x006>; 1787defm V_MUL_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x007>; 1788defm V_MUL_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x008>; 1789defm V_MUL_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x009>; 1790defm V_MUL_HI_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00a>; 1791defm V_MUL_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00b>; 1792defm V_MUL_HI_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00c>; 1793defm V_MIN_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00f>; 1794defm V_MAX_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x010>; 1795defm V_MIN_I32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x011>; 1796defm V_MAX_I32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x012>; 1797defm V_MIN_U32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x013>; 1798defm V_MAX_U32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x014>; 1799defm V_LSHRREV_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x016>; 1800defm V_ASHRREV_I32 : VOP2_Real_gfx6_gfx7_gfx10<0x018>; 1801defm V_LSHLREV_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x01a>; 1802defm V_AND_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x01b>; 1803defm V_OR_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x01c>; 1804defm V_XOR_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x01d>; 1805defm V_MAC_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x01f>; 1806defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x02f>; 1807defm V_MADMK_F32 : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x020>; 1808defm V_MADAK_F32 : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x021>; 1809 1810//===----------------------------------------------------------------------===// 1811// GFX8, GFX9 (VI). 1812//===----------------------------------------------------------------------===// 1813 1814let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in { 1815 1816multiclass VOP2_Real_MADK_vi <bits<6> op> { 1817 def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>, 1818 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>; 1819} 1820 1821multiclass VOP2_Real_MADK_gfx940 <bits<6> op> { 1822 def _gfx940 : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX940>, 1823 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl> { 1824 let DecoderNamespace = "GFX9"; 1825 } 1826} 1827 1828multiclass VOP2_Real_e32_vi <bits<6> op> { 1829 def _e32_vi : 1830 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>, 1831 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>; 1832} 1833 1834multiclass VOP2_Real_e64_vi <bits<10> op> { 1835 def _e64_vi : 1836 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>, 1837 VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 1838} 1839 1840multiclass VOP2_Real_e64only_vi <bits<10> op> { 1841 def _e64_vi : 1842 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>, 1843 VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> { 1844 let IsSingle = 1; 1845 } 1846} 1847 1848multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> : 1849 VOP2_Real_e32_vi<op>, 1850 VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>; 1851 1852} // End AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" 1853 1854multiclass VOP2_SDWA_Real <bits<6> op> { 1855 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA>.ret in 1856 def _sdwa_vi : 1857 VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>, 1858 VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; 1859} 1860 1861multiclass VOP2_SDWA9_Real <bits<6> op> { 1862 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in 1863 def _sdwa_gfx9 : 1864 VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>, 1865 VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; 1866} 1867 1868let AssemblerPredicate = isGFX8Only in { 1869 1870multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName> { 1871 def _e32_vi : 1872 VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.VI>, 1873 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> { 1874 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32"); 1875 let AsmString = AsmName # ps.AsmOperands; 1876 let DecoderNamespace = "GFX8"; 1877 } 1878 def _e64_vi : 1879 VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.VI>, 1880 VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> { 1881 VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64"); 1882 let AsmString = AsmName # ps.AsmOperands; 1883 let DecoderNamespace = "GFX8"; 1884 } 1885 foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA>.ret in 1886 def _sdwa_vi : 1887 VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>, 1888 VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> { 1889 VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa"); 1890 let AsmString = AsmName # ps.AsmOperands; 1891 } 1892 foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in 1893 def _dpp_vi : 1894 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.VI>, 1895 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> { 1896 VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp"); 1897 let AsmString = AsmName # ps.AsmOperands; 1898 } 1899} 1900} 1901 1902let AssemblerPredicate = isGFX9Only in { 1903 1904multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> { 1905 def _e32_gfx9 : 1906 VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.GFX9>, 1907 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> { 1908 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32"); 1909 let AsmString = AsmName # ps.AsmOperands; 1910 let DecoderNamespace = "GFX9"; 1911 } 1912 def _e64_gfx9 : 1913 VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.GFX9>, 1914 VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> { 1915 VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64"); 1916 let AsmString = AsmName # ps.AsmOperands; 1917 let DecoderNamespace = "GFX9"; 1918 } 1919 foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA9>.ret in 1920 def _sdwa_gfx9 : 1921 VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>, 1922 VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> { 1923 VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa"); 1924 let AsmString = AsmName # ps.AsmOperands; 1925 } 1926 foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in 1927 def _dpp_gfx9 : 1928 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.GFX9>, 1929 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> { 1930 VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp"); 1931 let AsmString = AsmName # ps.AsmOperands; 1932 let DecoderNamespace = "SDWA9"; 1933 } 1934} 1935 1936multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> { 1937 def _e32_gfx9 : 1938 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX9>, 1939 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>{ 1940 let DecoderNamespace = "GFX9"; 1941 } 1942 def _e64_gfx9 : 1943 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>, 1944 VOP3e_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> { 1945 let DecoderNamespace = "GFX9"; 1946 } 1947 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in 1948 def _sdwa_gfx9 : 1949 VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>, 1950 VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> { 1951 } 1952 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in 1953 def _dpp_gfx9 : 1954 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>, 1955 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> { 1956 let DecoderNamespace = "SDWA9"; 1957 } 1958} 1959 1960} // AssemblerPredicate = isGFX9Only 1961 1962multiclass VOP2_Real_e32e64_vi <bits<6> op> : 1963 Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> { 1964 1965 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in 1966 def _dpp_vi : 1967 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>, 1968 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>; 1969} 1970 1971defm V_CNDMASK_B32 : VOP2_Real_e32e64_vi <0x0>; 1972defm V_ADD_F32 : VOP2_Real_e32e64_vi <0x1>; 1973defm V_SUB_F32 : VOP2_Real_e32e64_vi <0x2>; 1974defm V_SUBREV_F32 : VOP2_Real_e32e64_vi <0x3>; 1975let AssemblerPredicate = isGCN3ExcludingGFX90A in 1976defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_vi <0x4>; 1977defm V_MUL_F32 : VOP2_Real_e32e64_vi <0x5>; 1978defm V_MUL_I32_I24 : VOP2_Real_e32e64_vi <0x6>; 1979defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_vi <0x7>; 1980defm V_MUL_U32_U24 : VOP2_Real_e32e64_vi <0x8>; 1981defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_vi <0x9>; 1982defm V_MIN_F32 : VOP2_Real_e32e64_vi <0xa>; 1983defm V_MAX_F32 : VOP2_Real_e32e64_vi <0xb>; 1984defm V_MIN_I32 : VOP2_Real_e32e64_vi <0xc>; 1985defm V_MAX_I32 : VOP2_Real_e32e64_vi <0xd>; 1986defm V_MIN_U32 : VOP2_Real_e32e64_vi <0xe>; 1987defm V_MAX_U32 : VOP2_Real_e32e64_vi <0xf>; 1988defm V_LSHRREV_B32 : VOP2_Real_e32e64_vi <0x10>; 1989defm V_ASHRREV_I32 : VOP2_Real_e32e64_vi <0x11>; 1990defm V_LSHLREV_B32 : VOP2_Real_e32e64_vi <0x12>; 1991defm V_AND_B32 : VOP2_Real_e32e64_vi <0x13>; 1992defm V_OR_B32 : VOP2_Real_e32e64_vi <0x14>; 1993defm V_XOR_B32 : VOP2_Real_e32e64_vi <0x15>; 1994defm V_MAC_F32 : VOP2_Real_e32e64_vi <0x16>; 1995defm V_MADMK_F32 : VOP2_Real_MADK_vi <0x17>; 1996defm V_MADAK_F32 : VOP2_Real_MADK_vi <0x18>; 1997 1998defm V_ADD_U32 : VOP2be_Real_e32e64_vi_only <0x19, "V_ADD_CO_U32", "v_add_u32">; 1999defm V_SUB_U32 : VOP2be_Real_e32e64_vi_only <0x1a, "V_SUB_CO_U32", "v_sub_u32">; 2000defm V_SUBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1b, "V_SUBREV_CO_U32", "v_subrev_u32">; 2001defm V_ADDC_U32 : VOP2be_Real_e32e64_vi_only <0x1c, "V_ADDC_U32", "v_addc_u32">; 2002defm V_SUBB_U32 : VOP2be_Real_e32e64_vi_only <0x1d, "V_SUBB_U32", "v_subb_u32">; 2003defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1e, "V_SUBBREV_U32", "v_subbrev_u32">; 2004 2005defm V_ADD_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_CO_U32", "v_add_co_u32">; 2006defm V_SUB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_CO_U32", "v_sub_co_u32">; 2007defm V_SUBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_CO_U32", "v_subrev_co_u32">; 2008defm V_ADDC_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1c, "V_ADDC_U32", "v_addc_co_u32">; 2009defm V_SUBB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1d, "V_SUBB_U32", "v_subb_co_u32">; 2010defm V_SUBBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_subbrev_co_u32">; 2011 2012defm V_ADD_U32 : VOP2_Real_e32e64_gfx9 <0x34>; 2013defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>; 2014defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>; 2015 2016defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>; 2017defm V_BCNT_U32_B32 : VOP2_Real_e64only_vi <0x28b>; 2018defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64only_vi <0x28c>; 2019defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64only_vi <0x28d>; 2020defm V_LDEXP_F32 : VOP2_Real_e64only_vi <0x288>; 2021defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64only_vi <0x1f0>; 2022defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64only_vi <0x294>; 2023defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64only_vi <0x295>; 2024defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64only_vi <0x296>; 2025defm V_CVT_PK_U16_U32 : VOP2_Real_e64only_vi <0x297>; 2026defm V_CVT_PK_I16_I32 : VOP2_Real_e64only_vi <0x298>; 2027 2028defm V_ADD_F16 : VOP2_Real_e32e64_vi <0x1f>; 2029defm V_SUB_F16 : VOP2_Real_e32e64_vi <0x20>; 2030defm V_SUBREV_F16 : VOP2_Real_e32e64_vi <0x21>; 2031defm V_MUL_F16 : VOP2_Real_e32e64_vi <0x22>; 2032defm V_MAC_F16 : VOP2_Real_e32e64_vi <0x23>; 2033defm V_MADMK_F16 : VOP2_Real_MADK_vi <0x24>; 2034defm V_MADAK_F16 : VOP2_Real_MADK_vi <0x25>; 2035defm V_ADD_U16 : VOP2_Real_e32e64_vi <0x26>; 2036defm V_SUB_U16 : VOP2_Real_e32e64_vi <0x27>; 2037defm V_SUBREV_U16 : VOP2_Real_e32e64_vi <0x28>; 2038defm V_MUL_LO_U16 : VOP2_Real_e32e64_vi <0x29>; 2039defm V_LSHLREV_B16 : VOP2_Real_e32e64_vi <0x2a>; 2040defm V_LSHRREV_B16 : VOP2_Real_e32e64_vi <0x2b>; 2041defm V_ASHRREV_I16 : VOP2_Real_e32e64_vi <0x2c>; 2042defm V_MAX_F16 : VOP2_Real_e32e64_vi <0x2d>; 2043defm V_MIN_F16 : VOP2_Real_e32e64_vi <0x2e>; 2044defm V_MAX_U16 : VOP2_Real_e32e64_vi <0x2f>; 2045defm V_MAX_I16 : VOP2_Real_e32e64_vi <0x30>; 2046defm V_MIN_U16 : VOP2_Real_e32e64_vi <0x31>; 2047defm V_MIN_I16 : VOP2_Real_e32e64_vi <0x32>; 2048defm V_LDEXP_F16 : VOP2_Real_e32e64_vi <0x33>; 2049 2050let SubtargetPredicate = isGFX8GFX9 in { 2051 2052// Aliases to simplify matching of floating-point instructions that 2053// are VOP2 on SI and VOP3 on VI. 2054class SI2_VI3Alias <string name, VOP3_Real inst> : InstAlias < 2055 name#" $dst, $src0, $src1", 2056 !if(inst.Pfl.HasOMod, 2057 (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0), 2058 (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0)) 2059>, PredicateControl { 2060 let UseInstAsmMatchConverter = 0; 2061 let AsmVariantName = AMDGPUAsmVariants.VOP3; 2062} 2063 2064def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>; 2065def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>; 2066def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>; 2067def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>; 2068def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>; 2069 2070defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_vi>; 2071 2072} // End SubtargetPredicate = isGFX8GFX9 2073 2074let SubtargetPredicate = isGFX9Only in { 2075 2076defm : VOP2bInstAliases<V_ADD_U32_e32, V_ADD_CO_U32_e32_gfx9, "v_add_co_u32">; 2077defm : VOP2bInstAliases<V_ADDC_U32_e32, V_ADDC_CO_U32_e32_gfx9, "v_addc_co_u32">; 2078defm : VOP2bInstAliases<V_SUB_U32_e32, V_SUB_CO_U32_e32_gfx9, "v_sub_co_u32">; 2079defm : VOP2bInstAliases<V_SUBB_U32_e32, V_SUBB_CO_U32_e32_gfx9, "v_subb_co_u32">; 2080defm : VOP2bInstAliases<V_SUBREV_U32_e32, V_SUBREV_CO_U32_e32_gfx9, "v_subrev_co_u32">; 2081defm : VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBBREV_CO_U32_e32_gfx9, "v_subbrev_co_u32">; 2082 2083} // End SubtargetPredicate = isGFX9Only 2084 2085let SubtargetPredicate = HasDLInsts in { 2086 2087defm V_FMAC_F32 : VOP2_Real_e32e64_vi <0x3b>; 2088defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>; 2089 2090} // End SubtargetPredicate = HasDLInsts 2091 2092let AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A" in { 2093 multiclass VOP2_Real_e32_gfx90a <bits<6> op> { 2094 def _e32_gfx90a : 2095 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX90A>, 2096 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>; 2097 } 2098 2099 multiclass VOP2_Real_e64_gfx90a <bits<10> op> { 2100 def _e64_gfx90a : 2101 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX90A>, 2102 VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 2103 } 2104 2105 multiclass Base_VOP2_Real_e32e64_gfx90a <bits<6> op> : 2106 VOP2_Real_e32_gfx90a<op>, 2107 VOP2_Real_e64_gfx90a<{0, 1, 0, 0, op{5-0}}>; 2108 2109 multiclass VOP2_Real_e32e64_gfx90a <bits<6> op> : 2110 Base_VOP2_Real_e32e64_gfx90a<op> { 2111 2112 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in 2113 def _dpp_gfx90a : 2114 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX90A>, 2115 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> { 2116 let DecoderNamespace = "SDWA9"; 2117 } 2118 } 2119} // End AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A" 2120 2121let SubtargetPredicate = isGFX90APlus in { 2122 defm V_FMAC_F64 : VOP2_Real_e32e64_gfx90a <0x4>; 2123 let IsSingle = 1 in { 2124 defm V_MUL_LEGACY_F32 : VOP2_Real_e64_gfx90a <0x2a1>; 2125 } 2126} // End SubtargetPredicate = isGFX90APlus 2127 2128let SubtargetPredicate = HasFmaakFmamkF32Insts in { 2129defm V_FMAMK_F32 : VOP2_Real_MADK_gfx940 <0x17>; 2130defm V_FMAAK_F32 : VOP2_Real_MADK_gfx940 <0x18>; 2131} 2132 2133multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : VOP2_Real_e32_vi<op> { 2134 def _dpp_vi : VOP2_DPP<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>; 2135} 2136 2137multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> : 2138 VOP2_Real_e32_gfx10<op>, 2139 VOP2_Real_dpp_gfx10<op>, 2140 VOP2_Real_dpp8_gfx10<op>; 2141 2142let SubtargetPredicate = HasDot5Insts in { 2143 defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx9<0x37>; 2144 // NB: Opcode conflicts with V_DOT8C_I32_I4 2145 // This opcode exists in gfx 10.1* only 2146 defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx10<0x02>; 2147} 2148 2149let SubtargetPredicate = HasDot6Insts in { 2150 defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx9<0x39>; 2151 defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx10<0x0d>; 2152} 2153 2154let SubtargetPredicate = HasDot4Insts in { 2155 defm V_DOT2C_I32_I16 : VOP2_Real_DOT_ACC_gfx9<0x38>; 2156} 2157let SubtargetPredicate = HasDot3Insts in { 2158 defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx9<0x3a>; 2159} 2160 2161let SubtargetPredicate = HasPkFmacF16Inst in { 2162defm V_PK_FMAC_F16 : VOP2_Real_e32_vi<0x3c>; 2163} // End SubtargetPredicate = HasPkFmacF16Inst 2164 2165let SubtargetPredicate = HasDot3Insts in { 2166 // NB: Opcode conflicts with V_DOT2C_F32_F16 2167 let DecoderNamespace = "GFX10_B" in 2168 defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx10<0x02>; 2169} 2170