//===-- VOP2Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP2 Classes
//===----------------------------------------------------------------------===//

// 32-bit VOP2 encoding. Any operand field the profile P does not provide
// (HasSrc0 / HasSrc1 / EmitDst false) is encoded as 0.
class VOP2e <bits<6> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;
  bits<8> src1;

  let Inst{8-0} = !if(P.HasSrc0, src0, 0);
  let Inst{16-9} = !if(P.HasSrc1, src1, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst, 0);
  let Inst{30-25} = op;
  let Inst{31} = 0x0; // encoding
}

// 64-bit VOP2 encoding: the low dword uses the same field layout as VOP2e and
// bits 63-32 carry the 32-bit immediate `imm`.
class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 {
  bits<8> vdst;
  bits<9> src0;
  bits<8> src1;
  bits<32> imm;

  let Inst{8-0} = !if(P.HasSrc0, src0, 0);
  let Inst{16-9} = !if(P.HasSrc1, src1, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst, 0);
  let Inst{30-25} = op;
  let Inst{31} = 0x0; // encoding
  let Inst{63-32} = imm;
}

// VOP2 fields layered on top of VOP_SDWAe. The src0 slot (bits 8-0) is fixed
// to 0xf9; only the low 8 bits of src1 and vdst are encoded here.
class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;
  bits<8> src1;

  let Inst{8-0} = 0xf9; // sdwa
  let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31} = 0x0; // encoding
}

// VOP2 fields layered on top of VOP_SDWA9Ae. Same as VOP2_SDWAe except that
// src1 is 9 bits wide and its top bit is placed in Inst{63}.
class VOP2_SDWA9Ae <bits<6> op, VOPProfile P> : VOP_SDWA9Ae <P> {
  bits<8> vdst;
  bits<9> src1;

  let Inst{8-0} = 0xf9; // sdwa
  let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31} = 0x0; // encoding
  let Inst{63} = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr
}

// Pseudo for the 32-bit form of a VOP2 instruction. Operand lists and the asm
// operand string come from the profile's 32-bit fields (Outs32/Ins32/Asm32);
// `suffix` defaults to "_e32".
class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suffix = "_e32"> :
  VOP_Pseudo <opName, suffix, P, P.Outs32, P.Ins32, "", pattern> {

  let AsmOperands = P.Asm32;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  // Set when either the destination or the first source type is FP.
  let ReadsModeReg = !or(P.DstVT.isFP, P.Src0VT.isFP);

  let mayRaiseFPException = ReadsModeReg;

  let VOP2 = 1;
  let VALU = 1;
  // MODE is added to the implicit uses only when ReadsModeReg is set.
  let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);

  let AsmVariantName = AMDGPUAsmVariants.Default;
}

// Real (encodable) instruction derived from a VOP2_Pseudo. Operand lists come
// from `ps`; the asm string is `real_name` followed by the pseudo's
// AsmOperands. `real_name` defaults to the pseudo's mnemonic.
class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemonic> :
  VOP_Real <ps>,
  InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let VALU = 1;
  let VOP2 = 1;
  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  // (Constraints and DisableEncoding were previously assigned twice with the
  // same value; each is now set once.)
  let SubtargetPredicate = ps.SubtargetPredicate;
  let OtherPredicates = ps.OtherPredicates;
  let AsmMatchConverter = ps.AsmMatchConverter;
  let AsmVariantName = ps.AsmVariantName;
  let Constraints = ps.Constraints;
  let DisableEncoding = ps.DisableEncoding;
  let TSFlags = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses = ps.Uses;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let mayLoad = ps.mayLoad;
  let mayStore = ps.mayStore;
}

// VOP2_Real parameterized by a GFXGen record, which supplies the encoding
// family (Gen.Subtarget), assembler predicate and decoder namespace.
// Profiles that are not IsRealTrue16 get a "_FAKE16" decoder-namespace suffix.
// NOTE(review): OtherPredicates assigned here replaces the value VOP2_Real
// copied from the pseudo rather than extending it - confirm that is intended.
class VOP2_Real_Gen <VOP2_Pseudo ps, GFXGen Gen, string real_name = ps.Mnemonic> :
  VOP2_Real <ps, Gen.Subtarget, real_name> {
  let AssemblerPredicate = Gen.AssemblerPredicate;
  let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []);
  let DecoderNamespace = Gen.DecoderNamespace#
                         !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}

// SDWA pseudo for VOP2; selects the "cvtSdwaVOP2" asm match converter.
class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP2";
}

// DPP pseudo for VOP2; adds nothing on top of VOP_DPP_Pseudo.
class VOP2_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_DPP_Pseudo <OpName, P, pattern> {
}


// Builds the selection pattern list `ret` for `node` under profile P.
// With modifiers: src0 is matched through VOP3Mods0 (with $omod only when
// P.HasOMod) and src1 through VOP3Mods. Without modifiers: a plain two-operand
// pattern.
class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret = !if(P.HasModifiers,
    [(set P.DstVT:$vdst,
      (node (P.Src0VT
              !if(P.HasOMod,
                  (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
                  (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))),
            (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
    [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]);
}

// Defines only the _e32 pseudo. `revOp` names the commuted counterpart; the
// second Commutable_REV argument is true when revOp equals opName.
multiclass VOP2Inst_e32<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName,
                        bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
               Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
  } // End renamedInGFX9 = GFX9Renamed
}
// VOP2Inst_e32 plus a VOPD_Component<VOPDOp, VOPDName> parent.
multiclass
    VOP2Inst_e32_VOPD<string opName, VOPProfile P, bits<5> VOPDOp,
                      string VOPDName, SDPatternOperator node = null_frag,
                      string revOp = opName, bit GFX9Renamed = 0> {
  defm NAME : VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
              VOPD_Component<VOPDOp, VOPDName>;
}
// Defines the _e64 pseudo and, when the profile has HasExtVOP3DPP, an
// _e64_dpp pseudo predicated on isGFX11Plus.
multiclass VOP2Inst_e64<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName,
                        bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    def _e64 : VOP3InstBase <opName, P, node, 1>,
               Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;

    let SubtargetPredicate = isGFX11Plus in {
      if P.HasExtVOP3DPP then
        def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
    } // End SubtargetPredicate = isGFX11Plus
  } // End renamedInGFX9 = GFX9Renamed
}

// Defines the _sdwa pseudo, only when the profile has HasExtSDWA.
multiclass VOP2Inst_sdwa<string opName,
                         VOPProfile P,
                         bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    if P.HasExtSDWA then
      def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
  } // End renamedInGFX9 = GFX9Renamed
}

// Full set of pseudos for one VOP2 opcode: _e32, _e64 (+ _e64_dpp), _sdwa,
// and _dpp when the profile has HasExtDPP.
multiclass VOP2Inst<string opName,
                    VOPProfile P,
                    SDPatternOperator node = null_frag,
                    string revOp = opName,
                    bit GFX9Renamed = 0> :
    VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
    VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
    VOP2Inst_sdwa<opName, P, GFX9Renamed> {
  let renamedInGFX9 = GFX9Renamed in {
    if P.HasExtDPP then
      def _dpp : VOP2_DPP_Pseudo <opName, P>;
  }
}

// Emits three predicated families of VOP2Inst for a 16-bit opcode:
//  - NAME     : profile P, for NotHasTrue16BitInsts with Has16BitInsts
//  - _t16     : VOPProfile_True16<P>, for UseRealTrue16Insts
//  - _fake16  : VOPProfile_Fake16<P>, for UseFakeTrue16Insts
multiclass VOP2Inst_t16<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName,
                        bit GFX9Renamed = 0> {
  let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts] in {
    defm NAME : VOP2Inst<opName, P, node, revOp, GFX9Renamed>;
  }
  let SubtargetPredicate = UseRealTrue16Insts in {
    defm _t16 : VOP2Inst<opName#"_t16", VOPProfile_True16<P>, node, revOp#"_t16", GFX9Renamed>;
  }
  let SubtargetPredicate = UseFakeTrue16Insts in {
    defm _fake16 : VOP2Inst<opName#"_fake16", VOPProfile_Fake16<P>, node, revOp#"_fake16", GFX9Renamed>;
  }
}

// Creating a _t16_e32 pseudo when there is no corresponding real instruction on
// any subtarget is a problem. It makes getMCOpcodeGen return -1, which we
// assume means the instruction is already a real.
// The fix is to not create that
// _t16_e32 pseudo
//
// NAME gets the full VOP2Inst set for targets without true16 instructions;
// for HasTrue16BitInsts only the _e64 family is created, under the "_t16"
// name with VOPProfile_Fake16<P>.
multiclass VOP2Inst_e64_t16<string opName,
                            VOPProfile P,
                            SDPatternOperator node = null_frag,
                            string revOp = opName,
                            bit GFX9Renamed = 0> {
  let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts] in {
    defm NAME : VOP2Inst<opName, P, node, revOp, GFX9Renamed>;
  }
  let SubtargetPredicate = HasTrue16BitInsts in {
    defm _t16 : VOP2Inst_e64<opName#"_t16", VOPProfile_Fake16<P>, node, revOp#"_t16", GFX9Renamed>;
  }
}

// Same set of pseudos as VOP2Inst, except the _e32 pseudo also derives from
// VOPD_Component<VOPDOp, VOPDName> (via VOP2Inst_e32_VOPD).
multiclass VOP2Inst_VOPD<string opName,
                         VOPProfile P,
                         bits<5> VOPDOp,
                         string VOPDName,
                         SDPatternOperator node = null_frag,
                         string revOp = opName,
                         bit GFX9Renamed = 0> :
    VOP2Inst_e32_VOPD<opName, P, VOPDOp, VOPDName, node, revOp, GFX9Renamed>,
    VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
    VOP2Inst_sdwa<opName, P, GFX9Renamed> {
  let renamedInGFX9 = GFX9Renamed in {
    if P.HasExtDPP then
      def _dpp : VOP2_DPP_Pseudo <opName, P>;
  }
}

// VOP2 instructions whose _e32/_sdwa/_dpp forms implicitly define VCC.
// When useSGPRInput is set those forms also implicitly read VCC; it defaults
// to true for profiles with three source arguments. The _e64 forms are
// defined outside the Uses/Defs override.
multiclass VOP2bInst <string opName,
                      VOPProfile P,
                      SDPatternOperator node = null_frag,
                      string revOp = opName,
                      bit GFX9Renamed = 0,
                      bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
  let renamedInGFX9 = GFX9Renamed in {
    let SchedRW = [Write32Bit, WriteSALU] in {
      let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
        def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
                   Commutable_REV<revOp#"_e32", !eq(revOp, opName)> {
          let usesCustomInserter = true;
        }

        if P.HasExtSDWA then
          def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
            let AsmMatchConverter = "cvtSdwaVOP2b";
          }
        if P.HasExtDPP then
          def _dpp : VOP2_DPP_Pseudo <opName, P>;
      } // End Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC]

      def _e64 : VOP3InstBase <opName, P, node, 1>,
                 Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;

      let SubtargetPredicate = isGFX11Plus in {
        if P.HasExtVOP3DPP then
          def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
      } // End SubtargetPredicate = isGFX11Plus
    }
  }
}

// Assembler alias for a VOP2b instruction. The alias string is OpName followed
// by the pseudo's Asm32 with every "vcc" substituted by `opnd`.
class VOP2bInstAlias <VOP2_Pseudo ps, Instruction inst,
                      string OpName, string opnd> :
  InstAlias <OpName#" "#!subst("vcc", opnd, ps.Pfl.Asm32),
             (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
                   ps.Pfl.Src1RC32:$src1),
             1, inst.AsmVariantName>,
  PredicateControl {
}

// Emits one VOP2bInstAlias per wave size: "vcc_lo" under isWave32 and "vcc"
// under isWave64.
multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> {
  let WaveSizePredicate = isWave32 in {
    def : VOP2bInstAlias<ps, inst, OpName, "vcc_lo">;
  }
  let WaveSizePredicate = isWave64 in {
    def : VOP2bInstAlias<ps, inst, OpName, "vcc">;
  }
}

// Shared body for VOP2eInst / VOP2eInst_VOPD. A VOPDOp of -1 selects the _e32
// definition without a VOPD_Component parent. When useSGPRInput is set the
// _e32/_sdwa/_dpp forms implicitly read VCC. None of the forms defined here
// is given a selection pattern (`node` is only forwarded to VOP3InstBase).
multiclass
    VOP2eInst_Base<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName,
                   SDPatternOperator node, string revOp, bit useSGPRInput> {

  let SchedRW = [Write32Bit] in {
    let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
      if !eq(VOPDOp, -1) then
        def _e32 : VOP2_Pseudo <opName, P>,
                   Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
      else
        def _e32 : VOP2_Pseudo <opName, P>,
                   Commutable_REV<revOp#"_e32", !eq(revOp, opName)>,
                   VOPD_Component<VOPDOp, VOPDName>;

      if P.HasExtSDWA then
        def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
          let AsmMatchConverter = "cvtSdwaVOP2e";
        }

      if P.HasExtDPP then
        def _dpp : VOP2_DPP_Pseudo <opName, P>;
    }

    def _e64 : VOP3InstBase <opName, P, node, 1>,
               Commutable_REV<revOp#"_e64", !eq(revOp, opName)> {
      let isReMaterializable = 1;
    }

    let SubtargetPredicate = isGFX11Plus in {
      if P.HasExtVOP3DPP then
        def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
    } // End SubtargetPredicate = isGFX11Plus
  }
}

// VOP2eInst_Base without a VOPD component (passes VOPDOp = -1).
multiclass
    VOP2eInst<string opName, VOPProfile P, SDPatternOperator node = null_frag,
              string revOp = opName, bit useSGPRInput = !eq(P.NumSrcArgs, 3)>
    : VOP2eInst_Base<opName, P, -1, "", node, revOp, useSGPRInput>;

// VOP2eInst_Base with a VOPD component.
multiclass
    VOP2eInst_VOPD<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName,
                   SDPatternOperator node = null_frag, string revOp = opName,
                   bit useSGPRInput = !eq(P.NumSrcArgs, 3)>
    : VOP2eInst_Base<opName, P, VOPDOp, VOPDName, node, revOp, useSGPRInput>;

// Assembler alias for a VOP2e instruction: the pseudo's Asm32 with ", <opnd>"
// appended as a trailing operand.
class VOP2eInstAlias <VOP2_Pseudo ps, Instruction inst, string opnd = ""> :
  InstAlias <ps.OpName#" "#ps.Pfl.Asm32#", "#opnd,
             (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
                   ps.Pfl.Src1RC32:$src1),
             1, inst.AsmVariantName>,
  PredicateControl;

// Assembler alias built from a VOP3 pseudo's Asm64 string, with an explicit
// $sdst and $clamp in the result dag.
class VOP2e64InstAlias <VOP3_Pseudo ps, Instruction inst> :
  InstAlias <ps.OpName#" "#ps.Pfl.Asm64,
             (inst ps.Pfl.DstRC:$vdst, VOPDstS64orS32:$sdst,
                   ps.Pfl.Src0RC32:$src0, ps.Pfl.Src1RC32:$src1, clampmod:$clamp),
             1, inst.AsmVariantName>,
  PredicateControl;

// Emits one VOP2eInstAlias per wave size: "vcc_lo" under isWave32 and "vcc"
// under isWave64.
multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
  let WaveSizePredicate = isWave32 in {
    def : VOP2eInstAlias<ps, inst, "vcc_lo">;
  }
  let WaveSizePredicate = isWave64 in {
    def : VOP2eInstAlias<ps, inst, "vcc">;
  }
}

// Common base of the MADAK/MADMK profiles: all four types are `vt`, and
// AsmVOPDXDeferred is declared unset for subclasses to fill in.
class VOP_MADK_Base<ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
  string AsmVOPDXDeferred = ?;
}

// MADAK profile: the immediate is the last input operand
// (asm order "$vdst, $src0, $src1, $imm"). The immediate operand type and the
// src0 operand class are chosen by whether vt is 32 bits wide.
class VOP_MADAK <ValueType vt> : VOP_MADK_Base<vt> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, KImmFP16);
  field dag Ins32 = !if(!eq(vt.Size, 32),
                    (ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm),
                    (ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm));
  field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$imm);
  // Note that both src0X and imm are deferred
  let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$immDeferred);
  field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, VGPR_32:$vsrc1Y, ImmOpType:$imm);

  field string Asm32 = "$vdst, $src0, $src1, $imm";
  field string AsmVOPDX = "$vdstX, $src0X, $vsrc1X, $imm";
  let AsmVOPDXDeferred = "$vdstX, $src0X, $vsrc1X, $immDeferred";
  field string AsmVOPDY = "$vdstY, $src0Y, $vsrc1Y, $imm";
  field bit HasExt = 0;
  let IsSingle = 1;
}

def VOP_MADAK_F16 : VOP_MADAK <f16>;
// f16 MADAK variant with IsTrue16 set and Lo128 register operands.
def VOP_MADAK_F16_t16 : VOP_MADAK <f16> {
  let IsTrue16 = 1;
  let DstRC = VOPDstOperand<VGPR_32_Lo128>;
  let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0, VGPR_32_Lo128:$src1, ImmOpType:$imm);
}
def VOP_MADAK_F32 : VOP_MADAK <f32>;

// MADMK profile: same as VOP_MADAK except the immediate is the middle input
// operand (asm order "$vdst, $src0, $imm, $src1").
class VOP_MADMK <ValueType vt> : VOP_MADK_Base<vt> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, KImmFP16);
  field dag Ins32 = !if(!eq(vt.Size, 32),
                    (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1),
                    (ins VSrc_f16_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1));
  field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$imm, VGPR_32:$vsrc1X);
  let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$immDeferred, VGPR_32:$vsrc1X);
  field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, ImmOpType:$imm, VGPR_32:$vsrc1Y);

  field string Asm32 = "$vdst, $src0, $imm, $src1";
  field string AsmVOPDX = "$vdstX, $src0X, $imm, $vsrc1X";
  let AsmVOPDXDeferred = "$vdstX, $src0X, $immDeferred, $vsrc1X";
  field string AsmVOPDY = "$vdstY, $src0Y, $imm, $vsrc1Y";
  field bit HasExt = 0;
  let IsSingle = 1;
}

def VOP_MADMK_F16 : VOP_MADMK <f16>;
// f16 MADMK variant with IsTrue16 set and Lo128 register operands.
def VOP_MADMK_F16_t16 : VOP_MADMK <f16> {
  let IsTrue16 = 1;
  let DstRC = VOPDstOperand<VGPR_32_Lo128>;
  let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPR_32_Lo128:$src1);
}
def VOP_MADMK_F32 : VOP_MADMK <f32>;

// FIXME: Remove src2_modifiers. It isn't used, so is wasting memory
// and processing time but it makes it easier to convert to mad.
class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, vt0]> {
  // Every input list below carries an extra $src2 register operand even though
  // HasSrc2 is cleared further down; the asm strings are built for two
  // sources only.
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT>.ret:$src2);
  let Ins64 = getIns64<Src0RC64, Src1RC64, getVregSrcForVT<Src2VT>.ret, 3,
                       0, HasModifiers, HasModifiers, HasOMod,
                       Src0Mod, Src1Mod, Src2Mod>.ret;
  let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                    getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));
  let InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, RegisterOperand<VGPR_32>, 3,
                       0, HasModifiers, HasModifiers, HasOMod,
                       Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel>.ret;
  // We need a dummy src2 tied to dst to track the use of that register for s_delay_alu
  let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X);
  let InsVOPDXDeferred =
    (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X,
         VGPR_32:$vsrc1X, VGPRSrc_32:$src2X);
  let InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y, VGPRSrc_32:$src2Y);
  // NOTE(review): the $src0Y operand class is selected on Src1VT, whereas the X
  // form above selects on Src0VT. Presumably both resolve to vt1 given the
  // [vt0, vt1, vt1, vt0] profile, so the result is the same - confirm.
  let InsVOPDYDeferred =
    (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y,
         VGPR_32:$vsrc1Y, VGPRSrc_32:$src2Y);

  let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                     Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                     getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
                     dpp8:$dpp8, FI:$fi);
  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                     getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
                     clampmod:$clamp, omod:$omod,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);
  let Asm32 = getAsm32<1, 2, vt0>.ret;
  let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt0>.ret;
  let AsmDPP16 = getAsmDPP16<1, 2, HasModifiers, vt0>.ret;
  let AsmDPP8 = getAsmDPP8<1, 2, 0, vt0>.ret;
  let AsmSDWA = getAsmSDWA<1, 2, vt0>.ret;
  let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt0>.ret;
  let AsmVOP3Base =
      getAsmVOP3Base<2 /*NumSrcArgs*/, HasDst, HasClamp,
        HasOpSel, HasOMod, IsVOP3P, HasModifiers,
        HasModifiers, HasModifiers,
        0 /*Src2HasMods*/, DstVT>.ret;
  let HasSrc2 = 0;
  let HasSrc2Mods = 0;

  let HasExt = 1;
  let HasExtDPP = 1;
  let HasExt32BitDPP = 1;
  let HasExtSDWA = 1;
  let HasExtSDWA9 = 0;
  // Names the DPP operand to be tied; the instruction definitions using this
  // profile separately constrain $vdst = $src2.
  let TieRegDPP = "$src2";
}

def VOP_MAC_F16 : VOP_MAC <f16>;
// f16 MAC profile with IsTrue16 and HasOpSel set. The 32-bit, DPP and DPP8
// input lists are rebuilt with getVregSrcForVT<..., 1/*IsTrue16*/, 1/*IsFake16*/>
// register classes and the _t16 DPP modifier operands.
def VOP_MAC_F16_t16 : VOP_MAC <f16> {
  let IsTrue16 = 1;
  let HasOpSel = 1;
  let AsmVOP3OpSel = getAsmVOP3OpSel<2/*NumSrcArgs*/, HasClamp, HasOMod,
                                     HasSrc0FloatMods, HasSrc1FloatMods, HasSrc2FloatMods>.ret;
  let DstRC = VOPDstOperand<VGPR_32_Lo128>;
  let DstRC64 = VOPDstOperand<VGPR_32>;
  let Src1RC32 = VGPRSrc_32_Lo128;
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret:$src2);
  let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
  let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
  let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
  let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                    getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret:$src2, // stub argument
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                     Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                     getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret:$src2, // stub argument
                     dpp8:$dpp8, FI:$fi);
  let Src2Mod = FP32InputMods; // dummy unused modifiers
  let Src2RC64 = VGPRSrc_32; // stub argument
}
def VOP_MAC_F32 : VOP_MAC <f32>;
// Legacy f32 MAC profile: same as VOP_MAC_F32 with the DPP extensions off.
let HasExtDPP = 0, HasExt32BitDPP = 0 in
def VOP_MAC_LEGACY_F32 : VOP_MAC <f32>;
// f64 MAC profile: no SDWA, no 32-bit DPP, 64-bit DPP enabled.
let HasExtSDWA = 0, HasExt32BitDPP = 0, HasExt64BitDPP = 1 in
def VOP_MAC_F64 : VOP_MAC <f64>;

// MAC-style profile with a separate source type (vt1) and destination type
// (vt0); clamp, SDWA and op_sel are switched off and IsPacked is cleared.
class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> {
  let HasClamp = 0;
  let HasExtSDWA = 0;
  let HasOpSel = 0;
  let IsPacked = 0;
}

// f32 destination, v2f16 sources; re-enables clamp and uses FPVRegInputMods
// for both DPP source modifiers.
def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> {
  let Src0ModDPP = FPVRegInputMods;
  let Src1ModDPP = FPVRegInputMods;
  let HasClamp = 1;
}

// i32 destination and sources; re-enables clamp, disables the VOP3 DPP
// extension, uses Int32InputMods source modifiers, and rebuilds Ins64 and
// Asm64 accordingly.
def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32> {
  let HasExtVOP3DPP = 0;
  let HasSrc0Mods = 1;
  let HasSrc1Mods = 1;
  let HasClamp = 1;

  let Src0Mod = Int32InputMods;
  let Src1Mod = Int32InputMods;
  let Ins64 = getIns64<Src0RC64, Src1RC64, getVregSrcForVT<Src2VT>.ret,
                       3 /*NumSrcArgs*/, HasClamp, 1 /*HasModifiers*/,
                       1 /*HasSrc2Mods*/, HasOMod,
                       Src0Mod, Src1Mod, Src2Mod>.ret;
  let Asm64 = "$vdst, $src0, $src1$clamp";
}

// Write out to vcc or arbitrary SGPR.
def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], /*EnableClamp=*/1> {
  // The 32-bit, SDWA and DPP asm strings spell the carry-out as a literal
  // "vcc"; only the VOP3 form has an explicit $sdst operand.
  let Asm32 = "$vdst, vcc, $src0, $src1";
  let AsmVOP3Base = "$vdst, $sdst, $src0, $src1$clamp";
  let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8 = "$vdst, vcc, $src0, $src1 $dpp8$fi";
  let AsmDPP16 = AsmDPP#"$fi";
  // DPP input lists carry $old and no source-modifier operands.
  let InsDPP = (ins DstRCDPP:$old,
                    Src0DPP:$src0,
                    Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));
  let InsDPP8 = (ins DstRCDPP:$old,
                     Src0DPP:$src0,
                     Src1DPP:$src1,
                     dpp8:$dpp8, FI:$fi);
  // The 64-bit (and VOP3 DPP) output lists add the explicit $sdst result.
  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
  let OutsVOP3DPP = Outs64;
  let OutsVOP3DPP8 = Outs64;
}

// Write out to vcc or arbitrary SGPR and read in from vcc or
// arbitrary SGPR.
def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableClamp=*/1> {
  let HasSrc2Mods = 0;
  // The 32-bit, SDWA and DPP asm strings spell both the carry-out and the
  // carry-in as a literal "vcc"; the VOP3 form uses explicit $sdst and $src2.
  let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
  let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8 = "$vdst, vcc, $src0, $src1, vcc $dpp8$fi";
  let AsmDPP16 = AsmDPP#"$fi";
  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
  let AsmVOP3Base = "$vdst, $sdst, $src0, $src1, $src2$clamp";
  let OutsVOP3DPP = Outs64;
  let OutsVOP3DPP8 = Outs64;

  // Suppress src2 implied by type since the 32-bit encoding uses an
  // implicit VCC use.
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);

  // SDWA and DPP input lists likewise have no src2 operand.
  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                     clampmod:$clamp,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);

  let InsDPP = (ins DstRCDPP:$old,
                    Src0DPP:$src0,
                    Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));
  let InsDPP8 = (ins DstRCDPP:$old,
                     Src0DPP:$src0,
                     Src1DPP:$src1,
                     dpp8:$dpp8, FI:$fi);

  let HasExt = 1;
  let HasExtDPP = 1;
  let HasExt32BitDPP = 1;
  let HasExtSDWA = 1;
  let HasExtSDWA9 = 1;
}

// Read in from vcc or arbitrary SGPR.
class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> {
  // Asm32 has no third operand, while the SDWA and DPP strings end in a
  // literal "vcc" and the VOP3 string uses an explicit $src2.
  let Asm32 = "$vdst, $src0, $src1";
  let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP = "$vdst, $src0_modifiers, $src1_modifiers, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8 = "$vdst, $src0, $src1, vcc $dpp8$fi";
  let AsmDPP16 = AsmDPP#"$fi";
  let AsmVOP3Base = "$vdst, $src0_modifiers, $src1_modifiers, $src2";

  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst);

  // Suppress src2 implied by type since the 32-bit encoding uses an
  // implicit VCC use.
  let Ins32 = (ins VSrc_f32:$src0, Src1RC32:$src1);

  let HasModifiers = 1;

  // Select FP modifiers for VOP3
  let Src0Mod = !if(!eq(Src0VT.Size, 16), FP16InputMods, FP32InputMods);
  let Src1Mod = Src0Mod;

  // Float source modifiers are enabled and integer ones disabled, regardless
  // of the ArgVT element types.
  let HasSrc0IntMods = 0;
  let HasSrc1IntMods = 0;
  let HasSrc0FloatMods = 1;
  let HasSrc1FloatMods = 1;
  let InsSDWA = (ins FP32SDWAInputMods:$src0_modifiers, SDWASrc_f32:$src0,
                     FP32SDWAInputMods:$src1_modifiers, SDWASrc_f32:$src1,
                     clampmod:$clamp,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);

  let InsDPP = (ins DstRCDPP:$old,
                    FPVRegInputMods:$src0_modifiers, Src0DPP:$src0,
                    FPVRegInputMods:$src1_modifiers, Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));
  let InsDPP8 = (ins DstRCDPP:$old,
                     FPVRegInputMods:$src0_modifiers, Src0DPP:$src0,
                     FPVRegInputMods:$src1_modifiers, Src1DPP:$src1,
                     dpp8:$dpp8, FI:$fi);

  let Src0ModVOP3DPP = FPVRegInputMods;
  let Src1ModVOP3DPP = FPVRegInputMods;

  let HasExt = 1;
  let HasExtDPP = 1;
  let HasExt32BitDPP = 1;
  let HasExtSDWA = 1;
  let HasExtSDWA9 = 1;
}

def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>;
def VOP2e_I16_I16_I16_I1 : VOP2e_SGPR<[i16, i16, i16, i1]>;

// Profile for v_readlane: the result goes to an SReg_32 and every extension
// (SDWA / DPP) is disabled. The 64-bit lists and asm string reuse the 32-bit
// ones.
def VOP_READLANE : VOPProfile<[i32, i32, i32, untyped]> {
  let Outs32 = (outs SReg_32:$vdst);
  let Outs64 = Outs32;
  let Ins32 = (ins VRegOrLdsSrc_32:$src0, SCSrc_b32:$src1);
  let Ins64 = Ins32;
  let Asm32 = " $vdst, $src0, $src1";
  let Asm64 = Asm32;

  let HasExt = 0;
  let HasExtDPP = 0;
  let HasExt32BitDPP = 0;
  let HasExt64BitDPP = 0;
  let HasExtSDWA = 0;
  let HasExtSDWA9 = 0;
}

// Profile for v_writelane: the third input is $vdst_in rather than a src2
// (HasSrc2 is cleared) and it does not appear in the asm string. Every
// extension is disabled.
def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
  let Outs32 = (outs VGPR_32:$vdst);
  let Outs64 = Outs32;
  let Ins32 = (ins SCSrc_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in);
  let Ins64 = Ins32;
  let Asm32 = " $vdst, $src0, $src1";
  let Asm64 = Asm32;
  let HasSrc2 = 0;
  let HasSrc2Mods = 0;

  let HasExt = 0;
  let HasExtDPP = 0;
  let HasExt32BitDPP = 0;
  let HasExt64BitDPP = 0;
  let HasExtSDWA = 0;
  let HasExtSDWA9 = 0;
}

//===----------------------------------------------------------------------===//
// VOP2 Instructions
//===----------------------------------------------------------------------===//

// In the definitions below, the arguments follow the multiclass signatures:
// for the *_VOPD forms the hex value and following string are VOPDOp and
// VOPDName; a trailing mnemonic string is revOp (the commuted counterpart);
// a trailing 1 is GFX9Renamed.

let SubtargetPredicate = isGFX11Plus in
defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1>;
defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">;
let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;

let isCommutable = 1 in {
let isReMaterializable = 1 in {
defm V_ADD_F32 : VOP2Inst_VOPD <"v_add_f32", VOP_F32_F32_F32, 0x4, "v_add_f32", any_fadd>;
defm V_SUB_F32 : VOP2Inst_VOPD <"v_sub_f32", VOP_F32_F32_F32, 0x5, "v_sub_f32", any_fsub>;
defm V_SUBREV_F32 : VOP2Inst_VOPD <"v_subrev_f32", VOP_F32_F32_F32, 0x6, "v_subrev_f32", null_frag, "v_sub_f32">;
defm V_MUL_LEGACY_F32 : VOP2Inst_VOPD <"v_mul_legacy_f32", VOP_F32_F32_F32, 0x7, "v_mul_dx9_zero_f32", AMDGPUfmul_legacy>;
defm V_MUL_F32 : VOP2Inst_VOPD <"v_mul_f32", VOP_F32_F32_F32, 0x3, "v_mul_f32", any_fmul>;
defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>;
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>;
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
defm V_MIN_F32 : VOP2Inst_VOPD <"v_min_f32", VOP_F32_F32_F32, 0xb, "v_min_f32", fminnum_like>;
defm V_MAX_F32 : VOP2Inst_VOPD <"v_max_f32", VOP_F32_F32_F32, 0xa, "v_max_f32", fmaxnum_like>;
defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, clshr_rev_32, "v_lshr_b32">;
defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, cashr_rev_32, "v_ashr_i32">;
defm V_LSHLREV_B32 : VOP2Inst_VOPD <"v_lshlrev_b32", VOP_I32_I32_I32, 0x11, "v_lshlrev_b32", clshl_rev_32, "v_lshl_b32">;
defm V_AND_B32 : VOP2Inst_VOPD <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, 0x12, "v_and_b32", and>;
defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
} // End isReMaterializable = 1

let mayRaiseFPException = 0 in {
let OtherPredicates = [HasMadMacF32Insts] in {
// The MAC forms tie the destination to $src2 and exclude $src2 from the
// encoding.
let Constraints = "$vdst = $src2", DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1 in {
defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>;

let SubtargetPredicate = isGFX6GFX7GFX10 in
defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_MAC_LEGACY_F32>;
} // End Constraints = "$vdst = $src2", DisableEncoding="$src2",
  //     isConvertibleToThreeAddress = 1

let isReMaterializable = 1 in
def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>;
} // End OtherPredicates = [HasMadMacF32Insts]
} // End mayRaiseFPException = 0

// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
defm V_ADD_CO_U32 : VOP2bInst <"v_add_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_co_u32", 1>;
defm V_SUB_CO_U32 : VOP2bInst <"v_sub_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>;
defm V_SUBREV_CO_U32 : VOP2bInst <"v_subrev_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>;
defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32", 1>;
defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;
defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;


let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in {
defm V_ADD_U32 : VOP2Inst_VOPD <"v_add_u32", VOP_I32_I32_I32_ARITH, 0x10, "v_add_nc_u32", null_frag, "v_add_u32", 1>;
defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
}

} // End isCommutable = 1

// These are special and do not read the exec mask.
// Lane access pseudos. Uses is cleared for both definitions, and both are
// marked convergent.
let isConvergent = 1, Uses = []<Register> in {
def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
  [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>;
// The writelane result is tied to $vdst_in, which is not separately encoded.
let IsNeverUniform = 1, Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE,
  [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]>;
} // End IsNeverUniform, $vdst = $vdst_in, DisableEncoding $vdst_in
} // End isConvergent = 1

let isReMaterializable = 1 in {
defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_I32_I32_I32>;
defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_I32_I32_I32, add_ctpop>;
let IsNeverUniform = 1 in {
defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>;
defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>;
} // End IsNeverUniform = 1
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, any_fldexp>;

// These conversions take FP sources but are explicitly marked as neither
// reading the mode register nor raising FP exceptions.
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>;
defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_u16_f32>;
}

defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_V2F16_F32_F32, AMDGPUpkrtz_f16_f32>;
defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_V2I16_I32_I32, AMDGPUpk_u16_u32>;
defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_V2I16_I32_I32, AMDGPUpk_i16_i32>;


let SubtargetPredicate = isGFX6GFX7 in {
defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>;
defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>;
} // End SubtargetPredicate = isGFX6GFX7

let isCommutable = 1 in {
let SubtargetPredicate = isGFX6GFX7 in {
defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, csrl_32>;
defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, csra_32>;
defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, cshl_32>;
} // End SubtargetPredicate = isGFX6GFX7
} // End isCommutable = 1
} // End isReMaterializable = 1

defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst"

// Selection pattern for a two-operand node wrapped in DivergentBinFrag<Op>.
// The operand types come from Inst's profile. If Inst's Commutable_REV record
// has IsOrig set the operands are passed through in order, otherwise they are
// swapped.
class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
  GCNPat<
      (DivergentBinFrag<Op> Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
      !if(!cast<Commutable_REV>(Inst).IsOrig,
        (Inst $src0, $src1),
        (Inst $src1, $src0)
      )
  >;

// Same as DivergentBinOp, but the output instruction takes one extra trailing
// operand, which is always emitted as 0 (presumably the clamp bit, going by
// the class name -- confirm against the instruction's operand list).
class DivergentClampingBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
  GCNPat<
      (DivergentBinFrag<Op> Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
      !if(!cast<Commutable_REV>(Inst).IsOrig,
        (Inst $src0, $src1, 0),
        (Inst $src1, $src0, 0)
      )
  >;

def : DivergentBinOp<csrl_32, V_LSHRREV_B32_e64>;
def : DivergentBinOp<csra_32, V_ASHRREV_I32_e64>;
def : DivergentBinOp<cshl_32, V_LSHLREV_B32_e64>;

let SubtargetPredicate = HasAddNoCarryInsts in {
  def : DivergentClampingBinOp<add, V_ADD_U32_e64>;
  def : DivergentClampingBinOp<sub, V_SUB_U32_e64>;
}

let SubtargetPredicate = isGFX6GFX7GFX8GFX9, Predicates = [isGFX6GFX7GFX8GFX9] in {
def : DivergentClampingBinOp<add, V_ADD_CO_U32_e64>;
def : DivergentClampingBinOp<sub, V_SUB_CO_U32_e64>;
}

def : DivergentBinOp<adde, V_ADDC_U32_e32>;
def : DivergentBinOp<sube, V_SUBB_U32_e32>;

// Selection pattern for a 64-bit two-operand node wrapped in DivergentBinFrag:
// Inst is applied separately to the sub0 and sub1 halves of both sources and
// the two i32 results are recombined into a VReg_64 with REG_SEQUENCE.
class divergent_i64_BinOp <SDPatternOperator Op, Instruction Inst> :
  GCNPat<
    (DivergentBinFrag<Op> i64:$src0, i64:$src1),
    (REG_SEQUENCE VReg_64,
      (Inst
        (i32 (EXTRACT_SUBREG $src0, sub0)),
        (i32 (EXTRACT_SUBREG $src1, sub0))
      ), sub0,
      (Inst
        (i32 (EXTRACT_SUBREG $src0, sub1)),
        (i32 (EXTRACT_SUBREG $src1, sub1))
      ), sub1
    )
  >;

def : divergent_i64_BinOp <and, V_AND_B32_e64>;
def : divergent_i64_BinOp <or, V_OR_B32_e64>;
def : divergent_i64_BinOp <xor, V_XOR_B32_e64>;

// mul24 w/ 64 bit output.
// InstLo produces the sub0 half and InstHi the sub1 half of the i64 result;
// both are fed the same two i32 sources.
class mul24_64_Pat<SDPatternOperator Op, Instruction InstLo, Instruction InstHi> : GCNPat<
  (i64 (Op i32:$src0, i32:$src1)),
  (REG_SEQUENCE VReg_64,
    (InstLo $src0, $src1), sub0,
    (InstHi $src0, $src1), sub1)
>;

def : mul24_64_Pat<AMDGPUmul_i24, V_MUL_I32_I24_e64, V_MUL_HI_I32_I24_e64>;
def : mul24_64_Pat<AMDGPUmul_u24, V_MUL_U32_U24_e64, V_MUL_HI_U32_U24_e64>;

//===----------------------------------------------------------------------===//
// 16-Bit Operand Instructions
//===----------------------------------------------------------------------===//

// The ldexp.f16 intrinsic expects an integer src1 operand, though the hardware
// encoding treats src1 as an f16
def LDEXP_F16_VOPProfile : VOPProfile <[f16, f16, f16, untyped]> {
  let Src1Mod = Int32InputMods;
  let Src1ModDPP = IntVRegInputMods;
  let Src1ModVOP3DPP = IntVRegInputMods;
  // SDWA sext is the only modifier allowed.
  let HasSrc1IntMods = 1;
  let HasSrc1FloatMods = 0;
  let Src1ModSDWA = Int16SDWAInputMods;
}
// Variant of the profile for the *_t16 definition: built on VOPProfile_Fake16
// and restricting the 32-bit-encoding and DPP src1 operands to VGPR_32_Lo128.
def LDEXP_F16_VOPProfile_True16 : VOPProfile_Fake16<VOP_F16_F16_F16> {
  let Src1RC32 = RegisterOperand<VGPR_32_Lo128>;
  let Src1DPP = RegisterOperand<VGPR_32_Lo128>;
  let Src1ModDPP = IntT16VRegInputMods</* IsFake16= */ 1>;
}

let isReMaterializable = 1 in {
let FPDPRounding = 1 in {
  let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts] in
    defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", LDEXP_F16_VOPProfile>;
  let SubtargetPredicate = HasTrue16BitInsts in
    defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16>;
} // End FPDPRounding = 1
// FIXME VOP3 Only instructions. NFC using VOPProfile_True16 for these until a planned change to use a new register class for VOP3 encoded True16 instructions
defm V_LSHLREV_B16 : VOP2Inst_e64_t16 <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>;
defm V_LSHRREV_B16 : VOP2Inst_e64_t16 <"v_lshrrev_b16", VOP_I16_I16_I16, clshr_rev_16>;
defm V_ASHRREV_I16 : VOP2Inst_e64_t16 <"v_ashrrev_i16", VOP_I16_I16_I16, cashr_rev_16>;
let isCommutable = 1 in {
let FPDPRounding = 1 in {
defm V_ADD_F16 : VOP2Inst_t16 <"v_add_f16", VOP_F16_F16_F16, any_fadd>;
defm V_SUB_F16 : VOP2Inst_t16 <"v_sub_f16", VOP_F16_F16_F16, any_fsub>;
defm V_SUBREV_F16 : VOP2Inst_t16 <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">;
defm V_MUL_F16 : VOP2Inst_t16 <"v_mul_f16", VOP_F16_F16_F16, any_fmul>;
} // End FPDPRounding = 1
defm V_MUL_LO_U16 : VOP2Inst_e64_t16 <"v_mul_lo_u16", VOP_I16_I16_I16, mul>;
defm V_MAX_F16 : VOP2Inst_t16 <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>;
defm V_MIN_F16 : VOP2Inst_t16 <"v_min_f16", VOP_F16_F16_F16, fminnum_like>;
defm V_MAX_U16 : VOP2Inst_e64_t16 <"v_max_u16", VOP_I16_I16_I16, umax>;
defm V_MAX_I16 : VOP2Inst_e64_t16 <"v_max_i16", VOP_I16_I16_I16, smax>;
defm V_MIN_U16 : VOP2Inst_e64_t16 <"v_min_u16", VOP_I16_I16_I16, umin>;
defm V_MIN_I16 : VOP2Inst_e64_t16 <"v_min_i16", VOP_I16_I16_I16, smin>;
} // End isCommutable = 1
} // End isReMaterializable = 1

// Selection pattern for ldexp on the profile P of `inst`: src0 is matched
// through VOP3Mods0 (modifiers, clamp, omod) with P.Src0VT, src1 is matched
// as an i16 through VOP3Mods0 (modifiers only), and everything is forwarded
// to `inst` in the order src0_modifiers, src0, src1_modifiers, src1, clamp,
// omod.
class LDEXP_F16_Pat <SDPatternOperator op, VOP_Pseudo inst, VOPProfile P = inst.Pfl> : GCNPat <
  (P.DstVT (op (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
               (i16 (VOP3Mods0 P.Src1VT:$src1, i32:$src1_modifiers)))),
  (inst $src0_modifiers, $src0,
        $src1_modifiers, $src1,
        $clamp, /* clamp */
        $omod /* omod */)
>;

let OtherPredicates = [NotHasTrue16BitInsts] in
def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_e64>;

let OtherPredicates = [HasTrue16BitInsts] in
def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_t16_e64>;

let SubtargetPredicate = isGFX11Plus in {
  let isCommutable = 1 in {
    defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, and>;
    defm V_OR_B16_t16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, or>;
    defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, xor>;
  } // End isCommutable = 1
} // End SubtargetPredicate = isGFX11Plus

// f16 FMAMK/FMAAK pseudos. Each comes in two flavours selected by predicate:
// the plain one (GFX10+ without true16) and the *_t16 one (true16 subtargets).
// The empty suffix argument keeps the record names free of "_e32".
let FPDPRounding = 1, isReMaterializable = 1, FixedSize = 1 in {
let SubtargetPredicate = isGFX10Plus, OtherPredicates = [NotHasTrue16BitInsts] in {
def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">;
}
let SubtargetPredicate = HasTrue16BitInsts in {
def V_FMAMK_F16_t16 : VOP2_Pseudo <"v_fmamk_f16_t16", VOP_MADMK_F16_t16, [], "">;
}

let isCommutable = 1 in {
let SubtargetPredicate = isGFX10Plus, OtherPredicates = [NotHasTrue16BitInsts] in {
def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">;
}
let SubtargetPredicate = HasTrue16BitInsts in {
def V_FMAAK_F16_t16 : VOP2_Pseudo <"v_fmaak_f16_t16", VOP_MADAK_F16_t16, [], "">;
}
} // End isCommutable = 1
} // End FPDPRounding = 1, isReMaterializable = 1, FixedSize = 1

// The accumulator $src2 is tied to $vdst and therefore not encoded.
let Constraints = "$vdst = $src2",
    DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1 in {
let SubtargetPredicate = isGFX10Plus, OtherPredicates = [NotHasTrue16BitInsts] in {
defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>;
}
let SubtargetPredicate = HasTrue16BitInsts in {
defm V_FMAC_F16_t16 : VOP2Inst <"v_fmac_f16_t16", VOP_MAC_F16_t16>;
}
} // End FMAC Constraints

let SubtargetPredicate = Has16BitInsts in {
let isReMaterializable = 1 in {
let FPDPRounding = 1 in {
def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">;
} // End FPDPRounding = 1
let isCommutable = 1 in {
let mayRaiseFPException = 0 in {
def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">;
}
let SubtargetPredicate = isGFX8GFX9 in {
  defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>;
  defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16_ARITH, sub>;
  defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16_ARITH, null_frag, "v_sub_u16">;
}
} // End isCommutable = 1
} // End isReMaterializable = 1

// FIXME: Missing FPDPRounding
let Constraints = "$vdst = $src2", DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1, isCommutable = 1 in {
defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>;
}
} // End SubtargetPredicate = Has16BitInsts


let SubtargetPredicate = HasDLInsts in {

let isReMaterializable = 1 in
defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32, xnor>;

// Two spellings of xnor are matched: not(xor(a, b)) and xor(not(a), b), each
// for i32 and, split into sub0/sub1 halves, for i64.
def : GCNPat<
  (i32 (DivergentUnaryFrag<not> (xor_oneuse i32:$src0, i32:$src1))),
  (i32 (V_XNOR_B32_e64 $src0, $src1))
>;

def : GCNPat<
  (i32 (DivergentBinFrag<xor_oneuse> (not i32:$src0), i32:$src1)),
  (i32 (V_XNOR_B32_e64 $src0, $src1))
>;

def : GCNPat<
  (i64 (DivergentUnaryFrag<not> (xor_oneuse i64:$src0, i64:$src1))),
  (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64
                            (i32 (EXTRACT_SUBREG $src0, sub0)),
                            (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0,
                     (i32 (V_XNOR_B32_e64
                            (i32 (EXTRACT_SUBREG $src0, sub1)),
                            (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1)
>;

def : GCNPat<
  (i64 (DivergentBinFrag<xor_oneuse> (not i64:$src0), i64:$src1)),
  (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64
                            (i32 (EXTRACT_SUBREG $src0, sub0)),
                            (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0,
                     (i32 (V_XNOR_B32_e64
                            (i32 (EXTRACT_SUBREG $src0, sub1)),
                            (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1)
>;

let Constraints = "$vdst = $src2",
    DisableEncoding = "$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1 in
defm V_FMAC_F32 : VOP2Inst_VOPD <"v_fmac_f32", VOP_MAC_F32, 0x0, "v_fmac_f32">;
} // End SubtargetPredicate = HasDLInsts

let SubtargetPredicate = HasFmaLegacy32 in {

let Constraints = "$vdst = $src2",
    DisableEncoding = "$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1 in
defm V_FMAC_LEGACY_F32 : VOP2Inst <"v_fmac_legacy_f32", VOP_MAC_LEGACY_F32>;

} // End SubtargetPredicate = HasFmaLegacy32

let SubtargetPredicate = HasFmacF64Inst,
    Constraints = "$vdst = $src2",
    DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1,
    SchedRW = [WriteDoubleAdd] in
defm V_FMAC_F64 : VOP2Inst <"v_fmac_f64", VOP_MAC_F64>;

// Accumulating dot-product instructions; the accumulator $src2 is tied to
// $vdst. Each one is gated on its own HasDot*Insts predicate.
let Constraints = "$vdst = $src2",
    DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1,
    IsDOT = 1 in {
  let SubtargetPredicate = HasDot5Insts in
    defm V_DOT2C_F32_F16 : VOP2Inst_VOPD<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16, 0xc, "v_dot2acc_f32_f16">;
  let SubtargetPredicate = HasDot6Insts in
    defm V_DOT4C_I32_I8 : VOP2Inst<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>;

  let SubtargetPredicate = HasDot4Insts in
    defm V_DOT2C_I32_I16 : VOP2Inst<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>;
  let SubtargetPredicate = HasDot3Insts in
    defm V_DOT8C_I32_I4 : VOP2Inst<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>;
}

// Select the accumulating e32 forms for dot nodes/intrinsics whose clamp
// operand is DSTCLAMP.NONE. AddedComplexity raises the priority of these
// patterns.
let AddedComplexity = 30 in {
  def : GCNPat<
    (f32 (AMDGPUfdot2 v2f16:$src0, v2f16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))),
    (f32 (V_DOT2C_F32_F16_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot5Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot4 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT4C_I32_I8_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot6Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot2 v2i16:$src0, v2i16:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT2C_I32_I16_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot4Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot8 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT8C_I32_I4_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot3Insts;
  }
} // End AddedComplexity = 30

let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, FixedSize = 1 in {
def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">, VOPD_Component<0x2, "v_fmamk_f32">;

let isCommutable = 1 in
def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">;
} // End SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, FixedSize = 1

let SubtargetPredicate = HasPkFmacF16Inst in {
defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>;
} // End SubtargetPredicate = HasPkFmacF16Inst

// Note: 16-bit instructions produce a 0 result in the high 16-bits
// on GFX8 and GFX9 and preserve high 16 bits on GFX10+
//
// Folds a zero-extension of a 16-bit `op` into `inst`: the i32 form uses the
// instruction result directly, the i64 form pairs it (sub0) with a
// V_MOV_B32 of 0 (sub1).
multiclass Arithmetic_i16_0Hi_Pats <SDPatternOperator op, Instruction inst> {

def : GCNPat<
  (i32 (zext (op i16:$src0, i16:$src1))),
  (inst VSrc_b16:$src0, VSrc_b16:$src1)
>;

def : GCNPat<
  (i64 (zext (op i16:$src0, i16:$src1))),
   (REG_SEQUENCE VReg_64,
     (inst $src0, $src1), sub0,
     (V_MOV_B32_e32 (i32 0)), sub1)
>;
}

// Extends an i1 to i16 through V_CNDMASK_B32_e64 with source values 0 and 1
// and $src as the last operand.
class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
  (i16 (ext i1:$src)),
  (V_CNDMASK_B32_e64 (i32 0/*src0mod*/), (i32 0/*src0*/),
                     (i32 0/*src1mod*/), (i32 1/*src1*/),
                     $src)
>;

// Bitwise and/or/xor on i16 and v2i16 are selected to the 32-bit instructions.
foreach vt = [i16, v2i16] in {
def : GCNPat <
  (and vt:$src0, vt:$src1),
  (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;

def : GCNPat <
  (or vt:$src0, vt:$src1),
  (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;

def : GCNPat <
  (xor vt:$src0, vt:$src1),
  (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;
}

let Predicates = [Has16BitInsts, isGFX8GFX9] in {

// Undo sub x, c -> add x, -c canonicalization since c is more likely
// an inline immediate than -c.
// TODO: Also do for 64-bit.
def : GCNPat<
  (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
  (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
>;

def : GCNPat<
  (i32 (zext (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)))),
  (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
>;

defm : Arithmetic_i16_0Hi_Pats<add, V_ADD_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<mul, V_MUL_LO_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<sub, V_SUB_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<smin, V_MIN_I16_e64>;
defm : Arithmetic_i16_0Hi_Pats<smax, V_MAX_I16_e64>;
defm : Arithmetic_i16_0Hi_Pats<umin, V_MIN_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<umax, V_MAX_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<clshl_rev_16, V_LSHLREV_B16_e64>;
defm : Arithmetic_i16_0Hi_Pats<clshr_rev_16, V_LSHRREV_B16_e64>;
defm : Arithmetic_i16_0Hi_Pats<cashr_rev_16, V_ASHRREV_I16_e64>;

} // End Predicates = [Has16BitInsts, isGFX8GFX9]

let Predicates = [Has16BitInsts] in {

def : ZExt_i16_i1_Pat<zext>;
def : ZExt_i16_i1_Pat<anyext>;

// Sign extension of an i1: same shape as ZExt_i16_i1_Pat but with -1 as src1.
def : GCNPat <
  (i16 (sext i1:$src)),
  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
                     /*src1mod*/(i32 0), /*src1*/(i32 -1), $src)
>;

} // End Predicates = [Has16BitInsts]


let SubtargetPredicate = HasIntClamp in {
// Set clamp bit for saturation.
def : VOPBinOpClampPat<uaddsat, V_ADD_CO_U32_e64, i32>;
def : VOPBinOpClampPat<usubsat, V_SUB_CO_U32_e64, i32>;
}

let SubtargetPredicate = HasAddNoCarryInsts, OtherPredicates = [HasIntClamp] in {
let AddedComplexity = 1 in { // Prefer over form with carry-out.
def : VOPBinOpClampPat<uaddsat, V_ADD_U32_e64, i32>;
def : VOPBinOpClampPat<usubsat, V_SUB_U32_e64, i32>;
}
}

let SubtargetPredicate = Has16BitInsts, OtherPredicates = [HasIntClamp] in {
def : VOPBinOpClampPat<uaddsat, V_ADD_U16_e64, i16>;
def : VOPBinOpClampPat<usubsat, V_SUB_U16_e64, i16>;
}

// GFX12+ VOP2 pseudos for 64-bit operations. The *_pseudo records get their
// final mnemonics (v_add_f64, v_mul_f64, v_lshlrev_b64) when the GFX12 real
// instructions are defined.
let SubtargetPredicate = isGFX12Plus, isReMaterializable = 1 in {
  let SchedRW = [WriteDoubleAdd], isCommutable = 1 in {
    let FPDPRounding = 1 in {
      defm V_ADD_F64_pseudo : VOP2Inst <"v_add_f64_pseudo", VOP_F64_F64_F64, any_fadd>;
      // Use any_fmul, matching any_fadd above and V_MUL_F16, so that the
      // strict (constrained) multiply is selectable here as well as the plain
      // fmul node.
      defm V_MUL_F64_pseudo : VOP2Inst <"v_mul_f64_pseudo", VOP_F64_F64_F64, any_fmul>;
    } // End FPDPRounding = 1
    defm V_MIN_NUM_F64 : VOP2Inst <"v_min_num_f64", VOP_F64_F64_F64, fminnum_like>;
    defm V_MAX_NUM_F64 : VOP2Inst <"v_max_num_f64", VOP_F64_F64_F64, fmaxnum_like>;
  } // End SchedRW = [WriteDoubleAdd], isCommutable = 1
  let SchedRW = [Write64Bit] in {
    defm V_LSHLREV_B64_pseudo : VOP2Inst <"v_lshlrev_b64_pseudo", VOP_I64_I32_I64, clshl_rev_64>;
  } // End SchedRW = [Write64Bit]
} // End SubtargetPredicate = isGFX12Plus, isReMaterializable = 1

//===----------------------------------------------------------------------===//
// DPP Encodings
//===----------------------------------------------------------------------===//

// Encoding of a VOP2 instruction in DPP form. The layout follows VOP2e
// (src1 in bits 16-9, vdst in 24-17, opcode in 30-25, bit 31 clear), but the
// low nine bits, where VOP2e places src0, hold the fixed value 0xfa.
// hasSideEffects, Defs, SchedRW and Uses are copied from the pseudo.
class VOP2_DPP<bits<6> op, VOP2_DPP_Pseudo ps,
               string opName = ps.OpName, VOPProfile p = ps.Pfl,
               bit IsDPP16 = 0> :
    VOP_DPP<opName, p, IsDPP16> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;

  bits<8> vdst;
  bits<8> src1;
  let Inst{8-0} = 0xfa;
  let Inst{16-9} = !if(p.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31} = 0x0;
}

// VOP2_DPP with IsDPP16 = 1, predicated on HasDPP16 and inheriting the
// pseudo's OtherPredicates.
class Base_VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
                      string opName = ps.OpName, VOPProfile p = ps.Pfl> :
    VOP2_DPP<op, ps, opName, p, 1> {
  let AssemblerPredicate = HasDPP16;
  let SubtargetPredicate = HasDPP16;
  let OtherPredicates = ps.OtherPredicates;
}

// Base_VOP2_DPP16 tied to its pseudo for a given encoding family (subtarget).
class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps, int subtarget,
                 string opName = ps.OpName, VOPProfile p = ps.Pfl> :
    Base_VOP2_DPP16<op, ps, opName, p>,
    SIMCInstr <ps.PseudoInstr, subtarget>;

// VOP2_DPP16 parameterised by a GFXGen record: takes the subtarget, assembler
// predicate and decoder namespace from Gen. The namespace is prefixed with
// "DPP" and, unless the profile is a real true16 one, suffixed with "_FAKE16".
class VOP2_DPP16_Gen<bits<6> op, VOP2_DPP_Pseudo ps, GFXGen Gen,
                     string opName = ps.OpName, VOPProfile p = ps.Pfl> :
    VOP2_DPP16<op, ps, Gen.Subtarget, opName, p> {
  let AssemblerPredicate = Gen.AssemblerPredicate;
  let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []);
  let DecoderNamespace = "DPP"#Gen.DecoderNamespace#
                         !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}

// Encoding of a VOP2 instruction in DPP8 form. Same layout as VOP2_DPP except
// that the low nine bits are filled from `fi` (not declared in this class;
// presumably provided by VOP_DPP8 -- confirm).
class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
                VOPProfile p = ps.Pfl> :
    VOP_DPP8<ps.OpName, p> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;

  bits<8> vdst;
  bits<8> src1;

  let Inst{8-0} = fi;
  let Inst{16-9} = !if(p.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31} = 0x0;

  let OtherPredicates = ps.OtherPredicates;
}

// VOP2_DPP8 parameterised by a GFXGen record; the decoder namespace is built
// like in VOP2_DPP16_Gen but with a "DPP8" prefix.
class VOP2_DPP8_Gen<bits<6> op, VOP2_Pseudo ps, GFXGen Gen,
                    VOPProfile p = ps.Pfl> :
    VOP2_DPP8<op, ps, p> {
  let AssemblerPredicate = Gen.AssemblerPredicate;
  let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []);
  let DecoderNamespace = "DPP8"#Gen.DecoderNamespace#
                         !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}

//===----------------------------------------------------------------------===//
// GFX11, GFX12
//===----------------------------------------------------------------------===//

//===------------------------------- VOP2 -------------------------------===//
// Real MADK-encoded (VOP2_MADKe) instruction for the pseudo named NAME.
multiclass VOP2Only_Real_MADK<GFXGen Gen, bits<6> op> {
  def Gen.Suffix :
    VOP2_Real_Gen<!cast<VOP2_Pseudo>(NAME), Gen>,
    VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
}

// As VOP2Only_Real_MADK, but the pseudo is looked up by opName and the
// assembly string is rebuilt from asmName plus the pseudo's operand string.
multiclass VOP2Only_Real_MADK_with_name<GFXGen Gen, bits<6> op, string asmName,
                                        string opName = NAME> {
  def Gen.Suffix :
      VOP2_Real_Gen<!cast<VOP2_Pseudo>(opName), Gen>,
      VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(opName).Pfl> {
    VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName);
    let AsmString = asmName # ps.AsmOperands;
  }
}

// Real 32-bit (VOP2e) encoding for the pseudo NAME#"_e32".
multiclass VOP2_Real_e32<GFXGen Gen, bits<6> op> {
  def _e32#Gen.Suffix :
    VOP2_Real_Gen<!cast<VOP2_Pseudo>(NAME#"_e32"), Gen>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
}

// VOP2_Real_e32 with IsSingle set.
multiclass VOP2Only_Real_e32<GFXGen Gen, bits<6> op> {
  let IsSingle = 1 in
    defm NAME: VOP2_Real_e32<Gen, op>;
}

// Real VOP3 encoding for the pseudo NAME#"_e64". The 10-bit VOP3 opcode is
// {0, 1, 0, 0, op}, i.e. the VOP2 opcode with bit 8 set.
multiclass VOP2_Real_e64<GFXGen Gen, bits<6> op> {
  def _e64#Gen.Suffix :
    VOP3_Real_Gen<!cast<VOP3_Pseudo>(NAME#"_e64"), Gen>,
    VOP3e_gfx11_gfx12<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}

// Real DPP16 encoding; only emitted when the e32 pseudo's profile has
// HasExtDPP set.
multiclass VOP2_Real_dpp<GFXGen Gen, bits<6> op> {
  if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
  def _dpp#Gen.Suffix : VOP2_DPP16_Gen<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), Gen>;
}

// Real DPP8 encoding; only emitted when the e32 pseudo's profile has
// HasExtDPP set.
multiclass VOP2_Real_dpp8<GFXGen Gen, bits<6> op> {
  if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
  def _dpp8#Gen.Suffix : VOP2_DPP8_Gen<op, !cast<VOP2_Pseudo>(NAME#"_e32"), Gen>;
}

//===------------------------- VOP2 (with name) -------------------------===//
// The *_with_name variants take the pseudo's record name (opName) and the
// mnemonic to print/parse (asmName) separately, for instructions whose
// mnemonic differs from the pseudo's.
multiclass VOP2_Real_e32_with_name<GFXGen Gen, bits<6> op, string opName,
                                   string asmName, bit single = 0> {
  defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
  def _e32#Gen.Suffix :
    VOP2_Real_Gen<ps, Gen, asmName>,
    VOP2e<op{5-0}, ps.Pfl> {
      let AsmString = asmName # ps.AsmOperands;
      let IsSingle = single;
    }
}
multiclass VOP2_Real_e64_with_name<GFXGen Gen, bits<6> op, string opName,
                                   string asmName> {
  defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
  def _e64#Gen.Suffix :
    VOP3_Real_Gen<ps, Gen>,
    VOP3e_gfx11_gfx12<{0, 1, 0, 0, op{5-0}}, ps.Pfl> {
      let AsmString = asmName # ps.AsmOperands;
    }
}

multiclass VOP2_Real_dpp_with_name<GFXGen Gen, bits<6> op, string opName,
                                   string asmName> {
  defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
  if ps.Pfl.HasExtDPP then
  def _dpp#Gen.Suffix : VOP2_DPP16_Gen<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), Gen> {
    let AsmString = asmName # ps.Pfl.AsmDPP16;
  }
}
multiclass VOP2_Real_dpp8_with_name<GFXGen Gen, bits<6> op, string opName,
                                    string asmName> {
  defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
  if ps.Pfl.HasExtDPP then
  def _dpp8#Gen.Suffix : VOP2_DPP8_Gen<op, ps, Gen> {
    let AsmString = asmName # ps.Pfl.AsmDPP8;
  }
}

//===------------------------------ VOP2be ------------------------------===//
// Real e32 encoding whose assembly string is the pseudo's operand string with
// every ", vcc" removed.
multiclass VOP2be_Real_e32<GFXGen Gen, bits<6> op, string opName, string asmName> {
  defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
  def _e32#Gen.Suffix :
    VOP2_Real_Gen<ps, Gen>,
    VOP2e<op{5-0}, ps.Pfl> {
      let AsmString = asmName # !subst(", vcc", "", ps.AsmOperands);
    }
}
// DPP16 reals, emitted only when the profile has HasExtDPP. Three records are
// produced:
//   _dpp     - assembly string with ", vcc" removed;
//   _dpp_w32 - "vcc" replaced by "vcc_lo", wave32, assembler-parser only;
//   _dpp_w64 - operand string unchanged, wave64, assembler-parser only.
multiclass VOP2be_Real_dpp<GFXGen Gen, bits<6> op, string opName, string asmName> {
  if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
  def _dpp#Gen.Suffix :
    VOP2_DPP16_Gen<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), Gen, asmName> {
      string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
      let AsmString = asmName # !subst(", vcc", "", AsmDPP);
    }
  if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
  def _dpp_w32#Gen.Suffix :
    Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
      string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
      let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
      let isAsmParserOnly = 1;
      let WaveSizePredicate = isWave32;
      let AssemblerPredicate = Gen.AssemblerPredicate;
      let DecoderNamespace = Gen.DecoderNamespace;
    }
  if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
  def _dpp_w64#Gen.Suffix :
    Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
      string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
      let AsmString = asmName # AsmDPP;
      let isAsmParserOnly = 1;
      let WaveSizePredicate = isWave64;
      let AssemblerPredicate = Gen.AssemblerPredicate;
      let DecoderNamespace = Gen.DecoderNamespace;
    }
}
// DPP8 counterpart of VOP2be_Real_dpp, producing _dpp8, _dpp8_w32 and
// _dpp8_w64 with the same three assembly-string treatments.
multiclass VOP2be_Real_dpp8<GFXGen Gen, bits<6> op, string opName, string asmName> {
  if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
  def _dpp8#Gen.Suffix :
    VOP2_DPP8_Gen<op, !cast<VOP2_Pseudo>(opName#"_e32"), Gen> {
      string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
      let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
    }
  if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
  def _dpp8_w32#Gen.Suffix :
    VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
      string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
      let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
      let isAsmParserOnly = 1;
      let WaveSizePredicate = isWave32;
      let AssemblerPredicate = Gen.AssemblerPredicate;
      let DecoderNamespace = Gen.DecoderNamespace;
    }
  if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
  def _dpp8_w64#Gen.Suffix :
    VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
      string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
      let AsmString = asmName # AsmDPP8;
      let isAsmParserOnly = 1;
      let WaveSizePredicate = isWave64;
      let AssemblerPredicate = Gen.AssemblerPredicate;
      let DecoderNamespace = Gen.DecoderNamespace;
    }
}

// We don't want to override separate decoderNamespaces within these
multiclass VOP2_Realtriple_e64<GFXGen Gen, bits<6> op> {
  defm NAME : VOP3_Realtriple<Gen, {0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, NAME> ;
}

multiclass VOP2_Realtriple_e64_with_name<GFXGen Gen, bits<6> op, string opName,
                                         string asmName> {
  defm NAME : VOP3_Realtriple_with_name<Gen, {0, 1, 0, 0, op{5-0}}, opName, asmName> ;
}

// All real encodings of a VOP2be instruction: e32, the VOP3be triple, DPP16
// and DPP8.
multiclass VOP2be_Real<GFXGen Gen, bits<6> op, string opName, string asmName> :
  VOP2be_Real_e32<Gen, op, opName, asmName>,
  VOP3be_Realtriple<Gen, {0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, opName, asmName>,
  VOP2be_Real_dpp<Gen, op, opName, asmName>,
  VOP2be_Real_dpp8<Gen, op, opName, asmName>;

// Only for CNDMASK
multiclass VOP2e_Real<GFXGen Gen, bits<6> op, string opName, string asmName> :
  VOP2_Real_e32<Gen, op>,
  VOP2_Realtriple_e64<Gen, op>,
  VOP2be_Real_dpp<Gen, op, opName, asmName>,
  VOP2be_Real_dpp8<Gen, op, opName, asmName>;

// e32 (marked IsSingle), DPP16 and DPP8 reals; no VOP3 encoding.
multiclass VOP2Only_Real<GFXGen Gen, bits<6> op> :
  VOP2Only_Real_e32<Gen, op>,
  VOP2_Real_dpp<Gen, op>,
  VOP2_Real_dpp8<Gen, op>;

// VOP3 triple, e32, DPP16 and DPP8 reals.
multiclass VOP2_Real_FULL<GFXGen Gen, bits<6> op> :
  VOP2_Realtriple_e64<Gen, op>,
  VOP2_Real_e32<Gen, op>,
  VOP2_Real_dpp<Gen, op>,
  VOP2_Real_dpp8<Gen, op>;

// Named e32/DPP16/DPP8 reals without a VOP3 encoding, plus a MnemonicAlias
// from the pseudo's mnemonic to asmName under Gen's assembler predicate.
multiclass VOP2_Real_NO_VOP3_with_name<GFXGen Gen, bits<6> op, string opName,
                                       string asmName, bit isSingle = 0> {
  defm NAME : VOP2_Real_e32_with_name<Gen, op, opName, asmName, isSingle>,
              VOP2_Real_dpp_with_name<Gen, op, opName, asmName>,
              VOP2_Real_dpp8_with_name<Gen, op, opName, asmName>;
  defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
  def Gen.Suffix#"_alias" : MnemonicAlias<ps.Mnemonic, asmName>, Requires<[Gen.AssemblerPredicate]>;
}

multiclass VOP2_Real_FULL_with_name<GFXGen Gen, bits<6> op, string opName,
                                    string asmName> :
  VOP2_Realtriple_e64_with_name<Gen, op, opName, asmName>,
  VOP2_Real_NO_VOP3_with_name<Gen, op, opName, asmName>;

// Named e32 and e64 reals without DPP encodings, plus a MnemonicAlias from the
// pseudo's mnemonic to asmName.
multiclass VOP2_Real_NO_DPP_with_name<GFXGen Gen, bits<6> op, string opName,
                                      string asmName> {
  defm NAME : VOP2_Real_e32_with_name<Gen, op, opName, asmName>,
              VOP2_Real_e64_with_name<Gen, op, opName, asmName>;
  defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
  def Gen.Suffix#"_alias" : MnemonicAlias<ps.Mnemonic, asmName>, Requires<[Gen.AssemblerPredicate]>;
}

// e32 and e64 reals under the pseudo's own name, plus a MnemonicAlias from
// `alias` to NAME.
multiclass VOP2_Real_NO_DPP_with_alias<GFXGen Gen, bits<6> op, string alias> {
  defm NAME : VOP2_Real_e32<Gen, op>,
              VOP2_Real_e64<Gen, op>;
  def Gen.Suffix#"_alias" : MnemonicAlias<alias, NAME>, Requires<[Gen.AssemblerPredicate]>;
}

//===----------------------------------------------------------------------===//
// GFX12.
//===----------------------------------------------------------------------===//

// The *_gfx12 multiclasses below bind the generation argument to GFX12Gen and
// forward everything else to the generation-generic multiclass of the same
// base name.
multiclass VOP2be_Real_gfx12<bits<6> op, string opName, string asmName> :
  VOP2be_Real<GFX12Gen, op, opName, asmName>;

// Only for CNDMASK
multiclass VOP2e_Real_gfx12<bits<6> op, string opName, string asmName> :
  VOP2e_Real<GFX12Gen, op, opName, asmName>;

multiclass VOP2_Real_FULL_with_name_gfx12<bits<6> op, string opName,
                                          string asmName> :
  VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;

// As VOP2_Real_FULL_with_name_gfx12, plus a second MnemonicAlias mapping
// `alias` to asmName, restricted to isGFX12Only.
multiclass VOP2_Real_FULL_t16_with_name_gfx12<bits<6> op, string opName,
                                              string asmName, string alias> {
  defm NAME : VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
  def _gfx12_2nd_alias : MnemonicAlias<alias, asmName>, Requires<[isGFX12Only]>;
}

multiclass VOP2_Real_NO_DPP_with_name_gfx12<bits<6> op, string opName,
                                            string asmName> :
  VOP2_Real_NO_DPP_with_name<GFX12Gen, op, opName, asmName>;

multiclass VOP2_Real_NO_DPP_with_alias_gfx12<bits<6> op, string alias> :
  VOP2_Real_NO_DPP_with_alias<GFX12Gen, op, alias>;

// 64-bit operations: the first three are built from the *_pseudo records and
// given their final mnemonics here; min/max keep their pseudo's name and get
// the older v_min_f64 / v_max_f64 spellings as aliases.
defm V_ADD_F64 : VOP2_Real_NO_DPP_with_name_gfx12<0x002, "V_ADD_F64_pseudo", "v_add_f64">;
defm V_MUL_F64 : VOP2_Real_NO_DPP_with_name_gfx12<0x006, "V_MUL_F64_pseudo", "v_mul_f64">;
defm V_LSHLREV_B64 : VOP2_Real_NO_DPP_with_name_gfx12<0x01f, "V_LSHLREV_B64_pseudo", "v_lshlrev_b64">;
defm V_MIN_NUM_F64 : VOP2_Real_NO_DPP_with_alias_gfx12<0x00d, "v_min_f64">;
defm V_MAX_NUM_F64 : VOP2_Real_NO_DPP_with_alias_gfx12<0x00e, "v_max_f64">;

defm V_CNDMASK_B32 : VOP2e_Real_gfx12<0x001, "V_CNDMASK_B32", "v_cndmask_b32">;
// The carry-in add/sub pseudos (V_ADDC/V_SUBB/V_SUBBREV) are exposed under
// the *_co_ci_u32 mnemonics.
defm V_ADD_CO_CI_U32 :
  VOP2be_Real_gfx12<0x020, "V_ADDC_U32", "v_add_co_ci_u32">;
defm V_SUB_CO_CI_U32 :
  VOP2be_Real_gfx12<0x021, "V_SUBB_U32", "v_sub_co_ci_u32">;
defm V_SUBREV_CO_CI_U32 :
  VOP2be_Real_gfx12<0x022, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;

// The f32/f16 min/max pseudos are exposed under the *_num_* mnemonics. The
// t16 and fake16 flavours of each f16 instruction share one opcode.
defm V_MIN_NUM_F32 : VOP2_Real_FULL_with_name_gfx12<0x015, "V_MIN_F32", "v_min_num_f32">;
defm V_MAX_NUM_F32 : VOP2_Real_FULL_with_name_gfx12<0x016, "V_MAX_F32", "v_max_num_f32">;
defm V_MIN_NUM_F16 : VOP2_Real_FULL_t16_with_name_gfx12<0x030, "V_MIN_F16_t16", "v_min_num_f16", "v_min_f16">;
defm V_MIN_NUM_F16_fake16 : VOP2_Real_FULL_t16_with_name_gfx12<0x030, "V_MIN_F16_fake16", "v_min_num_f16", "v_min_f16">;
defm V_MAX_NUM_F16 : VOP2_Real_FULL_t16_with_name_gfx12<0x031, "V_MAX_F16_t16", "v_max_num_f16", "v_max_f16">;
defm V_MAX_NUM_F16_fake16 : VOP2_Real_FULL_t16_with_name_gfx12<0x031, "V_MAX_F16_fake16", "v_max_num_f16", "v_max_f16">;

let SubtargetPredicate = isGFX12Plus in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx12>;

  defm : VOP2bInstAliases<
    V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx12, "v_add_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx12, "v_sub_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx12, "v_subrev_co_ci_u32">;
} // End SubtargetPredicate = isGFX12Plus

//===----------------------------------------------------------------------===//
// GFX11.
//===----------------------------------------------------------------------===//

// The multiclasses below are thin wrappers that pin the generation argument
// (GFX11Gen and/or GFX12Gen) of the generic VOP2/VOP3 "Real" multiclasses so
// that the defm tables further down only have to spell out the opcode.

// VOP2be real encodings for GFX11 only.
multiclass VOP2be_Real_gfx11<bits<6> op, string opName, string asmName> :
  VOP2be_Real<GFX11Gen, op, opName, asmName>;

// Only for CNDMASK
multiclass VOP2e_Real_gfx11<bits<6> op, string opName, string asmName> :
  VOP2e_Real<GFX11Gen, op, opName, asmName>;

// GFX11 e32 + dpp + dpp8 encodings (no VOP3 form) under a new assembler name,
// plus a GFX11-only mnemonic alias from the pseudo's mnemonic to asmName.
multiclass VOP2_Real_NO_VOP3_with_name_gfx11<bits<6> op, string opName,
                                             string asmName, bit isSingle = 0> {
  defm NAME : VOP2_Real_e32_with_name<GFX11Gen, op, opName, asmName, isSingle>,
              VOP2_Real_dpp_with_name<GFX11Gen, op, opName, asmName>,
              VOP2_Real_dpp8_with_name<GFX11Gen, op, opName, asmName>;
  defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
  def _gfx11_alias : MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Only]>;
}

// GFX11-only forwarding to VOP2_Real_NO_DPP_with_name.
multiclass VOP2_Real_NO_DPP_with_name_gfx11<bits<6> op, string opName,
                                            string asmName> :
  VOP2_Real_NO_DPP_with_name<GFX11Gen, op, opName, asmName>;

// Same opcode instantiated for both GFX11 and GFX12.
multiclass VOP2_Real_FULL_gfx11_gfx12<bits<6> op> :
  VOP2_Real_FULL<GFX11Gen, op>, VOP2_Real_FULL<GFX12Gen, op>;

// As above, but the pseudo (opName) and the printed name (asmName) differ from
// the defm name.
multiclass VOP2_Real_FULL_with_name_gfx11_gfx12<bits<6> op, string opName,
                                                string asmName> :
  VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
  VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;

// VOP2Only_Real for both GFX11 and GFX12.
multiclass VOP2_Real_e32_gfx11_gfx12<bits<6> op> :
  VOP2Only_Real<GFX11Gen, op>, VOP2Only_Real<GFX12Gen, op>;

// VOP3-only wrappers: note the 10-bit opcode.
multiclass VOP3Only_Realtriple_gfx11_gfx12<bits<10> op> :
  VOP3Only_Realtriple<GFX11Gen, op>, VOP3Only_Realtriple<GFX12Gen, op>;

multiclass VOP3Only_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName> :
  VOP3Only_Realtriple_t16<GFX11Gen, op, asmName>,
  VOP3Only_Realtriple_t16<GFX12Gen, op, asmName>;

multiclass VOP3beOnly_Realtriple_gfx11_gfx12<bits<10> op> :
  VOP3beOnly_Realtriple<GFX11Gen, op>, VOP3beOnly_Realtriple<GFX12Gen, op>;

// MADK wrapper for GFX11 and GFX12. Argument order here is (op, asmName,
// opName), with opName defaulting to the defm name.
multiclass VOP2Only_Real_MADK_with_name_gfx11_gfx12<bits<6> op, string asmName,
                                                    string opName = NAME> :
  VOP2Only_Real_MADK_with_name<GFX11Gen, op, asmName, opName>,
  VOP2Only_Real_MADK_with_name<GFX12Gen, op, asmName, opName>;

// t16 wrappers take (op, asmName, opName = NAME) and reorder the arguments
// into the (op, opName, asmName) order expected by VOP2_Real_FULL_with_name.
multiclass VOP2_Real_FULL_t16_gfx11<bits<6> op, string asmName,
                                    string opName = NAME> :
  VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>;

multiclass VOP2_Real_FULL_t16_gfx11_gfx12<bits<6> op, string asmName,
                                          string opName = NAME> :
  VOP2_Real_FULL_with_name_gfx11_gfx12<op, opName, asmName>;

// GFX11-only forwarding to VOP2_Real_FULL.
multiclass VOP2_Real_FULL_gfx11<bits<6> op> :
  VOP2_Real_FULL<GFX11Gen, op>;

// GFX11 (and, where the multiclass name says so, GFX12) VOP2 opcode table.
// For the *_with_name forms the arguments are: opcode, pseudo name, asm name.
defm V_CNDMASK_B32 : VOP2e_Real_gfx11<0x001, "V_CNDMASK_B32",
  "v_cndmask_b32">;
defm V_DOT2ACC_F32_F16 : VOP2_Real_NO_VOP3_with_name_gfx11<0x002,
  "V_DOT2C_F32_F16", "v_dot2acc_f32_f16", 1>;
defm V_FMAC_DX9_ZERO_F32 : VOP2_Real_NO_DPP_with_name_gfx11<0x006,
  "V_FMAC_LEGACY_F32", "v_fmac_dx9_zero_f32">;
defm V_MUL_DX9_ZERO_F32 : VOP2_Real_FULL_with_name_gfx11_gfx12<0x007,
  "V_MUL_LEGACY_F32", "v_mul_dx9_zero_f32">;
defm V_LSHLREV_B32 : VOP2_Real_FULL_gfx11_gfx12<0x018>;
defm V_LSHRREV_B32 : VOP2_Real_FULL_gfx11_gfx12<0x019>;
defm V_ASHRREV_I32 : VOP2_Real_FULL_gfx11_gfx12<0x01a>;
defm V_ADD_CO_CI_U32 :
  VOP2be_Real_gfx11<0x020, "V_ADDC_U32", "v_add_co_ci_u32">;
defm V_SUB_CO_CI_U32 :
  VOP2be_Real_gfx11<0x021, "V_SUBB_U32", "v_sub_co_ci_u32">;
defm V_SUBREV_CO_CI_U32 :
  VOP2be_Real_gfx11<0x022, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;

defm V_CVT_PK_RTZ_F16_F32 : VOP2_Real_FULL_with_name_gfx11_gfx12<0x02f,
  "V_CVT_PKRTZ_F16_F32", "v_cvt_pk_rtz_f16_f32">;
defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx11_gfx12<0x03c>;

// F16 instructions: the _t16 and _fake16 pseudos of one instruction share the
// same opcode and the same assembler name.
defm V_ADD_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x032, "v_add_f16">;
defm V_ADD_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x032, "v_add_f16">;
defm V_SUB_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x033, "v_sub_f16">;
defm V_SUB_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x033, "v_sub_f16">;
defm V_SUBREV_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x034, "v_subrev_f16">;
defm V_SUBREV_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x034, "v_subrev_f16">;
defm V_MUL_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x035, "v_mul_f16">;
defm V_MUL_F16_fake16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x035, "v_mul_f16">;
defm V_FMAC_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x036, "v_fmac_f16">;
defm V_LDEXP_F16_t16 : VOP2_Real_FULL_t16_gfx11_gfx12<0x03b, "v_ldexp_f16">;
// v_max_f16 / v_min_f16 are instantiated for GFX11 only in this table.
defm V_MAX_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
defm V_MAX_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
defm V_MIN_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;
defm V_MIN_F16_fake16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;
defm V_FMAMK_F16_t16 : VOP2Only_Real_MADK_with_name_gfx11_gfx12<0x037, "v_fmamk_f16">;
defm V_FMAAK_F16_t16 : VOP2Only_Real_MADK_with_name_gfx11_gfx12<0x038, "v_fmaak_f16">;

// VOP3 only.
// These pseudos get only a VOP3-family real encoding on GFX11/GFX12; the
// opcode argument is the 10-bit VOP3 opcode.
defm V_CNDMASK_B16 : VOP3Only_Realtriple_gfx11_gfx12<0x25d>;
defm V_LDEXP_F32 : VOP3Only_Realtriple_gfx11_gfx12<0x31c>;
defm V_BFM_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x31d>;
defm V_BCNT_U32_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x31e>;
defm V_MBCNT_LO_U32_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x31f>;
defm V_MBCNT_HI_U32_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x320>;
// Renamed on GFX11+: built from the V_CVT_PKNORM_* pseudos but printed as
// v_cvt_pk_norm_*.
defm V_CVT_PK_NORM_I16_F32 : VOP3Only_Realtriple_with_name_gfx11_gfx12<0x321, "V_CVT_PKNORM_I16_F32", "v_cvt_pk_norm_i16_f32">;
defm V_CVT_PK_NORM_U16_F32 : VOP3Only_Realtriple_with_name_gfx11_gfx12<0x322, "V_CVT_PKNORM_U16_F32", "v_cvt_pk_norm_u16_f32">;
defm V_CVT_PK_U16_U32 : VOP3Only_Realtriple_gfx11_gfx12<0x323>;
defm V_CVT_PK_I16_I32 : VOP3Only_Realtriple_gfx11_gfx12<0x324>;
// Carry-out add/sub use the VOP3be wrapper.
defm V_ADD_CO_U32 : VOP3beOnly_Realtriple_gfx11_gfx12<0x300>;
defm V_SUB_CO_U32 : VOP3beOnly_Realtriple_gfx11_gfx12<0x301>;
defm V_SUBREV_CO_U32 : VOP3beOnly_Realtriple_gfx11_gfx12<0x302>;

// Instruction aliases tying the generic _e32 pseudos to their GFX11 real
// encodings; only active under isGFX11Only.
let SubtargetPredicate = isGFX11Only in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx11>;

  defm : VOP2bInstAliases<
    V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx11, "v_add_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx11, "v_sub_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx11, "v_subrev_co_ci_u32">;
} // End SubtargetPredicate = isGFX11Only

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

// Real (encoded) GFX10 variants of the VOP2 pseudos. Every def in this scope is
// assembled only under isGFX10Only and lands in the "GFX10" decoder table
// unless the def (or an inner let) overrides DecoderNamespace.
let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
  //===------------------------------- VOP2 -------------------------------===//
  // VOP2_MADKe encoding; the pseudo is looked up by NAME with no suffix.
  multiclass VOP2Only_Real_MADK_gfx10<bits<6> op> {
    def _gfx10 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX10>,
      VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }
  // Same, but built from pseudo `opName` and printed as `asmName`.
  multiclass VOP2Only_Real_MADK_gfx10_with_name<bits<6> op, string opName,
                                                string asmName> {
    defvar madk = !cast<VOP2_Pseudo>(opName);
    def _gfx10 :
      VOP2_Real<madk, SIEncodingFamily.GFX10>,
      VOP2_MADKe<op{5-0}, madk.Pfl> {
        VOP2_Pseudo ps = madk;
        let AsmString = asmName # ps.AsmOperands;
      }
  }
  // 32-bit VOP2 encoding of NAME_e32.
  multiclass VOP2_Real_e32_gfx10<bits<6> op> {
    def _e32_gfx10 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
  }
  // VOP3 encoding of NAME_e64; the VOP3 opcode is the VOP2 opcode prefixed
  // with the bits 0,1,0,0.
  multiclass VOP2_Real_e64_gfx10<bits<6> op> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
  // SDWA form, emitted only when the e32 profile has HasExtSDWA9.
  multiclass VOP2_Real_sdwa_gfx10<bits<6> op> {
    if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
    def _sdwa_gfx10 :
      VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
        let DecoderNamespace = "SDWA10";
      }
  }
  // DPP16 form, emitted only when the e32 profile has HasExt32BitDPP. It is
  // placed in the "SDWA10" decoder namespace.
  multiclass VOP2_Real_dpp_gfx10<bits<6> op> {
    if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
    def _dpp_gfx10 :
      VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"),
                 SIEncodingFamily.GFX10> {
        let DecoderNamespace = "SDWA10";
      }
  }
  // DPP8 form, same guard as DPP16; decoded from the "DPP8" namespace.
  multiclass VOP2_Real_dpp8_gfx10<bits<6> op> {
    if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
    def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
      let DecoderNamespace = "DPP8";
    }
  }

  //===------------------------- VOP2 (with name) -------------------------===//
  // The *_with_name multiclasses mirror the ones above but take the pseudo
  // name explicitly and override AsmString so the instruction prints as
  // `asmName`.
  multiclass VOP2_Real_e32_gfx10_with_name<bits<6> op, string opName,
                                           string asmName> {
    defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
    def _e32_gfx10 :
      VOP2_Real<ps32, SIEncodingFamily.GFX10>,
      VOP2e<op{5-0}, ps32.Pfl> {
        VOP2_Pseudo ps = ps32;
        let AsmString = asmName # ps.AsmOperands;
      }
  }
  multiclass VOP2_Real_e64_gfx10_with_name<bits<6> op, string opName,
                                           string asmName> {
    defvar ps64 = !cast<VOP3_Pseudo>(opName#"_e64");
    def _e64_gfx10 :
      VOP3_Real<ps64, SIEncodingFamily.GFX10>,
      VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, ps64.Pfl> {
        VOP3_Pseudo ps = ps64;
        let AsmString = asmName # ps.AsmOperands;
      }
  }
  let DecoderNamespace = "SDWA10" in {
    multiclass VOP2_Real_sdwa_gfx10_with_name<bits<6> op, string opName,
                                              string asmName> {
      defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
      if ps32.Pfl.HasExtSDWA9 then
      def _sdwa_gfx10 :
        VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
        VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
          VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
          let AsmString = asmName # ps.AsmOperands;
        }
    }
    multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName,
                                             string asmName> {
      defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
      if ps32.Pfl.HasExt32BitDPP then
      def _dpp_gfx10 :
        VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"),
                   SIEncodingFamily.GFX10> {
          VOP2_Pseudo ps = ps32;
          let AsmString = asmName # ps.Pfl.AsmDPP16;
        }
    }
    multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName,
                                              string asmName> {
      defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
      if ps32.Pfl.HasExt32BitDPP then
      def _dpp8_gfx10 : VOP2_DPP8<op, ps32> {
        VOP2_Pseudo ps = ps32;
        let AsmString = asmName # ps.Pfl.AsmDPP8;
        // Overrides the enclosing "SDWA10" namespace.
        let DecoderNamespace = "DPP8";
      }
    }
  } // End DecoderNamespace = "SDWA10"

  //===------------------------------ VOP2be ------------------------------===//
  // e32 form: ", vcc" is stripped from the pseudo's operand string.
  multiclass VOP2be_Real_e32_gfx10<bits<6> op, string opName, string asmName> {
    defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
    def _e32_gfx10 :
      VOP2_Real<ps32, SIEncodingFamily.GFX10>,
      VOP2e<op{5-0}, ps32.Pfl> {
        VOP2_Pseudo Ps = ps32;
        let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
      }
  }
  multiclass VOP2be_Real_e64_gfx10<bits<6> op, string opName, string asmName> {
    defvar ps64 = !cast<VOP3_Pseudo>(opName#"_e64");
    def _e64_gfx10 :
      VOP3_Real<ps64, SIEncodingFamily.GFX10>,
      VOP3be_gfx10<{0, 1, 0, 0, op{5-0}}, ps64.Pfl> {
        VOP3_Pseudo Ps = ps64;
        let AsmString = asmName # Ps.AsmOperands;
      }
  }
  // SDWA forms. When the e32 profile has HasExtSDWA9, three records are
  // produced: the main one (", vcc" stripped from the asm string) and two
  // parser-only ones that spell the operand as vcc_lo (wave32) or keep the
  // pseudo's operand string unchanged (wave64).
  multiclass VOP2be_Real_sdwa_gfx10<bits<6> op, string opName, string asmName> {
    defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
    if ps32.Pfl.HasExtSDWA9 then {
      def _sdwa_gfx10 :
        VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
        VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
          VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
          let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
          let DecoderNamespace = "SDWA10";
        }
      def _sdwa_w32_gfx10 :
        Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
        VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
          VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
          let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands);
          let isAsmParserOnly = 1;
          let DecoderNamespace = "SDWA10";
          let WaveSizePredicate = isWave32;
        }
      def _sdwa_w64_gfx10 :
        Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
        VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
          VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
          let AsmString = asmName # Ps.AsmOperands;
          let isAsmParserOnly = 1;
          let DecoderNamespace = "SDWA10";
          let WaveSizePredicate = isWave64;
        }
    }
  }
  // DPP16 forms: same main / wave32 / wave64 split as the SDWA forms, guarded
  // by HasExt32BitDPP.
  multiclass VOP2be_Real_dpp_gfx10<bits<6> op, string opName, string asmName> {
    defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
    if ps32.Pfl.HasExt32BitDPP then {
      def _dpp_gfx10 :
        VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"),
                   SIEncodingFamily.GFX10, asmName> {
          string AsmDPP = ps32.Pfl.AsmDPP16;
          let AsmString = asmName # !subst(", vcc", "", AsmDPP);
          let DecoderNamespace = "SDWA10";
        }
      def _dpp_w32_gfx10 :
        Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
          string AsmDPP = ps32.Pfl.AsmDPP16;
          let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
          let isAsmParserOnly = 1;
          let WaveSizePredicate = isWave32;
        }
      def _dpp_w64_gfx10 :
        Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
          string AsmDPP = ps32.Pfl.AsmDPP16;
          let AsmString = asmName # AsmDPP;
          let isAsmParserOnly = 1;
          let WaveSizePredicate = isWave64;
        }
    }
  }
  // DPP8 forms: same split again; only the main record sets a decoder
  // namespace ("DPP8").
  multiclass VOP2be_Real_dpp8_gfx10<bits<6> op, string opName, string asmName> {
    defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
    if ps32.Pfl.HasExt32BitDPP then {
      def _dpp8_gfx10 :
        VOP2_DPP8<op, ps32> {
          string AsmDPP8 = ps32.Pfl.AsmDPP8;
          let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
          let DecoderNamespace = "DPP8";
        }
      def _dpp8_w32_gfx10 :
        VOP2_DPP8<op, ps32> {
          string AsmDPP8 = ps32.Pfl.AsmDPP8;
          let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
          let isAsmParserOnly = 1;
          let WaveSizePredicate = isWave32;
        }
      def _dpp8_w64_gfx10 :
        VOP2_DPP8<op, ps32> {
          string AsmDPP8 = ps32.Pfl.AsmDPP8;
          let AsmString = asmName # AsmDPP8;
          let isAsmParserOnly = 1;
          let WaveSizePredicate = isWave64;
        }
    }
  }

  //===----------------------------- VOP3Only -----------------------------===//
  // Only an e64 record is produced, with the full 10-bit opcode given
  // directly; IsSingle is set on it.
  multiclass VOP3Only_Real_gfx10<bits<10> op> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
        let IsSingle = 1;
      }
  }

  //===---------------------------- VOP3beOnly ----------------------------===//
  // As VOP3Only_Real_gfx10 but with the VOP3be encoding class.
  multiclass VOP3beOnly_Real_gfx10<bits<10> op> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
      VOP3be_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
        let IsSingle = 1;
      }
  }
} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"

// Combiners: each one simply unions the per-encoding multiclasses above,
// optionally adding the GFX11/GFX12 generic instantiations.
multiclass VOP2Only_Real_MADK_gfx10_gfx11<bits<6> op> :
  VOP2Only_Real_MADK_gfx10<op>,
  VOP2Only_Real_MADK<GFX11Gen, op>;

multiclass VOP2Only_Real_MADK_gfx10_gfx11_gfx12<bits<6> op> :
  VOP2Only_Real_MADK_gfx10_gfx11<op>,
  VOP2Only_Real_MADK<GFX12Gen, op>;

multiclass VOP2be_Real_gfx10<bits<6> op, string opName, string asmName> :
  VOP2be_Real_e32_gfx10<op, opName, asmName>,
  VOP2be_Real_e64_gfx10<op, opName, asmName>,
  VOP2be_Real_sdwa_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp8_gfx10<op, opName, asmName>;

// Plain e32/e64 encodings combined with the VOP2be SDWA/DPP/DPP8 variants.
multiclass VOP2e_Real_gfx10<bits<6> op, string opName, string asmName> :
  VOP2_Real_e32_gfx10<op>,
  VOP2_Real_e64_gfx10<op>,
  VOP2be_Real_sdwa_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp8_gfx10<op, opName, asmName>;

multiclass VOP2_Real_gfx10<bits<6> op> :
  VOP2_Real_e32_gfx10<op>,
  VOP2_Real_e64_gfx10<op>,
  VOP2_Real_sdwa_gfx10<op>,
  VOP2_Real_dpp_gfx10<op>,
  VOP2_Real_dpp8_gfx10<op>;

multiclass VOP2_Real_gfx10_gfx11<bits<6> op> :
  VOP2_Real_gfx10<op>,
  VOP2_Real_FULL<GFX11Gen, op>;

multiclass VOP2_Real_gfx10_gfx11_gfx12<bits<6> op> :
  VOP2_Real_gfx10_gfx11<op>,
  VOP2_Real_FULL<GFX12Gen, op>;

multiclass VOP2_Real_with_name_gfx10<bits<6> op, string opName,
                                     string asmName> :
  VOP2_Real_e32_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_e64_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_sdwa_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>;

multiclass VOP2_Real_with_name_gfx10_gfx11_gfx12<bits<6> op, string opName,
                                                 string asmName> :
  VOP2_Real_with_name_gfx10<op, opName, asmName>,
  VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
  VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;

// NB: Same opcode as v_mac_legacy_f32
let DecoderNamespace = "GFX10_B" in
defm V_FMAC_LEGACY_F32 : VOP2_Real_gfx10<0x006>;

// GFX10 VOP2 opcode table (entries whose multiclass name carries _gfx11 or
// _gfx12 are instantiated for those generations with the same opcode).
defm V_XNOR_B32 : VOP2_Real_gfx10_gfx11_gfx12<0x01e>;
defm V_FMAC_F32 : VOP2_Real_gfx10_gfx11_gfx12<0x02b>;
defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10_gfx11_gfx12<0x02c>;
defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10_gfx11_gfx12<0x02d>;
defm V_ADD_F16 : VOP2_Real_gfx10<0x032>;
defm V_SUB_F16 : VOP2_Real_gfx10<0x033>;
defm V_SUBREV_F16 : VOP2_Real_gfx10<0x034>;
defm V_MUL_F16 : VOP2_Real_gfx10<0x035>;
defm V_FMAC_F16 : VOP2_Real_gfx10<0x036>;
defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10<0x037>;
defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10<0x038>;
defm V_MAX_F16 : VOP2_Real_gfx10<0x039>;
defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>;
defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>;

let IsSingle = 1 in {
  defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>;
}

// VOP2 no carry-in, carry-out.
defm V_ADD_NC_U32 :
  VOP2_Real_with_name_gfx10_gfx11_gfx12<0x025, "V_ADD_U32", "v_add_nc_u32">;
defm V_SUB_NC_U32 :
  VOP2_Real_with_name_gfx10_gfx11_gfx12<0x026, "V_SUB_U32", "v_sub_nc_u32">;
defm V_SUBREV_NC_U32 :
  VOP2_Real_with_name_gfx10_gfx11_gfx12<0x027, "V_SUBREV_U32", "v_subrev_nc_u32">;

// VOP2 carry-in, carry-out.
defm V_ADD_CO_CI_U32 :
  VOP2be_Real_gfx10<0x028, "V_ADDC_U32", "v_add_co_ci_u32">;
defm V_SUB_CO_CI_U32 :
  VOP2be_Real_gfx10<0x029, "V_SUBB_U32", "v_sub_co_ci_u32">;
defm V_SUBREV_CO_CI_U32 :
  VOP2be_Real_gfx10<0x02a, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;

defm V_CNDMASK_B32 :
  VOP2e_Real_gfx10<0x001, "V_CNDMASK_B32", "v_cndmask_b32">;

// VOP3 only.
defm V_BFM_B32 : VOP3Only_Real_gfx10<0x363>;
defm V_BCNT_U32_B32 : VOP3Only_Real_gfx10<0x364>;
defm V_MBCNT_LO_U32_B32 : VOP3Only_Real_gfx10<0x365>;
defm V_MBCNT_HI_U32_B32 : VOP3Only_Real_gfx10<0x366>;
defm V_LDEXP_F32 : VOP3Only_Real_gfx10<0x362>;
defm V_CVT_PKNORM_I16_F32 : VOP3Only_Real_gfx10<0x368>;
defm V_CVT_PKNORM_U16_F32 : VOP3Only_Real_gfx10<0x369>;
defm V_CVT_PK_U16_U32 : VOP3Only_Real_gfx10<0x36a>;
defm V_CVT_PK_I16_I32 : VOP3Only_Real_gfx10<0x36b>;

// VOP3 carry-out.
defm V_ADD_CO_U32 : VOP3beOnly_Real_gfx10<0x30f>;
defm V_SUB_CO_U32 : VOP3beOnly_Real_gfx10<0x310>;
defm V_SUBREV_CO_U32 : VOP3beOnly_Real_gfx10<0x319>;

// Instruction aliases tying the generic _e32 pseudos to their GFX10 real
// encodings; only active under isGFX10Only.
let SubtargetPredicate = isGFX10Only in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx10>;

  defm : VOP2bInstAliases<
    V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx10, "v_add_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx10, "v_sub_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx10, "v_subrev_co_ci_u32">;
} // End SubtargetPredicate = isGFX10Only

//===----------------------------------------------------------------------===//
// GFX6, GFX7, GFX10, GFX11, GFX12
//===----------------------------------------------------------------------===//

// VOP2 DPP encoding: the low 9 bits hold the fixed value 0xfa in place of
// src0; src1, vdst and the opcode occupy the same bit ranges as in VOP2e.
// src1/vdst are emitted as 0 when the profile lacks them.
class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
  VOP_DPPe <P> {
  bits<8> vdst;
  bits<8> src1;
  let Inst{8-0} = 0xfa; //dpp
  let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31} = 0x0; //encoding
}

// Real encodings for GFX6/GFX7 (SIEncodingFamily.SI), decoded from the
// "GFX6GFX7" table.
let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  // VOP2e encoding; the pseudo is looked up by NAME with no _e32 suffix.
  multiclass VOP2_Lane_Real_gfx6_gfx7<bits<6> op> {
    def _gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }
  // VOP2_MADKe encoding; pseudo looked up by NAME with no suffix.
  multiclass VOP2Only_Real_MADK_gfx6_gfx7<bits<6> op> {
    def _gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
      VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }
  multiclass VOP2_Real_e32_gfx6_gfx7<bits<6> op, string opName = NAME> {
    def _e32_gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.SI>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl>;
  }
  // The VOP3 opcode is the VOP2 opcode prefixed with the bits 1,0,0.
  multiclass VOP2_Real_e64_gfx6_gfx7<bits<6> op, string opName = NAME> {
    def _e64_gfx6_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(opName#"_e64").Pfl>;
  }
  // As above but with the VOP3be encoding class.
  multiclass VOP2be_Real_e64_gfx6_gfx7<bits<6> op, string opName = NAME> {
    def _e64_gfx6_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.SI>,
      VOP3be_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(opName#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

// Combiners that reuse one opcode value across several generations.
multiclass VOP2Only_Real_MADK_gfx6_gfx7_gfx10<bits<6> op> :
  VOP2Only_Real_MADK_gfx6_gfx7<op>, VOP2Only_Real_MADK_gfx10<op>;

multiclass VOP2_Real_gfx6_gfx7<bits<6> op> :
  VOP2_Real_e32_gfx6_gfx7<op>, VOP2_Real_e64_gfx6_gfx7<op>;

multiclass VOP2_Real_gfx6_gfx7_gfx10<bits<6> op> :
  VOP2_Real_gfx6_gfx7<op>, VOP2_Real_gfx10<op>;

multiclass VOP2_Real_gfx6_gfx7_gfx10_gfx11<bits<6> op> :
  VOP2_Real_gfx6_gfx7_gfx10<op>, VOP2_Real_FULL<GFX11Gen, op>;

multiclass VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<6> op> :
  VOP2_Real_gfx6_gfx7_gfx10_gfx11<op>, VOP2_Real_FULL<GFX12Gen, op>;

multiclass VOP2be_Real_gfx6_gfx7<bits<6> op> :
  VOP2_Real_e32_gfx6_gfx7<op>, VOP2be_Real_e64_gfx6_gfx7<op>;

// VOP2be e32 + e64 records built from pseudo `opName` but printed as
// `asmName`: AsmString is overridden separately for each form using that
// form's own operand string.
multiclass VOP2be_Real_gfx6_gfx7_with_name<bits<6> op,
  string opName, string asmName> {
  defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
  defvar ps64 = !cast<VOP3_Pseudo>(opName#"_e64");

  let AsmString = asmName # ps32.AsmOperands in {
    defm "" : VOP2_Real_e32_gfx6_gfx7<op, opName>;
  }

  let AsmString = asmName # ps64.AsmOperands in {
    defm "" : VOP2be_Real_e64_gfx6_gfx7<op, opName>;
  }
}

// Opcodes instantiated for GFX6/GFX7 only.
defm V_CNDMASK_B32 : VOP2_Real_gfx6_gfx7<0x000>;
defm V_MIN_LEGACY_F32 : VOP2_Real_gfx6_gfx7<0x00d>;
defm V_MAX_LEGACY_F32 : VOP2_Real_gfx6_gfx7<0x00e>;
defm V_LSHR_B32 : VOP2_Real_gfx6_gfx7<0x015>;
defm V_ASHR_I32 : VOP2_Real_gfx6_gfx7<0x017>;
defm V_LSHL_B32 : VOP2_Real_gfx6_gfx7<0x019>;
defm V_BFM_B32 : VOP2_Real_gfx6_gfx7<0x01e>;
defm V_BCNT_U32_B32 : VOP2_Real_gfx6_gfx7<0x022>;
defm V_MBCNT_LO_U32_B32 : VOP2_Real_gfx6_gfx7<0x023>;
defm V_MBCNT_HI_U32_B32 : VOP2_Real_gfx6_gfx7<0x024>;
defm V_LDEXP_F32 : VOP2_Real_gfx6_gfx7<0x02b>;
defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_gfx6_gfx7<0x02c>;
defm V_CVT_PKNORM_I16_F32 : VOP2_Real_gfx6_gfx7<0x02d>;
defm V_CVT_PKNORM_U16_F32 : VOP2_Real_gfx6_gfx7<0x02e>;
defm V_CVT_PK_U16_U32 : VOP2_Real_gfx6_gfx7<0x030>;
defm V_CVT_PK_I16_I32 : VOP2_Real_gfx6_gfx7<0x031>;

// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in
// VI, but the VI instructions behave the same as the SI versions.
defm V_ADD_I32 : VOP2be_Real_gfx6_gfx7_with_name<0x025, "V_ADD_CO_U32", "v_add_i32">;
defm V_SUB_I32 : VOP2be_Real_gfx6_gfx7_with_name<0x026, "V_SUB_CO_U32", "v_sub_i32">;
defm V_SUBREV_I32 : VOP2be_Real_gfx6_gfx7_with_name<0x027, "V_SUBREV_CO_U32", "v_subrev_i32">;
defm V_ADDC_U32 : VOP2be_Real_gfx6_gfx7<0x028>;
defm V_SUBB_U32 : VOP2be_Real_gfx6_gfx7<0x029>;
defm V_SUBBREV_U32 : VOP2be_Real_gfx6_gfx7<0x02a>;

defm V_READLANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x001>;

// V_WRITELANE_B32 gets an explicit input operand list that includes $vdst_in.
let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
  defm V_WRITELANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x002>;
} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)

// Instruction aliases tying the generic pseudos to their GFX6/GFX7 real
// encodings, including the *_CO_U32 -> *_I32 renames.
let SubtargetPredicate = isGFX6GFX7 in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx6_gfx7>;
  defm : VOP2eInstAliases<V_ADD_CO_U32_e32, V_ADD_I32_e32_gfx6_gfx7>;
  defm : VOP2eInstAliases<V_SUB_CO_U32_e32, V_SUB_I32_e32_gfx6_gfx7>;
  defm : VOP2eInstAliases<V_SUBREV_CO_U32_e32, V_SUBREV_I32_e32_gfx6_gfx7>;

  def : VOP2e64InstAlias<V_ADD_CO_U32_e64, V_ADD_I32_e64_gfx6_gfx7>;
  def : VOP2e64InstAlias<V_SUB_CO_U32_e64, V_SUB_I32_e64_gfx6_gfx7>;
  def : VOP2e64InstAlias<V_SUBREV_CO_U32_e64, V_SUBREV_I32_e64_gfx6_gfx7>;
} // End SubtargetPredicate = isGFX6GFX7

// Opcodes shared between GFX6/GFX7 and later generations; the multiclass name
// lists exactly which generations receive each entry.
defm V_ADD_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x003>;
defm V_SUB_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x004>;
defm V_SUBREV_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x005>;
defm V_MAC_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x006>;
defm V_MUL_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x007>;
defm V_MUL_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x008>;
defm V_MUL_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x009>;
defm V_MUL_HI_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00a>;
defm V_MUL_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00b>;
defm V_MUL_HI_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00c>;
defm V_MIN_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00f>;
defm V_MAX_F32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x010>;
defm V_MIN_I32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x011>;
defm V_MAX_I32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x012>;
defm V_MIN_U32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x013>;
defm V_MAX_U32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x014>;
defm V_LSHRREV_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x016>;
defm V_ASHRREV_I32 : VOP2_Real_gfx6_gfx7_gfx10<0x018>;
defm V_LSHLREV_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x01a>;
defm V_AND_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x01b>;
defm V_OR_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x01c>;
defm V_XOR_B32 : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x01d>;
defm V_MAC_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x01f>;
defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x02f>;
defm V_MADMK_F32 : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x020>;
defm V_MADAK_F32 : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x021>;

//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//

// Encodings shared by GFX8 and GFX9 (SIEncodingFamily.VI), decoded from the
// "GFX8" table unless a def overrides DecoderNamespace.
let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {

multiclass VOP2_Real_MADK_vi <bits<6> op> {
  def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
            VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
}

// GFX940 encoding family; decoded from "GFX9" instead of the enclosing "GFX8".
multiclass VOP2_Real_MADK_gfx940 <bits<6> op> {
  def _gfx940 : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX940>,
                VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl> {
    let DecoderNamespace = "GFX9";
  }
}

multiclass VOP2_Real_e32_vi <bits<6> op> {
  def _e32_vi :
    VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
}

// Takes the full 10-bit VOP3 opcode.
multiclass VOP2_Real_e64_vi <bits<10> op> {
  def _e64_vi :
    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
    VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}

// Same as VOP2_Real_e64_vi but additionally sets IsSingle on the record.
multiclass VOP2_Real_e64only_vi <bits<10> op> {
  def _e64_vi :
    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
    VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
      let IsSingle = 1;
    }
}

// e32 + e64, with the VOP3 opcode formed by prefixing op with bits 0,1,0,0.
multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
  VOP2_Real_e32_vi<op>,
  VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;

} // End AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8"

// SDWA record for VI, emitted only when the e32 profile has HasExtSDWA.
multiclass VOP2_SDWA_Real <bits<6> op> {
  if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA then
  def _sdwa_vi :
    VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}

// SDWA record for GFX9, emitted only when the e32 profile has HasExtSDWA9.
multiclass VOP2_SDWA9_Real <bits<6> op> {
  if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}

let AssemblerPredicate = isGFX8Only in {

// GFX8-only VOP2be records (e32, e64, optional sdwa and dpp) built from pseudo
// `OpName` and printed as `AsmName`.
multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName> {
  def _e32_vi :
    VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.VI>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> {
      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32");
      let AsmString = AsmName # ps.AsmOperands;
      let DecoderNamespace = "GFX8";
    }
  def _e64_vi :
    VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.VI>,
    VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
      VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
      let AsmString = AsmName # ps.AsmOperands;
      let DecoderNamespace = "GFX8";
    }
  if !cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA then
    def _sdwa_vi :
      VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
      VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
        VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
        let AsmString = AsmName # ps.AsmOperands;
      }
  if !cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP then
    def _dpp_vi :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.VI>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
        VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
        let AsmString = AsmName # ps.AsmOperands;
      }
}
}

let AssemblerPredicate = isGFX9Only in {

// GFX9-only counterpart of VOP2be_Real_e32e64_vi_only: GFX9 encoding family,
// "GFX9" decoder namespace for e32/e64 and "SDWA9" for the dpp record.
multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
  def _e32_gfx9 :
    VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.GFX9>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> {
      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32");
      let AsmString = AsmName # ps.AsmOperands;
      let DecoderNamespace = "GFX9";
    }
  def _e64_gfx9 :
    VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.GFX9>,
    VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
      VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
      let AsmString = AsmName # ps.AsmOperands;
      let DecoderNamespace = "GFX9";
    }
  if !cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA9 then
    def _sdwa_gfx9 :
      VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
      VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
        VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
        let AsmString = AsmName # ps.AsmOperands;
      }
  if !cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP then
    def _dpp_gfx9 :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.GFX9>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
        VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
        let AsmString = AsmName # ps.AsmOperands;
        let DecoderNamespace = "SDWA9";
      }
}

// GFX9-only e32/e64/sdwa/dpp records for a pseudo named after the defm, with
// no asm-name override.
multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
  def _e32_gfx9 :
    VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX9>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>{
      let DecoderNamespace = "GFX9";
    }
  def _e64_gfx9 :
    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
    VOP3e_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
      let DecoderNamespace = "GFX9";
    }
  if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
    def _sdwa_gfx9 :
      VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
      }
  if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
    def _dpp_gfx9 :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
        let DecoderNamespace = "SDWA9";
      }
}

} // AssemblerPredicate = isGFX9Only

// Full VI set for one opcode: e32 + e64 + VI sdwa + GFX9 sdwa, plus a VI dpp
// record when the e32 profile has HasExtDPP.
multiclass VOP2_Real_e32e64_vi <bits<6> op> :
  Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {

  if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
    def _dpp_vi :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
}

// GFX8/GFX9 VOP2 opcode table.
defm V_CNDMASK_B32 : VOP2_Real_e32e64_vi <0x0>;
defm V_ADD_F32 : VOP2_Real_e32e64_vi <0x1>;
defm V_SUB_F32 : VOP2_Real_e32e64_vi <0x2>;
defm V_SUBREV_F32 : VOP2_Real_e32e64_vi <0x3>;
// This let has no braces, so it applies to the single defm that follows.
let AssemblerPredicate = isGCN3ExcludingGFX90A in
defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_vi <0x4>;
defm V_MUL_F32 : VOP2_Real_e32e64_vi <0x5>;
defm V_MUL_I32_I24 : VOP2_Real_e32e64_vi <0x6>;
defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_vi <0x7>;
defm V_MUL_U32_U24 : VOP2_Real_e32e64_vi <0x8>;
defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_vi <0x9>;
defm V_MIN_F32 : VOP2_Real_e32e64_vi <0xa>;
defm V_MAX_F32 : VOP2_Real_e32e64_vi <0xb>;
defm V_MIN_I32 : VOP2_Real_e32e64_vi <0xc>;
defm V_MAX_I32 : VOP2_Real_e32e64_vi <0xd>;
defm V_MIN_U32 : VOP2_Real_e32e64_vi <0xe>;
defm V_MAX_U32 : VOP2_Real_e32e64_vi <0xf>;
defm V_LSHRREV_B32 : VOP2_Real_e32e64_vi <0x10>;
defm V_ASHRREV_I32 : VOP2_Real_e32e64_vi <0x11>;
defm V_LSHLREV_B32 : VOP2_Real_e32e64_vi <0x12>;
defm V_AND_B32 : VOP2_Real_e32e64_vi <0x13>;
defm V_OR_B32 : VOP2_Real_e32e64_vi <0x14>;
defm V_XOR_B32 : VOP2_Real_e32e64_vi <0x15>;
defm V_MAC_F32 : VOP2_Real_e32e64_vi <0x16>;
defm V_MADMK_F32 : VOP2_Real_MADK_vi <0x17>;
defm V_MADAK_F32 : VOP2_Real_MADK_vi <0x18>;

// GFX8-only names for opcodes 0x19-0x1e.
defm V_ADD_U32 : VOP2be_Real_e32e64_vi_only <0x19, "V_ADD_CO_U32", "v_add_u32">;
defm V_SUB_U32 : VOP2be_Real_e32e64_vi_only <0x1a, "V_SUB_CO_U32", "v_sub_u32">;
defm V_SUBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1b, "V_SUBREV_CO_U32", "v_subrev_u32">;
defm V_ADDC_U32 : VOP2be_Real_e32e64_vi_only <0x1c, "V_ADDC_U32", "v_addc_u32">;
defm V_SUBB_U32 : VOP2be_Real_e32e64_vi_only <0x1d, "V_SUBB_U32", "v_subb_u32">;
defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1e, "V_SUBBREV_U32", "v_subbrev_u32">;

// GFX9-only names for the same opcodes 0x19-0x1e.
defm V_ADD_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_CO_U32", "v_add_co_u32">;
defm V_SUB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_CO_U32", "v_sub_co_u32">;
defm V_SUBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_CO_U32", "v_subrev_co_u32">;
defm V_ADDC_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1c, "V_ADDC_U32", "v_addc_co_u32">;
defm V_SUBB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1d, "V_SUBB_U32", "v_subb_co_u32">;
defm V_SUBBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_subbrev_co_u32">;

// GFX9-only V_ADD_U32/V_SUB_U32/V_SUBREV_U32 at separate opcodes.
defm V_ADD_U32 : VOP2_Real_e32e64_gfx9 <0x34>;
defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>;
defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>;

// e64-only on GFX8/GFX9: the argument is the full 10-bit VOP3 opcode.
defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>;
defm V_BCNT_U32_B32 : VOP2_Real_e64only_vi <0x28b>;
defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64only_vi <0x28c>;
defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64only_vi <0x28d>;
defm V_LDEXP_F32 : VOP2_Real_e64only_vi <0x288>;
defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64only_vi <0x1f0>;
defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64only_vi <0x294>;
defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64only_vi <0x295>;
defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64only_vi <0x296>;
defm V_CVT_PK_U16_U32 : VOP2_Real_e64only_vi <0x297>;
defm V_CVT_PK_I16_I32 : VOP2_Real_e64only_vi <0x298>;

defm V_ADD_F16 : VOP2_Real_e32e64_vi <0x1f>;
defm V_SUB_F16 : VOP2_Real_e32e64_vi <0x20>;
defm V_SUBREV_F16 : VOP2_Real_e32e64_vi <0x21>;
defm V_MUL_F16 : VOP2_Real_e32e64_vi <0x22>;
defm V_MAC_F16 : VOP2_Real_e32e64_vi <0x23>;
defm V_MADMK_F16 : VOP2_Real_MADK_vi <0x24>;
defm V_MADAK_F16 : VOP2_Real_MADK_vi <0x25>;
defm V_ADD_U16 : VOP2_Real_e32e64_vi <0x26>;
defm V_SUB_U16 : VOP2_Real_e32e64_vi <0x27>;
defm
V_SUBREV_U16 : VOP2_Real_e32e64_vi <0x28>; 2414defm V_MUL_LO_U16 : VOP2_Real_e32e64_vi <0x29>; 2415defm V_LSHLREV_B16 : VOP2_Real_e32e64_vi <0x2a>; 2416defm V_LSHRREV_B16 : VOP2_Real_e32e64_vi <0x2b>; 2417defm V_ASHRREV_I16 : VOP2_Real_e32e64_vi <0x2c>; 2418defm V_MAX_F16 : VOP2_Real_e32e64_vi <0x2d>; 2419defm V_MIN_F16 : VOP2_Real_e32e64_vi <0x2e>; 2420defm V_MAX_U16 : VOP2_Real_e32e64_vi <0x2f>; 2421defm V_MAX_I16 : VOP2_Real_e32e64_vi <0x30>; 2422defm V_MIN_U16 : VOP2_Real_e32e64_vi <0x31>; 2423defm V_MIN_I16 : VOP2_Real_e32e64_vi <0x32>; 2424defm V_LDEXP_F16 : VOP2_Real_e32e64_vi <0x33>; 2425 2426let SubtargetPredicate = isGFX8GFX9 in { 2427 2428// Aliases to simplify matching of floating-point instructions that 2429// are VOP2 on SI and VOP3 on VI. 2430class SI2_VI3Alias <string name, VOP3_Real inst> : InstAlias < 2431 name#" $dst, $src0, $src1", 2432 !if(inst.Pfl.HasOMod, 2433 (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0), 2434 (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0)) 2435>, PredicateControl { 2436 let UseInstAsmMatchConverter = 0; 2437 let AsmVariantName = AMDGPUAsmVariants.VOP3; 2438} 2439 2440def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>; 2441def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>; 2442def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>; 2443def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>; 2444def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>; 2445 2446defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_vi>; 2447 2448} // End SubtargetPredicate = isGFX8GFX9 2449 2450let SubtargetPredicate = isGFX9Only in { 2451 2452defm : VOP2bInstAliases<V_ADD_U32_e32, V_ADD_CO_U32_e32_gfx9, "v_add_co_u32">; 2453defm : VOP2bInstAliases<V_ADDC_U32_e32, V_ADDC_CO_U32_e32_gfx9, "v_addc_co_u32">; 2454defm : VOP2bInstAliases<V_SUB_U32_e32, V_SUB_CO_U32_e32_gfx9, "v_sub_co_u32">; 2455defm : 
VOP2bInstAliases<V_SUBB_U32_e32, V_SUBB_CO_U32_e32_gfx9, "v_subb_co_u32">; 2456defm : VOP2bInstAliases<V_SUBREV_U32_e32, V_SUBREV_CO_U32_e32_gfx9, "v_subrev_co_u32">; 2457defm : VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBBREV_CO_U32_e32_gfx9, "v_subbrev_co_u32">; 2458 2459} // End SubtargetPredicate = isGFX9Only 2460 2461let SubtargetPredicate = HasDLInsts in { 2462 2463defm V_FMAC_F32 : VOP2_Real_e32e64_vi <0x3b>; 2464defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>; 2465 2466} // End SubtargetPredicate = HasDLInsts 2467 2468let AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A" in { 2469 multiclass VOP2_Real_e32_gfx90a <bits<6> op> { 2470 def _e32_gfx90a : 2471 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX90A>, 2472 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>; 2473 } 2474 2475 multiclass VOP2_Real_e64_gfx90a <bits<10> op> { 2476 def _e64_gfx90a : 2477 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX90A>, 2478 VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 2479 } 2480 2481 multiclass Base_VOP2_Real_e32e64_gfx90a <bits<6> op> : 2482 VOP2_Real_e32_gfx90a<op>, 2483 VOP2_Real_e64_gfx90a<{0, 1, 0, 0, op{5-0}}>; 2484 2485 multiclass VOP2_Real_e32e64_gfx90a <bits<6> op> : 2486 Base_VOP2_Real_e32e64_gfx90a<op> { 2487 2488 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then 2489 def _dpp_gfx90a : 2490 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX90A>, 2491 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> { 2492 let DecoderNamespace = "SDWA9"; 2493 } 2494 } 2495} // End AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A" 2496 2497let SubtargetPredicate = HasFmacF64Inst in { 2498 defm V_FMAC_F64 : VOP2_Real_e32e64_gfx90a <0x4>; 2499} // End SubtargetPredicate = HasFmacF64Inst 2500 2501let SubtargetPredicate = isGFX90APlus, IsSingle = 1 in { 2502 defm V_MUL_LEGACY_F32 : VOP2_Real_e64_gfx90a <0x2a1>; 2503} 2504 2505let SubtargetPredicate = HasFmaakFmamkF32Insts in { 2506defm V_FMAMK_F32 
: VOP2_Real_MADK_gfx940 <0x17>; 2507defm V_FMAAK_F32 : VOP2_Real_MADK_gfx940 <0x18>; 2508} 2509 2510multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : Base_VOP2_Real_e32e64_vi<op> { 2511 let SubtargetPredicate = isGFX9Only in 2512 def _dpp_vi : VOP2_DPP<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>; 2513} 2514 2515multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> : 2516 VOP2_Real_e32_gfx10<op>, 2517 VOP2_Real_dpp_gfx10<op>, 2518 VOP2_Real_dpp8_gfx10<op>; 2519 2520multiclass VOP2Only_Real_DOT_ACC_gfx10<bits<6> op> : VOP2_Real_dpp_gfx10<op>, 2521 VOP2_Real_dpp8_gfx10<op> { 2522 let IsSingle = 1 in 2523 defm NAME : VOP2_Real_e32_gfx10<op>; 2524} 2525 2526let OtherPredicates = [HasDot5Insts] in { 2527 defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx9<0x37>; 2528 // NB: Opcode conflicts with V_DOT8C_I32_I4 2529 // This opcode exists in gfx 10.1* only 2530 defm V_DOT2C_F32_F16 : VOP2Only_Real_DOT_ACC_gfx10<0x02>; 2531} 2532 2533let OtherPredicates = [HasDot6Insts] in { 2534 defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx9<0x39>; 2535 defm V_DOT4C_I32_I8 : VOP2Only_Real_DOT_ACC_gfx10<0x0d>; 2536} 2537 2538let OtherPredicates = [HasDot4Insts] in { 2539 defm V_DOT2C_I32_I16 : VOP2_Real_DOT_ACC_gfx9<0x38>; 2540} 2541let OtherPredicates = [HasDot3Insts] in { 2542 defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx9<0x3a>; 2543} 2544 2545let SubtargetPredicate = HasPkFmacF16Inst in { 2546defm V_PK_FMAC_F16 : VOP2_Real_e32_vi<0x3c>; 2547} // End SubtargetPredicate = HasPkFmacF16Inst 2548 2549let SubtargetPredicate = HasDot3Insts in { 2550 // NB: Opcode conflicts with V_DOT2C_F32_F16 2551 let DecoderNamespace = "GFX10_B" in 2552 defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx10<0x02>; 2553} 2554