//===-- VOP2Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP2 Classes
//===----------------------------------------------------------------------===//

// 32-bit VOP2 encoding. Operand fields that the profile does not use are
// encoded as 0.
class VOP2e <bits<6> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;
  bits<8> src1;

  let Inst{8-0}   = !if(P.HasSrc0, src0, 0);
  let Inst{16-9}  = !if(P.HasSrc1, src1, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
}

// 64-bit variant of the VOP2 encoding: same low dword layout as VOP2e, with a
// 32-bit immediate in the high dword.
class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 {
  bits<8> vdst;
  bits<9> src0;
  bits<8> src1;
  bits<32> imm;

  let Inst{8-0}   = !if(P.HasSrc0, src0, 0);
  let Inst{16-9}  = !if(P.HasSrc1, src1, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
  let Inst{63-32} = imm;
}

// SDWA form: the src0 field holds the fixed 0xf9 marker; the remaining fields
// come from VOP_SDWAe.
class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;
  bits<8> src1;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
}

// SDWA9 form: src1 is 9 bits wide; its top bit is placed in Inst{63}.
class VOP2_SDWA9Ae <bits<6> op, VOPProfile P> : VOP_SDWA9Ae <P> {
  bits<8> vdst;
  bits<9> src1;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
  let Inst{63}    = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr
}

// Pseudo instruction for the 32-bit VOP2 form, built from the profile's
// 32-bit operand lists and asm string.
class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suffix = "_e32"> :
  VOP_Pseudo <opName, suffix, P, P.Outs32, P.Ins32, "", pattern> {

  let AsmOperands = P.Asm32;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  // The MODE register is read when either the result or src0 is floating point.
  let ReadsModeReg = !or(P.DstVT.isFP, P.Src0VT.isFP);

  let mayRaiseFPException = ReadsModeReg;

  let VOP2 = 1;
  let VALU = 1;
  let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);

  let AsmVariantName = AMDGPUAsmVariants.Default;
}

// Real (encodable) instruction derived from a VOP2_Pseudo. Operand lists, asm
// operands and the flags listed below are copied from the pseudo.
class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemonic> :
  VOP_Real <ps>,
  InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let VALU = 1;
  let VOP2 = 1;
  let isPseudo = 0;
  let isCodeGenOnly = 0;

  let Constraints     = ps.Constraints;
  let DisableEncoding = ps.DisableEncoding;

  // copy relevant pseudo op flags
  // (Constraints and DisableEncoding are already copied above; the redundant
  // second assignment of the same values was dropped.)
  let SubtargetPredicate = ps.SubtargetPredicate;
  let OtherPredicates    = ps.OtherPredicates;
  let AsmMatchConverter  = ps.AsmMatchConverter;
  let AsmVariantName     = ps.AsmVariantName;
  let TSFlags            = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses               = ps.Uses;
  let Defs               = ps.Defs;
  let SchedRW            = ps.SchedRW;
  let mayLoad            = ps.mayLoad;
  let mayStore           = ps.mayStore;
}

// VOP2_Real parameterized by a GFXGen record, which supplies the subtarget,
// assembler predicate and decoder namespace. Profiles that are not "real"
// true16 get a "_FAKE16" suffix on the decoder namespace.
class VOP2_Real_Gen <VOP2_Pseudo ps, GFXGen Gen, string real_name = ps.Mnemonic> :
  VOP2_Real <ps, Gen.Subtarget, real_name> {
  let AssemblerPredicate = Gen.AssemblerPredicate;
  let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
  let DecoderNamespace = Gen.DecoderNamespace#
                         !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}

// SDWA pseudo for VOP2; uses the "cvtSdwaVOP2" asm match converter.
class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP2";
}

// DPP pseudo for VOP2; adds nothing on top of VOP_DPP_Pseudo.
class VOP2_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_DPP_Pseudo <OpName, P, pattern> {
}


// Builds the selection pattern list for the 64-bit form of a VOP2 node.
// With modifiers, src0 is matched through VOP3Mods0 (with $omod only when the
// profile has OMod) and src1 through VOP3Mods; otherwise the plain
// two-operand pattern is used.
class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret = !if(P.HasModifiers,
    [(set P.DstVT:$vdst,
      (node (P.Src0VT
              !if(P.HasOMod,
                  (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
                  (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))),
            (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
    [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]);
}

// Defines only the _e32 pseudo.
multiclass VOP2Inst_e32<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName,
                        bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
               Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
  } // End renamedInGFX9 = GFX9Renamed
}
// Same as VOP2Inst_e32, additionally tagging the record as a VOPD component.
multiclass
    VOP2Inst_e32_VOPD<string opName, VOPProfile P, bits<5> VOPDOp,
                      string VOPDName, SDPatternOperator node = null_frag,
                      string revOp = opName, bit GFX9Renamed = 0> {
  defm NAME : VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
              VOPD_Component<VOPDOp, VOPDName>;
}
// Defines the _e64 pseudo and, when the profile has HasExtVOP3DPP, the
// _e64_dpp pseudo (predicated on isGFX11Plus).
multiclass VOP2Inst_e64<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName,
                        bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    def _e64 : VOP3InstBase <opName, P, node, 1>,
               Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;

    let SubtargetPredicate = isGFX11Plus in {
      if P.HasExtVOP3DPP then
        def _e64_dpp  : VOP3_DPP_Pseudo <opName, P>;
    } // End SubtargetPredicate = isGFX11Plus
  } // End renamedInGFX9 = GFX9Renamed
}

// Defines the _sdwa pseudo when the profile has HasExtSDWA.
multiclass VOP2Inst_sdwa<string opName,
                         VOPProfile P,
                         bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    if P.HasExtSDWA then
      def _sdwa  : VOP2_SDWA_Pseudo <opName, P>;
  } // End renamedInGFX9 = GFX9Renamed
}

// Full VOP2 instruction: _e32, _e64 (+ _e64_dpp), _sdwa, and _dpp when the
// profile has HasExtDPP.
multiclass VOP2Inst<string opName,
                    VOPProfile P,
                    SDPatternOperator node = null_frag,
                    string revOp = opName,
                    bit GFX9Renamed = 0> :
    VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
    VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
    VOP2Inst_sdwa<opName, P, GFX9Renamed> {
  let renamedInGFX9 = GFX9Renamed in {
    if P.HasExtDPP then
      def _dpp  : VOP2_DPP_Pseudo <opName, P>;
  }
}

// Emits three families of a 16-bit instruction: the plain one (no true16),
// a "_t16" one using VOPProfile_True16, and a "_fake16" one using
// VOPProfile_Fake16, each under its own predicate.
multiclass VOP2Inst_t16<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName,
                        bit GFX9Renamed = 0> {
  let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in {
    defm NAME : VOP2Inst<opName, P, node, revOp, GFX9Renamed>;
  }
  let SubtargetPredicate = UseRealTrue16Insts in {
    defm _t16 : VOP2Inst<opName#"_t16", VOPProfile_True16<P>, node, revOp#"_t16", GFX9Renamed>;
  }
  let SubtargetPredicate = UseFakeTrue16Insts in {
    defm _fake16 : VOP2Inst<opName#"_fake16", VOPProfile_Fake16<P>, node, revOp#"_fake16", GFX9Renamed>;
  }
}

// Creating a _t16_e32 pseudo when there is no corresponding real instruction on
// any subtarget is a problem. It makes getMCOpcodeGen return -1, which we
// assume means the instruction is already a real.
// The fix is to not create that
// _t16_e32 pseudo
multiclass VOP2Inst_e64_t16<string opName,
                            VOPProfile P,
                            SDPatternOperator node = null_frag,
                            string revOp = opName,
                            bit GFX9Renamed = 0> {
  let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in {
    defm NAME : VOP2Inst<opName, P, node, revOp, GFX9Renamed>;
  }
  // Only the _e64 form is created for the _t16 variant.
  let SubtargetPredicate = HasTrue16BitInsts in {
    defm _t16 : VOP2Inst_e64<opName#"_t16", VOPProfile_Fake16<P>, node, revOp#"_t16", GFX9Renamed>;
  }
}

// Like VOP2Inst, but the _e32 pseudo is also a VOPD component.
multiclass VOP2Inst_VOPD<string opName,
                         VOPProfile P,
                         bits<5> VOPDOp,
                         string VOPDName,
                         SDPatternOperator node = null_frag,
                         string revOp = opName,
                         bit GFX9Renamed = 0> :
    VOP2Inst_e32_VOPD<opName, P, VOPDOp, VOPDName, node, revOp, GFX9Renamed>,
    VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
    VOP2Inst_sdwa<opName, P, GFX9Renamed> {
  let renamedInGFX9 = GFX9Renamed in {
    if P.HasExtDPP then
      def _dpp  : VOP2_DPP_Pseudo <opName, P>;
  }
}

// VOP2 instructions whose 32-bit, SDWA and DPP forms define VCC, and also use
// VCC when useSGPRInput is set (by default: when the profile has three source
// arguments). The _e64 forms are defined outside that Uses/Defs scope.
multiclass VOP2bInst <string opName,
                      VOPProfile P,
                      SDPatternOperator node = null_frag,
                      string revOp = opName,
                      bit GFX9Renamed = 0,
                      bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
  let renamedInGFX9 = GFX9Renamed in {
    let SchedRW = [Write32Bit, WriteSALU] in {
      let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
        def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
                   Commutable_REV<revOp#"_e32", !eq(revOp, opName)> {
          let usesCustomInserter = true;
        }

        if P.HasExtSDWA then
          def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
            let AsmMatchConverter = "cvtSdwaVOP2b";
          }
        if P.HasExtDPP then
          def _dpp  : VOP2_DPP_Pseudo <opName, P>;
      } // End Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC]

      def _e64 : VOP3InstBase <opName, P, node, 1>,
                 Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;

      let SubtargetPredicate = isGFX11Plus in {
        if P.HasExtVOP3DPP then
          def _e64_dpp  : VOP3_DPP_Pseudo <opName, P>;
      } // End SubtargetPredicate = isGFX11Plus
    }
  }
}

// Assembler alias for a VOP2b instruction: the 32-bit asm string with every
// "vcc" replaced by `opnd`.
class VOP2bInstAlias <VOP2_Pseudo ps, Instruction inst,
                      string OpName, string opnd> :
  InstAlias <OpName#" "#!subst("vcc", opnd, ps.Pfl.Asm32),
             (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
                   ps.Pfl.Src1RC32:$src1),
             1, inst.AsmVariantName>,
  PredicateControl {
}

// One alias per wave size: "vcc_lo" under isWave32, "vcc" under isWave64.
multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> {
  let WaveSizePredicate = isWave32 in {
    def : VOP2bInstAlias<ps, inst, OpName, "vcc_lo">;
  }
  let WaveSizePredicate = isWave64 in {
    def : VOP2bInstAlias<ps, inst, OpName, "vcc">;
  }
}

// Common body for VOP2e instructions. A VOPDOp of -1 means "no VOPD
// component"; any other value attaches VOPD_Component to the _e32 pseudo.
multiclass
    VOP2eInst_Base<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName,
                   SDPatternOperator node, string revOp, bit useSGPRInput> {

  let SchedRW = [Write32Bit] in {
    let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
      if !eq(VOPDOp, -1) then
        def _e32 : VOP2_Pseudo <opName, P>,
                   Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
      else
        def _e32 : VOP2_Pseudo <opName, P>,
                   Commutable_REV<revOp#"_e32", !eq(revOp, opName)>,
                   VOPD_Component<VOPDOp, VOPDName>;

      if P.HasExtSDWA then
        def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
          let AsmMatchConverter = "cvtSdwaVOP2e";
        }

      if P.HasExtDPP then
        def _dpp  : VOP2_DPP_Pseudo <opName, P>;
    }

    def _e64 : VOP3InstBase <opName, P, node, 1>,
               Commutable_REV<revOp#"_e64", !eq(revOp, opName)> {
      let isReMaterializable = 1;
    }

    let SubtargetPredicate = isGFX11Plus in {
      if P.HasExtVOP3DPP then
        def _e64_dpp  : VOP3_DPP_Pseudo <opName, P>;
    } // End SubtargetPredicate = isGFX11Plus
  }
}

// VOP2e instruction without a VOPD component.
multiclass
    VOP2eInst<string opName, VOPProfile P, SDPatternOperator node = null_frag,
              string revOp = opName, bit useSGPRInput = !eq(P.NumSrcArgs, 3)>
    : VOP2eInst_Base<opName, P, -1, "", node, revOp, useSGPRInput>;

// VOP2e instruction that is also a VOPD component.
multiclass
    VOP2eInst_VOPD<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName,
                   SDPatternOperator node = null_frag, string revOp = opName,
                   bit useSGPRInput = !eq(P.NumSrcArgs, 3)>
    : VOP2eInst_Base<opName, P, VOPDOp, VOPDName, node, revOp, useSGPRInput>;

// Assembler alias for the 32-bit form with `opnd` appended as a trailing
// operand to the profile's 32-bit asm string.
class VOP2eInstAlias <VOP2_Pseudo ps, Instruction inst, string opnd = ""> :
  InstAlias <ps.OpName#" "#ps.Pfl.Asm32#", "#opnd,
             (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
                   ps.Pfl.Src1RC32:$src1),
             1, inst.AsmVariantName>,
  PredicateControl;

// Assembler alias for the 64-bit form, using the profile's 64-bit asm string.
class VOP2e64InstAlias <VOP3_Pseudo ps, Instruction inst> :
  InstAlias <ps.OpName#" "#ps.Pfl.Asm64,
             (inst ps.Pfl.DstRC:$vdst, VOPDstS64orS32:$sdst,
                   ps.Pfl.Src0RC32:$src0, ps.Pfl.Src1RC32:$src1, Clamp:$clamp),
             1, inst.AsmVariantName>,
  PredicateControl;

// One alias per wave size: "vcc_lo" under isWave32, "vcc" under isWave64.
multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
  let WaveSizePredicate = isWave32 in {
    def : VOP2eInstAlias<ps, inst, "vcc_lo">;
  }
  let WaveSizePredicate = isWave64 in {
    def : VOP2eInstAlias<ps, inst, "vcc">;
  }
}

// Common base of the MADAK/MADMK profiles: all four value types are `vt`.
// AsmVOPDXDeferred is left unset here and filled in by the subclasses.
class VOP_MADK_Base<ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
  string AsmVOPDXDeferred = ?;
}

// Profile with operand order (src0, src1, imm). The immediate operand type
// and the src0 operand class are chosen by the size of `vt` (32 vs. other).
class VOP_MADAK <ValueType vt> : VOP_MADK_Base<vt> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, KImmFP16);
  field dag Ins32 = !if(!eq(vt.Size, 32),
                        (ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm),
                        (ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm));
  field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$imm);
  // Note that both src0X and imm are deferred
  let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$immDeferred);
  field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, VGPR_32:$vsrc1Y, ImmOpType:$imm);

  field string Asm32 = "$vdst, $src0, $src1, $imm";
  field string AsmVOPDX = "$vdstX, $src0X, $vsrc1X, $imm";
  let AsmVOPDXDeferred = "$vdstX, $src0X, $vsrc1X, $immDeferred";
  field string AsmVOPDY = "$vdstY, $src0Y, $vsrc1Y, $imm";
  field bit HasExt = 0;
  let IsSingle = 1;
}

def VOP_MADAK_F16 : VOP_MADAK <f16>;
// f16 MADAK profile with IsTrue16 set and operands restricted to the
// *_Lo128 register operand classes.
def VOP_MADAK_F16_t16 : VOP_MADAK <f16> {
  let IsTrue16 = 1;
  let DstRC = VOPDstOperand<VGPR_32_Lo128>;
  let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0, VGPR_32_Lo128:$src1, ImmOpType:$imm);
}
def VOP_MADAK_F32 : VOP_MADAK <f32>;

// Profile with operand order (src0, imm, src1); otherwise parallel to
// VOP_MADAK.
class VOP_MADMK <ValueType vt> : VOP_MADK_Base<vt> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, KImmFP16);
  field dag Ins32 = !if(!eq(vt.Size, 32),
                        (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1),
                        (ins VSrc_f16_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1));
  field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$imm, VGPR_32:$vsrc1X);
  let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$immDeferred, VGPR_32:$vsrc1X);
  field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, ImmOpType:$imm, VGPR_32:$vsrc1Y);

  field string Asm32 = "$vdst, $src0, $imm, $src1";
  field string AsmVOPDX = "$vdstX, $src0X, $imm, $vsrc1X";
  let AsmVOPDXDeferred = "$vdstX, $src0X, $immDeferred, $vsrc1X";
  field string AsmVOPDY = "$vdstY, $src0Y, $imm, $vsrc1Y";
  field bit HasExt = 0;
  let IsSingle = 1;
}

def VOP_MADMK_F16 : VOP_MADMK <f16>;
// f16 MADMK profile with IsTrue16 set and operands restricted to the
// *_Lo128 register operand classes.
def VOP_MADMK_F16_t16 : VOP_MADMK <f16> {
  let IsTrue16 = 1;
  let DstRC = VOPDstOperand<VGPR_32_Lo128>;
  let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPR_32_Lo128:$src1);
}
def VOP_MADMK_F32 : VOP_MADMK <f32>;

// FIXME: Remove src2_modifiers. It isn't used, so is wasting memory
//        and processing time but it makes it easier to convert to mad.
// Profile for multiply-accumulate style instructions. Every input list
// carries an explicit $src2 operand (marked "stub argument" in the DPP/SDWA
// lists), while HasSrc2 and HasSrc2Mods are cleared and TieRegDPP names
// "$src2".
class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, vt0]> {
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT>.ret:$src2);
  let Ins64 = getIns64<Src0RC64, Src1RC64, getVregSrcForVT<Src2VT>.ret, 3,
                       0, HasModifiers, HasModifiers, HasOMod,
                       Src0Mod, Src1Mod, Src2Mod>.ret;
  let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                    getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
                    dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                    DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
  let InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, RegisterOperand<VGPR_32>, 3,
                       0, HasModifiers, HasModifiers, HasOMod,
                       Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel>.ret;
  // We need a dummy src2 tied to dst to track the use of that register for s_delay_alu
  let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X);
  let InsVOPDXDeferred =
    (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X,
         VGPR_32:$vsrc1X, VGPRSrc_32:$src2X);
  let InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y, VGPRSrc_32:$src2Y);
  // NOTE(review): the $src0Y operand class is keyed on Src1VT.Size while the
  // X variant above keys on Src0VT.Size. Presumably both are vt1 for this
  // profile, so the result is the same - confirm against VOPProfile.
  let InsVOPDYDeferred =
    (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y,
         VGPR_32:$vsrc1Y, VGPRSrc_32:$src2Y);

  let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                     Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                     getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
                     dpp8:$dpp8, Dpp8FI:$fi);
  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                     getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
                     Clamp:$clamp, omod:$omod,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);
  // The asm strings are all built for two source arguments.
  let Asm32 = getAsm32<1, 2, vt0>.ret;
  let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt0>.ret;
  let AsmDPP16 = getAsmDPP16<1, 2, HasModifiers, vt0>.ret;
  let AsmDPP8 = getAsmDPP8<1, 2, 0, vt0>.ret;
  let AsmSDWA = getAsmSDWA<1, 2, vt0>.ret;
  let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt0>.ret;
  let AsmVOP3Base =
      getAsmVOP3Base<2 /*NumSrcArgs*/, HasDst, HasClamp,
                     HasOpSel, HasOMod, IsVOP3P, HasModifiers,
                     HasModifiers, HasModifiers,
                     0 /*Src2HasMods*/, DstVT>.ret;
  let HasSrc2 = 0;
  let HasSrc2Mods = 0;

  let HasExt = 1;
  let HasExtDPP = 1;
  let HasExt32BitDPP = 1;
  let HasExtSDWA = 1;
  let HasExtSDWA9 = 0;
  let TieRegDPP = "$src2";
}

def VOP_MAC_F16 : VOP_MAC <f16>;
// f16 MAC profile with IsTrue16 and HasOpSel set; the 32-bit and DPP operand
// classes are overridden with the IsTrue16/IsFake16 variants.
def VOP_MAC_F16_t16 : VOP_MAC <f16> {
  let IsTrue16 = 1;
  let HasOpSel = 1;
  let AsmVOP3OpSel = getAsmVOP3OpSel<2/*NumSrcArgs*/, HasClamp, HasOMod,
                                     HasSrc0FloatMods, HasSrc1FloatMods, HasSrc2FloatMods>.ret;
  let DstRC = VOPDstOperand<VGPR_32_Lo128>;
  let DstRC64 = VOPDstOperand<VGPR_32>;
  let Src1RC32 = VGPRSrc_32_Lo128;
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret:$src2);
  let Src0DPP = getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret;
  let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
  let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
  let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
  let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                    getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret:$src2, // stub argument
                    dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                    DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
  let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                     Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                     getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret:$src2, // stub argument
                     dpp8:$dpp8, Dpp8FI:$fi);
  let Src2Mod = FP32InputMods; // dummy unused modifiers
  let Src2RC64 = VGPRSrc_32;   // stub argument
  let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret;
}
def VOP_MAC_F32 : VOP_MAC <f32>;
let HasExtDPP = 0, HasExt32BitDPP = 0 in
def VOP_MAC_LEGACY_F32 : VOP_MAC <f32>;
let HasExtSDWA = 0, HasExt32BitDPP = 0, HasExt64BitDPP = 1 in
def VOP_MAC_F64 : VOP_MAC <f64>;

// MAC-style profile with clamp, SDWA, op_sel and packed flags cleared.
class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> {
  let HasClamp = 0;
  let HasExtSDWA = 0;
  let HasOpSel = 0;
  let IsPacked = 0;
}

def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> {
  let Src0ModDPP = FPVRegInputMods;
  let Src1ModDPP = FPVRegInputMods;
  let HasClamp = 1;
}

def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32> {
  let HasExtVOP3DPP = 0;
  let HasSrc0Mods = 1;
  let HasSrc1Mods = 1;
  let HasClamp = 1;

  let Src0Mod = Int32InputMods;
  let Src1Mod = Int32InputMods;
  let Ins64 = getIns64<Src0RC64, Src1RC64, getVregSrcForVT<Src2VT>.ret,
                       3 /*NumSrcArgs*/, HasClamp, 1 /*HasModifiers*/,
                       1 /*HasSrc2Mods*/, HasOMod,
                       Src0Mod, Src1Mod, Src2Mod>.ret;
  let Asm64 = "$vdst, $src0, $src1$clamp";
}

// Write out to vcc or arbitrary SGPR.
// The 32-bit, SDWA and DPP asm strings spell the carry-out as a literal
// "vcc"; the VOP3 forms expose it as the explicit $sdst output.
def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], /*EnableClamp=*/1> {
  let Asm32 = "$vdst, vcc, $src0, $src1";
  let AsmVOP3Base = "$vdst, $sdst, $src0, $src1$clamp";
  let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8 = "$vdst, vcc, $src0, $src1 $dpp8$fi";
  let AsmDPP16 = AsmDPP#"$fi";
  let InsDPP = (ins DstRCDPP:$old,
                    Src0DPP:$src0,
                    Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                    DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
  let InsDPP8 = (ins DstRCDPP:$old,
                     Src0DPP:$src0,
                     Src1DPP:$src1,
                     dpp8:$dpp8, Dpp8FI:$fi);
  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
  let OutsVOP3DPP = Outs64;
  let OutsVOP3DPP8 = Outs64;
}

// Write out to vcc or arbitrary SGPR and read in from vcc or
// arbitrary SGPR.
// The 32-bit, SDWA and DPP asm strings spell both the carry-out and the
// carry-in as a literal "vcc"; the VOP3 form uses explicit $sdst and $src2.
def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableClamp=*/1> {
  let HasSrc2Mods = 0;
  let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
  let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8 = "$vdst, vcc, $src0, $src1, vcc $dpp8$fi";
  let AsmDPP16 = AsmDPP#"$fi";
  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
  let AsmVOP3Base = "$vdst, $sdst, $src0, $src1, $src2$clamp";
  let OutsVOP3DPP = Outs64;
  let OutsVOP3DPP8 = Outs64;

  // Suppress src2 implied by type since the 32-bit encoding uses an
  // implicit VCC use.
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);

  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                     Clamp:$clamp,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);

  let InsDPP = (ins DstRCDPP:$old,
                    Src0DPP:$src0,
                    Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                    DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
  let InsDPP8 = (ins DstRCDPP:$old,
                     Src0DPP:$src0,
                     Src1DPP:$src1,
                     dpp8:$dpp8, Dpp8FI:$fi);

  let HasExt = 1;
  let HasExtDPP = 1;
  let HasExt32BitDPP = 1;
  let HasExtSDWA = 1;
  let HasExtSDWA9 = 1;
}

// Read in from vcc or arbitrary SGPR.
// The SDWA and DPP asm strings spell the third input as a literal "vcc"; the
// 32-bit string omits it and the VOP3 string uses an explicit $src2.
class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> {
  let Asm32 = "$vdst, $src0, $src1";
  let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP = "$vdst, $src0_modifiers, $src1_modifiers, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8 = "$vdst, $src0, $src1, vcc $dpp8$fi";
  let AsmDPP16 = AsmDPP#"$fi";
  let AsmVOP3Base = "$vdst, $src0_modifiers, $src1_modifiers, $src2";

  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC64:$vdst);

  // Suppress src2 implied by type since the 32-bit encoding uses an
  // implicit VCC use.
  let Ins32 = (ins VSrc_f32:$src0, Src1RC32:$src1);

  let HasModifiers = 1;

  // Select FP modifiers for VOP3
  let Src0Mod = !if(!eq(Src0VT.Size, 16), FP16InputMods, FP32InputMods);
  let Src1Mod = Src0Mod;

  let HasSrc0IntMods = 0;
  let HasSrc1IntMods = 0;
  let HasSrc0FloatMods = 1;
  let HasSrc1FloatMods = 1;
  let InsSDWA = (ins FP32SDWAInputMods:$src0_modifiers, SDWASrc_f32:$src0,
                     FP32SDWAInputMods:$src1_modifiers, SDWASrc_f32:$src1,
                     Clamp:$clamp,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);

  let InsDPP = (ins DstRCDPP:$old,
                    FPVRegInputMods:$src0_modifiers, Src0DPP:$src0,
                    FPVRegInputMods:$src1_modifiers, Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                    DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins Dpp16FI:$fi));
  let InsDPP8 = (ins DstRCDPP:$old,
                     FPVRegInputMods:$src0_modifiers, Src0DPP:$src0,
                     FPVRegInputMods:$src1_modifiers, Src1DPP:$src1,
                     dpp8:$dpp8, Dpp8FI:$fi);

  let Src0ModVOP3DPP = FPVRegInputMods;
  let Src1ModVOP3DPP = FP32VCSrcInputMods;

  let HasExt = 1;
  let HasExtDPP = 1;
  let HasExt32BitDPP = 1;
  let HasExtSDWA = 1;
  let HasExtSDWA9 = 1;
}

def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>;
// 16-bit variant: IsTrue16 is set and the source modifiers are those of f16.
def VOP2e_I16_I16_I16_I1_fake16 : VOP2e_SGPR<[i16, i16, i16, i1]> {
  let IsTrue16 = 1;
  let DstRC64 = getVALUDstForVT<DstVT>.ret;

  let Src0Mod = getSrcMod<f16>.ret;
  let Src1Mod = getSrcMod<f16>.ret;

  let Src0VOP3DPP = VGPRSrc_32;
  let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT>.ret;
  let Src1ModVOP3DPP = getSrcModVOP3DPP<f16, 1/*IsFake16*/>.ret;
}

// Profile for v_readlane_b32: the result is written to an SReg_32 operand and
// the 64-bit operand lists and asm string reuse the 32-bit ones. No
// DPP/SDWA extensions.
def VOP_READLANE : VOPProfile<[i32, i32, i32, untyped]> {
  let Outs32 = (outs SReg_32:$vdst);
  let Outs64 = Outs32;
  let Ins32 = (ins VRegOrLdsSrc_32:$src0, SCSrc_b32:$src1);
  let Ins64 = Ins32;
  let Asm32 = " $vdst, $src0, $src1";
  let Asm64 = Asm32;

  let HasExt = 0;
  let HasExtDPP = 0;
  let HasExt32BitDPP = 0;
  let HasExt64BitDPP = 0;
  let HasExtSDWA = 0;
  let HasExtSDWA9 = 0;
}

// Profile for v_writelane_b32: takes an extra $vdst_in VGPR input that does
// not appear in the asm string. No DPP/SDWA extensions.
def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
  let Outs32 = (outs VGPR_32:$vdst);
  let Outs64 = Outs32;
  let Ins32 = (ins SCSrc_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in);
  let Ins64 = Ins32;
  let Asm32 = " $vdst, $src0, $src1";
  let Asm64 = Asm32;
  let HasSrc2 = 0;
  let HasSrc2Mods = 0;

  let HasExt = 0;
  let HasExtDPP = 0;
  let HasExt32BitDPP = 0;
  let HasExt64BitDPP = 0;
  let HasExtSDWA = 0;
  let HasExtSDWA9 = 0;
}

//===----------------------------------------------------------------------===//
// VOP2 Instructions
//===----------------------------------------------------------------------===//

let SubtargetPredicate = isGFX11Plus in
defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1_fake16>;
defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">;
let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;

let isCommutable = 1 in {
let isReMaterializable = 1 in {
defm V_ADD_F32 : VOP2Inst_VOPD <"v_add_f32", VOP_F32_F32_F32, 0x4, "v_add_f32", any_fadd>;
defm V_SUB_F32 : VOP2Inst_VOPD <"v_sub_f32", VOP_F32_F32_F32, 0x5, "v_sub_f32", any_fsub>;
defm V_SUBREV_F32 : VOP2Inst_VOPD <"v_subrev_f32", VOP_F32_F32_F32, 0x6, "v_subrev_f32", null_frag, "v_sub_f32">;
defm V_MUL_LEGACY_F32 : VOP2Inst_VOPD <"v_mul_legacy_f32", VOP_F32_F32_F32, 0x7, "v_mul_dx9_zero_f32", AMDGPUfmul_legacy>;
defm V_MUL_F32 : VOP2Inst_VOPD <"v_mul_f32", VOP_F32_F32_F32, 0x3, "v_mul_f32", any_fmul>;
defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>;
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>;
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
defm V_MIN_F32 : VOP2Inst_VOPD <"v_min_f32", VOP_F32_F32_F32, 0xb, "v_min_f32", fminnum_like>;
defm V_MAX_F32 : VOP2Inst_VOPD <"v_max_f32", VOP_F32_F32_F32, 0xa, "v_max_f32", fmaxnum_like>;
defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, clshr_rev_32, "v_lshr_b32">;
defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, cashr_rev_32, "v_ashr_i32">;
defm V_LSHLREV_B32 : VOP2Inst_VOPD <"v_lshlrev_b32", VOP_I32_I32_I32, 0x11, "v_lshlrev_b32", clshl_rev_32, "v_lshl_b32">;
defm V_AND_B32 : VOP2Inst_VOPD <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, 0x12, "v_and_b32", and>;
defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
} // End isReMaterializable = 1

let mayRaiseFPException = 0 in {
let OtherPredicates = [HasMadMacF32Insts] in {
// The MAC forms tie the destination to $src2 and exclude $src2 from the
// encoding.
let Constraints = "$vdst = $src2", DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1 in {
defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>;

let SubtargetPredicate = isGFX6GFX7GFX10 in
defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_MAC_LEGACY_F32>;
} // End Constraints = "$vdst = $src2", DisableEncoding="$src2",
  //     isConvertibleToThreeAddress = 1

let isReMaterializable = 1 in
def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>;
} // End OtherPredicates = [HasMadMacF32Insts]
} // End mayRaiseFPException = 0

// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
defm V_ADD_CO_U32 : VOP2bInst <"v_add_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_co_u32", 1>;
defm V_SUB_CO_U32 : VOP2bInst <"v_sub_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>;
defm V_SUBREV_CO_U32 : VOP2bInst <"v_subrev_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>;
defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32", 1>;
defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;
defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;


let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in {
defm V_ADD_U32 : VOP2Inst_VOPD <"v_add_u32", VOP_I32_I32_I32_ARITH, 0x10, "v_add_nc_u32", null_frag, "v_add_u32", 1>;
defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
}

} // End isCommutable = 1

// These are special
// and do not read the exec mask.
let isConvergent = 1, Uses = []<Register>, IsInvalidSingleUseConsumer = 1 in {
def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE, []>;
let IsNeverUniform = 1, Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, []> {
  let IsInvalidSingleUseProducer = 1;
}
} // End IsNeverUniform, $vdst = $vdst_in, DisableEncoding $vdst_in
} // End isConvergent = 1

// Select the readlane/writelane intrinsics for every type in Reg32Types.
foreach vt = Reg32Types.types in {
  def : GCNPat<(vt (int_amdgcn_readlane vt:$src0, i32:$src1)),
        (V_READLANE_B32 VRegOrLdsSrc_32:$src0, SCSrc_b32:$src1)
  >;

  def : GCNPat<(vt (int_amdgcn_writelane vt:$src0, i32:$src1, vt:$src2)),
        (V_WRITELANE_B32 SCSrc_b32:$src0, SCSrc_b32:$src1, VGPR_32:$src2)
  >;
}

let isReMaterializable = 1 in {
defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_I32_I32_I32>;
defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_I32_I32_I32, add_ctpop>;
let IsNeverUniform = 1 in {
defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>;
defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>;
} // End IsNeverUniform = 1
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, any_fldexp>;

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>;
defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_u16_f32>;
}

defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_V2F16_F32_F32, AMDGPUpkrtz_f16_f32>;
defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_V2I16_I32_I32, AMDGPUpk_u16_u32>;
defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_V2I16_I32_I32, AMDGPUpk_i16_i32>;


let SubtargetPredicate = isGFX6GFX7 in {
defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>;
defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>;
} // End SubtargetPredicate = isGFX6GFX7

let isCommutable = 1 in {
let SubtargetPredicate = isGFX6GFX7 in {
defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, csrl_32>;
defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, csra_32>;
defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, cshl_32>;
} // End SubtargetPredicate = isGFX6GFX7
} // End isCommutable = 1
} // End isReMaterializable = 1

defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst"

// Selects `Inst` for a divergent binary `Op`. When Inst is not the "orig"
// member of its Commutable_REV pair, the two sources are swapped.
class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
  GCNPat<
      (DivergentBinFrag<Op> Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
      !if(!cast<Commutable_REV>(Inst).IsOrig,
        (Inst $src0, $src1),
        (Inst $src1, $src0)
      )
  >;

// Same as DivergentBinOp, passing a trailing 0 as the third operand of Inst.
class DivergentClampingBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
  GCNPat<
      (DivergentBinFrag<Op> Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
      !if(!cast<Commutable_REV>(Inst).IsOrig,
        (Inst $src0, $src1, 0),
        (Inst $src1, $src0, 0)
      )
  >;

def : DivergentBinOp<csrl_32, V_LSHRREV_B32_e64>;
def : DivergentBinOp<csra_32, V_ASHRREV_I32_e64>;
def : DivergentBinOp<cshl_32, V_LSHLREV_B32_e64>;

let SubtargetPredicate = HasAddNoCarryInsts in {
  def : DivergentClampingBinOp<add, V_ADD_U32_e64>;
  def : DivergentClampingBinOp<sub, V_SUB_U32_e64>;
}

let SubtargetPredicate = isGFX6GFX7GFX8GFX9, Predicates = [isGFX6GFX7GFX8GFX9] in {
def : DivergentClampingBinOp<add, V_ADD_CO_U32_e64>;
def : DivergentClampingBinOp<sub, V_SUB_CO_U32_e64>;
}

def : DivergentBinOp<adde, V_ADDC_U32_e32>;
def : DivergentBinOp<sube, V_SUBB_U32_e32>;

// Splits a divergent 64-bit binary op into two 32-bit `Inst`s, one on the
// sub0 halves and one on the sub1 halves, and recombines the results.
class divergent_i64_BinOp <SDPatternOperator Op, Instruction Inst> :
  GCNPat<
      (DivergentBinFrag<Op> i64:$src0, i64:$src1),
      (REG_SEQUENCE VReg_64,
        (Inst
          (i32 (EXTRACT_SUBREG $src0, sub0)),
          (i32 (EXTRACT_SUBREG $src1, sub0))
        ), sub0,
        (Inst
          (i32 (EXTRACT_SUBREG $src0, sub1)),
          (i32 (EXTRACT_SUBREG $src1, sub1))
        ), sub1
      )
  >;

def : divergent_i64_BinOp <and, V_AND_B32_e64>;
def : divergent_i64_BinOp <or,  V_OR_B32_e64>;
def : divergent_i64_BinOp <xor, V_XOR_B32_e64>;

// mul24 w/ 64 bit output.
class mul24_64_Pat<SDPatternOperator Op, Instruction InstLo, Instruction InstHi> : GCNPat<
  (i64 (Op i32:$src0, i32:$src1)),
  (REG_SEQUENCE VReg_64,
    (InstLo $src0, $src1), sub0,
    (InstHi $src0, $src1), sub1)
>;

def : mul24_64_Pat<AMDGPUmul_i24, V_MUL_I32_I24_e64, V_MUL_HI_I32_I24_e64>;
def : mul24_64_Pat<AMDGPUmul_u24, V_MUL_U32_U24_e64, V_MUL_HI_U32_U24_e64>;

//===----------------------------------------------------------------------===//
// 16-Bit Operand Instructions
//===----------------------------------------------------------------------===//

// The ldexp.f16 intrinsic expects an integer src1 operand, though the hardware
// encoding treats src1 as an f16
def LDEXP_F16_VOPProfile : VOPProfile <[f16, f16, f16, untyped]> {
  let Src1Mod = Int32InputMods;
  let Src1ModDPP = IntVRegInputMods;
  let Src1ModVOP3DPP = IntVRegInputMods;
  // SDWA sext is the only modifier allowed.
912 let HasSrc1IntMods = 1; 913 let HasSrc1FloatMods = 0; 914 let Src1ModSDWA = Int16SDWAInputMods; 915} 916def LDEXP_F16_VOPProfile_True16 : VOPProfile_Fake16<VOP_F16_F16_F16> { 917 let Src1RC32 = RegisterOperand<VGPR_32_Lo128>; 918 let Src1DPP = RegisterOperand<VGPR_32_Lo128>; 919 let Src1ModDPP = IntT16VRegInputMods</* IsFake16= */ 1>; 920} 921 922let isReMaterializable = 1 in { 923let FPDPRounding = 1 in { 924 let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in 925 defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", LDEXP_F16_VOPProfile>; 926 let SubtargetPredicate = HasTrue16BitInsts in 927 defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16>; 928} // End FPDPRounding = 1 929// FIXME VOP3 Only instructions. NFC using VOPProfile_True16 for these until a planned change to use a new register class for VOP3 encoded True16 instuctions 930defm V_LSHLREV_B16 : VOP2Inst_e64_t16 <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>; 931defm V_LSHRREV_B16 : VOP2Inst_e64_t16 <"v_lshrrev_b16", VOP_I16_I16_I16, clshr_rev_16>; 932defm V_ASHRREV_I16 : VOP2Inst_e64_t16 <"v_ashrrev_i16", VOP_I16_I16_I16, cashr_rev_16>; 933let isCommutable = 1 in { 934let FPDPRounding = 1 in { 935defm V_ADD_F16 : VOP2Inst_t16 <"v_add_f16", VOP_F16_F16_F16, any_fadd>; 936defm V_SUB_F16 : VOP2Inst_t16 <"v_sub_f16", VOP_F16_F16_F16, any_fsub>; 937defm V_SUBREV_F16 : VOP2Inst_t16 <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">; 938defm V_MUL_F16 : VOP2Inst_t16 <"v_mul_f16", VOP_F16_F16_F16, any_fmul>; 939} // End FPDPRounding = 1 940defm V_MUL_LO_U16 : VOP2Inst_e64_t16 <"v_mul_lo_u16", VOP_I16_I16_I16, mul>; 941defm V_MAX_F16 : VOP2Inst_t16 <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>; 942defm V_MIN_F16 : VOP2Inst_t16 <"v_min_f16", VOP_F16_F16_F16, fminnum_like>; 943defm V_MAX_U16 : VOP2Inst_e64_t16 <"v_max_u16", VOP_I16_I16_I16, umax>; 944defm V_MAX_I16 : VOP2Inst_e64_t16 <"v_max_i16", VOP_I16_I16_I16, smax>; 945defm V_MIN_U16 : 
VOP2Inst_e64_t16 <"v_min_u16", VOP_I16_I16_I16, umin>; 946defm V_MIN_I16 : VOP2Inst_e64_t16 <"v_min_i16", VOP_I16_I16_I16, smin>; 947} // End isCommutable = 1 948} // End isReMaterializable = 1 949 950class LDEXP_F16_Pat <SDPatternOperator op, VOP_Pseudo inst, VOPProfile P = inst.Pfl> : GCNPat < 951 (P.DstVT (op (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), 952 (i16 (VOP3Mods0 P.Src1VT:$src1, i32:$src1_modifiers)))), 953 (inst $src0_modifiers, $src0, 954 $src1_modifiers, $src1, 955 $clamp, /* clamp */ 956 $omod /* omod */) 957>; 958 959let OtherPredicates = [NotHasTrue16BitInsts] in 960def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_e64>; 961 962let OtherPredicates = [HasTrue16BitInsts] in 963def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_t16_e64>; 964 965let SubtargetPredicate = isGFX11Plus in { 966 let isCommutable = 1 in { 967 defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, and>; 968 defm V_OR_B16_t16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, or>; 969 defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, xor>; 970 } // End isCommutable = 1 971} // End SubtargetPredicate = isGFX11Plus 972 973let FPDPRounding = 1, isReMaterializable = 1, FixedSize = 1 in { 974let SubtargetPredicate = isGFX10Plus, True16Predicate = NotHasTrue16BitInsts in { 975def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">; 976} 977let SubtargetPredicate = HasTrue16BitInsts in { 978def V_FMAMK_F16_t16 : VOP2_Pseudo <"v_fmamk_f16_t16", VOP_MADMK_F16_t16, [], "">; 979} 980 981let isCommutable = 1 in { 982let SubtargetPredicate = isGFX10Plus, True16Predicate = NotHasTrue16BitInsts in { 983def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">; 984} 985let SubtargetPredicate = HasTrue16BitInsts in { 986def V_FMAAK_F16_t16 : VOP2_Pseudo <"v_fmaak_f16_t16", VOP_MADAK_F16_t16, [], "">; 987} 988} // End isCommutable = 1 989} // End FPDPRounding 
= 1, isReMaterializable = 1, FixedSize = 1 990 991let Constraints = "$vdst = $src2", 992 DisableEncoding="$src2", 993 isConvertibleToThreeAddress = 1, 994 isCommutable = 1 in { 995let SubtargetPredicate = isGFX10Plus, True16Predicate = NotHasTrue16BitInsts in { 996defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>; 997} 998let SubtargetPredicate = HasTrue16BitInsts in { 999defm V_FMAC_F16_t16 : VOP2Inst <"v_fmac_f16_t16", VOP_MAC_F16_t16>; 1000} 1001} // End FMAC Constraints 1002 1003let SubtargetPredicate = Has16BitInsts in { 1004let isReMaterializable = 1 in { 1005let FPDPRounding = 1 in { 1006def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">; 1007} // End FPDPRounding = 1 1008let isCommutable = 1 in { 1009let mayRaiseFPException = 0 in { 1010def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">; 1011} 1012let SubtargetPredicate = isGFX8GFX9 in { 1013 defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>; 1014 defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16_ARITH, sub>; 1015 defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16_ARITH, null_frag, "v_sub_u16">; 1016} 1017} // End isCommutable = 1 1018} // End isReMaterializable = 1 1019 1020// FIXME: Missing FPDPRounding 1021let Constraints = "$vdst = $src2", DisableEncoding="$src2", 1022 isConvertibleToThreeAddress = 1, isCommutable = 1 in { 1023defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>; 1024} 1025} // End SubtargetPredicate = Has16BitInsts 1026 1027 1028let SubtargetPredicate = HasDLInsts in { 1029 1030let isReMaterializable = 1 in 1031defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32, xnor>; 1032 1033def : GCNPat< 1034 (i32 (DivergentUnaryFrag<not> (xor_oneuse i32:$src0, i32:$src1))), 1035 (i32 (V_XNOR_B32_e64 $src0, $src1)) 1036>; 1037 1038def : GCNPat< 1039 (i32 (DivergentBinFrag<xor_oneuse> (not i32:$src0), i32:$src1)), 1040 (i32 (V_XNOR_B32_e64 $src0, $src1)) 1041>; 1042 1043def : GCNPat< 1044 (i64 
(DivergentUnaryFrag<not> (xor_oneuse i64:$src0, i64:$src1))), 1045 (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64 1046 (i32 (EXTRACT_SUBREG $src0, sub0)), 1047 (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0, 1048 (i32 (V_XNOR_B32_e64 1049 (i32 (EXTRACT_SUBREG $src0, sub1)), 1050 (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1) 1051>; 1052 1053def : GCNPat< 1054 (i64 (DivergentBinFrag<xor_oneuse> (not i64:$src0), i64:$src1)), 1055 (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64 1056 (i32 (EXTRACT_SUBREG $src0, sub0)), 1057 (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0, 1058 (i32 (V_XNOR_B32_e64 1059 (i32 (EXTRACT_SUBREG $src0, sub1)), 1060 (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1) 1061>; 1062 1063let Constraints = "$vdst = $src2", 1064 DisableEncoding = "$src2", 1065 isConvertibleToThreeAddress = 1, 1066 isCommutable = 1 in 1067defm V_FMAC_F32 : VOP2Inst_VOPD <"v_fmac_f32", VOP_MAC_F32, 0x0, "v_fmac_f32">; 1068} // End SubtargetPredicate = HasDLInsts 1069 1070let SubtargetPredicate = HasFmaLegacy32 in { 1071 1072let Constraints = "$vdst = $src2", 1073 DisableEncoding = "$src2", 1074 isConvertibleToThreeAddress = 1, 1075 isCommutable = 1 in 1076defm V_FMAC_LEGACY_F32 : VOP2Inst <"v_fmac_legacy_f32", VOP_MAC_LEGACY_F32>; 1077 1078} // End SubtargetPredicate = HasFmaLegacy32 1079 1080let SubtargetPredicate = HasFmacF64Inst, 1081 Constraints = "$vdst = $src2", 1082 DisableEncoding="$src2", 1083 isConvertibleToThreeAddress = 1, 1084 isCommutable = 1, 1085 SchedRW = [WriteDoubleAdd] in 1086defm V_FMAC_F64 : VOP2Inst <"v_fmac_f64", VOP_MAC_F64>; 1087 1088let Constraints = "$vdst = $src2", 1089 DisableEncoding="$src2", 1090 isConvertibleToThreeAddress = 1, 1091 isCommutable = 1, 1092 IsDOT = 1 in { 1093 let SubtargetPredicate = HasDot5Insts in 1094 defm V_DOT2C_F32_F16 : VOP2Inst_VOPD<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16, 0xc, "v_dot2acc_f32_f16">; 1095 let SubtargetPredicate = HasDot6Insts in 1096 defm V_DOT4C_I32_I8 : VOP2Inst<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>; 1097 1098 let 
SubtargetPredicate = HasDot4Insts in 1099 defm V_DOT2C_I32_I16 : VOP2Inst<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>; 1100 let SubtargetPredicate = HasDot3Insts in 1101 defm V_DOT8C_I32_I4 : VOP2Inst<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>; 1102} 1103 1104let AddedComplexity = 30 in { 1105 def : GCNPat< 1106 (f32 (AMDGPUfdot2 v2f16:$src0, v2f16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))), 1107 (f32 (V_DOT2C_F32_F16_e32 $src0, $src1, $src2)) 1108 > { 1109 let SubtargetPredicate = HasDot5Insts; 1110 } 1111 def : GCNPat< 1112 (i32 (int_amdgcn_sdot4 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))), 1113 (i32 (V_DOT4C_I32_I8_e32 $src0, $src1, $src2)) 1114 > { 1115 let SubtargetPredicate = HasDot6Insts; 1116 } 1117 def : GCNPat< 1118 (i32 (int_amdgcn_sdot2 v2i16:$src0, v2i16:$src1, i32:$src2, (i1 DSTCLAMP.NONE))), 1119 (i32 (V_DOT2C_I32_I16_e32 $src0, $src1, $src2)) 1120 > { 1121 let SubtargetPredicate = HasDot4Insts; 1122 } 1123 def : GCNPat< 1124 (i32 (int_amdgcn_sdot8 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))), 1125 (i32 (V_DOT8C_I32_I4_e32 $src0, $src1, $src2)) 1126 > { 1127 let SubtargetPredicate = HasDot3Insts; 1128 } 1129} // End AddedComplexity = 30 1130 1131let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, FixedSize = 1 in { 1132def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">, VOPD_Component<0x2, "v_fmamk_f32">; 1133 1134let isCommutable = 1 in 1135def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">; 1136} // End SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1, FixedSize = 1 1137 1138let SubtargetPredicate = HasPkFmacF16Inst in { 1139defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>; 1140} // End SubtargetPredicate = HasPkFmacF16Inst 1141 1142// Note: 16-bit instructions produce a 0 result in the high 16-bits 1143// on GFX8 and GFX9 and preserve high 16 bits on GFX10+ 1144multiclass Arithmetic_i16_0Hi_Pats <SDPatternOperator 
op, Instruction inst> { 1145 1146def : GCNPat< 1147 (i32 (zext (op i16:$src0, i16:$src1))), 1148 (inst VSrc_b16:$src0, VSrc_b16:$src1) 1149>; 1150 1151def : GCNPat< 1152 (i64 (zext (op i16:$src0, i16:$src1))), 1153 (REG_SEQUENCE VReg_64, 1154 (inst $src0, $src1), sub0, 1155 (V_MOV_B32_e32 (i32 0)), sub1) 1156>; 1157} 1158 1159class ZExt_i16_i1_Pat <SDNode ext> : GCNPat < 1160 (i16 (ext i1:$src)), 1161 (V_CNDMASK_B32_e64 (i32 0/*src0mod*/), (i32 0/*src0*/), 1162 (i32 0/*src1mod*/), (i32 1/*src1*/), 1163 $src) 1164>; 1165 1166foreach vt = [i16, v2i16] in { 1167def : GCNPat < 1168 (and vt:$src0, vt:$src1), 1169 (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) 1170>; 1171 1172def : GCNPat < 1173 (or vt:$src0, vt:$src1), 1174 (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) 1175>; 1176 1177def : GCNPat < 1178 (xor vt:$src0, vt:$src1), 1179 (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) 1180>; 1181} 1182 1183let Predicates = [Has16BitInsts, isGFX8GFX9] in { 1184 1185// Undo sub x, c -> add x, -c canonicalization since c is more likely 1186// an inline immediate than -c. 1187// TODO: Also do for 64-bit. 
def : GCNPat<
  (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
  (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
>;

def : GCNPat<
  (i32 (zext (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)))),
  (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
>;

defm : Arithmetic_i16_0Hi_Pats<add, V_ADD_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<mul, V_MUL_LO_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<sub, V_SUB_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<smin, V_MIN_I16_e64>;
defm : Arithmetic_i16_0Hi_Pats<smax, V_MAX_I16_e64>;
defm : Arithmetic_i16_0Hi_Pats<umin, V_MIN_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<umax, V_MAX_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<clshl_rev_16, V_LSHLREV_B16_e64>;
defm : Arithmetic_i16_0Hi_Pats<clshr_rev_16, V_LSHRREV_B16_e64>;
defm : Arithmetic_i16_0Hi_Pats<cashr_rev_16, V_ASHRREV_I16_e64>;

} // End Predicates = [Has16BitInsts, isGFX8GFX9]

let Predicates = [Has16BitInsts] in {

def : ZExt_i16_i1_Pat<zext>;
def : ZExt_i16_i1_Pat<anyext>;

// Sign extension of an i1: choose between the constants 0 and -1 under
// control of $src.
def : GCNPat <
  (i16 (sext i1:$src)),
  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
                     /*src1mod*/(i32 0), /*src1*/(i32 -1), $src)
>;

} // End Predicates = [Has16BitInsts]


let SubtargetPredicate = HasIntClamp in {
// Set clamp bit for saturation.
def : VOPBinOpClampPat<uaddsat, V_ADD_CO_U32_e64, i32>;
def : VOPBinOpClampPat<usubsat, V_SUB_CO_U32_e64, i32>;
}

let SubtargetPredicate = HasAddNoCarryInsts, OtherPredicates = [HasIntClamp] in {
let AddedComplexity = 1 in { // Prefer over form with carry-out.
def : VOPBinOpClampPat<uaddsat, V_ADD_U32_e64, i32>;
def : VOPBinOpClampPat<usubsat, V_SUB_U32_e64, i32>;
}
}

let SubtargetPredicate = Has16BitInsts, OtherPredicates = [HasIntClamp] in {
def : VOPBinOpClampPat<uaddsat, V_ADD_U16_e64, i16>;
def : VOPBinOpClampPat<usubsat, V_SUB_U16_e64, i16>;
}

// GFX12+ 64-bit VOP2 pseudos. The "_pseudo" suffix keeps these distinct from
// the real-instruction names they are later bound to via the *_with_name
// real multiclasses.
let SubtargetPredicate = isGFX12Plus, isReMaterializable = 1 in {
  let SchedRW = [WriteDoubleAdd], isCommutable = 1 in {
    let FPDPRounding = 1 in {
      defm V_ADD_F64_pseudo : VOP2Inst <"v_add_f64_pseudo", VOP_F64_F64_F64, any_fadd>;
      // any_fmul rather than plain fmul, so that the strict-FP multiply is
      // selected as well; this matches any_fadd on the line above and the
      // any_fmul used for V_MUL_F16.
      defm V_MUL_F64_pseudo : VOP2Inst <"v_mul_f64_pseudo", VOP_F64_F64_F64, any_fmul>;
    } // End FPDPRounding = 1
    defm V_MIN_NUM_F64 : VOP2Inst <"v_min_num_f64", VOP_F64_F64_F64, fminnum_like>;
    defm V_MAX_NUM_F64 : VOP2Inst <"v_max_num_f64", VOP_F64_F64_F64, fmaxnum_like>;
  } // End SchedRW = [WriteDoubleAdd], isCommutable = 1
  let SchedRW = [Write64Bit] in {
    defm V_LSHLREV_B64_pseudo : VOP2Inst <"v_lshlrev_b64_pseudo", VOP_I64_I32_I64, clshl_rev_64>;
  } // End SchedRW = [Write64Bit]
} // End SubtargetPredicate = isGFX12Plus, isReMaterializable = 1

//===----------------------------------------------------------------------===//
// DPP Encodings
//===----------------------------------------------------------------------===//

// Low 32 bits of a VOP2 instruction in DPP form. Side-effect, Defs, Uses and
// scheduling information are copied from the DPP pseudo `ps`.
class VOP2_DPP<bits<6> op, VOP2_DPP_Pseudo ps,
               string opName = ps.OpName, VOPProfile p = ps.Pfl,
               bit IsDPP16 = 0> :
    VOP_DPP<opName, p, IsDPP16> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;

  bits<8> vdst;
  bits<8> src1;
  // Fixed value in the src0 field for this encoding (the SDWA encodings in
  // this file use 0xf9 in the same position).
  let Inst{8-0} = 0xfa;
  let Inst{16-9} = !if(p.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31} = 0x0;
}

// VOP2_DPP with IsDPP16 = 1; predicates are taken from the pseudo and the
// assembler predicate is HasDPP16.
class Base_VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
                      string opName = ps.OpName, VOPProfile p = ps.Pfl> :
    VOP2_DPP<op, ps, opName, p, 1> {
  let AssemblerPredicate = HasDPP16;
  let SubtargetPredicate = ps.SubtargetPredicate;
  let OtherPredicates = ps.OtherPredicates;
}

// Base_VOP2_DPP16 registered as the MC instruction of `ps` for the given
// encoding family.
class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps, int subtarget,
                 string opName = ps.OpName, VOPProfile p = ps.Pfl> :
    Base_VOP2_DPP16<op, ps, opName, p>,
    SIMCInstr <ps.PseudoInstr, subtarget>;

// Generation-parameterised DPP16 real. Instructions whose profile is not
// real-True16 are placed in the "_FAKE16" decoder namespace.
class VOP2_DPP16_Gen<bits<6> op, VOP2_DPP_Pseudo ps, GFXGen Gen,
                     string opName = ps.OpName, VOPProfile p = ps.Pfl> :
    VOP2_DPP16<op, ps, Gen.Subtarget, opName, p> {
  let AssemblerPredicate = Gen.AssemblerPredicate;
  let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
  let DecoderNamespace = Gen.DecoderNamespace#
                         !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}

// Low 32 bits of a VOP2 instruction in DPP8 form. Note that this is built
// from the plain VOP2 pseudo, not from a DPP pseudo.
class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
                VOPProfile p = ps.Pfl> :
    VOP_DPP8<ps.OpName, p> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;

  bits<8> vdst;
  bits<8> src1;

  // `fi` is not declared in this class.
  // NOTE(review): presumably a field of VOP_DPP8 -- confirm.
  let Inst{8-0} = fi;
  let Inst{16-9} = !if(p.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31} = 0x0;

  let SubtargetPredicate = ps.SubtargetPredicate;
  let OtherPredicates = ps.OtherPredicates;
}

// Generation-parameterised DPP8 real; same True16 / decoder-namespace
// handling as VOP2_DPP16_Gen.
class VOP2_DPP8_Gen<bits<6> op, VOP2_Pseudo ps, GFXGen Gen,
                    VOPProfile p = ps.Pfl> :
    VOP2_DPP8<op, ps, p> {
  let AssemblerPredicate = Gen.AssemblerPredicate;
  let True16Predicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, NoTrue16Predicate);
  let DecoderNamespace = Gen.DecoderNamespace#
                         !if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}

//===----------------------------------------------------------------------===//
// GFX11, GFX12
//===----------------------------------------------------------------------===//

//===------------------------------- VOP2 -------------------------------===//
// Real instruction for a MADK-style pseudo named NAME, using the 64-bit
// VOP2_MADKe encoding.
multiclass VOP2Only_Real_MADK<GFXGen Gen, bits<6> op> {
  def Gen.Suffix : VOP2_Real_Gen<!cast<VOP2_Pseudo>(NAME), Gen>,
                   VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
}

// As VOP2Only_Real_MADK, but the pseudo is looked up by opName and the
// assembly mnemonic is replaced by asmName.
multiclass VOP2Only_Real_MADK_with_name<GFXGen Gen, bits<6> op, string asmName,
                                        string opName = NAME> {
  def Gen.Suffix : VOP2_Real_Gen<!cast<VOP2_Pseudo>(opName), Gen>,
                   VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(opName).Pfl> {
    VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName);
    let AsmString = asmName # ps.AsmOperands;
  }
}

// 32-bit (VOP2e) real for the pseudo NAME_e32.
multiclass VOP2_Real_e32<GFXGen Gen, bits<6> op> {
  def _e32#Gen.Suffix : VOP2_Real_Gen<!cast<VOP2_Pseudo>(NAME#"_e32"), Gen>,
                        VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
}

// VOP2_Real_e32 with IsSingle set on the generated record.
multiclass VOP2Only_Real_e32<GFXGen Gen, bits<6> op> {
  let IsSingle = 1 in
  defm NAME: VOP2_Real_e32<Gen, op>;
}

// VOP3-encoded real for the pseudo NAME_e64; the 10-bit VOP3 opcode is the
// 6-bit VOP2 opcode prefixed with the bits {0, 1, 0, 0}.
multiclass VOP2_Real_e64<GFXGen Gen, bits<6> op> {
  def _e64#Gen.Suffix : VOP3_Real_Gen<!cast<VOP3_Pseudo>(NAME#"_e64"), Gen>,
                        VOP3e_gfx11_gfx12<{0, 1, 0, 0, op{5-0}},
                                          !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}

// DPP16 real, emitted only when the _e32 profile has HasExtDPP set.
multiclass VOP2_Real_dpp<GFXGen Gen, bits<6> op> {
  if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
    def _dpp#Gen.Suffix :
        VOP2_DPP16_Gen<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), Gen>;
}

// DPP8 real, emitted only when the _e32 profile has HasExtDPP set.
multiclass VOP2_Real_dpp8<GFXGen Gen, bits<6> op> {
  if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
    def _dpp8#Gen.Suffix :
        VOP2_DPP8_Gen<op, !cast<VOP2_Pseudo>(NAME#"_e32"), Gen>;
}

//===------------------------- VOP2 (with name) -------------------------===//
// The *_with_name variants bind a real to the pseudo called opName and print
// it with the mnemonic asmName.
multiclass VOP2_Real_e32_with_name<GFXGen Gen, bits<6> op, string opName,
                                   string asmName, bit single = 0> {
  defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
  def _e32#Gen.Suffix : VOP2_Real_Gen<ps, Gen, asmName>,
                        VOP2e<op{5-0}, ps.Pfl> {
    let AsmString = asmName # ps.AsmOperands;
    let IsSingle = single;
  }
}
multiclass VOP2_Real_e64_with_name<GFXGen Gen, bits<6> op, string opName,
                                   string asmName> {
  defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
  def _e64#Gen.Suffix : VOP3_Real_Gen<ps, Gen>,
                        VOP3e_gfx11_gfx12<{0, 1, 0, 0, op{5-0}}, ps.Pfl> {
    let AsmString = asmName # ps.AsmOperands;
  }
}

multiclass VOP2_Real_dpp_with_name<GFXGen Gen, bits<6> op, string opName,
                                   string asmName> {
  defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
  if ps.Pfl.HasExtDPP then
    def _dpp#Gen.Suffix :
        VOP2_DPP16_Gen<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), Gen> {
      let AsmString = asmName # ps.Pfl.AsmDPP16;
    }
}
multiclass VOP2_Real_dpp8_with_name<GFXGen Gen, bits<6> op, string opName,
                                    string asmName> {
  defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
  if ps.Pfl.HasExtDPP then
    def _dpp8#Gen.Suffix : VOP2_DPP8_Gen<op, ps, Gen> {
      let AsmString = asmName # ps.Pfl.AsmDPP8;
    }
}

//===------------------------------ VOP2be ------------------------------===//
// 32-bit real whose assembly string drops the ", vcc" text from the pseudo's
// operand string.
multiclass VOP2be_Real_e32<GFXGen Gen, bits<6> op, string opName, string asmName> {
  defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
  def _e32#Gen.Suffix : VOP2_Real_Gen<ps, Gen>,
                        VOP2e<op{5-0}, ps.Pfl> {
    let AsmString = asmName # !subst(", vcc", "", ps.AsmOperands);
  }
}

// DPP16 reals for a VOP2be instruction. When the _e32 profile has HasExtDPP,
// three records are produced:
//   _dpp      -- encodable form, ", vcc" removed from the assembly string;
//   _dpp_w32  -- asm-parser-only, wave32, "vcc" spelled "vcc_lo";
//   _dpp_w64  -- asm-parser-only, wave64, operand string unchanged.
multiclass VOP2be_Real_dpp<GFXGen Gen, bits<6> op, string opName, string asmName> {
  defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
  if ps.Pfl.HasExtDPP then {
    def _dpp#Gen.Suffix :
        VOP2_DPP16_Gen<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), Gen, asmName> {
      string AsmDPP = ps.Pfl.AsmDPP16;
      let AsmString = asmName # !subst(", vcc", "", AsmDPP);
    }
    def _dpp_w32#Gen.Suffix :
        Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
      string AsmDPP = ps.Pfl.AsmDPP16;
      let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
      let isAsmParserOnly = 1;
      let WaveSizePredicate = isWave32;
      let AssemblerPredicate = Gen.AssemblerPredicate;
      let DecoderNamespace = Gen.DecoderNamespace;
    }
    def _dpp_w64#Gen.Suffix :
        Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
      string AsmDPP = ps.Pfl.AsmDPP16;
      let AsmString = asmName # AsmDPP;
      let isAsmParserOnly = 1;
      let WaveSizePredicate = isWave64;
      let AssemblerPredicate = Gen.AssemblerPredicate;
      let DecoderNamespace = Gen.DecoderNamespace;
    }
  }
}

// DPP8 counterpart of VOP2be_Real_dpp, with the same three-record scheme
// (_dpp8, _dpp8_w32, _dpp8_w64).
multiclass VOP2be_Real_dpp8<GFXGen Gen, bits<6> op, string opName, string asmName> {
  defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
  if ps.Pfl.HasExtDPP then {
    def _dpp8#Gen.Suffix : VOP2_DPP8_Gen<op, ps, Gen> {
      string AsmDPP8 = ps.Pfl.AsmDPP8;
      let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
    }
    def _dpp8_w32#Gen.Suffix : VOP2_DPP8<op, ps> {
      string AsmDPP8 = ps.Pfl.AsmDPP8;
      let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
      let isAsmParserOnly = 1;
      let WaveSizePredicate = isWave32;
      let AssemblerPredicate = Gen.AssemblerPredicate;
      let DecoderNamespace = Gen.DecoderNamespace;
    }
    def _dpp8_w64#Gen.Suffix : VOP2_DPP8<op, ps> {
      string AsmDPP8 = ps.Pfl.AsmDPP8;
      let AsmString = asmName # AsmDPP8;
      let isAsmParserOnly = 1;
      let WaveSizePredicate = isWave64;
      let AssemblerPredicate = Gen.AssemblerPredicate;
      let DecoderNamespace = Gen.DecoderNamespace;
    }
  }
}

// We don't want to override separate decoderNamespaces within these
multiclass VOP2_Realtriple_e64<GFXGen Gen, bits<6> op> {
  defm NAME : VOP3_Realtriple<Gen, {0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, NAME> ;
}

multiclass VOP2_Realtriple_e64_with_name<GFXGen Gen, bits<6> op, string opName,
                                         string asmName> {
  defm NAME : VOP3_Realtriple_with_name<Gen, {0, 1, 0, 0, op{5-0}}, opName, asmName> ;
}

// e32 + VOP3be triple + DPP16 + DPP8 reals for a VOP2be instruction.
multiclass VOP2be_Real<GFXGen Gen, bits<6> op, string opName, string asmName> :
    VOP2be_Real_e32<Gen, op, opName, asmName>,
    VOP3be_Realtriple<Gen, {0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, opName, asmName>,
    VOP2be_Real_dpp<Gen, op, opName, asmName>,
    VOP2be_Real_dpp8<Gen, op, opName, asmName>;

// Only for CNDMASK
multiclass VOP2e_Real<GFXGen Gen, bits<6> op, string opName, string asmName> :
    VOP2_Real_e32<Gen, op>,
    VOP2_Realtriple_e64<Gen, op>,
    VOP2be_Real_dpp<Gen, op, opName, asmName>,
    VOP2be_Real_dpp8<Gen, op, opName, asmName>;

// e32 (IsSingle) + DPP16 + DPP8, without any VOP3 form.
multiclass VOP2Only_Real<GFXGen Gen, bits<6> op> :
    VOP2Only_Real_e32<Gen, op>,
    VOP2_Real_dpp<Gen, op>,
    VOP2_Real_dpp8<Gen, op>;

// VOP3 triple + e32 + DPP16 + DPP8.
multiclass VOP2_Real_FULL<GFXGen Gen, bits<6> op> :
    VOP2_Realtriple_e64<Gen, op>,
    VOP2_Real_e32<Gen, op>,
    VOP2_Real_dpp<Gen, op>,
    VOP2_Real_dpp8<Gen, op>;

// Renamed e32 / DPP16 / DPP8 reals, plus a mnemonic alias from the pseudo's
// own mnemonic to asmName.
multiclass VOP2_Real_NO_VOP3_with_name<GFXGen Gen, bits<6> op, string opName,
                                       string asmName, bit isSingle = 0> {
  defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
  defm NAME : VOP2_Real_e32_with_name<Gen, op, opName, asmName, isSingle>,
              VOP2_Real_dpp_with_name<Gen, op, opName, asmName>,
              VOP2_Real_dpp8_with_name<Gen, op, opName, asmName>;
  def Gen.Suffix#"_alias" : AMDGPUMnemonicAlias<ps.Mnemonic, asmName> {
    let AssemblerPredicate = Gen.AssemblerPredicate;
  }
}

multiclass VOP2_Real_FULL_with_name<GFXGen Gen, bits<6> op, string opName,
                                    string asmName> :
    VOP2_Realtriple_e64_with_name<Gen, op, opName, asmName>,
    VOP2_Real_NO_VOP3_with_name<Gen, op, opName, asmName>;

// Renamed e32 / e64 reals without DPP forms, plus the mnemonic alias from
// the pseudo's own mnemonic to asmName.
multiclass VOP2_Real_NO_DPP_with_name<GFXGen Gen, bits<6> op, string opName,
                                      string asmName> {
  defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
  defm NAME : VOP2_Real_e32_with_name<Gen, op, opName, asmName>,
              VOP2_Real_e64_with_name<Gen, op, opName, asmName>;
  def Gen.Suffix#"_alias" : AMDGPUMnemonicAlias<ps.Mnemonic, asmName> {
    let AssemblerPredicate = Gen.AssemblerPredicate;
  }
}

// e32 / e64 reals without DPP forms, plus a mnemonic alias from `alias` to
// NAME.
multiclass VOP2_Real_NO_DPP_with_alias<GFXGen Gen, bits<6> op, string alias> {
  defm NAME : VOP2_Real_e32<Gen, op>,
              VOP2_Real_e64<Gen, op>;
  def Gen.Suffix#"_alias" : AMDGPUMnemonicAlias<alias, NAME> {
    let AssemblerPredicate = Gen.AssemblerPredicate;
  }
}

//===----------------------------------------------------------------------===//
// GFX12.
//===----------------------------------------------------------------------===//

// Thin wrappers fixing the generation argument of the generic real
// multiclasses to GFX12Gen.
multiclass VOP2be_Real_gfx12<bits<6> op, string opName, string asmName>
    : VOP2be_Real<GFX12Gen, op, opName, asmName>;

// Only for CNDMASK
multiclass VOP2e_Real_gfx12<bits<6> op, string opName, string asmName>
    : VOP2e_Real<GFX12Gen, op, opName, asmName>;

multiclass VOP2_Real_FULL_with_name_gfx12<bits<6> op, string opName,
                                          string asmName>
    : VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;

// Full set of renamed reals, plus a second mnemonic alias (alias -> asmName)
// that is only accepted when isGFX12Only holds.
multiclass VOP2_Real_FULL_t16_with_name_gfx12<bits<6> op, string opName,
                                              string asmName, string alias> {
  defm NAME : VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
  def _gfx12_2nd_alias : AMDGPUMnemonicAlias<alias, asmName> {
    let AssemblerPredicate = isGFX12Only;
  }
}

multiclass VOP2_Real_NO_DPP_with_name_gfx12<bits<6> op, string opName,
                                            string asmName>
    : VOP2_Real_NO_DPP_with_name<GFX12Gen, op, opName, asmName>;

multiclass VOP2_Real_NO_DPP_with_alias_gfx12<bits<6> op, string alias>
    : VOP2_Real_NO_DPP_with_alias<GFX12Gen, op, alias>;

// 64-bit operations: reals bound to the "_pseudo" definitions, or aliased
// from the older mnemonic.
defm V_ADD_F64 : VOP2_Real_NO_DPP_with_name_gfx12<0x002, "V_ADD_F64_pseudo", "v_add_f64">;
defm V_MUL_F64 : VOP2_Real_NO_DPP_with_name_gfx12<0x006, "V_MUL_F64_pseudo", "v_mul_f64">;
defm V_LSHLREV_B64 : VOP2_Real_NO_DPP_with_name_gfx12<0x01f, "V_LSHLREV_B64_pseudo", "v_lshlrev_b64">;
defm V_MIN_NUM_F64 : VOP2_Real_NO_DPP_with_alias_gfx12<0x00d, "v_min_f64">;
defm V_MAX_NUM_F64 : VOP2_Real_NO_DPP_with_alias_gfx12<0x00e, "v_max_f64">;

// CNDMASK and the carry-in add / subtract family.
defm V_CNDMASK_B32 : VOP2e_Real_gfx12<0x001, "V_CNDMASK_B32", "v_cndmask_b32">;
defm V_ADD_CO_CI_U32 : VOP2be_Real_gfx12<0x020, "V_ADDC_U32", "v_add_co_ci_u32">;
defm V_SUB_CO_CI_U32 : VOP2be_Real_gfx12<0x021, "V_SUBB_U32", "v_sub_co_ci_u32">;
defm V_SUBREV_CO_CI_U32 : VOP2be_Real_gfx12<0x022, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;

// min / max are printed with the "_num" mnemonics here; the f16 forms also
// accept the previous mnemonic through the second alias.
defm V_MIN_NUM_F32 : VOP2_Real_FULL_with_name_gfx12<0x015, "V_MIN_F32", "v_min_num_f32">;
defm V_MAX_NUM_F32 : VOP2_Real_FULL_with_name_gfx12<0x016, "V_MAX_F32", "v_max_num_f32">;
defm V_MIN_NUM_F16 : VOP2_Real_FULL_t16_with_name_gfx12<0x030, "V_MIN_F16_t16", "v_min_num_f16", "v_min_f16">;
defm V_MIN_NUM_F16_fake16 : VOP2_Real_FULL_t16_with_name_gfx12<0x030, "V_MIN_F16_fake16", "v_min_num_f16", "v_min_f16">;
defm V_MAX_NUM_F16 : VOP2_Real_FULL_t16_with_name_gfx12<0x031, "V_MAX_F16_t16", "v_max_num_f16", "v_max_f16">;
defm V_MAX_NUM_F16_fake16 : VOP2_Real_FULL_t16_with_name_gfx12<0x031, "V_MAX_F16_fake16", "v_max_num_f16", "v_max_f16">;

let SubtargetPredicate = isGFX12Plus in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx12>;

  defm : VOP2bInstAliases<V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx12,
                          "v_add_co_ci_u32">;
  defm : VOP2bInstAliases<V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx12,
                          "v_sub_co_ci_u32">;
  defm : VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx12,
                          "v_subrev_co_ci_u32">;
} // End SubtargetPredicate = isGFX12Plus

//===----------------------------------------------------------------------===//
// GFX11.
//===----------------------------------------------------------------------===//

// GFX11 (and combined GFX11+GFX12) shorthands over the generation-generic
// real multiclasses.

multiclass VOP2be_Real_gfx11<bits<6> op, string opName, string asmName> :
  VOP2be_Real<GFX11Gen, op, opName, asmName>;

// Only for CNDMASK
multiclass VOP2e_Real_gfx11<bits<6> op, string opName, string asmName> :
  VOP2e_Real<GFX11Gen, op, opName, asmName>;

// e32, DPP16 and DPP8 reals for GFX11, plus a GFX11-only mnemonic alias built
// from the mnemonic of the `opName`_e32 pseudo and `asmName`.
multiclass VOP2_Real_NO_VOP3_with_name_gfx11<bits<6> op,
                                             string opName,
                                             string asmName,
                                             bit isSingle = 0> {
  defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
  defm NAME :
    VOP2_Real_e32_with_name<GFX11Gen, op, opName, asmName, isSingle>,
    VOP2_Real_dpp_with_name<GFX11Gen, op, opName, asmName>,
    VOP2_Real_dpp8_with_name<GFX11Gen, op, opName, asmName>;
  def _gfx11_alias : AMDGPUMnemonicAlias<ps32.Mnemonic, asmName> {
    let AssemblerPredicate = isGFX11Only;
  }
}

multiclass VOP2_Real_NO_DPP_with_name_gfx11<bits<6> op,
                                            string opName,
                                            string asmName> :
  VOP2_Real_NO_DPP_with_name<GFX11Gen, op, opName, asmName>;

multiclass VOP2_Real_FULL_gfx11_gfx12<bits<6> op> :
  VOP2_Real_FULL<GFX11Gen, op>,
  VOP2_Real_FULL<GFX12Gen, op>;

multiclass VOP2_Real_FULL_with_name_gfx11_gfx12<bits<6> op,
                                                string opName,
                                                string asmName> :
  VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
  VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;

multiclass VOP2_Real_e32_gfx11_gfx12<bits<6> op> :
  VOP2Only_Real<GFX11Gen, op>,
  VOP2Only_Real<GFX12Gen, op>;

multiclass VOP3Only_Realtriple_gfx11_gfx12<bits<10> op> :
  VOP3Only_Realtriple<GFX11Gen, op>,
  VOP3Only_Realtriple<GFX12Gen, op>;

multiclass VOP3Only_Realtriple_t16_gfx11_gfx12<bits<10> op, string asmName> :
  VOP3Only_Realtriple_t16<GFX11Gen, op, asmName>,
  VOP3Only_Realtriple_t16<GFX12Gen, op, asmName>;

multiclass VOP3beOnly_Realtriple_gfx11_gfx12<bits<10> op> :
  VOP3beOnly_Realtriple<GFX11Gen, op>,
  VOP3beOnly_Realtriple<GFX12Gen, op>;

multiclass VOP2Only_Real_MADK_with_name_gfx11_gfx12<bits<6> op,
                                                    string asmName,
                                                    string opName = NAME> :
  VOP2Only_Real_MADK_with_name<GFX11Gen, op, asmName, opName>,
  VOP2Only_Real_MADK_with_name<GFX12Gen, op, asmName, opName>;

// Note the argument order: callers pass (op, asmName[, opName]) while the
// underlying *_with_name multiclasses take (op, opName, asmName).
multiclass VOP2_Real_FULL_t16_gfx11<bits<6> op,
                                    string asmName,
                                    string opName = NAME> :
  VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>;

multiclass VOP2_Real_FULL_t16_gfx11_gfx12<bits<6> op,
                                          string asmName,
                                          string opName = NAME> :
  VOP2_Real_FULL_with_name_gfx11_gfx12<op, opName, asmName>;

multiclass VOP2_Real_FULL_gfx11<bits<6> op> :
  VOP2_Real_FULL<GFX11Gen, op>;

defm V_CNDMASK_B32 :
  VOP2e_Real_gfx11<0x001, "V_CNDMASK_B32", "v_cndmask_b32">;
defm V_DOT2ACC_F32_F16 :
  VOP2_Real_NO_VOP3_with_name_gfx11<0x002, "V_DOT2C_F32_F16",
                                    "v_dot2acc_f32_f16", 1>;
defm V_FMAC_DX9_ZERO_F32 :
  VOP2_Real_NO_DPP_with_name_gfx11<0x006, "V_FMAC_LEGACY_F32",
                                   "v_fmac_dx9_zero_f32">;
defm V_MUL_DX9_ZERO_F32 :
  VOP2_Real_FULL_with_name_gfx11_gfx12<0x007, "V_MUL_LEGACY_F32",
                                       "v_mul_dx9_zero_f32">;
defm V_LSHLREV_B32 : VOP2_Real_FULL_gfx11_gfx12<0x018>;
defm V_LSHRREV_B32 : VOP2_Real_FULL_gfx11_gfx12<0x019>;
defm V_ASHRREV_I32 : VOP2_Real_FULL_gfx11_gfx12<0x01a>;
defm V_ADD_CO_CI_U32 :
  VOP2be_Real_gfx11<0x020, "V_ADDC_U32", "v_add_co_ci_u32">;
defm V_SUB_CO_CI_U32 :
  VOP2be_Real_gfx11<0x021, "V_SUBB_U32", "v_sub_co_ci_u32">;
defm V_SUBREV_CO_CI_U32 :
  VOP2be_Real_gfx11<0x022, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;

defm V_CVT_PK_RTZ_F16_F32 :
  VOP2_Real_FULL_with_name_gfx11_gfx12<0x02f, "V_CVT_PKRTZ_F16_F32",
                                       "v_cvt_pk_rtz_f16_f32">;
defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx11_gfx12<0x03c>;

// 16-bit operations: the _t16 and _fake16 pseudos share opcode and mnemonic.
defm V_ADD_F16_t16       : VOP2_Real_FULL_t16_gfx11_gfx12<0x032, "v_add_f16">;
defm V_ADD_F16_fake16    : VOP2_Real_FULL_t16_gfx11_gfx12<0x032, "v_add_f16">;
defm V_SUB_F16_t16       : VOP2_Real_FULL_t16_gfx11_gfx12<0x033, "v_sub_f16">;
defm V_SUB_F16_fake16    : VOP2_Real_FULL_t16_gfx11_gfx12<0x033, "v_sub_f16">;
defm V_SUBREV_F16_t16    :
  VOP2_Real_FULL_t16_gfx11_gfx12<0x034, "v_subrev_f16">;
defm V_SUBREV_F16_fake16 :
  VOP2_Real_FULL_t16_gfx11_gfx12<0x034, "v_subrev_f16">;
defm V_MUL_F16_t16       : VOP2_Real_FULL_t16_gfx11_gfx12<0x035, "v_mul_f16">;
defm V_MUL_F16_fake16    : VOP2_Real_FULL_t16_gfx11_gfx12<0x035, "v_mul_f16">;
defm V_FMAC_F16_t16      : VOP2_Real_FULL_t16_gfx11_gfx12<0x036, "v_fmac_f16">;
defm V_LDEXP_F16_t16     :
  VOP2_Real_FULL_t16_gfx11_gfx12<0x03b, "v_ldexp_f16">;
defm V_MAX_F16_t16       : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
defm V_MAX_F16_fake16    : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
defm V_MIN_F16_t16       : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;
defm V_MIN_F16_fake16    : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;
defm V_FMAMK_F16_t16     :
  VOP2Only_Real_MADK_with_name_gfx11_gfx12<0x037, "v_fmamk_f16">;
defm V_FMAAK_F16_t16     :
  VOP2Only_Real_MADK_with_name_gfx11_gfx12<0x038, "v_fmaak_f16">;

// VOP3 only.
defm V_CNDMASK_B16      : VOP3Only_Realtriple_gfx11_gfx12<0x25d>;
defm V_LDEXP_F32        : VOP3Only_Realtriple_gfx11_gfx12<0x31c>;
defm V_BFM_B32          : VOP3Only_Realtriple_gfx11_gfx12<0x31d>;
defm V_BCNT_U32_B32     : VOP3Only_Realtriple_gfx11_gfx12<0x31e>;
defm V_MBCNT_LO_U32_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x31f>;
defm V_MBCNT_HI_U32_B32 : VOP3Only_Realtriple_gfx11_gfx12<0x320>;
defm V_CVT_PK_NORM_I16_F32 :
  VOP3Only_Realtriple_with_name_gfx11_gfx12<0x321, "V_CVT_PKNORM_I16_F32",
                                            "v_cvt_pk_norm_i16_f32">;
defm V_CVT_PK_NORM_U16_F32 :
  VOP3Only_Realtriple_with_name_gfx11_gfx12<0x322, "V_CVT_PKNORM_U16_F32",
                                            "v_cvt_pk_norm_u16_f32">;
defm V_CVT_PK_U16_U32   : VOP3Only_Realtriple_gfx11_gfx12<0x323>;
defm V_CVT_PK_I16_I32   : VOP3Only_Realtriple_gfx11_gfx12<0x324>;
defm V_ADD_CO_U32       : VOP3beOnly_Realtriple_gfx11_gfx12<0x300>;
defm V_SUB_CO_U32       : VOP3beOnly_Realtriple_gfx11_gfx12<0x301>;
defm V_SUBREV_CO_U32    : VOP3beOnly_Realtriple_gfx11_gfx12<0x302>;

// Instruction aliases for the GFX11 e32 reals of CNDMASK and the
// carry-in/carry-out operations.
let SubtargetPredicate = isGFX11Only in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx11>;

  defm : VOP2bInstAliases<V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx11,
                          "v_add_co_ci_u32">;
  defm : VOP2bInstAliases<V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx11,
                          "v_sub_co_ci_u32">;
  defm : VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx11,
                          "v_subrev_co_ci_u32">;
} // End SubtargetPredicate = isGFX11Only

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
  //===------------------------------- VOP2 -------------------------------===//
  // Each multiclass in this group defines one GFX10 real for the pseudo(s)
  // named after NAME; the SDWA/DPP/DPP8 ones only do so when the profile of
  // the _e32 pseudo reports the corresponding extension.
  multiclass VOP2Only_Real_MADK_gfx10<bits<6> op> {
    defvar psMADK = !cast<VOP2_Pseudo>(NAME);
    def _gfx10 : VOP2_Real<psMADK, SIEncodingFamily.GFX10>,
                 VOP2_MADKe<op{5-0}, psMADK.Pfl>;
  }
  multiclass VOP2Only_Real_MADK_gfx10_with_name<bits<6> op, string opName,
                                                string asmName> {
    defvar psMADK = !cast<VOP2_Pseudo>(opName);
    def _gfx10 : VOP2_Real<psMADK, SIEncodingFamily.GFX10>,
                 VOP2_MADKe<op{5-0}, psMADK.Pfl> {
      VOP2_Pseudo ps = psMADK;
      let AsmString = asmName # ps.AsmOperands;
    }
  }
  multiclass VOP2_Real_e32_gfx10<bits<6> op> {
    defvar ps32 = !cast<VOP2_Pseudo>(NAME#"_e32");
    def _e32_gfx10 : VOP2_Real<ps32, SIEncodingFamily.GFX10>,
                     VOP2e<op{5-0}, ps32.Pfl>;
  }
  multiclass VOP2_Real_e64_gfx10<bits<6> op> {
    defvar ps64 = !cast<VOP3_Pseudo>(NAME#"_e64");
    def _e64_gfx10 : VOP3_Real<ps64, SIEncodingFamily.GFX10>,
                     VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, ps64.Pfl>;
  }
  multiclass VOP2_Real_sdwa_gfx10<bits<6> op> {
    if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then {
      // The _sdwa pseudo is only looked up when the guard holds.
      defvar psSDWA = !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa");
      def _sdwa_gfx10 : VOP_SDWA10_Real<psSDWA>,
                        VOP2_SDWA9Ae<op{5-0}, psSDWA.Pfl>;
    }
  }
  multiclass VOP2_Real_dpp_gfx10<bits<6> op> {
    if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then {
      defvar psDPP = !cast<VOP2_DPP_Pseudo>(NAME#"_dpp");
      def _dpp_gfx10 : VOP2_DPP16<op, psDPP, SIEncodingFamily.GFX10>;
    }
  }
  multiclass VOP2_Real_dpp8_gfx10<bits<6> op> {
    defvar ps32 = !cast<VOP2_Pseudo>(NAME#"_e32");
    if ps32.Pfl.HasExt32BitDPP then
    def _dpp8_gfx10 : VOP2_DPP8<op, ps32>;
  }

  //===------------------------- VOP2 (with name) -------------------------===//
  // Same reals as above, but the pseudo is selected by `opName` and the
  // assembly string is rebuilt from `asmName` plus the pseudo's operands.
  multiclass VOP2_Real_e32_gfx10_with_name<bits<6> op, string opName,
                                           string asmName> {
    defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
    def _e32_gfx10 : VOP2_Real<ps32, SIEncodingFamily.GFX10>,
                     VOP2e<op{5-0}, ps32.Pfl> {
      VOP2_Pseudo ps = ps32;
      let AsmString = asmName # ps.AsmOperands;
    }
  }
  multiclass VOP2_Real_e64_gfx10_with_name<bits<6> op, string opName,
                                           string asmName> {
    defvar ps64 = !cast<VOP3_Pseudo>(opName#"_e64");
    def _e64_gfx10 : VOP3_Real<ps64, SIEncodingFamily.GFX10>,
                     VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, ps64.Pfl> {
      VOP3_Pseudo ps = ps64;
      let AsmString = asmName # ps.AsmOperands;
    }
  }
  multiclass VOP2_Real_sdwa_gfx10_with_name<bits<6> op, string opName,
                                            string asmName> {
    if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then {
      defvar psSDWA = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
      def _sdwa_gfx10 : VOP_SDWA10_Real<psSDWA>,
                        VOP2_SDWA9Ae<op{5-0}, psSDWA.Pfl> {
        VOP2_SDWA_Pseudo ps = psSDWA;
        let AsmString = asmName # ps.AsmOperands;
      }
    }
  }
  multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName,
                                           string asmName> {
    defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
    if ps32.Pfl.HasExt32BitDPP then {
      defvar psDPP = !cast<VOP2_DPP_Pseudo>(opName#"_dpp");
      def _dpp_gfx10 : VOP2_DPP16<op, psDPP, SIEncodingFamily.GFX10> {
        VOP2_Pseudo ps = ps32;
        let AsmString = asmName # ps.Pfl.AsmDPP16;
      }
    }
  }
  multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName,
                                            string asmName> {
    defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
    if ps32.Pfl.HasExt32BitDPP then
    def _dpp8_gfx10 : VOP2_DPP8<op, ps32> {
      VOP2_Pseudo ps = ps32;
      let AsmString = asmName # ps.Pfl.AsmDPP8;
    }
  }

  //===------------------------------ VOP2be ------------------------------===//
  // For SDWA, DPP16 and DPP8 three records are emitted: the main real with
  // ", vcc" stripped from the operand string, plus two isAsmParserOnly forms,
  // one predicated on isWave32 with "vcc" rewritten to "vcc_lo" and one
  // predicated on isWave64 with the operand string unchanged.
  multiclass VOP2be_Real_e32_gfx10<bits<6> op, string opName, string asmName> {
    defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
    def _e32_gfx10 : VOP2_Real<ps32, SIEncodingFamily.GFX10>,
                     VOP2e<op{5-0}, ps32.Pfl> {
      VOP2_Pseudo Ps = ps32;
      let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
    }
  }
  multiclass VOP2be_Real_e64_gfx10<bits<6> op, string opName, string asmName> {
    defvar ps64 = !cast<VOP3_Pseudo>(opName#"_e64");
    def _e64_gfx10 : VOP3_Real<ps64, SIEncodingFamily.GFX10>,
                     VOP3be_gfx10<{0, 1, 0, 0, op{5-0}}, ps64.Pfl> {
      VOP3_Pseudo Ps = ps64;
      let AsmString = asmName # Ps.AsmOperands;
    }
  }
  multiclass VOP2be_Real_sdwa_gfx10<bits<6> op, string opName, string asmName> {
    if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then {
      defvar psSDWA = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
      def _sdwa_gfx10 : VOP_SDWA10_Real<psSDWA>,
                        VOP2_SDWA9Ae<op{5-0}, psSDWA.Pfl> {
        VOP2_SDWA_Pseudo Ps = psSDWA;
        let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
      }
      def _sdwa_w32_gfx10 : Base_VOP_SDWA10_Real<psSDWA>,
                            VOP2_SDWA9Ae<op{5-0}, psSDWA.Pfl> {
        VOP2_SDWA_Pseudo Ps = psSDWA;
        let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands);
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave32;
      }
      def _sdwa_w64_gfx10 : Base_VOP_SDWA10_Real<psSDWA>,
                            VOP2_SDWA9Ae<op{5-0}, psSDWA.Pfl> {
        VOP2_SDWA_Pseudo Ps = psSDWA;
        let AsmString = asmName # Ps.AsmOperands;
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave64;
      }
    }
  }
  multiclass VOP2be_Real_dpp_gfx10<bits<6> op, string opName, string asmName> {
    defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
    if ps32.Pfl.HasExt32BitDPP then {
      defvar psDPP = !cast<VOP2_DPP_Pseudo>(opName#"_dpp");
      def _dpp_gfx10 :
        VOP2_DPP16<op, psDPP, SIEncodingFamily.GFX10, asmName> {
        string AsmDPP = ps32.Pfl.AsmDPP16;
        let AsmString = asmName # !subst(", vcc", "", AsmDPP);
      }
      def _dpp_w32_gfx10 : Base_VOP2_DPP16<op, psDPP, asmName> {
        string AsmDPP = ps32.Pfl.AsmDPP16;
        let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave32;
      }
      def _dpp_w64_gfx10 : Base_VOP2_DPP16<op, psDPP, asmName> {
        string AsmDPP = ps32.Pfl.AsmDPP16;
        let AsmString = asmName # AsmDPP;
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave64;
      }
    }
  }
  multiclass VOP2be_Real_dpp8_gfx10<bits<6> op, string opName, string asmName> {
    defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
    if ps32.Pfl.HasExt32BitDPP then {
      def _dpp8_gfx10 : VOP2_DPP8<op, ps32> {
        string AsmDPP8 = ps32.Pfl.AsmDPP8;
        let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
      }
      def _dpp8_w32_gfx10 : VOP2_DPP8<op, ps32> {
        string AsmDPP8 = ps32.Pfl.AsmDPP8;
        let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave32;
      }
      def _dpp8_w64_gfx10 : VOP2_DPP8<op, ps32> {
        string AsmDPP8 = ps32.Pfl.AsmDPP8;
        let AsmString = asmName # AsmDPP8;
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave64;
      }
    }
  }

  //===----------------------------- VOP3Only -----------------------------===//
  multiclass VOP3Only_Real_gfx10<bits<10> op> {
    defvar ps64 = !cast<VOP3_Pseudo>(NAME#"_e64");
    def _e64_gfx10 : VOP3_Real<ps64, SIEncodingFamily.GFX10>,
                     VOP3e_gfx10<op, ps64.Pfl> {
      let IsSingle = 1;
    }
  }

  //===---------------------------- VOP3beOnly ----------------------------===//
  multiclass VOP3beOnly_Real_gfx10<bits<10> op> {
    defvar ps64 = !cast<VOP3_Pseudo>(NAME#"_e64");
    def _e64_gfx10 : VOP3_Real<ps64, SIEncodingFamily.GFX10>,
                     VOP3be_gfx10<op, ps64.Pfl> {
      let IsSingle = 1;
    }
  }
} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"

// Combinations of the GFX10 multiclasses above with the generation-generic
// GFX11/GFX12 ones.

multiclass VOP2Only_Real_MADK_gfx10_gfx11<bits<6> op> :
  VOP2Only_Real_MADK_gfx10<op>,
  VOP2Only_Real_MADK<GFX11Gen, op>;

multiclass VOP2Only_Real_MADK_gfx10_gfx11_gfx12<bits<6> op> :
  VOP2Only_Real_MADK_gfx10_gfx11<op>,
  VOP2Only_Real_MADK<GFX12Gen, op>;

multiclass VOP2be_Real_gfx10<bits<6> op, string opName, string asmName> :
  VOP2be_Real_e32_gfx10<op, opName, asmName>,
  VOP2be_Real_e64_gfx10<op, opName, asmName>,
  VOP2be_Real_sdwa_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp8_gfx10<op, opName, asmName>;

// Plain e32/e64 encodings combined with the VOP2be SDWA/DPP/DPP8 reals.
multiclass VOP2e_Real_gfx10<bits<6> op, string opName, string asmName> :
  VOP2_Real_e32_gfx10<op>,
  VOP2_Real_e64_gfx10<op>,
  VOP2be_Real_sdwa_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp8_gfx10<op, opName, asmName>;

multiclass VOP2_Real_gfx10<bits<6> op> :
  VOP2_Real_e32_gfx10<op>,
  VOP2_Real_e64_gfx10<op>,
  VOP2_Real_sdwa_gfx10<op>,
  VOP2_Real_dpp_gfx10<op>,
  VOP2_Real_dpp8_gfx10<op>;

multiclass VOP2_Real_gfx10_gfx11<bits<6> op> :
  VOP2_Real_gfx10<op>,
  VOP2_Real_FULL<GFX11Gen, op>;

multiclass VOP2_Real_gfx10_gfx11_gfx12<bits<6> op> :
  VOP2_Real_gfx10_gfx11<op>,
  VOP2_Real_FULL<GFX12Gen, op>;

multiclass VOP2_Real_with_name_gfx10<bits<6> op, string opName,
                                     string asmName> :
  VOP2_Real_e32_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_e64_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_sdwa_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>;

multiclass VOP2_Real_with_name_gfx10_gfx11_gfx12<bits<6> op, string opName,
                                                 string asmName> :
  VOP2_Real_with_name_gfx10<op, opName, asmName>,
  VOP2_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
  VOP2_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;

// NB: Same opcode as v_mac_legacy_f32
let DecoderNamespace = "GFX10_B" in
defm V_FMAC_LEGACY_F32 : VOP2_Real_gfx10<0x006>;

defm V_XNOR_B32   : VOP2_Real_gfx10_gfx11_gfx12<0x01e>;
defm V_FMAC_F32   : VOP2_Real_gfx10_gfx11_gfx12<0x02b>;
defm V_FMAMK_F32  : VOP2Only_Real_MADK_gfx10_gfx11_gfx12<0x02c>;
defm V_FMAAK_F32  : VOP2Only_Real_MADK_gfx10_gfx11_gfx12<0x02d>;
defm V_ADD_F16    : VOP2_Real_gfx10<0x032>;
defm V_SUB_F16    : VOP2_Real_gfx10<0x033>;
defm V_SUBREV_F16 : VOP2_Real_gfx10<0x034>;
defm V_MUL_F16    : VOP2_Real_gfx10<0x035>;
defm V_FMAC_F16   : VOP2_Real_gfx10<0x036>;
defm V_FMAMK_F16  : VOP2Only_Real_MADK_gfx10<0x037>;
defm V_FMAAK_F16  : VOP2Only_Real_MADK_gfx10<0x038>;
defm V_MAX_F16    : VOP2_Real_gfx10<0x039>;
defm V_MIN_F16    : VOP2_Real_gfx10<0x03a>;
defm V_LDEXP_F16  : VOP2_Real_gfx10<0x03b>;

let IsSingle = 1 in {
  defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>;
}

// VOP2 no carry-in, carry-out.
defm V_ADD_NC_U32 :
  VOP2_Real_with_name_gfx10_gfx11_gfx12<0x025, "V_ADD_U32", "v_add_nc_u32">;
defm V_SUB_NC_U32 :
  VOP2_Real_with_name_gfx10_gfx11_gfx12<0x026, "V_SUB_U32", "v_sub_nc_u32">;
defm V_SUBREV_NC_U32 :
  VOP2_Real_with_name_gfx10_gfx11_gfx12<0x027, "V_SUBREV_U32",
                                        "v_subrev_nc_u32">;

// VOP2 carry-in, carry-out.
defm V_ADD_CO_CI_U32 :
  VOP2be_Real_gfx10<0x028, "V_ADDC_U32", "v_add_co_ci_u32">;
defm V_SUB_CO_CI_U32 :
  VOP2be_Real_gfx10<0x029, "V_SUBB_U32", "v_sub_co_ci_u32">;
defm V_SUBREV_CO_CI_U32 :
  VOP2be_Real_gfx10<0x02a, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;

defm V_CNDMASK_B32 :
  VOP2e_Real_gfx10<0x001, "V_CNDMASK_B32", "v_cndmask_b32">;

// VOP3 only.
defm V_BFM_B32            : VOP3Only_Real_gfx10<0x363>;
defm V_BCNT_U32_B32       : VOP3Only_Real_gfx10<0x364>;
defm V_MBCNT_LO_U32_B32   : VOP3Only_Real_gfx10<0x365>;
defm V_MBCNT_HI_U32_B32   : VOP3Only_Real_gfx10<0x366>;
defm V_LDEXP_F32          : VOP3Only_Real_gfx10<0x362>;
defm V_CVT_PKNORM_I16_F32 : VOP3Only_Real_gfx10<0x368>;
defm V_CVT_PKNORM_U16_F32 : VOP3Only_Real_gfx10<0x369>;
defm V_CVT_PK_U16_U32     : VOP3Only_Real_gfx10<0x36a>;
defm V_CVT_PK_I16_I32     : VOP3Only_Real_gfx10<0x36b>;

// VOP3 carry-out.
defm V_ADD_CO_U32    : VOP3beOnly_Real_gfx10<0x30f>;
defm V_SUB_CO_U32    : VOP3beOnly_Real_gfx10<0x310>;
defm V_SUBREV_CO_U32 : VOP3beOnly_Real_gfx10<0x319>;

let SubtargetPredicate = isGFX10Only in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx10>;

  defm : VOP2bInstAliases<V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx10,
                          "v_add_co_ci_u32">;
  defm : VOP2bInstAliases<V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx10,
                          "v_sub_co_ci_u32">;
  defm : VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx10,
                          "v_subrev_co_ci_u32">;
} // End SubtargetPredicate = isGFX10Only

//===----------------------------------------------------------------------===//
// GFX6, GFX7, GFX10, GFX11
//===----------------------------------------------------------------------===//

// VOP2 encoding fields for the DPP form: bits 8-0 hold the constant 0xfa,
// src1 and vdst are emitted only when the profile has them, and bit 31 is 0.
class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
  VOP_DPPe <P> {
  bits<8> vdst;
  bits<8> src1;

  let Inst{8-0}   = 0xfa; // dpp
  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
}

let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  multiclass VOP2_Lane_Real_gfx6_gfx7<bits<6> op> {
    defvar psLane = !cast<VOP2_Pseudo>(NAME);
    def _gfx6_gfx7 : VOP2_Real<psLane, SIEncodingFamily.SI>,
                     VOP2e<op{5-0}, psLane.Pfl>;
  }
  multiclass VOP2Only_Real_MADK_gfx6_gfx7<bits<6> op> {
    defvar psMADK = !cast<VOP2_Pseudo>(NAME);
    def _gfx6_gfx7 : VOP2_Real<psMADK, SIEncodingFamily.SI>,
                     VOP2_MADKe<op{5-0}, psMADK.Pfl>;
  }
  multiclass VOP2_Real_e32_gfx6_gfx7<bits<6> op, string opName = NAME> {
    defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
    def _e32_gfx6_gfx7 : VOP2_Real<ps32, SIEncodingFamily.SI>,
                         VOP2e<op{5-0}, ps32.Pfl>;
  }
  multiclass VOP2_Real_e64_gfx6_gfx7<bits<6> op, string opName = NAME> {
    defvar ps64 = !cast<VOP3_Pseudo>(opName#"_e64");
    def _e64_gfx6_gfx7 : VOP3_Real<ps64, SIEncodingFamily.SI>,
                         VOP3e_gfx6_gfx7<{1, 0, 0, op{5-0}}, ps64.Pfl>;
  }
  multiclass VOP2be_Real_e64_gfx6_gfx7<bits<6> op, string opName = NAME> {
    defvar ps64 = !cast<VOP3_Pseudo>(opName#"_e64");
    def _e64_gfx6_gfx7 : VOP3_Real<ps64, SIEncodingFamily.SI>,
                         VOP3be_gfx6_gfx7<{1, 0, 0, op{5-0}}, ps64.Pfl>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

multiclass VOP2Only_Real_MADK_gfx6_gfx7_gfx10<bits<6> op> :
  VOP2Only_Real_MADK_gfx6_gfx7<op>,
  VOP2Only_Real_MADK_gfx10<op>;

multiclass VOP2_Real_gfx6_gfx7<bits<6> op> :
  VOP2_Real_e32_gfx6_gfx7<op>,
  VOP2_Real_e64_gfx6_gfx7<op>;

multiclass VOP2_Real_gfx6_gfx7_gfx10<bits<6> op> :
  VOP2_Real_gfx6_gfx7<op>,
  VOP2_Real_gfx10<op>;

multiclass VOP2_Real_gfx6_gfx7_gfx10_gfx11<bits<6> op> :
  VOP2_Real_gfx6_gfx7_gfx10<op>,
  VOP2_Real_FULL<GFX11Gen, op>;

multiclass VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<bits<6> op> :
  VOP2_Real_gfx6_gfx7_gfx10_gfx11<op>,
  VOP2_Real_FULL<GFX12Gen, op>;

multiclass VOP2be_Real_gfx6_gfx7<bits<6> op> :
  VOP2_Real_e32_gfx6_gfx7<op>,
  VOP2be_Real_e64_gfx6_gfx7<op>;

// e32 and e64 (VOP3be) reals for the pseudo named by `opName`, with the
// assembly string overridden to `asmName` followed by the pseudo's operands.
multiclass VOP2be_Real_gfx6_gfx7_with_name<bits<6> op,
                                           string opName,
                                           string asmName> {
  defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
  defvar ps64 = !cast<VOP3_Pseudo>(opName#"_e64");

  let AsmString = asmName # ps32.AsmOperands in {
    defm "" : VOP2_Real_e32_gfx6_gfx7<op, opName>;
  }

  let AsmString = asmName # ps64.AsmOperands in {
    defm "" : VOP2be_Real_e64_gfx6_gfx7<op, opName>;
  }
}

defm V_CNDMASK_B32        : VOP2_Real_gfx6_gfx7<0x000>;
defm V_MIN_LEGACY_F32     : VOP2_Real_gfx6_gfx7<0x00d>;
defm V_MAX_LEGACY_F32     : VOP2_Real_gfx6_gfx7<0x00e>;
defm V_LSHR_B32           : VOP2_Real_gfx6_gfx7<0x015>;
defm V_ASHR_I32           : VOP2_Real_gfx6_gfx7<0x017>;
defm V_LSHL_B32           : VOP2_Real_gfx6_gfx7<0x019>;
defm V_BFM_B32            : VOP2_Real_gfx6_gfx7<0x01e>;
defm V_BCNT_U32_B32       : VOP2_Real_gfx6_gfx7<0x022>;
defm V_MBCNT_LO_U32_B32   : VOP2_Real_gfx6_gfx7<0x023>;
defm V_MBCNT_HI_U32_B32   : VOP2_Real_gfx6_gfx7<0x024>;
defm V_LDEXP_F32          : VOP2_Real_gfx6_gfx7<0x02b>;
defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_gfx6_gfx7<0x02c>;
defm V_CVT_PKNORM_I16_F32 : VOP2_Real_gfx6_gfx7<0x02d>;
defm V_CVT_PKNORM_U16_F32 : VOP2_Real_gfx6_gfx7<0x02e>;
defm V_CVT_PK_U16_U32     : VOP2_Real_gfx6_gfx7<0x030>;
defm V_CVT_PK_I16_I32     : VOP2_Real_gfx6_gfx7<0x031>;

// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in
// VI, but the VI instructions behave the same as the SI versions.
defm V_ADD_I32 :
  VOP2be_Real_gfx6_gfx7_with_name<0x025, "V_ADD_CO_U32", "v_add_i32">;
defm V_SUB_I32 :
  VOP2be_Real_gfx6_gfx7_with_name<0x026, "V_SUB_CO_U32", "v_sub_i32">;
defm V_SUBREV_I32 :
  VOP2be_Real_gfx6_gfx7_with_name<0x027, "V_SUBREV_CO_U32", "v_subrev_i32">;
defm V_ADDC_U32    : VOP2be_Real_gfx6_gfx7<0x028>;
defm V_SUBB_U32    : VOP2be_Real_gfx6_gfx7<0x029>;
defm V_SUBBREV_U32 : VOP2be_Real_gfx6_gfx7<0x02a>;

defm V_READLANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x001>;

let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
  defm V_WRITELANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x002>;
} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)

let SubtargetPredicate = isGFX6GFX7 in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx6_gfx7>;
  defm : VOP2eInstAliases<V_ADD_CO_U32_e32, V_ADD_I32_e32_gfx6_gfx7>;
  defm : VOP2eInstAliases<V_SUB_CO_U32_e32, V_SUB_I32_e32_gfx6_gfx7>;
  defm : VOP2eInstAliases<V_SUBREV_CO_U32_e32, V_SUBREV_I32_e32_gfx6_gfx7>;

  def : VOP2e64InstAlias<V_ADD_CO_U32_e64, V_ADD_I32_e64_gfx6_gfx7>;
  def : VOP2e64InstAlias<V_SUB_CO_U32_e64, V_SUB_I32_e64_gfx6_gfx7>;
  def : VOP2e64InstAlias<V_SUBREV_CO_U32_e64, V_SUBREV_I32_e64_gfx6_gfx7>;
} // End SubtargetPredicate = isGFX6GFX7

defm V_ADD_F32           : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x003>;
defm V_SUB_F32           : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x004>;
defm V_SUBREV_F32        : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x005>;
defm V_MAC_LEGACY_F32    : VOP2_Real_gfx6_gfx7_gfx10<0x006>;
defm V_MUL_LEGACY_F32    : VOP2_Real_gfx6_gfx7_gfx10<0x007>;
defm V_MUL_F32           : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x008>;
defm V_MUL_I32_I24       : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x009>;
defm V_MUL_HI_I32_I24    : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00a>;
defm V_MUL_U32_U24       : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00b>;
defm V_MUL_HI_U32_U24    : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x00c>;
defm V_MIN_F32           : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00f>;
defm V_MAX_F32           : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x010>;
defm V_MIN_I32           : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x011>;
defm V_MAX_I32           : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x012>;
defm V_MIN_U32           : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x013>;
defm V_MAX_U32           : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x014>;
defm V_LSHRREV_B32       : VOP2_Real_gfx6_gfx7_gfx10<0x016>;
defm V_ASHRREV_I32       : VOP2_Real_gfx6_gfx7_gfx10<0x018>;
defm V_LSHLREV_B32       : VOP2_Real_gfx6_gfx7_gfx10<0x01a>;
defm V_AND_B32           : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x01b>;
defm V_OR_B32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x01c>;
defm V_XOR_B32           : VOP2_Real_gfx6_gfx7_gfx10_gfx11_gfx12<0x01d>;
defm V_MAC_F32           : VOP2_Real_gfx6_gfx7_gfx10<0x01f>;
defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x02f>;
defm V_MADMK_F32         : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x020>;
defm V_MADAK_F32         : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x021>;

//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {

multiclass VOP2_Real_MADK_vi <bits<6> op> {
  defvar psMADK = !cast<VOP2_Pseudo>(NAME);
  def _vi : VOP2_Real<psMADK, SIEncodingFamily.VI>,
            VOP2_MADKe<op{5-0}, psMADK.Pfl>;
}

// Same as the _vi form but for the GFX940 encoding family; the decoder
// namespace of the enclosing let is overridden to "GFX9".
multiclass VOP2_Real_MADK_gfx940 <bits<6> op> {
  defvar psMADK = !cast<VOP2_Pseudo>(NAME);
  def _gfx940 : VOP2_Real<psMADK, SIEncodingFamily.GFX940>,
                VOP2_MADKe<op{5-0}, psMADK.Pfl> {
    let DecoderNamespace = "GFX9";
  }
}

multiclass VOP2_Real_e32_vi <bits<6> op> {
  defvar ps32 = !cast<VOP2_Pseudo>(NAME#"_e32");
  def _e32_vi : VOP2_Real<ps32, SIEncodingFamily.VI>,
                VOP2e<op{5-0}, ps32.Pfl>;
}

multiclass VOP2_Real_e64_vi <bits<10> op> {
  defvar ps64 = !cast<VOP3_Pseudo>(NAME#"_e64");
  def _e64_vi : VOP3_Real<ps64, SIEncodingFamily.VI>,
                VOP3e_vi <op, ps64.Pfl>;
}

multiclass VOP2_Real_e64only_vi <bits<10> op> {
  defvar ps64 = !cast<VOP3_Pseudo>(NAME#"_e64");
  def _e64_vi : VOP3_Real<ps64, SIEncodingFamily.VI>,
                VOP3e_vi <op, ps64.Pfl> {
    let IsSingle = 1;
  }
}

multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
  VOP2_Real_e32_vi<op>,
  VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;

} // End AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8"

multiclass VOP2_SDWA_Real <bits<6> op> {
  if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA then {
    defvar psSDWA = !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa");
    def _sdwa_vi : VOP_SDWA_Real <psSDWA>,
                   VOP2_SDWAe <op{5-0}, psSDWA.Pfl>;
  }
}

multiclass VOP2_SDWA9_Real <bits<6> op> {
  if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then {
    defvar psSDWA = !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa");
    def _sdwa_gfx9 : VOP_SDWA9_Real <psSDWA>,
                     VOP2_SDWA9Ae <op{5-0}, psSDWA.Pfl>;
  }
}

let AssemblerPredicate = isGFX8Only, DecoderNamespace = "GFX8" in {

// e32, e64 (VOP3be) and, when the profile supports them, SDWA and DPP reals
// for the pseudo named by `OpName`, all printed with mnemonic `AsmName`.
multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName> {
  defvar ps32 = !cast<VOP2_Pseudo>(OpName#"_e32");
  defvar ps64 = !cast<VOP3_Pseudo>(OpName#"_e64");

  def _e32_vi : VOP2_Real<ps32, SIEncodingFamily.VI>,
                VOP2e<op{5-0}, ps32.Pfl> {
    VOP2_Pseudo ps = ps32;
    let AsmString = AsmName # ps.AsmOperands;
  }
  def _e64_vi : VOP3_Real<ps64, SIEncodingFamily.VI>,
                VOP3be_vi <{0, 1, 0, 0, op{5-0}}, ps64.Pfl> {
    VOP3_Pseudo ps = ps64;
    let AsmString = AsmName # ps.AsmOperands;
  }
  if ps32.Pfl.HasExtSDWA then {
    defvar psSDWA = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
    def _sdwa_vi : VOP_SDWA_Real <psSDWA>,
                   VOP2_SDWAe <op{5-0}, psSDWA.Pfl> {
      VOP2_SDWA_Pseudo ps = psSDWA;
      let AsmString = AsmName # ps.AsmOperands;
    }
  }
  if ps32.Pfl.HasExtDPP then {
    defvar psDPP = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
    def _dpp_vi : VOP_DPP_Real<psDPP, SIEncodingFamily.VI>,
                  VOP2_DPPe<op, psDPP> {
      VOP2_DPP_Pseudo ps = psDPP;
      let AsmString = AsmName # ps.AsmOperands;
    }
  }
}

} // End AssemblerPredicate = isGFX8Only, DecoderNamespace = "GFX8"

let AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9" in {

// GFX9 counterpart of VOP2be_Real_e32e64_vi_only: uses the GFX9 encoding
// family and the SDWA9 real/encoding classes.
multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
  defvar ps32 = !cast<VOP2_Pseudo>(OpName#"_e32");
  defvar ps64 = !cast<VOP3_Pseudo>(OpName#"_e64");

  def _e32_gfx9 : VOP2_Real<ps32, SIEncodingFamily.GFX9>,
                  VOP2e<op{5-0}, ps32.Pfl> {
    VOP2_Pseudo ps = ps32;
    let AsmString = AsmName # ps.AsmOperands;
  }
  def _e64_gfx9 : VOP3_Real<ps64, SIEncodingFamily.GFX9>,
                  VOP3be_vi <{0, 1, 0, 0, op{5-0}}, ps64.Pfl> {
    VOP3_Pseudo ps = ps64;
    let AsmString = AsmName # ps.AsmOperands;
  }
  if ps32.Pfl.HasExtSDWA9 then {
    defvar psSDWA = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
    def _sdwa_gfx9 : VOP_SDWA9_Real <psSDWA>,
                     VOP2_SDWA9Ae <op{5-0}, psSDWA.Pfl> {
      VOP2_SDWA_Pseudo ps = psSDWA;
      let AsmString = AsmName # ps.AsmOperands;
    }
  }
  if ps32.Pfl.HasExtDPP then {
    defvar psDPP = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
    def _dpp_gfx9 : VOP_DPP_Real<psDPP, SIEncodingFamily.GFX9>,
                    VOP2_DPPe<op, psDPP> {
      VOP2_DPP_Pseudo ps = psDPP;
      let AsmString = AsmName # ps.AsmOperands;
    }
  }
}

multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
  defvar ps32 = !cast<VOP2_Pseudo>(NAME#"_e32");
  defvar ps64 = !cast<VOP3_Pseudo>(NAME#"_e64");

  def _e32_gfx9 : VOP2_Real<ps32, SIEncodingFamily.GFX9>,
                  VOP2e<op{5-0}, ps32.Pfl>;
  def _e64_gfx9 : VOP3_Real<ps64, SIEncodingFamily.GFX9>,
                  VOP3e_vi <{0, 1, 0, 0, op{5-0}}, ps64.Pfl>;
  if ps32.Pfl.HasExtSDWA9 then {
    defvar psSDWA = !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa");
    def _sdwa_gfx9 : VOP_SDWA9_Real <psSDWA>,
                     VOP2_SDWA9Ae <op{5-0}, psSDWA.Pfl>;
  }
  if ps32.Pfl.HasExtDPP then {
    defvar psDPP = !cast<VOP2_DPP_Pseudo>(NAME#"_dpp");
    def _dpp_gfx9 : VOP_DPP_Real<psDPP, SIEncodingFamily.GFX9>,
                    VOP2_DPPe<op, psDPP>;
  }
}

} // End AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9"

multiclass VOP2_Real_e32e64_vi <bits<6> op> :
  Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {

  if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP
then 2350 def _dpp_vi : 2351 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>, 2352 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>; 2353} 2354 2355defm V_CNDMASK_B32 : VOP2_Real_e32e64_vi <0x0>; 2356defm V_ADD_F32 : VOP2_Real_e32e64_vi <0x1>; 2357defm V_SUB_F32 : VOP2_Real_e32e64_vi <0x2>; 2358defm V_SUBREV_F32 : VOP2_Real_e32e64_vi <0x3>; 2359let AssemblerPredicate = isGCN3ExcludingGFX90A in 2360defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_vi <0x4>; 2361defm V_MUL_F32 : VOP2_Real_e32e64_vi <0x5>; 2362defm V_MUL_I32_I24 : VOP2_Real_e32e64_vi <0x6>; 2363defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_vi <0x7>; 2364defm V_MUL_U32_U24 : VOP2_Real_e32e64_vi <0x8>; 2365defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_vi <0x9>; 2366defm V_MIN_F32 : VOP2_Real_e32e64_vi <0xa>; 2367defm V_MAX_F32 : VOP2_Real_e32e64_vi <0xb>; 2368defm V_MIN_I32 : VOP2_Real_e32e64_vi <0xc>; 2369defm V_MAX_I32 : VOP2_Real_e32e64_vi <0xd>; 2370defm V_MIN_U32 : VOP2_Real_e32e64_vi <0xe>; 2371defm V_MAX_U32 : VOP2_Real_e32e64_vi <0xf>; 2372defm V_LSHRREV_B32 : VOP2_Real_e32e64_vi <0x10>; 2373defm V_ASHRREV_I32 : VOP2_Real_e32e64_vi <0x11>; 2374defm V_LSHLREV_B32 : VOP2_Real_e32e64_vi <0x12>; 2375defm V_AND_B32 : VOP2_Real_e32e64_vi <0x13>; 2376defm V_OR_B32 : VOP2_Real_e32e64_vi <0x14>; 2377defm V_XOR_B32 : VOP2_Real_e32e64_vi <0x15>; 2378defm V_MAC_F32 : VOP2_Real_e32e64_vi <0x16>; 2379defm V_MADMK_F32 : VOP2_Real_MADK_vi <0x17>; 2380defm V_MADAK_F32 : VOP2_Real_MADK_vi <0x18>; 2381 2382defm V_ADD_U32 : VOP2be_Real_e32e64_vi_only <0x19, "V_ADD_CO_U32", "v_add_u32">; 2383defm V_SUB_U32 : VOP2be_Real_e32e64_vi_only <0x1a, "V_SUB_CO_U32", "v_sub_u32">; 2384defm V_SUBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1b, "V_SUBREV_CO_U32", "v_subrev_u32">; 2385defm V_ADDC_U32 : VOP2be_Real_e32e64_vi_only <0x1c, "V_ADDC_U32", "v_addc_u32">; 2386defm V_SUBB_U32 : VOP2be_Real_e32e64_vi_only <0x1d, "V_SUBB_U32", "v_subb_u32">; 2387defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1e, "V_SUBBREV_U32", 
"v_subbrev_u32">; 2388 2389defm V_ADD_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_CO_U32", "v_add_co_u32">; 2390defm V_SUB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_CO_U32", "v_sub_co_u32">; 2391defm V_SUBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_CO_U32", "v_subrev_co_u32">; 2392defm V_ADDC_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1c, "V_ADDC_U32", "v_addc_co_u32">; 2393defm V_SUBB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1d, "V_SUBB_U32", "v_subb_co_u32">; 2394defm V_SUBBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_subbrev_co_u32">; 2395 2396defm V_ADD_U32 : VOP2_Real_e32e64_gfx9 <0x34>; 2397defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>; 2398defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>; 2399 2400defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>; 2401defm V_BCNT_U32_B32 : VOP2_Real_e64only_vi <0x28b>; 2402defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64only_vi <0x28c>; 2403defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64only_vi <0x28d>; 2404defm V_LDEXP_F32 : VOP2_Real_e64only_vi <0x288>; 2405defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64only_vi <0x1f0>; 2406defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64only_vi <0x294>; 2407defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64only_vi <0x295>; 2408defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64only_vi <0x296>; 2409defm V_CVT_PK_U16_U32 : VOP2_Real_e64only_vi <0x297>; 2410defm V_CVT_PK_I16_I32 : VOP2_Real_e64only_vi <0x298>; 2411 2412defm V_ADD_F16 : VOP2_Real_e32e64_vi <0x1f>; 2413defm V_SUB_F16 : VOP2_Real_e32e64_vi <0x20>; 2414defm V_SUBREV_F16 : VOP2_Real_e32e64_vi <0x21>; 2415defm V_MUL_F16 : VOP2_Real_e32e64_vi <0x22>; 2416defm V_MAC_F16 : VOP2_Real_e32e64_vi <0x23>; 2417defm V_MADMK_F16 : VOP2_Real_MADK_vi <0x24>; 2418defm V_MADAK_F16 : VOP2_Real_MADK_vi <0x25>; 2419defm V_ADD_U16 : VOP2_Real_e32e64_vi <0x26>; 2420defm V_SUB_U16 : VOP2_Real_e32e64_vi <0x27>; 2421defm V_SUBREV_U16 : VOP2_Real_e32e64_vi <0x28>; 2422defm V_MUL_LO_U16 : VOP2_Real_e32e64_vi <0x29>; 2423defm V_LSHLREV_B16 : VOP2_Real_e32e64_vi <0x2a>; 2424defm 
V_LSHRREV_B16 : VOP2_Real_e32e64_vi <0x2b>; 2425defm V_ASHRREV_I16 : VOP2_Real_e32e64_vi <0x2c>; 2426defm V_MAX_F16 : VOP2_Real_e32e64_vi <0x2d>; 2427defm V_MIN_F16 : VOP2_Real_e32e64_vi <0x2e>; 2428defm V_MAX_U16 : VOP2_Real_e32e64_vi <0x2f>; 2429defm V_MAX_I16 : VOP2_Real_e32e64_vi <0x30>; 2430defm V_MIN_U16 : VOP2_Real_e32e64_vi <0x31>; 2431defm V_MIN_I16 : VOP2_Real_e32e64_vi <0x32>; 2432defm V_LDEXP_F16 : VOP2_Real_e32e64_vi <0x33>; 2433 2434let SubtargetPredicate = isGFX8GFX9 in { 2435 2436// Aliases to simplify matching of floating-point instructions that 2437// are VOP2 on SI and VOP3 on VI. 2438class SI2_VI3Alias <string name, VOP3_Real inst> : InstAlias < 2439 name#" $dst, $src0, $src1", 2440 !if(inst.Pfl.HasOMod, 2441 (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0), 2442 (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0)) 2443>, PredicateControl { 2444 let UseInstAsmMatchConverter = 0; 2445 let AsmVariantName = AMDGPUAsmVariants.VOP3; 2446} 2447 2448def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>; 2449def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>; 2450def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>; 2451def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>; 2452def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>; 2453 2454defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_vi>; 2455 2456} // End SubtargetPredicate = isGFX8GFX9 2457 2458let SubtargetPredicate = isGFX9Only in { 2459 2460defm : VOP2bInstAliases<V_ADD_U32_e32, V_ADD_CO_U32_e32_gfx9, "v_add_co_u32">; 2461defm : VOP2bInstAliases<V_ADDC_U32_e32, V_ADDC_CO_U32_e32_gfx9, "v_addc_co_u32">; 2462defm : VOP2bInstAliases<V_SUB_U32_e32, V_SUB_CO_U32_e32_gfx9, "v_sub_co_u32">; 2463defm : VOP2bInstAliases<V_SUBB_U32_e32, V_SUBB_CO_U32_e32_gfx9, "v_subb_co_u32">; 2464defm : VOP2bInstAliases<V_SUBREV_U32_e32, V_SUBREV_CO_U32_e32_gfx9, "v_subrev_co_u32">; 2465defm : 
VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBBREV_CO_U32_e32_gfx9, "v_subbrev_co_u32">; 2466 2467} // End SubtargetPredicate = isGFX9Only 2468 2469let SubtargetPredicate = HasDLInsts in { 2470 2471defm V_FMAC_F32 : VOP2_Real_e32e64_vi <0x3b>; 2472defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>; 2473 2474} // End SubtargetPredicate = HasDLInsts 2475 2476let AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A" in { 2477 multiclass VOP2_Real_e32_gfx90a <bits<6> op> { 2478 def _e32_gfx90a : 2479 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX90A>, 2480 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>; 2481 } 2482 2483 multiclass VOP2_Real_e64_gfx90a <bits<10> op> { 2484 def _e64_gfx90a : 2485 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX90A>, 2486 VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 2487 } 2488 2489 multiclass Base_VOP2_Real_e32e64_gfx90a <bits<6> op> : 2490 VOP2_Real_e32_gfx90a<op>, 2491 VOP2_Real_e64_gfx90a<{0, 1, 0, 0, op{5-0}}>; 2492 2493 multiclass VOP2_Real_e32e64_gfx90a <bits<6> op> : 2494 Base_VOP2_Real_e32e64_gfx90a<op> { 2495 2496 if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then 2497 def _dpp_gfx90a : 2498 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX90A>, 2499 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> { 2500 let DecoderNamespace = "GFX9"; 2501 } 2502 } 2503} // End AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A" 2504 2505let SubtargetPredicate = HasFmacF64Inst in { 2506 defm V_FMAC_F64 : VOP2_Real_e32e64_gfx90a <0x4>; 2507} // End SubtargetPredicate = HasFmacF64Inst 2508 2509let SubtargetPredicate = isGFX90APlus, IsSingle = 1 in { 2510 defm V_MUL_LEGACY_F32 : VOP2_Real_e64_gfx90a <0x2a1>; 2511} 2512 2513let SubtargetPredicate = HasFmaakFmamkF32Insts in { 2514defm V_FMAMK_F32 : VOP2_Real_MADK_gfx940 <0x17>; 2515defm V_FMAAK_F32 : VOP2_Real_MADK_gfx940 <0x18>; 2516} 2517 2518multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : Base_VOP2_Real_e32e64_vi<op> { 
2519 let SubtargetPredicate = isGFX9Only in 2520 def _dpp_vi : VOP2_DPP<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>; 2521} 2522 2523multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> : 2524 VOP2_Real_e32_gfx10<op>, 2525 VOP2_Real_dpp_gfx10<op>, 2526 VOP2_Real_dpp8_gfx10<op>; 2527 2528multiclass VOP2Only_Real_DOT_ACC_gfx10<bits<6> op> : VOP2_Real_dpp_gfx10<op>, 2529 VOP2_Real_dpp8_gfx10<op> { 2530 let IsSingle = 1 in 2531 defm NAME : VOP2_Real_e32_gfx10<op>; 2532} 2533 2534let OtherPredicates = [HasDot5Insts] in { 2535 defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx9<0x37>; 2536 // NB: Opcode conflicts with V_DOT8C_I32_I4 2537 // This opcode exists in gfx 10.1* only 2538 defm V_DOT2C_F32_F16 : VOP2Only_Real_DOT_ACC_gfx10<0x02>; 2539} 2540 2541let OtherPredicates = [HasDot6Insts] in { 2542 defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx9<0x39>; 2543 defm V_DOT4C_I32_I8 : VOP2Only_Real_DOT_ACC_gfx10<0x0d>; 2544} 2545 2546let OtherPredicates = [HasDot4Insts] in { 2547 defm V_DOT2C_I32_I16 : VOP2_Real_DOT_ACC_gfx9<0x38>; 2548} 2549let OtherPredicates = [HasDot3Insts] in { 2550 defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx9<0x3a>; 2551} 2552 2553let SubtargetPredicate = HasPkFmacF16Inst in { 2554defm V_PK_FMAC_F16 : VOP2_Real_e32_vi<0x3c>; 2555} // End SubtargetPredicate = HasPkFmacF16Inst 2556 2557let SubtargetPredicate = HasDot3Insts in { 2558 // NB: Opcode conflicts with V_DOT2C_F32_F16 2559 let DecoderNamespace = "GFX10_B" in 2560 defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx10<0x02>; 2561} 2562