//===-- VOP2Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP2 Classes
//===----------------------------------------------------------------------===//

// 32-bit VOP2 encoding. Operand fields are emitted as 0 when the profile
// reports that the corresponding operand is absent.
class VOP2e <bits<6> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;
  bits<8> src1;

  let Inst{8-0}   = !if(P.HasSrc0, src0, 0);
  let Inst{16-9}  = !if(P.HasSrc1, src1, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
}

// 64-bit encoding: same low dword layout as VOP2e, with a 32-bit immediate in
// the high dword.
class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 {
  bits<8>  vdst;
  bits<9>  src0;
  bits<8>  src1;
  bits<32> imm;

  let Inst{8-0}   = !if(P.HasSrc0, src0, 0);
  let Inst{16-9}  = !if(P.HasSrc1, src1, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
  let Inst{63-32} = imm;
}

// SDWA encoding: the src0 field holds the fixed 0xf9 marker.
class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;
  bits<8> src1;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
}

// SDWA9 encoding: src1 is 9 bits wide; its top bit is placed in Inst{63}.
class VOP2_SDWA9Ae <bits<6> op, VOPProfile P> : VOP_SDWA9Ae <P> {
  bits<8> vdst;
  bits<9> src1;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
  let Inst{63}    = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr
}

// Pseudo instruction for the 32-bit VOP2 form, built from the profile's
// Outs32/Ins32/Asm32. The name suffix defaults to "_e32".
class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suffix = "_e32"> :
  VOP_Pseudo <opName, suffix, P, P.Outs32, P.Ins32, "", pattern> {

  let AsmOperands = P.Asm32;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  // MODE is read whenever the destination or src0 type is floating point.
  let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret);

  let mayRaiseFPException = ReadsModeReg;

  let VOP2 = 1;
  let VALU = 1;
  let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);

  let AsmVariantName = AMDGPUAsmVariants.Default;
}

// Real (encodable) instruction derived from a VOP2_Pseudo for one encoding
// family. real_name lets a subtarget use a different mnemonic.
class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemonic> :
  VOP_Real <ps>,
  InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let VALU = 1;
  let VOP2 = 1;
  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  // (Constraints and DisableEncoding were previously assigned twice with the
  // same value; a single assignment is sufficient.)
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let OtherPredicates      = ps.OtherPredicates;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let AsmVariantName       = ps.AsmVariantName;
  let Constraints          = ps.Constraints;
  let DisableEncoding      = ps.DisableEncoding;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses                 = ps.Uses;
  let Defs                 = ps.Defs;
  let SchedRW              = ps.SchedRW;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
}

// SDWA pseudo for VOP2; selects the "cvtSdwaVOP2" asm match converter.
class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP2";
}

// DPP pseudo for VOP2; adds nothing on top of VOP_DPP_Pseudo.
class VOP2_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_DPP_Pseudo <OpName, P, pattern> {
}


// Builds the selection pattern for the 64-bit form. With modifiers, src0 goes
// through VOP3Mods0 (including omod only if the profile has it) and src1
// through VOP3Mods; without modifiers the node is matched directly.
class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret = !if(P.HasModifiers,
    [(set P.DstVT:$vdst,
      (node (P.Src0VT
              !if(P.HasOMod,
                  (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
                  (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))),
            (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
    [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]);
}

// Defines only the _e32 pseudo.
multiclass VOP2Inst_e32<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName,
                        bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
               Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
  } // End renamedInGFX9 = GFX9Renamed
}

// Same as VOP2Inst_e32, additionally tagging the result as a VOPD component.
multiclass
    VOP2Inst_e32_VOPD<string opName, VOPProfile P, bits<5> VOPDOp,
                      string VOPDName, SDPatternOperator node = null_frag,
                      string revOp = opName, bit GFX9Renamed = 0> {
  defm NAME : VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
              VOPD_Component<VOPDOp, VOPDName>;
}

// Defines the _e64 pseudo and, when the profile has HasExtVOP3DPP, the
// _e64_dpp pseudo under the isGFX11Plus predicate.
multiclass VOP2Inst_e64<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName,
                        bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    def _e64 : VOP3InstBase <opName, P, node, 1>,
               Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;

    let SubtargetPredicate = isGFX11Plus in {
      if P.HasExtVOP3DPP then
        def _e64_dpp  : VOP3_DPP_Pseudo <opName, P>;
    } // End SubtargetPredicate = isGFX11Plus
  } // End renamedInGFX9 = GFX9Renamed
}

// Defines the _sdwa pseudo when the profile has HasExtSDWA.
multiclass VOP2Inst_sdwa<string opName,
                         VOPProfile P,
                         bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    if P.HasExtSDWA then
      def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
  } // End renamedInGFX9 = GFX9Renamed
}

// Full VOP2 instruction: _e32, _e64 (+ _e64_dpp), _sdwa and, when the profile
// has HasExtDPP, _dpp.
multiclass VOP2Inst<string opName,
                    VOPProfile P,
                    SDPatternOperator node = null_frag,
                    string revOp = opName,
                    bit GFX9Renamed = 0> :
    VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
    VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
    VOP2Inst_sdwa<opName, P, GFX9Renamed> {
  let renamedInGFX9 = GFX9Renamed in {
    if P.HasExtDPP then
      def _dpp  : VOP2_DPP_Pseudo <opName, P>;
  }
}

// Defines the regular instruction for subtargets without true16 instructions
// and a "_t16" variant (using VOPProfile_True16<P>) for subtargets with them.
multiclass VOP2Inst_t16<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName,
                        bit GFX9Renamed = 0> {
  let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts]  in {
    defm NAME : VOP2Inst<opName, P, node, revOp, GFX9Renamed>;
  }
  let SubtargetPredicate = HasTrue16BitInsts in {
    defm _t16 : VOP2Inst<opName#"_t16", VOPProfile_True16<P>, node, revOp#"_t16", GFX9Renamed>;
  }
}

// Creating a _t16_e32 pseudo when there is no corresponding real instruction on
// any subtarget is a problem. It makes getMCOpcodeGen return -1, which we
// assume means the instruction is already a real. The fix is to not create that
// _t16_e32 pseudo.
multiclass VOP2Inst_e64_t16<string opName,
                            VOPProfile P,
                            SDPatternOperator node = null_frag,
                            string revOp = opName,
                            bit GFX9Renamed = 0> {
  let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts] in {
    defm NAME : VOP2Inst<opName, P, node, revOp, GFX9Renamed>;
  }
  let SubtargetPredicate = HasTrue16BitInsts in {
    defm _t16 : VOP2Inst_e64<opName#"_t16", VOPProfile_True16<P>, node, revOp#"_t16", GFX9Renamed>;
  }
}

// Like VOP2Inst, but the _e32 form is also a VOPD component.
multiclass VOP2Inst_VOPD<string opName,
                         VOPProfile P,
                         bits<5> VOPDOp,
                         string VOPDName,
                         SDPatternOperator node = null_frag,
                         string revOp = opName,
                         bit GFX9Renamed = 0> :
    VOP2Inst_e32_VOPD<opName, P, VOPDOp, VOPDName, node, revOp, GFX9Renamed>,
    VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
    VOP2Inst_sdwa<opName, P, GFX9Renamed> {
  let renamedInGFX9 = GFX9Renamed in {
    if P.HasExtDPP then
      def _dpp  : VOP2_DPP_Pseudo <opName, P>;
  }
}

// VOP2 instructions whose 32-bit/SDWA/DPP forms define VCC (and also use VCC
// when useSGPRInput is set, which defaults to "profile has three sources").
// The _e64 forms are defined outside the Uses/Defs scope.
multiclass VOP2bInst <string opName,
                      VOPProfile P,
                      SDPatternOperator node = null_frag,
                      string revOp = opName,
                      bit GFX9Renamed = 0,
                      bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
  let renamedInGFX9 = GFX9Renamed in {
    let SchedRW = [Write32Bit, WriteSALU] in {
      let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
        def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
                   Commutable_REV<revOp#"_e32", !eq(revOp, opName)> {
          let usesCustomInserter = true;
        }

        if P.HasExtSDWA then
          def _sdwa  : VOP2_SDWA_Pseudo <opName, P> {
            let AsmMatchConverter = "cvtSdwaVOP2b";
          }
        if P.HasExtDPP then
          def _dpp  : VOP2_DPP_Pseudo <opName, P>;
      } // End Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC]

      def _e64 : VOP3InstBase <opName, P, node, 1>,
                 Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;

      let SubtargetPredicate = isGFX11Plus in {
        if P.HasExtVOP3DPP then
          def _e64_dpp  : VOP3_DPP_Pseudo <opName, P>;
      } // End SubtargetPredicate = isGFX11Plus
    }
  }
}

// Assembler alias for a VOP2b instruction: the asm string is the pseudo's
// Asm32 with every "vcc" replaced by opnd.
class VOP2bInstAlias <VOP2_Pseudo ps, Instruction inst,
                      string OpName, string opnd> :
  InstAlias <OpName#" "#!subst("vcc", opnd, ps.Pfl.Asm32),
             (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
                   ps.Pfl.Src1RC32:$src1),
             1, inst.AsmVariantName>,
  PredicateControl {
}

// Emits one alias per wave size: "vcc_lo" for wave32, "vcc" for wave64.
multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> {
  let WaveSizePredicate = isWave32 in {
    def : VOP2bInstAlias<ps, inst, OpName, "vcc_lo">;
  }
  let WaveSizePredicate = isWave64 in {
    def : VOP2bInstAlias<ps, inst, OpName, "vcc">;
  }
}

// VOP2 instructions whose 32-bit/SDWA/DPP forms use VCC when useSGPRInput is
// set. A VOPDOp of -1 means "not a VOPD component".
multiclass
    VOP2eInst_Base<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName,
                   SDPatternOperator node, string revOp, bit useSGPRInput> {

  let SchedRW = [Write32Bit] in {
    let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
      if !eq(VOPDOp, -1) then
        def _e32 : VOP2_Pseudo <opName, P>,
                   Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
      else
        def _e32 : VOP2_Pseudo <opName, P>,
                   Commutable_REV<revOp#"_e32", !eq(revOp, opName)>,
                   VOPD_Component<VOPDOp, VOPDName>;

      if P.HasExtSDWA then
        def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
          let AsmMatchConverter = "cvtSdwaVOP2e";
        }

      if P.HasExtDPP then
        def _dpp  : VOP2_DPP_Pseudo <opName, P>;
    }

    def _e64 : VOP3InstBase <opName, P, node, 1>,
               Commutable_REV<revOp#"_e64", !eq(revOp, opName)> {
      let isReMaterializable = 1;
    }

    let SubtargetPredicate = isGFX11Plus in {
      if P.HasExtVOP3DPP then
        def _e64_dpp  : VOP3_DPP_Pseudo <opName, P>;
    } // End SubtargetPredicate = isGFX11Plus
  }
}

// VOP2eInst_Base without a VOPD component (VOPDOp = -1).
multiclass
    VOP2eInst<string opName, VOPProfile P, SDPatternOperator node = null_frag,
              string revOp = opName, bit useSGPRInput = !eq(P.NumSrcArgs, 3)>
    : VOP2eInst_Base<opName, P, -1, "", node, revOp, useSGPRInput>;

// VOP2eInst_Base with an explicit VOPD opcode and name.
multiclass
    VOP2eInst_VOPD<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName,
                   SDPatternOperator node = null_frag, string revOp = opName,
                   bit useSGPRInput = !eq(P.NumSrcArgs, 3)>
    : VOP2eInst_Base<opName, P, VOPDOp, VOPDName, node, revOp, useSGPRInput>;

// Assembler alias for the 32-bit form with opnd appended as a trailing
// operand in the asm string.
class VOP2eInstAlias <VOP2_Pseudo ps, Instruction inst, string opnd = ""> :
  InstAlias <ps.OpName#" "#ps.Pfl.Asm32#", "#opnd,
             (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
                   ps.Pfl.Src1RC32:$src1),
             1, inst.AsmVariantName>,
  PredicateControl;

// Assembler alias for the 64-bit form, using the profile's Asm64 string.
class VOP2e64InstAlias <VOP3_Pseudo ps, Instruction inst> :
  InstAlias <ps.OpName#" "#ps.Pfl.Asm64,
             (inst ps.Pfl.DstRC:$vdst, VOPDstS64orS32:$sdst,
                   ps.Pfl.Src0RC32:$src0, ps.Pfl.Src1RC32:$src1, clampmod:$clamp),
             1, inst.AsmVariantName>,
  PredicateControl;

// Emits one alias per wave size: "vcc_lo" for wave32, "vcc" for wave64.
multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
  let WaveSizePredicate = isWave32 in {
    def : VOP2eInstAlias<ps, inst, "vcc_lo">;
  }
  let WaveSizePredicate = isWave64 in {
    def : VOP2eInstAlias<ps, inst, "vcc">;
  }
}

// Common base of the MADAK/MADMK profiles; AsmVOPDXDeferred is left unset
// here and assigned by the subclasses.
class VOP_MADK_Base<ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
  string AsmVOPDXDeferred = ?;
}

// Profile with operand order src0, src1, imm. The immediate operand type is
// chosen from the value type's size (32 bits -> KImmFP32, else KImmFP16).
class VOP_MADAK <ValueType vt> : VOP_MADK_Base<vt> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, KImmFP16);
  field dag Ins32 = !if(!eq(vt.Size, 32),
                        (ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm),
                        (ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm));
  field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$imm);
  // Note that both src0X and imm are deferred
  let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$immDeferred);
  field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, VGPR_32:$vsrc1Y, ImmOpType:$imm);

  field string Asm32 = "$vdst, $src0, $src1, $imm";
  field string AsmVOPDX = "$vdstX, $src0X, $vsrc1X, $imm";
  let AsmVOPDXDeferred = "$vdstX, $src0X, $vsrc1X, $immDeferred";
  field string AsmVOPDY = "$vdstY, $src0Y, $vsrc1Y, $imm";
  field bit HasExt = 0;
  let IsSingle = 1;
}

def VOP_MADAK_F16 : VOP_MADAK <f16>;
def VOP_MADAK_F16_t16 : VOP_MADAK <f16> {
  let IsTrue16 = 1;
  let DstRC = VOPDstOperand<VGPR_32_Lo128>;
  let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, VGPR_32_Lo128:$src1, ImmOpType:$imm);
}
def VOP_MADAK_F32 : VOP_MADAK <f32>;

// Profile with operand order src0, imm, src1; otherwise parallel to VOP_MADAK.
class VOP_MADMK <ValueType vt> : VOP_MADK_Base<vt> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, KImmFP16);
  field dag Ins32 = !if(!eq(vt.Size, 32),
                        (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1),
                        (ins VSrc_f16_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1));
  field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$imm, VGPR_32:$vsrc1X);
  let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$immDeferred, VGPR_32:$vsrc1X);
  field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, ImmOpType:$imm, VGPR_32:$vsrc1Y);

  field string Asm32 = "$vdst, $src0, $imm, $src1";
  field string AsmVOPDX = "$vdstX, $src0X, $imm, $vsrc1X";
  let AsmVOPDXDeferred = "$vdstX, $src0X, $immDeferred, $vsrc1X";
  field string AsmVOPDY = "$vdstY, $src0Y, $imm, $vsrc1Y";
  field bit HasExt = 0;
  let IsSingle = 1;
}

def VOP_MADMK_F16 : VOP_MADMK <f16>;
def VOP_MADMK_F16_t16 : VOP_MADMK <f16> {
  let IsTrue16 = 1;
  let DstRC = VOPDstOperand<VGPR_32_Lo128>;
  let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPR_32_Lo128:$src1);
}
def VOP_MADMK_F32 : VOP_MADMK <f32>;

// Wraps the register class returned by getVregSrcForVT in a RegisterOperand.
class getRegisterOperandForVT<ValueType VT> {
  RegisterOperand ret = RegisterOperand<getVregSrcForVT<VT>.ret>;
}

// FIXME: Remove src2_modifiers. It isn't used, so is wasting memory
// and processing time but it makes it easier to convert to mad.
//
// Profile for MAC-style instructions: every input list carries an extra $src2
// operand, while HasSrc2/HasSrc2Mods are cleared and the asm strings are built
// for two sources.
class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, vt0]> {
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT>.ret:$src2);
  let Ins64 = getIns64<Src0RC64, Src1RC64, getRegisterOperandForVT<Src2VT>.ret, 3,
                       0, HasModifiers, HasModifiers, HasOMod,
                       Src0Mod, Src1Mod, Src2Mod>.ret;
  let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                    getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));
  let InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, RegisterOperand<VGPR_32>, 3,
                       0, HasModifiers, HasModifiers, HasOMod,
                       Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel, 0/*IsVOP3P*/>.ret;
  // We need a dummy src2 tied to dst to track the use of that register for s_delay_alu
  let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X);
  let InsVOPDXDeferred =
    (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X,
         VGPR_32:$vsrc1X, VGPRSrc_32:$src2X);
  let InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y, VGPRSrc_32:$src2Y);
  let InsVOPDYDeferred =
    (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y,
         VGPR_32:$vsrc1Y, VGPRSrc_32:$src2Y);

  let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                     Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                     getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
                     dpp8:$dpp8, FI:$fi);
  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                     getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
                     clampmod:$clamp, omod:$omod,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);
  let Asm32 = getAsm32<1, 2, vt0>.ret;
  let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt0>.ret;
  let AsmDPP16 = getAsmDPP16<1, 2, HasModifiers, vt0>.ret;
  let AsmDPP8 = getAsmDPP8<1, 2, 0, vt0>.ret;
  let AsmSDWA = getAsmSDWA<1, 2, vt0>.ret;
  let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt0>.ret;
  let AsmVOP3Base =
      getAsmVOP3Base<2 /*NumSrcArgs*/, HasDst, HasClamp,
       HasOpSel, HasOMod, IsVOP3P, HasModifiers,
       HasModifiers, HasModifiers,
       0 /*Src2HasMods*/, DstVT>.ret;
  let HasSrc2 = 0;
  let HasSrc2Mods = 0;

  let HasExt = 1;
  let HasExtDPP = 1;
  let HasExt32BitDPP = 1;
  let HasExtSDWA = 1;
  let HasExtSDWA9 = 0;
  let TieRegDPP = "$src2";
}

def VOP_MAC_F16 : VOP_MAC <f16>;
def VOP_MAC_F16_t16 : VOP_MAC <f16> {
  let IsTrue16 = 1;
  let HasOpSel = 1;
  let AsmVOP3OpSel = getAsmVOP3OpSel<2/*NumSrcArgs*/, HasClamp, HasOMod,
                                     HasSrc0FloatMods, HasSrc1FloatMods, HasSrc2FloatMods>.ret;
  let DstRC = VOPDstOperand<VGPR_32_Lo128>;
  let DstRC64 = VOPDstOperand<VGPR_32>;
  let Src1RC32 = VGPRSrc_32_Lo128;
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT_t16<Src2VT>.ret:$src2);
  let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret;
  let Src1DPP = getVregSrcForVT_t16<Src1VT>.ret;
  let Src2DPP = getVregSrcForVT_t16<Src2VT>.ret;
  let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
  let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
  let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
  let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                    getVregSrcForVT_t16<Src2VT>.ret:$src2, // stub argument
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                     Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                     getVregSrcForVT_t16<Src2VT>.ret:$src2, // stub argument
                     dpp8:$dpp8, FI:$fi);
  let Src2Mod = FP32InputMods; // dummy unused modifiers
  let Src2RC64 = VGPRSrc_32;   // stub argument
}
def VOP_MAC_F32 : VOP_MAC <f32>;
let HasExtDPP = 0, HasExt32BitDPP = 0 in
def VOP_MAC_LEGACY_F32 : VOP_MAC <f32>;
let HasExtSDWA = 0, HasExt32BitDPP = 0, HasExt64BitDPP = 1 in
def VOP_MAC_F64 : VOP_MAC <f64>;

// VOP_MAC variant with clamp, SDWA, op_sel and packed flags cleared.
class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> {
  let HasClamp = 0;
  let HasExtSDWA = 0;
  let HasOpSel = 0;
  let IsPacked = 0;
}

def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> {
  let Src0ModDPP = FPVRegInputMods;
  let Src1ModDPP = FPVRegInputMods;
  let HasClamp = 1;
}

def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32> {
  let HasExtVOP3DPP = 0;
  let HasSrc0Mods = 1;
  let HasSrc1Mods = 1;
  let HasClamp = 1;

  let Src0Mod = Int32InputMods;
  let Src1Mod = Int32InputMods;
  let Ins64 = getIns64<Src0RC64, Src1RC64, getRegisterOperandForVT<Src2VT>.ret,
                       3 /*NumSrcArgs*/, HasClamp, 1 /*HasModifiers*/,
                       1 /*HasSrc2Mods*/, HasOMod,
                       Src0Mod, Src1Mod, Src2Mod>.ret;
  let Asm64 = "$vdst, $src0, $src1$clamp";
}

// Write out to vcc or arbitrary SGPR.
def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], /*EnableClamp=*/1> {
  // Results: the 32-bit form lists only $vdst; the 64-bit and VOP3 DPP forms
  // additionally list an explicit $sdst.
  let Outs32       = (outs DstRC:$vdst);
  let Outs64       = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
  let OutsVOP3DPP  = Outs64;
  let OutsVOP3DPP8 = Outs64;

  // DPP inputs: $old followed by the two sources and the DPP controls.
  let InsDPP   = (ins DstRCDPP:$old, Src0DPP:$src0, Src1DPP:$src1,
                      dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                      bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));
  let InsDPP8  = (ins DstRCDPP:$old, Src0DPP:$src0, Src1DPP:$src1,
                      dpp8:$dpp8, FI:$fi);

  // Assembly strings: every form except VOP3 spells the second result as a
  // literal "vcc"; the VOP3 form prints $sdst.
  let Asm32       = "$vdst, vcc, $src0, $src1";
  let AsmVOP3Base = "$vdst, $sdst, $src0, $src1$clamp";
  let AsmSDWA     = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9    = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP      = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8     = "$vdst, vcc, $src0, $src1 $dpp8$fi";
  let AsmDPP16    = AsmDPP#"$fi";
}

// Write out to vcc or arbitrary SGPR and read in from vcc or
// arbitrary SGPR.
def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableClamp=*/1> {
  let HasSrc2Mods = 0;

  // Results: the 32-bit form lists only $vdst; the 64-bit and VOP3 DPP forms
  // additionally list an explicit $sdst.
  let Outs32       = (outs DstRC:$vdst);
  let Outs64       = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
  let OutsVOP3DPP  = Outs64;
  let OutsVOP3DPP8 = Outs64;

  // Suppress src2 implied by type since the 32-bit encoding uses an
  // implicit VCC use.
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);

  // The SDWA and DPP input lists likewise name only two sources.
  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                     clampmod:$clamp,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);

  let InsDPP   = (ins DstRCDPP:$old, Src0DPP:$src0, Src1DPP:$src1,
                      dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                      bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));
  let InsDPP8  = (ins DstRCDPP:$old, Src0DPP:$src0, Src1DPP:$src1,
                      dpp8:$dpp8, FI:$fi);

  // Assembly strings: every form except VOP3 spells both the second result
  // and the third input as a literal "vcc"; the VOP3 form prints $sdst and
  // $src2.
  let Asm32       = "$vdst, vcc, $src0, $src1, vcc";
  let AsmVOP3Base = "$vdst, $sdst, $src0, $src1, $src2$clamp";
  let AsmSDWA     = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9    = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP      = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8     = "$vdst, vcc, $src0, $src1, vcc $dpp8$fi";
  let AsmDPP16    = AsmDPP#"$fi";

  // Extended encodings enabled for this profile.
  let HasExt         = 1;
  let HasExtDPP      = 1;
  let HasExt32BitDPP = 1;
  let HasExtSDWA     = 1;
  let HasExtSDWA9    = 1;
}

// Read in from vcc or arbitrary SGPR.
class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> {
  // Results are the same single $vdst for the 32- and 64-bit forms.
  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst);

  // Suppress src2 implied by type since the 32-bit encoding uses an
  // implicit VCC use.
  let Ins32 = (ins VSrc_f32:$src0, Src1RC32:$src1);

  let HasModifiers = 1;

  // Select FP modifiers for VOP3
  let Src0Mod = !if(!eq(Src0VT.Size, 16), FP16InputMods, FP32InputMods);
  let Src1Mod = Src0Mod;

  // Float (not integer) source modifiers on both sources.
  let HasSrc0IntMods   = 0;
  let HasSrc1IntMods   = 0;
  let HasSrc0FloatMods = 1;
  let HasSrc1FloatMods = 1;

  let Src0ModVOP3DPP = FPVRegInputMods;
  let Src1ModVOP3DPP = FPVRegInputMods;

  // SDWA and DPP input lists name only two sources, each with modifiers.
  let InsSDWA = (ins FP32SDWAInputMods:$src0_modifiers, SDWASrc_f32:$src0,
                     FP32SDWAInputMods:$src1_modifiers, SDWASrc_f32:$src1,
                     clampmod:$clamp,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);

  let InsDPP   = (ins DstRCDPP:$old,
                      FPVRegInputMods:$src0_modifiers, Src0DPP:$src0,
                      FPVRegInputMods:$src1_modifiers, Src1DPP:$src1,
                      dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                      bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));
  let InsDPP8  = (ins DstRCDPP:$old,
                      FPVRegInputMods:$src0_modifiers, Src0DPP:$src0,
                      FPVRegInputMods:$src1_modifiers, Src1DPP:$src1,
                      dpp8:$dpp8, FI:$fi);

  // Assembly strings: the SDWA and DPP forms print a literal "vcc" as the
  // last source; the VOP3 form prints $src2; the 32-bit form prints neither.
  let Asm32       = "$vdst, $src0, $src1";
  let AsmVOP3Base = "$vdst, $src0_modifiers, $src1_modifiers, $src2";
  let AsmSDWA     = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9    = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP      = "$vdst, $src0_modifiers, $src1_modifiers, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8     = "$vdst, $src0, $src1, vcc $dpp8$fi";
  let AsmDPP16    = AsmDPP#"$fi";

  // Extended encodings enabled for this profile.
  let HasExt         = 1;
  let HasExtDPP      = 1;
  let HasExt32BitDPP = 1;
  let HasExtSDWA     = 1;
  let HasExtSDWA9    = 1;
}

def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>;
def VOP2e_I16_I16_I16_I1 : VOP2e_SGPR<[i16, i16, i16, i1]>;

// Profile for v_readlane_b32: the result is an SReg_32 and no extended
// encodings are enabled.
def VOP_READLANE : VOPProfile<[i32, i32, i32, untyped]> {
  let Outs32 = (outs SReg_32:$vdst);
  let Outs64 = Outs32;
  let Ins32  = (ins VRegOrLdsSrc_32:$src0, SCSrc_b32:$src1);
  let Ins64  = Ins32;
  let Asm32  = " $vdst, $src0, $src1";
  let Asm64  = Asm32;

  let HasExt         = 0;
  let HasExtDPP      = 0;
  let HasExt32BitDPP = 0;
  let HasExt64BitDPP = 0;
  let HasExtSDWA     = 0;
  let HasExtSDWA9    = 0;
}

// Profile for v_writelane_b32: takes an extra $vdst_in input that is not
// printed, and enables no extended encodings.
def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
  let Outs32 = (outs VGPR_32:$vdst);
  let Outs64 = Outs32;
  let Ins32  = (ins SCSrc_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in);
  let Ins64  = Ins32;
  let Asm32  = " $vdst, $src0, $src1";
  let Asm64  = Asm32;
  let HasSrc2     = 0;
  let HasSrc2Mods = 0;

  let HasExt         = 0;
  let HasExtDPP      = 0;
  let HasExt32BitDPP = 0;
  let HasExt64BitDPP = 0;
  let HasExtSDWA     = 0;
  let HasExtSDWA9    = 0;
}

//===----------------------------------------------------------------------===//
// VOP2 Instructions
//===----------------------------------------------------------------------===//

let SubtargetPredicate = isGFX11Plus in {
defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1>;
} // End SubtargetPredicate = isGFX11Plus
defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">;
let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in {
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;
} // End SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1

let isCommutable = 1 in {
let isReMaterializable = 1 in {
defm V_ADD_F32        : VOP2Inst_VOPD <"v_add_f32", VOP_F32_F32_F32, 0x4, "v_add_f32", any_fadd>;
defm V_SUB_F32        : VOP2Inst_VOPD <"v_sub_f32", VOP_F32_F32_F32, 0x5, "v_sub_f32", any_fsub>;
defm V_SUBREV_F32     : VOP2Inst_VOPD <"v_subrev_f32", VOP_F32_F32_F32, 0x6, "v_subrev_f32", null_frag, "v_sub_f32">;
defm V_MUL_LEGACY_F32 : VOP2Inst_VOPD <"v_mul_legacy_f32", VOP_F32_F32_F32, 0x7, "v_mul_dx9_zero_f32", AMDGPUfmul_legacy>;
defm V_MUL_F32        : VOP2Inst_VOPD <"v_mul_f32", VOP_F32_F32_F32, 0x3, "v_mul_f32", any_fmul>;
defm V_MUL_I32_I24    : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>;
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
defm V_MUL_U32_U24    : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>;
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
defm V_MIN_F32        : VOP2Inst_VOPD <"v_min_f32", VOP_F32_F32_F32, 0xb, "v_min_f32", fminnum_like>;
defm V_MAX_F32        : VOP2Inst_VOPD <"v_max_f32", VOP_F32_F32_F32, 0xa, "v_max_f32", fmaxnum_like>;
defm V_MIN_I32        : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
defm V_MAX_I32        : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
defm V_MIN_U32        : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
defm V_MAX_U32        : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
defm V_LSHRREV_B32    : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, clshr_rev_32, "v_lshr_b32">;
defm V_ASHRREV_I32    : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, cashr_rev_32, "v_ashr_i32">;
defm V_LSHLREV_B32    : VOP2Inst_VOPD <"v_lshlrev_b32", VOP_I32_I32_I32, 0x11, "v_lshlrev_b32", clshl_rev_32, "v_lshl_b32">;
defm V_AND_B32        : VOP2Inst_VOPD <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, 0x12, "v_and_b32", and>;
defm V_OR_B32         : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
defm V_XOR_B32        : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
} // End isReMaterializable = 1

let mayRaiseFPException = 0, OtherPredicates = [HasMadMacF32Insts] in {
let Constraints = "$vdst = $src2", DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1 in {
defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>;

let SubtargetPredicate = isGFX6GFX7GFX10 in {
defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_MAC_LEGACY_F32>;
} // End SubtargetPredicate = isGFX6GFX7GFX10
} // End Constraints = "$vdst = $src2", DisableEncoding="$src2",
  //     isConvertibleToThreeAddress = 1

let isReMaterializable = 1 in {
def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>;
} // End isReMaterializable = 1
} // End mayRaiseFPException = 0, OtherPredicates = [HasMadMacF32Insts]

// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
defm V_ADD_CO_U32    : VOP2bInst <"v_add_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_co_u32", 1>;
defm V_SUB_CO_U32    : VOP2bInst <"v_sub_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>;
defm V_SUBREV_CO_U32 : VOP2bInst <"v_subrev_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>;
defm V_ADDC_U32      : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32", 1>;
defm V_SUBB_U32      : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;
defm V_SUBBREV_U32   : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;


let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in {
defm V_ADD_U32    : VOP2Inst_VOPD <"v_add_u32", VOP_I32_I32_I32_ARITH, 0x10, "v_add_nc_u32", null_frag, "v_add_u32", 1>;
defm V_SUB_U32    : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
} // End SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1

} // End isCommutable = 1

// These are special and do not read the exec mask.
764let isConvergent = 1, Uses = []<Register> in { 765def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE, 766 [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>; 767let IsNeverUniform = 1, Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in { 768def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, 769 [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]>; 770} // End IsNeverUniform, $vdst = $vdst_in, DisableEncoding $vdst_in 771} // End isConvergent = 1 772 773let isReMaterializable = 1 in { 774defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_I32_I32_I32>; 775defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_I32_I32_I32, add_ctpop>; 776let IsNeverUniform = 1 in { 777defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>; 778defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>; 779} // End IsNeverUniform = 1 780defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, any_fldexp>; 781 782let ReadsModeReg = 0, mayRaiseFPException = 0 in { 783defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>; 784defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_u16_f32>; 785} 786 787defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_V2F16_F32_F32, AMDGPUpkrtz_f16_f32>; 788defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_V2I16_I32_I32, AMDGPUpk_u16_u32>; 789defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_V2I16_I32_I32, AMDGPUpk_i16_i32>; 790 791 792let SubtargetPredicate = isGFX6GFX7 in { 793defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>; 794defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>; 795} // End SubtargetPredicate = isGFX6GFX7 796 797let isCommutable = 1 in { 798let SubtargetPredicate = isGFX6GFX7 in { 799defm 
V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, csrl_32>;
defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, csra_32>;
defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, cshl_32>;
} // End SubtargetPredicate = isGFX6GFX7
} // End isCommutable = 1
} // End isReMaterializable = 1

defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst"

// Pattern that selects a VOP pseudo for a binary node wrapped in
// DivergentBinFrag.
//   Op   - the operator handed to DivergentBinFrag.
//   Inst - the pseudo to emit; the source types come from Inst.Pfl.
// If Inst is not the IsOrig member of its Commutable_REV record, the two
// sources are emitted in swapped order.
class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
  GCNPat<
    (DivergentBinFrag<Op> Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
    !if(!cast<Commutable_REV>(Inst).IsOrig,
        (Inst $src0, $src1),
        (Inst $src1, $src0))
  >;

// Same shape as DivergentBinOp, but the emitted instruction receives a
// trailing immediate 0.
// NOTE(review): presumably the clamp operand, going by the class name --
// confirm against the operand list of the instructions passed in.
class DivergentClampingBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
  GCNPat<
    (DivergentBinFrag<Op> Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
    !if(!cast<Commutable_REV>(Inst).IsOrig,
        (Inst $src0, $src1, 0),
        (Inst $src1, $src0, 0))
  >;

// Divergent 32-bit shifts go to the *REV e64 forms.
def : DivergentBinOp<csrl_32, V_LSHRREV_B32_e64>;
def : DivergentBinOp<csra_32, V_ASHRREV_I32_e64>;
def : DivergentBinOp<cshl_32, V_LSHLREV_B32_e64>;

// Divergent add/sub: the U32 forms under HasAddNoCarryInsts ...
let SubtargetPredicate = HasAddNoCarryInsts in {
  def : DivergentClampingBinOp<add, V_ADD_U32_e64>;
  def : DivergentClampingBinOp<sub, V_SUB_U32_e64>;
}

// ... and the CO_U32 forms under isGFX6GFX7GFX8GFX9.
let SubtargetPredicate = isGFX6GFX7GFX8GFX9,
    Predicates = [isGFX6GFX7GFX8GFX9] in {
  def : DivergentClampingBinOp<add, V_ADD_CO_U32_e64>;
  def : DivergentClampingBinOp<sub, V_SUB_CO_U32_e64>;
}

def : DivergentBinOp<adde, V_ADDC_U32_e32>;
def : DivergentBinOp<sube, V_SUBB_U32_e32>;

// Splits a divergent 64-bit binary node into two applications of the 32-bit
// instruction Inst: one on the sub0 halves and one on the sub1 halves of both
// sources, recombined into a VReg_64 with REG_SEQUENCE.
class divergent_i64_BinOp <SDPatternOperator Op, Instruction Inst> :
  GCNPat<
    (DivergentBinFrag<Op> i64:$src0, i64:$src1),
    (REG_SEQUENCE VReg_64,
      (Inst (i32 (EXTRACT_SUBREG $src0, sub0)),
            (i32 (EXTRACT_SUBREG $src1, sub0))), sub0,
      (Inst (i32 (EXTRACT_SUBREG $src0, sub1)),
            (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
  >;

def : divergent_i64_BinOp <and, V_AND_B32_e64>;
def : divergent_i64_BinOp <or, V_OR_B32_e64>;
def : divergent_i64_BinOp <xor, V_XOR_B32_e64>;

//===----------------------------------------------------------------------===//
// 16-Bit Operand Instructions
//===----------------------------------------------------------------------===//

// The ldexp.f16 intrinsic expects an integer src1 operand, though the hardware
// encoding treats src1 as an f16.
def LDEXP_F16_VOPProfile : VOPProfile <[f16, f16, f16, untyped]> {
  // SDWA sext is the only modifier allowed.
  let HasSrc1IntMods = 1;
  let HasSrc1FloatMods = 0;
  let Src1ModSDWA = Int16SDWAInputMods;

  // src1 modifier operand classes for the remaining encodings.
  let Src1Mod = Int32InputMods;
  let Src1ModDPP = IntVRegInputMods;
  let Src1ModVOP3DPP = IntVRegInputMods;
}

// True16 counterpart: src1 uses the VGPR_32_Lo128 register class.
def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_F16> {
  let Src1ModDPP = IntT16VRegInputMods;
  let Src1DPP = VGPR_32_Lo128;
  let Src1RC32 = RegisterOperand<VGPR_32_Lo128>;
}

let isReMaterializable = 1 in {
let FPDPRounding = 1 in {
  // Exactly one of the two ldexp definitions is enabled, depending on
  // whether the subtarget has True16 instructions.
  let SubtargetPredicate = NotHasTrue16BitInsts,
      OtherPredicates = [Has16BitInsts] in {
    defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", LDEXP_F16_VOPProfile>;
  }
  let SubtargetPredicate = HasTrue16BitInsts in {
    defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16>;
  }
} // End FPDPRounding = 1
// FIXME VOP3 Only instructions.
// NFC using VOPProfile_True16 for these until a planned change to use a new
// register class for VOP3 encoded True16 instructions
defm V_LSHLREV_B16 : VOP2Inst_e64_t16 <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>;
defm V_LSHRREV_B16 : VOP2Inst_e64_t16 <"v_lshrrev_b16", VOP_I16_I16_I16, clshr_rev_16>;
defm V_ASHRREV_I16 : VOP2Inst_e64_t16 <"v_ashrrev_i16", VOP_I16_I16_I16, cashr_rev_16>;

let isCommutable = 1 in {
// Only the f16 add/sub/subrev/mul group is marked FPDPRounding.
let FPDPRounding = 1 in {
  defm V_ADD_F16    : VOP2Inst_t16 <"v_add_f16", VOP_F16_F16_F16, any_fadd>;
  defm V_SUB_F16    : VOP2Inst_t16 <"v_sub_f16", VOP_F16_F16_F16, any_fsub>;
  defm V_SUBREV_F16 : VOP2Inst_t16 <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">;
  defm V_MUL_F16    : VOP2Inst_t16 <"v_mul_f16", VOP_F16_F16_F16, any_fmul>;
} // End FPDPRounding = 1

defm V_MUL_LO_U16 : VOP2Inst_e64_t16 <"v_mul_lo_u16", VOP_I16_I16_I16, mul>;
defm V_MAX_F16    : VOP2Inst_t16 <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>;
defm V_MIN_F16    : VOP2Inst_t16 <"v_min_f16", VOP_F16_F16_F16, fminnum_like>;
defm V_MAX_U16    : VOP2Inst_e64_t16 <"v_max_u16", VOP_I16_I16_I16, umax>;
defm V_MAX_I16    : VOP2Inst_e64_t16 <"v_max_i16", VOP_I16_I16_I16, smax>;
defm V_MIN_U16    : VOP2Inst_e64_t16 <"v_min_u16", VOP_I16_I16_I16, umin>;
defm V_MIN_I16    : VOP2Inst_e64_t16 <"v_min_i16", VOP_I16_I16_I16, smin>;
} // End isCommutable = 1
} // End isReMaterializable = 1

// Selection pattern for f16 ldexp.
//   op   - the node to match.
//   inst - the e64 pseudo to emit.
//   P    - operand profile, taken from inst unless given explicitly.
// src0 is matched through VOP3Mods0 together with clamp and omod; src1 is
// matched as an i16 value with its own modifiers. The output passes both
// modifier/source pairs to inst, followed by clamp and omod.
class LDEXP_F16_Pat <SDPatternOperator op, VOP_Pseudo inst, VOPProfile P = inst.Pfl> : GCNPat <
  (P.DstVT (op (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
               (i16 (VOP3Mods0 P.Src1VT:$src1, i32:$src1_modifiers)))),
  (inst $src0_modifiers, $src0,
        $src1_modifiers, $src1,
        $clamp, /* clamp */
        $omod /* omod */)
>;

let OtherPredicates = [NotHasTrue16BitInsts] in {
  def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_e64>;
}

let OtherPredicates = [HasTrue16BitInsts] in
def : LDEXP_F16_Pat<any_fldexp,
V_LDEXP_F16_t16_e64>;

// 16-bit True16 bitwise operations, declared through VOP2Inst_e64 and guarded
// by isGFX11Plus.
let SubtargetPredicate = isGFX11Plus,
    isCommutable = 1 in {
  defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, and>;
  defm V_OR_B16_t16  : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, or>;
  defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, xor>;
} // End SubtargetPredicate = isGFX11Plus, isCommutable = 1

// f16 FMAMK / FMAAK pseudos. Each exists in a plain form (isGFX10Plus without
// True16) and a _t16 form (HasTrue16BitInsts). Only the FMAAK pair is marked
// commutable.
let FPDPRounding = 1,
    isReMaterializable = 1 in {
  let SubtargetPredicate = isGFX10Plus,
      OtherPredicates = [NotHasTrue16BitInsts] in
  def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">;

  let SubtargetPredicate = HasTrue16BitInsts in
  def V_FMAMK_F16_t16 : VOP2_Pseudo <"v_fmamk_f16_t16", VOP_MADMK_F16_t16, [], "">;

  let isCommutable = 1 in {
    let SubtargetPredicate = isGFX10Plus,
        OtherPredicates = [NotHasTrue16BitInsts] in
    def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">;

    let SubtargetPredicate = HasTrue16BitInsts in
    def V_FMAAK_F16_t16 : VOP2_Pseudo <"v_fmaak_f16_t16", VOP_MADAK_F16_t16, [], "">;
  } // End isCommutable = 1
} // End FPDPRounding = 1, isReMaterializable = 1

// f16 FMAC: $vdst is tied to $src2 and $src2 is left out of the encoding.
let Constraints = "$vdst = $src2",
    DisableEncoding = "$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1 in {
  let SubtargetPredicate = isGFX10Plus,
      OtherPredicates = [NotHasTrue16BitInsts] in
  defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>;

  let SubtargetPredicate = HasTrue16BitInsts in
  defm V_FMAC_F16_t16 : VOP2Inst <"v_fmac_f16_t16", VOP_MAC_F16_t16>;
} // End FMAC Constraints

let SubtargetPredicate = Has16BitInsts in {
let isReMaterializable = 1 in {
let FPDPRounding = 1 in
def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">;

let isCommutable = 1 in {
let mayRaiseFPException = 0 in {
def V_MADAK_F16 :
VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">;
}
// 16-bit integer add/sub, guarded by isGFX8GFX9.
let SubtargetPredicate = isGFX8GFX9 in {
  defm V_ADD_U16    : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>;
  defm V_SUB_U16    : VOP2Inst <"v_sub_u16", VOP_I16_I16_I16_ARITH, sub>;
  defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16_ARITH, null_frag, "v_sub_u16">;
}
} // End isCommutable = 1
} // End isReMaterializable = 1

// FIXME: Missing FPDPRounding
let Constraints = "$vdst = $src2",
    DisableEncoding = "$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1 in {
  defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>;
}
} // End SubtargetPredicate = Has16BitInsts


let SubtargetPredicate = HasDLInsts in {

let isReMaterializable = 1 in {
  defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32, xnor>;
}

// Divergent XNOR selection. Two source shapes are matched for each width:
// not(xor(a, b)) and xor(not(a), b); the xor must have a single use.

// i32, not(xor(a, b)).
def : GCNPat<
  (i32 (DivergentUnaryFrag<not> (xor_oneuse i32:$src0, i32:$src1))),
  (i32 (V_XNOR_B32_e64 $src0, $src1))
>;

// i32, xor(not(a), b).
def : GCNPat<
  (i32 (DivergentBinFrag<xor_oneuse> (not i32:$src0), i32:$src1)),
  (i32 (V_XNOR_B32_e64 $src0, $src1))
>;

// i64, not(xor(a, b)): one V_XNOR_B32 per 32-bit half, recombined with
// REG_SEQUENCE.
def : GCNPat<
  (i64 (DivergentUnaryFrag<not> (xor_oneuse i64:$src0, i64:$src1))),
  (REG_SEQUENCE VReg_64,
    (i32 (V_XNOR_B32_e64 (i32 (EXTRACT_SUBREG $src0, sub0)),
                         (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0,
    (i32 (V_XNOR_B32_e64 (i32 (EXTRACT_SUBREG $src0, sub1)),
                         (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1)
>;

// i64, xor(not(a), b): same expansion as above.
def : GCNPat<
  (i64 (DivergentBinFrag<xor_oneuse> (not i64:$src0), i64:$src1)),
  (REG_SEQUENCE VReg_64,
    (i32 (V_XNOR_B32_e64 (i32 (EXTRACT_SUBREG $src0, sub0)),
                         (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0,
    (i32 (V_XNOR_B32_e64 (i32 (EXTRACT_SUBREG $src0, sub1)),
                         (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1)
>;

let Constraints = "$vdst = $src2",
    DisableEncoding = "$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1 in
defm V_FMAC_F32 :
VOP2Inst_VOPD <"v_fmac_f32", VOP_MAC_F32, 0x0, "v_fmac_f32">;
} // End SubtargetPredicate = HasDLInsts

// v_fmac_legacy_f32, gated on HasFmaLegacy32. As with the other accumulating
// definitions below, $vdst is tied to $src2 and $src2 is left out of the
// encoding.
let SubtargetPredicate = HasFmaLegacy32,
    Constraints = "$vdst = $src2",
    DisableEncoding = "$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1 in {
  defm V_FMAC_LEGACY_F32 : VOP2Inst <"v_fmac_legacy_f32", VOP_MAC_LEGACY_F32>;
} // End SubtargetPredicate = HasFmaLegacy32

// v_fmac_f64, gated on HasFmacF64Inst and scheduled as WriteDoubleAdd.
let SubtargetPredicate = HasFmacF64Inst,
    Constraints = "$vdst = $src2",
    DisableEncoding = "$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1,
    SchedRW = [WriteDoubleAdd] in {
  defm V_FMAC_F64 : VOP2Inst <"v_fmac_f64", VOP_MAC_F64>;
}

// Accumulating dot products. All four carry IsDOT; each has its own
// subtarget predicate.
let Constraints = "$vdst = $src2",
    DisableEncoding = "$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1,
    IsDOT = 1 in {
  let SubtargetPredicate = HasDot5Insts in
  defm V_DOT2C_F32_F16 : VOP2Inst_VOPD<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16, 0xc, "v_dot2acc_f32_f16">;

  let SubtargetPredicate = HasDot6Insts in
  defm V_DOT4C_I32_I8  : VOP2Inst<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>;

  let SubtargetPredicate = HasDot4Insts in
  defm V_DOT2C_I32_I16 : VOP2Inst<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>;

  let SubtargetPredicate = HasDot3Insts in
  defm V_DOT8C_I32_I4  : VOP2Inst<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>;
}

// Select the accumulating e32 dot instructions when the clamp operand of the
// matched node is DSTCLAMP.NONE. Each pattern carries the same subtarget
// predicate as the instruction it emits.
let AddedComplexity = 30 in {
  def : GCNPat<
    (f32 (AMDGPUfdot2 v2f16:$src0, v2f16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))),
    (f32 (V_DOT2C_F32_F16_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot5Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot4 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT4C_I32_I8_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot6Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot2 v2i16:$src0, v2i16:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32
(V_DOT2C_I32_I16_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot4Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot8 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT8C_I32_I4_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot3Insts;
  }
} // End AddedComplexity = 30

// f32 FMAMK / FMAAK pseudos; both are also VOPD components. Only FMAAK is
// marked commutable.
let SubtargetPredicate = HasFmaakFmamkF32Insts,
    isReMaterializable = 1 in {
  def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">, VOPD_Component<0x2, "v_fmamk_f32">;

  let isCommutable = 1 in {
    def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">;
  }
}

let SubtargetPredicate = HasPkFmacF16Inst in {
  defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>;
} // End SubtargetPredicate = HasPkFmacF16Inst

// Note: 16-bit instructions produce a 0 result in the high 16-bits
// on GFX8 and GFX9 and preserve high 16 bits on GFX10+
//
// For a 16-bit operation `op` implemented by `inst`, emits two patterns that
// fold a zero-extension of the result:
//   * zext to i32 selects `inst` directly;
//   * zext to i64 builds a VReg_64 from `inst` (sub0) and a V_MOV_B32 of 0
//     (sub1).
multiclass Arithmetic_i16_0Hi_Pats <SDPatternOperator op, Instruction inst> {

  def : GCNPat<
    (i32 (zext (op i16:$src0, i16:$src1))),
    (inst VSrc_b16:$src0, VSrc_b16:$src1)
  >;

  def : GCNPat<
    (i64 (zext (op i16:$src0, i16:$src1))),
    (REG_SEQUENCE VReg_64,
      (inst $src0, $src1), sub0,
      (V_MOV_B32_e32 (i32 0)), sub1)
  >;
}

// Extension of an i1 to i16 through V_CNDMASK_B32_e64: the source values are
// the constants 0 and 1, and $src is the select operand.
class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
  (i16 (ext i1:$src)),
  (V_CNDMASK_B32_e64 (i32 0/*src0mod*/), (i32 0/*src0*/),
                     (i32 0/*src1mod*/), (i32 1/*src1*/),
                     $src)
>;

// Bitwise and/or/xor on i16 and v2i16 select the 32-bit e64 instructions.
foreach vt = [i16, v2i16] in {
  def : GCNPat <
    (and vt:$src0, vt:$src1),
    (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
  >;

  def : GCNPat <
    (or vt:$src0, vt:$src1),
    (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
  >;

  def : GCNPat <
    (xor vt:$src0, vt:$src1),
    (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
  >;
}

let Predicates =
[Has16BitInsts, isGFX8GFX9] in {

// Undo sub x, c -> add x, -c canonicalization since c is more likely
// an inline immediate than -c.
// TODO: Also do for 64-bit.
def : GCNPat<
  (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
  (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
>;

// Same rewrite when the add result is zero-extended to i32.
def : GCNPat<
  (i32 (zext (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)))),
  (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
>;

// Zero-extension folding for every 16-bit arithmetic, min/max and shift
// instruction listed here.
defm : Arithmetic_i16_0Hi_Pats<add, V_ADD_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<mul, V_MUL_LO_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<sub, V_SUB_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<smin, V_MIN_I16_e64>;
defm : Arithmetic_i16_0Hi_Pats<smax, V_MAX_I16_e64>;
defm : Arithmetic_i16_0Hi_Pats<umin, V_MIN_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<umax, V_MAX_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<clshl_rev_16, V_LSHLREV_B16_e64>;
defm : Arithmetic_i16_0Hi_Pats<clshr_rev_16, V_LSHRREV_B16_e64>;
defm : Arithmetic_i16_0Hi_Pats<cashr_rev_16, V_ASHRREV_I16_e64>;

} // End Predicates = [Has16BitInsts, isGFX8GFX9]

let Predicates = [Has16BitInsts] in {

// zext and anyext of an i1 share one pattern class.
def : ZExt_i16_i1_Pat<zext>;
def : ZExt_i16_i1_Pat<anyext>;

// sext of an i1: same V_CNDMASK_B32_e64 shape, with -1 as the src1 constant.
def : GCNPat <
  (i16 (sext i1:$src)),
  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
                     /*src1mod*/(i32 0), /*src1*/(i32 -1), $src)
>;

} // End Predicates = [Has16BitInsts]


let SubtargetPredicate = HasIntClamp in {
  // Set clamp bit for saturation.
  def : VOPBinOpClampPat<uaddsat, V_ADD_CO_U32_e64, i32>;
  def : VOPBinOpClampPat<usubsat, V_SUB_CO_U32_e64, i32>;
}

let SubtargetPredicate = HasAddNoCarryInsts, OtherPredicates = [HasIntClamp] in {
let AddedComplexity = 1 in { // Prefer over form with carry-out.
def : VOPBinOpClampPat<uaddsat, V_ADD_U32_e64, i32>;
def : VOPBinOpClampPat<usubsat, V_SUB_U32_e64, i32>;
}
}

// 16-bit unsigned saturating add/sub use the same clamp pattern class.
let SubtargetPredicate = Has16BitInsts,
    OtherPredicates = [HasIntClamp] in {
  def : VOPBinOpClampPat<uaddsat, V_ADD_U16_e64, i16>;
  def : VOPBinOpClampPat<usubsat, V_SUB_U16_e64, i16>;
}

//===----------------------------------------------------------------------===//
// DPP Encodings
//===----------------------------------------------------------------------===//

// Common encoding for the DPP form of a VOP2 instruction.
//   op      - 6-bit opcode, stored in Inst{30-25}.
//   ps      - DPP pseudo supplying Uses, Defs, SchedRW and hasSideEffects.
//   opName  - name forwarded to VOP_DPP (defaults to ps.OpName).
//   p       - operand profile (defaults to ps.Pfl).
//   IsDPP16 - forwarded unchanged to VOP_DPP.
// Inst{8-0}, where the plain VOP2e encoding places src0, holds the fixed
// value 0xfa. src1 and vdst are emitted only if the profile has them.
class VOP2_DPP<bits<6> op, VOP2_DPP_Pseudo ps,
               string opName = ps.OpName, VOPProfile p = ps.Pfl,
               bit IsDPP16 = 0> :
    VOP_DPP<opName, p, IsDPP16> {
  // Flags copied from the pseudo.
  let Uses = ps.Uses;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let hasSideEffects = ps.hasSideEffects;

  bits<8> vdst;
  bits<8> src1;

  let Inst{8-0}   = 0xfa;
  let Inst{16-9}  = !if(p.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0;
}

// VOP2_DPP with IsDPP16 set to 1. Both the assembler and the subtarget
// predicate are HasDPP16; OtherPredicates are copied from the pseudo.
class Base_VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
                      string opName = ps.OpName, VOPProfile p = ps.Pfl> :
    VOP2_DPP<op, ps, opName, p, 1> {
  let OtherPredicates = ps.OtherPredicates;
  let SubtargetPredicate = HasDPP16;
  let AssemblerPredicate = HasDPP16;
}

// Base_VOP2_DPP16 additionally registered as an SIMCInstr of ps.PseudoInstr
// for the encoding family given by `subtarget`.
class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps, int subtarget,
                 string opName = ps.OpName, VOPProfile p = ps.Pfl> :
    Base_VOP2_DPP16<op, ps, opName, p>,
    SIMCInstr <ps.PseudoInstr, subtarget>;

// DPP8 encoding of a VOP2 instruction. Built from the plain VOP2 pseudo
// rather than a DPP pseudo, and Inst{8-0} holds `fi` instead of a constant.
// NOTE(review): `fi` is not declared in this class -- presumably a field
// inherited from VOP_DPP8; confirm.
class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
                VOPProfile p = ps.Pfl> :
    VOP_DPP8<ps.OpName, p> {
  // Flags copied from the pseudo.
  let Uses = ps.Uses;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let hasSideEffects = ps.hasSideEffects;
  let OtherPredicates = ps.OtherPredicates;

  bits<8> vdst;
  bits<8> src1;

  let Inst{8-0}   = fi;
  let Inst{16-9}  = !if(p.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0;
}

//===----------------------------------------------------------------------===//
// GFX11.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
  //===------------------------------- VOP2 -------------------------------===//

  // Real instruction for the pseudo named exactly NAME, using the 64-bit
  // VOP2_MADKe encoding.
  multiclass VOP2Only_Real_MADK_gfx11<bits<6> op> {
    defvar pseudo = !cast<VOP2_Pseudo>(NAME);
    def _gfx11 :
      VOP2_Real<pseudo, SIEncodingFamily.GFX11>,
      VOP2_MADKe<op{5-0}, pseudo.Pfl>;
  }

  // As above, but the pseudo is looked up as opName (NAME by default) and
  // the assembly mnemonic is replaced by asmName.
  multiclass VOP2Only_Real_MADK_gfx11_with_name<bits<6> op, string asmName,
                                                string opName = NAME> {
    def _gfx11 :
        VOP2_Real<!cast<VOP2_Pseudo>(opName), SIEncodingFamily.GFX11>,
        VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(opName).Pfl> {
      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName);
      let AsmString = asmName # ps.AsmOperands;
    }
  }

  // 32-bit VOP2e encoding of the NAME#"_e32" pseudo.
  multiclass VOP2_Real_e32_gfx11<bits<6> op> {
    defvar pseudo32 = !cast<VOP2_Pseudo>(NAME#"_e32");
    def _e32_gfx11 :
      VOP2_Real<pseudo32, SIEncodingFamily.GFX11>,
      VOP2e<op{5-0}, pseudo32.Pfl>;
  }

  // The e32 real again, defined with IsSingle set.
  multiclass VOP2Only_Real_e32_gfx11<bits<6> op> {
    let IsSingle = 1 in {
      defm NAME: VOP2_Real_e32_gfx11<op>;
    }
  }

  // VOP3 encoding of the NAME#"_e64" pseudo. The 10-bit VOP3 opcode is the
  // VOP2 opcode prefixed with the bits 0100, i.e. 0x100 + op.
  multiclass VOP2_Real_e64_gfx11<bits<6> op> {
    defvar pseudo64 = !cast<VOP3_Pseudo>(NAME#"_e64");
    def _e64_gfx11 :
      VOP3_Real<pseudo64, SIEncodingFamily.GFX11>,
      VOP3e_gfx11<{0, 1, 0, 0, op{5-0}}, pseudo64.Pfl>;
  }

  // DPP16 real; emitted only when the e32 profile has HasExtDPP.
  multiclass VOP2_Real_dpp_gfx11<bits<6> op> {
    defvar pseudo32 = !cast<VOP2_Pseudo>(NAME#"_e32");
    if pseudo32.Pfl.HasExtDPP then
    def _dpp_gfx11 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX11> {
      let DecoderNamespace = "DPPGFX11";
    }
  }

  multiclass VOP2_Real_dpp8_gfx11<bits<6> op> {
    if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
    def _dpp8_gfx11 : VOP2_DPP8<op,
!cast<VOP2_Pseudo>(NAME#"_e32")> {
      let DecoderNamespace = "DPP8GFX11";
    }
  }

  //===------------------------- VOP2 (with name) -------------------------===//
  // The "with name" variants look the pseudo up through opName instead of
  // NAME and print asmName as the mnemonic.

  // e32 real; `single` is stored in IsSingle.
  multiclass VOP2_Real_e32_with_name_gfx11<bits<6> op, string opName,
                                           string asmName, bit single = 0> {
    defvar pseudo32 = !cast<VOP2_Pseudo>(opName#"_e32");
    def _e32_gfx11 :
      VOP2_Real<pseudo32, SIEncodingFamily.GFX11, asmName>,
      VOP2e<op{5-0}, pseudo32.Pfl> {
        let IsSingle = single;
        let AsmString = asmName # pseudo32.AsmOperands;
      }
  }

  // e64 real; the VOP3 opcode is the VOP2 opcode prefixed with bits 0100.
  multiclass VOP2_Real_e64_with_name_gfx11<bits<6> op, string opName,
                                           string asmName> {
    defvar pseudo64 = !cast<VOP3_Pseudo>(opName#"_e64");
    def _e64_gfx11 :
      VOP3_Real<pseudo64, SIEncodingFamily.GFX11>,
      VOP3e_gfx11<{0, 1, 0, 0, op{5-0}}, pseudo64.Pfl> {
        let AsmString = asmName # pseudo64.AsmOperands;
      }
  }

  // DPP16 real; emitted only when the e32 profile has HasExtDPP.
  multiclass VOP2_Real_dpp_with_name_gfx11<bits<6> op, string opName,
                                           string asmName> {
    defvar pseudo32 = !cast<VOP2_Pseudo>(opName#"_e32");
    if pseudo32.Pfl.HasExtDPP then
    def _dpp_gfx11 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"),
        SIEncodingFamily.GFX11> {
      let DecoderNamespace = "DPPGFX11";
      let AsmString = asmName # pseudo32.Pfl.AsmDPP16;
    }
  }

  // DPP8 real; emitted only when the e32 profile has HasExtDPP.
  multiclass VOP2_Real_dpp8_with_name_gfx11<bits<6> op, string opName,
                                            string asmName> {
    defvar pseudo32 = !cast<VOP2_Pseudo>(opName#"_e32");
    if pseudo32.Pfl.HasExtDPP then
    def _dpp8_gfx11 : VOP2_DPP8<op, pseudo32> {
      let DecoderNamespace = "DPP8GFX11";
      let AsmString = asmName # pseudo32.Pfl.AsmDPP8;
    }
  }

  //===------------------------------ VOP2be ------------------------------===//
  // e32 real whose assembly string has every ", vcc" removed from the
  // pseudo's operand string.
  multiclass VOP2be_Real_e32_gfx11<bits<6> op, string opName, string asmName> {
    defvar pseudo32 = !cast<VOP2_Pseudo>(opName#"_e32");
    def _e32_gfx11 :
      VOP2_Real<pseudo32, SIEncodingFamily.GFX11>,
      VOP2e<op{5-0}, pseudo32.Pfl> {
        let AsmString = asmName # !subst(", vcc", "", pseudo32.AsmOperands);
      }
  }
  multiclass
VOP2be_Real_dpp_gfx11<bits<6> op, string opName, string asmName> {
    // Three DPP16 records, all emitted only when the e32 profile has
    // HasExtDPP. They differ in how "vcc" appears in the assembly string.
    defvar pseudo32 = !cast<VOP2_Pseudo>(opName#"_e32");
    if pseudo32.Pfl.HasExtDPP then {
      // Main record: ", vcc" is removed from the operand string.
      def _dpp_gfx11 :
        VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX11, asmName> {
          string AsmDPP = pseudo32.Pfl.AsmDPP16;
          let AsmString = asmName # !subst(", vcc", "", AsmDPP);
          let DecoderNamespace = "DPPGFX11";
        }
      // Assembler-only wave32 spelling: "vcc" becomes "vcc_lo".
      def _dpp_w32_gfx11 :
        Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
          string AsmDPP = pseudo32.Pfl.AsmDPP16;
          let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
          let isAsmParserOnly = 1;
          let WaveSizePredicate = isWave32;
        }
      // Assembler-only wave64 spelling: operand string used unchanged.
      def _dpp_w64_gfx11 :
        Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
          string AsmDPP = pseudo32.Pfl.AsmDPP16;
          let AsmString = asmName # AsmDPP;
          let isAsmParserOnly = 1;
          let WaveSizePredicate = isWave64;
        }
    }
  }

  // DPP8 counterpart of the multiclass above: a main record with ", vcc"
  // removed, plus assembler-only wave32 ("vcc_lo") and wave64 spellings.
  // Each record is emitted only when the e32 profile has HasExtDPP.
  multiclass VOP2be_Real_dpp8_gfx11<bits<6> op, string opName, string asmName> {
    if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
    def _dpp8_gfx11 :
      VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
        string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
        let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
        let DecoderNamespace = "DPP8GFX11";
      }
    if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
    def _dpp8_w32_gfx11 :
      VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
        string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
        let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave32;
      }
    if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
    def _dpp8_w64_gfx11 :
VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
        string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
        let AsmString = asmName # AsmDPP8;
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave64;
      }
  }

} // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11"

// We don't want to override separate decoderNamespaces within these
//
// VOP3 "real triple" for a VOP2 opcode: the VOP3 opcode is the VOP2 opcode
// prefixed with the bits 0100, i.e. 0x100 + op.
multiclass VOP2_Realtriple_e64_gfx11<bits<6> op> {
  defm NAME : VOP3_Realtriple_gfx11<{0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, NAME> ;
}

// Same, with an explicit pseudo name and assembly mnemonic.
multiclass VOP2_Realtriple_e64_with_name_gfx11<bits<6> op, string opName,
                                               string asmName> {
  defm NAME : VOP3_Realtriple_with_name_gfx11<{0, 1, 0, 0, op{5-0}}, opName, asmName> ;
}

// Full set for the VOP2be instructions: e32, the VOP3be triple, DPP16 and
// DPP8.
multiclass VOP2be_Real_gfx11<bits<6> op, string opName, string asmName> :
  VOP2be_Real_e32_gfx11<op, opName, asmName>,
  VOP3be_Realtriple_gfx11<{0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, opName, asmName>,
  VOP2be_Real_dpp_gfx11<op, opName, asmName>,
  VOP2be_Real_dpp8_gfx11<op, opName, asmName>;

// Only for CNDMASK
// Plain e32 and e64 reals combined with the VOP2be DPP16/DPP8 reals.
multiclass VOP2e_Real_gfx11<bits<6> op, string opName, string asmName> :
  VOP2_Real_e32_gfx11<op>,
  VOP2_Realtriple_e64_gfx11<op>,
  VOP2be_Real_dpp_gfx11<op, opName, asmName>,
  VOP2be_Real_dpp8_gfx11<op, opName, asmName>;

// IsSingle e32 real plus DPP16 and DPP8; no VOP3 form.
multiclass VOP2Only_Real_gfx11<bits<6> op> :
  VOP2Only_Real_e32_gfx11<op>,
  VOP2_Real_dpp_gfx11<op>,
  VOP2_Real_dpp8_gfx11<op>;

// e32, DPP16 and DPP8; no VOP3 form.
multiclass VOP2_Real_NO_VOP3_gfx11<bits<6> op> :
  VOP2_Real_e32_gfx11<op>,
  VOP2_Real_dpp_gfx11<op>,
  VOP2_Real_dpp8_gfx11<op>;

// The VOP3 triple followed by everything in VOP2_Real_NO_VOP3_gfx11.
multiclass VOP2_Real_FULL_gfx11<bits<6> op> :
  VOP2_Realtriple_e64_gfx11<op>,
  VOP2_Real_NO_VOP3_gfx11<op>;

multiclass VOP2_Real_NO_VOP3_with_name_gfx11<bits<6> op, string opName,
                                           string asmName, bit isSingle = 0> {

  defm NAME : VOP2_Real_e32_with_name_gfx11<op, opName, asmName, isSingle>,
              VOP2_Real_dpp_with_name_gfx11<op, opName,
asmName>,
              VOP2_Real_dpp8_with_name_gfx11<op, opName, asmName>;
  // The pseudo's own mnemonic is accepted as an alias of asmName on GFX11+.
  defvar aliasedPseudo = !cast<VOP2_Pseudo>(opName#"_e32");
  def _gfx11_alias : MnemonicAlias<aliasedPseudo.Mnemonic, asmName>, Requires<[isGFX11Plus]>;
}

// The VOP3 triple followed by the e32/DPP16/DPP8 reals (and mnemonic alias)
// for a renamed instruction.
multiclass VOP2_Real_FULL_with_name_gfx11<bits<6> op, string opName,
                                          string asmName> :
  VOP2_Realtriple_e64_with_name_gfx11<op, opName, asmName>,
  VOP2_Real_NO_VOP3_with_name_gfx11<op, opName, asmName>;

// Argument-order wrapper: asmName comes first and opName defaults to NAME.
multiclass VOP2_Real_FULL_t16_gfx11<bits<6> op, string asmName, string opName = NAME> :
  VOP2_Real_FULL_with_name_gfx11<op, opName, asmName>;

// e32 and e64 only; no DPP forms.
multiclass VOP2_Real_NO_DPP_gfx11<bits<6> op> :
  VOP2_Real_e32_gfx11<op>,
  VOP2_Real_e64_gfx11<op>;

// Renamed e32 and e64 reals without DPP forms, plus a GFX11+ mnemonic alias
// from the pseudo's own mnemonic to asmName.
multiclass VOP2_Real_NO_DPP_with_name_gfx11<bits<6> op, string opName,
                                           string asmName> {
  defvar aliasedPseudo = !cast<VOP2_Pseudo>(opName#"_e32");
  defm NAME : VOP2_Real_e32_with_name_gfx11<op, opName, asmName>,
              VOP2_Real_e64_with_name_gfx11<op, opName, asmName>;
  def _gfx11_alias : MnemonicAlias<aliasedPseudo.Mnemonic, asmName>, Requires<[isGFX11Plus]>;
}

// GFX11 real instructions. The first template argument is the VOP2 opcode;
// where two strings follow, they are the pseudo name and the GFX11 mnemonic.
defm V_CNDMASK_B32       : VOP2e_Real_gfx11<0x001, "V_CNDMASK_B32", "v_cndmask_b32">;
defm V_DOT2ACC_F32_F16   : VOP2_Real_NO_VOP3_with_name_gfx11<0x002, "V_DOT2C_F32_F16", "v_dot2acc_f32_f16", 1>;
defm V_FMAC_DX9_ZERO_F32 : VOP2_Real_NO_DPP_with_name_gfx11<0x006, "V_FMAC_LEGACY_F32", "v_fmac_dx9_zero_f32">;
defm V_MUL_DX9_ZERO_F32  : VOP2_Real_FULL_with_name_gfx11<0x007, "V_MUL_LEGACY_F32", "v_mul_dx9_zero_f32">;
defm V_LSHLREV_B32       : VOP2_Real_FULL_gfx11<0x018>;
defm V_LSHRREV_B32       : VOP2_Real_FULL_gfx11<0x019>;
defm V_ASHRREV_I32       : VOP2_Real_FULL_gfx11<0x01a>;
defm V_ADD_CO_CI_U32     : VOP2be_Real_gfx11<0x020, "V_ADDC_U32", "v_add_co_ci_u32">;
defm V_SUB_CO_CI_U32     : VOP2be_Real_gfx11<0x021, "V_SUBB_U32", "v_sub_co_ci_u32">;
defm V_SUBREV_CO_CI_U32  : VOP2be_Real_gfx11<0x022, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;

defm
V_CVT_PK_RTZ_F16_F32 : VOP2_Real_FULL_with_name_gfx11<0x02f, "V_CVT_PKRTZ_F16_F32", "v_cvt_pk_rtz_f16_f32">;
defm V_PK_FMAC_F16 : VOP2Only_Real_gfx11<0x03c>;

// True16 reals: the record name carries the _t16 suffix while the mnemonic
// passed as asmName does not.
defm V_ADD_F16_t16    : VOP2_Real_FULL_t16_gfx11<0x032, "v_add_f16">;
defm V_SUB_F16_t16    : VOP2_Real_FULL_t16_gfx11<0x033, "v_sub_f16">;
defm V_SUBREV_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x034, "v_subrev_f16">;
defm V_MUL_F16_t16    : VOP2_Real_FULL_t16_gfx11<0x035, "v_mul_f16">;
defm V_FMAC_F16_t16   : VOP2_Real_FULL_t16_gfx11<0x036, "v_fmac_f16">;
defm V_LDEXP_F16_t16  : VOP2_Real_FULL_t16_gfx11<0x03b, "v_ldexp_f16">;
defm V_MAX_F16_t16    : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
defm V_MIN_F16_t16    : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;
defm V_FMAMK_F16_t16  : VOP2Only_Real_MADK_gfx11_with_name<0x037, "v_fmamk_f16">;
defm V_FMAAK_F16_t16  : VOP2Only_Real_MADK_gfx11_with_name<0x038, "v_fmaak_f16">;

// VOP3 only.
// The argument here is the full VOP3 opcode, not a VOP2 opcode.
defm V_CNDMASK_B16         : VOP3Only_Realtriple_gfx11<0x25d>;
defm V_LDEXP_F32           : VOP3Only_Realtriple_gfx11<0x31c>;
defm V_BFM_B32             : VOP3Only_Realtriple_gfx11<0x31d>;
defm V_BCNT_U32_B32        : VOP3Only_Realtriple_gfx11<0x31e>;
defm V_MBCNT_LO_U32_B32    : VOP3Only_Realtriple_gfx11<0x31f>;
defm V_MBCNT_HI_U32_B32    : VOP3Only_Realtriple_gfx11<0x320>;
defm V_CVT_PK_NORM_I16_F32 : VOP3Only_Realtriple_with_name_gfx11<0x321, "V_CVT_PKNORM_I16_F32", "v_cvt_pk_norm_i16_f32">;
defm V_CVT_PK_NORM_U16_F32 : VOP3Only_Realtriple_with_name_gfx11<0x322, "V_CVT_PKNORM_U16_F32", "v_cvt_pk_norm_u16_f32">;
defm V_CVT_PK_U16_U32      : VOP3Only_Realtriple_gfx11<0x323>;
defm V_CVT_PK_I16_I32      : VOP3Only_Realtriple_gfx11<0x324>;
defm V_ADD_CO_U32          : VOP3beOnly_Realtriple_gfx11<0x300>;
defm V_SUB_CO_U32          : VOP3beOnly_Realtriple_gfx11<0x301>;
defm V_SUBREV_CO_U32       : VOP3beOnly_Realtriple_gfx11<0x302>;

let SubtargetPredicate = isGFX11Plus in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx11>;

defm : VOP2bInstAliases<
  V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx11, "v_add_co_ci_u32">;
defm : VOP2bInstAliases<
  V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx11, "v_sub_co_ci_u32">;
defm : VOP2bInstAliases<
  V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx11, "v_subrev_co_ci_u32">;
} // End SubtargetPredicate = isGFX11Plus

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
  //===------------------------------- VOP2 -------------------------------===//

  // 64-bit MADK-form real (VOP2_MADKe carries the extra 32-bit imm field) for
  // the pseudo whose record name is NAME.
  multiclass VOP2Only_Real_MADK_gfx10<bits<6> op> {
    defvar madkPseudo = !cast<VOP2_Pseudo>(NAME);
    def _gfx10 :
      VOP2_Real<madkPseudo, SIEncodingFamily.GFX10>,
      VOP2_MADKe<op{5-0}, madkPseudo.Pfl>;
  }

  // Same as above, but the pseudo is looked up by opName and the mnemonic
  // printed/parsed is asmName.
  multiclass VOP2Only_Real_MADK_gfx10_with_name<bits<6> op, string opName,
                                                string asmName> {
    defvar madkPseudo = !cast<VOP2_Pseudo>(opName);
    def _gfx10 :
      VOP2_Real<madkPseudo, SIEncodingFamily.GFX10>,
      VOP2_MADKe<op{5-0}, madkPseudo.Pfl> {
      VOP2_Pseudo ps = madkPseudo;
      let AsmString = asmName # ps.AsmOperands;
    }
  }

  // 32-bit VOP2 encoding of NAME#"_e32".
  multiclass VOP2_Real_e32_gfx10<bits<6> op> {
    defvar e32Pseudo = !cast<VOP2_Pseudo>(NAME#"_e32");
    def _e32_gfx10 :
      VOP2_Real<e32Pseudo, SIEncodingFamily.GFX10>,
      VOP2e<op{5-0}, e32Pseudo.Pfl>;
  }

  // VOP3 encoding of NAME#"_e64"; the 6-bit VOP2 opcode is prefixed with the
  // bits 0, 1, 0, 0.
  multiclass VOP2_Real_e64_gfx10<bits<6> op> {
    defvar e64Pseudo = !cast<VOP3_Pseudo>(NAME#"_e64");
    def _e64_gfx10 :
      VOP3_Real<e64Pseudo, SIEncodingFamily.GFX10>,
      VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, e64Pseudo.Pfl>;
  }

  // SDWA real, emitted only when the e32 profile has HasExtSDWA9 set.
  multiclass VOP2_Real_sdwa_gfx10<bits<6> op> {
    defvar e32Pseudo = !cast<VOP2_Pseudo>(NAME#"_e32");
    defvar sdwaPseudo = !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa");
    if e32Pseudo.Pfl.HasExtSDWA9 then
    def _sdwa_gfx10 :
      VOP_SDWA10_Real<sdwaPseudo>,
      VOP2_SDWA9Ae<op{5-0}, sdwaPseudo.Pfl> {
      let DecoderNamespace = "SDWA10";
    }
  }

  // DPP16 real, emitted only when the e32 profile has HasExt32BitDPP set.
  // Placed in the "SDWA10" decoder namespace.
  multiclass VOP2_Real_dpp_gfx10<bits<6> op> {
    defvar e32Pseudo = !cast<VOP2_Pseudo>(NAME#"_e32");
    defvar dppPseudo = !cast<VOP2_DPP_Pseudo>(NAME#"_dpp");
    if e32Pseudo.Pfl.HasExt32BitDPP then
    def _dpp_gfx10 : VOP2_DPP16<op, dppPseudo, SIEncodingFamily.GFX10> {
      let DecoderNamespace = "SDWA10";
    }
  }

  // DPP8 real, emitted only when the e32 profile has HasExt32BitDPP set.
  multiclass VOP2_Real_dpp8_gfx10<bits<6> op> {
    defvar e32Pseudo = !cast<VOP2_Pseudo>(NAME#"_e32");
    if e32Pseudo.Pfl.HasExt32BitDPP then
    def _dpp8_gfx10 : VOP2_DPP8<op, e32Pseudo> {
      let DecoderNamespace = "DPP8";
    }
  }

  //===------------------------- VOP2 (with name) -------------------------===//
  // The *_with_name variants take the pseudo's base name (opName) and the
  // mnemonic to use in AsmString (asmName) as explicit arguments.

  multiclass VOP2_Real_e32_gfx10_with_name<bits<6> op, string opName,
                                           string asmName> {
    defvar e32Pseudo = !cast<VOP2_Pseudo>(opName#"_e32");
    def _e32_gfx10 :
      VOP2_Real<e32Pseudo, SIEncodingFamily.GFX10>,
      VOP2e<op{5-0}, e32Pseudo.Pfl> {
      VOP2_Pseudo ps = e32Pseudo;
      let AsmString = asmName # ps.AsmOperands;
    }
  }

  multiclass VOP2_Real_e64_gfx10_with_name<bits<6> op, string opName,
                                           string asmName> {
    defvar e64Pseudo = !cast<VOP3_Pseudo>(opName#"_e64");
    def _e64_gfx10 :
      VOP3_Real<e64Pseudo, SIEncodingFamily.GFX10>,
      VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, e64Pseudo.Pfl> {
      VOP3_Pseudo ps = e64Pseudo;
      let AsmString = asmName # ps.AsmOperands;
    }
  }

  let DecoderNamespace = "SDWA10" in {
    multiclass VOP2_Real_sdwa_gfx10_with_name<bits<6> op, string opName,
                                              string asmName> {
      defvar e32Pseudo = !cast<VOP2_Pseudo>(opName#"_e32");
      defvar sdwaPseudo = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
      if e32Pseudo.Pfl.HasExtSDWA9 then
      def _sdwa_gfx10 :
        VOP_SDWA10_Real<sdwaPseudo>,
        VOP2_SDWA9Ae<op{5-0}, sdwaPseudo.Pfl> {
        VOP2_SDWA_Pseudo ps = sdwaPseudo;
        let AsmString = asmName # ps.AsmOperands;
      }
    }

    multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName,
                                             string asmName> {
      defvar e32Pseudo = !cast<VOP2_Pseudo>(opName#"_e32");
      defvar dppPseudo = !cast<VOP2_DPP_Pseudo>(opName#"_dpp");
      if e32Pseudo.Pfl.HasExt32BitDPP then
      def _dpp_gfx10 : VOP2_DPP16<op, dppPseudo, SIEncodingFamily.GFX10> {
        VOP2_Pseudo ps = e32Pseudo;
        let AsmString = asmName # ps.Pfl.AsmDPP16;
      }
    }

    // Overrides the enclosing "SDWA10" namespace with "DPP8".
    multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName,
                                              string asmName> {
      defvar e32Pseudo = !cast<VOP2_Pseudo>(opName#"_e32");
      if e32Pseudo.Pfl.HasExt32BitDPP then
      def _dpp8_gfx10 : VOP2_DPP8<op, e32Pseudo> {
        VOP2_Pseudo ps = e32Pseudo;
        let AsmString = asmName # ps.Pfl.AsmDPP8;
        let DecoderNamespace = "DPP8";
      }
    }
  } // End DecoderNamespace = "SDWA10"

  //===------------------------------ VOP2be ------------------------------===//
  // For the sdwa/dpp/dpp8 forms below, three records are produced per
  // instruction: the default one has ", vcc" removed from its operand string,
  // the _w32 one has "vcc" replaced by "vcc_lo", and the _w64 one keeps the
  // operand string unchanged. The _w32/_w64 records are isAsmParserOnly and
  // carry a WaveSizePredicate.

  multiclass VOP2be_Real_e32_gfx10<bits<6> op, string opName, string asmName> {
    defvar e32Pseudo = !cast<VOP2_Pseudo>(opName#"_e32");
    def _e32_gfx10 :
      VOP2_Real<e32Pseudo, SIEncodingFamily.GFX10>,
      VOP2e<op{5-0}, e32Pseudo.Pfl> {
      VOP2_Pseudo Ps = e32Pseudo;
      let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
    }
  }

  multiclass VOP2be_Real_e64_gfx10<bits<6> op, string opName, string asmName> {
    defvar e64Pseudo = !cast<VOP3_Pseudo>(opName#"_e64");
    def _e64_gfx10 :
      VOP3_Real<e64Pseudo, SIEncodingFamily.GFX10>,
      VOP3be_gfx10<{0, 1, 0, 0, op{5-0}}, e64Pseudo.Pfl> {
      VOP3_Pseudo Ps = e64Pseudo;
      let AsmString = asmName # Ps.AsmOperands;
    }
  }

  multiclass VOP2be_Real_sdwa_gfx10<bits<6> op, string opName, string asmName> {
    defvar e32Pseudo = !cast<VOP2_Pseudo>(opName#"_e32");
    defvar sdwaPseudo = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
    if e32Pseudo.Pfl.HasExtSDWA9 then {
      def _sdwa_gfx10 :
        VOP_SDWA10_Real<sdwaPseudo>,
        VOP2_SDWA9Ae<op{5-0}, sdwaPseudo.Pfl> {
        VOP2_SDWA_Pseudo Ps = sdwaPseudo;
        let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
        let DecoderNamespace = "SDWA10";
      }
      def _sdwa_w32_gfx10 :
        Base_VOP_SDWA10_Real<sdwaPseudo>,
        VOP2_SDWA9Ae<op{5-0}, sdwaPseudo.Pfl> {
        VOP2_SDWA_Pseudo Ps = sdwaPseudo;
        let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands);
        let isAsmParserOnly = 1;
        let DecoderNamespace = "SDWA10";
        let WaveSizePredicate = isWave32;
      }
      def _sdwa_w64_gfx10 :
        Base_VOP_SDWA10_Real<sdwaPseudo>,
        VOP2_SDWA9Ae<op{5-0}, sdwaPseudo.Pfl> {
        VOP2_SDWA_Pseudo Ps = sdwaPseudo;
        let AsmString = asmName # Ps.AsmOperands;
        let isAsmParserOnly = 1;
        let DecoderNamespace = "SDWA10";
        let WaveSizePredicate = isWave64;
      }
    }
  }

  multiclass VOP2be_Real_dpp_gfx10<bits<6> op, string opName, string asmName> {
    defvar e32Pseudo = !cast<VOP2_Pseudo>(opName#"_e32");
    defvar dppPseudo = !cast<VOP2_DPP_Pseudo>(opName#"_dpp");
    if e32Pseudo.Pfl.HasExt32BitDPP then {
      def _dpp_gfx10 :
        VOP2_DPP16<op, dppPseudo, SIEncodingFamily.GFX10, asmName> {
        string AsmDPP = e32Pseudo.Pfl.AsmDPP16;
        let AsmString = asmName # !subst(", vcc", "", AsmDPP);
        let DecoderNamespace = "SDWA10";
      }
      def _dpp_w32_gfx10 :
        Base_VOP2_DPP16<op, dppPseudo, asmName> {
        string AsmDPP = e32Pseudo.Pfl.AsmDPP16;
        let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave32;
      }
      def _dpp_w64_gfx10 :
        Base_VOP2_DPP16<op, dppPseudo, asmName> {
        string AsmDPP = e32Pseudo.Pfl.AsmDPP16;
        let AsmString = asmName # AsmDPP;
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave64;
      }
    }
  }

  multiclass VOP2be_Real_dpp8_gfx10<bits<6> op, string opName, string asmName> {
    defvar e32Pseudo = !cast<VOP2_Pseudo>(opName#"_e32");
    if e32Pseudo.Pfl.HasExt32BitDPP then {
      def _dpp8_gfx10 :
        VOP2_DPP8<op, e32Pseudo> {
        string AsmDPP8 = e32Pseudo.Pfl.AsmDPP8;
        let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
        let DecoderNamespace = "DPP8";
      }
      def _dpp8_w32_gfx10 :
        VOP2_DPP8<op, e32Pseudo> {
        string AsmDPP8 = e32Pseudo.Pfl.AsmDPP8;
        let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave32;
      }
      def _dpp8_w64_gfx10 :
        VOP2_DPP8<op, e32Pseudo> {
        string AsmDPP8 = e32Pseudo.Pfl.AsmDPP8;
        let AsmString = asmName # AsmDPP8;
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave64;
      }
    }
  }

  //===----------------------------- VOP3Only -----------------------------===//
  // Only the _e64 real is produced; op is the full 10-bit value handed to
  // VOP3e_gfx10.
  multiclass VOP3Only_Real_gfx10<bits<10> op> {
    defvar e64Pseudo = !cast<VOP3_Pseudo>(NAME#"_e64");
    def _e64_gfx10 :
      VOP3_Real<e64Pseudo, SIEncodingFamily.GFX10>,
      VOP3e_gfx10<op, e64Pseudo.Pfl> {
      let IsSingle = 1;
    }
  }

  //===---------------------------- VOP3beOnly ----------------------------===//
  multiclass VOP3beOnly_Real_gfx10<bits<10> op> {
    defvar e64Pseudo = !cast<VOP3_Pseudo>(NAME#"_e64");
    def _e64_gfx10 :
      VOP3_Real<e64Pseudo, SIEncodingFamily.GFX10>,
      VOP3be_gfx10<op, e64Pseudo.Pfl> {
      let IsSingle = 1;
    }
  }
} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10"

// Combining multiclasses: each one simply inherits the per-encoding
// multiclasses listed after the colon.

multiclass VOP2Only_Real_MADK_gfx10_gfx11<bits<6> op> :
  VOP2Only_Real_MADK_gfx10<op>,
  VOP2Only_Real_MADK_gfx11<op>;

multiclass VOP2be_Real_gfx10<bits<6> op, string opName, string asmName> :
  VOP2be_Real_e32_gfx10<op, opName, asmName>,
  VOP2be_Real_e64_gfx10<op, opName, asmName>,
  VOP2be_Real_sdwa_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp8_gfx10<op, opName, asmName>;

// Plain e32/e64 reals combined with the VOP2be-style sdwa/dpp/dpp8 reals.
multiclass VOP2e_Real_gfx10<bits<6> op, string opName, string asmName> :
  VOP2_Real_e32_gfx10<op>,
  VOP2_Real_e64_gfx10<op>,
  VOP2be_Real_sdwa_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp8_gfx10<op, opName, asmName>;

multiclass VOP2_Real_gfx10<bits<6> op> :
  VOP2_Real_e32_gfx10<op>,
  VOP2_Real_e64_gfx10<op>,
  VOP2_Real_sdwa_gfx10<op>,
  VOP2_Real_dpp_gfx10<op>,
  VOP2_Real_dpp8_gfx10<op>;

multiclass VOP2_Real_gfx10_gfx11<bits<6> op> :
  VOP2_Real_gfx10<op>,
  VOP2_Real_FULL_gfx11<op>;

multiclass VOP2_Real_with_name_gfx10<bits<6> op, string opName,
                                     string asmName> :
  VOP2_Real_e32_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_e64_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_sdwa_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>;

multiclass VOP2_Real_with_name_gfx10_gfx11<bits<6> op, string opName,
                                           string asmName> :
  VOP2_Real_with_name_gfx10<op, opName, asmName>,
  VOP2_Real_FULL_with_name_gfx11<op, opName, asmName>;

// NB: Same opcode as v_mac_legacy_f32
let DecoderNamespace = "GFX10_B" in
defm V_FMAC_LEGACY_F32 : VOP2_Real_gfx10<0x006>;

defm V_XNOR_B32        : VOP2_Real_gfx10_gfx11<0x01e>;
defm V_FMAC_F32        : VOP2_Real_gfx10_gfx11<0x02b>;
defm V_FMAMK_F32       : VOP2Only_Real_MADK_gfx10_gfx11<0x02c>;
// GFX10 real encodings (continued). Each defm instantiates the named
// real-encoding multiclass with the opcode value given in angle brackets.
defm V_FMAAK_F32       : VOP2Only_Real_MADK_gfx10_gfx11<0x02d>;
defm V_ADD_F16         : VOP2_Real_gfx10<0x032>;
defm V_SUB_F16         : VOP2_Real_gfx10<0x033>;
defm V_SUBREV_F16      : VOP2_Real_gfx10<0x034>;
defm V_MUL_F16         : VOP2_Real_gfx10<0x035>;
defm V_FMAC_F16        : VOP2_Real_gfx10<0x036>;
defm V_FMAMK_F16       : VOP2Only_Real_MADK_gfx10<0x037>;
defm V_FMAAK_F16       : VOP2Only_Real_MADK_gfx10<0x038>;
defm V_MAX_F16         : VOP2_Real_gfx10<0x039>;
defm V_MIN_F16         : VOP2_Real_gfx10<0x03a>;
defm V_LDEXP_F16       : VOP2_Real_gfx10<0x03b>;

// Only the e32 real is instantiated for V_PK_FMAC_F16 here.
let IsSingle = 1 in {
  defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>;
} // End IsSingle = 1

// VOP2 no carry-in, carry-out.
// The reals are built from the V_*_U32 pseudos but use the v_*_nc_u32
// mnemonics.
defm V_ADD_NC_U32 :
  VOP2_Real_with_name_gfx10_gfx11<0x025, "V_ADD_U32", "v_add_nc_u32">;
defm V_SUB_NC_U32 :
  VOP2_Real_with_name_gfx10_gfx11<0x026, "V_SUB_U32", "v_sub_nc_u32">;
defm V_SUBREV_NC_U32 :
  VOP2_Real_with_name_gfx10_gfx11<0x027, "V_SUBREV_U32", "v_subrev_nc_u32">;

// VOP2 carry-in, carry-out.
// The reals are built from the V_ADDC/V_SUBB/V_SUBBREV pseudos but use the
// v_*_co_ci_u32 mnemonics.
defm V_ADD_CO_CI_U32 :
  VOP2be_Real_gfx10<0x028, "V_ADDC_U32", "v_add_co_ci_u32">;
defm V_SUB_CO_CI_U32 :
  VOP2be_Real_gfx10<0x029, "V_SUBB_U32", "v_sub_co_ci_u32">;
defm V_SUBREV_CO_CI_U32 :
  VOP2be_Real_gfx10<0x02a, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;

defm V_CNDMASK_B32 :
  VOP2e_Real_gfx10<0x001, "V_CNDMASK_B32", "v_cndmask_b32">;

// VOP3 only.
// These take the full 10-bit opcode value rather than a 6-bit VOP2 opcode.
defm V_BFM_B32            : VOP3Only_Real_gfx10<0x363>;
defm V_BCNT_U32_B32       : VOP3Only_Real_gfx10<0x364>;
defm V_MBCNT_LO_U32_B32   : VOP3Only_Real_gfx10<0x365>;
defm V_MBCNT_HI_U32_B32   : VOP3Only_Real_gfx10<0x366>;
defm V_LDEXP_F32          : VOP3Only_Real_gfx10<0x362>;
defm V_CVT_PKNORM_I16_F32 : VOP3Only_Real_gfx10<0x368>;
defm V_CVT_PKNORM_U16_F32 : VOP3Only_Real_gfx10<0x369>;
defm V_CVT_PK_U16_U32     : VOP3Only_Real_gfx10<0x36a>;
defm V_CVT_PK_I16_I32     : VOP3Only_Real_gfx10<0x36b>;

// VOP3 carry-out.
defm V_ADD_CO_U32    : VOP3beOnly_Real_gfx10<0x30f>;
defm V_SUB_CO_U32    : VOP3beOnly_Real_gfx10<0x310>;
defm V_SUBREV_CO_U32 : VOP3beOnly_Real_gfx10<0x319>;

// Assembler aliases targeting the GFX10 e32 reals.
let SubtargetPredicate = isGFX10Only in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx10>;

  defm : VOP2bInstAliases<
    V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx10, "v_add_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx10, "v_sub_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx10, "v_subrev_co_ci_u32">;
} // End SubtargetPredicate = isGFX10Only

//===----------------------------------------------------------------------===//
// GFX6, GFX7, GFX10, GFX11
//===----------------------------------------------------------------------===//

// VOP2 DPP encoding: the low 9 bits hold the constant 0xfa in place of a
// src0 operand, followed by src1, vdst, the 6-bit opcode and a zero top bit.
// src1/vdst are encoded as 0 when the profile lacks HasSrc1/EmitDst.
class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
  VOP_DPPe <P> {
  bits<8> vdst;
  bits<8> src1;
  let Inst{8-0}   = 0xfa; //dpp
  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; //encoding
}

let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  // Real for a pseudo whose record name is exactly NAME (no _e32 suffix),
  // using the 32-bit VOP2e encoding.
  multiclass VOP2_Lane_Real_gfx6_gfx7<bits<6> op> {
    def _gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }
  // Real for a pseudo named NAME using the 64-bit VOP2_MADKe encoding.
  multiclass VOP2Only_Real_MADK_gfx6_gfx7<bits<6> op> {
    def _gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
      VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }
  // e32 real; opName selects the pseudo and defaults to NAME.
  multiclass VOP2_Real_e32_gfx6_gfx7<bits<6> op, string opName = NAME> {
    def _e32_gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.SI>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl>;
  }
  // e64 real; the 6-bit VOP2 opcode is prefixed with the bits 1, 0, 0 before
  // being passed to VOP3e_gfx6_gfx7.
  multiclass VOP2_Real_e64_gfx6_gfx7<bits<6> op, string opName = NAME> {
    def _e64_gfx6_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(opName#"_e64").Pfl>;
  }
  // Same as VOP2_Real_e64_gfx6_gfx7 but with the VOP3be_gfx6_gfx7 encoding.
  multiclass VOP2be_Real_e64_gfx6_gfx7<bits<6> op, string opName = NAME> {
    def _e64_gfx6_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.SI>,
      VOP3be_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(opName#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

// Combining multiclasses: each inherits the per-encoding multiclasses listed
// after the colon, all with the same opcode.
multiclass VOP2Only_Real_MADK_gfx6_gfx7_gfx10<bits<6> op> :
  VOP2Only_Real_MADK_gfx6_gfx7<op>, VOP2Only_Real_MADK_gfx10<op>;

multiclass VOP2_Real_gfx6_gfx7<bits<6> op> :
  VOP2_Real_e32_gfx6_gfx7<op>, VOP2_Real_e64_gfx6_gfx7<op>;

multiclass VOP2_Real_gfx6_gfx7_gfx10<bits<6> op> :
  VOP2_Real_gfx6_gfx7<op>, VOP2_Real_gfx10<op>;

multiclass VOP2_Real_gfx6_gfx7_gfx10_gfx11<bits<6> op> :
  VOP2_Real_gfx6_gfx7_gfx10<op>, VOP2_Real_FULL_gfx11<op>;

multiclass VOP2be_Real_gfx6_gfx7<bits<6> op> :
  VOP2_Real_e32_gfx6_gfx7<op>, VOP2be_Real_e64_gfx6_gfx7<op>;

// Instantiates the e32 and VOP3be e64 reals for the pseudo named opName while
// overriding AsmString so that the mnemonic is asmName. The anonymous
// `defm ""` keeps the resulting record names based on the outer defm's name.
multiclass VOP2be_Real_gfx6_gfx7_with_name<bits<6> op,
  string opName, string asmName>  {
  defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
  defvar ps64 = !cast<VOP3_Pseudo>(opName#"_e64");

  let AsmString = asmName # ps32.AsmOperands in {
    defm "" : VOP2_Real_e32_gfx6_gfx7<op, opName>;
  }

  let AsmString = asmName # ps64.AsmOperands in {
    defm "" : VOP2be_Real_e64_gfx6_gfx7<op, opName>;
  }
}

// Opcodes instantiated for GFX6/GFX7 only.
defm V_CNDMASK_B32        : VOP2_Real_gfx6_gfx7<0x000>;
defm V_MIN_LEGACY_F32     : VOP2_Real_gfx6_gfx7<0x00d>;
defm V_MAX_LEGACY_F32     : VOP2_Real_gfx6_gfx7<0x00e>;
defm V_LSHR_B32           : VOP2_Real_gfx6_gfx7<0x015>;
defm V_ASHR_I32           : VOP2_Real_gfx6_gfx7<0x017>;
defm V_LSHL_B32           : VOP2_Real_gfx6_gfx7<0x019>;
defm V_BFM_B32            : VOP2_Real_gfx6_gfx7<0x01e>;
defm V_BCNT_U32_B32       : VOP2_Real_gfx6_gfx7<0x022>;
defm V_MBCNT_LO_U32_B32   : VOP2_Real_gfx6_gfx7<0x023>;
defm V_MBCNT_HI_U32_B32   : VOP2_Real_gfx6_gfx7<0x024>;
defm V_LDEXP_F32          : VOP2_Real_gfx6_gfx7<0x02b>;
defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_gfx6_gfx7<0x02c>;
defm V_CVT_PKNORM_I16_F32 : VOP2_Real_gfx6_gfx7<0x02d>;
defm V_CVT_PKNORM_U16_F32 : VOP2_Real_gfx6_gfx7<0x02e>;
defm V_CVT_PK_U16_U32     : VOP2_Real_gfx6_gfx7<0x030>;
defm V_CVT_PK_I16_I32     : VOP2_Real_gfx6_gfx7<0x031>;

// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in
// VI, but the VI instructions behave the same as the SI versions.
defm V_ADD_I32     : VOP2be_Real_gfx6_gfx7_with_name<0x025, "V_ADD_CO_U32", "v_add_i32">;
defm V_SUB_I32     : VOP2be_Real_gfx6_gfx7_with_name<0x026, "V_SUB_CO_U32", "v_sub_i32">;
defm V_SUBREV_I32  : VOP2be_Real_gfx6_gfx7_with_name<0x027, "V_SUBREV_CO_U32", "v_subrev_i32">;
defm V_ADDC_U32    : VOP2be_Real_gfx6_gfx7<0x028>;
defm V_SUBB_U32    : VOP2be_Real_gfx6_gfx7<0x029>;
defm V_SUBBREV_U32 : VOP2be_Real_gfx6_gfx7<0x02a>;

defm V_READLANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x001>;

// V_WRITELANE_B32's real is given an explicit input operand list that
// includes $vdst_in.
let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
  defm V_WRITELANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x002>;
} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)

// Assembler aliases targeting the GFX6/GFX7 reals.
let SubtargetPredicate = isGFX6GFX7 in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx6_gfx7>;
  defm : VOP2eInstAliases<V_ADD_CO_U32_e32, V_ADD_I32_e32_gfx6_gfx7>;
  defm : VOP2eInstAliases<V_SUB_CO_U32_e32, V_SUB_I32_e32_gfx6_gfx7>;
  defm : VOP2eInstAliases<V_SUBREV_CO_U32_e32, V_SUBREV_I32_e32_gfx6_gfx7>;

  def : VOP2e64InstAlias<V_ADD_CO_U32_e64, V_ADD_I32_e64_gfx6_gfx7>;
  def : VOP2e64InstAlias<V_SUB_CO_U32_e64, V_SUB_I32_e64_gfx6_gfx7>;
  def : VOP2e64InstAlias<V_SUBREV_CO_U32_e64, V_SUBREV_I32_e64_gfx6_gfx7>;
} // End SubtargetPredicate = isGFX6GFX7

// Opcodes shared between GFX6/GFX7 and GFX10 (and GFX11 where the
// *_gfx11 multiclass is used).
defm V_ADD_F32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x003>;
defm V_SUB_F32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x004>;
defm V_SUBREV_F32         : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x005>;
defm V_MAC_LEGACY_F32     : VOP2_Real_gfx6_gfx7_gfx10<0x006>;
defm V_MUL_LEGACY_F32     : VOP2_Real_gfx6_gfx7_gfx10<0x007>;
defm V_MUL_F32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x008>;
defm V_MUL_I32_I24        : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x009>;
defm V_MUL_HI_I32_I24     : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00a>;
defm V_MUL_U32_U24        : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00b>;
defm V_MUL_HI_U32_U24     : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00c>;
defm V_MIN_F32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00f>;
defm V_MAX_F32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x010>;
defm V_MIN_I32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x011>;
defm V_MAX_I32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x012>;
defm V_MIN_U32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x013>;
defm V_MAX_U32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x014>;
defm V_LSHRREV_B32        : VOP2_Real_gfx6_gfx7_gfx10<0x016>;
defm V_ASHRREV_I32        : VOP2_Real_gfx6_gfx7_gfx10<0x018>;
defm V_LSHLREV_B32        : VOP2_Real_gfx6_gfx7_gfx10<0x01a>;
defm V_AND_B32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x01b>;
defm V_OR_B32             : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x01c>;
defm V_XOR_B32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x01d>;
defm V_MAC_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x01f>;
defm V_CVT_PKRTZ_F16_F32  : VOP2_Real_gfx6_gfx7_gfx10<0x02f>;
defm V_MADMK_F32          : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x020>;
defm V_MADAK_F32          : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x021>;

//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {

// 64-bit MADK-form real (VI encoding family) for the pseudo named NAME.
multiclass VOP2_Real_MADK_vi <bits<6> op> {
  defvar madkPseudo = !cast<VOP2_Pseudo>(NAME);
  def _vi :
    VOP2_Real<madkPseudo, SIEncodingFamily.VI>,
    VOP2_MADKe<op{5-0}, madkPseudo.Pfl>;
}

// 64-bit MADK-form real in the GFX940 encoding family; decoded from the
// "GFX9" namespace instead of the enclosing "GFX8".
multiclass VOP2_Real_MADK_gfx940 <bits<6> op> {
  defvar madkPseudo = !cast<VOP2_Pseudo>(NAME);
  def _gfx940 :
    VOP2_Real<madkPseudo, SIEncodingFamily.GFX940>,
    VOP2_MADKe<op{5-0}, madkPseudo.Pfl> {
    let DecoderNamespace = "GFX9";
  }
}

// 32-bit VOP2 real of NAME#"_e32".
multiclass VOP2_Real_e32_vi <bits<6> op> {
  defvar e32Pseudo = !cast<VOP2_Pseudo>(NAME#"_e32");
  def _e32_vi :
    VOP2_Real<e32Pseudo, SIEncodingFamily.VI>,
    VOP2e<op{5-0}, e32Pseudo.Pfl>;
}

// VOP3 real of NAME#"_e64"; op is the full 10-bit value given to VOP3e_vi.
multiclass VOP2_Real_e64_vi <bits<10> op> {
  defvar e64Pseudo = !cast<VOP3_Pseudo>(NAME#"_e64");
  def _e64_vi :
    VOP3_Real<e64Pseudo, SIEncodingFamily.VI>,
    VOP3e_vi <op, e64Pseudo.Pfl>;
}

// Like VOP2_Real_e64_vi, but the record is additionally marked IsSingle.
multiclass VOP2_Real_e64only_vi <bits<10> op> {
  defvar e64Pseudo = !cast<VOP3_Pseudo>(NAME#"_e64");
  def _e64_vi :
    VOP3_Real<e64Pseudo, SIEncodingFamily.VI>,
    VOP3e_vi <op, e64Pseudo.Pfl> {
    let IsSingle = 1;
  }
}

// e32 plus e64 reals; the e64 opcode is the 6-bit VOP2 opcode prefixed with
// the bits 0, 1, 0, 0.
multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
  VOP2_Real_e32_vi<op>,
  VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;

} // End AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8"

// SDWA real (_sdwa_vi), emitted only when the e32 profile has HasExtSDWA set.
multiclass VOP2_SDWA_Real <bits<6> op> {
  defvar e32Pseudo = !cast<VOP2_Pseudo>(NAME#"_e32");
  defvar sdwaPseudo = !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa");
  if e32Pseudo.Pfl.HasExtSDWA then
  def _sdwa_vi :
    VOP_SDWA_Real <sdwaPseudo>,
    VOP2_SDWAe <op{5-0}, sdwaPseudo.Pfl>;
}

// SDWA9 real (_sdwa_gfx9), emitted only when the e32 profile has HasExtSDWA9
// set.
multiclass VOP2_SDWA9_Real <bits<6> op> {
  defvar e32Pseudo = !cast<VOP2_Pseudo>(NAME#"_e32");
  defvar sdwaPseudo = !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa");
  if e32Pseudo.Pfl.HasExtSDWA9 then
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <sdwaPseudo>,
    VOP2_SDWA9Ae <op{5-0}, sdwaPseudo.Pfl>;
}

let AssemblerPredicate = isGFX8Only in {

// GFX8-only reals (e32, VOP3be e64, and optional SDWA/DPP) built from the
// pseudo named OpName, with AsmName used as the mnemonic.
multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName> {
  defvar e32Pseudo = !cast<VOP2_Pseudo>(OpName#"_e32");
  defvar e64Pseudo = !cast<VOP3_Pseudo>(OpName#"_e64");
  defvar sdwaPseudo = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
  defvar dppPseudo = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");

  def _e32_vi :
    VOP2_Real<e32Pseudo, SIEncodingFamily.VI>,
    VOP2e<op{5-0}, e32Pseudo.Pfl> {
    VOP2_Pseudo ps = e32Pseudo;
    let AsmString = AsmName # ps.AsmOperands;
    let DecoderNamespace = "GFX8";
  }
  def _e64_vi :
    VOP3_Real<e64Pseudo, SIEncodingFamily.VI>,
    VOP3be_vi <{0, 1, 0, 0, op{5-0}}, e64Pseudo.Pfl> {
    VOP3_Pseudo ps = e64Pseudo;
    let AsmString = AsmName # ps.AsmOperands;
    let DecoderNamespace = "GFX8";
  }
  if e32Pseudo.Pfl.HasExtSDWA then
  def _sdwa_vi :
    VOP_SDWA_Real <sdwaPseudo>,
    VOP2_SDWAe <op{5-0}, sdwaPseudo.Pfl> {
    VOP2_SDWA_Pseudo ps = sdwaPseudo;
    let AsmString = AsmName # ps.AsmOperands;
  }
  if e32Pseudo.Pfl.HasExtDPP then
  def _dpp_vi :
    VOP_DPP_Real<dppPseudo, SIEncodingFamily.VI>,
    VOP2_DPPe<op, dppPseudo> {
    VOP2_DPP_Pseudo ps = dppPseudo;
    let AsmString = AsmName # ps.AsmOperands;
  }
}
} // End AssemblerPredicate = isGFX8Only

let AssemblerPredicate = isGFX9Only in {

// GFX9-only counterpart of the multiclass above: e32/e64 reals decode from
// "GFX9", the DPP real from "SDWA9".
multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
  defvar e32Pseudo = !cast<VOP2_Pseudo>(OpName#"_e32");
  defvar e64Pseudo = !cast<VOP3_Pseudo>(OpName#"_e64");
  defvar sdwaPseudo = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
  defvar dppPseudo = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");

  def _e32_gfx9 :
    VOP2_Real<e32Pseudo, SIEncodingFamily.GFX9>,
    VOP2e<op{5-0}, e32Pseudo.Pfl> {
    VOP2_Pseudo ps = e32Pseudo;
    let AsmString = AsmName # ps.AsmOperands;
    let DecoderNamespace = "GFX9";
  }
  def _e64_gfx9 :
    VOP3_Real<e64Pseudo, SIEncodingFamily.GFX9>,
    VOP3be_vi <{0, 1, 0, 0, op{5-0}}, e64Pseudo.Pfl> {
    VOP3_Pseudo ps = e64Pseudo;
    let AsmString = AsmName # ps.AsmOperands;
    let DecoderNamespace = "GFX9";
  }
  if e32Pseudo.Pfl.HasExtSDWA9 then
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <sdwaPseudo>,
    VOP2_SDWA9Ae <op{5-0}, sdwaPseudo.Pfl> {
    VOP2_SDWA_Pseudo ps = sdwaPseudo;
    let AsmString = AsmName # ps.AsmOperands;
  }
  if e32Pseudo.Pfl.HasExtDPP then
  def _dpp_gfx9 :
    VOP_DPP_Real<dppPseudo, SIEncodingFamily.GFX9>,
    VOP2_DPPe<op, dppPseudo> {
    VOP2_DPP_Pseudo ps = dppPseudo;
    let AsmString = AsmName # ps.AsmOperands;
    let DecoderNamespace = "SDWA9";
  }
}

// GFX9-only reals for NAME, keeping the pseudo's own asm string.
multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
  defvar e32Pseudo = !cast<VOP2_Pseudo>(NAME#"_e32");
  defvar e64Pseudo = !cast<VOP3_Pseudo>(NAME#"_e64");
  defvar sdwaPseudo = !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa");
  defvar dppPseudo = !cast<VOP2_DPP_Pseudo>(NAME#"_dpp");

  def _e32_gfx9 :
    VOP2_Real<e32Pseudo, SIEncodingFamily.GFX9>,
    VOP2e<op{5-0}, e32Pseudo.Pfl> {
    let DecoderNamespace = "GFX9";
  }
  def _e64_gfx9 :
    VOP3_Real<e64Pseudo, SIEncodingFamily.GFX9>,
    VOP3e_vi <{0, 1, 0, 0, op{5-0}}, e64Pseudo.Pfl> {
    let DecoderNamespace = "GFX9";
  }
  if e32Pseudo.Pfl.HasExtSDWA9 then
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <sdwaPseudo>,
    VOP2_SDWA9Ae <op{5-0}, sdwaPseudo.Pfl>;
  if e32Pseudo.Pfl.HasExtDPP then
  def _dpp_gfx9 :
    VOP_DPP_Real<dppPseudo, SIEncodingFamily.GFX9>,
    VOP2_DPPe<op, dppPseudo> {
    let DecoderNamespace = "SDWA9";
  }
}

} // AssemblerPredicate = isGFX9Only

// Full VI set for NAME: e32/e64, both SDWA flavours, and a DPP real when the
// e32 profile has HasExtDPP set.
multiclass VOP2_Real_e32e64_vi <bits<6> op> :
  Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {

  defvar e32Pseudo = !cast<VOP2_Pseudo>(NAME#"_e32");
  defvar dppPseudo = !cast<VOP2_DPP_Pseudo>(NAME#"_dpp");
  if e32Pseudo.Pfl.HasExtDPP then
  def _dpp_vi :
    VOP_DPP_Real<dppPseudo, SIEncodingFamily.VI>,
    VOP2_DPPe<op, dppPseudo>;
}

defm V_CNDMASK_B32        : VOP2_Real_e32e64_vi <0x0>;
defm V_ADD_F32            : VOP2_Real_e32e64_vi <0x1>;
defm V_SUB_F32            : VOP2_Real_e32e64_vi <0x2>;
defm V_SUBREV_F32         : VOP2_Real_e32e64_vi <0x3>;
let AssemblerPredicate = isGCN3ExcludingGFX90A in
defm V_MUL_LEGACY_F32     : VOP2_Real_e32e64_vi <0x4>;
defm V_MUL_F32            : VOP2_Real_e32e64_vi <0x5>;
defm V_MUL_I32_I24        : VOP2_Real_e32e64_vi <0x6>;
defm V_MUL_HI_I32_I24     : VOP2_Real_e32e64_vi <0x7>;
defm V_MUL_U32_U24        : VOP2_Real_e32e64_vi <0x8>;
defm V_MUL_HI_U32_U24     : VOP2_Real_e32e64_vi <0x9>;
defm V_MIN_F32            : VOP2_Real_e32e64_vi <0xa>;
defm V_MAX_F32            : VOP2_Real_e32e64_vi <0xb>;
defm V_MIN_I32            : VOP2_Real_e32e64_vi <0xc>;
defm V_MAX_I32            : VOP2_Real_e32e64_vi <0xd>;
defm V_MIN_U32            : VOP2_Real_e32e64_vi <0xe>;
defm V_MAX_U32            : VOP2_Real_e32e64_vi <0xf>;
defm V_LSHRREV_B32        : VOP2_Real_e32e64_vi <0x10>;
defm V_ASHRREV_I32        : VOP2_Real_e32e64_vi <0x11>;
defm V_LSHLREV_B32        : VOP2_Real_e32e64_vi <0x12>;
defm V_AND_B32            : VOP2_Real_e32e64_vi <0x13>;
defm V_OR_B32             : VOP2_Real_e32e64_vi <0x14>;
defm V_XOR_B32            : VOP2_Real_e32e64_vi <0x15>;
defm V_MAC_F32            : VOP2_Real_e32e64_vi <0x16>;
defm V_MADMK_F32          : VOP2_Real_MADK_vi <0x17>;
defm V_MADAK_F32          : VOP2_Real_MADK_vi <0x18>;

// GFX8-only carry instructions.
defm V_ADD_U32            : VOP2be_Real_e32e64_vi_only <0x19, "V_ADD_CO_U32", "v_add_u32">;
defm V_SUB_U32            : VOP2be_Real_e32e64_vi_only <0x1a, "V_SUB_CO_U32", "v_sub_u32">;
defm V_SUBREV_U32         : VOP2be_Real_e32e64_vi_only <0x1b, "V_SUBREV_CO_U32", "v_subrev_u32">;
defm V_ADDC_U32           : VOP2be_Real_e32e64_vi_only <0x1c, "V_ADDC_U32", "v_addc_u32">;
defm V_SUBB_U32           : VOP2be_Real_e32e64_vi_only <0x1d, "V_SUBB_U32", "v_subb_u32">;
defm V_SUBBREV_U32        : VOP2be_Real_e32e64_vi_only <0x1e, "V_SUBBREV_U32", "v_subbrev_u32">;

// GFX9-only carry instructions: same opcodes, *_co_u32 mnemonics.
defm V_ADD_CO_U32         : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_CO_U32", "v_add_co_u32">;
defm V_SUB_CO_U32         : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_CO_U32", "v_sub_co_u32">;
defm V_SUBREV_CO_U32      : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_CO_U32", "v_subrev_co_u32">;
defm V_ADDC_CO_U32        : VOP2be_Real_e32e64_gfx9 <0x1c, "V_ADDC_U32", "v_addc_co_u32">;
defm V_SUBB_CO_U32        : VOP2be_Real_e32e64_gfx9 <0x1d, "V_SUBB_U32", "v_subb_co_u32">;
defm V_SUBBREV_CO_U32     : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_subbrev_co_u32">;

defm V_ADD_U32            : VOP2_Real_e32e64_gfx9 <0x34>;
defm V_SUB_U32            : VOP2_Real_e32e64_gfx9 <0x35>;
defm V_SUBREV_U32         : VOP2_Real_e32e64_gfx9 <0x36>;

// e64-only reals; the argument is the full 10-bit opcode value.
defm V_BFM_B32            : VOP2_Real_e64only_vi <0x293>;
defm V_BCNT_U32_B32       : VOP2_Real_e64only_vi <0x28b>;
defm V_MBCNT_LO_U32_B32   : VOP2_Real_e64only_vi <0x28c>;
defm V_MBCNT_HI_U32_B32   : VOP2_Real_e64only_vi <0x28d>;
defm V_LDEXP_F32          : VOP2_Real_e64only_vi <0x288>;
defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64only_vi <0x1f0>;
defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64only_vi <0x294>;
defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64only_vi <0x295>;
defm V_CVT_PKRTZ_F16_F32  : VOP2_Real_e64only_vi <0x296>;
defm V_CVT_PK_U16_U32     : VOP2_Real_e64only_vi <0x297>;
defm V_CVT_PK_I16_I32     : VOP2_Real_e64only_vi <0x298>;

defm V_ADD_F16            : VOP2_Real_e32e64_vi <0x1f>;
defm V_SUB_F16            : VOP2_Real_e32e64_vi <0x20>;
defm V_SUBREV_F16         : VOP2_Real_e32e64_vi <0x21>;
defm V_MUL_F16            : VOP2_Real_e32e64_vi <0x22>;
defm V_MAC_F16            : VOP2_Real_e32e64_vi <0x23>;
defm V_MADMK_F16          : VOP2_Real_MADK_vi <0x24>;
defm V_MADAK_F16          : VOP2_Real_MADK_vi <0x25>;
defm V_ADD_U16            : VOP2_Real_e32e64_vi <0x26>;
defm V_SUB_U16            : VOP2_Real_e32e64_vi <0x27>;
defm V_SUBREV_U16         : VOP2_Real_e32e64_vi <0x28>;
defm V_MUL_LO_U16         : VOP2_Real_e32e64_vi <0x29>;
defm V_LSHLREV_B16        : VOP2_Real_e32e64_vi <0x2a>;
defm V_LSHRREV_B16        : VOP2_Real_e32e64_vi <0x2b>;
defm V_ASHRREV_I16        : VOP2_Real_e32e64_vi <0x2c>;
defm V_MAX_F16            : VOP2_Real_e32e64_vi <0x2d>;
defm V_MIN_F16            : VOP2_Real_e32e64_vi <0x2e>;
defm V_MAX_U16            : VOP2_Real_e32e64_vi <0x2f>;
defm V_MAX_I16            : VOP2_Real_e32e64_vi <0x30>;
defm V_MIN_U16            : VOP2_Real_e32e64_vi <0x31>;
defm V_MIN_I16            : VOP2_Real_e32e64_vi <0x32>;
defm V_LDEXP_F16          : VOP2_Real_e32e64_vi <0x33>;

let SubtargetPredicate = isGFX8GFX9 in {

// Aliases to simplify matching of floating-point instructions that
// are VOP2 on SI and VOP3 on VI.
// The result dag carries one extra trailing 0 operand when the real's
// profile has HasOMod set.
class SI2_VI3Alias <string name, VOP3_Real inst> : InstAlias <
  name#" $dst, $src0, $src1",
  !if(inst.Pfl.HasOMod,
      (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0),
      (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0))
>, PredicateControl {
  let UseInstAsmMatchConverter = 0;
  let AsmVariantName = AMDGPUAsmVariants.VOP3;
}

def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;

defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_vi>;

} // End SubtargetPredicate = isGFX8GFX9

let SubtargetPredicate = isGFX9Only in {

defm : VOP2bInstAliases<V_ADD_U32_e32,     V_ADD_CO_U32_e32_gfx9,     "v_add_co_u32">;
defm : VOP2bInstAliases<V_ADDC_U32_e32,    V_ADDC_CO_U32_e32_gfx9,    "v_addc_co_u32">;
defm : VOP2bInstAliases<V_SUB_U32_e32,     V_SUB_CO_U32_e32_gfx9,     "v_sub_co_u32">;
defm : VOP2bInstAliases<V_SUBB_U32_e32,    V_SUBB_CO_U32_e32_gfx9,    "v_subb_co_u32">;
defm : VOP2bInstAliases<V_SUBREV_U32_e32,  V_SUBREV_CO_U32_e32_gfx9,  "v_subrev_co_u32">;
defm : VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBBREV_CO_U32_e32_gfx9, "v_subbrev_co_u32">;

} // End SubtargetPredicate = isGFX9Only

let SubtargetPredicate = HasDLInsts in {

defm V_FMAC_F32 : VOP2_Real_e32e64_vi <0x3b>;
defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>;

} // End SubtargetPredicate = HasDLInsts

let AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A" in {
  multiclass VOP2_Real_e32_gfx90a <bits<6> op> {
    defvar e32Pseudo = !cast<VOP2_Pseudo>(NAME#"_e32");
    def _e32_gfx90a :
      VOP2_Real<e32Pseudo, SIEncodingFamily.GFX90A>,
      VOP2e<op{5-0}, e32Pseudo.Pfl>;
  }

  multiclass VOP2_Real_e64_gfx90a <bits<10> op> {
    defvar e64Pseudo = !cast<VOP3_Pseudo>(NAME#"_e64");
    def _e64_gfx90a :
      VOP3_Real<e64Pseudo, SIEncodingFamily.GFX90A>,
      VOP3e_vi <op, e64Pseudo.Pfl>;
  }

  multiclass Base_VOP2_Real_e32e64_gfx90a <bits<6> op> :
    VOP2_Real_e32_gfx90a<op>,
    VOP2_Real_e64_gfx90a<{0, 1, 0, 0, op{5-0}}>;

  // e32/e64 reals plus a DPP real (decoded from "SDWA9") when the e32 profile
  // has HasExtDPP set.
  multiclass VOP2_Real_e32e64_gfx90a <bits<6> op> :
    Base_VOP2_Real_e32e64_gfx90a<op> {

    defvar e32Pseudo = !cast<VOP2_Pseudo>(NAME#"_e32");
    defvar dppPseudo = !cast<VOP2_DPP_Pseudo>(NAME#"_dpp");
    if e32Pseudo.Pfl.HasExtDPP then
    def _dpp_gfx90a :
      VOP_DPP_Real<dppPseudo, SIEncodingFamily.GFX90A>,
      VOP2_DPPe<op, dppPseudo> {
      let DecoderNamespace = "SDWA9";
    }
  }
} // End AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A"

let SubtargetPredicate = HasFmacF64Inst in {
  defm V_FMAC_F64       : VOP2_Real_e32e64_gfx90a <0x4>;
} // End SubtargetPredicate = HasFmacF64Inst

let SubtargetPredicate = isGFX90APlus, IsSingle = 1 in {
  defm V_MUL_LEGACY_F32 : VOP2_Real_e64_gfx90a <0x2a1>;
} // End SubtargetPredicate = isGFX90APlus, IsSingle = 1

let SubtargetPredicate = HasFmaakFmamkF32Insts in {
defm V_FMAMK_F32        : VOP2_Real_MADK_gfx940 <0x17>;
defm V_FMAAK_F32        : VOP2_Real_MADK_gfx940 <0x18>;
} // End SubtargetPredicate = HasFmaakFmamkF32Insts

// e32/e64 VI reals plus an unconditional _dpp_vi real.
multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : Base_VOP2_Real_e32e64_vi<op> {
  defvar dppPseudo = !cast<VOP2_DPP_Pseudo>(NAME#"_dpp");
  def _dpp_vi : VOP2_DPP<op, dppPseudo>;
}

multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> :
  VOP2_Real_e32_gfx10<op>,
  VOP2_Real_dpp_gfx10<op>,
  VOP2_Real_dpp8_gfx10<op>;

let SubtargetPredicate = HasDot5Insts in {
  defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx9<0x37>;
  // NB: Opcode conflicts with V_DOT8C_I32_I4
  // This opcode exists in gfx 10.1* only
  defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx10<0x02>;
} // End SubtargetPredicate = HasDot5Insts

let SubtargetPredicate = HasDot6Insts in {
  defm V_DOT4C_I32_I8  : VOP2_Real_DOT_ACC_gfx9<0x39>;
  defm V_DOT4C_I32_I8  : VOP2_Real_DOT_ACC_gfx10<0x0d>;
} // End SubtargetPredicate = HasDot6Insts

let SubtargetPredicate = HasDot4Insts in {
  defm V_DOT2C_I32_I16 : VOP2_Real_DOT_ACC_gfx9<0x38>;
} // End SubtargetPredicate = HasDot4Insts
let SubtargetPredicate = HasDot3Insts in {
  defm V_DOT8C_I32_I4  : VOP2_Real_DOT_ACC_gfx9<0x3a>;
} // End SubtargetPredicate = HasDot3Insts

let SubtargetPredicate = HasPkFmacF16Inst in {
defm V_PK_FMAC_F16 : VOP2_Real_e32_vi<0x3c>;
} // End SubtargetPredicate = HasPkFmacF16Inst

let SubtargetPredicate = HasDot3Insts in {
  // NB: Opcode conflicts with V_DOT2C_F32_F16
  let DecoderNamespace = "GFX10_B" in
  defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx10<0x02>;
} // End SubtargetPredicate = HasDot3Insts