//===-- VOP2Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP2 Classes
//===----------------------------------------------------------------------===//

// 32-bit VOP2 encoding. Operand fields that the profile does not provide
// (HasSrc0 / HasSrc1 / EmitDst are false) are encoded as 0.
class VOP2e <bits<6> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;
  bits<8> src1;

  let Inst{8-0}   = !if(P.HasSrc0, src0, 0);
  let Inst{16-9}  = !if(P.HasSrc1, src1, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; //encoding
}

// 64-bit VOP2 encoding: same low dword layout as VOP2e, with the 32-bit
// immediate placed in bits 63-32.
class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 {
  bits<8>  vdst;
  bits<9>  src0;
  bits<8>  src1;
  bits<32> imm;

  let Inst{8-0}   = !if(P.HasSrc0, src0, 0);
  let Inst{16-9}  = !if(P.HasSrc1, src1, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
  let Inst{63-32} = imm;
}

// VOP2 SDWA encoding: the src0 field holds the fixed 0xf9 marker.
class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;
  bits<8> src1;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
}

// VOP2 SDWA9 encoding: src1 is 9 bits wide; its top bit is emitted in
// bit 63 (src1_sgpr).
class VOP2_SDWA9Ae <bits<6> op, VOPProfile P> : VOP_SDWA9Ae <P> {
  bits<8> vdst;
  bits<9> src1;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
  let Inst{63}    = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr
}

// Pseudo instruction for the 32-bit VOP2 form. Uses the profile's Outs32 /
// Ins32 / Asm32 and reads EXEC by default.
class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suffix = "_e32"> :
  VOP_Pseudo <opName, suffix, P, P.Outs32, P.Ins32, "", pattern> {

  let AsmOperands = P.Asm32;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  let VOP2 = 1;
  let VALU = 1;
  let Uses = [EXEC];

  let AsmVariantName = AMDGPUAsmVariants.Default;
}

// Real (encodable) instruction derived from a VOP2_Pseudo for one encoding
// family. Operand lists, asm string and the listed flags are copied from
// the pseudo.
class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  // (Constraints and DisableEncoding used to be assigned twice in this
  // class; a single assignment of each is sufficient.)
  let Constraints          = ps.Constraints;
  let DisableEncoding      = ps.DisableEncoding;
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let AsmVariantName       = ps.AsmVariantName;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses                 = ps.Uses;
  let Defs                 = ps.Defs;
}

// SDWA pseudo for VOP2; selects the "cvtSdwaVOP2" asm match converter.
class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP2";
}

// DPP pseudo for VOP2; adds nothing on top of VOP_DPP_Pseudo.
class VOP2_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_DPP_Pseudo <OpName, P, pattern> {
}


// Selection pattern for the 64-bit (VOP3-encoded) form of a VOP2 operation.
// With P.HasModifiers the sources are matched through VOP3Mods0 / VOP3Mods
// ($omod is only bound when P.HasOMod); otherwise a plain two-source
// pattern is produced.
class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret = !if(P.HasModifiers,
    [(set P.DstVT:$vdst,
      (node (P.Src0VT
              !if(P.HasOMod,
                  (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
                  (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))),
            (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
    [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]);
}

// Defines the <NAME>_e32 pseudo. revOp names the operation this one is the
// reversed form of; IsOrig is set when revOp equals opName.
multiclass VOP2Inst_e32<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName,
                        bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
               Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
  } // End renamedInGFX9 = GFX9Renamed
}

// Defines the <NAME>_e64 pseudo (VOP3_Pseudo) using getVOP2Pat64.
multiclass VOP2Inst_e64<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName,
                        bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
               Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
  } // End renamedInGFX9 = GFX9Renamed
}

// Defines the <NAME>_sdwa pseudo, only when the profile has HasExtSDWA set.
multiclass VOP2Inst_sdwa<string opName,
                         VOPProfile P,
                         SDPatternOperator node = null_frag,
                         string revOp = opName,
                         bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    foreach _ = BoolToList<P.HasExtSDWA>.ret in
      def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
  } // End renamedInGFX9 = GFX9Renamed
}

// Full VOP2 instruction: _e32, _e64 and _sdwa from the multiclasses above,
// plus _dpp when the profile has HasExtDPP set.
multiclass VOP2Inst<string opName,
                    VOPProfile P,
                    SDPatternOperator node = null_frag,
                    string revOp = opName,
                    bit GFX9Renamed = 0> :
    VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
    VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
    VOP2Inst_sdwa<opName, P, node, revOp, GFX9Renamed> {
  let renamedInGFX9 = GFX9Renamed in {
    foreach _ = BoolToList<P.HasExtDPP>.ret in
      def _dpp  : VOP2_DPP_Pseudo <opName, P>;
  }
}

// VOP2 instruction whose 32-bit forms define VCC. The _e32/_sdwa/_dpp forms
// also use VCC when useSGPRInput is set (default: the profile has three
// source arguments). The _e64 form sits outside the Uses/Defs override.
multiclass VOP2bInst <string opName,
                      VOPProfile P,
                      SDPatternOperator node = null_frag,
                      string revOp = opName,
                      bit GFX9Renamed = 0,
                      bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
  let renamedInGFX9 = GFX9Renamed in {
    let SchedRW = [Write32Bit, WriteSALU] in {
      let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
        def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
                   Commutable_REV<revOp#"_e32", !eq(revOp, opName)> {
          let usesCustomInserter = !eq(P.NumSrcArgs, 2);
        }

        foreach _ = BoolToList<P.HasExtSDWA>.ret in
          def _sdwa  : VOP2_SDWA_Pseudo <opName, P> {
            let AsmMatchConverter = "cvtSdwaVOP2b";
          }
        foreach _ = BoolToList<P.HasExtDPP>.ret in
          def _dpp  : VOP2_DPP_Pseudo <opName, P>;
      }

      def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
                 Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
    }
  }
}

// Assembler alias for a VOP2b instruction: the profile's Asm32 string with
// every "vcc" replaced by opnd.
class VOP2bInstAlias <VOP2_Pseudo ps, Instruction inst,
                      string OpName, string opnd> :
  InstAlias <OpName#" "#!subst("vcc", opnd, ps.Pfl.Asm32),
             (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
                   ps.Pfl.Src1RC32:$src1)>,
  PredicateControl {
}

// Emits the alias once per wave size: "vcc_lo" under isWave32 and "vcc"
// under isWave64.
multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> {
  let WaveSizePredicate = isWave32 in {
    def : VOP2bInstAlias<ps, inst, OpName, "vcc_lo">;
  }
  let WaveSizePredicate = isWave64 in {
    def : VOP2bInstAlias<ps, inst, OpName, "vcc">;
  }
}

// VOP2 instruction whose 32-bit forms may use VCC (when useSGPRInput is set)
// but do not define it. The _e32 form carries no selection pattern.
multiclass VOP2eInst <string opName,
                      VOPProfile P,
                      SDPatternOperator node = null_frag,
                      string revOp = opName,
                      bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {

  let SchedRW = [Write32Bit] in {
    let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
      def _e32 : VOP2_Pseudo <opName, P>,
                 Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;

      foreach _ = BoolToList<P.HasExtSDWA>.ret in
        def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
          let AsmMatchConverter = "cvtSdwaVOP2e";
        }

      foreach _ = BoolToList<P.HasExtDPP>.ret in
        def _dpp  : VOP2_DPP_Pseudo <opName, P>;
    }

    def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
               Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
  }
}

// Assembler alias for a VOP2e instruction: the profile's Asm32 string with
// ", <opnd>" appended.
class VOP2eInstAlias <VOP2_Pseudo ps, Instruction inst, string opnd> :
  InstAlias <ps.OpName#" "#ps.Pfl.Asm32#", "#opnd,
             (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
                   ps.Pfl.Src1RC32:$src1)>,
  PredicateControl {
}

// Emits the alias once per wave size: "vcc_lo" under isWave32 and "vcc"
// under isWave64.
multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
  let WaveSizePredicate = isWave32 in {
    def : VOP2eInstAlias<ps, inst, "vcc_lo">;
  }
  let WaveSizePredicate = isWave64 in {
    def : VOP2eInstAlias<ps, inst, "vcc">;
  }
}

// Profile with operand order (src0, src1, imm). The immediate and src0
// operand classes are chosen by the element size of vt.
class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
  field dag Ins32 = !if(!eq(vt.Size, 32),
                        (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm),
                        (ins VCSrc_f16:$src0, VGPR_32:$src1, ImmOpType:$imm));
  field bit HasExt = 0;

  // Hack to stop printing _e64
  let DstRC = RegisterOperand<VGPR_32>;
  field string Asm32 = " $vdst, $src0, $src1, $imm";
}

def VOP_MADAK_F16 : VOP_MADAK <f16>;
def VOP_MADAK_F32 : VOP_MADAK <f32>;

// Profile with operand order (src0, imm, src1). As in VOP_MADAK, the src0
// operand class follows the element size of vt, so the 16-bit profile uses
// VCSrc_f16 instead of unconditionally using VCSrc_f32.
class VOP_MADMK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
  field dag Ins32 = !if(!eq(vt.Size, 32),
                        (ins VCSrc_f32:$src0, ImmOpType:$imm, VGPR_32:$src1),
                        (ins VCSrc_f16:$src0, ImmOpType:$imm, VGPR_32:$src1));
  field bit HasExt = 0;

  // Hack to stop printing _e64
  let DstRC = RegisterOperand<VGPR_32>;
  field string Asm32 = " $vdst, $src0, $imm, $src1";
}

def VOP_MADMK_F16 : VOP_MADMK <f16>;
def VOP_MADMK_F32 : VOP_MADMK <f32>;

// FIXME: Remove src2_modifiers. It isn't used, so is wasting memory
//        and processing time but it makes it easier to convert to mad.
// Multiply-accumulate style profile. $src2 is carried as an explicit
// VGPR_32 input in every operand list below while HasSrc2 is cleared.
class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, vt0]> {
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
  let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
                       0, HasModifiers, HasModifiers, HasOMod,
                       Src0Mod, Src1Mod, Src2Mod>.ret;
  let InsDPP = (ins Src0ModDPP:$src0_modifiers,
                    Src0DPP:$src0,
                    Src1ModDPP:$src1_modifiers,
                    Src1DPP:$src1,
                    VGPR_32:$src2, // stub argument
                    dpp_ctrl:$dpp_ctrl,
                    row_mask:$row_mask,
                    bank_mask:$bank_mask,
                    bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));

  let InsDPP8 = (ins Src0ModDPP:$src0_modifiers,
                     Src0DPP:$src0,
                     Src1ModDPP:$src1_modifiers,
                     Src1DPP:$src1,
                     VGPR_32:$src2, // stub argument
                     dpp8:$dpp8,
                     FI:$fi);

  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers,
                     Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers,
                     Src1SDWA:$src1,
                     VGPR_32:$src2, // stub argument
                     clampmod:$clamp,
                     omod:$omod,
                     dst_sel:$dst_sel,
                     dst_unused:$dst_unused,
                     src0_sel:$src0_sel,
                     src1_sel:$src1_sel);
  let Asm32    = getAsm32<1, 2, vt0>.ret;
  let Asm64    = getAsm64<1, 2, 0, HasModifiers, HasOMod, vt0>.ret;
  let AsmDPP   = getAsmDPP<1, 2, HasModifiers, vt0>.ret;
  let AsmDPP16 = getAsmDPP16<1, 2, HasModifiers, vt0>.ret;
  let AsmDPP8  = getAsmDPP8<1, 2, 0, vt0>.ret;
  let AsmSDWA  = getAsmSDWA<1, 2, vt0>.ret;
  let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt0>.ret;
  let HasSrc2     = 0;
  let HasSrc2Mods = 0;

  let HasExt      = 1;
  let HasExtDPP   = 1;
  let HasExtSDWA  = 1;
  let HasExtSDWA9 = 0;
  let TieRegDPP   = "$src2";
}

def VOP_MAC_F16 : VOP_MAC <f16>;
def VOP_MAC_F32 : VOP_MAC <f32>;

// VOP_MAC variant with clamp, SDWA, op_sel and packed handling switched
// off and modifiers switched on.
class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> {
  let HasClamp     = 0;
  let HasExtSDWA   = 0;
  let HasModifiers = 1;
  let HasOpSel     = 0;
  let IsPacked     = 0;
}

def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> {
  let Src0ModDPP = FPVRegInputMods;
  let Src1ModDPP = FPVRegInputMods;
}
def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32>;

// Carry-out profile: the 32-bit forms print a literal "vcc", the 64-bit
// form has an explicit $sdst output.
def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp=*/1> {
  let Asm32    = "$vdst, vcc, $src0, $src1";
  let Asm64    = "$vdst, $sdst, $src0, $src1$clamp";
  let AsmSDWA  = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP   = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8  = "$vdst, vcc, $src0, $src1 $dpp8$fi";
  let AsmDPP16 = AsmDPP#"$fi";
  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
}

// Carry-in and carry-out profile: as above, plus a trailing "vcc" source in
// the 32-bit asm strings and an explicit $src2 in the 64-bit form.
def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*/1> {
  let Asm32    = "$vdst, vcc, $src0, $src1, vcc";
  let Asm64    = "$vdst, $sdst, $src0, $src1, $src2$clamp";
  let AsmSDWA  = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP   = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8  = "$vdst, vcc, $src0, $src1, vcc $dpp8$fi";
  let AsmDPP16 = AsmDPP#"$fi";
  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);

  // The i1 third source implied by the type list is dropped here: the
  // 32-bit encoding reads VCC implicitly.
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);

  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers,
                     Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers,
                     Src1SDWA:$src1,
                     clampmod:$clamp,
                     dst_sel:$dst_sel,
                     dst_unused:$dst_unused,
                     src0_sel:$src0_sel,
                     src1_sel:$src1_sel);

  let InsDPP = (ins DstRCDPP:$old,
                    Src0DPP:$src0,
                    Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl,
                    row_mask:$row_mask,
                    bank_mask:$bank_mask,
                    bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));

  let HasExt      = 1;
  let HasExtDPP   = 1;
  let HasExtSDWA  = 1;
  let HasExtSDWA9 = 1;
}

// Carry-in only profile: a single $vdst output in both forms; the 64-bit
// form takes an explicit $src2.
def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableF32SrcMods=*/1> {
  let Asm32    = "$vdst, $src0, $src1";
  let Asm64    = "$vdst, $src0_modifiers, $src1_modifiers, $src2";
  let AsmSDWA  = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP   = "$vdst, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8  = "$vdst, $src0, $src1, vcc $dpp8$fi";
  let AsmDPP16 = AsmDPP#"$fi";

  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst);

  // The i1 third source implied by the type list is dropped here: the
  // 32-bit encoding reads VCC implicitly.
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);

  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers,
                     Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers,
                     Src1SDWA:$src1,
                     clampmod:$clamp,
                     dst_sel:$dst_sel,
                     dst_unused:$dst_unused,
                     src0_sel:$src0_sel,
                     src1_sel:$src1_sel);

  let InsDPP = (ins DstRCDPP:$old,
                    Src0ModDPP:$src0_modifiers,
                    Src0DPP:$src0,
                    Src1ModDPP:$src1_modifiers,
                    Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl,
                    row_mask:$row_mask,
                    bank_mask:$bank_mask,
                    bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));

  let HasExt      = 1;
  let HasExtDPP   = 1;
  let HasExtSDWA  = 1;
  let HasExtSDWA9 = 1;
}

// Readlane profile: SReg_32 destination; the 64-bit operand lists and asm
// string reuse the 32-bit ones. No DPP/SDWA extensions.
def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
  let Outs32 = (outs SReg_32:$vdst);
  let Outs64 = Outs32;
  let Ins32  = (ins VRegOrLds_32:$src0, SCSrc_b32:$src1);
  let Ins64  = Ins32;
  let Asm32  = " $vdst, $src0, $src1";
  let Asm64  = Asm32;

  let HasExt      = 0;
  let HasExtDPP   = 0;
  let HasExtSDWA  = 0;
  let HasExtSDWA9 = 0;
}

// Writelane profile: VGPR_32 destination with an extra $vdst_in input that
// is not printed in the asm string. No DPP/SDWA extensions.
def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
  let Outs32 = (outs VGPR_32:$vdst);
  let Outs64 = Outs32;
  let Ins32  = (ins SCSrc_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in);
  let Ins64  = Ins32;
  let Asm32  = " $vdst, $src0, $src1";
  let Asm64  = Asm32;
  let HasSrc2     = 0;
  let HasSrc2Mods = 0;

  let HasExt      = 0;
  let HasExtDPP   = 0;
  let HasExtSDWA  = 0;
  let HasExtSDWA9 = 0;
}

//===----------------------------------------------------------------------===//
// VOP2 Instructions
//===----------------------------------------------------------------------===//

defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>;
def  V_MADMK_F32   : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;

let isCommutable = 1 in {
defm V_ADD_F32        : VOP2Inst <"v_add_f32",        VOP_F32_F32_F32, fadd>;
defm V_SUB_F32        : VOP2Inst <"v_sub_f32",        VOP_F32_F32_F32, fsub>;
defm V_SUBREV_F32     : VOP2Inst <"v_subrev_f32",     VOP_F32_F32_F32, null_frag, "v_sub_f32">;
defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
defm V_MUL_F32        : VOP2Inst <"v_mul_f32",        VOP_F32_F32_F32, fmul>;
defm V_MUL_I32_I24    : VOP2Inst <"v_mul_i32_i24",    VOP_I32_I32_I32, AMDGPUmul_i24>;
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_i24>;
defm V_MUL_U32_U24    : VOP2Inst <"v_mul_u32_u24",    VOP_I32_I32_I32, AMDGPUmul_u24>;
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_u24>;
defm V_MIN_F32        : VOP2Inst <"v_min_f32",        VOP_F32_F32_F32, fminnum_like>;
defm V_MAX_F32        : VOP2Inst <"v_max_f32",        VOP_F32_F32_F32, fmaxnum_like>;
defm V_MIN_I32        : VOP2Inst <"v_min_i32",        VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
defm V_MAX_I32        : VOP2Inst <"v_max_i32",        VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
defm V_MIN_U32        : VOP2Inst <"v_min_u32",        VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
defm V_MAX_U32        : VOP2Inst <"v_max_u32",        VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
defm V_LSHRREV_B32    : VOP2Inst <"v_lshrrev_b32",    VOP_I32_I32_I32, lshr_rev, "v_lshr_b32">;
defm V_ASHRREV_I32    : VOP2Inst <"v_ashrrev_i32",    VOP_I32_I32_I32, ashr_rev, "v_ashr_i32">;
defm V_LSHLREV_B32    : VOP2Inst <"v_lshlrev_b32",    VOP_I32_I32_I32, lshl_rev, "v_lshl_b32">;
defm V_AND_B32        : VOP2Inst <"v_and_b32",        VOP_PAT_GEN<VOP_I32_I32_I32>, and>;
defm V_OR_B32         : VOP2Inst <"v_or_b32",         VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
defm V_XOR_B32        : VOP2Inst <"v_xor_b32",        VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;

let Constraints = "$vdst = $src2",
    DisableEncoding = "$src2",
    isConvertibleToThreeAddress = 1 in
defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>;

def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>;

// The carry instructions below carry no selection patterns, so the scalar
// instructions are always selected; the scalar versions are replaced with
// vector ones later when needed.

// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in VI,
// but the VI instructions behave the same as the SI versions.
defm V_ADD_I32     : VOP2bInst <"v_add_i32",     VOP2b_I32_I1_I32_I32,    null_frag, "v_add_i32",  1>;
defm V_SUB_I32     : VOP2bInst <"v_sub_i32",     VOP2b_I32_I1_I32_I32,    null_frag, "v_sub_i32",  1>;
defm V_SUBREV_I32  : VOP2bInst <"v_subrev_i32",  VOP2b_I32_I1_I32_I32,    null_frag, "v_sub_i32",  1>;
defm V_ADDC_U32    : VOP2bInst <"v_addc_u32",    VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32", 1>;
defm V_SUBB_U32    : VOP2bInst <"v_subb_u32",    VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;
defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;


let SubtargetPredicate = HasAddNoCarryInsts in {
defm V_ADD_U32    : VOP2Inst <"v_add_u32",    VOP_I32_I32_I32_ARITH, null_frag, "v_add_u32", 1>;
defm V_SUB_U32    : VOP2Inst <"v_sub_u32",    VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
}

} // End isCommutable = 1

// These are special and do not read the exec mask.
let isConvergent = 1, Uses = []<Register> in {
def V_READLANE_B32 : VOP2_Pseudo<
  "v_readlane_b32", VOP_READLANE,
  [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>;

// The written register is tied to $vdst_in, which is kept out of the
// encoding.
let Constraints = "$vdst = $vdst_in", DisableEncoding = "$vdst_in" in
def V_WRITELANE_B32 : VOP2_Pseudo<
  "v_writelane_b32", VOP_WRITELANE,
  [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]>;
} // End isConvergent = 1, Uses = []

defm V_BFM_B32            : VOP2Inst <"v_bfm_b32",            VOP_NO_EXT<VOP_I32_I32_I32>>;
defm V_BCNT_U32_B32       : VOP2Inst <"v_bcnt_u32_b32",       VOP_NO_EXT<VOP_I32_I32_I32>, add_ctpop>;
defm V_MBCNT_LO_U32_B32   : VOP2Inst <"v_mbcnt_lo_u32_b32",   VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_lo>;
defm V_MBCNT_HI_U32_B32   : VOP2Inst <"v_mbcnt_hi_u32_b32",   VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_hi>;
defm V_LDEXP_F32          : VOP2Inst <"v_ldexp_f32",          VOP_NO_EXT<VOP_F32_F32_I32>, AMDGPUldexp>;
defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst"
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_i16_f32>;
defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_u16_f32>;
defm V_CVT_PKRTZ_F16_F32  : VOP2Inst <"v_cvt_pkrtz_f16_f32",  VOP_NO_EXT<VOP_V2F16_F32_F32>, AMDGPUpkrtz_f16_f32>;
defm V_CVT_PK_U16_U32     : VOP2Inst <"v_cvt_pk_u16_u32",     VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_u16_u32>;
defm V_CVT_PK_I16_I32     : VOP2Inst <"v_cvt_pk_i16_i32",     VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_i16_i32>;


let SubtargetPredicate = isGFX6GFX7 in {
defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>;
defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>;
} // End SubtargetPredicate = isGFX6GFX7

let SubtargetPredicate = isGFX6GFX7GFX10 in {
let isCommutable = 1 in {
defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>;
defm V_LSHR_B32       : VOP2Inst <"v_lshr_b32", VOP_I32_I32_I32, srl>;
defm V_ASHR_I32       : VOP2Inst <"v_ashr_i32", VOP_I32_I32_I32, sra>;
defm V_LSHL_B32       : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32, shl>;
} // End isCommutable = 1
} // End SubtargetPredicate = isGFX6GFX7GFX10

// Selects Inst for the divergent fragment of Op. When Inst is not the
// "original" side of its Commutable_REV pair, the two sources are swapped.
class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
  GCNPat<
    (getDivergentFrag<Op>.ret Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
    !if(!cast<Commutable_REV>(Inst).IsOrig,
        (Inst $src0, $src1),
        (Inst $src1, $src0)
    )
  >;

// Same as DivergentBinOp, but passes a trailing 0 operand to Inst.
class DivergentClampingBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
  GCNPat<
    (getDivergentFrag<Op>.ret Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
    !if(!cast<Commutable_REV>(Inst).IsOrig,
        (Inst $src0, $src1, 0),
        (Inst $src1, $src0, 0)
    )
  >;

def : DivergentBinOp<srl, V_LSHRREV_B32_e64>;
def : DivergentBinOp<sra, V_ASHRREV_I32_e64>;
def : DivergentBinOp<shl, V_LSHLREV_B32_e64>;

let SubtargetPredicate = HasAddNoCarryInsts in {
  def : DivergentClampingBinOp<add, V_ADD_U32_e64>;
  def : DivergentClampingBinOp<sub, V_SUB_U32_e64>;
}

let SubtargetPredicate = isGFX6GFX7GFX8GFX9, Predicates = [isGFX6GFX7GFX8GFX9] in {
def : DivergentClampingBinOp<add, V_ADD_I32_e64>;
def : DivergentClampingBinOp<sub, V_SUB_I32_e64>;
}

def : DivergentBinOp<adde, V_ADDC_U32_e32>;
def : DivergentBinOp<sube, V_SUBB_U32_e32>;

// Splits a divergent 64-bit operation into two applications of Inst, one
// on the sub0 halves and one on the sub1 halves, and reassembles the
// result as a VReg_64.
class divergent_i64_BinOp <SDPatternOperator Op, Instruction Inst> :
  GCNPat<
    (getDivergentFrag<Op>.ret i64:$src0, i64:$src1),
    (REG_SEQUENCE VReg_64,
      (Inst
        (i32 (EXTRACT_SUBREG $src0, sub0)),
        (i32 (EXTRACT_SUBREG $src1, sub0))
      ), sub0,
      (Inst
        (i32 (EXTRACT_SUBREG $src0, sub1)),
        (i32 (EXTRACT_SUBREG $src1, sub1))
      ), sub1
    )
  >;

def : divergent_i64_BinOp <and, V_AND_B32_e32>;
def : divergent_i64_BinOp <or,  V_OR_B32_e32>;
def : divergent_i64_BinOp <xor, V_XOR_B32_e32>;

let SubtargetPredicate = Has16BitInsts in {

let FPDPRounding = 1 in {
def  V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">;
defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>;
} // End FPDPRounding = 1

defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16, lshl_rev>;
defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16, lshr_rev>;
defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16, ashr_rev>;

let isCommutable = 1 in {
let FPDPRounding = 1 in {
defm V_ADD_F16    : VOP2Inst <"v_add_f16", VOP_F16_F16_F16, fadd>;
defm V_SUB_F16    : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16, fsub>;
defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">;
defm V_MUL_F16    : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, fmul>;
def  V_MADAK_F16  : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">;
} // End FPDPRounding = 1
defm V_ADD_U16    : VOP2Inst <"v_add_u16", VOP_I16_I16_I16, add>;
defm V_SUB_U16    : VOP2Inst <"v_sub_u16", VOP_I16_I16_I16, sub>;
defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16, null_frag, "v_sub_u16">;
defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16, mul>;
defm V_MAX_F16    : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>;
defm V_MIN_F16    : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>;
defm V_MAX_U16    : VOP2Inst <"v_max_u16", VOP_I16_I16_I16, umax>;
defm V_MAX_I16    : VOP2Inst <"v_max_i16", VOP_I16_I16_I16, smax>;
defm V_MIN_U16    : VOP2Inst <"v_min_u16", VOP_I16_I16_I16, umin>;
defm V_MIN_I16    : VOP2Inst <"v_min_i16", VOP_I16_I16_I16, smin>;

let Constraints = "$vdst = $src2",
    DisableEncoding = "$src2",
    isConvertibleToThreeAddress = 1 in
defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>;
} // End isCommutable = 1

}
// End SubtargetPredicate = Has16BitInsts

let SubtargetPredicate = HasDLInsts in {

defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32>;

let Constraints = "$vdst = $src2",
    DisableEncoding = "$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1 in
defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>;

} // End SubtargetPredicate = HasDLInsts

// Accumulating dot-product instructions: the destination is tied to $src2,
// which is kept out of the encoding.
let Constraints = "$vdst = $src2",
    DisableEncoding = "$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1,
    IsDOT = 1 in {
  let SubtargetPredicate = HasDot5Insts in
    defm V_DOT2C_F32_F16 : VOP2Inst<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>;
  let SubtargetPredicate = HasDot6Insts in
    defm V_DOT4C_I32_I8  : VOP2Inst<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>;

  let SubtargetPredicate = HasDot4Insts in
    defm V_DOT2C_I32_I16 : VOP2Inst<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>;
  let SubtargetPredicate = HasDot3Insts in
    defm V_DOT8C_I32_I4  : VOP2Inst<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>;
}

// Map the non-clamping dot-product nodes/intrinsics onto the _e32 forms of
// the accumulating instructions above.
let AddedComplexity = 30 in {
  def : GCNPat<
    (f32 (AMDGPUfdot2 v2f16:$src0, v2f16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))),
    (f32 (V_DOT2C_F32_F16_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot5Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot4 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT4C_I32_I8_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot6Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot2 v2i16:$src0, v2i16:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT2C_I32_I16_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot4Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot8 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT8C_I32_I4_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot3Insts;
  }
} // End AddedComplexity = 30

let SubtargetPredicate = isGFX10Plus in {

def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">;
let FPDPRounding = 1 in
def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">;

let isCommutable = 1 in {
def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">;
let FPDPRounding = 1 in
def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">;
} // End isCommutable = 1

let Constraints = "$vdst = $src2",
    DisableEncoding = "$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1 in
defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>;

} // End SubtargetPredicate = isGFX10Plus

let SubtargetPredicate = HasPkFmacF16Inst in {
defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>;
} // End SubtargetPredicate = HasPkFmacF16Inst

// Note: 16-bit instructions produce a 0 result in the high 16-bits
// on GFX8 and GFX9 and preserve high 16 bits on GFX10+
//
// For a 16-bit operation `op` implemented by `inst`, fold a zero-extension
// of the result: to i32 by using inst directly, to i64 by pairing inst's
// result (sub0) with a zero moved into sub1.
multiclass Arithmetic_i16_0Hi_Pats <SDPatternOperator op, Instruction inst> {

def : GCNPat<
  (i32 (zext (op i16:$src0, i16:$src1))),
  (inst VSrc_b16:$src0, VSrc_b16:$src1)
>;

def : GCNPat<
  (i64 (zext (op i16:$src0, i16:$src1))),
  (REG_SEQUENCE VReg_64,
    (inst $src0, $src1), sub0,
    (V_MOV_B32_e32 (i32 0)), sub1)
>;
}

// Extends an i1 to i16 with V_CNDMASK_B32_e64, choosing between the
// constants 0 (src0) and 1 (src1) based on $src.
class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
  (i16 (ext i1:$src)),
  (V_CNDMASK_B32_e64 (i32 0/*src0mod*/), (i32 0/*src0*/),
                     (i32 0/*src1mod*/), (i32 1/*src1*/),
                     $src)
>;

// Bitwise and/or/xor on i16 and v2i16 use the 32-bit instructions.
foreach vt = [i16, v2i16] in {
def : GCNPat <
  (and vt:$src0, vt:$src1),
  (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;

def : GCNPat <
  (or vt:$src0, vt:$src1),
  (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;

def : GCNPat <
  (xor vt:$src0, vt:$src1),
  (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;
}

let Predicates = [Has16BitInsts] in {

// Undo sub x, c -> add x, -c canonicalization since c is more likely
// an inline immediate than -c.
// TODO: Also do for 64-bit.
def : GCNPat<
  (add i16:$src0, (i16 NegSubInlineConst16:$src1)),
  (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineConst16:$src1)
>;


let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in {

def : GCNPat<
  (i32 (zext (add i16:$src0, (i16 NegSubInlineConst16:$src1)))),
  (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineConst16:$src1)
>;

defm : Arithmetic_i16_0Hi_Pats<add,      V_ADD_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<mul,      V_MUL_LO_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<sub,      V_SUB_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<smin,     V_MIN_I16_e64>;
defm : Arithmetic_i16_0Hi_Pats<smax,     V_MAX_I16_e64>;
defm : Arithmetic_i16_0Hi_Pats<umin,     V_MIN_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<umax,     V_MAX_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<lshl_rev, V_LSHLREV_B16_e64>;
defm : Arithmetic_i16_0Hi_Pats<lshr_rev, V_LSHRREV_B16_e64>;
defm : Arithmetic_i16_0Hi_Pats<ashr_rev, V_ASHRREV_I16_e64>;
} // End Predicates = [Has16BitInsts, isGFX7GFX8GFX9]

def : ZExt_i16_i1_Pat<zext>;
def : ZExt_i16_i1_Pat<anyext>;

// Sign-extension of an i1 selects between 0 and -1.
def : GCNPat <
  (i16 (sext i1:$src)),
  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
                     /*src1mod*/(i32 0), /*src1*/(i32 -1), $src)
>;

} // End Predicates = [Has16BitInsts]


//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//

// VOP2 DPP encoding: the src0 field holds the fixed 0xfa marker. Side-effect
// flag, Defs, SchedRW and Uses are copied from the DPP pseudo.
class VOP2_DPP<bits<6> op, VOP2_DPP_Pseudo ps,
               string opName = ps.OpName, VOPProfile p = ps.Pfl,
               bit IsDPP16 = 0> :
  VOP_DPP<opName, p, IsDPP16> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;

  bits<8> vdst;
  bits<8> src1;
  let Inst{8-0}   = 0xfa;
  let Inst{16-9}  = !if(p.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0;
}

// VOP2_DPP with IsDPP16 set. The assembler predicate is HasDPP16 when the
// profile has HasExt set and DisableInst otherwise.
class Base_VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
                      string opName = ps.OpName, VOPProfile p = ps.Pfl> :
  VOP2_DPP<op, ps, opName, p, 1> {
  let AssemblerPredicate = !if(p.HasExt, HasDPP16, DisableInst);
  let SubtargetPredicate = HasDPP16;
}

// Base_VOP2_DPP16 registered under the GFX10 encoding family.
class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
                 string opName = ps.OpName, VOPProfile p = ps.Pfl> :
  Base_VOP2_DPP16<op, ps, opName, p>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX10>;

// VOP2 DPP8 encoding, built from the _e32 pseudo. The src0 field is taken
// from `fi` (presumably declared by VOP_DPP8 -- confirm there).
//
// opName is forwarded to VOP_DPP8, matching how VOP2_DPP forwards it to
// VOP_DPP. It defaults to ps.OpName, so callers that do not pass opName are
// unaffected.
class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
                string opName = ps.OpName, VOPProfile p = ps.Pfl> :
  VOP_DPP8<opName, p> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;

  bits<8> vdst;
  bits<8> src1;

  let Inst{8-0}   = fi;
  let Inst{16-9}  = !if(p.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0;

  let AssemblerPredicate = !if(p.HasExt, HasDPP8, DisableInst);
  let SubtargetPredicate = HasDPP8;
}

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

// Everything in this scope gets AssemblerPredicate = isGFX10Plus and, unless a
// def overrides it, DecoderNamespace = "GFX10".
let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
  //===------------------------------- VOP2 -------------------------------===//
  // <NAME>_gfx10: real for the pseudo named NAME (no _e32 suffix), using the
  // VOP2_MADKe encoding.
  multiclass VOP2Only_Real_MADK_gfx10<bits<6> op> {
    def _gfx10 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX10>,
      VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }
  // As above, but the pseudo is looked up by opName and the assembly string is
  // rebuilt as asmName followed by the pseudo's operand string.
  multiclass VOP2Only_Real_MADK_gfx10_with_name<bits<6> op, string opName,
                                                string asmName> {
    def _gfx10 :
        VOP2_Real<!cast<VOP2_Pseudo>(opName), SIEncodingFamily.GFX10>,
        VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(opName).Pfl> {
      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName);
      let AsmString = asmName # ps.AsmOperands;
    }
  }
  // <NAME>_e32_gfx10: 32-bit VOP2 encoding of the <NAME>_e32 pseudo.
  multiclass VOP2_Real_e32_gfx10<bits<6> op> {
    def _e32_gfx10 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
  }
  // <NAME>_e64_gfx10: VOP3 encoding of the <NAME>_e64 pseudo. The 10-bit VOP3
  // opcode is the 6-bit VOP2 opcode prefixed with 0b0100, i.e. 0x100 | op.
  multiclass VOP2_Real_e64_gfx10<bits<6> op> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
  // <NAME>_sdwa_gfx10, guarded by the _e32 profile's HasExtSDWA9 flag.
  // NOTE(review): BoolToList is defined elsewhere; presumably it yields an
  // empty list for 0, so the foreach emits nothing - confirm.
  multiclass VOP2_Real_sdwa_gfx10<bits<6> op> {
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx10 :
      VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
      let DecoderNamespace = "SDWA10";
    }
  }
  // <NAME>_dpp_gfx10, guarded by the _e32 profile's HasExtDPP flag.
  // NOTE(review): the DPP16 real is placed in the "SDWA10" decoder namespace;
  // this looks deliberate but the reason is not visible here - confirm.
  multiclass VOP2_Real_dpp_gfx10<bits<6> op> {
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
      let DecoderNamespace = "SDWA10";
    }
  }
  // <NAME>_dpp8_gfx10, guarded by HasExtDPP; built from the _e32 pseudo and
  // decoded in the "DPP8" namespace.
  multiclass VOP2_Real_dpp8_gfx10<bits<6> op> {
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
      let DecoderNamespace = "DPP8";
    }
  }

  //===------------------------- VOP2 (with name) -------------------------===//
  // The *_with_name variants take the pseudo's base name as opName instead of
  // deriving it from NAME, and replace the mnemonic in AsmString with asmName.
  multiclass VOP2_Real_e32_gfx10_with_name<bits<6> op, string opName,
                                           string asmName> {
    def _e32_gfx10 :
      VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> {
        VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
        let AsmString = asmName # ps.AsmOperands;
      }
  }
  multiclass VOP2_Real_e64_gfx10_with_name<bits<6> op, string opName,
                                           string asmName> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<{0, 1, 0, 0, op{5-0}},
                  !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
        VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName#"_e64");
        let AsmString = asmName # ps.AsmOperands;
      }
  }
  // The SDWA and DPP16 variants below default to the "SDWA10" decoder
  // namespace; the DPP8 variant overrides it with "DPP8".
  let DecoderNamespace = "SDWA10" in {
    multiclass VOP2_Real_sdwa_gfx10_with_name<bits<6> op, string opName,
                                              string asmName> {
      foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
      def _sdwa_gfx10 :
        VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
        VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
          VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
          let AsmString = asmName # ps.AsmOperands;
        }
    }
    multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName,
                                             string asmName> {
      foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
      def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp")> {
        // The operand string comes from the _e32 profile's AsmDPP16.
        VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
        let AsmString = asmName # ps.Pfl.AsmDPP16;
      }
    }
    multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName,
                                              string asmName> {
      foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
      def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
        VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
        let AsmString = asmName # ps.Pfl.AsmDPP8;
        let DecoderNamespace = "DPP8";
      }
    }
  } // End DecoderNamespace = "SDWA10"

  //===------------------------------ VOP2be ------------------------------===//
  // e32 form: the ", vcc" substring is removed from the pseudo's operand
  // string before asmName is prepended.
  multiclass VOP2be_Real_e32_gfx10<bits<6> op, string opName, string asmName> {
    def _e32_gfx10 :
      VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> {
        VOP2_Pseudo Ps = !cast<VOP2_Pseudo>(opName#"_e32");
        let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
      }
  }
  // e64 form: VOP3be encoding with opcode 0x100 | op; operands are unchanged.
  multiclass VOP2be_Real_e64_gfx10<bits<6> op, string opName, string asmName> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
      VOP3be_gfx10<{0, 1, 0, 0, op{5-0}},
                   !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
        VOP3_Pseudo Ps = !cast<VOP3_Pseudo>(opName#"_e64");
        let AsmString = asmName # Ps.AsmOperands;
      }
  }
  // SDWA forms. Three records share the same encoding:
  //   _sdwa_gfx10     - ", vcc" removed from the operand string.
  //   _sdwa_w32_gfx10 - asm-parser-only, isWave32, "vcc" spelled "vcc_lo".
  //   _sdwa_w64_gfx10 - asm-parser-only, isWave64, operand string unchanged.
  multiclass VOP2be_Real_sdwa_gfx10<bits<6> op, string opName, string asmName> {
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx10 :
      VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
        VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
        let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
        let DecoderNamespace = "SDWA10";
      }
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_w32_gfx10 :
      Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
        VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
        let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands);
        let isAsmParserOnly = 1;
        let DecoderNamespace = "SDWA10";
        let WaveSizePredicate = isWave32;
      }
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_w64_gfx10 :
      Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
        VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
        let AsmString = asmName # Ps.AsmOperands;
        let isAsmParserOnly = 1;
        let DecoderNamespace = "SDWA10";
        let WaveSizePredicate = isWave64;
      }
  }
  // DPP16 forms, same three-way split as the SDWA forms above. Only the
  // primary record derives from VOP2_DPP16 (which adds the SIMCInstr mapping)
  // and sets a decoder namespace; the wave-size records derive from
  // Base_VOP2_DPP16 and are asm-parser-only.
  multiclass VOP2be_Real_dpp_gfx10<bits<6> op, string opName, string asmName> {
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_gfx10 :
      VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
        string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
        let AsmString = asmName # !subst(", vcc", "", AsmDPP);
        let DecoderNamespace = "SDWA10";
      }
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_w32_gfx10 :
      Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
        string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
        let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave32;
      }
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_w64_gfx10 :
      Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
        string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
        let AsmString = asmName # AsmDPP;
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave64;
      }
  }
  // DPP8 forms, same three-way split. AsmString is always overridden here, so
  // the asmName passed as VOP2_DPP8's name argument does not reach the final
  // assembly string by that route.
  multiclass VOP2be_Real_dpp8_gfx10<bits<6> op, string opName, string asmName> {
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp8_gfx10 :
      VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
        string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
        let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
        let DecoderNamespace = "DPP8";
      }
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp8_w32_gfx10 :
      VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
        string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
        let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave32;
      }
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp8_w64_gfx10 :
      VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
        string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
        let AsmString = asmName # AsmDPP8;
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave64;
      }
  }

  //===----------------------------- VOP3Only -----------------------------===//
  // <NAME>_e64_gfx10 only; op is the full 10-bit VOP3 opcode.
  multiclass VOP3Only_Real_gfx10<bits<10> op> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }

  //===---------------------------- VOP3beOnly ----------------------------===//
  // VOP3be-encoded _e64 real for the pseudo opName#"_e64", printed as asmName.
  multiclass VOP3beOnly_Real_gfx10<bits<10> op, string opName, string asmName> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
      VOP3be_gfx10<op, !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
        VOP3_Pseudo Ps = !cast<VOP3_Pseudo>(opName#"_e64");
        let AsmString = asmName # Ps.AsmOperands;
      }
  }
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"

// All five GFX10 forms (e32, e64, sdwa, dpp, dpp8) using the VOP2be variants.
multiclass VOP2be_Real_gfx10<bits<6> op, string opName, string asmName> :
  VOP2be_Real_e32_gfx10<op, opName, asmName>,
  VOP2be_Real_e64_gfx10<op, opName, asmName>,
  VOP2be_Real_sdwa_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp8_gfx10<op, opName, asmName>;

// Plain e32/e64 forms (derived from NAME) combined with the VOP2be
// sdwa/dpp/dpp8 forms (derived from opName/asmName).
multiclass VOP2e_Real_gfx10<bits<6> op, string opName, string asmName> :
  VOP2_Real_e32_gfx10<op>,
  VOP2_Real_e64_gfx10<op>,
  VOP2be_Real_sdwa_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp8_gfx10<op, opName, asmName>;

// All five GFX10 forms for an instruction whose pseudos are named after NAME.
multiclass VOP2_Real_gfx10<bits<6> op> :
  VOP2_Real_e32_gfx10<op>, VOP2_Real_e64_gfx10<op>,
  VOP2_Real_sdwa_gfx10<op>, VOP2_Real_dpp_gfx10<op>, VOP2_Real_dpp8_gfx10<op>;

// All five GFX10 forms for an instruction whose pseudos are named after opName
// and which is printed as asmName.
multiclass VOP2_Real_gfx10_with_name<bits<6> op, string opName,
                                     string asmName> :
  VOP2_Real_e32_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_e64_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_sdwa_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>;

// GFX10 VOP2 opcode assignments for instructions instantiated here for GFX10
// only (the argument is the 6-bit VOP2 opcode).
defm V_XNOR_B32        : VOP2_Real_gfx10<0x01e>;
defm V_FMAC_F32        : VOP2_Real_gfx10<0x02b>;
defm V_FMAMK_F32       : VOP2Only_Real_MADK_gfx10<0x02c>;
defm V_FMAAK_F32       : VOP2Only_Real_MADK_gfx10<0x02d>;
defm V_ADD_F16         : VOP2_Real_gfx10<0x032>;
defm V_SUB_F16         : VOP2_Real_gfx10<0x033>;
defm V_SUBREV_F16      : VOP2_Real_gfx10<0x034>;
defm V_MUL_F16         : VOP2_Real_gfx10<0x035>;
defm V_FMAC_F16        : VOP2_Real_gfx10<0x036>;
defm V_FMAMK_F16       : VOP2Only_Real_MADK_gfx10<0x037>;
defm V_FMAAK_F16       : VOP2Only_Real_MADK_gfx10<0x038>;
defm V_MAX_F16         : VOP2_Real_gfx10<0x039>;
defm V_MIN_F16         : VOP2_Real_gfx10<0x03a>;
defm V_LDEXP_F16       : VOP2_Real_gfx10<0x03b>;
defm V_PK_FMAC_F16     : VOP2_Real_e32_gfx10<0x03c>;

// VOP2 no carry-in, carry-out.
// Arguments: VOP2 opcode, base name of the pseudo the real is built from, and
// the GFX10 mnemonic it is printed as.
defm V_ADD_NC_U32 :
  VOP2_Real_gfx10_with_name<0x025, "V_ADD_U32", "v_add_nc_u32">;
defm V_SUB_NC_U32 :
  VOP2_Real_gfx10_with_name<0x026, "V_SUB_U32", "v_sub_nc_u32">;
defm V_SUBREV_NC_U32 :
  VOP2_Real_gfx10_with_name<0x027, "V_SUBREV_U32", "v_subrev_nc_u32">;

// VOP2 carry-in, carry-out.
defm V_ADD_CO_CI_U32 :
  VOP2be_Real_gfx10<0x028, "V_ADDC_U32", "v_add_co_ci_u32">;
defm V_SUB_CO_CI_U32 :
  VOP2be_Real_gfx10<0x029, "V_SUBB_U32", "v_sub_co_ci_u32">;
defm V_SUBREV_CO_CI_U32 :
  VOP2be_Real_gfx10<0x02a, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;

// V_CNDMASK_B32 keeps its name on GFX10 but uses the VOP2e aggregate, which
// takes its sdwa/dpp/dpp8 forms from the VOP2be multiclasses.
defm V_CNDMASK_B32 :
  VOP2e_Real_gfx10<0x001, "V_CNDMASK_B32", "v_cndmask_b32">;

// VOP3 only.
// The argument is the full 10-bit VOP3 opcode.
defm V_BFM_B32            : VOP3Only_Real_gfx10<0x363>;
defm V_BCNT_U32_B32       : VOP3Only_Real_gfx10<0x364>;
defm V_MBCNT_LO_U32_B32   : VOP3Only_Real_gfx10<0x365>;
defm V_MBCNT_HI_U32_B32   : VOP3Only_Real_gfx10<0x366>;
defm V_LDEXP_F32          : VOP3Only_Real_gfx10<0x362>;
defm V_CVT_PKNORM_I16_F32 : VOP3Only_Real_gfx10<0x368>;
defm V_CVT_PKNORM_U16_F32 : VOP3Only_Real_gfx10<0x369>;
defm V_CVT_PK_U16_U32     : VOP3Only_Real_gfx10<0x36a>;
defm V_CVT_PK_I16_I32     : VOP3Only_Real_gfx10<0x36b>;

// VOP3 carry-in, carry-out.
// Arguments: 10-bit VOP3 opcode, base name of the pseudo, GFX10 mnemonic.
defm V_ADD_CO_U32 :
  VOP3beOnly_Real_gfx10<0x30f, "V_ADD_I32", "v_add_co_u32">;
defm V_SUB_CO_U32 :
  VOP3beOnly_Real_gfx10<0x310, "V_SUB_I32", "v_sub_co_u32">;
defm V_SUBREV_CO_U32 :
  VOP3beOnly_Real_gfx10<0x319, "V_SUBREV_I32", "v_subrev_co_u32">;

// Assembler aliases tying the _e32 pseudos to their GFX10 reals.
// NOTE(review): VOP2eInstAliases / VOP2bInstAliases are defined outside this
// section; see their definitions for the exact alias forms generated.
let SubtargetPredicate = isGFX10Plus in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx10>;

  defm : VOP2bInstAliases<
    V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx10, "v_add_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx10, "v_sub_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx10, "v_subrev_co_ci_u32">;
} // End SubtargetPredicate = isGFX10Plus

//===----------------------------------------------------------------------===//
// GFX6, GFX7, GFX10.
//===----------------------------------------------------------------------===//

// VOP2 half of a DPP encoding layered on VOP_DPPe. Bits 8-0 (the src0 field of
// the plain VOP2 encoding) hold the constant 0xfa.
// NOTE(review): although it sits under the GFX6/GFX7/GFX10 header, within this
// file section this class is only instantiated by the VI/GFX9 *_dpp_vi and
// *_dpp_gfx9 defs further down.
class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
  VOP_DPPe <P> {
  bits<8> vdst;
  bits<8> src1;
  let Inst{8-0}   = 0xfa; // dpp
  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
}

let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  // <NAME>_gfx6_gfx7: VOP2 encoding of the pseudo named NAME (no _e32 suffix).
  multiclass VOP2Only_Real_gfx6_gfx7<bits<6> op> {
    def _gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }
  // Same, using the VOP2_MADKe encoding.
  multiclass VOP2Only_Real_MADK_gfx6_gfx7<bits<6> op> {
    def _gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
      VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }
  // <NAME>_e32_gfx6_gfx7: 32-bit VOP2 encoding of the <NAME>_e32 pseudo.
  multiclass VOP2_Real_e32_gfx6_gfx7<bits<6> op> {
    def _e32_gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
  }
  // <NAME>_e64_gfx6_gfx7: VOP3 encoding; the 9-bit opcode is the VOP2 opcode
  // prefixed with 0b100, i.e. 0x100 | op.
  multiclass VOP2_Real_e64_gfx6_gfx7<bits<6> op> {
    def _e64_gfx6_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
  // As VOP2_Real_e64_gfx6_gfx7 but with the VOP3be encoding class.
  multiclass VOP2be_Real_e64_gfx6_gfx7<bits<6> op> {
    def _e64_gfx6_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
      VOP3be_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

// Aggregates for instructions that share one opcode across GFX6/GFX7 and,
// where named so, GFX10.
multiclass VOP2Only_Real_MADK_gfx6_gfx7_gfx10<bits<6> op> :
  VOP2Only_Real_MADK_gfx6_gfx7<op>, VOP2Only_Real_MADK_gfx10<op>;

multiclass VOP2_Real_gfx6_gfx7<bits<6> op> :
  VOP2_Real_e32_gfx6_gfx7<op>, VOP2_Real_e64_gfx6_gfx7<op>;

multiclass VOP2_Real_gfx6_gfx7_gfx10<bits<6> op> :
  VOP2_Real_gfx6_gfx7<op>, VOP2_Real_gfx10<op>;

multiclass VOP2be_Real_gfx6_gfx7<bits<6> op> :
  VOP2_Real_e32_gfx6_gfx7<op>, VOP2be_Real_e64_gfx6_gfx7<op>;

// Opcodes instantiated for GFX6/GFX7 only.
defm V_CNDMASK_B32        : VOP2_Real_gfx6_gfx7<0x000>;
defm V_MIN_LEGACY_F32     : VOP2_Real_gfx6_gfx7<0x00d>;
defm V_MAX_LEGACY_F32     : VOP2_Real_gfx6_gfx7<0x00e>;
defm V_LSHR_B32           : VOP2_Real_gfx6_gfx7<0x015>;
defm V_ASHR_I32           : VOP2_Real_gfx6_gfx7<0x017>;
defm V_LSHL_B32           : VOP2_Real_gfx6_gfx7<0x019>;
defm V_BFM_B32            : VOP2_Real_gfx6_gfx7<0x01e>;
defm V_BCNT_U32_B32       : VOP2_Real_gfx6_gfx7<0x022>;
defm V_MBCNT_LO_U32_B32   : VOP2_Real_gfx6_gfx7<0x023>;
defm V_MBCNT_HI_U32_B32   : VOP2_Real_gfx6_gfx7<0x024>;
defm V_LDEXP_F32          : VOP2_Real_gfx6_gfx7<0x02b>;
defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_gfx6_gfx7<0x02c>;
defm V_CVT_PKNORM_I16_F32 : VOP2_Real_gfx6_gfx7<0x02d>;
defm V_CVT_PKNORM_U16_F32 : VOP2_Real_gfx6_gfx7<0x02e>;
defm V_CVT_PK_U16_U32     : VOP2_Real_gfx6_gfx7<0x030>;
defm V_CVT_PK_I16_I32     : VOP2_Real_gfx6_gfx7<0x031>;
defm V_ADD_I32            : VOP2be_Real_gfx6_gfx7<0x025>;
defm V_SUB_I32            : VOP2be_Real_gfx6_gfx7<0x026>;
defm V_SUBREV_I32         : VOP2be_Real_gfx6_gfx7<0x027>;
defm V_ADDC_U32           : VOP2be_Real_gfx6_gfx7<0x028>;
defm V_SUBB_U32           : VOP2be_Real_gfx6_gfx7<0x029>;
defm V_SUBBREV_U32        : VOP2be_Real_gfx6_gfx7<0x02a>;

defm V_READLANE_B32 : VOP2Only_Real_gfx6_gfx7<0x001>;

// V_WRITELANE_B32's real is given an explicit input operand list that
// includes $vdst_in.
let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in {
  defm V_WRITELANE_B32 : VOP2Only_Real_gfx6_gfx7<0x002>;
} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in)

let SubtargetPredicate = isGFX6GFX7 in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx6_gfx7>;
} // End SubtargetPredicate = isGFX6GFX7

// Opcodes instantiated once for GFX6/GFX7 and GFX10 with the same value.
defm V_ADD_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x003>;
defm V_SUB_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x004>;
defm V_SUBREV_F32         : VOP2_Real_gfx6_gfx7_gfx10<0x005>;
defm V_MAC_LEGACY_F32     : VOP2_Real_gfx6_gfx7_gfx10<0x006>;
defm V_MUL_LEGACY_F32     : VOP2_Real_gfx6_gfx7_gfx10<0x007>;
defm V_MUL_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x008>;
defm V_MUL_I32_I24        : VOP2_Real_gfx6_gfx7_gfx10<0x009>;
defm V_MUL_HI_I32_I24     : VOP2_Real_gfx6_gfx7_gfx10<0x00a>;
defm V_MUL_U32_U24        : VOP2_Real_gfx6_gfx7_gfx10<0x00b>;
defm V_MUL_HI_U32_U24     : VOP2_Real_gfx6_gfx7_gfx10<0x00c>;
defm V_MIN_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x00f>;
defm V_MAX_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x010>;
defm V_MIN_I32            : VOP2_Real_gfx6_gfx7_gfx10<0x011>;
defm V_MAX_I32            : VOP2_Real_gfx6_gfx7_gfx10<0x012>;
defm V_MIN_U32            : VOP2_Real_gfx6_gfx7_gfx10<0x013>;
defm V_MAX_U32            : VOP2_Real_gfx6_gfx7_gfx10<0x014>;
defm V_LSHRREV_B32        : VOP2_Real_gfx6_gfx7_gfx10<0x016>;
defm V_ASHRREV_I32        : VOP2_Real_gfx6_gfx7_gfx10<0x018>;
defm V_LSHLREV_B32        : VOP2_Real_gfx6_gfx7_gfx10<0x01a>;
defm V_AND_B32            : VOP2_Real_gfx6_gfx7_gfx10<0x01b>;
defm V_OR_B32             : VOP2_Real_gfx6_gfx7_gfx10<0x01c>;
defm V_XOR_B32            : VOP2_Real_gfx6_gfx7_gfx10<0x01d>;
defm V_MAC_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x01f>;
defm V_CVT_PKRTZ_F16_F32  : VOP2_Real_gfx6_gfx7_gfx10<0x02f>;
defm V_MADMK_F32          : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x020>;
defm V_MADAK_F32          : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x021>;

//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {

// <NAME>_vi: VOP2_MADKe encoding of the pseudo named NAME.
multiclass VOP2_Real_MADK_vi <bits<6> op> {
  def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
            VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
}

// <NAME>_e32_vi: 32-bit VOP2 encoding of the <NAME>_e32 pseudo.
multiclass VOP2_Real_e32_vi <bits<6> op> {
  def _e32_vi :
    VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
}

// <NAME>_e64_vi: VOP3 encoding; op is the full 10-bit VOP3 opcode.
multiclass VOP2_Real_e64_vi <bits<10> op> {
  def _e64_vi :
    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
    VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}

// VOP3-only real whose assembly string is rebuilt from the pseudo's Mnemonic
// and operands, and whose output operand list is forced to a single
// VGPR_32 $vdst.
multiclass VOP2_Real_e64only_vi <bits<10> op> {
  def _e64_vi :
    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
    VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
      // Hack to stop printing _e64
      VOP3_Pseudo ps = !cast<VOP3_Pseudo>(NAME#"_e64");
      let OutOperandList = (outs VGPR_32:$vdst);
      let AsmString = ps.Mnemonic # " " # ps.AsmOperands;
    }
}

// e32 + e64 pair; the VOP3 opcode is 0x100 | op.
multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
  VOP2_Real_e32_vi<op>,
  VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;

} // End AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8"

// <NAME>_sdwa_vi, guarded by the _e32 profile's HasExtSDWA flag.
multiclass VOP2_SDWA_Real <bits<6> op> {
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA>.ret in
  def _sdwa_vi :
    VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}

// <NAME>_sdwa_gfx9, guarded by the _e32 profile's HasExtSDWA9 flag.
multiclass VOP2_SDWA9_Real <bits<6> op> {
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}

let AssemblerPredicate = isGFX8Only in {

// GFX8-only reals (e32, e64, sdwa, dpp) for the pseudos named after OpName,
// printed with the mnemonic AsmName.
multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName> {
  def _e32_vi :
    VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.VI>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> {
      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32");
      let AsmString = AsmName # ps.AsmOperands;
      let DecoderNamespace = "GFX8";
    }
  def _e64_vi :
    VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.VI>,
    VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
      VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
      let AsmString = AsmName # ps.AsmOperands;
      let DecoderNamespace = "GFX8";
    }
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA>.ret in
    def _sdwa_vi :
      VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
      VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
        VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
        let AsmString = AsmName # ps.AsmOperands;
      }
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_vi :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.VI>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
        VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
        let AsmString = AsmName # ps.AsmOperands;
      }
}
} // End AssemblerPredicate = isGFX8Only

let AssemblerPredicate = isGFX9Only in {

// GFX9-only counterpart of VOP2be_Real_e32e64_vi_only: same opcodes and
// pseudos, GFX9 encoding family, "GFX9" decoder namespace for e32/e64.
// NOTE(review): the DPP real is placed in the "SDWA9" decoder namespace; the
// reason is not visible here - confirm.
multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
  def _e32_gfx9 :
    VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.GFX9>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> {
      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32");
      let AsmString = AsmName # ps.AsmOperands;
      let DecoderNamespace = "GFX9";
    }
  def _e64_gfx9 :
    VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.GFX9>,
    VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
      VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
      let AsmString = AsmName # ps.AsmOperands;
      let DecoderNamespace = "GFX9";
    }
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx9 :
      VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
      VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
        VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
        let AsmString = AsmName # ps.AsmOperands;
      }
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_gfx9 :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.GFX9>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
        VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
        let AsmString = AsmName # ps.AsmOperands;
        let DecoderNamespace = "SDWA9";
      }
}

// GFX9-only reals for pseudos named after NAME; the assembly strings are left
// as provided by the base classes.
multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
  def _e32_gfx9 :
    VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX9>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>{
      let DecoderNamespace = "GFX9";
    }
  def _e64_gfx9 :
    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
    VOP3e_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
      let DecoderNamespace = "GFX9";
    }
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx9 :
      VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
      }
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_gfx9 :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
        let DecoderNamespace = "SDWA9";
      }
}

} // End AssemblerPredicate = isGFX9Only

// Full VI/GFX9 set for an instruction shared by both: e32, e64, VI SDWA,
// GFX9 SDWA and (when the profile has HasExtDPP) a VI DPP real.
multiclass VOP2_Real_e32e64_vi <bits<6> op> :
  Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {

  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_vi :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
}

// VI/GFX9 VOP2 opcode assignments.
defm V_CNDMASK_B32        : VOP2_Real_e32e64_vi <0x0>;
defm V_ADD_F32            : VOP2_Real_e32e64_vi <0x1>;
defm V_SUB_F32            : VOP2_Real_e32e64_vi <0x2>;
defm V_SUBREV_F32         : VOP2_Real_e32e64_vi <0x3>;
defm V_MUL_LEGACY_F32     : VOP2_Real_e32e64_vi <0x4>;
defm V_MUL_F32            : VOP2_Real_e32e64_vi <0x5>;
defm V_MUL_I32_I24        : VOP2_Real_e32e64_vi <0x6>;
defm V_MUL_HI_I32_I24     : VOP2_Real_e32e64_vi <0x7>;
defm V_MUL_U32_U24        : VOP2_Real_e32e64_vi <0x8>;
defm V_MUL_HI_U32_U24     : VOP2_Real_e32e64_vi <0x9>;
defm V_MIN_F32            : VOP2_Real_e32e64_vi <0xa>;
defm V_MAX_F32            : VOP2_Real_e32e64_vi <0xb>;
defm V_MIN_I32            : VOP2_Real_e32e64_vi <0xc>;
defm V_MAX_I32            : VOP2_Real_e32e64_vi <0xd>;
defm V_MIN_U32            : VOP2_Real_e32e64_vi <0xe>;
defm V_MAX_U32            : VOP2_Real_e32e64_vi <0xf>;
defm V_LSHRREV_B32        : VOP2_Real_e32e64_vi <0x10>;
defm V_ASHRREV_I32        : VOP2_Real_e32e64_vi <0x11>;
defm V_LSHLREV_B32        : VOP2_Real_e32e64_vi <0x12>;
defm V_AND_B32            : VOP2_Real_e32e64_vi <0x13>;
defm V_OR_B32             : VOP2_Real_e32e64_vi <0x14>;
defm V_XOR_B32            : VOP2_Real_e32e64_vi <0x15>;
defm V_MAC_F32            : VOP2_Real_e32e64_vi <0x16>;
defm V_MADMK_F32          : VOP2_Real_MADK_vi <0x17>;
defm V_MADAK_F32          : VOP2_Real_MADK_vi <0x18>;

// GFX8-only carry ops: opcode, base name of the pseudo, GFX8 mnemonic.
defm V_ADD_U32            : VOP2be_Real_e32e64_vi_only <0x19, "V_ADD_I32",     "v_add_u32">;
defm V_SUB_U32            : VOP2be_Real_e32e64_vi_only <0x1a, "V_SUB_I32",     "v_sub_u32">;
defm V_SUBREV_U32         : VOP2be_Real_e32e64_vi_only <0x1b, "V_SUBREV_I32",  "v_subrev_u32">;
defm V_ADDC_U32           : VOP2be_Real_e32e64_vi_only <0x1c, "V_ADDC_U32",    "v_addc_u32">;
defm V_SUBB_U32           : VOP2be_Real_e32e64_vi_only <0x1d, "V_SUBB_U32",    "v_subb_u32">;
defm V_SUBBREV_U32        : VOP2be_Real_e32e64_vi_only <0x1e, "V_SUBBREV_U32", "v_subbrev_u32">;

// GFX9-only carry ops: same opcodes and pseudos as the GFX8 group above,
// printed with the "_co" mnemonics.
defm V_ADD_CO_U32         : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_I32",     "v_add_co_u32">;
defm V_SUB_CO_U32         : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_I32",     "v_sub_co_u32">;
defm V_SUBREV_CO_U32      : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_I32",  "v_subrev_co_u32">;
defm V_ADDC_CO_U32        : VOP2be_Real_e32e64_gfx9 <0x1c, "V_ADDC_U32",    "v_addc_co_u32">;
defm V_SUBB_CO_U32        : VOP2be_Real_e32e64_gfx9 <0x1d, "V_SUBB_U32",    "v_subb_co_u32">;
defm V_SUBBREV_CO_U32     : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_subbrev_co_u32">;

// GFX9-only reals for V_ADD_U32 / V_SUB_U32 / V_SUBREV_U32.
defm V_ADD_U32            : VOP2_Real_e32e64_gfx9 <0x34>;
defm V_SUB_U32            : VOP2_Real_e32e64_gfx9 <0x35>;
defm V_SUBREV_U32         : VOP2_Real_e32e64_gfx9 <0x36>;

// Instructions that only get a VOP3 (_e64) real here; the argument is the
// full 10-bit VOP3 opcode.
defm V_BFM_B32            : VOP2_Real_e64only_vi <0x293>;
defm V_BCNT_U32_B32       : VOP2_Real_e64only_vi <0x28b>;
defm V_MBCNT_LO_U32_B32   : VOP2_Real_e64only_vi <0x28c>;
defm V_MBCNT_HI_U32_B32   : VOP2_Real_e64only_vi <0x28d>;
defm V_LDEXP_F32          : VOP2_Real_e64only_vi <0x288>;
defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64only_vi <0x1f0>;
defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64only_vi <0x294>;
defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64only_vi <0x295>;
defm V_CVT_PKRTZ_F16_F32  : VOP2_Real_e64only_vi <0x296>;
defm V_CVT_PK_U16_U32     : VOP2_Real_e64only_vi <0x297>;
defm V_CVT_PK_I16_I32     : VOP2_Real_e64only_vi <0x298>;

// 16-bit VOP2 opcode assignments.
defm V_ADD_F16            : VOP2_Real_e32e64_vi <0x1f>;
defm V_SUB_F16            : VOP2_Real_e32e64_vi <0x20>;
defm V_SUBREV_F16         : VOP2_Real_e32e64_vi <0x21>;
defm V_MUL_F16            : VOP2_Real_e32e64_vi <0x22>;
defm V_MAC_F16            : VOP2_Real_e32e64_vi <0x23>;
defm V_MADMK_F16          : VOP2_Real_MADK_vi <0x24>;
defm V_MADAK_F16          : VOP2_Real_MADK_vi <0x25>;
defm V_ADD_U16            : VOP2_Real_e32e64_vi <0x26>;
defm V_SUB_U16            : VOP2_Real_e32e64_vi <0x27>;
defm V_SUBREV_U16         : VOP2_Real_e32e64_vi <0x28>;
defm V_MUL_LO_U16         : VOP2_Real_e32e64_vi <0x29>;
defm V_LSHLREV_B16        : VOP2_Real_e32e64_vi <0x2a>;
defm V_LSHRREV_B16        : VOP2_Real_e32e64_vi <0x2b>;
defm V_ASHRREV_I16        : VOP2_Real_e32e64_vi <0x2c>;
defm V_MAX_F16            : VOP2_Real_e32e64_vi <0x2d>;
defm V_MIN_F16            : VOP2_Real_e32e64_vi <0x2e>;
defm V_MAX_U16            : VOP2_Real_e32e64_vi <0x2f>;
defm V_MAX_I16            : VOP2_Real_e32e64_vi <0x30>;
defm V_MIN_U16            : VOP2_Real_e32e64_vi <0x31>;
defm V_MIN_I16            : VOP2_Real_e32e64_vi <0x32>;
defm V_LDEXP_F16          : VOP2_Real_e32e64_vi <0x33>;

let SubtargetPredicate = isGFX8GFX9 in {

// Aliases to simplify matching of floating-point instructions that
// are VOP2 on SI and VOP3 on VI.
// The alias accepts the three-operand form "<name> $dst, $src0, $src1" and
// expands to the VOP3 real with every other operand set to 0; one extra
// trailing 0 is supplied when the profile has HasOMod.
class SI2_VI3Alias <string name, VOP3_Real inst> : InstAlias <
  name#" $dst, $src0, $src1",
  !if(inst.Pfl.HasOMod,
      (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0),
      (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0))
>, PredicateControl {
  let UseInstAsmMatchConverter = 0;
  let AsmVariantName = AMDGPUAsmVariants.VOP3;
}

def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;

defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_vi>;

} // End SubtargetPredicate = isGFX8GFX9

let SubtargetPredicate = isGFX9Only in {

// Aliases tying the _e32 carry pseudos to their GFX9 "_co" reals.
defm : VOP2bInstAliases<V_ADD_I32_e32,     V_ADD_CO_U32_e32_gfx9,     "v_add_co_u32">;
defm : VOP2bInstAliases<V_ADDC_U32_e32,    V_ADDC_CO_U32_e32_gfx9,    "v_addc_co_u32">;
defm : VOP2bInstAliases<V_SUB_I32_e32,     V_SUB_CO_U32_e32_gfx9,     "v_sub_co_u32">;
defm : VOP2bInstAliases<V_SUBB_U32_e32,    V_SUBB_CO_U32_e32_gfx9,    "v_subb_co_u32">;
defm : VOP2bInstAliases<V_SUBREV_I32_e32,  V_SUBREV_CO_U32_e32_gfx9,  "v_subrev_co_u32">;
defm : VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBBREV_CO_U32_e32_gfx9, "v_subbrev_co_u32">;

} // End SubtargetPredicate = isGFX9Only

let SubtargetPredicate = HasDLInsts in {

defm V_FMAC_F32 : VOP2_Real_e32e64_vi <0x3b>;
defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>;

} // End SubtargetPredicate = HasDLInsts

// Dot-accumulate reals for VI/GFX9: the e32 form plus a <NAME>_dpp_vi record
// built directly on VOP2_DPP (not VOP_DPP_Real + VOP2_DPPe like the other VI
// DPP reals in this file).
multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : VOP2_Real_e32_vi<op> {
  def _dpp_vi : VOP2_DPP<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
}

// Dot-accumulate reals for GFX10: e32, dpp and dpp8 forms (no e64 or sdwa).
multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> :
  VOP2_Real_e32_gfx10<op>,
  VOP2_Real_dpp_gfx10<op>,
  VOP2_Real_dpp8_gfx10<op>;

let SubtargetPredicate = HasDot5Insts in {
  defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx9<0x37>;
  // NB: Opcode conflicts with V_DOT8C_I32_I4
  // This opcode exists in gfx 10.1* only
  defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx10<0x02>;
} // End SubtargetPredicate = HasDot5Insts

let SubtargetPredicate = HasDot6Insts in {
  defm V_DOT4C_I32_I8  : VOP2_Real_DOT_ACC_gfx9<0x39>;
  defm V_DOT4C_I32_I8  : VOP2_Real_DOT_ACC_gfx10<0x0d>;
} // End SubtargetPredicate = HasDot6Insts

let SubtargetPredicate = HasDot4Insts in {
  defm V_DOT2C_I32_I16 : VOP2_Real_DOT_ACC_gfx9<0x38>;
} // End SubtargetPredicate = HasDot4Insts
let SubtargetPredicate = HasDot3Insts in {
  defm V_DOT8C_I32_I4  : VOP2_Real_DOT_ACC_gfx9<0x3a>;
} // End SubtargetPredicate = HasDot3Insts

let SubtargetPredicate = HasPkFmacF16Inst in {
defm V_PK_FMAC_F16 : VOP2_Real_e32_vi<0x3c>;
} // End SubtargetPredicate = HasPkFmacF16Inst