//===-- VOP2Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP2 Classes
//===----------------------------------------------------------------------===//

// 32-bit VOP2 encoding. Operand fields that the profile does not provide
// (HasSrc0 / HasSrc1 / EmitDst false) are encoded as 0.
class VOP2e <bits<6> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;
  bits<8> src1;

  let Inst{8-0}   = !if(P.HasSrc0, src0, 0);
  let Inst{16-9}  = !if(P.HasSrc1, src1, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
}

// 64-bit VOP2 encoding: same low dword layout as VOP2e, followed by a 32-bit
// immediate in bits 63-32.
class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 {
  bits<8>  vdst;
  bits<9>  src0;
  bits<8>  src1;
  bits<32> imm;

  let Inst{8-0}   = !if(P.HasSrc0, src0, 0);
  let Inst{16-9}  = !if(P.HasSrc1, src1, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
  let Inst{63-32} = imm;
}

// SDWA form of the VOP2 encoding: the src0 slot holds the fixed value 0xf9.
class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;
  bits<8> src1;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
}

// SDWA9 form of the VOP2 encoding. src1 is 9 bits wide here: the low 8 bits
// go into the usual src1 slot and bit 8 is placed in Inst{63}.
class VOP2_SDWA9Ae <bits<6> op, VOPProfile P> : VOP_SDWA9Ae <P> {
  bits<8> vdst;
  bits<9> src1;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
  let Inst{63}    = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr
}

// Pseudo instruction for the 32-bit VOP2 form. Operands and asm string come
// from the profile's 32-bit variants (Outs32 / Ins32 / Asm32).
class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suffix = "_e32"> :
  VOP_Pseudo <opName, suffix, P, P.Outs32, P.Ins32, "", pattern> {

  let AsmOperands = P.Asm32;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  let VOP2 = 1;
  let VALU = 1;
  let Uses = [EXEC];

  let AsmVariantName = AMDGPUAsmVariants.Default;
}

// Real (encodable) counterpart of a VOP2_Pseudo for one encoding family.
// Operand lists, mnemonic and flags are taken from the pseudo.
class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  // (Constraints and DisableEncoding used to be assigned twice with the same
  // value; a single assignment is sufficient.)
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let AsmVariantName       = ps.AsmVariantName;
  let Constraints          = ps.Constraints;
  let DisableEncoding      = ps.DisableEncoding;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses                 = ps.Uses;
  let Defs                 = ps.Defs;
}

// SDWA pseudo for VOP2; only overrides the asm match converter.
class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP2";
}

// DPP pseudo for VOP2; adds nothing on top of VOP_DPP_Pseudo.
class VOP2_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_DPP_Pseudo <OpName, P, pattern> {
}


// Selection pattern for the 64-bit (_e64) form of a two-source operation.
// With modifiers, src0 goes through VOP3Mods0 (with or without omod,
// depending on P.HasOMod) and src1 through VOP3Mods; otherwise the node is
// matched on the plain operands.
class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret = !if(P.HasModifiers,
    [(set P.DstVT:$vdst,
      (node (P.Src0VT
              !if(P.HasOMod,
                  (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
                  (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))),
            (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
    [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]);
}

// Defines the _e32 pseudo. revOp names the "reversed" partner; the record is
// marked as the original when revOp equals opName.
multiclass VOP2Inst_e32<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName,
                        bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
               Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
  } // End renamedInGFX9 = GFX9Renamed
}

// Defines the _e64 (VOP3-encoded) pseudo with the getVOP2Pat64 pattern.
multiclass VOP2Inst_e64<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName,
                        bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
               Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
  } // End renamedInGFX9 = GFX9Renamed
}

// Defines the _sdwa pseudo. node and revOp are accepted for signature parity
// with the other VOP2Inst_* multiclasses but are not used here.
multiclass VOP2Inst_sdwa<string opName,
                         VOPProfile P,
                         SDPatternOperator node = null_frag,
                         string revOp = opName,
                         bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
  } // End renamedInGFX9 = GFX9Renamed
}

// Full VOP2 instruction: _e32, _e64 and _sdwa pseudos, plus a _dpp pseudo
// when the profile has HasExtDPP set.
multiclass VOP2Inst<string opName,
                    VOPProfile P,
                    SDPatternOperator node = null_frag,
                    string revOp = opName,
                    bit GFX9Renamed = 0> :
    VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
    VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
    VOP2Inst_sdwa<opName, P, node, revOp, GFX9Renamed> {
  let renamedInGFX9 = GFX9Renamed in {
    foreach _ = BoolToList<P.HasExtDPP>.ret in
      def _dpp : VOP2_DPP_Pseudo <opName, P>;
  } // End renamedInGFX9 = GFX9Renamed
}

// VOP2 instruction whose 32-bit forms implicitly define VCC (and also read
// VCC when useSGPRInput is set, which defaults to "profile has 3 sources").
// The _e64 form is defined outside the implicit Uses/Defs scope.
multiclass VOP2bInst <string opName,
                      VOPProfile P,
                      SDPatternOperator node = null_frag,
                      string revOp = opName,
                      bit GFX9Renamed = 0,
                      bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
  let renamedInGFX9 = GFX9Renamed in {
    let SchedRW = [Write32Bit, WriteSALU] in {
      let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
        def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
                   Commutable_REV<revOp#"_e32", !eq(revOp, opName)> {
          let usesCustomInserter = !eq(P.NumSrcArgs, 2);
        }

        def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
          let AsmMatchConverter = "cvtSdwaVOP2b";
        }
        foreach _ = BoolToList<P.HasExtDPP>.ret in
          def _dpp : VOP2_DPP_Pseudo <opName, P>;
      } // End Uses, Defs

      def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
                 Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
    } // End SchedRW = [Write32Bit, WriteSALU]
  } // End renamedInGFX9 = GFX9Renamed
}

// Assembler alias for a VOP2b instruction: the profile's Asm32 string with
// every "vcc" replaced by opnd.
class VOP2bInstAlias <VOP2_Pseudo ps, Instruction inst,
                      string OpName, string opnd> :
  InstAlias <OpName#" "#!subst("vcc", opnd, ps.Pfl.Asm32),
             (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
                   ps.Pfl.Src1RC32:$src1)>,
  PredicateControl {
}

// Emits one VOP2bInstAlias per wave size: "vcc_lo" under isWave32 and "vcc"
// under isWave64.
multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> {
  let WaveSizePredicate = isWave32 in {
    def : VOP2bInstAlias<ps, inst, OpName, "vcc_lo">;
  }
  let WaveSizePredicate = isWave64 in {
    def : VOP2bInstAlias<ps, inst, OpName, "vcc">;
  }
}

// VOP2 instruction whose 32-bit forms implicitly read VCC when useSGPRInput
// is set. Unlike VOP2bInst there is no implicit VCC def and the _e32 form
// carries no selection pattern.
multiclass VOP2eInst <string opName,
                      VOPProfile P,
                      SDPatternOperator node = null_frag,
                      string revOp = opName,
                      bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {

  let SchedRW = [Write32Bit] in {
    let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
      def _e32 : VOP2_Pseudo <opName, P>,
                 Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;

      def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
        let AsmMatchConverter = "cvtSdwaVOP2b";
      }

      foreach _ = BoolToList<P.HasExtDPP>.ret in
        def _dpp : VOP2_DPP_Pseudo <opName, P>;
    } // End Uses

    def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
               Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
  } // End SchedRW = [Write32Bit]
}

// Assembler alias for a VOP2e instruction: the profile's Asm32 string with
// ", <opnd>" appended.
class VOP2eInstAlias <VOP2_Pseudo ps, Instruction inst, string opnd> :
  InstAlias <ps.OpName#" "#ps.Pfl.Asm32#", "#opnd,
             (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
                   ps.Pfl.Src1RC32:$src1)>,
  PredicateControl {
}

// Emits one VOP2eInstAlias per wave size: "vcc_lo" under isWave32 and "vcc"
// under isWave64.
multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
  let WaveSizePredicate = isWave32 in {
    def : VOP2eInstAlias<ps, inst, "vcc_lo">;
  }
  let WaveSizePredicate = isWave64 in {
    def : VOP2eInstAlias<ps, inst, "vcc">;
  }
}

// Profile for "madak"-style instructions: the literal constant is the last
// operand ($vdst, $src0, $src1, $imm). The immediate operand type is chosen
// from the value type's size.
class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
  field dag Ins32 = (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm);
  field bit HasExt = 0;

  // Hack to stop printing _e64
  let DstRC = RegisterOperand<VGPR_32>;
  field string Asm32 = " $vdst, $src0, $src1, $imm";
}

def VOP_MADAK_F16 : VOP_MADAK <f16>;
def VOP_MADAK_F32 : VOP_MADAK <f32>;

// Profile for "madmk"-style instructions: the literal constant is the middle
// operand ($vdst, $src0, $imm, $src1).
class VOP_MADMK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
  field dag Ins32 = (ins VCSrc_f32:$src0, ImmOpType:$imm, VGPR_32:$src1);
  field bit HasExt = 0;

  // Hack to stop printing _e64
  let DstRC = RegisterOperand<VGPR_32>;
  field string Asm32 = " $vdst, $src0, $imm, $src1";
}

def VOP_MADMK_F16 : VOP_MADMK <f16>;
def VOP_MADMK_F32 : VOP_MADMK <f32>;

// FIXME: Remove src2_modifiers. It isn't used, so is wasting memory
//        and processing time but it makes it easier to convert to mad.
// Profile for multiply-accumulate style instructions. The accumulator is a
// VGPR_32 $src2 operand that is listed in every input dag, while HasSrc2 and
// HasSrc2Mods are cleared and the asm strings are built for two sources.
class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, vt0]> {
  // Input operand lists for each encoding variant.
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
  let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
                       0, HasModifiers, HasModifiers, HasOMod,
                       Src0Mod, Src1Mod, Src2Mod>.ret;
  let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                    VGPR_32:$src2, // stub argument
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));

  let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                     Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                     VGPR_32:$src2, // stub argument
                     dpp8:$dpp8, FI:$fi);

  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                     VGPR_32:$src2, // stub argument
                     clampmod:$clamp, omod:$omod,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);

  // Assembly strings for each encoding variant.
  let Asm32    = getAsm32<1, 2, vt0>.ret;
  let Asm64    = getAsm64<1, 2, 0, HasModifiers, HasOMod, vt0>.ret;
  let AsmDPP   = getAsmDPP<1, 2, HasModifiers, vt0>.ret;
  let AsmDPP16 = getAsmDPP16<1, 2, HasModifiers, vt0>.ret;
  let AsmDPP8  = getAsmDPP8<1, 2, 0, vt0>.ret;
  let AsmSDWA  = getAsmSDWA<1, 2, vt0>.ret;
  let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt0>.ret;

  let HasSrc2     = 0;
  let HasSrc2Mods = 0;

  // Extended-encoding availability.
  let HasExt      = 1;
  let HasExtDPP   = 1;
  let HasExtSDWA  = 1;
  let HasExtSDWA9 = 0;
  let TieRegDPP   = "$src2";
}

def VOP_MAC_F16 : VOP_MAC <f16>;
def VOP_MAC_F32 : VOP_MAC <f32>;

// VOP_MAC variant used by the accumulating dot-product instructions: clamp,
// SDWA, op_sel and packed handling are switched off, modifiers are on.
class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> {
  let HasClamp     = 0;
  let HasExtSDWA   = 0;
  let HasModifiers = 1;
  let HasOpSel     = 0;
  let IsPacked     = 0;
}

def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> {
  let Src0ModDPP = FPVRegInputMods;
  let Src1ModDPP = FPVRegInputMods;
}
def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32>;

// Write out to vcc or arbitrary SGPR.
def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp=*/1> {
  let Asm32    = "$vdst, vcc, $src0, $src1";
  let Asm64    = "$vdst, $sdst, $src0, $src1$clamp";
  let AsmSDWA  = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP   = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8  = "$vdst, vcc, $src0, $src1 $dpp8$fi";
  let AsmDPP16 = AsmDPP#"$fi";
  // Only the 64-bit form has an explicit scalar destination operand.
  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
}

// Write out to vcc or arbitrary SGPR and read in from vcc or
// arbitrary SGPR.
def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*/1> {
  let Asm32    = "$vdst, vcc, $src0, $src1, vcc";
  let Asm64    = "$vdst, $sdst, $src0, $src1, $src2$clamp";
  let AsmSDWA  = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP   = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8  = "$vdst, vcc, $src0, $src1, vcc $dpp8$fi";
  let AsmDPP16 = AsmDPP#"$fi";
  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);

  // Suppress src2 implied by type since the 32-bit encoding uses an
  // implicit VCC use.
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);

  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                     clampmod:$clamp,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);

  let InsDPP = (ins DstRCDPP:$old,
                    Src0DPP:$src0,
                    Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));

  let HasExt      = 1;
  let HasExtDPP   = 1;
  let HasExtSDWA  = 1;
  let HasExtSDWA9 = 1;
}

// Read in from vcc or arbitrary SGPR.
def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableF32SrcMods=*/1> {
  let Asm32    = "$vdst, $src0, $src1";
  let Asm64    = "$vdst, $src0_modifiers, $src1_modifiers, $src2";
  let AsmSDWA  = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP   = "$vdst, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8  = "$vdst, $src0, $src1, vcc $dpp8$fi";
  let AsmDPP16 = AsmDPP#"$fi";

  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst);

  // Suppress src2 implied by type since the 32-bit encoding uses an
  // implicit VCC use.
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);

  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                     clampmod:$clamp,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);

  let InsDPP = (ins DstRCDPP:$old,
                    Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));

  let HasExt      = 1;
  let HasExtDPP   = 1;
  let HasExtSDWA  = 1;
  let HasExtSDWA9 = 1;
}

// v_readlane profile: the result is written to an SReg_32 and the 64-bit
// operand lists / asm string simply reuse the 32-bit ones. No extended
// encodings.
def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
  let Outs32 = (outs SReg_32:$vdst);
  let Outs64 = Outs32;
  let Ins32  = (ins VRegOrLds_32:$src0, SCSrc_b32:$src1);
  let Ins64  = Ins32;
  let Asm32  = " $vdst, $src0, $src1";
  let Asm64  = Asm32;

  let HasExt      = 0;
  let HasExtDPP   = 0;
  let HasExtSDWA  = 0;
  let HasExtSDWA9 = 0;
}

// v_writelane profile: takes an extra $vdst_in VGPR input that is not part
// of the asm string; HasSrc2 / HasSrc2Mods are cleared. No extended
// encodings.
def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
  let Outs32 = (outs VGPR_32:$vdst);
  let Outs64 = Outs32;
  let Ins32  = (ins SCSrc_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in);
  let Ins64  = Ins32;
  let Asm32  = " $vdst, $src0, $src1";
  let Asm64  = Asm32;
  let HasSrc2     = 0;
  let HasSrc2Mods = 0;

  let HasExt      = 0;
  let HasExtDPP   = 0;
  let HasExtSDWA  = 0;
  let HasExtSDWA9 = 0;
}

//===----------------------------------------------------------------------===//
// VOP2 Instructions
//===----------------------------------------------------------------------===//

defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>;
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;

let isCommutable = 1 in {
defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, fadd>;
defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, fsub>;
defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, fmul>;
defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmul_i24>;
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_i24>;
defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmul_u24>;
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_u24>;
defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>;
defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>;
defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, lshr_rev, "v_lshr_b32">;
defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, ashr_rev, "v_ashr_i32">;
defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, lshl_rev, "v_lshl_b32">;
defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, and>;
defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;

// The accumulator operand is tied to the destination and left out of the
// encoding.
let Constraints = "$vdst = $src2", DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1 in {
defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>;
}

def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>;

// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.

// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in VI,
// but the VI instructions behave the same as the SI versions.
defm V_ADD_I32 : VOP2bInst <"v_add_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_i32", 1>;
defm V_SUB_I32 : VOP2bInst <"v_sub_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32", 1>;
defm V_SUBREV_I32 : VOP2bInst <"v_subrev_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32", 1>;
defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32", 1>;
defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;
defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;


let SubtargetPredicate = HasAddNoCarryInsts in {
defm V_ADD_U32 : VOP2Inst <"v_add_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_add_u32", 1>;
defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
} // End SubtargetPredicate = HasAddNoCarryInsts

} // End isCommutable = 1

// These are special and do not read the exec mask.
// Lane access instructions: convergent, with an empty implicit-use list.
let isConvergent = 1, Uses = []<Register> in {
def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
  [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>;

let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE,
  [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]>;
} // End $vdst = $vdst_in, DisableEncoding $vdst_in
} // End isConvergent = 1

defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_NO_EXT<VOP_I32_I32_I32>>;
defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>>;
defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_lo>;
defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_hi>;
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT<VOP_F32_F32_I32>, AMDGPUldexp>;
defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst"
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_i16_f32>;
defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_u16_f32>;
defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT<VOP_V2F16_F32_F32>, AMDGPUpkrtz_f16_f32>;
defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_u16_u32>;
defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_i16_i32>;


let SubtargetPredicate = isGFX6GFX7 in {
defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>;
defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>;
} // End SubtargetPredicate = isGFX6GFX7

let SubtargetPredicate = isGFX6GFX7GFX10 in {
let isCommutable = 1 in {
defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>;
defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_I32_I32_I32>;
defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_I32_I32_I32>;
defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32>;
} // End isCommutable = 1
} // End SubtargetPredicate = isGFX6GFX7GFX10

// Selects Inst for the divergent form of Op. If Inst is the "original"
// record of its Commutable_REV pair the operands are passed through in
// order; otherwise (Inst is the reversed variant) they are swapped.
class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
  GCNPat<
      (getDivergentFrag<Op>.ret Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
      !if(!cast<Commutable_REV>(Inst).IsOrig,
        (Inst $src0, $src1),
        (Inst $src1, $src0)
      )
  >;

// Same as DivergentBinOp, for instructions that take a trailing extra
// operand, which is always passed as 0 here.
class DivergentClampingBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
  GCNPat<
      (getDivergentFrag<Op>.ret Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
      !if(!cast<Commutable_REV>(Inst).IsOrig,
        (Inst $src0, $src1, 0),
        (Inst $src1, $src0, 0)
      )
  >;

def : DivergentBinOp<srl, V_LSHRREV_B32_e64>;
def : DivergentBinOp<sra, V_ASHRREV_I32_e64>;
def : DivergentBinOp<shl, V_LSHLREV_B32_e64>;

let SubtargetPredicate = HasAddNoCarryInsts in {
  def : DivergentClampingBinOp<add, V_ADD_U32_e64>;
  def : DivergentClampingBinOp<sub, V_SUB_U32_e64>;
}

let SubtargetPredicate = isGFX6GFX7GFX8GFX9, Predicates = [isGFX6GFX7GFX8GFX9] in {
def : DivergentClampingBinOp<add, V_ADD_I32_e64>;
def : DivergentClampingBinOp<sub, V_SUB_I32_e64>;
}

def : DivergentBinOp<adde, V_ADDC_U32_e32>;
def : DivergentBinOp<sube, V_SUBB_U32_e32>;

// Splits a divergent 64-bit binary op into two 32-bit Inst applications, one
// on the sub0 halves and one on the sub1 halves, and reassembles the result
// into a VReg_64.
class divergent_i64_BinOp <SDPatternOperator Op, Instruction Inst> :
  GCNPat<
      (getDivergentFrag<Op>.ret i64:$src0, i64:$src1),
      (REG_SEQUENCE VReg_64,
        (Inst
          (i32 (EXTRACT_SUBREG $src0, sub0)),
          (i32 (EXTRACT_SUBREG $src1, sub0))
        ), sub0,
        (Inst
          (i32 (EXTRACT_SUBREG $src0, sub1)),
          (i32 (EXTRACT_SUBREG $src1, sub1))
        ), sub1
      )
  >;

def : divergent_i64_BinOp <and, V_AND_B32_e32>;
def : divergent_i64_BinOp <or, V_OR_B32_e32>;
def : divergent_i64_BinOp <xor, V_XOR_B32_e32>;

let SubtargetPredicate = Has16BitInsts in {

let FPDPRounding = 1 in {
def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">;
defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>;
} // End FPDPRounding = 1

defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16>;
defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16>;
defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16>;

let isCommutable = 1 in {
let FPDPRounding = 1 in {
defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16, fadd>;
defm V_SUB_F16 : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16, fsub>;
defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">;
defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, fmul>;
def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">;
} // End FPDPRounding = 1
defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16>;
defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16>;
defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16, null_frag, "v_sub_u16">;
defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16>;
defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>;
defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>;
defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16>;
defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16>;
defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16>;
defm V_MIN_I16 : VOP2Inst <"v_min_i16", VOP_I16_I16_I16>;

let Constraints = "$vdst = $src2", DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1 in {
defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>;
}
} // End isCommutable = 1

} // End SubtargetPredicate = Has16BitInsts

let SubtargetPredicate = HasDLInsts in {

defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32>;

let Constraints = "$vdst = $src2",
    DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1 in {
defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>;
}

} // End SubtargetPredicate = HasDLInsts

// Accumulating dot-product instructions; only the _e32 form is defined.
let Constraints = "$vdst = $src2",
      DisableEncoding="$src2",
      isConvertibleToThreeAddress = 1,
      isCommutable = 1 in {
  let SubtargetPredicate = HasDot5Insts in
    defm V_DOT2C_F32_F16 : VOP2Inst_e32<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>;
  let SubtargetPredicate = HasDot6Insts in
    defm V_DOT4C_I32_I8 : VOP2Inst_e32<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>;

  let SubtargetPredicate = HasDot4Insts in
    defm V_DOT2C_I32_I16 : VOP2Inst_e32<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>;
  let SubtargetPredicate = HasDot3Insts in
    defm V_DOT8C_I32_I4 : VOP2Inst_e32<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>;
}

// Select the accumulating forms for the non-clamping dot operations.
let AddedComplexity = 30 in {
  def : GCNPat<
    (f32 (AMDGPUfdot2 v2f16:$src0, v2f16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))),
    (f32 (V_DOT2C_F32_F16_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot5Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot4 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT4C_I32_I8_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot6Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot2 v2i16:$src0, v2i16:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT2C_I32_I16_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot4Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot8 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT8C_I32_I4_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot3Insts;
  }
} // End AddedComplexity = 30

let SubtargetPredicate = isGFX10Plus in {

def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">;
let FPDPRounding = 1 in
def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">;

let isCommutable = 1 in {
def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">;
let FPDPRounding = 1 in
def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">;
} // End isCommutable = 1

let Constraints = "$vdst = $src2",
    DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1 in {
defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>;
}

} // End SubtargetPredicate = isGFX10Plus

let SubtargetPredicate = HasPkFmacF16Inst in {
defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>;
} // End SubtargetPredicate = HasPkFmacF16Inst

// Note: 16-bit instructions produce a 0 result in the high 16-bits
// on GFX8 and GFX9 and preserve high 16 bits on GFX10+
def ClearHI16 : OutPatFrag<(ops node:$op),
                           (V_AND_B32_e64 $op, (V_MOV_B32_e32 (i32 0xffff)))>;

// Patterns selecting inst for an i16 binary op, for its zero-extension to
// i32, and for its zero-extension to i64 (high dword set to 0). When
// PreservesHI16 is 1 the instruction result is wrapped in ClearHI16.
multiclass Arithmetic_i16_Pats <SDPatternOperator op, Instruction inst,
                                bit PreservesHI16 = 0> {

def : GCNPat<
  (op i16:$src0, i16:$src1),
  !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src0, $src1)), (inst $src0, $src1))
>;

def : GCNPat<
  (i32 (zext (op i16:$src0, i16:$src1))),
  !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src0, $src1)), (inst $src0, $src1))
>;

def : GCNPat<
  (i64 (zext (op i16:$src0, i16:$src1))),
   (REG_SEQUENCE VReg_64,
     !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src0, $src1)), (inst $src0, $src1)),
     sub0,
     (V_MOV_B32_e32 (i32 0)), sub1)
>;
}

// Same three patterns as Arithmetic_i16_Pats, but the instruction receives
// its operands in reversed order ($src1, $src0).
multiclass Bits_OpsRev_i16_Pats <SDPatternOperator op, Instruction inst,
                                 bit PreservesHI16 = 0> {

def : GCNPat<
  (op i16:$src0, i16:$src1),
  !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src1, $src0)), (inst $src1, $src0))
>;

def : GCNPat<
  (i32 (zext (op i16:$src0, i16:$src1))),
  !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src1, $src0)), (inst $src1, $src0))
>;


def : GCNPat<
  (i64 (zext (op i16:$src0, i16:$src1))),
   (REG_SEQUENCE VReg_64,
     !if(!eq(PreservesHI16,1), (ClearHI16 (inst $src1, $src0)), (inst $src1, $src0)),
     sub0,
     (V_MOV_B32_e32 (i32 0)), sub1)
>;
}

// Extends an i1 to i16 with V_CNDMASK_B32_e64, choosing between the
// constants 0 (src0) and 1 (src1) based on $src.
class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
  (i16 (ext i1:$src)),
  (V_CNDMASK_B32_e64 (i32 0/*src0mod*/), (i32 0/*src0*/),
                     (i32 0/*src1mod*/), (i32 1/*src1*/),
                     $src)
>;

let Predicates = [Has16BitInsts] in {

let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in {
defm : Arithmetic_i16_Pats<add, V_ADD_U16_e64>;
defm : Arithmetic_i16_Pats<mul, V_MUL_LO_U16_e64>;
defm : Arithmetic_i16_Pats<sub, V_SUB_U16_e64>;
defm : Arithmetic_i16_Pats<smin, V_MIN_I16_e64>;
defm : Arithmetic_i16_Pats<smax, V_MAX_I16_e64>;
defm : Arithmetic_i16_Pats<umin, V_MIN_U16_e64>;
defm : Arithmetic_i16_Pats<umax, V_MAX_U16_e64>;
}

let Predicates = [Has16BitInsts, isGFX10Plus] in {
defm : Arithmetic_i16_Pats<add, V_ADD_U16_e64, 1>;
defm : Arithmetic_i16_Pats<mul, V_MUL_LO_U16_e64, 1>;
defm : Arithmetic_i16_Pats<sub, V_SUB_U16_e64, 1>;
defm : Arithmetic_i16_Pats<smin, V_MIN_I16_e64, 1>;
defm : Arithmetic_i16_Pats<smax, V_MAX_I16_e64, 1>;
defm : Arithmetic_i16_Pats<umin, V_MIN_U16_e64, 1>;
defm : Arithmetic_i16_Pats<umax, V_MAX_U16_e64, 1>;
}

def : GCNPat <
  (and i16:$src0, i16:$src1),
  (V_AND_B32_e64 $src0, $src1)
>;

def : GCNPat <
  (or i16:$src0, i16:$src1),
  (V_OR_B32_e64 $src0, $src1)
>;

def : GCNPat <
  (xor i16:$src0, i16:$src1),
  (V_XOR_B32_e64 $src0, $src1)
>;

let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in {
defm : Bits_OpsRev_i16_Pats<shl, V_LSHLREV_B16_e64>;
defm : Bits_OpsRev_i16_Pats<srl, V_LSHRREV_B16_e64>;
defm : Bits_OpsRev_i16_Pats<sra, V_ASHRREV_I16_e64>;
}

let Predicates = [Has16BitInsts, isGFX10Plus] in {
defm : Bits_OpsRev_i16_Pats<shl, V_LSHLREV_B16_e64, 1>;
defm : Bits_OpsRev_i16_Pats<srl, V_LSHRREV_B16_e64, 1>;
defm : Bits_OpsRev_i16_Pats<sra, V_ASHRREV_I16_e64, 1>;
}

def : ZExt_i16_i1_Pat<zext>;
def : ZExt_i16_i1_Pat<anyext>;

def : GCNPat <
  (i16 (sext i1:$src)),
  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
                     /*src1mod*/(i32 0), /*src1*/(i32 -1), $src)
>;

// Undo sub x, c -> add x, -c canonicalization since c is more likely
// an inline immediate than -c.
// TODO: Also do for 64-bit.
def : GCNPat<
  (add i16:$src0, (i16 NegSubInlineConst16:$src1)),
  (V_SUB_U16_e64 $src0, NegSubInlineConst16:$src1)
>;

} // End Predicates = [Has16BitInsts]


//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//

// DPP encoding of a VOP2 instruction: the src0 slot holds the fixed value
// 0xfa. Side-effect, Defs, SchedRW and Uses settings are copied from the
// pseudo.
class VOP2_DPP<bits<6> op, VOP2_Pseudo ps,
               string opName = ps.OpName, VOPProfile p = ps.Pfl,
               bit IsDPP16 = 0> :
    VOP_DPP<opName, p, IsDPP16> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;

  bits<8> vdst;
  bits<8> src1;
  let Inst{8-0}   = 0xfa;
  let Inst{16-9}  = !if(p.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0;
}

// DPP16 variant of VOP2_DPP (IsDPP16 = 1). The assembler form is disabled
// when the profile has HasExt cleared.
class VOP2_DPP16<bits<6> op, VOP2_Pseudo ps,
                 string opName = ps.OpName, VOPProfile p = ps.Pfl> :
    VOP2_DPP<op, ps, opName, p, 1> {
  let AssemblerPredicate = !if(p.HasExt, HasDPP16, DisableInst);
  let SubtargetPredicate = HasDPP16;
}

// DPP8 encoding of a VOP2 instruction. The src0 slot is filled from `fi`,
// which is not declared in this class (presumably provided by VOP_DPP8).
// opName is forwarded to VOP_DPP8 so that an explicitly supplied name takes
// effect; previously ps.OpName was passed and the parameter was ignored.
class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
                string opName = ps.OpName, VOPProfile p = ps.Pfl> :
    VOP_DPP8<opName, p> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;

  bits<8> vdst;
  bits<8> src1;

  let Inst{8-0}   = fi;
  let Inst{16-9}  = !if(p.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0;

  let AssemblerPredicate = !if(p.HasExt, HasDPP8, DisableInst);
  let SubtargetPredicate = HasDPP8;
}

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
  //===------------------------------- VOP2 -------------------------------===//
  // The multiclasses below look up the pseudo by NAME (plus a form suffix)
  // and emit the matching GFX10 real instruction.
  multiclass VOP2Only_Real_MADK_gfx10<bits<6> op> {
    def _gfx10 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX10>,
      VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }
  multiclass VOP2Only_Real_MADK_gfx10_with_name<bits<6> op, string opName,
                                                string asmName> {
    def _gfx10 :
        VOP2_Real<!cast<VOP2_Pseudo>(opName), SIEncodingFamily.GFX10>,
        VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(opName).Pfl> {
      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName);
      let AsmString = asmName # ps.AsmOperands;
    }
  }
  multiclass VOP2_Real_e32_gfx10<bits<6> op> {
    def _e32_gfx10 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
  }
  multiclass VOP2_Real_e64_gfx10<bits<6> op> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
  multiclass VOP2_Real_sdwa_gfx10<bits<6> op> {
    def _sdwa_gfx10 :
      VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
      let DecoderNamespace = "SDWA10";
    }
  }
  multiclass VOP2_Real_dpp_gfx10<bits<6> op> {
    def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
      let DecoderNamespace = "SDWA10";
    }
  }
  multiclass VOP2_Real_dpp8_gfx10<bits<6> op> {
    def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
      let DecoderNamespace = "DPP8";
    }
  }

  //===------------------------- VOP2 (with name) -------------------------===//
  // Variants that take the pseudo's name explicitly and override the asm
  // mnemonic with asmName.
  multiclass VOP2_Real_e32_gfx10_with_name<bits<6> op, string opName,
                                           string asmName> {
    def _e32_gfx10 :
      VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> {
        VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
        let AsmString = asmName # ps.AsmOperands;
      }
  }
  multiclass VOP2_Real_e64_gfx10_with_name<bits<6> op, string opName,
                                           string asmName> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<{0, 1, 0, 0, op{5-0}},
                  !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
        VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName#"_e64");
        let AsmString = asmName # ps.AsmOperands;
      }
  }
  let DecoderNamespace = "SDWA10" in {
    multiclass VOP2_Real_sdwa_gfx10_with_name<bits<6> op, string opName,
                                              string asmName> {
      def _sdwa_gfx10 :
        VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
        VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
          VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
          let AsmString = asmName # ps.AsmOperands;
        }
    }
    multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName,
                                             string asmName> {
      def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
        VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
        let AsmString = asmName # ps.Pfl.AsmDPP16;
      }
    }
    multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName,
                                              string asmName> {
      def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
        VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
        let AsmString = asmName #
ps.Pfl.AsmDPP8; 986 let DecoderNamespace = "DPP8"; 987 } 988 } 989 } // End DecoderNamespace = "SDWA10" 990 991 //===------------------------------ VOP2be ------------------------------===// 992 multiclass VOP2be_Real_gfx10<bits<6> op, string opName, string asmName> { 993 def _e32_gfx10 : 994 VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>, 995 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> { 996 VOP2_Pseudo Ps = !cast<VOP2_Pseudo>(opName#"_e32"); 997 let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands); 998 } 999 def _e64_gfx10 : 1000 VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>, 1001 VOP3be_gfx10<{0, 1, 0, 0, op{5-0}}, 1002 !cast<VOP3_Pseudo>(opName#"_e64").Pfl> { 1003 VOP3_Pseudo Ps = !cast<VOP3_Pseudo>(opName#"_e64"); 1004 let AsmString = asmName # Ps.AsmOperands; 1005 } 1006 def _sdwa_gfx10 : 1007 VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>, 1008 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> { 1009 VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa"); 1010 let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands); 1011 let DecoderNamespace = "SDWA10"; 1012 } 1013 def _dpp_gfx10 : 1014 VOP2_DPP16<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> { 1015 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16; 1016 let AsmString = asmName # !subst(", vcc", "", AsmDPP); 1017 let DecoderNamespace = "SDWA10"; 1018 } 1019 def _dpp8_gfx10 : 1020 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> { 1021 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 1022 let AsmString = asmName # !subst(", vcc", "", AsmDPP8); 1023 let DecoderNamespace = "DPP8"; 1024 } 1025 1026 let WaveSizePredicate = isWave32 in { 1027 def _sdwa_w32_gfx10 : 1028 Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>, 1029 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> { 1030 VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa"); 
1031 let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands); 1032 let isAsmParserOnly = 1; 1033 let DecoderNamespace = "SDWA10"; 1034 } 1035 def _dpp_w32_gfx10 : 1036 VOP2_DPP16<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> { 1037 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16; 1038 let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP); 1039 let isAsmParserOnly = 1; 1040 } 1041 def _dpp8_w32_gfx10 : 1042 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> { 1043 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 1044 let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8); 1045 let isAsmParserOnly = 1; 1046 } 1047 } // End WaveSizePredicate = isWave32 1048 1049 let WaveSizePredicate = isWave64 in { 1050 def _sdwa_w64_gfx10 : 1051 Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>, 1052 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> { 1053 VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa"); 1054 let AsmString = asmName # Ps.AsmOperands; 1055 let isAsmParserOnly = 1; 1056 let DecoderNamespace = "SDWA10"; 1057 } 1058 def _dpp_w64_gfx10 : 1059 VOP2_DPP16<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> { 1060 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16; 1061 let AsmString = asmName # AsmDPP; 1062 let isAsmParserOnly = 1; 1063 } 1064 def _dpp8_w64_gfx10 : 1065 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> { 1066 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 1067 let AsmString = asmName # AsmDPP8; 1068 let isAsmParserOnly = 1; 1069 } 1070 } // End WaveSizePredicate = isWave64 1071 } 1072 1073 //===----------------------------- VOP3Only -----------------------------===// 1074 multiclass VOP3Only_Real_gfx10<bits<10> op> { 1075 def _e64_gfx10 : 1076 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>, 1077 VOP3e_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 1078 } 1079 1080 //===---------------------------- 
VOP3beOnly ----------------------------===// 1081 multiclass VOP3beOnly_Real_gfx10<bits<10> op, string opName, string asmName> { 1082 def _e64_gfx10 : 1083 VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>, 1084 VOP3be_gfx10<op, !cast<VOP3_Pseudo>(opName#"_e64").Pfl> { 1085 VOP3_Pseudo Ps = !cast<VOP3_Pseudo>(opName#"_e64"); 1086 let AsmString = asmName # Ps.AsmOperands; 1087 } 1088 } 1089} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" 1090 1091multiclass Base_VOP2_Real_gfx10<bits<6> op> : 1092 VOP2_Real_e32_gfx10<op>, VOP2_Real_e64_gfx10<op>; 1093 1094multiclass VOP2_Real_gfx10<bits<6> op> : 1095 VOP2_Real_e32_gfx10<op>, VOP2_Real_e64_gfx10<op>, 1096 VOP2_Real_sdwa_gfx10<op>, VOP2_Real_dpp_gfx10<op>, VOP2_Real_dpp8_gfx10<op>; 1097 1098multiclass VOP2_Real_gfx10_with_name<bits<6> op, string opName, 1099 string asmName> : 1100 VOP2_Real_e32_gfx10_with_name<op, opName, asmName>, 1101 VOP2_Real_e64_gfx10_with_name<op, opName, asmName>, 1102 VOP2_Real_sdwa_gfx10_with_name<op, opName, asmName>, 1103 VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>, 1104 VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>; 1105 1106defm V_CNDMASK_B32 : Base_VOP2_Real_gfx10<0x001>; 1107defm V_XNOR_B32 : VOP2_Real_gfx10<0x01e>; 1108defm V_FMAC_F32 : VOP2_Real_gfx10<0x02b>; 1109defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10<0x02c>; 1110defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10<0x02d>; 1111defm V_ADD_F16 : VOP2_Real_gfx10<0x032>; 1112defm V_SUB_F16 : VOP2_Real_gfx10<0x033>; 1113defm V_SUBREV_F16 : VOP2_Real_gfx10<0x034>; 1114defm V_MUL_F16 : VOP2_Real_gfx10<0x035>; 1115defm V_FMAC_F16 : VOP2_Real_gfx10<0x036>; 1116defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10<0x037>; 1117defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10<0x038>; 1118defm V_MAX_F16 : VOP2_Real_gfx10<0x039>; 1119defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>; 1120defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>; 1121defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>; 1122 1123// VOP2 no carry-in, carry-out. 
// Each entry reuses an existing pseudo (second argument) under the GFX10
// mnemonic given as the third argument.
defm V_ADD_NC_U32 :
  VOP2_Real_gfx10_with_name<0x025, "V_ADD_U32", "v_add_nc_u32">;
defm V_SUB_NC_U32 :
  VOP2_Real_gfx10_with_name<0x026, "V_SUB_U32", "v_sub_nc_u32">;
defm V_SUBREV_NC_U32 :
  VOP2_Real_gfx10_with_name<0x027, "V_SUBREV_U32", "v_subrev_nc_u32">;

// VOP2 carry-in, carry-out.
defm V_ADD_CO_CI_U32 :
  VOP2be_Real_gfx10<0x028, "V_ADDC_U32", "v_add_co_ci_u32">;
defm V_SUB_CO_CI_U32 :
  VOP2be_Real_gfx10<0x029, "V_SUBB_U32", "v_sub_co_ci_u32">;
defm V_SUBREV_CO_CI_U32 :
  VOP2be_Real_gfx10<0x02a, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;

// VOP3 only.
defm V_BFM_B32            : VOP3Only_Real_gfx10<0x363>;
defm V_BCNT_U32_B32       : VOP3Only_Real_gfx10<0x364>;
defm V_MBCNT_LO_U32_B32   : VOP3Only_Real_gfx10<0x365>;
defm V_MBCNT_HI_U32_B32   : VOP3Only_Real_gfx10<0x366>;
defm V_LDEXP_F32          : VOP3Only_Real_gfx10<0x362>;
defm V_CVT_PKNORM_I16_F32 : VOP3Only_Real_gfx10<0x368>;
defm V_CVT_PKNORM_U16_F32 : VOP3Only_Real_gfx10<0x369>;
defm V_CVT_PK_U16_U32     : VOP3Only_Real_gfx10<0x36a>;
defm V_CVT_PK_I16_I32     : VOP3Only_Real_gfx10<0x36b>;

// VOP3 carry-in, carry-out.
defm V_ADD_CO_U32 :
  VOP3beOnly_Real_gfx10<0x30f, "V_ADD_I32", "v_add_co_u32">;
defm V_SUB_CO_U32 :
  VOP3beOnly_Real_gfx10<0x310, "V_SUB_I32", "v_sub_co_u32">;
defm V_SUBREV_CO_U32 :
  VOP3beOnly_Real_gfx10<0x319, "V_SUBREV_I32", "v_subrev_co_u32">;

// Assembler aliases for the GFX10 e32 real instructions.
let SubtargetPredicate = isGFX10Plus in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx10>;

  defm : VOP2bInstAliases<
    V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx10, "v_add_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx10, "v_sub_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx10, "v_subrev_co_ci_u32">;
} // End SubtargetPredicate = isGFX10Plus

//===----------------------------------------------------------------------===//
// GFX6, GFX7, GFX10.
//===----------------------------------------------------------------------===//

// VOP2 half of a DPP encoding, layered on VOP_DPPe<P>. `ps` is used only to
// supply the default profile; src1 and vdst are zeroed when the profile has
// no src1 / emits no destination.
class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
  VOP_DPPe <P> {
  bits<8> vdst;
  bits<8> src1;

  let Inst{8-0}   = 0xfa; //dpp
  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; //encoding
}

let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  // 32-bit encoding of the pseudo named exactly NAME (no _e32 suffix).
  multiclass VOP2Only_Real_gfx6_gfx7<bits<6> op> {
    def _gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }

  // MADK encoding of the pseudo named exactly NAME.
  multiclass VOP2Only_Real_MADK_gfx6_gfx7<bits<6> op> {
    def _gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
      VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }

  // 32-bit encoding of NAME#"_e32".
  multiclass VOP2_Real_e32_gfx6_gfx7<bits<6> op> {
    def _e32_gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
  }

  // VOP3 encoding of NAME#"_e64"; the VOP3 opcode is the VOP2 opcode
  // prefixed with the bits {1, 0, 0}.
  multiclass VOP2_Real_e64_gfx6_gfx7<bits<6> op> {
    def _e64_gfx6_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }

  // Same opcode mapping, using the VOP3be encoding class.
  multiclass VOP2be_Real_e64_gfx6_gfx7<bits<6> op> {
    def _e64_gfx6_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
      VOP3be_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

// Combinations used by the opcode tables below.
multiclass VOP2Only_Real_MADK_gfx6_gfx7_gfx10<bits<6> op> :
  VOP2Only_Real_MADK_gfx6_gfx7<op>, VOP2Only_Real_MADK_gfx10<op>;

multiclass VOP2_Real_gfx6_gfx7<bits<6> op> :
  VOP2_Real_e32_gfx6_gfx7<op>, VOP2_Real_e64_gfx6_gfx7<op>;

multiclass VOP2_Real_gfx6_gfx7_gfx10<bits<6> op> :
  VOP2_Real_gfx6_gfx7<op>, VOP2_Real_gfx10<op>;

multiclass VOP2be_Real_gfx6_gfx7<bits<6> op> :
  VOP2_Real_e32_gfx6_gfx7<op>, VOP2be_Real_e64_gfx6_gfx7<op>;

// Opcodes defined for GFX6/GFX7 only.
defm V_CNDMASK_B32        : VOP2_Real_gfx6_gfx7<0x000>;
defm V_MIN_LEGACY_F32     : VOP2_Real_gfx6_gfx7<0x00d>;
defm V_MAX_LEGACY_F32     : VOP2_Real_gfx6_gfx7<0x00e>;
defm V_LSHR_B32           : VOP2_Real_gfx6_gfx7<0x015>;
defm V_ASHR_I32           : VOP2_Real_gfx6_gfx7<0x017>;
defm V_LSHL_B32           : VOP2_Real_gfx6_gfx7<0x019>;
defm V_BFM_B32            : VOP2_Real_gfx6_gfx7<0x01e>;
defm V_BCNT_U32_B32       : VOP2_Real_gfx6_gfx7<0x022>;
defm V_MBCNT_LO_U32_B32   : VOP2_Real_gfx6_gfx7<0x023>;
defm V_MBCNT_HI_U32_B32   : VOP2_Real_gfx6_gfx7<0x024>;
defm V_LDEXP_F32          : VOP2_Real_gfx6_gfx7<0x02b>;
defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_gfx6_gfx7<0x02c>;
defm V_CVT_PKNORM_I16_F32 : VOP2_Real_gfx6_gfx7<0x02d>;
defm V_CVT_PKNORM_U16_F32 : VOP2_Real_gfx6_gfx7<0x02e>;
defm V_CVT_PK_U16_U32     : VOP2_Real_gfx6_gfx7<0x030>;
defm V_CVT_PK_I16_I32     : VOP2_Real_gfx6_gfx7<0x031>;
defm V_ADD_I32            : VOP2be_Real_gfx6_gfx7<0x025>;
defm V_SUB_I32            : VOP2be_Real_gfx6_gfx7<0x026>;
defm V_SUBREV_I32         : VOP2be_Real_gfx6_gfx7<0x027>;
defm V_ADDC_U32           : VOP2be_Real_gfx6_gfx7<0x028>;
defm V_SUBB_U32           : VOP2be_Real_gfx6_gfx7<0x029>;
defm V_SUBBREV_U32        : VOP2be_Real_gfx6_gfx7<0x02a>;

defm V_READLANE_B32 : VOP2Only_Real_gfx6_gfx7<0x001>;

// V_WRITELANE_B32 is instantiated with an explicit input operand list that
// includes $vdst_in.
let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in) in {
  defm V_WRITELANE_B32 : VOP2Only_Real_gfx6_gfx7<0x002>;
} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VSrc_b32:$vdst_in)

let SubtargetPredicate = isGFX6GFX7 in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx6_gfx7>;
} // End SubtargetPredicate = isGFX6GFX7

// Opcodes shared by GFX6/GFX7 and GFX10.
defm V_ADD_F32           : VOP2_Real_gfx6_gfx7_gfx10<0x003>;
defm V_SUB_F32           : VOP2_Real_gfx6_gfx7_gfx10<0x004>;
defm V_SUBREV_F32        : VOP2_Real_gfx6_gfx7_gfx10<0x005>;
defm V_MAC_LEGACY_F32    : VOP2_Real_gfx6_gfx7_gfx10<0x006>;
defm V_MUL_LEGACY_F32    : VOP2_Real_gfx6_gfx7_gfx10<0x007>;
defm V_MUL_F32           : VOP2_Real_gfx6_gfx7_gfx10<0x008>;
defm V_MUL_I32_I24       : VOP2_Real_gfx6_gfx7_gfx10<0x009>;
defm V_MUL_HI_I32_I24    : VOP2_Real_gfx6_gfx7_gfx10<0x00a>;
defm V_MUL_U32_U24       : VOP2_Real_gfx6_gfx7_gfx10<0x00b>;
defm V_MUL_HI_U32_U24    : VOP2_Real_gfx6_gfx7_gfx10<0x00c>;
defm V_MIN_F32           : VOP2_Real_gfx6_gfx7_gfx10<0x00f>;
defm V_MAX_F32           : VOP2_Real_gfx6_gfx7_gfx10<0x010>;
defm V_MIN_I32           : VOP2_Real_gfx6_gfx7_gfx10<0x011>;
defm V_MAX_I32           : VOP2_Real_gfx6_gfx7_gfx10<0x012>;
defm V_MIN_U32           : VOP2_Real_gfx6_gfx7_gfx10<0x013>;
defm V_MAX_U32           : VOP2_Real_gfx6_gfx7_gfx10<0x014>;
defm V_LSHRREV_B32       : VOP2_Real_gfx6_gfx7_gfx10<0x016>;
defm V_ASHRREV_I32       : VOP2_Real_gfx6_gfx7_gfx10<0x018>;
defm V_LSHLREV_B32       : VOP2_Real_gfx6_gfx7_gfx10<0x01a>;
defm V_AND_B32           : VOP2_Real_gfx6_gfx7_gfx10<0x01b>;
defm V_OR_B32            : VOP2_Real_gfx6_gfx7_gfx10<0x01c>;
defm V_XOR_B32           : VOP2_Real_gfx6_gfx7_gfx10<0x01d>;
defm V_MAC_F32           : VOP2_Real_gfx6_gfx7_gfx10<0x01f>;
defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x02f>;
defm V_MADMK_F32         : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x020>;
defm V_MADAK_F32         : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x021>;

//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//

let AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8" in {

// MADK encoding of the pseudo named exactly NAME.
multiclass VOP2_Real_MADK_vi <bits<6> op> {
  def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
            VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
}

// 32-bit encoding of NAME#"_e32".
multiclass VOP2_Real_e32_vi <bits<6> op> {
  def _e32_vi :
    VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
}

// VOP3 encoding of NAME#"_e64"; takes the full 10-bit opcode.
multiclass VOP2_Real_e64_vi <bits<10> op> {
  def _e64_vi :
    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
    VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}

// VOP3-only variant: overrides the output operand list and rebuilds the
// assembly string from the pseudo's Mnemonic.
multiclass VOP2_Real_e64only_vi <bits<10> op> {
  def _e64_vi :
    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
    VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
    // Hack to stop printing _e64
    VOP3_Pseudo ps = !cast<VOP3_Pseudo>(NAME#"_e64");
    let OutOperandList = (outs VGPR_32:$vdst);
    let AsmString = ps.Mnemonic # " " # ps.AsmOperands;
  }
}

// e32 + e64, where the e64 opcode is the VOP2 opcode prefixed with the bits
// {0, 1, 0, 0}.
multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
  VOP2_Real_e32_vi<op>,
  VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;

} // End AssemblerPredicates = [isGFX8GFX9], DecoderNamespace = "GFX8"

// SDWA real instruction (VI encoding) for NAME#"_sdwa".
multiclass VOP2_SDWA_Real <bits<6> op> {
  def _sdwa_vi :
    VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}

// SDWA real instruction (GFX9 encoding) for NAME#"_sdwa".
multiclass VOP2_SDWA9_Real <bits<6> op> {
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}

let AssemblerPredicates = [isGFX8Only] in {

// GFX8-only VOP2be forms of the pseudo family OpName, printed as AsmName.
// The _dpp_vi record is emitted only when the e32 profile's HasExtDPP
// yields a non-empty BoolToList.
multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName> {
  def _e32_vi :
    VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.VI>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> {
    VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32");
    let AsmString = AsmName # ps.AsmOperands;
    let DecoderNamespace = "GFX8";
  }
  def _e64_vi :
    VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.VI>,
    VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
    VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
    let AsmString = AsmName # ps.AsmOperands;
    let DecoderNamespace = "GFX8";
  }
  def _sdwa_vi :
    VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
    VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
    VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
    let AsmString = AsmName # ps.AsmOperands;
  }
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_vi :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.VI>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
      VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
      let AsmString = AsmName # ps.AsmOperands;
    }
}
} // End AssemblerPredicates = [isGFX8Only]

let AssemblerPredicates = [isGFX9Only] in {

// GFX9-only counterpart of VOP2be_Real_e32e64_vi_only: GFX9 encoding family,
// "GFX9" decoder namespace for e32/e64 and "SDWA9" for the DPP form.
multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
  def _e32_gfx9 :
    VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.GFX9>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> {
    VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32");
    let AsmString = AsmName # ps.AsmOperands;
    let DecoderNamespace = "GFX9";
  }
  def _e64_gfx9 :
    VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.GFX9>,
    VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
    VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
    let AsmString = AsmName # ps.AsmOperands;
    let DecoderNamespace = "GFX9";
  }
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
    VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
    VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
    let AsmString = AsmName # ps.AsmOperands;
  }
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_gfx9 :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.GFX9>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
      VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
      let AsmString = AsmName # ps.AsmOperands;
      let DecoderNamespace = "SDWA9";
    }
}

// GFX9-only e32/e64/sdwa/dpp forms keyed on NAME, keeping the pseudo's own
// assembly string.
multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
  def _e32_gfx9 :
    VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX9>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>{
    let DecoderNamespace = "GFX9";
  }
  def _e64_gfx9 :
    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
    VOP3e_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
    let DecoderNamespace = "GFX9";
  }
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
  }
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_gfx9 :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
      let DecoderNamespace = "SDWA9";
    }
}

} // AssemblerPredicates = [isGFX9Only]

// Full VI set: e32 + e64 + both SDWA encodings, plus a DPP record when the
// e32 profile's HasExtDPP yields a non-empty BoolToList.
multiclass VOP2_Real_e32e64_vi <bits<6> op> :
  Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {

  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_vi :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
}

// VI opcode assignments.
defm V_CNDMASK_B32    : VOP2_Real_e32e64_vi <0x0>;
defm V_ADD_F32        : VOP2_Real_e32e64_vi <0x1>;
defm V_SUB_F32        : VOP2_Real_e32e64_vi <0x2>;
defm V_SUBREV_F32     : VOP2_Real_e32e64_vi <0x3>;
defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_vi <0x4>;
defm V_MUL_F32        : VOP2_Real_e32e64_vi <0x5>;
defm V_MUL_I32_I24    : VOP2_Real_e32e64_vi <0x6>;
defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_vi <0x7>;
defm V_MUL_U32_U24    : VOP2_Real_e32e64_vi <0x8>;
defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_vi <0x9>;
defm V_MIN_F32        : VOP2_Real_e32e64_vi <0xa>;
defm V_MAX_F32        : VOP2_Real_e32e64_vi <0xb>;
defm V_MIN_I32        : VOP2_Real_e32e64_vi <0xc>;
defm V_MAX_I32        : VOP2_Real_e32e64_vi <0xd>;
defm V_MIN_U32        : VOP2_Real_e32e64_vi <0xe>;
defm V_MAX_U32        : VOP2_Real_e32e64_vi <0xf>;
defm V_LSHRREV_B32    : VOP2_Real_e32e64_vi <0x10>;
defm V_ASHRREV_I32    : VOP2_Real_e32e64_vi <0x11>;
defm V_LSHLREV_B32    : VOP2_Real_e32e64_vi <0x12>;
defm V_AND_B32        : VOP2_Real_e32e64_vi <0x13>;
defm V_OR_B32         : VOP2_Real_e32e64_vi <0x14>;
defm V_XOR_B32        : VOP2_Real_e32e64_vi <0x15>;
defm V_MAC_F32        : VOP2_Real_e32e64_vi <0x16>;
defm V_MADMK_F32      : VOP2_Real_MADK_vi <0x17>;
defm V_MADAK_F32      : VOP2_Real_MADK_vi <0x18>;

defm V_ADD_U32    : VOP2be_Real_e32e64_vi_only <0x19, "V_ADD_I32", "v_add_u32">;
defm V_SUB_U32    : VOP2be_Real_e32e64_vi_only <0x1a, "V_SUB_I32", "v_sub_u32">;
defm V_SUBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1b, "V_SUBREV_I32", "v_subrev_u32">;
defm V_ADDC_U32   : VOP2be_Real_e32e64_vi_only <0x1c, "V_ADDC_U32", "v_addc_u32">;
defm V_SUBB_U32    : VOP2be_Real_e32e64_vi_only <0x1d, "V_SUBB_U32", "v_subb_u32">;
defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1e, "V_SUBBREV_U32", "v_subbrev_u32">;

// GFX9 carry-out forms: same opcodes as the GFX8-only entries, printed with
// the "_co" mnemonics.
defm V_ADD_CO_U32     : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_I32", "v_add_co_u32">;
defm V_SUB_CO_U32     : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_I32", "v_sub_co_u32">;
defm V_SUBREV_CO_U32  : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_I32", "v_subrev_co_u32">;
defm V_ADDC_CO_U32    : VOP2be_Real_e32e64_gfx9 <0x1c, "V_ADDC_U32", "v_addc_co_u32">;
defm V_SUBB_CO_U32    : VOP2be_Real_e32e64_gfx9 <0x1d, "V_SUBB_U32", "v_subb_co_u32">;
defm V_SUBBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_subbrev_co_u32">;

defm V_ADD_U32    : VOP2_Real_e32e64_gfx9 <0x34>;
defm V_SUB_U32    : VOP2_Real_e32e64_gfx9 <0x35>;
defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>;

// Instructions that only get a VOP3 (e64) real encoding here.
defm V_BFM_B32            : VOP2_Real_e64only_vi <0x293>;
defm V_BCNT_U32_B32       : VOP2_Real_e64only_vi <0x28b>;
defm V_MBCNT_LO_U32_B32   : VOP2_Real_e64only_vi <0x28c>;
defm V_MBCNT_HI_U32_B32   : VOP2_Real_e64only_vi <0x28d>;
defm V_LDEXP_F32          : VOP2_Real_e64only_vi <0x288>;
defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64only_vi <0x1f0>;
defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64only_vi <0x294>;
defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64only_vi <0x295>;
defm V_CVT_PKRTZ_F16_F32  : VOP2_Real_e64only_vi <0x296>;
defm V_CVT_PK_U16_U32     : VOP2_Real_e64only_vi <0x297>;
defm V_CVT_PK_I16_I32     : VOP2_Real_e64only_vi <0x298>;

// 16-bit opcodes.
defm V_ADD_F16     : VOP2_Real_e32e64_vi <0x1f>;
defm V_SUB_F16     : VOP2_Real_e32e64_vi <0x20>;
defm V_SUBREV_F16  : VOP2_Real_e32e64_vi <0x21>;
defm V_MUL_F16     : VOP2_Real_e32e64_vi <0x22>;
defm V_MAC_F16     : VOP2_Real_e32e64_vi <0x23>;
defm V_MADMK_F16   : VOP2_Real_MADK_vi <0x24>;
defm V_MADAK_F16   : VOP2_Real_MADK_vi <0x25>;
defm V_ADD_U16     : VOP2_Real_e32e64_vi <0x26>;
defm V_SUB_U16     : VOP2_Real_e32e64_vi <0x27>;
defm V_SUBREV_U16  : VOP2_Real_e32e64_vi <0x28>;
defm V_MUL_LO_U16  : VOP2_Real_e32e64_vi <0x29>;
defm V_LSHLREV_B16 : VOP2_Real_e32e64_vi <0x2a>;
defm V_LSHRREV_B16 : VOP2_Real_e32e64_vi <0x2b>;
defm V_ASHRREV_I16 : VOP2_Real_e32e64_vi <0x2c>;
defm V_MAX_F16     : VOP2_Real_e32e64_vi <0x2d>;
defm V_MIN_F16     : VOP2_Real_e32e64_vi <0x2e>;
defm V_MAX_U16     : VOP2_Real_e32e64_vi <0x2f>;
defm V_MAX_I16     : VOP2_Real_e32e64_vi <0x30>;
defm V_MIN_U16     : VOP2_Real_e32e64_vi <0x31>;
defm V_MIN_I16     : VOP2_Real_e32e64_vi <0x32>;
defm V_LDEXP_F16   : VOP2_Real_e32e64_vi <0x33>;

let SubtargetPredicate = isGFX8GFX9 in {

// Aliases to simplify matching of floating-point instructions that
// are VOP2 on SI and VOP3 on VI.
// The alias maps "name $dst, $src0, $src1" onto `inst` with literal 0 in
// every other operand slot; one extra trailing 0 is supplied when the
// instruction's profile has HasOMod set.
class SI2_VI3Alias <string name, VOP3_Real inst> : InstAlias <
  name#" $dst, $src0, $src1",
  !if(inst.Pfl.HasOMod,
      (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0),
      (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0))
>, PredicateControl {
  let UseInstAsmMatchConverter = 0;
  let AsmVariantName = AMDGPUAsmVariants.VOP3;
}

def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;

defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_vi>;

} // End SubtargetPredicate = isGFX8GFX9

let SubtargetPredicate = isGFX9Only in {

defm : VOP2bInstAliases<V_ADD_I32_e32, V_ADD_CO_U32_e32_gfx9, "v_add_co_u32">;
defm : VOP2bInstAliases<V_ADDC_U32_e32, V_ADDC_CO_U32_e32_gfx9, "v_addc_co_u32">;
defm : VOP2bInstAliases<V_SUB_I32_e32, V_SUB_CO_U32_e32_gfx9, "v_sub_co_u32">;
defm : VOP2bInstAliases<V_SUBB_U32_e32, V_SUBB_CO_U32_e32_gfx9, "v_subb_co_u32">;
defm : VOP2bInstAliases<V_SUBREV_I32_e32, V_SUBREV_CO_U32_e32_gfx9, "v_subrev_co_u32">;
defm : VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBBREV_CO_U32_e32_gfx9, "v_subbrev_co_u32">;

} // End SubtargetPredicate = isGFX9Only

let SubtargetPredicate = HasDLInsts in {

defm V_FMAC_F32 : VOP2_Real_e32e64_vi <0x3b>;
defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>;

} // End SubtargetPredicate = HasDLInsts

// VI e32 encoding plus a VOP2_DPP record built from the e32 pseudo.
multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : VOP2_Real_e32_vi<op> {
  def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
}

// GFX10 e32 + dpp + dpp8 encodings (no e64 or sdwa form).
multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> :
  VOP2_Real_e32_gfx10<op>,
  VOP2_Real_dpp_gfx10<op>,
  VOP2_Real_dpp8_gfx10<op>;

let SubtargetPredicate = HasDot5Insts in {
  defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx9<0x37>;
  // NB: Opcode conflicts with V_DOT8C_I32_I4
  // This opcode exists in gfx 10.1* only
  defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx10<0x02>;
} // End SubtargetPredicate = HasDot5Insts

let SubtargetPredicate = HasDot6Insts in {
  defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx9<0x39>;
  defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx10<0x0d>;
} // End SubtargetPredicate = HasDot6Insts

let SubtargetPredicate = HasDot4Insts in {
  defm V_DOT2C_I32_I16 : VOP2_Real_DOT_ACC_gfx9<0x38>;
} // End SubtargetPredicate = HasDot4Insts
let SubtargetPredicate = HasDot3Insts in {
  defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx9<0x3a>;
} // End SubtargetPredicate = HasDot3Insts

let SubtargetPredicate = HasPkFmacF16Inst in {
defm V_PK_FMAC_F16 : VOP2_Real_e32_vi<0x3c>;
} // End SubtargetPredicate = HasPkFmacF16Inst