//===-- VOP2Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP2 Classes
//===----------------------------------------------------------------------===//

// 32-bit VOP2 encoding: src0 in bits 8-0, src1 in 16-9, vdst in 24-17, the
// 6-bit opcode in 30-25 and a fixed 0 in bit 31. A field the profile does not
// have (HasSrc0 / HasSrc1 / EmitDst false) is encoded as 0.
class VOP2e <bits<6> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;
  bits<8> src1;

  let Inst{8-0}   = !if(P.HasSrc0, src0, 0);
  let Inst{16-9}  = !if(P.HasSrc1, src1, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; //encoding
}

// 64-bit variant of VOP2e: the low dword uses the same layout and the high
// dword (bits 63-32) carries the 32-bit `imm` operand.
class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 {
  bits<8>  vdst;
  bits<9>  src0;
  bits<8>  src1;
  bits<32> imm;

  let Inst{8-0}   = !if(P.HasSrc0, src0, 0);
  let Inst{16-9}  = !if(P.HasSrc1, src1, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
  let Inst{63-32} = imm;
}

// VOP2 fields for the SDWA form: the src0 field holds the fixed value 0xf9;
// the remaining bits are left to the VOP_SDWAe base class.
class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;
  bits<8> src1;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
}

// Same as VOP2_SDWAe but on top of VOP_SDWA9Ae. src1 is 9 bits wide here; its
// top bit is emitted separately in bit 63 (src1_sgpr).
class VOP2_SDWA9Ae <bits<6> op, VOPProfile P> : VOP_SDWA9Ae <P> {
  bits<8> vdst;
  bits<9> src1;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
  let Inst{63}    = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr
}

// Pseudo instruction for the 32-bit form of a VOP2 operation. Operands and the
// asm operand string come from the profile's 32-bit members (Outs32 / Ins32 /
// Asm32). `suffix` defaults to "_e32".
class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suffix = "_e32"> :
  VOP_Pseudo <opName, suffix, P, P.Outs32, P.Ins32, "", pattern> {

  let AsmOperands = P.Asm32;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  // The MODE register is read whenever the destination or src0 type is a
  // floating-point type; such instructions are also marked as possibly
  // raising FP exceptions.
  let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret);

  let mayRaiseFPException = ReadsModeReg;

  let VOP2 = 1;
  let VALU = 1;
  let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);

  let AsmVariantName = AMDGPUAsmVariants.Default;
}

// Real (encodable) counterpart of a VOP2_Pseudo for one encoding family.
// Operand lists and the asm string are taken from the pseudo `ps`.
class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  // (Constraints / DisableEncoding used to be assigned twice with the same
  // value; each is now copied exactly once.)
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let AsmVariantName       = ps.AsmVariantName;
  let Constraints          = ps.Constraints;
  let DisableEncoding      = ps.DisableEncoding;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses                 = ps.Uses;
  let Defs                 = ps.Defs;
}

// SDWA pseudo for a VOP2 operation; only overrides the asm match converter.
class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP2";
}

// DPP pseudo for a VOP2 operation; adds nothing to VOP_DPP_Pseudo.
class VOP2_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_DPP_Pseudo <OpName, P, pattern> {
}


// Builds the selection pattern used for the _e64 form of a VOP2 operation.
// With P.HasModifiers set, src0 goes through VOP3Mods0 (with an extra $omod
// when P.HasOMod is set) and src1 through VOP3Mods; otherwise the plain
// two-operand pattern is produced.
class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret = !if(P.HasModifiers,
    [(set P.DstVT:$vdst,
      (node (P.Src0VT
              !if(P.HasOMod,
                  (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
                  (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))),
            (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
    [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]);
}

// Defines the <NAME>_e32 pseudo. Commutable_REV records `revOp` and whether
// this instruction is the original (revOp == opName) of the pair.
multiclass VOP2Inst_e32<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName,
                        bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
               Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
  } // End renamedInGFX9 = GFX9Renamed
}

// Defines the <NAME>_e64 pseudo (a VOP3_Pseudo) with the getVOP2Pat64 pattern.
multiclass VOP2Inst_e64<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName,
                        bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
               Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
  } // End renamedInGFX9 = GFX9Renamed
}

// Defines the <NAME>_sdwa pseudo, but only when the profile has HasExtSDWA.
// `node` and `revOp` are accepted for signature parity and are not used.
multiclass VOP2Inst_sdwa<string opName,
                         VOPProfile P,
                         SDPatternOperator node = null_frag,
                         string revOp = opName,
                         bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    foreach _ = BoolToList<P.HasExtSDWA>.ret in
      def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
  } // End renamedInGFX9 = GFX9Renamed
}

// Full set of pseudos for an ordinary VOP2 operation: _e32, _e64, _sdwa (via
// the three multiclasses above) plus _dpp when the profile has HasExtDPP.
multiclass VOP2Inst<string opName,
                    VOPProfile P,
                    SDPatternOperator node = null_frag,
                    string revOp = opName,
                    bit GFX9Renamed = 0> :
    VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
    VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
    VOP2Inst_sdwa<opName, P, node, revOp, GFX9Renamed> {
  let renamedInGFX9 = GFX9Renamed in {
    foreach _ = BoolToList<P.HasExtDPP>.ret in
      def _dpp : VOP2_DPP_Pseudo <opName, P>;
  }
}

// Pseudos for VOP2 operations whose 32-bit forms define VCC. VCC is also
// added to Uses when `useSGPRInput` is set (by default: when the profile has
// three source arguments). The _e64 form sits outside the Uses/Defs scope.
multiclass VOP2bInst <string opName,
                      VOPProfile P,
                      SDPatternOperator node = null_frag,
                      string revOp = opName,
                      bit GFX9Renamed = 0,
                      bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
  let renamedInGFX9 = GFX9Renamed in {
    let SchedRW = [Write32Bit, WriteSALU] in {
      let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
        def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
                   Commutable_REV<revOp#"_e32", !eq(revOp, opName)> {
          let usesCustomInserter = !eq(P.NumSrcArgs, 2);
        }

        foreach _ = BoolToList<P.HasExtSDWA>.ret in
          def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
            let AsmMatchConverter = "cvtSdwaVOP2b";
          }
        foreach _ = BoolToList<P.HasExtDPP>.ret in
          def _dpp : VOP2_DPP_Pseudo <opName, P>;
      }

      def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
                 Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
    }
  }
}

// Assembler alias for a VOP2b instruction: the asm string is the profile's
// Asm32 with every "vcc" replaced by `opnd`.
class VOP2bInstAlias <VOP2_Pseudo ps, Instruction inst,
                      string OpName, string opnd> :
  InstAlias <OpName#" "#!subst("vcc", opnd, ps.Pfl.Asm32),
             (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
                   ps.Pfl.Src1RC32:$src1)>,
  PredicateControl {
}

// Emits one VOP2bInstAlias per wave size: "vcc_lo" under isWave32 and "vcc"
// under isWave64.
multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> {
  let WaveSizePredicate = isWave32 in {
    def : VOP2bInstAlias<ps, inst, OpName, "vcc_lo">;
  }
  let WaveSizePredicate = isWave64 in {
    def : VOP2bInstAlias<ps, inst, OpName, "vcc">;
  }
}

// Pseudos for VOP2 operations whose 32-bit forms may read VCC (added to Uses
// when `useSGPRInput` is set) but do not define it. Only the _e64 form gets a
// selection pattern.
multiclass VOP2eInst <string opName,
                      VOPProfile P,
                      SDPatternOperator node = null_frag,
                      string revOp = opName,
                      bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {

  let SchedRW = [Write32Bit] in {
    let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
      def _e32 : VOP2_Pseudo <opName, P>,
                 Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;

      foreach _ = BoolToList<P.HasExtSDWA>.ret in
        def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
          let AsmMatchConverter = "cvtSdwaVOP2e";
        }

      foreach _ = BoolToList<P.HasExtDPP>.ret in
        def _dpp : VOP2_DPP_Pseudo <opName, P>;
    }

    def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
               Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
  }
}

// Assembler alias for a VOP2e instruction: the profile's Asm32 followed by
// ", " and `opnd`.
class VOP2eInstAlias <VOP2_Pseudo ps, Instruction inst, string opnd> :
  InstAlias <ps.OpName#" "#ps.Pfl.Asm32#", "#opnd,
             (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
                   ps.Pfl.Src1RC32:$src1)>,
  PredicateControl {
}

// Emits one VOP2eInstAlias per wave size: "vcc_lo" under isWave32 and "vcc"
// under isWave64.
multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
  let WaveSizePredicate = isWave32 in {
    def : VOP2eInstAlias<ps, inst, "vcc_lo">;
  }
  let WaveSizePredicate = isWave64 in {
    def : VOP2eInstAlias<ps, inst, "vcc">;
  }
}

// Profile for the "madak" style operations: $vdst, $src0, $src1, $imm.
// Both the literal operand type and the src0 operand class are selected by
// the size of `vt` (32-bit -> f32 variants, otherwise f16 variants).
class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
  field dag Ins32 = !if(!eq(vt.Size, 32),
                        (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm),
                        (ins VCSrc_f16:$src0, VGPR_32:$src1, ImmOpType:$imm));
  field bit HasExt = 0;

  // Hack to stop printing _e64
  let DstRC = RegisterOperand<VGPR_32>;
  field string Asm32 = " $vdst, $src0, $src1, $imm";
}

def VOP_MADAK_F16 : VOP_MADAK <f16>;
def VOP_MADAK_F32 : VOP_MADAK <f32>;

// Profile for the "madmk" style operations: $vdst, $src0, $imm, $src1.
// The src0 operand class follows the size of `vt`, exactly as in VOP_MADAK.
// Previously VCSrc_f32 was used unconditionally, so the f16 instantiation
// got a 32-bit float source operand class.
class VOP_MADMK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
  field dag Ins32 = !if(!eq(vt.Size, 32),
                        (ins VCSrc_f32:$src0, ImmOpType:$imm, VGPR_32:$src1),
                        (ins VCSrc_f16:$src0, ImmOpType:$imm, VGPR_32:$src1));
  field bit HasExt = 0;

  // Hack to stop printing _e64
  let DstRC = RegisterOperand<VGPR_32>;
  field string Asm32 = " $vdst, $src0, $imm, $src1";
}

def VOP_MADMK_F16 : VOP_MADMK <f16>;
def VOP_MADMK_F32 : VOP_MADMK <f32>;

// FIXME: Remove src2_modifiers. It isn't used, so is wasting memory
// and processing time but it makes it easier to convert to mad.
// Profile for multiply-accumulate style operations. Every input list carries
// an extra VGPR_32:$src2 operand, while HasSrc2 / HasSrc2Mods are cleared.
// `vt0` is the result/accumulator type, `vt1` (default: vt0) the type of the
// two multiplicands.
class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, vt0]> {
  // Input operand lists.
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
  let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
                       0, HasModifiers, HasModifiers, HasOMod,
                       Src0Mod, Src1Mod, Src2Mod>.ret;
  let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                    VGPR_32:$src2, // stub argument
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));

  let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                     Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                     VGPR_32:$src2, // stub argument
                     dpp8:$dpp8, FI:$fi);

  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                     VGPR_32:$src2, // stub argument
                     clampmod:$clamp, omod:$omod,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);

  // Assembly operand strings.
  let Asm32    = getAsm32<1, 2, vt0>.ret;
  let Asm64    = getAsm64<1, 2, 0, HasModifiers, HasOMod, vt0>.ret;
  let AsmDPP   = getAsmDPP<1, 2, HasModifiers, vt0>.ret;
  let AsmDPP16 = getAsmDPP16<1, 2, HasModifiers, vt0>.ret;
  let AsmDPP8  = getAsmDPP8<1, 2, 0, vt0>.ret;
  let AsmSDWA  = getAsmSDWA<1, 2, vt0>.ret;
  let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt0>.ret;

  let HasSrc2     = 0;
  let HasSrc2Mods = 0;

  // Extended encodings offered by this profile.
  let HasExt      = 1;
  let HasExtDPP   = 1;
  let HasExtSDWA  = 1;
  let HasExtSDWA9 = 0;
  let TieRegDPP   = "$src2";
}

def VOP_MAC_F16 : VOP_MAC <f16>;
def VOP_MAC_F32 : VOP_MAC <f32>;

// VOP_MAC variant used by the accumulating dot-product instructions: no
// clamp, no SDWA, modifiers enabled, no op_sel, not packed.
class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> {
  let HasClamp     = 0;
  let HasExtSDWA   = 0;
  let HasModifiers = 1;
  let HasOpSel     = 0;
  let IsPacked     = 0;
}

def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> {
  let Src0ModDPP = FPVRegInputMods;
  let Src1ModDPP = FPVRegInputMods;
}
def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32>;

// Write out to vcc or arbitrary SGPR.
def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp=*/1> {
  let Asm32    = "$vdst, vcc, $src0, $src1";
  let Asm64    = "$vdst, $sdst, $src0, $src1$clamp";
  let AsmSDWA  = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP   = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8  = "$vdst, vcc, $src0, $src1 $dpp8$fi";
  let AsmDPP16 = AsmDPP#"$fi";
  let Outs32   = (outs DstRC:$vdst);
  let Outs64   = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
}

// Write out to vcc or arbitrary SGPR and read in from vcc or
// arbitrary SGPR.
def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*/1> {
  let Asm32    = "$vdst, vcc, $src0, $src1, vcc";
  let Asm64    = "$vdst, $sdst, $src0, $src1, $src2$clamp";
  let AsmSDWA  = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP   = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8  = "$vdst, vcc, $src0, $src1, vcc $dpp8$fi";
  let AsmDPP16 = AsmDPP#"$fi";
  let Outs32   = (outs DstRC:$vdst);
  let Outs64   = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);

  // Suppress src2 implied by type since the 32-bit encoding uses an
  // implicit VCC use.
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);

  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                     clampmod:$clamp,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);

  let InsDPP = (ins DstRCDPP:$old,
                    Src0DPP:$src0,
                    Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));

  let HasExt      = 1;
  let HasExtDPP   = 1;
  let HasExtSDWA  = 1;
  let HasExtSDWA9 = 1;
}

// Read in from vcc or arbitrary SGPR.
def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableF32SrcMods=*/1> {
  let Asm32    = "$vdst, $src0, $src1";
  let Asm64    = "$vdst, $src0_modifiers, $src1_modifiers, $src2";
  let AsmSDWA  = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP   = "$vdst, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8  = "$vdst, $src0, $src1, vcc $dpp8$fi";
  let AsmDPP16 = AsmDPP#"$fi";

  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst);

  // Suppress src2 implied by type since the 32-bit encoding uses an
  // implicit VCC use.
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);

  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                     clampmod:$clamp,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);

  let InsDPP = (ins DstRCDPP:$old,
                    Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));

  let HasExt      = 1;
  let HasExtDPP   = 1;
  let HasExtSDWA  = 1;
  let HasExtSDWA9 = 1;
}

// Profile for v_readlane_b32: SReg_32 destination, no extended encodings, and
// identical operands / asm string for the 32- and 64-bit forms.
def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
  let Outs32 = (outs SReg_32:$vdst);
  let Outs64 = Outs32;
  let Ins32  = (ins VRegOrLds_32:$src0, SCSrc_b32:$src1);
  let Ins64  = Ins32;
  let Asm32  = " $vdst, $src0, $src1";
  let Asm64  = Asm32;

  let HasExt      = 0;
  let HasExtDPP   = 0;
  let HasExtSDWA  = 0;
  let HasExtSDWA9 = 0;
}

// Profile for v_writelane_b32: VGPR_32 destination plus a $vdst_in input
// operand that does not appear in the asm string.
def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
  let Outs32 = (outs VGPR_32:$vdst);
  let Outs64 = Outs32;
  let Ins32  = (ins SCSrc_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in);
  let Ins64  = Ins32;
  let Asm32  = " $vdst, $src0, $src1";
  let Asm64  = Asm32;
  let HasSrc2     = 0;
  let HasSrc2Mods = 0;

  let HasExt      = 0;
  let HasExtDPP   = 0;
  let HasExtSDWA  = 0;
  let HasExtSDWA9 = 0;
}

//===----------------------------------------------------------------------===//
// VOP2 Instructions
//===----------------------------------------------------------------------===//

defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>;
let SubtargetPredicate = HasMadMacF32Insts in
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;

let isCommutable = 1 in {
defm V_ADD_F32        : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, any_fadd>;
defm V_SUB_F32        : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, fsub>;
defm V_SUBREV_F32     : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
defm V_MUL_F32        : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, any_fmul>;
defm V_MUL_I32_I24    : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>;
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_i24>;
defm V_MUL_U32_U24    : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>;
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_u24>;
defm V_MIN_F32        : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>;
defm V_MAX_F32        : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>;
defm V_MIN_I32        : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
defm V_MAX_I32        : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
defm V_MIN_U32        : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
defm V_MAX_U32        : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
defm V_LSHRREV_B32    : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, lshr_rev, "v_lshr_b32">;
defm V_ASHRREV_I32    : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, ashr_rev, "v_ashr_i32">;
defm V_LSHLREV_B32    : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, lshl_rev, "v_lshl_b32">;
defm V_AND_B32        : VOP2Inst <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, and>;
defm V_OR_B32         : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
defm V_XOR_B32        : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;

let mayRaiseFPException = 0 in {
let SubtargetPredicate = HasMadMacF32Insts in {
let Constraints = "$vdst = $src2", DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1 in {
defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>;
}

def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>;
} // End SubtargetPredicate = HasMadMacF32Insts
} // End mayRaiseFPException = 0

// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.

// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in VI,
// but the VI instructions behave the same as the SI versions.
defm V_ADD_I32     : VOP2bInst <"v_add_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_i32", 1>;
defm V_SUB_I32     : VOP2bInst <"v_sub_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32", 1>;
defm V_SUBREV_I32  : VOP2bInst <"v_subrev_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32", 1>;
defm V_ADDC_U32    : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32", 1>;
defm V_SUBB_U32    : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;
defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;


let SubtargetPredicate = HasAddNoCarryInsts in {
defm V_ADD_U32    : VOP2Inst <"v_add_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_add_u32", 1>;
defm V_SUB_U32    : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
} // End SubtargetPredicate = HasAddNoCarryInsts

} // End isCommutable = 1

// These are special and do not read the exec mask.
let isConvergent = 1, Uses = []<Register> in {
def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
  [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>;

let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE,
  [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]>;
} // End $vdst = $vdst_in, DisableEncoding $vdst_in
} // End isConvergent = 1

defm V_BFM_B32            : VOP2Inst <"v_bfm_b32", VOP_NO_EXT<VOP_I32_I32_I32>>;
defm V_BCNT_U32_B32       : VOP2Inst <"v_bcnt_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, add_ctpop>;
defm V_MBCNT_LO_U32_B32   : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_lo>;
defm V_MBCNT_HI_U32_B32   : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_hi>;
defm V_LDEXP_F32          : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT<VOP_F32_F32_I32>, AMDGPUldexp>;
defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst"

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_i16_f32>;
defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_u16_f32>;
} // End ReadsModeReg = 0, mayRaiseFPException = 0

defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT<VOP_V2F16_F32_F32>, AMDGPUpkrtz_f16_f32>;
defm V_CVT_PK_U16_U32    : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_u16_u32>;
defm V_CVT_PK_I16_I32    : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_i16_i32>;


let SubtargetPredicate = isGFX6GFX7 in {
defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>;
defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>;
} // End SubtargetPredicate = isGFX6GFX7

let isCommutable = 1 in {
let SubtargetPredicate = isGFX6GFX7GFX10 in {
let OtherPredicates = [HasMadMacF32Insts] in
defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>;
} // End SubtargetPredicate = isGFX6GFX7GFX10
let SubtargetPredicate = isGFX6GFX7 in {
defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, srl>;
defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, sra>;
defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, shl>;
} // End SubtargetPredicate = isGFX6GFX7
} // End isCommutable = 1


// Selects `Inst` for the divergent form of the binary node `Op`. When `Inst`
// is not the original of its commutable pair (IsOrig is false) the two source
// operands are swapped in the output.
class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
  GCNPat<
      (getDivergentFrag<Op>.ret Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
      !if(!cast<Commutable_REV>(Inst).IsOrig,
          (Inst $src0, $src1),
          (Inst $src1, $src0)
         )
  >;

// Same as DivergentBinOp, but the output instruction takes a third operand,
// which is always passed as 0.
class DivergentClampingBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
  GCNPat<
      (getDivergentFrag<Op>.ret Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
      !if(!cast<Commutable_REV>(Inst).IsOrig,
          (Inst $src0, $src1, 0),
          (Inst $src1, $src0, 0)
         )
  >;

def : DivergentBinOp<srl, V_LSHRREV_B32_e64>;
def : DivergentBinOp<sra, V_ASHRREV_I32_e64>;
def : DivergentBinOp<shl, V_LSHLREV_B32_e64>;

let SubtargetPredicate = HasAddNoCarryInsts in {
  def : DivergentClampingBinOp<add, V_ADD_U32_e64>;
  def : DivergentClampingBinOp<sub, V_SUB_U32_e64>;
}

let SubtargetPredicate = isGFX6GFX7GFX8GFX9, Predicates = [isGFX6GFX7GFX8GFX9] in {
def : DivergentClampingBinOp<add, V_ADD_I32_e64>;
def : DivergentClampingBinOp<sub, V_SUB_I32_e64>;
}

def : DivergentBinOp<adde, V_ADDC_U32_e32>;
def : DivergentBinOp<sube, V_SUBB_U32_e32>;

// Splits a divergent 64-bit binary operation into two applications of `Inst`,
// one on the sub0 halves and one on the sub1 halves, and reassembles the
// results into a VReg_64.
class divergent_i64_BinOp <SDPatternOperator Op, Instruction Inst> :
  GCNPat<
      (getDivergentFrag<Op>.ret i64:$src0, i64:$src1),
      (REG_SEQUENCE VReg_64,
        (Inst
          (i32 (EXTRACT_SUBREG $src0, sub0)),
          (i32 (EXTRACT_SUBREG $src1, sub0))
        ), sub0,
        (Inst
          (i32 (EXTRACT_SUBREG $src0, sub1)),
          (i32 (EXTRACT_SUBREG $src1, sub1))
        ), sub1
      )
  >;

def : divergent_i64_BinOp <and, V_AND_B32_e32>;
def : divergent_i64_BinOp <or, V_OR_B32_e32>;
def : divergent_i64_BinOp <xor, V_XOR_B32_e32>;

let SubtargetPredicate = Has16BitInsts in {

let FPDPRounding = 1 in {
def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">;
defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>;
} // End FPDPRounding = 1

defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16, lshl_rev>;
defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16, lshr_rev>;
defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16, ashr_rev>;

let isCommutable = 1 in {
let FPDPRounding = 1 in {
defm V_ADD_F16    : VOP2Inst <"v_add_f16", VOP_F16_F16_F16, any_fadd>;
defm V_SUB_F16    : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16, fsub>;
defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">;
defm V_MUL_F16    : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, any_fmul>;

let mayRaiseFPException = 0 in {
def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">;
}

} // End FPDPRounding = 1
defm V_ADD_U16    : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>;
defm V_SUB_U16    : VOP2Inst <"v_sub_u16", VOP_I16_I16_I16_ARITH, sub>;
defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16_ARITH, null_frag, "v_sub_u16">;
defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16, mul>;
defm V_MAX_F16    : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>;
defm V_MIN_F16    : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>;
defm V_MAX_U16    : VOP2Inst <"v_max_u16", VOP_I16_I16_I16, umax>;
defm V_MAX_I16    : VOP2Inst <"v_max_i16", VOP_I16_I16_I16, smax>;
defm V_MIN_U16    : VOP2Inst <"v_min_u16", VOP_I16_I16_I16, umin>;
defm V_MIN_I16    : VOP2Inst <"v_min_i16", VOP_I16_I16_I16, smin>;

let Constraints = "$vdst = $src2", DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1 in {
defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>;
}
} // End isCommutable = 1

} // End SubtargetPredicate = Has16BitInsts

let SubtargetPredicate = HasDLInsts in {

defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32>;

let Constraints = "$vdst = $src2",
    DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1 in {
defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>;
}

} // End SubtargetPredicate = HasDLInsts

// Accumulating dot-product instructions; each one carries its own subtarget
// predicate.
let Constraints = "$vdst = $src2",
    DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1,
    IsDOT = 1 in {
  let SubtargetPredicate = HasDot5Insts in
  defm V_DOT2C_F32_F16 : VOP2Inst<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>;
  let SubtargetPredicate = HasDot6Insts in
  defm V_DOT4C_I32_I8 : VOP2Inst<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>;

  let SubtargetPredicate = HasDot4Insts in
  defm V_DOT2C_I32_I16 : VOP2Inst<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>;
  let SubtargetPredicate = HasDot3Insts in
  defm V_DOT8C_I32_I4 : VOP2Inst<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>;
}

// Select the _e32 dot instructions for the non-clamping dot nodes.
let AddedComplexity = 30 in {
  def : GCNPat<
    (f32 (AMDGPUfdot2 v2f16:$src0, v2f16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))),
    (f32 (V_DOT2C_F32_F16_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot5Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot4 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT4C_I32_I8_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot6Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot2 v2i16:$src0, v2i16:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT2C_I32_I16_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot4Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot8 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT8C_I32_I4_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot3Insts;
  }
} // End AddedComplexity = 30

let SubtargetPredicate = isGFX10Plus in {

def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">;
let FPDPRounding = 1 in
def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">;

let isCommutable = 1 in {
def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">;
let FPDPRounding = 1 in
def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">;
} // End isCommutable = 1

let Constraints = "$vdst = $src2",
    DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1 in {
defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>;
}

} // End SubtargetPredicate = isGFX10Plus

let SubtargetPredicate = HasPkFmacF16Inst in {
defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>;
} // End SubtargetPredicate = HasPkFmacF16Inst

// Note: 16-bit instructions produce a 0 result in the high 16-bits
// on GFX8 and GFX9 and preserve high 16 bits on GFX10+
multiclass Arithmetic_i16_0Hi_Pats <SDPatternOperator op, Instruction inst> {

// zext to i32: the instruction result is used directly.
def : GCNPat<
  (i32 (zext (op i16:$src0, i16:$src1))),
  (inst VSrc_b16:$src0, VSrc_b16:$src1)
>;

// zext to i64: the instruction result becomes sub0 and a zero becomes sub1.
def : GCNPat<
  (i64 (zext (op i16:$src0, i16:$src1))),
   (REG_SEQUENCE VReg_64,
     (inst $src0, $src1), sub0,
     (V_MOV_B32_e32 (i32 0)), sub1)
>;
}

// Extends an i1 to i16 with V_CNDMASK_B32_e64, choosing between the constants
// 0 (src0) and 1 (src1) based on $src.
class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
  (i16 (ext i1:$src)),
  (V_CNDMASK_B32_e64 (i32 0/*src0mod*/), (i32 0/*src0*/),
                     (i32 0/*src1mod*/), (i32 1/*src1*/),
                     $src)
>;

// Bitwise and/or/xor on i16 and v2i16 use the 32-bit instructions.
foreach vt = [i16, v2i16] in {
def : GCNPat <
  (and vt:$src0, vt:$src1),
  (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;

def : GCNPat <
  (or vt:$src0, vt:$src1),
  (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;

def : GCNPat <
  (xor vt:$src0, vt:$src1),
  (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;
}

let Predicates = [Has16BitInsts] in {

// Undo sub x, c -> add x, -c canonicalization since c is more likely
// an inline immediate than -c.
// TODO: Also do for 64-bit.
def : GCNPat<
  (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
  (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
>;


let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in {

def : GCNPat<
  (i32 (zext (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)))),
  (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
>;

defm : Arithmetic_i16_0Hi_Pats<add, V_ADD_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<mul, V_MUL_LO_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<sub, V_SUB_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<smin, V_MIN_I16_e64>;
defm : Arithmetic_i16_0Hi_Pats<smax, V_MAX_I16_e64>;
defm : Arithmetic_i16_0Hi_Pats<umin, V_MIN_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<umax, V_MAX_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<lshl_rev, V_LSHLREV_B16_e64>;
defm : Arithmetic_i16_0Hi_Pats<lshr_rev, V_LSHRREV_B16_e64>;
defm : Arithmetic_i16_0Hi_Pats<ashr_rev, V_ASHRREV_I16_e64>;
} // End Predicates = [Has16BitInsts, isGFX7GFX8GFX9]

def : ZExt_i16_i1_Pat<zext>;
def : ZExt_i16_i1_Pat<anyext>;

def : GCNPat <
  (i16 (sext i1:$src)),
  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
                     /*src1mod*/(i32 0), /*src1*/(i32 -1), $src)
>;

} // End Predicates = [Has16BitInsts]


//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//

// DPP encoding of a VOP2 operation. The src0 field holds the fixed value 0xfa;
// scheduling and implicit register information is copied from the pseudo `ps`.
class VOP2_DPP<bits<6> op, VOP2_DPP_Pseudo ps,
               string opName = ps.OpName, VOPProfile p = ps.Pfl,
               bit IsDPP16 = 0> :
    VOP_DPP<opName, p, IsDPP16> {
  bits<8> vdst;
  bits<8> src1;

  // Properties inherited from the pseudo.
  let hasSideEffects = ps.hasSideEffects;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;
  let Defs = ps.Defs;

  // VOP2 part of the encoding.
  let Inst{8-0}   = 0xfa;
  let Inst{16-9}  = !if(p.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0;
}

// VOP2_DPP with IsDPP16 set, predicated on HasDPP16.
class Base_VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
                      string opName = ps.OpName, VOPProfile p = ps.Pfl> :
    VOP2_DPP<op, ps, opName, p, 1> {
  let SubtargetPredicate = HasDPP16;
  let AssemblerPredicate = HasDPP16;
}

// Base_VOP2_DPP16 registered as the GFX10 encoding of the pseudo.
class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
                 string opName = ps.OpName, VOPProfile p = ps.Pfl> :
    Base_VOP2_DPP16<op, ps, opName, p>,
    SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX10>;

// DPP8 encoding of a VOP2 operation, predicated on HasDPP8. The src0 field is
// filled from `fi`, which is not declared in this class (presumably provided
// by VOP_DPP8 - confirm there).
// NOTE(review): the `opName` parameter is not used; the base class always
// receives ps.OpName.
class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
                string opName = ps.OpName, VOPProfile p = ps.Pfl> :
    VOP_DPP8<ps.OpName, p> {
  bits<8> vdst;
  bits<8> src1;

  // Properties inherited from the pseudo.
  let hasSideEffects = ps.hasSideEffects;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;
  let Defs = ps.Defs;

  // VOP2 part of the encoding.
  let Inst{8-0}   = fi;
  let Inst{16-9}  = !if(p.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0;

  let SubtargetPredicate = HasDPP8;
  let AssemblerPredicate = HasDPP8;
}

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

// Multiclasses that create the GFX10 "real" (encoded) records for VOP2
// pseudos. Variants without opName/asmName look the pseudo up by NAME;
// variants with them look it up by opName and print asmName as the mnemonic.
// Throughout, `op` is the 6-bit VOP2 opcode; the VOP3 (_e64) forms build a
// 10-bit opcode by prefixing the bits {0, 1, 0, 0}.
let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
  //===------------------------------- VOP2 -------------------------------===//
  // 64-bit VOP2 form with a trailing 32-bit literal (VOP2_MADKe); the pseudo
  // has no _e32 suffix.
  multiclass VOP2Only_Real_MADK_gfx10<bits<6> op> {
    def _gfx10 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX10>,
      VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }
  // As above, but the pseudo is named opName and the mnemonic is asmName.
  multiclass VOP2Only_Real_MADK_gfx10_with_name<bits<6> op, string opName,
                                                string asmName> {
    def _gfx10 :
        VOP2_Real<!cast<VOP2_Pseudo>(opName), SIEncodingFamily.GFX10>,
        VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(opName).Pfl> {
      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName);
      let AsmString = asmName # ps.AsmOperands;
    }
  }
  // 32-bit VOP2 encoding of NAME_e32.
  multiclass VOP2_Real_e32_gfx10<bits<6> op> {
    def _e32_gfx10 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
  }
  // VOP3 encoding of NAME_e64.
  multiclass VOP2_Real_e64_gfx10<bits<6> op> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
  // SDWA encoding of NAME_sdwa; only emitted when the _e32 profile sets
  // HasExtSDWA9 (the foreach over BoolToList acts as a conditional).
  multiclass VOP2_Real_sdwa_gfx10<bits<6> op> {
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx10 :
      VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
      let DecoderNamespace = "SDWA10";
    }
  }
  // DPP16 encoding of NAME_dpp; only emitted when the _e32 profile sets
  // HasExtDPP.
  // NOTE(review): the DPP16 record is placed in the "SDWA10" decoder
  // namespace, not a DPP-specific one; this looks deliberate -- confirm.
  multiclass VOP2_Real_dpp_gfx10<bits<6> op> {
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
      let DecoderNamespace = "SDWA10";
    }
  }
  // DPP8 encoding, built from the _e32 pseudo; only emitted when the _e32
  // profile sets HasExtDPP.
  multiclass VOP2_Real_dpp8_gfx10<bits<6> op> {
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
      let DecoderNamespace = "DPP8";
    }
  }

  //===------------------------- VOP2 (with name) -------------------------===//
  // Same five forms as above, for instructions whose GFX10 mnemonic (asmName)
  // differs from the pseudo's name (opName).
  multiclass VOP2_Real_e32_gfx10_with_name<bits<6> op, string opName,
                                           string asmName> {
    def _e32_gfx10 :
      VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> {
        VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
        let AsmString = asmName # ps.AsmOperands;
      }
  }
  multiclass VOP2_Real_e64_gfx10_with_name<bits<6> op, string opName,
                                           string asmName> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<{0, 1, 0, 0, op{5-0}},
                  !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
        VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName#"_e64");
        let AsmString = asmName # ps.AsmOperands;
      }
  }
  // The SDWA and DPP16 forms below inherit DecoderNamespace = "SDWA10" from
  // this scope; the DPP8 form overrides it with "DPP8".
  let DecoderNamespace = "SDWA10" in {
    multiclass VOP2_Real_sdwa_gfx10_with_name<bits<6> op, string opName,
                                              string asmName> {
      foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
      def _sdwa_gfx10 :
        VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
        VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
          VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
          let AsmString = asmName # ps.AsmOperands;
        }
    }
    multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName,
                                             string asmName> {
      foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
      def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp")> {
        // The operand string comes from the _e32 profile's AsmDPP16.
        VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
        let AsmString = asmName # ps.Pfl.AsmDPP16;
      }
    }
    multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName,
                                              string asmName> {
      foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
      def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
        VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
        let AsmString = asmName # ps.Pfl.AsmDPP8;
        let DecoderNamespace = "DPP8";
      }
    }
  } // End DecoderNamespace = "SDWA10"

  //===------------------------------ VOP2be ------------------------------===//
  // Forms for instructions whose operand string mentions vcc. The primary
  // record of each form prints the operands with ", vcc" removed. The SDWA,
  // DPP and DPP8 forms also get assembler-only (isAsmParserOnly) _w32 and
  // _w64 records: _w32 spells the operand "vcc_lo" and requires isWave32,
  // _w64 keeps "vcc" and requires isWave64.
  multiclass VOP2be_Real_e32_gfx10<bits<6> op, string opName, string asmName> {
    def _e32_gfx10 :
      VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> {
        VOP2_Pseudo Ps = !cast<VOP2_Pseudo>(opName#"_e32");
        let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
      }
  }
  // The VOP3be form keeps the pseudo's operand string unchanged.
  multiclass VOP2be_Real_e64_gfx10<bits<6> op, string opName, string asmName> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
      VOP3be_gfx10<{0, 1, 0, 0, op{5-0}},
                   !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
        VOP3_Pseudo Ps = !cast<VOP3_Pseudo>(opName#"_e64");
        let AsmString = asmName # Ps.AsmOperands;
      }
  }
  multiclass VOP2be_Real_sdwa_gfx10<bits<6> op, string opName, string asmName> {
    // Primary record: ", vcc" stripped from the operand string.
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx10 :
      VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
        VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
        let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
        let DecoderNamespace = "SDWA10";
      }
    // Wave32 assembler-only spelling: "vcc" -> "vcc_lo".
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_w32_gfx10 :
      Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
        VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
        let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands);
        let isAsmParserOnly = 1;
        let DecoderNamespace = "SDWA10";
        let WaveSizePredicate = isWave32;
      }
    // Wave64 assembler-only spelling: operand string unchanged.
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_w64_gfx10 :
      Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
        VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
        let AsmString = asmName # Ps.AsmOperands;
        let isAsmParserOnly = 1;
        let DecoderNamespace = "SDWA10";
        let WaveSizePredicate = isWave64;
      }
  }
  multiclass VOP2be_Real_dpp_gfx10<bits<6> op, string opName, string asmName> {
    // Primary record: ", vcc" stripped; uses VOP2_DPP16, which carries the
    // GFX10 SIMCInstr mapping.
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_gfx10 :
      VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
        string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
        let AsmString = asmName # !subst(", vcc", "", AsmDPP);
        let DecoderNamespace = "SDWA10";
      }
    // Wave32 assembler-only spelling; built on Base_VOP2_DPP16.
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_w32_gfx10 :
      Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
        string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
        let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave32;
      }
    // Wave64 assembler-only spelling; built on Base_VOP2_DPP16.
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_w64_gfx10 :
      Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
        string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
        let AsmString = asmName # AsmDPP;
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave64;
      }
  }
  multiclass VOP2be_Real_dpp8_gfx10<bits<6> op, string opName, string asmName> {
    // Primary record: ", vcc" stripped from the DPP8 operand string.
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp8_gfx10 :
      VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
        string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
        let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
        let DecoderNamespace = "DPP8";
      }
    // Wave32 assembler-only spelling.
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp8_w32_gfx10 :
      VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
        string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
        let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave32;
      }
    // Wave64 assembler-only spelling.
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp8_w64_gfx10 :
      VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
        string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
        let AsmString = asmName # AsmDPP8;
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave64;
      }
  }

  //===----------------------------- VOP3Only -----------------------------===//
  // Only a VOP3 (_e64) real is created; `op` is the full 10-bit VOP3 opcode.
  multiclass VOP3Only_Real_gfx10<bits<10> op> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }

  //===---------------------------- VOP3beOnly ----------------------------===//
  // VOP3be-only real for the pseudo opName_e64, printed as asmName.
  multiclass VOP3beOnly_Real_gfx10<bits<10> op, string opName, string asmName> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
      VOP3be_gfx10<op, !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
        VOP3_Pseudo Ps = !cast<VOP3_Pseudo>(opName#"_e64");
        let AsmString = asmName # Ps.AsmOperands;
      }
  }
} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10"

// All five GFX10 forms (e32, e64, sdwa, dpp, dpp8) of a vcc-using
// instruction.
multiclass VOP2be_Real_gfx10<bits<6> op, string opName, string asmName> :
  VOP2be_Real_e32_gfx10<op, opName, asmName>,
  VOP2be_Real_e64_gfx10<op, opName, asmName>,
  VOP2be_Real_sdwa_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp8_gfx10<op, opName, asmName>;

// Plain e32/e64 forms looked up by NAME, combined with the vcc-aware
// sdwa/dpp/dpp8 forms looked up by opName.
multiclass VOP2e_Real_gfx10<bits<6> op, string opName, string asmName> :
  VOP2_Real_e32_gfx10<op>,
  VOP2_Real_e64_gfx10<op>,
  VOP2be_Real_sdwa_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp_gfx10<op, opName, asmName>,
  VOP2be_Real_dpp8_gfx10<op, opName, asmName>;

// All five GFX10 forms of an ordinary VOP2 instruction.
multiclass VOP2_Real_gfx10<bits<6> op> :
  VOP2_Real_e32_gfx10<op>, VOP2_Real_e64_gfx10<op>,
  VOP2_Real_sdwa_gfx10<op>, VOP2_Real_dpp_gfx10<op>, VOP2_Real_dpp8_gfx10<op>;

// All five GFX10 forms of a renamed VOP2 instruction.
multiclass VOP2_Real_gfx10_with_name<bits<6> op, string opName,
                                     string asmName> :
  VOP2_Real_e32_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_e64_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_sdwa_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>,
  VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>;

// GFX10 opcode assignments. V_PK_FMAC_F16 gets only the e32 form and the
// *MK/*AK instructions only the literal-carrying MADK form.
defm V_XNOR_B32        : VOP2_Real_gfx10<0x01e>;
defm V_FMAC_F32        : VOP2_Real_gfx10<0x02b>;
defm V_FMAMK_F32       : VOP2Only_Real_MADK_gfx10<0x02c>;
defm V_FMAAK_F32       : VOP2Only_Real_MADK_gfx10<0x02d>;
defm V_ADD_F16         : VOP2_Real_gfx10<0x032>;
defm V_SUB_F16         : VOP2_Real_gfx10<0x033>;
defm V_SUBREV_F16      : VOP2_Real_gfx10<0x034>;
defm V_MUL_F16         : VOP2_Real_gfx10<0x035>;
defm V_FMAC_F16        : VOP2_Real_gfx10<0x036>;
defm V_FMAMK_F16       : VOP2Only_Real_MADK_gfx10<0x037>;
defm V_FMAAK_F16       : VOP2Only_Real_MADK_gfx10<0x038>;
defm V_MAX_F16         : VOP2_Real_gfx10<0x039>;
defm V_MIN_F16         : VOP2_Real_gfx10<0x03a>;
defm V_LDEXP_F16       : VOP2_Real_gfx10<0x03b>;
defm V_PK_FMAC_F16     : VOP2_Real_e32_gfx10<0x03c>;

// VOP2 no carry-in, carry-out.
// The GFX10 reals below reuse existing pseudos (second argument) and print
// them under the GFX10 mnemonic (third argument).
defm V_ADD_NC_U32 :
  VOP2_Real_gfx10_with_name<0x025, "V_ADD_U32", "v_add_nc_u32">;
defm V_SUB_NC_U32 :
  VOP2_Real_gfx10_with_name<0x026, "V_SUB_U32", "v_sub_nc_u32">;
defm V_SUBREV_NC_U32 :
  VOP2_Real_gfx10_with_name<0x027, "V_SUBREV_U32", "v_subrev_nc_u32">;

// VOP2 carry-in, carry-out.
defm V_ADD_CO_CI_U32 :
  VOP2be_Real_gfx10<0x028, "V_ADDC_U32", "v_add_co_ci_u32">;
defm V_SUB_CO_CI_U32 :
  VOP2be_Real_gfx10<0x029, "V_SUBB_U32", "v_sub_co_ci_u32">;
defm V_SUBREV_CO_CI_U32 :
  VOP2be_Real_gfx10<0x02a, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;

// V_CNDMASK_B32 keeps its own name; only its sdwa/dpp/dpp8 forms go through
// the vcc-aware multiclasses.
defm V_CNDMASK_B32 :
  VOP2e_Real_gfx10<0x001, "V_CNDMASK_B32", "v_cndmask_b32">;

// VOP3 only.
// These take the full 10-bit VOP3 opcode; no e32/sdwa/dpp reals are created
// for them here.
defm V_BFM_B32            : VOP3Only_Real_gfx10<0x363>;
defm V_BCNT_U32_B32       : VOP3Only_Real_gfx10<0x364>;
defm V_MBCNT_LO_U32_B32   : VOP3Only_Real_gfx10<0x365>;
defm V_MBCNT_HI_U32_B32   : VOP3Only_Real_gfx10<0x366>;
defm V_LDEXP_F32          : VOP3Only_Real_gfx10<0x362>;
defm V_CVT_PKNORM_I16_F32 : VOP3Only_Real_gfx10<0x368>;
defm V_CVT_PKNORM_U16_F32 : VOP3Only_Real_gfx10<0x369>;
defm V_CVT_PK_U16_U32     : VOP3Only_Real_gfx10<0x36a>;
defm V_CVT_PK_I16_I32     : VOP3Only_Real_gfx10<0x36b>;

// VOP3 carry-in, carry-out.
// VOP3be-only GFX10 reals for the V_*_I32 pseudos, printed under the
// v_*_co_u32 mnemonics.
defm V_ADD_CO_U32 :
  VOP3beOnly_Real_gfx10<0x30f, "V_ADD_I32", "v_add_co_u32">;
defm V_SUB_CO_U32 :
  VOP3beOnly_Real_gfx10<0x310, "V_SUB_I32", "v_sub_co_u32">;
defm V_SUBREV_CO_U32 :
  VOP3beOnly_Real_gfx10<0x319, "V_SUBREV_I32", "v_subrev_co_u32">;

// Assembler aliases tying the e32 pseudos to their GFX10 reals.
let SubtargetPredicate = isGFX10Plus in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx10>;

  defm : VOP2bInstAliases<
    V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx10, "v_add_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx10, "v_sub_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx10, "v_subrev_co_ci_u32">;
} // End SubtargetPredicate = isGFX10Plus

//===----------------------------------------------------------------------===//
// GFX6, GFX7, GFX10.
//===----------------------------------------------------------------------===//

// VOP2 DPP encoding layered on VOP_DPPe: fixed 0xfa in the src0 field,
// src1/vdst encoded when the profile has them, and the 6-bit opcode in
// Inst{30-25}.
class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
  VOP_DPPe <P> {
  bits<8> vdst;
  bits<8> src1;
  let Inst{8-0} = 0xfa; //dpp
  let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31} = 0x0; //encoding
}

// GFX6/GFX7 reals. They use the SI encoding family, and the VOP3 forms build
// a 9-bit opcode by prefixing the bits {1, 0, 0} to the 6-bit VOP2 opcode.
let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  // Pseudo without an _e32 suffix; only the 32-bit VOP2 encoding.
  multiclass VOP2Only_Real_gfx6_gfx7<bits<6> op> {
    def _gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }
  // Pseudo without an _e32 suffix; 64-bit encoding with trailing literal.
  multiclass VOP2Only_Real_MADK_gfx6_gfx7<bits<6> op> {
    def _gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
      VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }
  multiclass VOP2_Real_e32_gfx6_gfx7<bits<6> op> {
    def _e32_gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
  }
  multiclass VOP2_Real_e64_gfx6_gfx7<bits<6> op> {
    def _e64_gfx6_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
  // As VOP2_Real_e64_gfx6_gfx7 but with the VOP3be encoding class.
  multiclass VOP2be_Real_e64_gfx6_gfx7<bits<6> op> {
    def _e64_gfx6_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
      VOP3be_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

// Combinations used when an instruction shares one opcode between GFX6/GFX7
// and GFX10.
multiclass VOP2Only_Real_MADK_gfx6_gfx7_gfx10<bits<6> op> :
  VOP2Only_Real_MADK_gfx6_gfx7<op>, VOP2Only_Real_MADK_gfx10<op>;

multiclass VOP2_Real_gfx6_gfx7<bits<6> op> :
  VOP2_Real_e32_gfx6_gfx7<op>, VOP2_Real_e64_gfx6_gfx7<op>;

multiclass VOP2_Real_gfx6_gfx7_gfx10<bits<6> op> :
  VOP2_Real_gfx6_gfx7<op>, VOP2_Real_gfx10<op>;

multiclass VOP2be_Real_gfx6_gfx7<bits<6> op> :
  VOP2_Real_e32_gfx6_gfx7<op>, VOP2be_Real_e64_gfx6_gfx7<op>;

// Opcodes that are defined here for GFX6/GFX7 only.
defm V_CNDMASK_B32        : VOP2_Real_gfx6_gfx7<0x000>;
defm V_MIN_LEGACY_F32     : VOP2_Real_gfx6_gfx7<0x00d>;
defm V_MAX_LEGACY_F32     : VOP2_Real_gfx6_gfx7<0x00e>;
defm V_LSHR_B32           : VOP2_Real_gfx6_gfx7<0x015>;
defm V_ASHR_I32           : VOP2_Real_gfx6_gfx7<0x017>;
defm V_LSHL_B32           : VOP2_Real_gfx6_gfx7<0x019>;
defm V_BFM_B32            : VOP2_Real_gfx6_gfx7<0x01e>;
defm V_BCNT_U32_B32       : VOP2_Real_gfx6_gfx7<0x022>;
defm V_MBCNT_LO_U32_B32   : VOP2_Real_gfx6_gfx7<0x023>;
defm V_MBCNT_HI_U32_B32   : VOP2_Real_gfx6_gfx7<0x024>;
defm V_LDEXP_F32          : VOP2_Real_gfx6_gfx7<0x02b>;
defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_gfx6_gfx7<0x02c>;
defm V_CVT_PKNORM_I16_F32 : VOP2_Real_gfx6_gfx7<0x02d>;
defm V_CVT_PKNORM_U16_F32 : VOP2_Real_gfx6_gfx7<0x02e>;
defm V_CVT_PK_U16_U32     : VOP2_Real_gfx6_gfx7<0x030>;
defm V_CVT_PK_I16_I32     : VOP2_Real_gfx6_gfx7<0x031>;
defm V_ADD_I32            : VOP2be_Real_gfx6_gfx7<0x025>;
defm V_SUB_I32            : VOP2be_Real_gfx6_gfx7<0x026>;
defm V_SUBREV_I32         : VOP2be_Real_gfx6_gfx7<0x027>;
defm V_ADDC_U32           : VOP2be_Real_gfx6_gfx7<0x028>;
defm V_SUBB_U32           : VOP2be_Real_gfx6_gfx7<0x029>;
defm V_SUBBREV_U32        : VOP2be_Real_gfx6_gfx7<0x02a>;

defm V_READLANE_B32 : VOP2Only_Real_gfx6_gfx7<0x001>;

// The V_WRITELANE_B32 real overrides the operand list it would otherwise
// copy from the pseudo.
let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
  defm V_WRITELANE_B32 : VOP2Only_Real_gfx6_gfx7<0x002>;
} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)

let SubtargetPredicate = isGFX6GFX7 in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx6_gfx7>;
} // End SubtargetPredicate = isGFX6GFX7

// Opcodes shared by GFX6/GFX7 and GFX10.
defm V_ADD_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x003>;
defm V_SUB_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x004>;
defm V_SUBREV_F32         : VOP2_Real_gfx6_gfx7_gfx10<0x005>;
// The OtherPredicates override applies to V_MAC_LEGACY_F32 only.
let OtherPredicates = [HasMadMacF32Insts] in
defm V_MAC_LEGACY_F32     : VOP2_Real_gfx6_gfx7_gfx10<0x006>;
defm V_MUL_LEGACY_F32     : VOP2_Real_gfx6_gfx7_gfx10<0x007>;
defm V_MUL_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x008>;
defm V_MUL_I32_I24        : VOP2_Real_gfx6_gfx7_gfx10<0x009>;
defm V_MUL_HI_I32_I24     : VOP2_Real_gfx6_gfx7_gfx10<0x00a>;
defm V_MUL_U32_U24        : VOP2_Real_gfx6_gfx7_gfx10<0x00b>;
defm V_MUL_HI_U32_U24     : VOP2_Real_gfx6_gfx7_gfx10<0x00c>;
defm V_MIN_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x00f>;
defm V_MAX_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x010>;
defm V_MIN_I32            : VOP2_Real_gfx6_gfx7_gfx10<0x011>;
defm V_MAX_I32            : VOP2_Real_gfx6_gfx7_gfx10<0x012>;
defm V_MIN_U32            : VOP2_Real_gfx6_gfx7_gfx10<0x013>;
defm V_MAX_U32            : VOP2_Real_gfx6_gfx7_gfx10<0x014>;
defm V_LSHRREV_B32        : VOP2_Real_gfx6_gfx7_gfx10<0x016>;
defm V_ASHRREV_I32        : VOP2_Real_gfx6_gfx7_gfx10<0x018>;
defm V_LSHLREV_B32        : VOP2_Real_gfx6_gfx7_gfx10<0x01a>;
defm V_AND_B32            : VOP2_Real_gfx6_gfx7_gfx10<0x01b>;
defm V_OR_B32             : VOP2_Real_gfx6_gfx7_gfx10<0x01c>;
defm V_XOR_B32            : VOP2_Real_gfx6_gfx7_gfx10<0x01d>;
defm V_MAC_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x01f>;
defm V_CVT_PKRTZ_F16_F32  : VOP2_Real_gfx6_gfx7_gfx10<0x02f>;
defm V_MADMK_F32          : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x020>;
defm V_MADAK_F32          : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x021>;

//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {

// Pseudo without an _e32 suffix; 64-bit encoding with trailing literal.
multiclass VOP2_Real_MADK_vi <bits<6> op> {
  def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
            VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
}

multiclass VOP2_Real_e32_vi <bits<6> op> {
  def _e32_vi :
    VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
}

// Takes the full 10-bit VOP3 opcode.
multiclass VOP2_Real_e64_vi <bits<10> op> {
  def _e64_vi :
    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
    VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}

// For instructions that have only a VOP3 encoding on this family. The asm
// string is rebuilt from the pseudo's Mnemonic and operands.
multiclass VOP2_Real_e64only_vi <bits<10> op> {
  def _e64_vi :
    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
    VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
      // Hack to stop printing _e64
      VOP3_Pseudo ps = !cast<VOP3_Pseudo>(NAME#"_e64");
      let OutOperandList = (outs VGPR_32:$vdst);
      let AsmString = ps.Mnemonic # " " # ps.AsmOperands;
    }
}

// e32 plus e64; the VOP3 opcode is the 6-bit opcode prefixed with
// {0, 1, 0, 0}.
multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
  VOP2_Real_e32_vi<op>,
  VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;

} // End AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8"

// SDWA real for the VI family; emitted only when the _e32 profile sets
// HasExtSDWA.
multiclass VOP2_SDWA_Real <bits<6> op> {
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA>.ret in
  def _sdwa_vi :
    VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}

// SDWA real for GFX9; emitted only when the _e32 profile sets HasExtSDWA9.
multiclass VOP2_SDWA9_Real <bits<6> op> {
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}

let AssemblerPredicate = isGFX8Only in {

// GFX8-only reals for the pseudo named OpName, printed as AsmName.
// The e32/e64 records set DecoderNamespace = "GFX8" explicitly; the sdwa and
// dpp records do not set one here.
multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName> {
  def _e32_vi :
    VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.VI>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> {
      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32");
      let AsmString = AsmName # ps.AsmOperands;
      let DecoderNamespace = "GFX8";
    }
  def _e64_vi :
    VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.VI>,
    VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
      VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
      let AsmString = AsmName # ps.AsmOperands;
      let DecoderNamespace = "GFX8";
    }
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA>.ret in
    def _sdwa_vi :
      VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
      VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
        VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
        let AsmString = AsmName # ps.AsmOperands;
      }
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_vi :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.VI>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
        VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
        let AsmString = AsmName # ps.AsmOperands;
      }
}
} // End AssemblerPredicate = isGFX8Only

let AssemblerPredicate = isGFX9Only in {

// GFX9-only reals for the pseudo named OpName, printed as AsmName.
// e32/e64 use DecoderNamespace "GFX9"; the dpp record uses "SDWA9".
multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
  def _e32_gfx9 :
    VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.GFX9>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> {
      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32");
      let AsmString = AsmName # ps.AsmOperands;
      let DecoderNamespace = "GFX9";
    }
  def _e64_gfx9 :
    VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.GFX9>,
    VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
      VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
      let AsmString = AsmName # ps.AsmOperands;
      let DecoderNamespace = "GFX9";
    }
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx9 :
      VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
      VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
        VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
        let AsmString = AsmName # ps.AsmOperands;
      }
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_gfx9 :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.GFX9>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
        VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
        let AsmString = AsmName # ps.AsmOperands;
        let DecoderNamespace = "SDWA9";
      }
}

// GFX9-only reals looked up by NAME, keeping the pseudo's own asm string.
multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
  def _e32_gfx9 :
    VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX9>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>{
      let DecoderNamespace = "GFX9";
    }
  def _e64_gfx9 :
    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
    VOP3e_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
      let DecoderNamespace = "GFX9";
    }
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx9 :
      VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
      }
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_gfx9 :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
        let DecoderNamespace = "SDWA9";
      }
}

} // End AssemblerPredicate = isGFX9Only

// The full VI set for one instruction: e32, e64, VI SDWA, GFX9 SDWA, and a
// DPP real when the _e32 profile sets HasExtDPP.
multiclass VOP2_Real_e32e64_vi <bits<6> op> :
  Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {

  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_vi :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
}

// VI opcode assignments.
defm V_CNDMASK_B32        : VOP2_Real_e32e64_vi <0x0>;
defm V_ADD_F32            : VOP2_Real_e32e64_vi <0x1>;
defm V_SUB_F32            : VOP2_Real_e32e64_vi <0x2>;
defm V_SUBREV_F32         : VOP2_Real_e32e64_vi <0x3>;
defm V_MUL_LEGACY_F32     : VOP2_Real_e32e64_vi <0x4>;
defm V_MUL_F32            : VOP2_Real_e32e64_vi <0x5>;
defm V_MUL_I32_I24        : VOP2_Real_e32e64_vi <0x6>;
defm V_MUL_HI_I32_I24     : VOP2_Real_e32e64_vi <0x7>;
defm V_MUL_U32_U24        : VOP2_Real_e32e64_vi <0x8>;
defm V_MUL_HI_U32_U24     : VOP2_Real_e32e64_vi <0x9>;
defm V_MIN_F32            : VOP2_Real_e32e64_vi <0xa>;
defm V_MAX_F32            : VOP2_Real_e32e64_vi <0xb>;
defm V_MIN_I32            : VOP2_Real_e32e64_vi <0xc>;
defm V_MAX_I32            : VOP2_Real_e32e64_vi <0xd>;
defm V_MIN_U32            : VOP2_Real_e32e64_vi <0xe>;
defm V_MAX_U32            : VOP2_Real_e32e64_vi <0xf>;
defm V_LSHRREV_B32        : VOP2_Real_e32e64_vi <0x10>;
defm V_ASHRREV_I32        : VOP2_Real_e32e64_vi <0x11>;
defm V_LSHLREV_B32        : VOP2_Real_e32e64_vi <0x12>;
defm V_AND_B32            : VOP2_Real_e32e64_vi <0x13>;
defm V_OR_B32             : VOP2_Real_e32e64_vi <0x14>;
defm V_XOR_B32            : VOP2_Real_e32e64_vi <0x15>;
defm V_MAC_F32            : VOP2_Real_e32e64_vi <0x16>;
defm V_MADMK_F32          : VOP2_Real_MADK_vi <0x17>;
defm V_MADAK_F32          : VOP2_Real_MADK_vi <0x18>;

// GFX8-only spellings of the carry instructions (opcodes 0x19-0x1e).
defm V_ADD_U32            : VOP2be_Real_e32e64_vi_only <0x19, "V_ADD_I32", "v_add_u32">;
defm V_SUB_U32            : VOP2be_Real_e32e64_vi_only <0x1a, "V_SUB_I32", "v_sub_u32">;
defm V_SUBREV_U32         : VOP2be_Real_e32e64_vi_only <0x1b, "V_SUBREV_I32", "v_subrev_u32">;
defm V_ADDC_U32           : VOP2be_Real_e32e64_vi_only <0x1c, "V_ADDC_U32", "v_addc_u32">;
defm V_SUBB_U32           : VOP2be_Real_e32e64_vi_only <0x1d, "V_SUBB_U32", "v_subb_u32">;
defm V_SUBBREV_U32        : VOP2be_Real_e32e64_vi_only <0x1e, "V_SUBBREV_U32", "v_subbrev_u32">;

// GFX9-only spellings of the same pseudos and opcodes (v_*_co_u32).
defm V_ADD_CO_U32         : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_I32", "v_add_co_u32">;
defm V_SUB_CO_U32         : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_I32", "v_sub_co_u32">;
defm V_SUBREV_CO_U32      : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_I32", "v_subrev_co_u32">;
defm V_ADDC_CO_U32        : VOP2be_Real_e32e64_gfx9 <0x1c, "V_ADDC_U32", "v_addc_co_u32">;
defm V_SUBB_CO_U32        : VOP2be_Real_e32e64_gfx9 <0x1d, "V_SUBB_U32", "v_subb_co_u32">;
defm V_SUBBREV_CO_U32     : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_subbrev_co_u32">;

// GFX9-only reals for the V_ADD_U32 / V_SUB_U32 / V_SUBREV_U32 pseudos.
defm V_ADD_U32            : VOP2_Real_e32e64_gfx9 <0x34>;
defm V_SUB_U32            : VOP2_Real_e32e64_gfx9 <0x35>;
defm V_SUBREV_U32         : VOP2_Real_e32e64_gfx9 <0x36>;

// Instructions given only a VOP3 real here; full 10-bit VOP3 opcodes.
defm V_BFM_B32            : VOP2_Real_e64only_vi <0x293>;
defm V_BCNT_U32_B32       : VOP2_Real_e64only_vi <0x28b>;
defm V_MBCNT_LO_U32_B32   : VOP2_Real_e64only_vi <0x28c>;
defm V_MBCNT_HI_U32_B32   : VOP2_Real_e64only_vi <0x28d>;
defm V_LDEXP_F32          : VOP2_Real_e64only_vi <0x288>;
defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64only_vi <0x1f0>;
defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64only_vi <0x294>;
defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64only_vi <0x295>;
defm V_CVT_PKRTZ_F16_F32  : VOP2_Real_e64only_vi <0x296>;
defm V_CVT_PK_U16_U32     : VOP2_Real_e64only_vi <0x297>;
defm V_CVT_PK_I16_I32     : VOP2_Real_e64only_vi <0x298>;

// 16-bit instructions.
defm V_ADD_F16            : VOP2_Real_e32e64_vi <0x1f>;
defm V_SUB_F16            : VOP2_Real_e32e64_vi <0x20>;
defm V_SUBREV_F16         : VOP2_Real_e32e64_vi <0x21>;
defm V_MUL_F16            : VOP2_Real_e32e64_vi <0x22>;
defm V_MAC_F16            : VOP2_Real_e32e64_vi <0x23>;
defm V_MADMK_F16          : VOP2_Real_MADK_vi <0x24>;
defm V_MADAK_F16          : VOP2_Real_MADK_vi <0x25>;
defm V_ADD_U16            : VOP2_Real_e32e64_vi <0x26>;
defm V_SUB_U16            : VOP2_Real_e32e64_vi <0x27>;
defm V_SUBREV_U16         : VOP2_Real_e32e64_vi <0x28>;
defm V_MUL_LO_U16         : VOP2_Real_e32e64_vi <0x29>;
defm V_LSHLREV_B16        : VOP2_Real_e32e64_vi <0x2a>;
defm V_LSHRREV_B16        : VOP2_Real_e32e64_vi <0x2b>;
defm V_ASHRREV_I16        : VOP2_Real_e32e64_vi <0x2c>;
defm V_MAX_F16            : VOP2_Real_e32e64_vi <0x2d>;
defm V_MIN_F16            : VOP2_Real_e32e64_vi <0x2e>;
defm V_MAX_U16            : VOP2_Real_e32e64_vi <0x2f>;
defm V_MAX_I16            : VOP2_Real_e32e64_vi <0x30>;
defm V_MIN_U16            : VOP2_Real_e32e64_vi <0x31>;
defm V_MIN_I16            : VOP2_Real_e32e64_vi <0x32>;
defm V_LDEXP_F16          : VOP2_Real_e32e64_vi <0x33>;

let SubtargetPredicate = isGFX8GFX9 in {

// Aliases to simplify matching of floating-point instructions that
// are VOP2 on SI and VOP3 on VI.
// The alias accepts the three-operand spelling and fills every other operand
// of the VOP3 real with 0; one more trailing 0 is supplied when the profile
// has OMod.
class SI2_VI3Alias <string name, VOP3_Real inst> : InstAlias <
  name#" $dst, $src0, $src1",
  !if(inst.Pfl.HasOMod,
      (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0),
      (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0))
>, PredicateControl {
  let UseInstAsmMatchConverter = 0;
  let AsmVariantName = AMDGPUAsmVariants.VOP3;
}

def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;

defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_vi>;

} // End SubtargetPredicate = isGFX8GFX9

let SubtargetPredicate = isGFX9Only in {

// Assembler aliases tying the e32 pseudos to their GFX9 carry reals.
defm : VOP2bInstAliases<V_ADD_I32_e32,     V_ADD_CO_U32_e32_gfx9,     "v_add_co_u32">;
defm : VOP2bInstAliases<V_ADDC_U32_e32,    V_ADDC_CO_U32_e32_gfx9,    "v_addc_co_u32">;
defm : VOP2bInstAliases<V_SUB_I32_e32,     V_SUB_CO_U32_e32_gfx9,     "v_sub_co_u32">;
defm : VOP2bInstAliases<V_SUBB_U32_e32,    V_SUBB_CO_U32_e32_gfx9,    "v_subb_co_u32">;
defm : VOP2bInstAliases<V_SUBREV_I32_e32,  V_SUBREV_CO_U32_e32_gfx9,  "v_subrev_co_u32">;
defm : VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBBREV_CO_U32_e32_gfx9, "v_subbrev_co_u32">;

} // End SubtargetPredicate = isGFX9Only

let SubtargetPredicate = HasDLInsts in {

defm V_FMAC_F32 : VOP2_Real_e32e64_vi <0x3b>;
defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>;

} // End SubtargetPredicate = HasDLInsts

// Accumulating dot-product reals: an e32 form plus DPP, and DPP8 on GFX10.
// No e64 or SDWA forms are created. The gfx9 variant instantiates the DPP
// real unconditionally (no HasExtDPP guard) and names it _dpp_vi.
multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : VOP2_Real_e32_vi<op> {
  def _dpp_vi : VOP2_DPP<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
}

multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> :
  VOP2_Real_e32_gfx10<op>,
  VOP2_Real_dpp_gfx10<op>,
  VOP2_Real_dpp8_gfx10<op>;

let SubtargetPredicate = HasDot5Insts in {
  defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx9<0x37>;
  // NB: Opcode conflicts with V_DOT8C_I32_I4
  // This opcode exists in gfx 10.1* only
  defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx10<0x02>;
} // End SubtargetPredicate = HasDot5Insts

let SubtargetPredicate = HasDot6Insts in {
  defm V_DOT4C_I32_I8  : VOP2_Real_DOT_ACC_gfx9<0x39>;
  defm V_DOT4C_I32_I8  : VOP2_Real_DOT_ACC_gfx10<0x0d>;
} // End SubtargetPredicate = HasDot6Insts

let SubtargetPredicate = HasDot4Insts in {
  defm V_DOT2C_I32_I16 : VOP2_Real_DOT_ACC_gfx9<0x38>;
} // End SubtargetPredicate = HasDot4Insts
let SubtargetPredicate = HasDot3Insts in {
  defm V_DOT8C_I32_I4  : VOP2_Real_DOT_ACC_gfx9<0x3a>;
} // End SubtargetPredicate = HasDot3Insts

let SubtargetPredicate = HasPkFmacF16Inst in {
defm V_PK_FMAC_F16 : VOP2_Real_e32_vi<0x3c>;
} // End SubtargetPredicate = HasPkFmacF16Inst

let SubtargetPredicate = HasDot3Insts in {
  // NB: Opcode conflicts with V_DOT2C_F32_F16
  // The conflicting GFX10 encoding is given its own decoder namespace.
  let DecoderNamespace = "GFX10_B" in
  defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx10<0x02>;
} // End SubtargetPredicate = HasDot3Insts