1//===-- VOP2Instructions.td - Vector Instruction Definitions --------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9//===----------------------------------------------------------------------===// 10// VOP2 Classes 11//===----------------------------------------------------------------------===// 12 13class VOP2e <bits<6> op, VOPProfile P> : Enc32 { 14 bits<8> vdst; 15 bits<9> src0; 16 bits<8> src1; 17 18 let Inst{8-0} = !if(P.HasSrc0, src0, 0); 19 let Inst{16-9} = !if(P.HasSrc1, src1, 0); 20 let Inst{24-17} = !if(P.EmitDst, vdst, 0); 21 let Inst{30-25} = op; 22 let Inst{31} = 0x0; //encoding 23} 24 25class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 { 26 bits<8> vdst; 27 bits<9> src0; 28 bits<8> src1; 29 bits<32> imm; 30 31 let Inst{8-0} = !if(P.HasSrc0, src0, 0); 32 let Inst{16-9} = !if(P.HasSrc1, src1, 0); 33 let Inst{24-17} = !if(P.EmitDst, vdst, 0); 34 let Inst{30-25} = op; 35 let Inst{31} = 0x0; // encoding 36 let Inst{63-32} = imm; 37} 38 39class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> { 40 bits<8> vdst; 41 bits<8> src1; 42 43 let Inst{8-0} = 0xf9; // sdwa 44 let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); 45 let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); 46 let Inst{30-25} = op; 47 let Inst{31} = 0x0; // encoding 48} 49 50class VOP2_SDWA9Ae <bits<6> op, VOPProfile P> : VOP_SDWA9Ae <P> { 51 bits<8> vdst; 52 bits<9> src1; 53 54 let Inst{8-0} = 0xf9; // sdwa 55 let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); 56 let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); 57 let Inst{30-25} = op; 58 let Inst{31} = 0x0; // encoding 59 let Inst{63} = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr 60} 61 62class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suffix = "_e32"> : 63 VOP_Pseudo <opName, suffix, P, P.Outs32, P.Ins32, "", pattern> { 64 65 let AsmOperands = P.Asm32; 66 67 let Size = 4; 68 let mayLoad = 0; 69 let mayStore = 0; 70 let hasSideEffects = 0; 71 72 let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret); 73 74 let mayRaiseFPException = ReadsModeReg; 75 76 let VOP2 = 1; 77 let VALU = 1; 78 let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]); 79 80 let AsmVariantName = AMDGPUAsmVariants.Default; 81} 82 83class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily> : 84 VOP_Real <ps>, 85 InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>, 86 SIMCInstr <ps.PseudoInstr, EncodingFamily> { 87 88 let VALU = 1; 89 let VOP2 = 1; 90 let isPseudo = 0; 91 let isCodeGenOnly = 0; 92 93 let Constraints = ps.Constraints; 94 let DisableEncoding = ps.DisableEncoding; 95 96 // copy relevant pseudo op flags 97 let SubtargetPredicate = ps.SubtargetPredicate; 98 let OtherPredicates = ps.OtherPredicates; 99 let AsmMatchConverter = ps.AsmMatchConverter; 100 let AsmVariantName = ps.AsmVariantName; 101 let Constraints = ps.Constraints; 102 let DisableEncoding = ps.DisableEncoding; 103 let TSFlags = ps.TSFlags; 104 let UseNamedOperandTable = ps.UseNamedOperandTable; 105 let Uses = ps.Uses; 106 let Defs = ps.Defs; 107 let SchedRW = ps.SchedRW; 108 let mayLoad = ps.mayLoad; 109 let mayStore = ps.mayStore; 110} 111 112class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : 113 VOP_SDWA_Pseudo <OpName, P, pattern> { 114 let AsmMatchConverter = "cvtSdwaVOP2"; 115} 116 117class VOP2_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> : 118 VOP_DPP_Pseudo <OpName, P, pattern> { 119} 120 121 122class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies { 123 list<dag> ret = !if(P.HasModifiers, 124 [(set P.DstVT:$vdst, 125 (node (P.Src0VT 126 !if(P.HasOMod, 127 (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), 128 (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))), 129 (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], 130 [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]); 131} 132 133multiclass VOP2Inst_e32<string opName, 134 VOPProfile P, 135 SDPatternOperator node = null_frag, 136 string revOp = opName, 137 bit GFX9Renamed = 0> { 138 let renamedInGFX9 = GFX9Renamed in { 139 def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>, 140 Commutable_REV<revOp#"_e32", !eq(revOp, opName)>; 141 } // End renamedInGFX9 = GFX9Renamed 142} 143 144multiclass VOP2Inst_e64<string opName, 145 VOPProfile P, 146 SDPatternOperator node = null_frag, 147 string revOp = opName, 148 bit GFX9Renamed = 0> { 149 let renamedInGFX9 = GFX9Renamed in { 150 def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>, 151 Commutable_REV<revOp#"_e64", !eq(revOp, opName)>; 152 } // End renamedInGFX9 = GFX9Renamed 153} 154 155multiclass VOP2Inst_sdwa<string opName, 156 VOPProfile P, 157 SDPatternOperator node = null_frag, 158 string revOp = opName, 159 bit GFX9Renamed = 0> { 160 let renamedInGFX9 = GFX9Renamed in { 161 foreach _ = BoolToList<P.HasExtSDWA>.ret in 162 def _sdwa : VOP2_SDWA_Pseudo <opName, P>; 163 } // End renamedInGFX9 = GFX9Renamed 164} 165 166multiclass VOP2Inst<string opName, 167 VOPProfile P, 168 SDPatternOperator node = null_frag, 169 string revOp = opName, 170 bit GFX9Renamed = 0> : 171 VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>, 172 VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>, 173 VOP2Inst_sdwa<opName, P, node, revOp, GFX9Renamed> { 174 let renamedInGFX9 = GFX9Renamed in { 175 foreach _ = BoolToList<P.HasExtDPP>.ret in 176 def _dpp : VOP2_DPP_Pseudo <opName, P>; 177 } 178} 179 180multiclass VOP2bInst <string opName, 181 VOPProfile P, 182 SDPatternOperator node = null_frag, 183 string revOp = opName, 184 bit GFX9Renamed = 0, 185 bit useSGPRInput = !eq(P.NumSrcArgs, 3)> { 186 let renamedInGFX9 = GFX9Renamed in { 187 let SchedRW = [Write32Bit, WriteSALU] in { 188 let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in { 189 def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>, 190 Commutable_REV<revOp#"_e32", !eq(revOp, opName)> { 191 let usesCustomInserter = !eq(P.NumSrcArgs, 2); 192 } 193 194 foreach _ = BoolToList<P.HasExtSDWA>.ret in 195 def _sdwa : VOP2_SDWA_Pseudo <opName, P> { 196 let AsmMatchConverter = "cvtSdwaVOP2b"; 197 } 198 foreach _ = BoolToList<P.HasExtDPP>.ret in 199 def _dpp : VOP2_DPP_Pseudo <opName, P>; 200 } 201 202 def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>, 203 Commutable_REV<revOp#"_e64", !eq(revOp, opName)>; 204 } 205 } 206} 207 208class VOP2bInstAlias <VOP2_Pseudo ps, Instruction inst, 209 string OpName, string opnd> : 210 InstAlias <OpName#" "#!subst("vcc", opnd, ps.Pfl.Asm32), 211 (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0, 212 ps.Pfl.Src1RC32:$src1)>, 213 PredicateControl { 214} 215 216multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> { 217 let WaveSizePredicate = isWave32 in { 218 def : VOP2bInstAlias<ps, inst, OpName, "vcc_lo">; 219 } 220 let WaveSizePredicate = isWave64 in { 221 def : VOP2bInstAlias<ps, inst, OpName, "vcc">; 222 } 223} 224 225multiclass VOP2eInst <string opName, 226 VOPProfile P, 227 SDPatternOperator node = null_frag, 228 string revOp = opName, 229 bit useSGPRInput = !eq(P.NumSrcArgs, 3)> { 230 231 let SchedRW = [Write32Bit] in { 232 let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in { 233 def _e32 : VOP2_Pseudo <opName, P>, 234 Commutable_REV<revOp#"_e32", !eq(revOp, opName)>; 235 236 foreach _ = BoolToList<P.HasExtSDWA>.ret in 237 def _sdwa : VOP2_SDWA_Pseudo <opName, P> { 238 let AsmMatchConverter = "cvtSdwaVOP2e"; 239 } 240 241 foreach _ = BoolToList<P.HasExtDPP>.ret in 242 def _dpp : VOP2_DPP_Pseudo <opName, P>; 243 } 244 245 def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>, 246 Commutable_REV<revOp#"_e64", !eq(revOp, opName)> { 247 let isReMaterializable = 1; 248 } 249 } 250} 251 252class VOP2eInstAlias <VOP2_Pseudo ps, Instruction inst, string opnd = ""> : 253 InstAlias <ps.OpName#" "#ps.Pfl.Asm32#", "#opnd, 254 (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0, 255 ps.Pfl.Src1RC32:$src1)>, PredicateControl; 256 257class VOP2e64InstAlias <VOP3_Pseudo ps, Instruction inst> : 258 InstAlias <ps.OpName#" "#ps.Pfl.Asm64, 259 (inst ps.Pfl.DstRC:$vdst, VOPDstS64orS32:$sdst, 260 ps.Pfl.Src0RC32:$src0, ps.Pfl.Src1RC32:$src1, clampmod:$clamp)>, 261 PredicateControl; 262 263multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> { 264 let WaveSizePredicate = isWave32 in { 265 def : VOP2eInstAlias<ps, inst, "vcc_lo">; 266 } 267 let WaveSizePredicate = isWave64 in { 268 def : VOP2eInstAlias<ps, inst, "vcc">; 269 } 270} 271 272class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> { 273 field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm); 274 field dag Ins32 = !if(!eq(vt.Size, 32), 275 (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm), 276 (ins VCSrc_f16:$src0, VGPR_32:$src1, ImmOpType:$imm)); 277 field bit HasExt = 0; 278 let IsSingle = 1; 279 280 field string Asm32 = "$vdst, $src0, $src1, $imm"; 281} 282 283def VOP_MADAK_F16 : VOP_MADAK <f16>; 284def VOP_MADAK_F32 : VOP_MADAK <f32>; 285 286class VOP_MADMK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> { 287 field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm); 288 field dag Ins32 = (ins VCSrc_f32:$src0, ImmOpType:$imm, VGPR_32:$src1); 289 field bit HasExt = 0; 290 let IsSingle = 1; 291 292 field string Asm32 = "$vdst, $src0, $imm, $src1"; 293} 294 295def VOP_MADMK_F16 : VOP_MADMK <f16>; 296def VOP_MADMK_F32 : VOP_MADMK <f32>; 297 298class getRegisterOperandForVT<ValueType VT> { 299 RegisterOperand ret = RegisterOperand<getVregSrcForVT<VT>.ret>; 300} 301 302// FIXME: Remove src2_modifiers. It isn't used, so is wasting memory 303// and processing time but it makes it easier to convert to mad. 304class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, vt0]> { 305 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT>.ret:$src2); 306 let Ins64 = getIns64<Src0RC64, Src1RC64, getRegisterOperandForVT<Src2VT>.ret, 3, 307 0, HasModifiers, HasModifiers, HasOMod, 308 Src0Mod, Src1Mod, Src2Mod>.ret; 309 let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, 310 Src1ModDPP:$src1_modifiers, Src1DPP:$src1, 311 getVregSrcForVT<Src2VT>.ret:$src2, // stub argument 312 dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, 313 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); 314 let InsDPP16 = !con(InsDPP, (ins FI:$fi)); 315 let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0, 316 Src1ModDPP:$src1_modifiers, Src1DPP:$src1, 317 getVregSrcForVT<Src2VT>.ret:$src2, // stub argument 318 dpp8:$dpp8, FI:$fi); 319 let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, 320 Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, 321 getVregSrcForVT<Src2VT>.ret:$src2, // stub argument 322 clampmod:$clamp, omod:$omod, 323 dst_sel:$dst_sel, dst_unused:$dst_unused, 324 src0_sel:$src0_sel, src1_sel:$src1_sel); 325 let Asm32 = getAsm32<1, 2, vt0>.ret; 326 let Asm64 = getAsm64<1, 2, 0, HasModifiers, HasOMod, vt0>.ret; 327 let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt0>.ret; 328 let AsmDPP16 = getAsmDPP16<1, 2, HasModifiers, vt0>.ret; 329 let AsmDPP8 = getAsmDPP8<1, 2, 0, vt0>.ret; 330 let AsmSDWA = getAsmSDWA<1, 2, vt0>.ret; 331 let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt0>.ret; 332 let HasSrc2 = 0; 333 let HasSrc2Mods = 0; 334 335 let HasExt = 1; 336 let HasExtDPP = 1; 337 let HasExtSDWA = 1; 338 let HasExtSDWA9 = 0; 339 let TieRegDPP = "$src2"; 340} 341 342def VOP_MAC_F16 : VOP_MAC <f16>; 343def VOP_MAC_F32 : VOP_MAC <f32>; 344let HasExtDPP = 0 in 345def VOP_MAC_LEGACY_F32 : VOP_MAC <f32>; 346let HasExtSDWA = 0, HasExt64BitDPP = 1 in 347def VOP_MAC_F64 : VOP_MAC <f64>; 348 349class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> { 350 let HasClamp = 0; 351 let HasExtSDWA = 0; 352 let HasOpSel = 0; 353 let IsPacked = 0; 354} 355 356def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> { 357 let Src0ModDPP = FPVRegInputMods; 358 let Src1ModDPP = FPVRegInputMods; 359} 360 361def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32> { 362 let HasSrc0Mods = 1; 363 let HasSrc1Mods = 1; 364} 365 366// Write out to vcc or arbitrary SGPR. 367def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp=*/1> { 368 let Asm32 = "$vdst, vcc, $src0, $src1"; 369 let Asm64 = "$vdst, $sdst, $src0, $src1$clamp"; 370 let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; 371 let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; 372 let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; 373 let AsmDPP8 = "$vdst, vcc, $src0, $src1 $dpp8$fi"; 374 let AsmDPP16 = AsmDPP#"$fi"; 375 let Outs32 = (outs DstRC:$vdst); 376 let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst); 377} 378 379// Write out to vcc or arbitrary SGPR and read in from vcc or 380// arbitrary SGPR. 381def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*/1> { 382 let Asm32 = "$vdst, vcc, $src0, $src1, vcc"; 383 let Asm64 = "$vdst, $sdst, $src0, $src1, $src2$clamp"; 384 let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; 385 let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; 386 let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; 387 let AsmDPP8 = "$vdst, vcc, $src0, $src1, vcc $dpp8$fi"; 388 let AsmDPP16 = AsmDPP#"$fi"; 389 let Outs32 = (outs DstRC:$vdst); 390 let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst); 391 392 // Suppress src2 implied by type since the 32-bit encoding uses an 393 // implicit VCC use. 394 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); 395 396 let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, 397 Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, 398 clampmod:$clamp, 399 dst_sel:$dst_sel, dst_unused:$dst_unused, 400 src0_sel:$src0_sel, src1_sel:$src1_sel); 401 402 let InsDPP = (ins DstRCDPP:$old, 403 Src0DPP:$src0, 404 Src1DPP:$src1, 405 dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, 406 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); 407 let InsDPP16 = !con(InsDPP, (ins FI:$fi)); 408 409 let HasExt = 1; 410 let HasExtDPP = 1; 411 let HasExtSDWA = 1; 412 let HasExtSDWA9 = 1; 413} 414 415// Read in from vcc or arbitrary SGPR. 416def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableF32SrcMods=*/1> { 417 let Asm32 = "$vdst, $src0, $src1"; 418 let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2"; 419 let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; 420 let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; 421 let AsmDPP = "$vdst, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; 422 let AsmDPP8 = "$vdst, $src0, $src1, vcc $dpp8$fi"; 423 let AsmDPP16 = AsmDPP#"$fi"; 424 425 let Outs32 = (outs DstRC:$vdst); 426 let Outs64 = (outs DstRC:$vdst); 427 428 // Suppress src2 implied by type since the 32-bit encoding uses an 429 // implicit VCC use. 430 let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); 431 432 let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, 433 Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, 434 clampmod:$clamp, 435 dst_sel:$dst_sel, dst_unused:$dst_unused, 436 src0_sel:$src0_sel, src1_sel:$src1_sel); 437 438 let InsDPP = (ins DstRCDPP:$old, 439 Src0ModDPP:$src0_modifiers, Src0DPP:$src0, 440 Src1ModDPP:$src1_modifiers, Src1DPP:$src1, 441 dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, 442 bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); 443 let InsDPP16 = !con(InsDPP, (ins FI:$fi)); 444 445 let HasExt = 1; 446 let HasExtDPP = 1; 447 let HasExtSDWA = 1; 448 let HasExtSDWA9 = 1; 449} 450 451def VOP_READLANE : VOPProfile<[i32, i32, i32]> { 452 let Outs32 = (outs SReg_32:$vdst); 453 let Outs64 = Outs32; 454 let Ins32 = (ins VRegOrLds_32:$src0, SCSrc_b32:$src1); 455 let Ins64 = Ins32; 456 let Asm32 = " $vdst, $src0, $src1"; 457 let Asm64 = Asm32; 458 459 let HasExt = 0; 460 let HasExtDPP = 0; 461 let HasExt64BitDPP = 0; 462 let HasExtSDWA = 0; 463 let HasExtSDWA9 = 0; 464} 465 466def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> { 467 let Outs32 = (outs VGPR_32:$vdst); 468 let Outs64 = Outs32; 469 let Ins32 = (ins SCSrc_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in); 470 let Ins64 = Ins32; 471 let Asm32 = " $vdst, $src0, $src1"; 472 let Asm64 = Asm32; 473 let HasSrc2 = 0; 474 let HasSrc2Mods = 0; 475 476 let HasExt = 0; 477 let HasExtDPP = 0; 478 let HasExt64BitDPP = 0; 479 let HasExtSDWA = 0; 480 let HasExtSDWA9 = 0; 481} 482 483//===----------------------------------------------------------------------===// 484// VOP2 Instructions 485//===----------------------------------------------------------------------===// 486 487defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>; 488let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in 489def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>; 490 491let isCommutable = 1 in { 492let isReMaterializable = 1 in { 493defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, any_fadd>; 494defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, any_fsub>; 495defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">; 496defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>; 497defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, any_fmul>; 498defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>; 499defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_i24>; 500defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>; 501defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_u24>; 502defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>; 503defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>; 504defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>; 505defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>; 506defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>; 507defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>; 508defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, lshr_rev, "v_lshr_b32">; 509defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, ashr_rev, "v_ashr_i32">; 510defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, lshl_rev, "v_lshl_b32">; 511defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, and>; 512defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>; 513defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>; 514} // End isReMaterializable = 1 515 516let mayRaiseFPException = 0 in { 517let OtherPredicates = [HasMadMacF32Insts] in { 518let Constraints = "$vdst = $src2", DisableEncoding="$src2", 519 isConvertibleToThreeAddress = 1 in { 520defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>; 521 522let SubtargetPredicate = isGFX6GFX7GFX10 in 523defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_MAC_LEGACY_F32>; 524} // End Constraints = "$vdst = $src2", DisableEncoding="$src2", 525 // isConvertibleToThreeAddress = 1 526 527let isReMaterializable = 1 in 528def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>; 529} // End OtherPredicates = [HasMadMacF32Insts] 530} // End mayRaiseFPException = 0 531 532// No patterns so that the scalar instructions are always selected. 533// The scalar versions will be replaced with vector when needed later. 534defm V_ADD_CO_U32 : VOP2bInst <"v_add_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_co_u32", 1>; 535defm V_SUB_CO_U32 : VOP2bInst <"v_sub_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>; 536defm V_SUBREV_CO_U32 : VOP2bInst <"v_subrev_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>; 537defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32", 1>; 538defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>; 539defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>; 540 541 542let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in { 543defm V_ADD_U32 : VOP2Inst <"v_add_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_add_u32", 1>; 544defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>; 545defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>; 546} 547 548} // End isCommutable = 1 549 550// These are special and do not read the exec mask. 551let isConvergent = 1, Uses = []<Register> in { 552def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE, 553 [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>; 554 555let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in { 556def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, 557 [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]>; 558} // End $vdst = $vdst_in, DisableEncoding $vdst_in 559} // End isConvergent = 1 560 561let isReMaterializable = 1 in { 562defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_NO_EXT<VOP_I32_I32_I32>>; 563defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, add_ctpop>; 564defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_lo>; 565defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_hi>; 566defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT<VOP_F32_F32_I32>, AMDGPUldexp>; 567 568let ReadsModeReg = 0, mayRaiseFPException = 0 in { 569defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_i16_f32>; 570defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_u16_f32>; 571} 572 573defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT<VOP_V2F16_F32_F32>, AMDGPUpkrtz_f16_f32>; 574defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_u16_u32>; 575defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_i16_i32>; 576 577 578let SubtargetPredicate = isGFX6GFX7 in { 579defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>; 580defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>; 581} // End SubtargetPredicate = isGFX6GFX7 582 583let isCommutable = 1 in { 584let SubtargetPredicate = isGFX6GFX7 in { 585defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, srl>; 586defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, sra>; 587defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, shl>; 588} // End SubtargetPredicate = isGFX6GFX7 589} // End isCommutable = 1 590} // End isReMaterializable = 1 591 592defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst" 593 594class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> : 595 GCNPat< 596 (getDivergentFrag<Op>.ret Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1), 597 !if(!cast<Commutable_REV>(Inst).IsOrig, 598 (Inst $src0, $src1), 599 (Inst $src1, $src0) 600 ) 601 >; 602 603class DivergentClampingBinOp<SDPatternOperator Op, VOP_Pseudo Inst> : 604 GCNPat< 605 (getDivergentFrag<Op>.ret Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1), 606 !if(!cast<Commutable_REV>(Inst).IsOrig, 607 (Inst $src0, $src1, 0), 608 (Inst $src1, $src0, 0) 609 ) 610 >; 611 612def : DivergentBinOp<srl, V_LSHRREV_B32_e64>; 613def : DivergentBinOp<sra, V_ASHRREV_I32_e64>; 614def : DivergentBinOp<shl, V_LSHLREV_B32_e64>; 615 616let SubtargetPredicate = HasAddNoCarryInsts in { 617 def : DivergentClampingBinOp<add, V_ADD_U32_e64>; 618 def : DivergentClampingBinOp<sub, V_SUB_U32_e64>; 619} 620 621let SubtargetPredicate = isGFX6GFX7GFX8GFX9, Predicates = [isGFX6GFX7GFX8GFX9] in { 622def : DivergentClampingBinOp<add, V_ADD_CO_U32_e64>; 623def : DivergentClampingBinOp<sub, V_SUB_CO_U32_e64>; 624} 625 626def : DivergentBinOp<adde, V_ADDC_U32_e32>; 627def : DivergentBinOp<sube, V_SUBB_U32_e32>; 628 629class divergent_i64_BinOp <SDPatternOperator Op, Instruction Inst> : 630 GCNPat< 631 (getDivergentFrag<Op>.ret i64:$src0, i64:$src1), 632 (REG_SEQUENCE VReg_64, 633 (Inst 634 (i32 (EXTRACT_SUBREG $src0, sub0)), 635 (i32 (EXTRACT_SUBREG $src1, sub0)) 636 ), sub0, 637 (Inst 638 (i32 (EXTRACT_SUBREG $src0, sub1)), 639 (i32 (EXTRACT_SUBREG $src1, sub1)) 640 ), sub1 641 ) 642 >; 643 644def : divergent_i64_BinOp <and, V_AND_B32_e32>; 645def : divergent_i64_BinOp <or, V_OR_B32_e32>; 646def : divergent_i64_BinOp <xor, V_XOR_B32_e32>; 647 648let SubtargetPredicate = Has16BitInsts in { 649 650let FPDPRounding = 1 in { 651def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">; 652defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>; 653} // End FPDPRounding = 1 654 655defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16, lshl_rev>; 656defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16, lshr_rev>; 657defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16, ashr_rev>; 658 659let isCommutable = 1 in { 660let FPDPRounding = 1 in { 661defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16, any_fadd>; 662defm V_SUB_F16 : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16, any_fsub>; 663defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">; 664defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, any_fmul>; 665 666let mayRaiseFPException = 0 in { 667def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">; 668} 669 670} // End FPDPRounding = 1 671defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>; 672defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16_ARITH, sub>; 673defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16_ARITH, null_frag, "v_sub_u16">; 674defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16, mul>; 675defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>; 676defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>; 677defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16, umax>; 678defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16, smax>; 679defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16, umin>; 680defm V_MIN_I16 : VOP2Inst <"v_min_i16", VOP_I16_I16_I16, smin>; 681 682let Constraints = "$vdst = $src2", DisableEncoding="$src2", 683 isConvertibleToThreeAddress = 1 in { 684defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>; 685} 686} // End isCommutable = 1 687 688} // End SubtargetPredicate = Has16BitInsts 689 690let SubtargetPredicate = HasDLInsts in { 691 692let isReMaterializable = 1 in 693defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32, xnor>; 694 695let Constraints = "$vdst = $src2", 696 DisableEncoding = "$src2", 697 isConvertibleToThreeAddress = 1, 698 isCommutable = 1 in 699defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>; 700 701} // End SubtargetPredicate = HasDLInsts 702 703let SubtargetPredicate = HasFmaLegacy32 in { 704 705let Constraints = "$vdst = $src2", 706 DisableEncoding = "$src2", 707 isConvertibleToThreeAddress = 1, 708 isCommutable = 1 in 709defm V_FMAC_LEGACY_F32 : VOP2Inst <"v_fmac_legacy_f32", VOP_MAC_LEGACY_F32>; 710 711} // End SubtargetPredicate = HasFmaLegacy32 712 713let SubtargetPredicate = isGFX90APlus, 714 Constraints = "$vdst = $src2", 715 DisableEncoding="$src2", 716 isConvertibleToThreeAddress = 1, 717 isCommutable = 1, 718 SchedRW = [WriteDoubleAdd] in 719defm V_FMAC_F64 : VOP2Inst <"v_fmac_f64", VOP_MAC_F64>; 720 721let Constraints = "$vdst = $src2", 722 DisableEncoding="$src2", 723 isConvertibleToThreeAddress = 1, 724 isCommutable = 1, 725 IsDOT = 1 in { 726 let SubtargetPredicate = HasDot5Insts in 727 defm V_DOT2C_F32_F16 : VOP2Inst<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>; 728 let SubtargetPredicate = HasDot6Insts in 729 defm V_DOT4C_I32_I8 : VOP2Inst<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>; 730 731 let SubtargetPredicate = HasDot4Insts in 732 defm V_DOT2C_I32_I16 : VOP2Inst<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>; 733 let SubtargetPredicate = HasDot3Insts in 734 defm V_DOT8C_I32_I4 : VOP2Inst<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>; 735} 736 737let AddedComplexity = 30 in { 738 def : GCNPat< 739 (f32 (AMDGPUfdot2 v2f16:$src0, v2f16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))), 740 (f32 (V_DOT2C_F32_F16_e32 $src0, $src1, $src2)) 741 > { 742 let SubtargetPredicate = HasDot5Insts; 743 } 744 def : GCNPat< 745 (i32 (int_amdgcn_sdot4 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))), 746 (i32 (V_DOT4C_I32_I8_e32 $src0, $src1, $src2)) 747 > { 748 let SubtargetPredicate = HasDot6Insts; 749 } 750 def : GCNPat< 751 (i32 (int_amdgcn_sdot2 v2i16:$src0, v2i16:$src1, i32:$src2, (i1 DSTCLAMP.NONE))), 752 (i32 (V_DOT2C_I32_I16_e32 $src0, $src1, $src2)) 753 > { 754 let SubtargetPredicate = HasDot4Insts; 755 } 756 def : GCNPat< 757 (i32 (int_amdgcn_sdot8 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))), 758 (i32 (V_DOT8C_I32_I4_e32 $src0, $src1, $src2)) 759 > { 760 let SubtargetPredicate = HasDot3Insts; 761 } 762} // End AddedComplexity = 30 763 764let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1 in { 765def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">; 766 767let isCommutable = 1 in 768def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">; 769} 770 771let SubtargetPredicate = isGFX10Plus in { 772 773let FPDPRounding = 1 in { 774def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">; 775 776let isCommutable = 1 in 777def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">; 778} // End FPDPRounding = 1 779 780let Constraints = "$vdst = $src2", 781 DisableEncoding="$src2", 782 isConvertibleToThreeAddress = 1, 783 isCommutable = 1 in { 784defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>; 785} 786 787} // End SubtargetPredicate = isGFX10Plus 788 789let SubtargetPredicate = HasPkFmacF16Inst in { 790defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>; 791} // End SubtargetPredicate = HasPkFmacF16Inst 792 793// Note: 16-bit instructions produce a 0 result in the high 16-bits 794// on GFX8 and GFX9 and preserve high 16 bits on GFX10+ 795multiclass Arithmetic_i16_0Hi_Pats <SDPatternOperator op, Instruction inst> { 796 797def : GCNPat< 798 (i32 (zext (op i16:$src0, i16:$src1))), 799 (inst VSrc_b16:$src0, VSrc_b16:$src1) 800>; 801 802def : GCNPat< 803 (i64 (zext (op i16:$src0, i16:$src1))), 804 (REG_SEQUENCE VReg_64, 805 (inst $src0, $src1), sub0, 806 (V_MOV_B32_e32 (i32 0)), sub1) 807>; 808} 809 810class ZExt_i16_i1_Pat <SDNode ext> : GCNPat < 811 (i16 (ext i1:$src)), 812 (V_CNDMASK_B32_e64 (i32 0/*src0mod*/), (i32 0/*src0*/), 813 (i32 0/*src1mod*/), (i32 1/*src1*/), 814 $src) 815>; 816 817foreach vt = [i16, v2i16] in { 818def : GCNPat < 819 (and vt:$src0, vt:$src1), 820 (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) 821>; 822 823def : GCNPat < 824 (or vt:$src0, vt:$src1), 825 (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) 826>; 827 828def : GCNPat < 829 (xor vt:$src0, vt:$src1), 830 (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1) 831>; 832} 833 834let Predicates = [Has16BitInsts] in { 835 836// Undo sub x, c -> add x, -c canonicalization since c is more likely 837// an inline immediate than -c. 838// TODO: Also do for 64-bit. 839def : GCNPat< 840 (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)), 841 (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1) 842>; 843 844 845let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in { 846 847def : GCNPat< 848 (i32 (zext (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)))), 849 (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1) 850>; 851 852defm : Arithmetic_i16_0Hi_Pats<add, V_ADD_U16_e64>; 853defm : Arithmetic_i16_0Hi_Pats<mul, V_MUL_LO_U16_e64>; 854defm : Arithmetic_i16_0Hi_Pats<sub, V_SUB_U16_e64>; 855defm : Arithmetic_i16_0Hi_Pats<smin, V_MIN_I16_e64>; 856defm : Arithmetic_i16_0Hi_Pats<smax, V_MAX_I16_e64>; 857defm : Arithmetic_i16_0Hi_Pats<umin, V_MIN_U16_e64>; 858defm : Arithmetic_i16_0Hi_Pats<umax, V_MAX_U16_e64>; 859defm : Arithmetic_i16_0Hi_Pats<lshl_rev, V_LSHLREV_B16_e64>; 860defm : Arithmetic_i16_0Hi_Pats<lshr_rev, V_LSHRREV_B16_e64>; 861defm : Arithmetic_i16_0Hi_Pats<ashr_rev, V_ASHRREV_I16_e64>; 862} // End Predicates = [Has16BitInsts, isGFX7GFX8GFX9] 863 864def : ZExt_i16_i1_Pat<zext>; 865def : ZExt_i16_i1_Pat<anyext>; 866 867def : GCNPat < 868 (i16 (sext i1:$src)), 869 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0), 870 /*src1mod*/(i32 0), /*src1*/(i32 -1), $src) 871>; 872 873} // End Predicates = [Has16BitInsts] 874 875 876let SubtargetPredicate = HasIntClamp in { 877// Set clamp bit for saturation. 878def : VOPBinOpClampPat<uaddsat, V_ADD_CO_U32_e64, i32>; 879def : VOPBinOpClampPat<usubsat, V_SUB_CO_U32_e64, i32>; 880} 881 882let SubtargetPredicate = HasAddNoCarryInsts, OtherPredicates = [HasIntClamp] in { 883let AddedComplexity = 1 in { // Prefer over form with carry-out. 884def : VOPBinOpClampPat<uaddsat, V_ADD_U32_e64, i32>; 885def : VOPBinOpClampPat<usubsat, V_SUB_U32_e64, i32>; 886} 887} 888 889let SubtargetPredicate = Has16BitInsts, OtherPredicates = [HasIntClamp] in { 890def : VOPBinOpClampPat<uaddsat, V_ADD_U16_e64, i16>; 891def : VOPBinOpClampPat<usubsat, V_SUB_U16_e64, i16>; 892} 893 894//===----------------------------------------------------------------------===// 895// Target-specific instruction encodings. 896//===----------------------------------------------------------------------===// 897 898class VOP2_DPP<bits<6> op, VOP2_DPP_Pseudo ps, 899 string opName = ps.OpName, VOPProfile p = ps.Pfl, 900 bit IsDPP16 = 0> : 901 VOP_DPP<opName, p, IsDPP16> { 902 let hasSideEffects = ps.hasSideEffects; 903 let Defs = ps.Defs; 904 let SchedRW = ps.SchedRW; 905 let Uses = ps.Uses; 906 907 bits<8> vdst; 908 bits<8> src1; 909 let Inst{8-0} = 0xfa; 910 let Inst{16-9} = !if(p.HasSrc1, src1{7-0}, 0); 911 let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0); 912 let Inst{30-25} = op; 913 let Inst{31} = 0x0; 914} 915 916class Base_VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps, 917 string opName = ps.OpName, VOPProfile p = ps.Pfl> : 918 VOP2_DPP<op, ps, opName, p, 1> { 919 let AssemblerPredicate = HasDPP16; 920 let SubtargetPredicate = HasDPP16; 921 let OtherPredicates = ps.OtherPredicates; 922} 923 924class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps, 925 string opName = ps.OpName, VOPProfile p = ps.Pfl> : 926 Base_VOP2_DPP16<op, ps, opName, p>, 927 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX10>; 928 929class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps, 930 string opName = ps.OpName, VOPProfile p = ps.Pfl> : 931 VOP_DPP8<ps.OpName, p> { 932 let hasSideEffects = ps.hasSideEffects; 933 let Defs = ps.Defs; 934 let SchedRW = ps.SchedRW; 935 let Uses = ps.Uses; 936 937 bits<8> vdst; 938 bits<8> src1; 939 940 let Inst{8-0} = fi; 941 let Inst{16-9} = !if(p.HasSrc1, src1{7-0}, 0); 942 let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0); 943 let Inst{30-25} = op; 944 let Inst{31} = 0x0; 945 946 let OtherPredicates = ps.OtherPredicates; 947} 948 949//===----------------------------------------------------------------------===// 950// GFX10. 951//===----------------------------------------------------------------------===// 952 953let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { 954 //===------------------------------- VOP2 -------------------------------===// 955 multiclass VOP2Only_Real_MADK_gfx10<bits<6> op> { 956 def _gfx10 : 957 VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX10>, 958 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>; 959 } 960 multiclass VOP2Only_Real_MADK_gfx10_with_name<bits<6> op, string opName, 961 string asmName> { 962 def _gfx10 : 963 VOP2_Real<!cast<VOP2_Pseudo>(opName), SIEncodingFamily.GFX10>, 964 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(opName).Pfl> { 965 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName); 966 let AsmString = asmName # ps.AsmOperands; 967 } 968 } 969 multiclass VOP2_Real_e32_gfx10<bits<6> op> { 970 def _e32_gfx10 : 971 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>, 972 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>; 973 } 974 multiclass VOP2_Real_e64_gfx10<bits<6> op> { 975 def _e64_gfx10 : 976 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>, 977 VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 978 } 979 multiclass VOP2_Real_sdwa_gfx10<bits<6> op> { 980 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in 981 def _sdwa_gfx10 : 982 VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>, 983 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> { 984 let DecoderNamespace = "SDWA10"; 985 } 986 } 987 multiclass VOP2_Real_dpp_gfx10<bits<6> op> { 988 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in 989 def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> { 990 let DecoderNamespace = "SDWA10"; 991 } 992 } 993 multiclass VOP2_Real_dpp8_gfx10<bits<6> op> { 994 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in 995 def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")> { 996 let DecoderNamespace = "DPP8"; 997 } 998 } 999 1000 //===------------------------- VOP2 (with name) -------------------------===// 1001 multiclass VOP2_Real_e32_gfx10_with_name<bits<6> op, string opName, 1002 string asmName> { 1003 def _e32_gfx10 : 1004 VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>, 1005 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> { 1006 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1007 let AsmString = asmName # ps.AsmOperands; 1008 } 1009 } 1010 multiclass VOP2_Real_e64_gfx10_with_name<bits<6> op, string opName, 1011 string asmName> { 1012 def _e64_gfx10 : 1013 VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>, 1014 VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, 1015 !cast<VOP3_Pseudo>(opName#"_e64").Pfl> { 1016 VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName#"_e64"); 1017 let AsmString = asmName # ps.AsmOperands; 1018 } 1019 } 1020 let DecoderNamespace = "SDWA10" in { 1021 multiclass VOP2_Real_sdwa_gfx10_with_name<bits<6> op, string opName, 1022 string asmName> { 1023 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in 1024 def _sdwa_gfx10 : 1025 VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>, 1026 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> { 1027 VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa"); 1028 let AsmString = asmName # ps.AsmOperands; 1029 } 1030 } 1031 multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName, 1032 string asmName> { 1033 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in 1034 def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp")> { 1035 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1036 let AsmString = asmName # ps.Pfl.AsmDPP16; 1037 } 1038 } 1039 multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName, 1040 string asmName> { 1041 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in 1042 def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> { 1043 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1044 let AsmString = asmName # ps.Pfl.AsmDPP8; 1045 let DecoderNamespace = "DPP8"; 1046 } 1047 } 1048 } // End DecoderNamespace = "SDWA10" 1049 1050 //===------------------------------ VOP2be ------------------------------===// 1051 multiclass VOP2be_Real_e32_gfx10<bits<6> op, string opName, string asmName> { 1052 def _e32_gfx10 : 1053 VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>, 1054 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> { 1055 VOP2_Pseudo Ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1056 let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands); 1057 } 1058 } 1059 multiclass VOP2be_Real_e64_gfx10<bits<6> op, string opName, string asmName> { 1060 def _e64_gfx10 : 1061 VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>, 1062 VOP3be_gfx10<{0, 1, 0, 0, op{5-0}}, 1063 !cast<VOP3_Pseudo>(opName#"_e64").Pfl> { 1064 VOP3_Pseudo Ps = !cast<VOP3_Pseudo>(opName#"_e64"); 1065 let AsmString = asmName # Ps.AsmOperands; 1066 } 1067 } 1068 multiclass VOP2be_Real_sdwa_gfx10<bits<6> op, string opName, string asmName> { 1069 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in 1070 def _sdwa_gfx10 : 1071 VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>, 1072 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> { 1073 VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa"); 1074 let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands); 1075 let DecoderNamespace = "SDWA10"; 1076 } 1077 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in 1078 def _sdwa_w32_gfx10 : 1079 Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>, 1080 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> { 1081 VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa"); 1082 let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands); 1083 let isAsmParserOnly = 1; 1084 let DecoderNamespace = "SDWA10"; 1085 let WaveSizePredicate = isWave32; 1086 } 1087 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in 1088 def _sdwa_w64_gfx10 : 1089 Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>, 1090 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> { 1091 VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa"); 1092 let AsmString = asmName # Ps.AsmOperands; 1093 let isAsmParserOnly = 1; 1094 let DecoderNamespace = "SDWA10"; 1095 let WaveSizePredicate = isWave64; 1096 } 1097 } 1098 multiclass VOP2be_Real_dpp_gfx10<bits<6> op, string opName, string asmName> { 1099 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in 1100 def _dpp_gfx10 : 1101 VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> { 1102 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16; 1103 let AsmString = asmName # !subst(", vcc", "", AsmDPP); 1104 let DecoderNamespace = "SDWA10"; 1105 } 1106 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in 1107 def _dpp_w32_gfx10 : 1108 Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> { 1109 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16; 1110 let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP); 1111 let isAsmParserOnly = 1; 1112 let WaveSizePredicate = isWave32; 1113 } 1114 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in 1115 def _dpp_w64_gfx10 : 1116 Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> { 1117 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16; 1118 let AsmString = asmName # AsmDPP; 1119 let isAsmParserOnly = 1; 1120 let WaveSizePredicate = isWave64; 1121 } 1122 } 1123 multiclass VOP2be_Real_dpp8_gfx10<bits<6> op, string opName, string asmName> { 1124 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in 1125 def _dpp8_gfx10 : 1126 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> { 1127 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 1128 let AsmString = asmName # !subst(", vcc", "", AsmDPP8); 1129 let DecoderNamespace = "DPP8"; 1130 } 1131 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in 1132 def _dpp8_w32_gfx10 : 1133 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> { 1134 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 1135 let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8); 1136 let isAsmParserOnly = 1; 1137 let WaveSizePredicate = isWave32; 1138 } 1139 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in 1140 def _dpp8_w64_gfx10 : 1141 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> { 1142 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 1143 let AsmString = asmName # AsmDPP8; 1144 let isAsmParserOnly = 1; 1145 let WaveSizePredicate = isWave64; 1146 } 1147 } 1148 1149 //===----------------------------- VOP3Only -----------------------------===// 1150 multiclass VOP3Only_Real_gfx10<bits<10> op> { 1151 def _e64_gfx10 : 1152 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>, 1153 VOP3e_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> { 1154 let IsSingle = 1; 1155 } 1156 } 1157 1158 //===---------------------------- VOP3beOnly ----------------------------===// 1159 multiclass VOP3beOnly_Real_gfx10<bits<10> op> { 1160 def _e64_gfx10 : 1161 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>, 1162 VOP3be_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> { 1163 let IsSingle = 1; 1164 } 1165 } 1166} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" 1167 1168multiclass VOP2be_Real_gfx10<bits<6> op, string opName, string asmName> : 1169 VOP2be_Real_e32_gfx10<op, opName, asmName>, 1170 VOP2be_Real_e64_gfx10<op, opName, asmName>, 1171 VOP2be_Real_sdwa_gfx10<op, opName, asmName>, 1172 VOP2be_Real_dpp_gfx10<op, opName, asmName>, 1173 VOP2be_Real_dpp8_gfx10<op, opName, asmName>; 1174 1175multiclass VOP2e_Real_gfx10<bits<6> op, string opName, string asmName> : 1176 VOP2_Real_e32_gfx10<op>, 1177 VOP2_Real_e64_gfx10<op>, 1178 VOP2be_Real_sdwa_gfx10<op, opName, asmName>, 1179 VOP2be_Real_dpp_gfx10<op, opName, asmName>, 1180 VOP2be_Real_dpp8_gfx10<op, opName, asmName>; 1181 1182multiclass VOP2_Real_gfx10<bits<6> op> : 1183 VOP2_Real_e32_gfx10<op>, VOP2_Real_e64_gfx10<op>, 1184 VOP2_Real_sdwa_gfx10<op>, VOP2_Real_dpp_gfx10<op>, VOP2_Real_dpp8_gfx10<op>; 1185 1186multiclass VOP2_Real_gfx10_with_name<bits<6> op, string opName, 1187 string asmName> : 1188 VOP2_Real_e32_gfx10_with_name<op, opName, asmName>, 1189 VOP2_Real_e64_gfx10_with_name<op, opName, asmName>, 1190 VOP2_Real_sdwa_gfx10_with_name<op, opName, asmName>, 1191 VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>, 1192 VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>; 1193 1194// NB: Same opcode as v_mac_legacy_f32 1195let DecoderNamespace = "GFX10_B" in 1196defm V_FMAC_LEGACY_F32 : VOP2_Real_gfx10<0x006>; 1197 1198defm V_XNOR_B32 : VOP2_Real_gfx10<0x01e>; 1199defm V_FMAC_F32 : VOP2_Real_gfx10<0x02b>; 1200defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10<0x02c>; 1201defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10<0x02d>; 1202defm V_ADD_F16 : VOP2_Real_gfx10<0x032>; 1203defm V_SUB_F16 : VOP2_Real_gfx10<0x033>; 1204defm V_SUBREV_F16 : VOP2_Real_gfx10<0x034>; 1205defm V_MUL_F16 : VOP2_Real_gfx10<0x035>; 1206defm V_FMAC_F16 : VOP2_Real_gfx10<0x036>; 1207defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10<0x037>; 1208defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10<0x038>; 1209defm V_MAX_F16 : VOP2_Real_gfx10<0x039>; 1210defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>; 1211defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>; 1212 1213let IsSingle = 1 in { 1214defm V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>; 1215} 1216 1217// VOP2 no carry-in, carry-out. 1218defm V_ADD_NC_U32 : 1219 VOP2_Real_gfx10_with_name<0x025, "V_ADD_U32", "v_add_nc_u32">; 1220defm V_SUB_NC_U32 : 1221 VOP2_Real_gfx10_with_name<0x026, "V_SUB_U32", "v_sub_nc_u32">; 1222defm V_SUBREV_NC_U32 : 1223 VOP2_Real_gfx10_with_name<0x027, "V_SUBREV_U32", "v_subrev_nc_u32">; 1224 1225// VOP2 carry-in, carry-out. 1226defm V_ADD_CO_CI_U32 : 1227 VOP2be_Real_gfx10<0x028, "V_ADDC_U32", "v_add_co_ci_u32">; 1228defm V_SUB_CO_CI_U32 : 1229 VOP2be_Real_gfx10<0x029, "V_SUBB_U32", "v_sub_co_ci_u32">; 1230defm V_SUBREV_CO_CI_U32 : 1231 VOP2be_Real_gfx10<0x02a, "V_SUBBREV_U32", "v_subrev_co_ci_u32">; 1232 1233defm V_CNDMASK_B32 : 1234 VOP2e_Real_gfx10<0x001, "V_CNDMASK_B32", "v_cndmask_b32">; 1235 1236// VOP3 only. 1237defm V_BFM_B32 : VOP3Only_Real_gfx10<0x363>; 1238defm V_BCNT_U32_B32 : VOP3Only_Real_gfx10<0x364>; 1239defm V_MBCNT_LO_U32_B32 : VOP3Only_Real_gfx10<0x365>; 1240defm V_MBCNT_HI_U32_B32 : VOP3Only_Real_gfx10<0x366>; 1241defm V_LDEXP_F32 : VOP3Only_Real_gfx10<0x362>; 1242defm V_CVT_PKNORM_I16_F32 : VOP3Only_Real_gfx10<0x368>; 1243defm V_CVT_PKNORM_U16_F32 : VOP3Only_Real_gfx10<0x369>; 1244defm V_CVT_PK_U16_U32 : VOP3Only_Real_gfx10<0x36a>; 1245defm V_CVT_PK_I16_I32 : VOP3Only_Real_gfx10<0x36b>; 1246 1247// VOP3 carry-out. 1248defm V_ADD_CO_U32 : VOP3beOnly_Real_gfx10<0x30f>; 1249defm V_SUB_CO_U32 : VOP3beOnly_Real_gfx10<0x310>; 1250defm V_SUBREV_CO_U32 : VOP3beOnly_Real_gfx10<0x319>; 1251 1252let SubtargetPredicate = isGFX10Plus in { 1253 defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx10>; 1254 1255 defm : VOP2bInstAliases< 1256 V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx10, "v_add_co_ci_u32">; 1257 defm : VOP2bInstAliases< 1258 V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx10, "v_sub_co_ci_u32">; 1259 defm : VOP2bInstAliases< 1260 V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx10, "v_subrev_co_ci_u32">; 1261} // End SubtargetPredicate = isGFX10Plus 1262 1263//===----------------------------------------------------------------------===// 1264// GFX6, GFX7, GFX10. 1265//===----------------------------------------------------------------------===// 1266 1267class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> : 1268 VOP_DPPe <P> { 1269 bits<8> vdst; 1270 bits<8> src1; 1271 let Inst{8-0} = 0xfa; //dpp 1272 let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); 1273 let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); 1274 let Inst{30-25} = op; 1275 let Inst{31} = 0x0; //encoding 1276} 1277 1278let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in { 1279 multiclass VOP2_Lane_Real_gfx6_gfx7<bits<6> op> { 1280 def _gfx6_gfx7 : 1281 VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>, 1282 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>; 1283 } 1284 multiclass VOP2Only_Real_MADK_gfx6_gfx7<bits<6> op> { 1285 def _gfx6_gfx7 : 1286 VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>, 1287 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>; 1288 } 1289 multiclass VOP2_Real_e32_gfx6_gfx7<bits<6> op, string opName = NAME> { 1290 def _e32_gfx6_gfx7 : 1291 VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.SI>, 1292 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl>; 1293 } 1294 multiclass VOP2_Real_e64_gfx6_gfx7<bits<6> op, string opName = NAME> { 1295 def _e64_gfx6_gfx7 : 1296 VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.SI>, 1297 VOP3e_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(opName#"_e64").Pfl>; 1298 } 1299 multiclass VOP2be_Real_e64_gfx6_gfx7<bits<6> op, string opName = NAME> { 1300 def _e64_gfx6_gfx7 : 1301 VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.SI>, 1302 VOP3be_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(opName#"_e64").Pfl>; 1303 } 1304} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" 1305 1306multiclass VOP2Only_Real_MADK_gfx6_gfx7_gfx10<bits<6> op> : 1307 VOP2Only_Real_MADK_gfx6_gfx7<op>, VOP2Only_Real_MADK_gfx10<op>; 1308 1309multiclass VOP2_Real_gfx6_gfx7<bits<6> op> : 1310 VOP2_Real_e32_gfx6_gfx7<op>, VOP2_Real_e64_gfx6_gfx7<op>; 1311 1312multiclass VOP2_Real_gfx6_gfx7_gfx10<bits<6> op> : 1313 VOP2_Real_gfx6_gfx7<op>, VOP2_Real_gfx10<op>; 1314 1315multiclass VOP2be_Real_gfx6_gfx7<bits<6> op> : 1316 VOP2_Real_e32_gfx6_gfx7<op>, VOP2be_Real_e64_gfx6_gfx7<op>; 1317 1318multiclass VOP2be_Real_gfx6_gfx7_with_name<bits<6> op, 1319 string opName, string asmName> { 1320 defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32"); 1321 defvar ps64 = !cast<VOP3_Pseudo>(opName#"_e64"); 1322 1323 let AsmString = asmName # ps32.AsmOperands in { 1324 defm "" : VOP2_Real_e32_gfx6_gfx7<op, opName>; 1325 } 1326 1327 let AsmString = asmName # ps64.AsmOperands in { 1328 defm "" : VOP2be_Real_e64_gfx6_gfx7<op, opName>; 1329 } 1330} 1331 1332defm V_CNDMASK_B32 : VOP2_Real_gfx6_gfx7<0x000>; 1333defm V_MIN_LEGACY_F32 : VOP2_Real_gfx6_gfx7<0x00d>; 1334defm V_MAX_LEGACY_F32 : VOP2_Real_gfx6_gfx7<0x00e>; 1335defm V_LSHR_B32 : VOP2_Real_gfx6_gfx7<0x015>; 1336defm V_ASHR_I32 : VOP2_Real_gfx6_gfx7<0x017>; 1337defm V_LSHL_B32 : VOP2_Real_gfx6_gfx7<0x019>; 1338defm V_BFM_B32 : VOP2_Real_gfx6_gfx7<0x01e>; 1339defm V_BCNT_U32_B32 : VOP2_Real_gfx6_gfx7<0x022>; 1340defm V_MBCNT_LO_U32_B32 : VOP2_Real_gfx6_gfx7<0x023>; 1341defm V_MBCNT_HI_U32_B32 : VOP2_Real_gfx6_gfx7<0x024>; 1342defm V_LDEXP_F32 : VOP2_Real_gfx6_gfx7<0x02b>; 1343defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_gfx6_gfx7<0x02c>; 1344defm V_CVT_PKNORM_I16_F32 : VOP2_Real_gfx6_gfx7<0x02d>; 1345defm V_CVT_PKNORM_U16_F32 : VOP2_Real_gfx6_gfx7<0x02e>; 1346defm V_CVT_PK_U16_U32 : VOP2_Real_gfx6_gfx7<0x030>; 1347defm V_CVT_PK_I16_I32 : VOP2_Real_gfx6_gfx7<0x031>; 1348 1349// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 where renamed to *_U32 in 1350// VI, but the VI instructions behave the same as the SI versions. 1351defm V_ADD_I32 : VOP2be_Real_gfx6_gfx7_with_name<0x025, "V_ADD_CO_U32", "v_add_i32">; 1352defm V_SUB_I32 : VOP2be_Real_gfx6_gfx7_with_name<0x026, "V_SUB_CO_U32", "v_sub_i32">; 1353defm V_SUBREV_I32 : VOP2be_Real_gfx6_gfx7_with_name<0x027, "V_SUBREV_CO_U32", "v_subrev_i32">; 1354defm V_ADDC_U32 : VOP2be_Real_gfx6_gfx7<0x028>; 1355defm V_SUBB_U32 : VOP2be_Real_gfx6_gfx7<0x029>; 1356defm V_SUBBREV_U32 : VOP2be_Real_gfx6_gfx7<0x02a>; 1357 1358defm V_READLANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x001>; 1359 1360let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in { 1361 defm V_WRITELANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x002>; 1362} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) 1363 1364let SubtargetPredicate = isGFX6GFX7 in { 1365 defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx6_gfx7>; 1366 defm : VOP2eInstAliases<V_ADD_CO_U32_e32, V_ADD_I32_e32_gfx6_gfx7>; 1367 defm : VOP2eInstAliases<V_SUB_CO_U32_e32, V_SUB_I32_e32_gfx6_gfx7>; 1368 defm : VOP2eInstAliases<V_SUBREV_CO_U32_e32, V_SUBREV_I32_e32_gfx6_gfx7>; 1369 1370 def : VOP2e64InstAlias<V_ADD_CO_U32_e64, V_ADD_I32_e64_gfx6_gfx7>; 1371 def : VOP2e64InstAlias<V_SUB_CO_U32_e64, V_SUB_I32_e64_gfx6_gfx7>; 1372 def : VOP2e64InstAlias<V_SUBREV_CO_U32_e64, V_SUBREV_I32_e64_gfx6_gfx7>; 1373} // End SubtargetPredicate = isGFX6GFX7 1374 1375defm V_ADD_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x003>; 1376defm V_SUB_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x004>; 1377defm V_SUBREV_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x005>; 1378defm V_MAC_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x006>; 1379defm V_MUL_LEGACY_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x007>; 1380defm V_MUL_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x008>; 1381defm V_MUL_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10<0x009>; 1382defm V_MUL_HI_I32_I24 : VOP2_Real_gfx6_gfx7_gfx10<0x00a>; 1383defm V_MUL_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10<0x00b>; 1384defm V_MUL_HI_U32_U24 : VOP2_Real_gfx6_gfx7_gfx10<0x00c>; 1385defm V_MIN_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x00f>; 1386defm V_MAX_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x010>; 1387defm V_MIN_I32 : VOP2_Real_gfx6_gfx7_gfx10<0x011>; 1388defm V_MAX_I32 : VOP2_Real_gfx6_gfx7_gfx10<0x012>; 1389defm V_MIN_U32 : VOP2_Real_gfx6_gfx7_gfx10<0x013>; 1390defm V_MAX_U32 : VOP2_Real_gfx6_gfx7_gfx10<0x014>; 1391defm V_LSHRREV_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x016>; 1392defm V_ASHRREV_I32 : VOP2_Real_gfx6_gfx7_gfx10<0x018>; 1393defm V_LSHLREV_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x01a>; 1394defm V_AND_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x01b>; 1395defm V_OR_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x01c>; 1396defm V_XOR_B32 : VOP2_Real_gfx6_gfx7_gfx10<0x01d>; 1397defm V_MAC_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x01f>; 1398defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x02f>; 1399defm V_MADMK_F32 : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x020>; 1400defm V_MADAK_F32 : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x021>; 1401 1402//===----------------------------------------------------------------------===// 1403// GFX8, GFX9 (VI). 1404//===----------------------------------------------------------------------===// 1405 1406let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in { 1407 1408multiclass VOP2_Real_MADK_vi <bits<6> op> { 1409 def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>, 1410 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>; 1411} 1412 1413multiclass VOP2_Real_e32_vi <bits<6> op> { 1414 def _e32_vi : 1415 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>, 1416 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>; 1417} 1418 1419multiclass VOP2_Real_e64_vi <bits<10> op> { 1420 def _e64_vi : 1421 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>, 1422 VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 1423} 1424 1425multiclass VOP2_Real_e64only_vi <bits<10> op> { 1426 def _e64_vi : 1427 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>, 1428 VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> { 1429 let IsSingle = 1; 1430 } 1431} 1432 1433multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> : 1434 VOP2_Real_e32_vi<op>, 1435 VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>; 1436 1437} // End AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" 1438 1439multiclass VOP2_SDWA_Real <bits<6> op> { 1440 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA>.ret in 1441 def _sdwa_vi : 1442 VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>, 1443 VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; 1444} 1445 1446multiclass VOP2_SDWA9_Real <bits<6> op> { 1447 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in 1448 def _sdwa_gfx9 : 1449 VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>, 1450 VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>; 1451} 1452 1453let AssemblerPredicate = isGFX8Only in { 1454 1455multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName> { 1456 def _e32_vi : 1457 VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.VI>, 1458 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> { 1459 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32"); 1460 let AsmString = AsmName # ps.AsmOperands; 1461 let DecoderNamespace = "GFX8"; 1462 } 1463 def _e64_vi : 1464 VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.VI>, 1465 VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> { 1466 VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64"); 1467 let AsmString = AsmName # ps.AsmOperands; 1468 let DecoderNamespace = "GFX8"; 1469 } 1470 foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA>.ret in 1471 def _sdwa_vi : 1472 VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>, 1473 VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> { 1474 VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa"); 1475 let AsmString = AsmName # ps.AsmOperands; 1476 } 1477 foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in 1478 def _dpp_vi : 1479 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.VI>, 1480 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> { 1481 VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp"); 1482 let AsmString = AsmName # ps.AsmOperands; 1483 } 1484} 1485} 1486 1487let AssemblerPredicate = isGFX9Only in { 1488 1489multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> { 1490 def _e32_gfx9 : 1491 VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.GFX9>, 1492 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> { 1493 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32"); 1494 let AsmString = AsmName # ps.AsmOperands; 1495 let DecoderNamespace = "GFX9"; 1496 } 1497 def _e64_gfx9 : 1498 VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.GFX9>, 1499 VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> { 1500 VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64"); 1501 let AsmString = AsmName # ps.AsmOperands; 1502 let DecoderNamespace = "GFX9"; 1503 } 1504 foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA9>.ret in 1505 def _sdwa_gfx9 : 1506 VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>, 1507 VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> { 1508 VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa"); 1509 let AsmString = AsmName # ps.AsmOperands; 1510 } 1511 foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in 1512 def _dpp_gfx9 : 1513 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.GFX9>, 1514 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> { 1515 VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp"); 1516 let AsmString = AsmName # ps.AsmOperands; 1517 let DecoderNamespace = "SDWA9"; 1518 } 1519} 1520 1521multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> { 1522 def _e32_gfx9 : 1523 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX9>, 1524 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>{ 1525 let DecoderNamespace = "GFX9"; 1526 } 1527 def _e64_gfx9 : 1528 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>, 1529 VOP3e_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> { 1530 let DecoderNamespace = "GFX9"; 1531 } 1532 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in 1533 def _sdwa_gfx9 : 1534 VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>, 1535 VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> { 1536 } 1537 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in 1538 def _dpp_gfx9 : 1539 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>, 1540 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> { 1541 let DecoderNamespace = "SDWA9"; 1542 } 1543} 1544 1545} // AssemblerPredicate = isGFX9Only 1546 1547multiclass VOP2_Real_e32e64_vi <bits<6> op> : 1548 Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> { 1549 1550 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in 1551 def _dpp_vi : 1552 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>, 1553 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>; 1554} 1555 1556defm V_CNDMASK_B32 : VOP2_Real_e32e64_vi <0x0>; 1557defm V_ADD_F32 : VOP2_Real_e32e64_vi <0x1>; 1558defm V_SUB_F32 : VOP2_Real_e32e64_vi <0x2>; 1559defm V_SUBREV_F32 : VOP2_Real_e32e64_vi <0x3>; 1560let AssemblerPredicate = isGCN3ExcludingGFX90A in 1561defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_vi <0x4>; 1562defm V_MUL_F32 : VOP2_Real_e32e64_vi <0x5>; 1563defm V_MUL_I32_I24 : VOP2_Real_e32e64_vi <0x6>; 1564defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_vi <0x7>; 1565defm V_MUL_U32_U24 : VOP2_Real_e32e64_vi <0x8>; 1566defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_vi <0x9>; 1567defm V_MIN_F32 : VOP2_Real_e32e64_vi <0xa>; 1568defm V_MAX_F32 : VOP2_Real_e32e64_vi <0xb>; 1569defm V_MIN_I32 : VOP2_Real_e32e64_vi <0xc>; 1570defm V_MAX_I32 : VOP2_Real_e32e64_vi <0xd>; 1571defm V_MIN_U32 : VOP2_Real_e32e64_vi <0xe>; 1572defm V_MAX_U32 : VOP2_Real_e32e64_vi <0xf>; 1573defm V_LSHRREV_B32 : VOP2_Real_e32e64_vi <0x10>; 1574defm V_ASHRREV_I32 : VOP2_Real_e32e64_vi <0x11>; 1575defm V_LSHLREV_B32 : VOP2_Real_e32e64_vi <0x12>; 1576defm V_AND_B32 : VOP2_Real_e32e64_vi <0x13>; 1577defm V_OR_B32 : VOP2_Real_e32e64_vi <0x14>; 1578defm V_XOR_B32 : VOP2_Real_e32e64_vi <0x15>; 1579defm V_MAC_F32 : VOP2_Real_e32e64_vi <0x16>; 1580defm V_MADMK_F32 : VOP2_Real_MADK_vi <0x17>; 1581defm V_MADAK_F32 : VOP2_Real_MADK_vi <0x18>; 1582 1583defm V_ADD_U32 : VOP2be_Real_e32e64_vi_only <0x19, "V_ADD_CO_U32", "v_add_u32">; 1584defm V_SUB_U32 : VOP2be_Real_e32e64_vi_only <0x1a, "V_SUB_CO_U32", "v_sub_u32">; 1585defm V_SUBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1b, "V_SUBREV_CO_U32", "v_subrev_u32">; 1586defm V_ADDC_U32 : VOP2be_Real_e32e64_vi_only <0x1c, "V_ADDC_U32", "v_addc_u32">; 1587defm V_SUBB_U32 : VOP2be_Real_e32e64_vi_only <0x1d, "V_SUBB_U32", "v_subb_u32">; 1588defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1e, "V_SUBBREV_U32", "v_subbrev_u32">; 1589 1590defm V_ADD_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_CO_U32", "v_add_co_u32">; 1591defm V_SUB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_CO_U32", "v_sub_co_u32">; 1592defm V_SUBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_CO_U32", "v_subrev_co_u32">; 1593defm V_ADDC_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1c, "V_ADDC_U32", "v_addc_co_u32">; 1594defm V_SUBB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1d, "V_SUBB_U32", "v_subb_co_u32">; 1595defm V_SUBBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_subbrev_co_u32">; 1596 1597defm V_ADD_U32 : VOP2_Real_e32e64_gfx9 <0x34>; 1598defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>; 1599defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>; 1600 1601defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>; 1602defm V_BCNT_U32_B32 : VOP2_Real_e64only_vi <0x28b>; 1603defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64only_vi <0x28c>; 1604defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64only_vi <0x28d>; 1605defm V_LDEXP_F32 : VOP2_Real_e64only_vi <0x288>; 1606defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64only_vi <0x1f0>; 1607defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64only_vi <0x294>; 1608defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64only_vi <0x295>; 1609defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64only_vi <0x296>; 1610defm V_CVT_PK_U16_U32 : VOP2_Real_e64only_vi <0x297>; 1611defm V_CVT_PK_I16_I32 : VOP2_Real_e64only_vi <0x298>; 1612 1613defm V_ADD_F16 : VOP2_Real_e32e64_vi <0x1f>; 1614defm V_SUB_F16 : VOP2_Real_e32e64_vi <0x20>; 1615defm V_SUBREV_F16 : VOP2_Real_e32e64_vi <0x21>; 1616defm V_MUL_F16 : VOP2_Real_e32e64_vi <0x22>; 1617defm V_MAC_F16 : VOP2_Real_e32e64_vi <0x23>; 1618defm V_MADMK_F16 : VOP2_Real_MADK_vi <0x24>; 1619defm V_MADAK_F16 : VOP2_Real_MADK_vi <0x25>; 1620defm V_ADD_U16 : VOP2_Real_e32e64_vi <0x26>; 1621defm V_SUB_U16 : VOP2_Real_e32e64_vi <0x27>; 1622defm V_SUBREV_U16 : VOP2_Real_e32e64_vi <0x28>; 1623defm V_MUL_LO_U16 : VOP2_Real_e32e64_vi <0x29>; 1624defm V_LSHLREV_B16 : VOP2_Real_e32e64_vi <0x2a>; 1625defm V_LSHRREV_B16 : VOP2_Real_e32e64_vi <0x2b>; 1626defm V_ASHRREV_I16 : VOP2_Real_e32e64_vi <0x2c>; 1627defm V_MAX_F16 : VOP2_Real_e32e64_vi <0x2d>; 1628defm V_MIN_F16 : VOP2_Real_e32e64_vi <0x2e>; 1629defm V_MAX_U16 : VOP2_Real_e32e64_vi <0x2f>; 1630defm V_MAX_I16 : VOP2_Real_e32e64_vi <0x30>; 1631defm V_MIN_U16 : VOP2_Real_e32e64_vi <0x31>; 1632defm V_MIN_I16 : VOP2_Real_e32e64_vi <0x32>; 1633defm V_LDEXP_F16 : VOP2_Real_e32e64_vi <0x33>; 1634 1635let SubtargetPredicate = isGFX8GFX9 in { 1636 1637// Aliases to simplify matching of floating-point instructions that 1638// are VOP2 on SI and VOP3 on VI. 1639class SI2_VI3Alias <string name, VOP3_Real inst> : InstAlias < 1640 name#" $dst, $src0, $src1", 1641 !if(inst.Pfl.HasOMod, 1642 (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0), 1643 (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0)) 1644>, PredicateControl { 1645 let UseInstAsmMatchConverter = 0; 1646 let AsmVariantName = AMDGPUAsmVariants.VOP3; 1647} 1648 1649def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>; 1650def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>; 1651def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>; 1652def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>; 1653def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>; 1654 1655defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_vi>; 1656 1657} // End SubtargetPredicate = isGFX8GFX9 1658 1659let SubtargetPredicate = isGFX9Only in { 1660 1661defm : VOP2bInstAliases<V_ADD_U32_e32, V_ADD_CO_U32_e32_gfx9, "v_add_co_u32">; 1662defm : VOP2bInstAliases<V_ADDC_U32_e32, V_ADDC_CO_U32_e32_gfx9, "v_addc_co_u32">; 1663defm : VOP2bInstAliases<V_SUB_U32_e32, V_SUB_CO_U32_e32_gfx9, "v_sub_co_u32">; 1664defm : VOP2bInstAliases<V_SUBB_U32_e32, V_SUBB_CO_U32_e32_gfx9, "v_subb_co_u32">; 1665defm : VOP2bInstAliases<V_SUBREV_U32_e32, V_SUBREV_CO_U32_e32_gfx9, "v_subrev_co_u32">; 1666defm : VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBBREV_CO_U32_e32_gfx9, "v_subbrev_co_u32">; 1667 1668} // End SubtargetPredicate = isGFX9Only 1669 1670let SubtargetPredicate = HasDLInsts in { 1671 1672defm V_FMAC_F32 : VOP2_Real_e32e64_vi <0x3b>; 1673defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>; 1674 1675} // End SubtargetPredicate = HasDLInsts 1676 1677let AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A" in { 1678 multiclass VOP2_Real_e32_gfx90a <bits<6> op> { 1679 def _e32_gfx90a : 1680 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX90A>, 1681 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>; 1682 } 1683 1684 multiclass VOP2_Real_e64_gfx90a <bits<10> op> { 1685 def _e64_gfx90a : 1686 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX90A>, 1687 VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 1688 } 1689 1690 multiclass Base_VOP2_Real_e32e64_gfx90a <bits<6> op> : 1691 VOP2_Real_e32_gfx90a<op>, 1692 VOP2_Real_e64_gfx90a<{0, 1, 0, 0, op{5-0}}>; 1693 1694 multiclass VOP2_Real_e32e64_gfx90a <bits<6> op> : 1695 Base_VOP2_Real_e32e64_gfx90a<op> { 1696 1697 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in 1698 def _dpp_gfx90a : 1699 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX90A>, 1700 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> { 1701 let DecoderNamespace = "SDWA9"; 1702 } 1703 } 1704} // End AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A" 1705 1706let SubtargetPredicate = isGFX90APlus in { 1707 defm V_FMAC_F64 : VOP2_Real_e32e64_gfx90a <0x4>; 1708 let IsSingle = 1 in { 1709 defm V_MUL_LEGACY_F32 : VOP2_Real_e64_gfx90a <0x2a1>; 1710 } 1711} // End SubtargetPredicate = isGFX90APlus 1712 1713multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : VOP2_Real_e32_vi<op> { 1714 def _dpp_vi : VOP2_DPP<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>; 1715} 1716 1717multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> : 1718 VOP2_Real_e32_gfx10<op>, 1719 VOP2_Real_dpp_gfx10<op>, 1720 VOP2_Real_dpp8_gfx10<op>; 1721 1722let SubtargetPredicate = HasDot5Insts in { 1723 defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx9<0x37>; 1724 // NB: Opcode conflicts with V_DOT8C_I32_I4 1725 // This opcode exists in gfx 10.1* only 1726 defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx10<0x02>; 1727} 1728 1729let SubtargetPredicate = HasDot6Insts in { 1730 defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx9<0x39>; 1731 defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx10<0x0d>; 1732} 1733 1734let SubtargetPredicate = HasDot4Insts in { 1735 defm V_DOT2C_I32_I16 : VOP2_Real_DOT_ACC_gfx9<0x38>; 1736} 1737let SubtargetPredicate = HasDot3Insts in { 1738 defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx9<0x3a>; 1739} 1740 1741let SubtargetPredicate = HasPkFmacF16Inst in { 1742defm V_PK_FMAC_F16 : VOP2_Real_e32_vi<0x3c>; 1743} // End SubtargetPredicate = HasPkFmacF16Inst 1744 1745let SubtargetPredicate = HasDot3Insts in { 1746 // NB: Opcode conflicts with V_DOT2C_F32_F16 1747 let DecoderNamespace = "GFX10_B" in 1748 defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx10<0x02>; 1749} 1750