//===-- VOP2Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP2 Classes
//===----------------------------------------------------------------------===//

// Basic VOP2 encoding. Bit layout: src0 in [8:0], src1 in [16:9], vdst in
// [24:17], opcode in [30:25], bit 31 fixed to 0. A field is encoded as 0 when
// the profile reports that the corresponding operand is absent.
class VOP2e <bits<6> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;
  bits<8> src1;

  let Inst{8-0}   = !if(P.HasSrc0, src0, 0);
  let Inst{16-9}  = !if(P.HasSrc1, src1, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
}

// Same low 32 bits as VOP2e, followed by a 32-bit immediate in bits [63:32].
class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 {
  bits<8> vdst;
  bits<9> src0;
  bits<8> src1;
  bits<32> imm;

  let Inst{8-0}   = !if(P.HasSrc0, src0, 0);
  let Inst{16-9}  = !if(P.HasSrc1, src1, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
  let Inst{63-32} = imm;
}

// SDWA form: the src0 field [8:0] holds the fixed value 0xf9; the remaining
// bits are supplied by VOP_SDWAe.
class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;
  bits<8> src1;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
}

// SDWA9 form: like VOP2_SDWAe, but src1 is 9 bits wide and its top bit is
// encoded separately in bit 63.
class VOP2_SDWA9Ae <bits<6> op, VOPProfile P> : VOP_SDWA9Ae <P> {
  bits<8> vdst;
  bits<9> src1;

  let Inst{8-0}   = 0xf9; // sdwa
  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
  let Inst{63}    = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr
}

// Pseudo instruction for the 32-bit form of a VOP2 op. Operand lists and the
// asm operand string come from the profile's 32-bit variants.
class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suffix = "_e32"> :
  VOP_Pseudo <opName, suffix, P, P.Outs32, P.Ins32, "", pattern> {

  let AsmOperands = P.Asm32;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  // The mode register is read whenever the destination or src0 type is a
  // floating-point type.
  let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret);

  let mayRaiseFPException = ReadsModeReg;

  let VOP2 = 1;
  let VALU = 1;
  let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);

  let AsmVariantName = AMDGPUAsmVariants.Default;
}

// Real (encodable) instruction derived from a VOP2_Pseudo for one encoding
// family. Properties that must agree with the pseudo are copied from it.
class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily> :
  VOP_Real <ps>,
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let VALU = 1;
  let VOP2 = 1;
  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let OtherPredicates      = ps.OtherPredicates;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let AsmVariantName       = ps.AsmVariantName;
  let Constraints          = ps.Constraints;
  let DisableEncoding      = ps.DisableEncoding;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses                 = ps.Uses;
  let Defs                 = ps.Defs;
  let SchedRW              = ps.SchedRW;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
}

// SDWA pseudo for VOP2; selects the "cvtSdwaVOP2" asm match converter.
class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP2";
}

// DPP pseudo for VOP2; adds nothing over VOP_DPP_Pseudo.
class VOP2_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_DPP_Pseudo <OpName, P, pattern> {
}


// Builds the selection pattern for the 64-bit (_e64) form of a VOP2 op.
// With modifiers, src0 is matched through VOP3Mods0 (with $omod only when the
// profile has omod) and src1 through VOP3Mods; otherwise the node is matched
// on the plain operands.
class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret = !if(P.HasModifiers,
    [(set P.DstVT:$vdst,
      (node (P.Src0VT
              !if(P.HasOMod,
                  (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
                  (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))),
            (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
    [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]);
}

// Defines the <name>_e32 pseudo. revOp names the op this one is the reversed
// form of; when it equals opName the def is marked as the original of its
// Commutable_REV pair.
multiclass VOP2Inst_e32<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName,
                        bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
               Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
  } // End renamedInGFX9 = GFX9Renamed
}

// Defines the <name>_e64 pseudo (VOP3 form) with the getVOP2Pat64 pattern.
multiclass VOP2Inst_e64<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName,
                        bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
               Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
  } // End renamedInGFX9 = GFX9Renamed
}

// Defines the <name>_sdwa pseudo, only when the profile has HasExtSDWA set.
multiclass VOP2Inst_sdwa<string opName,
                         VOPProfile P,
                         bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    foreach _ = BoolToList<P.HasExtSDWA>.ret in
      def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
  } // End renamedInGFX9 = GFX9Renamed
}

// Defines the full set of pseudos for an ordinary VOP2 op: _e32, _e64, _sdwa
// (if HasExtSDWA) and _dpp (if HasExtDPP).
multiclass VOP2Inst<string opName,
                    VOPProfile P,
                    SDPatternOperator node = null_frag,
                    string revOp = opName,
                    bit GFX9Renamed = 0> :
    VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
    VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
    VOP2Inst_sdwa<opName, P, GFX9Renamed> {
  let renamedInGFX9 = GFX9Renamed in {
    foreach _ = BoolToList<P.HasExtDPP>.ret in
      def _dpp : VOP2_DPP_Pseudo <opName, P>;
  }
}

// Pseudos for VOP2 ops whose 32-bit forms define VCC. The _e32/_sdwa/_dpp
// forms get Defs = [VCC] and, when useSGPRInput is set (three source
// arguments by default), also list VCC in Uses. The _e64 form is outside that
// scope and carries no implicit VCC def/use from here.
multiclass VOP2bInst <string opName,
                      VOPProfile P,
                      SDPatternOperator node = null_frag,
                      string revOp = opName,
                      bit GFX9Renamed = 0,
                      bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
  let renamedInGFX9 = GFX9Renamed in {
    let SchedRW = [Write32Bit, WriteSALU] in {
      let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
        def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
                   Commutable_REV<revOp#"_e32", !eq(revOp, opName)> {
          let usesCustomInserter = true;
        }

        foreach _ = BoolToList<P.HasExtSDWA>.ret in
          def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
            let AsmMatchConverter = "cvtSdwaVOP2b";
          }
        foreach _ = BoolToList<P.HasExtDPP>.ret in
          def _dpp : VOP2_DPP_Pseudo <opName, P>;
      }

      def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
                 Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
    }
  }
}

// Assembler alias for a VOP2b instruction in which every "vcc" in the
// profile's Asm32 string is replaced by opnd.
class VOP2bInstAlias <VOP2_Pseudo ps, Instruction inst,
                      string OpName, string opnd> :
  InstAlias <OpName#" "#!subst("vcc", opnd, ps.Pfl.Asm32),
             (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
                   ps.Pfl.Src1RC32:$src1)>,
  PredicateControl {
}

// Emits the wave32 ("vcc_lo") and wave64 ("vcc") aliases of a VOP2b
// instruction, each guarded by the matching wave-size predicate.
multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> {
  let WaveSizePredicate = isWave32 in {
    def : VOP2bInstAlias<ps, inst, OpName, "vcc_lo">;
  }
  let WaveSizePredicate = isWave64 in {
    def : VOP2bInstAlias<ps, inst, OpName, "vcc">;
  }
}

// Pseudos for VOP2 ops whose 32-bit forms may read VCC but do not define it:
// the _e32/_sdwa/_dpp forms list VCC in Uses when useSGPRInput is set. No
// selection pattern is attached to _e32.
multiclass VOP2eInst <string opName,
                      VOPProfile P,
                      SDPatternOperator node = null_frag,
                      string revOp = opName,
                      bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {

  let SchedRW = [Write32Bit] in {
    let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
      def _e32 : VOP2_Pseudo <opName, P>,
                 Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;

      foreach _ = BoolToList<P.HasExtSDWA>.ret in
        def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
          let AsmMatchConverter = "cvtSdwaVOP2e";
        }

      foreach _ = BoolToList<P.HasExtDPP>.ret in
        def _dpp : VOP2_DPP_Pseudo <opName, P>;
    }

    def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
               Commutable_REV<revOp#"_e64", !eq(revOp, opName)> {
      let isReMaterializable = 1;
    }
  }
}

// Assembler alias for a VOP2e instruction with opnd appended as an explicit
// trailing operand after the profile's Asm32 operands.
class VOP2eInstAlias <VOP2_Pseudo ps, Instruction inst, string opnd = ""> :
  InstAlias <ps.OpName#" "#ps.Pfl.Asm32#", "#opnd,
             (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
                   ps.Pfl.Src1RC32:$src1)>, PredicateControl;

// Assembler alias that accepts the profile's 64-bit asm string (Asm64) and
// maps it onto inst with $vdst, $sdst, $src0, $src1 and $clamp operands.
class VOP2e64InstAlias <VOP3_Pseudo ps, Instruction inst> :
  InstAlias <ps.OpName#" "#ps.Pfl.Asm64,
             (inst ps.Pfl.DstRC:$vdst, VOPDstS64orS32:$sdst,
                   ps.Pfl.Src0RC32:$src0, ps.Pfl.Src1RC32:$src1, clampmod:$clamp)>,
  PredicateControl;

// Emits the wave32 ("vcc_lo") and wave64 ("vcc") aliases of a VOP2e
// instruction, each guarded by the matching wave-size predicate.
multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
  let WaveSizePredicate = isWave32 in {
    def : VOP2eInstAlias<ps, inst, "vcc_lo">;
  }
  let WaveSizePredicate = isWave64 in {
    def : VOP2eInstAlias<ps, inst, "vcc">;
  }
}

// Profile for the *AK instructions: operands are src0, src1, then the literal
// ($imm). The immediate operand type and the src0 operand class both follow
// the size of vt (32-bit -> f32 variants, otherwise f16 variants).
class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
  field dag Ins32 = !if(!eq(vt.Size, 32),
                        (ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm),
                        (ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm));
  field string Asm32 = "$vdst, $src0, $src1, $imm";
  field bit HasExt = 0;
  let IsSingle = 1;
}

def VOP_MADAK_F16 : VOP_MADAK <f16>;
def VOP_MADAK_F32 : VOP_MADAK <f32>;

// Profile for the *MK instructions: operands are src0, the literal ($imm),
// then src1. As in VOP_MADAK, both the immediate operand type and the src0
// operand class follow the size of vt, so the f16 profile does not get an
// f32 source operand.
class VOP_MADMK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
  field dag Ins32 = !if(!eq(vt.Size, 32),
                        (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1),
                        (ins VSrc_f16_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1));
  field string Asm32 = "$vdst, $src0, $imm, $src1";
  field bit HasExt = 0;
  let IsSingle = 1;
}

def VOP_MADMK_F16 : VOP_MADMK <f16>;
def VOP_MADMK_F32 : VOP_MADMK <f32>;

// Wraps the VGPR source register class for VT in a RegisterOperand.
class getRegisterOperandForVT<ValueType VT> {
  RegisterOperand ret = RegisterOperand<getVregSrcForVT<VT>.ret>;
}

// FIXME: Remove src2_modifiers. It isn't used, so is wasting memory
// and processing time but it makes it easier to convert to mad.
// Profile shared by the v_mac_* / v_fmac_* definitions below. $src2 appears
// as an explicit input in every operand list, while HasSrc2 and HasSrc2Mods
// are forced to 0; users of this profile tie $src2 to $vdst through a
// Constraints string.
class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, vt0]> {
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT>.ret:$src2);
  let Ins64 = getIns64<Src0RC64, Src1RC64, getRegisterOperandForVT<Src2VT>.ret, 3,
                       0, HasModifiers, HasModifiers, HasOMod,
                       Src0Mod, Src1Mod, Src2Mod>.ret;
  let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                    getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  // DPP16 takes the DPP operands plus a trailing $fi operand.
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));
  let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                     Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                     getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
                     dpp8:$dpp8, FI:$fi);
  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                     getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
                     clampmod:$clamp, omod:$omod,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);
  // All asm strings are generated for two sources, i.e. $src2 is not printed.
  let Asm32 = getAsm32<1, 2, vt0>.ret;
  let Asm64 = getAsm64<1, 2, 0, HasModifiers, HasOMod, vt0>.ret;
  let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt0>.ret;
  let AsmDPP16 = getAsmDPP16<1, 2, HasModifiers, vt0>.ret;
  let AsmDPP8 = getAsmDPP8<1, 2, 0, vt0>.ret;
  let AsmSDWA = getAsmSDWA<1, 2, vt0>.ret;
  let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt0>.ret;
  let HasSrc2 = 0;
  let HasSrc2Mods = 0;

  let HasExt = 1;
  let HasExtDPP = 1;
  let HasExtSDWA = 1;
  let HasExtSDWA9 = 0;
  let TieRegDPP = "$src2";
}

// Concrete MAC profiles. The legacy f32 variant has no DPP form; the f64
// variant has no SDWA form and enables HasExt64BitDPP.
def VOP_MAC_F16 : VOP_MAC <f16>;
def VOP_MAC_F32 : VOP_MAC <f32>;
let HasExtDPP = 0 in
def VOP_MAC_LEGACY_F32 : VOP_MAC <f32>;
let HasExtSDWA = 0, HasExt64BitDPP = 1 in
def VOP_MAC_F64 : VOP_MAC <f64>;

// VOP_MAC variant used by the v_dot*c_* definitions: clamp, SDWA, op_sel and
// the packed flag are all disabled.
class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> {
  let HasClamp = 0;
  let HasExtSDWA = 0;
  let HasOpSel = 0;
  let IsPacked = 0;
}

def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> {
  let Src0ModDPP = FPVRegInputMods;
  let Src1ModDPP = FPVRegInputMods;
}

def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32> {
  let HasSrc0Mods = 1;
  let HasSrc1Mods = 1;
}

// Write out to vcc or arbitrary SGPR.
// The 32-bit, SDWA and DPP asm strings spell "vcc" literally; only the 64-bit
// form has an explicit $sdst output operand.
def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], 0, /*EnableClamp=*/1> {
  let Asm32 = "$vdst, vcc, $src0, $src1";
  let Asm64 = "$vdst, $sdst, $src0, $src1$clamp";
  let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8 = "$vdst, vcc, $src0, $src1 $dpp8$fi";
  let AsmDPP16 = AsmDPP#"$fi";
  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
}

// Write out to vcc or arbitrary SGPR and read in from vcc or
// arbitrary SGPR.
def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], 0, /*EnableClamp=*/1> {
  let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
  let Asm64 = "$vdst, $sdst, $src0, $src1, $src2$clamp";
  let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8 = "$vdst, vcc, $src0, $src1, vcc $dpp8$fi";
  let AsmDPP16 = AsmDPP#"$fi";
  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);

  // Suppress src2 implied by type since the 32-bit encoding uses an
  // implicit VCC use.
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);

  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                     clampmod:$clamp,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);

  let InsDPP = (ins DstRCDPP:$old,
                    Src0DPP:$src0,
                    Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));

  let HasExt = 1;
  let HasExtDPP = 1;
  let HasExtSDWA = 1;
  let HasExtSDWA9 = 1;
}

// Read in from vcc or arbitrary SGPR.
def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableF32SrcMods=*/1> {
  let Asm32 = "$vdst, $src0, $src1";
  let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2";
  let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP = "$vdst, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8 = "$vdst, $src0, $src1, vcc $dpp8$fi";
  let AsmDPP16 = AsmDPP#"$fi";

  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst);

  // Suppress src2 implied by type since the 32-bit encoding uses an
  // implicit VCC use.
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);

  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                     clampmod:$clamp,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);

  let InsDPP = (ins DstRCDPP:$old,
                    Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));

  let HasExt = 1;
  let HasExtDPP = 1;
  let HasExtSDWA = 1;
  let HasExtSDWA9 = 1;
}

// Profile for v_readlane_b32: the result is written to an SReg_32, the 32-
// and 64-bit forms share operand lists and asm string, and all extended
// (DPP/SDWA) forms are disabled.
def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
  let Outs32 = (outs SReg_32:$vdst);
  let Outs64 = Outs32;
  let Ins32 = (ins VRegOrLds_32:$src0, SCSrc_b32:$src1);
  let Ins64 = Ins32;
  let Asm32 = " $vdst, $src0, $src1";
  let Asm64 = Asm32;

  let HasExt = 0;
  let HasExtDPP = 0;
  let HasExt64BitDPP = 0;
  let HasExtSDWA = 0;
  let HasExtSDWA9 = 0;
}

// Profile for v_writelane_b32: takes an extra $vdst_in input (tied to $vdst
// by the instruction definition) that is not printed in the asm string;
// src2 and all extended forms are disabled.
def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
  let Outs32 = (outs VGPR_32:$vdst);
  let Outs64 = Outs32;
  let Ins32 = (ins SCSrc_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in);
  let Ins64 = Ins32;
  let Asm32 = " $vdst, $src0, $src1";
  let Asm64 = Asm32;
  let HasSrc2 = 0;
  let HasSrc2Mods = 0;

  let HasExt = 0;
  let HasExtDPP = 0;
  let HasExt64BitDPP = 0;
  let HasExtSDWA = 0;
  let HasExtSDWA9 = 0;
}

//===----------------------------------------------------------------------===//
// VOP2 Instructions
//===----------------------------------------------------------------------===//

defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>;
let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;

// Commutable ops. Where a fourth argument is given it names the op this one
// is the operand-reversed form of (see Commutable_REV in VOP2Inst_e32/_e64).
let isCommutable = 1 in {
let isReMaterializable = 1 in {
defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, any_fadd>;
defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, any_fsub>;
defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, any_fmul>;
defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>;
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>;
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>;
defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>;
defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, clshr_rev_32, "v_lshr_b32">;
defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, cashr_rev_32, "v_ashr_i32">;
defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, clshl_rev_32, "v_lshl_b32">;
defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, and>;
defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
} // End isReMaterializable = 1

let mayRaiseFPException = 0 in {
let OtherPredicates = [HasMadMacF32Insts] in {
// $src2 is tied to $vdst and excluded from the encoding.
let Constraints = "$vdst = $src2", DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1 in {
defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>;

let SubtargetPredicate = isGFX6GFX7GFX10 in
defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_MAC_LEGACY_F32>;
} // End Constraints = "$vdst = $src2", DisableEncoding="$src2",
  //     isConvertibleToThreeAddress = 1

let isReMaterializable = 1 in
def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>;
} // End OtherPredicates = [HasMadMacF32Insts]
} // End mayRaiseFPException = 0

// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
defm V_ADD_CO_U32 : VOP2bInst <"v_add_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_co_u32", 1>;
defm V_SUB_CO_U32 : VOP2bInst <"v_sub_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>;
defm V_SUBREV_CO_U32 : VOP2bInst <"v_subrev_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>;
defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32", 1>;
defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;
defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;


// Add/sub without carry-out; the trailing 1 sets GFX9Renamed.
let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in {
defm V_ADD_U32 : VOP2Inst <"v_add_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_add_u32", 1>;
defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
}

} // End isCommutable = 1

// These are special and do not read the exec mask.
// Lane access pseudos: marked convergent, with an empty implicit-use list.
let isConvergent = 1, Uses = []<Register> in {
def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
  [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>;

let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE,
  [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]>;
} // End $vdst = $vdst_in, DisableEncoding $vdst_in
} // End isConvergent = 1

let isReMaterializable = 1 in {
defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_NO_EXT<VOP_I32_I32_I32>>;
defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, add_ctpop>;
defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_lo>;
defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_hi>;
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT<VOP_F32_F32_I32>, AMDGPUldexp>;

// Overrides the float-type defaults computed in VOP2_Pseudo: these two do not
// read the mode register and cannot raise FP exceptions.
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_i16_f32>;
defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_u16_f32>;
}

defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT<VOP_V2F16_F32_F32>, AMDGPUpkrtz_f16_f32>;
defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_u16_u32>;
defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_i16_i32>;


let SubtargetPredicate = isGFX6GFX7 in {
defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>;
defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>;
} // End SubtargetPredicate = isGFX6GFX7

// Non-reversed shifts, available only under isGFX6GFX7. These are the ops
// named as revOp by the V_*REV_* shift definitions.
let isCommutable = 1 in {
let SubtargetPredicate = isGFX6GFX7 in {
defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, csrl_32>;
defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, csra_32>;
defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, cshl_32>;
} // End SubtargetPredicate = isGFX6GFX7
} // End isCommutable = 1
} // End isReMaterializable = 1

defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst"

// Selects Inst for the divergent form of Op (getDivergentFrag<Op>). When Inst
// is not the original of its Commutable_REV pair (IsOrig is false), the two
// source operands are swapped.
class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
  GCNPat<
      (getDivergentFrag<Op>.ret Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
      !if(!cast<Commutable_REV>(Inst).IsOrig,
        (Inst $src0, $src1),
        (Inst $src1, $src0)
      )
  >;

// Same as DivergentBinOp, but the output instruction takes an additional
// trailing operand, which is set to 0.
// NOTE(review): presumably this is the clamp operand - confirm.
class DivergentClampingBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
  GCNPat<
      (getDivergentFrag<Op>.ret Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
      !if(!cast<Commutable_REV>(Inst).IsOrig,
        (Inst $src0, $src1, 0),
        (Inst $src1, $src0, 0)
      )
  >;

def : DivergentBinOp<csrl_32, V_LSHRREV_B32_e64>;
def : DivergentBinOp<csra_32, V_ASHRREV_I32_e64>;
def : DivergentBinOp<cshl_32, V_LSHLREV_B32_e64>;

let SubtargetPredicate = HasAddNoCarryInsts in {
  def : DivergentClampingBinOp<add, V_ADD_U32_e64>;
  def : DivergentClampingBinOp<sub, V_SUB_U32_e64>;
}

let SubtargetPredicate = isGFX6GFX7GFX8GFX9, Predicates = [isGFX6GFX7GFX8GFX9] in {
def : DivergentClampingBinOp<add, V_ADD_CO_U32_e64>;
def : DivergentClampingBinOp<sub, V_SUB_CO_U32_e64>;
}

def : DivergentBinOp<adde, V_ADDC_U32_e32>;
def : DivergentBinOp<sube, V_SUBB_U32_e32>;

// Selects a divergent 64-bit Op as two applications of the 32-bit Inst, one
// on the sub0 halves and one on the sub1 halves of the operands, recombined
// into a VReg_64 with REG_SEQUENCE.
class divergent_i64_BinOp <SDPatternOperator Op, Instruction Inst> :
  GCNPat<
      (getDivergentFrag<Op>.ret i64:$src0, i64:$src1),
      (REG_SEQUENCE VReg_64,
        (Inst
          (i32 (EXTRACT_SUBREG $src0, sub0)),
          (i32 (EXTRACT_SUBREG $src1, sub0))
        ), sub0,
        (Inst
          (i32 (EXTRACT_SUBREG $src0, sub1)),
          (i32 (EXTRACT_SUBREG $src1, sub1))
        ), sub1
      )
  >;

def : divergent_i64_BinOp <and, V_AND_B32_e64>;
def : divergent_i64_BinOp <or, V_OR_B32_e64>;
def : divergent_i64_BinOp <xor, V_XOR_B32_e64>;

// 16-bit VOP2 instructions.
let SubtargetPredicate = Has16BitInsts in {

let FPDPRounding = 1 in {
def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">;
defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>;
} // End FPDPRounding = 1

defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>;
defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16, clshr_rev_16>;
defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16, cashr_rev_16>;

let isCommutable = 1 in {
let FPDPRounding = 1 in {
defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16, any_fadd>;
defm V_SUB_F16 : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16, any_fsub>;
defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">;
defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, any_fmul>;

let mayRaiseFPException = 0 in {
def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">;
}

} // End FPDPRounding = 1
defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>;
defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16_ARITH, sub>;
defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16_ARITH, null_frag, "v_sub_u16">;
defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16, mul>;
defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>;
defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum_like>;
defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16, umax>;
defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16, smax>;
defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16, umin>;
defm V_MIN_I16 : VOP2Inst <"v_min_i16", VOP_I16_I16_I16, smin>;

let Constraints = "$vdst = $src2", DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1 in {
defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>;
}
} // End isCommutable = 1

} // End SubtargetPredicate = Has16BitInsts

let SubtargetPredicate = HasDLInsts in {

let isReMaterializable = 1 in
defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32, xnor>;

// Divergent not(xor(a, b)) and xor(not(a), b) are both selected to
// V_XNOR_B32; the i64 forms are split into sub0/sub1 halves.
def : GCNPat<
  (i32 (DivergentUnaryFrag<not> (xor_oneuse i32:$src0, i32:$src1))),
  (i32 (V_XNOR_B32_e64 $src0, $src1))
>;

def : GCNPat<
  (i32 (DivergentBinFrag<xor_oneuse> (not i32:$src0), i32:$src1)),
  (i32 (V_XNOR_B32_e64 $src0, $src1))
>;

def : GCNPat<
  (i64 (DivergentUnaryFrag<not> (xor_oneuse i64:$src0, i64:$src1))),
  (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64
                            (i32 (EXTRACT_SUBREG $src0, sub0)),
                            (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0,
                         (i32 (V_XNOR_B32_e64
                            (i32 (EXTRACT_SUBREG $src0, sub1)),
                            (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1)
>;

def : GCNPat<
  (i64 (DivergentBinFrag<xor_oneuse> (not i64:$src0), i64:$src1)),
  (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64
                            (i32 (EXTRACT_SUBREG $src0, sub0)),
                            (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0,
                         (i32 (V_XNOR_B32_e64
                            (i32 (EXTRACT_SUBREG $src0, sub1)),
                            (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1)
>;

let Constraints = "$vdst = $src2",
    DisableEncoding = "$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1 in
defm V_FMAC_F32 : VOP2Inst <"v_fmac_f32", VOP_MAC_F32>;

} // End SubtargetPredicate = HasDLInsts

let SubtargetPredicate = HasFmaLegacy32 in {

let Constraints = "$vdst = $src2",
    DisableEncoding = "$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1 in
defm V_FMAC_LEGACY_F32 : VOP2Inst <"v_fmac_legacy_f32", VOP_MAC_LEGACY_F32>;

} // End SubtargetPredicate = HasFmaLegacy32

let SubtargetPredicate = isGFX90APlus,
    Constraints = "$vdst = $src2",
    DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1,
    SchedRW = [WriteDoubleAdd] in
defm V_FMAC_F64 : VOP2Inst <"v_fmac_f64", VOP_MAC_F64>;

// Accumulating dot-product ops: $src2 is tied to $vdst. Each op is guarded by
// its own HasDot*Insts predicate.
let Constraints = "$vdst = $src2",
      DisableEncoding="$src2",
      isConvertibleToThreeAddress = 1,
      isCommutable = 1,
      IsDOT = 1 in {
  let SubtargetPredicate = HasDot5Insts in
    defm V_DOT2C_F32_F16 : VOP2Inst<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16>;
  let SubtargetPredicate = HasDot6Insts in
    defm V_DOT4C_I32_I8 : VOP2Inst<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>;

  let SubtargetPredicate = HasDot4Insts in
    defm V_DOT2C_I32_I16 : VOP2Inst<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>;
  let SubtargetPredicate = HasDot3Insts in
    defm V_DOT8C_I32_I4 : VOP2Inst<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>;
}

// Select the dot nodes/intrinsics to the accumulating VOP2 forms when the
// clamp argument is DSTCLAMP.NONE.
let AddedComplexity = 30 in {
  def : GCNPat<
    (f32 (AMDGPUfdot2 v2f16:$src0, v2f16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))),
    (f32 (V_DOT2C_F32_F16_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot5Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot4 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT4C_I32_I8_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot6Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot2 v2i16:$src0, v2i16:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT2C_I32_I16_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot4Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot8 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT8C_I32_I4_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot3Insts;
  }
} // End AddedComplexity = 30

let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1 in {
def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">;

let isCommutable = 1 in
def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">;
}

let SubtargetPredicate = isGFX10Plus in {

let FPDPRounding = 1 in {
def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">;

let isCommutable = 1 in
def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">;
} // End FPDPRounding = 1

let Constraints = "$vdst = $src2",
    DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1 in {
defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>;
}

} // End SubtargetPredicate = isGFX10Plus

let SubtargetPredicate = HasPkFmacF16Inst in {
defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>;
} // End SubtargetPredicate = HasPkFmacF16Inst

// Note: 16-bit instructions produce a 0 result in the high 16-bits
// on GFX8 and GFX9 and preserve high 16 bits on GFX10+
// Given that, zext of a 16-bit op to i32 selects to the instruction itself,
// and zext to i64 pairs the result with a zero high half (V_MOV_B32 0).
multiclass Arithmetic_i16_0Hi_Pats <SDPatternOperator op, Instruction inst> {

def : GCNPat<
  (i32 (zext (op i16:$src0, i16:$src1))),
  (inst VSrc_b16:$src0, VSrc_b16:$src1)
>;

def : GCNPat<
  (i64 (zext (op i16:$src0, i16:$src1))),
   (REG_SEQUENCE VReg_64,
     (inst $src0, $src1), sub0,
     (V_MOV_B32_e32 (i32 0)), sub1)
>;
}

// Extends an i1 to i16 via V_CNDMASK_B32 choosing between the constants 0
// (src0) and 1 (src1), with $src as the mask operand.
class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
  (i16 (ext i1:$src)),
  (V_CNDMASK_B32_e64 (i32 0/*src0mod*/), (i32 0/*src0*/),
                     (i32 0/*src1mod*/), (i32 1/*src1*/),
                     $src)
>;

// Bitwise and/or/xor on i16 and v2i16 select to the 32-bit instructions.
foreach vt = [i16, v2i16] in {
def : GCNPat <
  (and vt:$src0, vt:$src1),
  (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;

def : GCNPat <
  (or vt:$src0, vt:$src1),
  (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;

def : GCNPat <
  (xor vt:$src0, vt:$src1),
  (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;
}

let Predicates = [Has16BitInsts] in {

// Undo sub x, c -> add x, -c canonicalization since c is more likely
// an inline immediate than -c.
// TODO: Also do for 64-bit.
def : GCNPat<
  (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
  (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
>;


let Predicates = [Has16BitInsts, isGFX7GFX8GFX9] in {

def : GCNPat<
  (i32 (zext (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)))),
  (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
>;

defm : Arithmetic_i16_0Hi_Pats<add, V_ADD_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<mul, V_MUL_LO_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<sub, V_SUB_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<smin, V_MIN_I16_e64>;
defm : Arithmetic_i16_0Hi_Pats<smax, V_MAX_I16_e64>;
defm : Arithmetic_i16_0Hi_Pats<umin, V_MIN_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<umax, V_MAX_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<clshl_rev_16, V_LSHLREV_B16_e64>;
defm : Arithmetic_i16_0Hi_Pats<clshr_rev_16, V_LSHRREV_B16_e64>;
defm : Arithmetic_i16_0Hi_Pats<cashr_rev_16, V_ASHRREV_I16_e64>;
} // End Predicates = [Has16BitInsts, isGFX7GFX8GFX9]

def : ZExt_i16_i1_Pat<zext>;
def : ZExt_i16_i1_Pat<anyext>;

// Sign-extending variant: chooses between 0 and -1.
def : GCNPat <
  (i16 (sext i1:$src)),
  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
                     /*src1mod*/(i32 0), /*src1*/(i32 -1), $src)
>;

} // End Predicates = [Has16BitInsts]


let SubtargetPredicate = HasIntClamp in {
// Set clamp bit for saturation.
def : VOPBinOpClampPat<uaddsat, V_ADD_CO_U32_e64, i32>;
def : VOPBinOpClampPat<usubsat, V_SUB_CO_U32_e64, i32>;
}

let SubtargetPredicate = HasAddNoCarryInsts, OtherPredicates = [HasIntClamp] in {
let AddedComplexity = 1 in { // Prefer over form with carry-out.
def : VOPBinOpClampPat<uaddsat, V_ADD_U32_e64, i32>;
def : VOPBinOpClampPat<usubsat, V_SUB_U32_e64, i32>;
}
}

let SubtargetPredicate = Has16BitInsts, OtherPredicates = [HasIntClamp] in {
def : VOPBinOpClampPat<uaddsat, V_ADD_U16_e64, i16>;
def : VOPBinOpClampPat<usubsat, V_SUB_U16_e64, i16>;
}

//===----------------------------------------------------------------------===//
// Target-specific instruction encodings.
//===----------------------------------------------------------------------===//

// Real DPP instruction for a VOP2 DPP pseudo. The src0 field [8:0] holds the
// fixed value 0xfa; side-effect, Defs, Uses and scheduling info are copied
// from the pseudo.
class VOP2_DPP<bits<6> op, VOP2_DPP_Pseudo ps,
               string opName = ps.OpName, VOPProfile p = ps.Pfl,
               bit IsDPP16 = 0> :
    VOP_DPP<opName, p, IsDPP16> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;

  bits<8> vdst;
  bits<8> src1;
  let Inst{8-0}   = 0xfa;
  let Inst{16-9}  = !if(p.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0;
}

// VOP2_DPP with IsDPP16 set, predicated on HasDPP16; OtherPredicates are
// taken from the pseudo.
class Base_VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
                 string opName = ps.OpName, VOPProfile p = ps.Pfl> :
    VOP2_DPP<op, ps, opName, p, 1> {
  let AssemblerPredicate = HasDPP16;
  let SubtargetPredicate = HasDPP16;
  let OtherPredicates = ps.OtherPredicates;
}

// DPP16 real instruction registered under the GFX10 encoding family.
class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
                 string opName = ps.OpName, VOPProfile p = ps.Pfl> :
    Base_VOP2_DPP16<op, ps, opName, p>,
    SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX10>;

class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
                VOPProfile p = ps.Pfl> :
    VOP_DPP8<ps.OpName, p> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;

  bits<8> vdst;
  bits<8> src1;

  let Inst{8-0}   = fi;
  let Inst{16-9}  = !if(p.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0;
972 let OtherPredicates = ps.OtherPredicates; 973} 974 975//===----------------------------------------------------------------------===// 976// GFX10. 977//===----------------------------------------------------------------------===// 978 979let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { 980 //===------------------------------- VOP2 -------------------------------===// 981 multiclass VOP2Only_Real_MADK_gfx10<bits<6> op> { 982 def _gfx10 : 983 VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX10>, 984 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>; 985 } 986 multiclass VOP2Only_Real_MADK_gfx10_with_name<bits<6> op, string opName, 987 string asmName> { 988 def _gfx10 : 989 VOP2_Real<!cast<VOP2_Pseudo>(opName), SIEncodingFamily.GFX10>, 990 VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(opName).Pfl> { 991 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName); 992 let AsmString = asmName # ps.AsmOperands; 993 } 994 } 995 multiclass VOP2_Real_e32_gfx10<bits<6> op> { 996 def _e32_gfx10 : 997 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>, 998 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>; 999 } 1000 multiclass VOP2_Real_e64_gfx10<bits<6> op> { 1001 def _e64_gfx10 : 1002 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>, 1003 VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 1004 } 1005 multiclass VOP2_Real_sdwa_gfx10<bits<6> op> { 1006 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in 1007 def _sdwa_gfx10 : 1008 VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>, 1009 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> { 1010 let DecoderNamespace = "SDWA10"; 1011 } 1012 } 1013 multiclass VOP2_Real_dpp_gfx10<bits<6> op> { 1014 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in 1015 def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> { 1016 let DecoderNamespace = "SDWA10"; 1017 } 1018 } 1019 multiclass 
VOP2_Real_dpp8_gfx10<bits<6> op> { 1020 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in 1021 def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")> { 1022 let DecoderNamespace = "DPP8"; 1023 } 1024 } 1025 1026 //===------------------------- VOP2 (with name) -------------------------===// 1027 multiclass VOP2_Real_e32_gfx10_with_name<bits<6> op, string opName, 1028 string asmName> { 1029 def _e32_gfx10 : 1030 VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>, 1031 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> { 1032 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1033 let AsmString = asmName # ps.AsmOperands; 1034 } 1035 } 1036 multiclass VOP2_Real_e64_gfx10_with_name<bits<6> op, string opName, 1037 string asmName> { 1038 def _e64_gfx10 : 1039 VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>, 1040 VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, 1041 !cast<VOP3_Pseudo>(opName#"_e64").Pfl> { 1042 VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName#"_e64"); 1043 let AsmString = asmName # ps.AsmOperands; 1044 } 1045 } 1046 let DecoderNamespace = "SDWA10" in { 1047 multiclass VOP2_Real_sdwa_gfx10_with_name<bits<6> op, string opName, 1048 string asmName> { 1049 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in 1050 def _sdwa_gfx10 : 1051 VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>, 1052 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> { 1053 VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa"); 1054 let AsmString = asmName # ps.AsmOperands; 1055 } 1056 } 1057 multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName, 1058 string asmName> { 1059 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in 1060 def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp")> { 1061 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1062 let AsmString = asmName # ps.Pfl.AsmDPP16; 1063 } 1064 } 1065 
multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName, 1066 string asmName> { 1067 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in 1068 def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> { 1069 VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1070 let AsmString = asmName # ps.Pfl.AsmDPP8; 1071 let DecoderNamespace = "DPP8"; 1072 } 1073 } 1074 } // End DecoderNamespace = "SDWA10" 1075 1076 //===------------------------------ VOP2be ------------------------------===// 1077 multiclass VOP2be_Real_e32_gfx10<bits<6> op, string opName, string asmName> { 1078 def _e32_gfx10 : 1079 VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>, 1080 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> { 1081 VOP2_Pseudo Ps = !cast<VOP2_Pseudo>(opName#"_e32"); 1082 let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands); 1083 } 1084 } 1085 multiclass VOP2be_Real_e64_gfx10<bits<6> op, string opName, string asmName> { 1086 def _e64_gfx10 : 1087 VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>, 1088 VOP3be_gfx10<{0, 1, 0, 0, op{5-0}}, 1089 !cast<VOP3_Pseudo>(opName#"_e64").Pfl> { 1090 VOP3_Pseudo Ps = !cast<VOP3_Pseudo>(opName#"_e64"); 1091 let AsmString = asmName # Ps.AsmOperands; 1092 } 1093 } 1094 multiclass VOP2be_Real_sdwa_gfx10<bits<6> op, string opName, string asmName> { 1095 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in 1096 def _sdwa_gfx10 : 1097 VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>, 1098 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> { 1099 VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa"); 1100 let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands); 1101 let DecoderNamespace = "SDWA10"; 1102 } 1103 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in 1104 def _sdwa_w32_gfx10 : 1105 Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>, 1106 
VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> { 1107 VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa"); 1108 let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands); 1109 let isAsmParserOnly = 1; 1110 let DecoderNamespace = "SDWA10"; 1111 let WaveSizePredicate = isWave32; 1112 } 1113 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in 1114 def _sdwa_w64_gfx10 : 1115 Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>, 1116 VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> { 1117 VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa"); 1118 let AsmString = asmName # Ps.AsmOperands; 1119 let isAsmParserOnly = 1; 1120 let DecoderNamespace = "SDWA10"; 1121 let WaveSizePredicate = isWave64; 1122 } 1123 } 1124 multiclass VOP2be_Real_dpp_gfx10<bits<6> op, string opName, string asmName> { 1125 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in 1126 def _dpp_gfx10 : 1127 VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> { 1128 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16; 1129 let AsmString = asmName # !subst(", vcc", "", AsmDPP); 1130 let DecoderNamespace = "SDWA10"; 1131 } 1132 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in 1133 def _dpp_w32_gfx10 : 1134 Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> { 1135 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16; 1136 let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP); 1137 let isAsmParserOnly = 1; 1138 let WaveSizePredicate = isWave32; 1139 } 1140 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in 1141 def _dpp_w64_gfx10 : 1142 Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> { 1143 string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16; 1144 let AsmString = asmName # AsmDPP; 1145 let isAsmParserOnly = 1; 1146 let WaveSizePredicate = 
isWave64; 1147 } 1148 } 1149 multiclass VOP2be_Real_dpp8_gfx10<bits<6> op, string opName, string asmName> { 1150 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in 1151 def _dpp8_gfx10 : 1152 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> { 1153 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 1154 let AsmString = asmName # !subst(", vcc", "", AsmDPP8); 1155 let DecoderNamespace = "DPP8"; 1156 } 1157 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in 1158 def _dpp8_w32_gfx10 : 1159 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> { 1160 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 1161 let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8); 1162 let isAsmParserOnly = 1; 1163 let WaveSizePredicate = isWave32; 1164 } 1165 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in 1166 def _dpp8_w64_gfx10 : 1167 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> { 1168 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 1169 let AsmString = asmName # AsmDPP8; 1170 let isAsmParserOnly = 1; 1171 let WaveSizePredicate = isWave64; 1172 } 1173 } 1174 1175 //===----------------------------- VOP3Only -----------------------------===// 1176 multiclass VOP3Only_Real_gfx10<bits<10> op> { 1177 def _e64_gfx10 : 1178 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>, 1179 VOP3e_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> { 1180 let IsSingle = 1; 1181 } 1182 } 1183 1184 //===---------------------------- VOP3beOnly ----------------------------===// 1185 multiclass VOP3beOnly_Real_gfx10<bits<10> op> { 1186 def _e64_gfx10 : 1187 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>, 1188 VOP3be_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> { 1189 let IsSingle = 1; 1190 } 1191 } 1192} // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" 1193 1194multiclass VOP2be_Real_gfx10<bits<6> op, string opName, string 
asmName> : 1195 VOP2be_Real_e32_gfx10<op, opName, asmName>, 1196 VOP2be_Real_e64_gfx10<op, opName, asmName>, 1197 VOP2be_Real_sdwa_gfx10<op, opName, asmName>, 1198 VOP2be_Real_dpp_gfx10<op, opName, asmName>, 1199 VOP2be_Real_dpp8_gfx10<op, opName, asmName>; 1200 1201multiclass VOP2e_Real_gfx10<bits<6> op, string opName, string asmName> : 1202 VOP2_Real_e32_gfx10<op>, 1203 VOP2_Real_e64_gfx10<op>, 1204 VOP2be_Real_sdwa_gfx10<op, opName, asmName>, 1205 VOP2be_Real_dpp_gfx10<op, opName, asmName>, 1206 VOP2be_Real_dpp8_gfx10<op, opName, asmName>; 1207 1208multiclass VOP2_Real_gfx10<bits<6> op> : 1209 VOP2_Real_e32_gfx10<op>, VOP2_Real_e64_gfx10<op>, 1210 VOP2_Real_sdwa_gfx10<op>, VOP2_Real_dpp_gfx10<op>, VOP2_Real_dpp8_gfx10<op>; 1211 1212multiclass VOP2_Real_gfx10_with_name<bits<6> op, string opName, 1213 string asmName> : 1214 VOP2_Real_e32_gfx10_with_name<op, opName, asmName>, 1215 VOP2_Real_e64_gfx10_with_name<op, opName, asmName>, 1216 VOP2_Real_sdwa_gfx10_with_name<op, opName, asmName>, 1217 VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>, 1218 VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>; 1219 1220// NB: Same opcode as v_mac_legacy_f32 1221let DecoderNamespace = "GFX10_B" in 1222defm V_FMAC_LEGACY_F32 : VOP2_Real_gfx10<0x006>; 1223 1224defm V_XNOR_B32 : VOP2_Real_gfx10<0x01e>; 1225defm V_FMAC_F32 : VOP2_Real_gfx10<0x02b>; 1226defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10<0x02c>; 1227defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10<0x02d>; 1228defm V_ADD_F16 : VOP2_Real_gfx10<0x032>; 1229defm V_SUB_F16 : VOP2_Real_gfx10<0x033>; 1230defm V_SUBREV_F16 : VOP2_Real_gfx10<0x034>; 1231defm V_MUL_F16 : VOP2_Real_gfx10<0x035>; 1232defm V_FMAC_F16 : VOP2_Real_gfx10<0x036>; 1233defm V_FMAMK_F16 : VOP2Only_Real_MADK_gfx10<0x037>; 1234defm V_FMAAK_F16 : VOP2Only_Real_MADK_gfx10<0x038>; 1235defm V_MAX_F16 : VOP2_Real_gfx10<0x039>; 1236defm V_MIN_F16 : VOP2_Real_gfx10<0x03a>; 1237defm V_LDEXP_F16 : VOP2_Real_gfx10<0x03b>; 1238 1239let IsSingle = 1 in { 1240defm 
V_PK_FMAC_F16 : VOP2_Real_e32_gfx10<0x03c>; 1241} 1242 1243// VOP2 no carry-in, carry-out. 1244defm V_ADD_NC_U32 : 1245 VOP2_Real_gfx10_with_name<0x025, "V_ADD_U32", "v_add_nc_u32">; 1246defm V_SUB_NC_U32 : 1247 VOP2_Real_gfx10_with_name<0x026, "V_SUB_U32", "v_sub_nc_u32">; 1248defm V_SUBREV_NC_U32 : 1249 VOP2_Real_gfx10_with_name<0x027, "V_SUBREV_U32", "v_subrev_nc_u32">; 1250 1251// VOP2 carry-in, carry-out. 1252defm V_ADD_CO_CI_U32 : 1253 VOP2be_Real_gfx10<0x028, "V_ADDC_U32", "v_add_co_ci_u32">; 1254defm V_SUB_CO_CI_U32 : 1255 VOP2be_Real_gfx10<0x029, "V_SUBB_U32", "v_sub_co_ci_u32">; 1256defm V_SUBREV_CO_CI_U32 : 1257 VOP2be_Real_gfx10<0x02a, "V_SUBBREV_U32", "v_subrev_co_ci_u32">; 1258 1259defm V_CNDMASK_B32 : 1260 VOP2e_Real_gfx10<0x001, "V_CNDMASK_B32", "v_cndmask_b32">; 1261 1262// VOP3 only. 1263defm V_BFM_B32 : VOP3Only_Real_gfx10<0x363>; 1264defm V_BCNT_U32_B32 : VOP3Only_Real_gfx10<0x364>; 1265defm V_MBCNT_LO_U32_B32 : VOP3Only_Real_gfx10<0x365>; 1266defm V_MBCNT_HI_U32_B32 : VOP3Only_Real_gfx10<0x366>; 1267defm V_LDEXP_F32 : VOP3Only_Real_gfx10<0x362>; 1268defm V_CVT_PKNORM_I16_F32 : VOP3Only_Real_gfx10<0x368>; 1269defm V_CVT_PKNORM_U16_F32 : VOP3Only_Real_gfx10<0x369>; 1270defm V_CVT_PK_U16_U32 : VOP3Only_Real_gfx10<0x36a>; 1271defm V_CVT_PK_I16_I32 : VOP3Only_Real_gfx10<0x36b>; 1272 1273// VOP3 carry-out. 
defm V_ADD_CO_U32    : VOP3beOnly_Real_gfx10<0x30f>;
defm V_SUB_CO_U32    : VOP3beOnly_Real_gfx10<0x310>;
defm V_SUBREV_CO_U32 : VOP3beOnly_Real_gfx10<0x319>;

// Assembler aliases for the GFX10 e32 reals of v_cndmask_b32 and the
// carry-in/carry-out add/sub instructions.
let SubtargetPredicate = isGFX10Plus in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx10>;

  defm : VOP2bInstAliases<
    V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx10, "v_add_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx10, "v_sub_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx10, "v_subrev_co_ci_u32">;
} // End SubtargetPredicate = isGFX10Plus

//===----------------------------------------------------------------------===//
// GFX6, GFX7, GFX10.
//===----------------------------------------------------------------------===//

// DPP encoding of a VOP2 instruction on top of VOP_DPPe: the low 32 bits use
// the VOP2 layout, with the constant 0xfa in Inst{8-0}.
class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
    VOP_DPPe <P> {
  bits<8> vdst;
  bits<8> src1;
  let Inst{8-0}   = 0xfa; // dpp
  let Inst{16-9}  = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31}    = 0x0; // encoding
}

let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  // e32-encoded real for a pseudo called NAME (no _e32 suffix); used for
  // the lane instructions.
  multiclass VOP2_Lane_Real_gfx6_gfx7<bits<6> op> {
    def _gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }

  // MADK-encoded real for a pseudo called NAME.
  multiclass VOP2Only_Real_MADK_gfx6_gfx7<bits<6> op> {
    def _gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
      VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }

  // 32-bit VOP2 encoding of opName#"_e32"; opName defaults to NAME.
  multiclass VOP2_Real_e32_gfx6_gfx7<bits<6> op, string opName = NAME> {
    def _e32_gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.SI>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl>;
  }

  // VOP3 encoding of opName#"_e64"; the VOP3 opcode is the VOP2 opcode
  // prefixed with the bits {1, 0, 0}.
  multiclass VOP2_Real_e64_gfx6_gfx7<bits<6> op, string opName = NAME> {
    def _e64_gfx6_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(opName#"_e64").Pfl>;
  }

  // Same as VOP2_Real_e64_gfx6_gfx7 but with the VOP3be encoding.
  multiclass VOP2be_Real_e64_gfx6_gfx7<bits<6> op, string opName = NAME> {
    def _e64_gfx6_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.SI>,
      VOP3be_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(opName#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

// MADK real for GFX6/GFX7 and GFX10 under one opcode.
multiclass VOP2Only_Real_MADK_gfx6_gfx7_gfx10<bits<6> op> :
  VOP2Only_Real_MADK_gfx6_gfx7<op>, VOP2Only_Real_MADK_gfx10<op>;

// e32 + e64 reals for GFX6/GFX7.
multiclass VOP2_Real_gfx6_gfx7<bits<6> op> :
  VOP2_Real_e32_gfx6_gfx7<op>, VOP2_Real_e64_gfx6_gfx7<op>;

// GFX6/GFX7 reals plus the full set of GFX10 reals under one opcode.
multiclass VOP2_Real_gfx6_gfx7_gfx10<bits<6> op> :
  VOP2_Real_gfx6_gfx7<op>, VOP2_Real_gfx10<op>;

// e32 + VOP3be-encoded e64 reals for GFX6/GFX7.
multiclass VOP2be_Real_gfx6_gfx7<bits<6> op> :
  VOP2_Real_e32_gfx6_gfx7<op>, VOP2be_Real_e64_gfx6_gfx7<op>;

// VOP2be reals for GFX6/GFX7 built from the pseudos named `opName`, with
// `asmName` substituted as the mnemonic in each record's AsmString.
multiclass VOP2be_Real_gfx6_gfx7_with_name<bits<6> op,
                                           string opName, string asmName> {
  defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
  defvar ps64 = !cast<VOP3_Pseudo>(opName#"_e64");

  let AsmString = asmName # ps32.AsmOperands in {
    defm "" : VOP2_Real_e32_gfx6_gfx7<op, opName>;
  }

  let AsmString = asmName # ps64.AsmOperands in {
    defm "" : VOP2be_Real_e64_gfx6_gfx7<op, opName>;
  }
}

defm V_CNDMASK_B32        : VOP2_Real_gfx6_gfx7<0x000>;
defm V_MIN_LEGACY_F32     : VOP2_Real_gfx6_gfx7<0x00d>;
defm V_MAX_LEGACY_F32     : VOP2_Real_gfx6_gfx7<0x00e>;
defm V_LSHR_B32           : VOP2_Real_gfx6_gfx7<0x015>;
defm V_ASHR_I32           : VOP2_Real_gfx6_gfx7<0x017>;
defm V_LSHL_B32           : VOP2_Real_gfx6_gfx7<0x019>;
defm V_BFM_B32            : VOP2_Real_gfx6_gfx7<0x01e>;
defm V_BCNT_U32_B32       : VOP2_Real_gfx6_gfx7<0x022>;
defm V_MBCNT_LO_U32_B32   : VOP2_Real_gfx6_gfx7<0x023>;
defm V_MBCNT_HI_U32_B32   : VOP2_Real_gfx6_gfx7<0x024>;
defm V_LDEXP_F32          : VOP2_Real_gfx6_gfx7<0x02b>;
defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_gfx6_gfx7<0x02c>;
defm V_CVT_PKNORM_I16_F32 : VOP2_Real_gfx6_gfx7<0x02d>;
defm V_CVT_PKNORM_U16_F32 : VOP2_Real_gfx6_gfx7<0x02e>;
defm V_CVT_PK_U16_U32     : VOP2_Real_gfx6_gfx7<0x030>;
defm V_CVT_PK_I16_I32     : VOP2_Real_gfx6_gfx7<0x031>;

// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in
// VI, but the VI instructions behave the same as the SI versions.
defm V_ADD_I32     : VOP2be_Real_gfx6_gfx7_with_name<0x025, "V_ADD_CO_U32", "v_add_i32">;
defm V_SUB_I32     : VOP2be_Real_gfx6_gfx7_with_name<0x026, "V_SUB_CO_U32", "v_sub_i32">;
defm V_SUBREV_I32  : VOP2be_Real_gfx6_gfx7_with_name<0x027, "V_SUBREV_CO_U32", "v_subrev_i32">;
defm V_ADDC_U32    : VOP2be_Real_gfx6_gfx7<0x028>;
defm V_SUBB_U32    : VOP2be_Real_gfx6_gfx7<0x029>;
defm V_SUBBREV_U32 : VOP2be_Real_gfx6_gfx7<0x02a>;

defm V_READLANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x001>;

// v_writelane_b32 gets an explicit input operand list that includes
// $vdst_in.
let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
  defm V_WRITELANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x002>;
} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)

// Assembler aliases for the GFX6/GFX7 reals of v_cndmask_b32 and the
// add/sub/subrev instructions that go by their *_I32 names here.
let SubtargetPredicate = isGFX6GFX7 in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx6_gfx7>;
  defm : VOP2eInstAliases<V_ADD_CO_U32_e32, V_ADD_I32_e32_gfx6_gfx7>;
  defm : VOP2eInstAliases<V_SUB_CO_U32_e32, V_SUB_I32_e32_gfx6_gfx7>;
  defm : VOP2eInstAliases<V_SUBREV_CO_U32_e32, V_SUBREV_I32_e32_gfx6_gfx7>;

  def : VOP2e64InstAlias<V_ADD_CO_U32_e64, V_ADD_I32_e64_gfx6_gfx7>;
  def : VOP2e64InstAlias<V_SUB_CO_U32_e64, V_SUB_I32_e64_gfx6_gfx7>;
  def : VOP2e64InstAlias<V_SUBREV_CO_U32_e64, V_SUBREV_I32_e64_gfx6_gfx7>;
} // End SubtargetPredicate = isGFX6GFX7

defm V_ADD_F32           : VOP2_Real_gfx6_gfx7_gfx10<0x003>;
defm V_SUB_F32           : VOP2_Real_gfx6_gfx7_gfx10<0x004>;
defm V_SUBREV_F32        : VOP2_Real_gfx6_gfx7_gfx10<0x005>;
defm V_MAC_LEGACY_F32    : VOP2_Real_gfx6_gfx7_gfx10<0x006>;
defm V_MUL_LEGACY_F32    : VOP2_Real_gfx6_gfx7_gfx10<0x007>;
defm V_MUL_F32           : VOP2_Real_gfx6_gfx7_gfx10<0x008>;
defm V_MUL_I32_I24       : VOP2_Real_gfx6_gfx7_gfx10<0x009>;
defm V_MUL_HI_I32_I24    : VOP2_Real_gfx6_gfx7_gfx10<0x00a>;
defm V_MUL_U32_U24       : VOP2_Real_gfx6_gfx7_gfx10<0x00b>;
defm V_MUL_HI_U32_U24    : VOP2_Real_gfx6_gfx7_gfx10<0x00c>;
defm V_MIN_F32           : VOP2_Real_gfx6_gfx7_gfx10<0x00f>;
defm V_MAX_F32           : VOP2_Real_gfx6_gfx7_gfx10<0x010>;
defm V_MIN_I32           : VOP2_Real_gfx6_gfx7_gfx10<0x011>;
defm V_MAX_I32           : VOP2_Real_gfx6_gfx7_gfx10<0x012>;
defm V_MIN_U32           : VOP2_Real_gfx6_gfx7_gfx10<0x013>;
defm V_MAX_U32           : VOP2_Real_gfx6_gfx7_gfx10<0x014>;
defm V_LSHRREV_B32       : VOP2_Real_gfx6_gfx7_gfx10<0x016>;
defm V_ASHRREV_I32       : VOP2_Real_gfx6_gfx7_gfx10<0x018>;
defm V_LSHLREV_B32       : VOP2_Real_gfx6_gfx7_gfx10<0x01a>;
defm V_AND_B32           : VOP2_Real_gfx6_gfx7_gfx10<0x01b>;
defm V_OR_B32            : VOP2_Real_gfx6_gfx7_gfx10<0x01c>;
defm V_XOR_B32           : VOP2_Real_gfx6_gfx7_gfx10<0x01d>;
defm V_MAC_F32           : VOP2_Real_gfx6_gfx7_gfx10<0x01f>;
defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_gfx6_gfx7_gfx10<0x02f>;
defm V_MADMK_F32         : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x020>;
defm V_MADAK_F32         : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x021>;

//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {

  // MADK-encoded real for a pseudo called NAME (no _e32 suffix).
  multiclass VOP2_Real_MADK_vi <bits<6> op> {
    def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
              VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }

  // 32-bit VOP2 encoding of NAME#"_e32".
  multiclass VOP2_Real_e32_vi <bits<6> op> {
    def _e32_vi :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
  }

  // VOP3 encoding of NAME#"_e64"; takes the full 10-bit VOP3 opcode.
  multiclass VOP2_Real_e64_vi <bits<10> op> {
    def _e64_vi :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
      VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }

  // Same record as VOP2_Real_e64_vi, additionally marked IsSingle.
  multiclass VOP2_Real_e64only_vi <bits<10> op> {
    def _e64_vi :
        VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
        VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
      let IsSingle = 1;
    }
  }

  // e32 + e64 pair; the VOP3 opcode is the VOP2 opcode prefixed with the
  // bits {0, 1, 0, 0}.
  multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
    VOP2_Real_e32_vi<op>,
    VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;

} // End AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8"

// VI SDWA real, emitted only when the _e32 profile has HasExtSDWA set.
multiclass VOP2_SDWA_Real <bits<6> op> {
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA>.ret in
  def _sdwa_vi :
    VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}

// GFX9 SDWA real, emitted only when the _e32 profile has HasExtSDWA9 set.
multiclass VOP2_SDWA9_Real <bits<6> op> {
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}

let AssemblerPredicate = isGFX8Only in {

  // GFX8-only e32/e64/sdwa/dpp reals for the pseudos named `OpName`, with
  // `AsmName` as the mnemonic. The e64 record uses the VOP3be encoding; the
  // sdwa and dpp records are emitted only when the profile supports them.
  multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName> {
    def _e32_vi :
        VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.VI>,
        VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> {
      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32");
      let AsmString = AsmName # ps.AsmOperands;
      let DecoderNamespace = "GFX8";
    }

    def _e64_vi :
        VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.VI>,
        VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
      VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
      let AsmString = AsmName # ps.AsmOperands;
      let DecoderNamespace = "GFX8";
    }

    foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA>.ret in
    def _sdwa_vi :
        VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
        VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
      VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
      let AsmString = AsmName # ps.AsmOperands;
    }

    foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_vi :
        VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.VI>,
        VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
      VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
      let AsmString = AsmName # ps.AsmOperands;
    }
  }
} // End AssemblerPredicate = isGFX8Only

let AssemblerPredicate = isGFX9Only in {

  // GFX9-only counterpart of VOP2be_Real_e32e64_vi_only. The e32 and e64
  // records go to the "GFX9" decoder namespace, the dpp record to "SDWA9".
  multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
    def _e32_gfx9 :
        VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.GFX9>,
        VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> {
      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32");
      let AsmString = AsmName # ps.AsmOperands;
      let DecoderNamespace = "GFX9";
    }

    def _e64_gfx9 :
        VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.GFX9>,
        VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
      VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
      let AsmString = AsmName # ps.AsmOperands;
      let DecoderNamespace = "GFX9";
    }

    foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx9 :
        VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
        VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
      VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
      let AsmString = AsmName # ps.AsmOperands;
    }

    foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_gfx9 :
        VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.GFX9>,
        VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
      VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
      let AsmString = AsmName # ps.AsmOperands;
      let DecoderNamespace = "SDWA9";
    }
  }

  // GFX9-only e32/e64/sdwa/dpp reals for an ordinary VOP2 instruction called
  // NAME; the assembly strings are not overridden.
  multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
    def _e32_gfx9 :
        VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX9>,
        VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl> {
      let DecoderNamespace = "GFX9";
    }

    def _e64_gfx9 :
        VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
        VOP3e_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
      let DecoderNamespace = "GFX9";
    }

    foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx9 :
        VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
        VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
    }

    foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_gfx9 :
        VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
        VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
      let DecoderNamespace = "SDWA9";
    }
  }

} // End AssemblerPredicate = isGFX9Only

// Full set of reals for an ordinary VOP2 instruction on VI: the e32/e64
// pair, the VI and GFX9 SDWA forms, and a VI DPP form when the _e32 profile
// has HasExtDPP set.
multiclass VOP2_Real_e32e64_vi <bits<6> op> :
  Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {

  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
  def _dpp_vi :
    VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
    VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
}

defm V_CNDMASK_B32    : VOP2_Real_e32e64_vi <0x0>;
defm V_ADD_F32        : VOP2_Real_e32e64_vi <0x1>;
defm V_SUB_F32        : VOP2_Real_e32e64_vi <0x2>;
defm V_SUBREV_F32     : VOP2_Real_e32e64_vi <0x3>;
let AssemblerPredicate = isGCN3ExcludingGFX90A in
defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_vi <0x4>;
defm V_MUL_F32        : VOP2_Real_e32e64_vi <0x5>;
defm V_MUL_I32_I24    : VOP2_Real_e32e64_vi <0x6>;
defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_vi <0x7>;
defm V_MUL_U32_U24    : VOP2_Real_e32e64_vi <0x8>;
defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_vi <0x9>;
defm V_MIN_F32        : VOP2_Real_e32e64_vi <0xa>;
defm V_MAX_F32        : VOP2_Real_e32e64_vi <0xb>;
defm V_MIN_I32        : VOP2_Real_e32e64_vi <0xc>;
defm V_MAX_I32        : VOP2_Real_e32e64_vi <0xd>;
defm V_MIN_U32        : VOP2_Real_e32e64_vi <0xe>;
defm V_MAX_U32        : VOP2_Real_e32e64_vi <0xf>;
defm V_LSHRREV_B32    : VOP2_Real_e32e64_vi <0x10>;
defm V_ASHRREV_I32    : VOP2_Real_e32e64_vi <0x11>;
defm V_LSHLREV_B32    : VOP2_Real_e32e64_vi <0x12>;
defm V_AND_B32        : VOP2_Real_e32e64_vi <0x13>;
defm V_OR_B32         : VOP2_Real_e32e64_vi <0x14>;
defm V_XOR_B32        : VOP2_Real_e32e64_vi <0x15>;
defm V_MAC_F32        : VOP2_Real_e32e64_vi <0x16>;
defm V_MADMK_F32      : VOP2_Real_MADK_vi <0x17>;
defm V_MADAK_F32      : VOP2_Real_MADK_vi <0x18>;

defm V_ADD_U32    : VOP2be_Real_e32e64_vi_only <0x19, "V_ADD_CO_U32", "v_add_u32">;
defm V_SUB_U32    : VOP2be_Real_e32e64_vi_only <0x1a, "V_SUB_CO_U32", "v_sub_u32">;
defm V_SUBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1b, "V_SUBREV_CO_U32", "v_subrev_u32">;
defm V_ADDC_U32   : VOP2be_Real_e32e64_vi_only <0x1c, "V_ADDC_U32", "v_addc_u32">;
1613defm V_SUBB_U32 : VOP2be_Real_e32e64_vi_only <0x1d, "V_SUBB_U32", "v_subb_u32">; 1614defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi_only <0x1e, "V_SUBBREV_U32", "v_subbrev_u32">; 1615 1616defm V_ADD_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_CO_U32", "v_add_co_u32">; 1617defm V_SUB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_CO_U32", "v_sub_co_u32">; 1618defm V_SUBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_CO_U32", "v_subrev_co_u32">; 1619defm V_ADDC_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1c, "V_ADDC_U32", "v_addc_co_u32">; 1620defm V_SUBB_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1d, "V_SUBB_U32", "v_subb_co_u32">; 1621defm V_SUBBREV_CO_U32 : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_subbrev_co_u32">; 1622 1623defm V_ADD_U32 : VOP2_Real_e32e64_gfx9 <0x34>; 1624defm V_SUB_U32 : VOP2_Real_e32e64_gfx9 <0x35>; 1625defm V_SUBREV_U32 : VOP2_Real_e32e64_gfx9 <0x36>; 1626 1627defm V_BFM_B32 : VOP2_Real_e64only_vi <0x293>; 1628defm V_BCNT_U32_B32 : VOP2_Real_e64only_vi <0x28b>; 1629defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64only_vi <0x28c>; 1630defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64only_vi <0x28d>; 1631defm V_LDEXP_F32 : VOP2_Real_e64only_vi <0x288>; 1632defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64only_vi <0x1f0>; 1633defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64only_vi <0x294>; 1634defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64only_vi <0x295>; 1635defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64only_vi <0x296>; 1636defm V_CVT_PK_U16_U32 : VOP2_Real_e64only_vi <0x297>; 1637defm V_CVT_PK_I16_I32 : VOP2_Real_e64only_vi <0x298>; 1638 1639defm V_ADD_F16 : VOP2_Real_e32e64_vi <0x1f>; 1640defm V_SUB_F16 : VOP2_Real_e32e64_vi <0x20>; 1641defm V_SUBREV_F16 : VOP2_Real_e32e64_vi <0x21>; 1642defm V_MUL_F16 : VOP2_Real_e32e64_vi <0x22>; 1643defm V_MAC_F16 : VOP2_Real_e32e64_vi <0x23>; 1644defm V_MADMK_F16 : VOP2_Real_MADK_vi <0x24>; 1645defm V_MADAK_F16 : VOP2_Real_MADK_vi <0x25>; 1646defm V_ADD_U16 : VOP2_Real_e32e64_vi <0x26>; 1647defm V_SUB_U16 : VOP2_Real_e32e64_vi <0x27>; 1648defm 
V_SUBREV_U16 : VOP2_Real_e32e64_vi <0x28>; 1649defm V_MUL_LO_U16 : VOP2_Real_e32e64_vi <0x29>; 1650defm V_LSHLREV_B16 : VOP2_Real_e32e64_vi <0x2a>; 1651defm V_LSHRREV_B16 : VOP2_Real_e32e64_vi <0x2b>; 1652defm V_ASHRREV_I16 : VOP2_Real_e32e64_vi <0x2c>; 1653defm V_MAX_F16 : VOP2_Real_e32e64_vi <0x2d>; 1654defm V_MIN_F16 : VOP2_Real_e32e64_vi <0x2e>; 1655defm V_MAX_U16 : VOP2_Real_e32e64_vi <0x2f>; 1656defm V_MAX_I16 : VOP2_Real_e32e64_vi <0x30>; 1657defm V_MIN_U16 : VOP2_Real_e32e64_vi <0x31>; 1658defm V_MIN_I16 : VOP2_Real_e32e64_vi <0x32>; 1659defm V_LDEXP_F16 : VOP2_Real_e32e64_vi <0x33>; 1660 1661let SubtargetPredicate = isGFX8GFX9 in { 1662 1663// Aliases to simplify matching of floating-point instructions that 1664// are VOP2 on SI and VOP3 on VI. 1665class SI2_VI3Alias <string name, VOP3_Real inst> : InstAlias < 1666 name#" $dst, $src0, $src1", 1667 !if(inst.Pfl.HasOMod, 1668 (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0), 1669 (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0)) 1670>, PredicateControl { 1671 let UseInstAsmMatchConverter = 0; 1672 let AsmVariantName = AMDGPUAsmVariants.VOP3; 1673} 1674 1675def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>; 1676def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>; 1677def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>; 1678def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>; 1679def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>; 1680 1681defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_vi>; 1682 1683} // End SubtargetPredicate = isGFX8GFX9 1684 1685let SubtargetPredicate = isGFX9Only in { 1686 1687defm : VOP2bInstAliases<V_ADD_U32_e32, V_ADD_CO_U32_e32_gfx9, "v_add_co_u32">; 1688defm : VOP2bInstAliases<V_ADDC_U32_e32, V_ADDC_CO_U32_e32_gfx9, "v_addc_co_u32">; 1689defm : VOP2bInstAliases<V_SUB_U32_e32, V_SUB_CO_U32_e32_gfx9, "v_sub_co_u32">; 1690defm : 
VOP2bInstAliases<V_SUBB_U32_e32, V_SUBB_CO_U32_e32_gfx9, "v_subb_co_u32">; 1691defm : VOP2bInstAliases<V_SUBREV_U32_e32, V_SUBREV_CO_U32_e32_gfx9, "v_subrev_co_u32">; 1692defm : VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBBREV_CO_U32_e32_gfx9, "v_subbrev_co_u32">; 1693 1694} // End SubtargetPredicate = isGFX9Only 1695 1696let SubtargetPredicate = HasDLInsts in { 1697 1698defm V_FMAC_F32 : VOP2_Real_e32e64_vi <0x3b>; 1699defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>; 1700 1701} // End SubtargetPredicate = HasDLInsts 1702 1703let AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A" in { 1704 multiclass VOP2_Real_e32_gfx90a <bits<6> op> { 1705 def _e32_gfx90a : 1706 VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX90A>, 1707 VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>; 1708 } 1709 1710 multiclass VOP2_Real_e64_gfx90a <bits<10> op> { 1711 def _e64_gfx90a : 1712 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX90A>, 1713 VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>; 1714 } 1715 1716 multiclass Base_VOP2_Real_e32e64_gfx90a <bits<6> op> : 1717 VOP2_Real_e32_gfx90a<op>, 1718 VOP2_Real_e64_gfx90a<{0, 1, 0, 0, op{5-0}}>; 1719 1720 multiclass VOP2_Real_e32e64_gfx90a <bits<6> op> : 1721 Base_VOP2_Real_e32e64_gfx90a<op> { 1722 1723 foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in 1724 def _dpp_gfx90a : 1725 VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX90A>, 1726 VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> { 1727 let DecoderNamespace = "SDWA9"; 1728 } 1729 } 1730} // End AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A" 1731 1732let SubtargetPredicate = isGFX90APlus in { 1733 defm V_FMAC_F64 : VOP2_Real_e32e64_gfx90a <0x4>; 1734 let IsSingle = 1 in { 1735 defm V_MUL_LEGACY_F32 : VOP2_Real_e64_gfx90a <0x2a1>; 1736 } 1737} // End SubtargetPredicate = isGFX90APlus 1738 1739multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : VOP2_Real_e32_vi<op> { 1740 def _dpp_vi : 
VOP2_DPP<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>; 1741} 1742 1743multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> : 1744 VOP2_Real_e32_gfx10<op>, 1745 VOP2_Real_dpp_gfx10<op>, 1746 VOP2_Real_dpp8_gfx10<op>; 1747 1748let SubtargetPredicate = HasDot5Insts in { 1749 defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx9<0x37>; 1750 // NB: Opcode conflicts with V_DOT8C_I32_I4 1751 // This opcode exists in gfx 10.1* only 1752 defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx10<0x02>; 1753} 1754 1755let SubtargetPredicate = HasDot6Insts in { 1756 defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx9<0x39>; 1757 defm V_DOT4C_I32_I8 : VOP2_Real_DOT_ACC_gfx10<0x0d>; 1758} 1759 1760let SubtargetPredicate = HasDot4Insts in { 1761 defm V_DOT2C_I32_I16 : VOP2_Real_DOT_ACC_gfx9<0x38>; 1762} 1763let SubtargetPredicate = HasDot3Insts in { 1764 defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx9<0x3a>; 1765} 1766 1767let SubtargetPredicate = HasPkFmacF16Inst in { 1768defm V_PK_FMAC_F16 : VOP2_Real_e32_vi<0x3c>; 1769} // End SubtargetPredicate = HasPkFmacF16Inst 1770 1771let SubtargetPredicate = HasDot3Insts in { 1772 // NB: Opcode conflicts with V_DOT2C_F32_F16 1773 let DecoderNamespace = "GFX10_B" in 1774 defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx10<0x02>; 1775} 1776