//===-- VOP2Instructions.td - Vector Instruction Definitions --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VOP2 Classes
//===----------------------------------------------------------------------===//

// 32-bit VOP2 encoding. An operand field is encoded as 0 when the profile
// reports that the operand is absent (HasSrc0 / HasSrc1 / EmitDst).
class VOP2e <bits<6> op, VOPProfile P> : Enc32 {
  bits<8> vdst;
  bits<9> src0;
  bits<8> src1;

  let Inst{8-0} = !if(P.HasSrc0, src0, 0);
  let Inst{16-9} = !if(P.HasSrc1, src1, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst, 0);
  let Inst{30-25} = op;
  let Inst{31} = 0x0; //encoding
}

// 64-bit VOP2 encoding: the low dword has the same layout as VOP2e and the
// high dword (bits 63-32) carries the 32-bit immediate.
class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 {
  bits<8> vdst;
  bits<9> src0;
  bits<8> src1;
  bits<32> imm;

  let Inst{8-0} = !if(P.HasSrc0, src0, 0);
  let Inst{16-9} = !if(P.HasSrc1, src1, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst, 0);
  let Inst{30-25} = op;
  let Inst{31} = 0x0; // encoding
  let Inst{63-32} = imm;
}

// SDWA form of the VOP2 encoding: the src0 field (bits 8-0) holds the fixed
// value 0xf9; the remaining fields come from VOP_SDWAe.
class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> {
  bits<8> vdst;
  bits<8> src1;

  let Inst{8-0} = 0xf9; // sdwa
  let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31} = 0x0; // encoding
}

// SDWA9 form: src1 is 9 bits wide here; its low 8 bits go to the usual src1
// field and bit 8 is placed in Inst{63}.
class VOP2_SDWA9Ae <bits<6> op, VOPProfile P> : VOP_SDWA9Ae <P> {
  bits<8> vdst;
  bits<9> src1;

  let Inst{8-0} = 0xf9; // sdwa
  let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31} = 0x0; // encoding
  let Inst{63} = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr
}

// Pseudo instruction for the 32-bit (default suffix "_e32") form of a VOP2
// operation. Operands and assembly string are taken from the profile's
// Outs32 / Ins32 / Asm32.
class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suffix = "_e32"> :
  VOP_Pseudo <opName, suffix, P, P.Outs32, P.Ins32, "", pattern> {

  let AsmOperands = P.Asm32;

  let Size = 4;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;

  // The MODE register is read when either the destination or src0 type is a
  // floating-point type.
  let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret);

  let mayRaiseFPException = ReadsModeReg;

  let VOP2 = 1;
  let VALU = 1;
  let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);

  let AsmVariantName = AMDGPUAsmVariants.Default;
}

// Real (encodable) counterpart of a VOP2_Pseudo for one encoding family.
// Operand lists and assembly operands are taken from the pseudo; real_name
// defaults to the pseudo's mnemonic.
class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemonic> :
  VOP_Real <ps>,
  InstSI <ps.OutOperandList, ps.InOperandList, real_name # ps.AsmOperands, []>,
  SIMCInstr <ps.PseudoInstr, EncodingFamily> {

  let VALU = 1;
  let VOP2 = 1;
  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  // (Constraints and DisableEncoding are assigned once, here; an earlier
  // duplicate pair of identical assignments was removed.)
  let SubtargetPredicate = ps.SubtargetPredicate;
  let OtherPredicates = ps.OtherPredicates;
  let AsmMatchConverter = ps.AsmMatchConverter;
  let AsmVariantName = ps.AsmVariantName;
  let Constraints = ps.Constraints;
  let DisableEncoding = ps.DisableEncoding;
  let TSFlags = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let Uses = ps.Uses;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let mayLoad = ps.mayLoad;
  let mayStore = ps.mayStore;
}

// SDWA pseudo for VOP2; only selects the "cvtSdwaVOP2" asm match converter.
class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_SDWA_Pseudo <OpName, P, pattern> {
  let AsmMatchConverter = "cvtSdwaVOP2";
}

// DPP pseudo for VOP2; adds nothing on top of VOP_DPP_Pseudo.
class VOP2_DPP_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
  VOP_DPP_Pseudo <OpName, P, pattern> {
}


// Builds the selection pattern list for a two-source operation. With
// modifiers, src0 is matched through VOP3Mods0 (with $omod only when the
// profile has OMod) and src1 through VOP3Mods; without modifiers the node is
// matched directly on $src0/$src1.
class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
  list<dag> ret = !if(P.HasModifiers,
    [(set P.DstVT:$vdst,
      (node (P.Src0VT
        !if(P.HasOMod,
          (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
          (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))),
        (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
    [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]);
}

// Defines the _e32 pseudo. revOp names the reversed-operand partner; the
// instruction is marked as the "original" when revOp equals opName.
multiclass VOP2Inst_e32<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName,
                        bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
               Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
  } // End renamedInGFX9 = GFX9Renamed
}
// Same as VOP2Inst_e32, additionally tagging the definitions as a VOPD
// component with the given VOPD opcode and name.
multiclass
    VOP2Inst_e32_VOPD<string opName, VOPProfile P, bits<5> VOPDOp,
                      string VOPDName, SDPatternOperator node = null_frag,
                      string revOp = opName, bit GFX9Renamed = 0> {
  defm NAME : VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
              VOPD_Component<VOPDOp, VOPDName>;
}
// Defines the _e64 (VOP3-encoded) pseudo and, when the profile has
// HasExtVOP3DPP, an _e64_dpp pseudo predicated on isGFX11Plus.
multiclass VOP2Inst_e64<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName,
                        bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    def _e64 : VOP3InstBase <opName, P, node, 1>,
               Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;

    let SubtargetPredicate = isGFX11Plus in {
      foreach _ = BoolToList<P.HasExtVOP3DPP>.ret in
        def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
    } // End SubtargetPredicate = isGFX11Plus
  } // End renamedInGFX9 = GFX9Renamed
}

// Defines the _sdwa pseudo, only when the profile has HasExtSDWA.
multiclass VOP2Inst_sdwa<string opName,
                         VOPProfile P,
                         bit GFX9Renamed = 0> {
  let renamedInGFX9 = GFX9Renamed in {
    foreach _ = BoolToList<P.HasExtSDWA>.ret in
      def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
  } // End renamedInGFX9 = GFX9Renamed
}

// Full VOP2 instruction: _e32, _e64 (plus optional _e64_dpp), optional _sdwa
// and, when the profile has HasExtDPP, a _dpp pseudo.
multiclass VOP2Inst<string opName,
                    VOPProfile P,
                    SDPatternOperator node = null_frag,
                    string revOp = opName,
                    bit GFX9Renamed = 0> :
    VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
    VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
    VOP2Inst_sdwa<opName, P, GFX9Renamed> {
  let renamedInGFX9 = GFX9Renamed in {
    foreach _ = BoolToList<P.HasExtDPP>.ret in
      def _dpp : VOP2_DPP_Pseudo <opName, P>;
  }
}

// Defines two sets of instructions: the plain one for subtargets without
// true 16-bit instructions, and a "_t16" one (using VOPProfile_True16<P>) for
// subtargets with them.
multiclass VOP2Inst_t16<string opName,
                        VOPProfile P,
                        SDPatternOperator node = null_frag,
                        string revOp = opName,
                        bit GFX9Renamed = 0> {
  let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts] in {
    defm NAME : VOP2Inst<opName, P, node, revOp, GFX9Renamed>;
  }
  let SubtargetPredicate = HasTrue16BitInsts in {
    defm _t16 : VOP2Inst<opName#"_t16", VOPProfile_True16<P>, node, revOp#"_t16", GFX9Renamed>;
  }
}

// Creating a _t16_e32 pseudo when there is no corresponding real instruction on
// any subtarget is a problem. It makes getMCOpcodeGen return -1, which we
// assume means the instruction is already a real. The fix is to not create that
// _t16_e32 pseudo
multiclass VOP2Inst_e64_t16<string opName,
                            VOPProfile P,
                            SDPatternOperator node = null_frag,
                            string revOp = opName,
                            bit GFX9Renamed = 0> {
  let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts] in {
    defm NAME : VOP2Inst<opName, P, node, revOp, GFX9Renamed>;
  }
  let SubtargetPredicate = HasTrue16BitInsts in {
    // Only the _e64 variants are created for the _t16 flavour (see above).
    defm _t16 : VOP2Inst_e64<opName#"_t16", VOPProfile_True16<P>, node, revOp#"_t16", GFX9Renamed>;
  }
}

// Like VOP2Inst, but the _e32 definition is also a VOPD component.
multiclass VOP2Inst_VOPD<string opName,
                         VOPProfile P,
                         bits<5> VOPDOp,
                         string VOPDName,
                         SDPatternOperator node = null_frag,
                         string revOp = opName,
                         bit GFX9Renamed = 0> :
    VOP2Inst_e32_VOPD<opName, P, VOPDOp, VOPDName, node, revOp, GFX9Renamed>,
    VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
    VOP2Inst_sdwa<opName, P, GFX9Renamed> {
  let renamedInGFX9 = GFX9Renamed in {
    foreach _ = BoolToList<P.HasExtDPP>.ret in
      def _dpp : VOP2_DPP_Pseudo <opName, P>;
  }
}

// VOP2 instruction whose 32-bit, SDWA and DPP forms define VCC. Those forms
// also use VCC when useSGPRInput is set (by default: when the profile has
// three source arguments). The _e64 forms are defined outside the
// Uses/Defs scope.
multiclass VOP2bInst <string opName,
                      VOPProfile P,
                      SDPatternOperator node = null_frag,
                      string revOp = opName,
                      bit GFX9Renamed = 0,
                      bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
  let renamedInGFX9 = GFX9Renamed in {
    let SchedRW = [Write32Bit, WriteSALU] in {
      let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
        def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
                   Commutable_REV<revOp#"_e32", !eq(revOp, opName)> {
          let usesCustomInserter = true;
        }

        foreach _ = BoolToList<P.HasExtSDWA>.ret in
          def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
            let AsmMatchConverter = "cvtSdwaVOP2b";
          }
        foreach _ = BoolToList<P.HasExtDPP>.ret in
          def _dpp : VOP2_DPP_Pseudo <opName, P>;
      } // End Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC]

      def _e64 : VOP3InstBase <opName, P, node, 1>,
                 Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;

      let SubtargetPredicate = isGFX11Plus in {
        foreach _ = BoolToList<P.HasExtVOP3DPP>.ret in
          def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
      } // End SubtargetPredicate = isGFX11Plus
    }
  }
}

// Assembler alias for a VOP2b instruction: the alias string is the profile's
// Asm32 with every "vcc" replaced by opnd.
class VOP2bInstAlias <VOP2_Pseudo ps, Instruction inst,
                      string OpName, string opnd> :
  InstAlias <OpName#" "#!subst("vcc", opnd, ps.Pfl.Asm32),
             (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
                   ps.Pfl.Src1RC32:$src1),
             1, inst.AsmVariantName>,
  PredicateControl {
}

// Emits one alias per wave size: "vcc_lo" under isWave32, "vcc" under
// isWave64.
multiclass VOP2bInstAliases<VOP2_Pseudo ps, VOP2_Real inst, string OpName> {
  let WaveSizePredicate = isWave32 in {
    def : VOP2bInstAlias<ps, inst, OpName, "vcc_lo">;
  }
  let WaveSizePredicate = isWave64 in {
    def : VOP2bInstAlias<ps, inst, OpName, "vcc">;
  }
}

// Common body for VOP2e instructions. The 32-bit, SDWA and DPP forms use VCC
// when useSGPRInput is set. A VOPDOp of -1 means "not a VOPD component";
// otherwise the _e32 definition is tagged with VOPD_Component.
multiclass
    VOP2eInst_Base<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName,
                   SDPatternOperator node, string revOp, bit useSGPRInput> {

  let SchedRW = [Write32Bit] in {
    let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
      if !eq(VOPDOp, -1) then
        def _e32 : VOP2_Pseudo <opName, P>,
                   Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
      else
        def _e32 : VOP2_Pseudo <opName, P>,
                   Commutable_REV<revOp#"_e32", !eq(revOp, opName)>,
                   VOPD_Component<VOPDOp, VOPDName>;

      foreach _ = BoolToList<P.HasExtSDWA>.ret in
        def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
          let AsmMatchConverter = "cvtSdwaVOP2e";
        }

      foreach _ = BoolToList<P.HasExtDPP>.ret in
        def _dpp : VOP2_DPP_Pseudo <opName, P>;
    }

    def _e64 : VOP3InstBase <opName, P, node, 1>,
               Commutable_REV<revOp#"_e64", !eq(revOp, opName)> {
      let isReMaterializable = 1;
    }

    let SubtargetPredicate = isGFX11Plus in {
      foreach _ = BoolToList<P.HasExtVOP3DPP>.ret in
        def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
    } // End SubtargetPredicate = isGFX11Plus
  }
}

// VOP2e instruction that is not a VOPD component (passes VOPDOp = -1).
multiclass
    VOP2eInst<string opName, VOPProfile P, SDPatternOperator node = null_frag,
              string revOp = opName, bit useSGPRInput = !eq(P.NumSrcArgs, 3)>
    : VOP2eInst_Base<opName, P, -1, "", node, revOp, useSGPRInput>;

// VOP2e instruction that is also a VOPD component.
multiclass
    VOP2eInst_VOPD<string opName, VOPProfile P, bits<5> VOPDOp, string VOPDName,
                   SDPatternOperator node = null_frag, string revOp = opName,
                   bit useSGPRInput = !eq(P.NumSrcArgs, 3)>
    : VOP2eInst_Base<opName, P, VOPDOp, VOPDName, node, revOp, useSGPRInput>;

// Assembler alias for a VOP2e instruction: the profile's Asm32 followed by
// ", " and opnd.
class VOP2eInstAlias <VOP2_Pseudo ps, Instruction inst, string opnd = ""> :
  InstAlias <ps.OpName#" "#ps.Pfl.Asm32#", "#opnd,
             (inst ps.Pfl.DstRC:$vdst, ps.Pfl.Src0RC32:$src0,
                   ps.Pfl.Src1RC32:$src1),
             1, inst.AsmVariantName>,
  PredicateControl;

// Assembler alias built from the profile's Asm64 string, with explicit $sdst
// and $clamp operands in the result dag.
class VOP2e64InstAlias <VOP3_Pseudo ps, Instruction inst> :
  InstAlias <ps.OpName#" "#ps.Pfl.Asm64,
             (inst ps.Pfl.DstRC:$vdst, VOPDstS64orS32:$sdst,
                   ps.Pfl.Src0RC32:$src0, ps.Pfl.Src1RC32:$src1, clampmod:$clamp),
             1, inst.AsmVariantName>,
  PredicateControl;

// Emits one VOP2e alias per wave size: "vcc_lo" under isWave32, "vcc" under
// isWave64.
multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
  let WaveSizePredicate = isWave32 in {
    def : VOP2eInstAlias<ps, inst, "vcc_lo">;
  }
  let WaveSizePredicate = isWave64 in {
    def : VOP2eInstAlias<ps, inst, "vcc">;
  }
}

// Shared base of the MADAK/MADMK profiles: all four value types are vt, and
// AsmVOPDXDeferred is left unset for the subclasses to fill in.
class VOP_MADK_Base<ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
  string AsmVOPDXDeferred = ?;
}

// Profile whose 32-bit operand order is (src0, src1, imm): the literal is the
// last operand. The immediate operand type is chosen by the size of vt.
class VOP_MADAK <ValueType vt> : VOP_MADK_Base<vt> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
  field dag Ins32 = !if(!eq(vt.Size, 32),
                        (ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm),
                        (ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm));
  field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$imm);
  // Note that both src0X and imm are deferred
  let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, VGPR_32:$vsrc1X, ImmOpType:$immDeferred);
  field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, VGPR_32:$vsrc1Y, ImmOpType:$imm);

  field string Asm32 = "$vdst, $src0, $src1, $imm";
  field string AsmVOPDX = "$vdstX, $src0X, $vsrc1X, $imm";
  let AsmVOPDXDeferred = "$vdstX, $src0X, $vsrc1X, $immDeferred";
  field string AsmVOPDY = "$vdstY, $src0Y, $vsrc1Y, $imm";
  field bit HasExt = 0;
  let IsSingle = 1;
}

def VOP_MADAK_F16 : VOP_MADAK <f16>;
// True16 flavour: destination and src1 are restricted to VGPR_32_Lo128.
def VOP_MADAK_F16_t16 : VOP_MADAK <f16> {
  let IsTrue16 = 1;
  let DstRC = VOPDstOperand<VGPR_32_Lo128>;
  let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, VGPR_32_Lo128:$src1, ImmOpType:$imm);
}
def VOP_MADAK_F32 : VOP_MADAK <f32>;

// Profile whose 32-bit operand order is (src0, imm, src1): the literal sits
// between the two register sources.
class VOP_MADMK <ValueType vt> : VOP_MADK_Base<vt> {
  field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
  field dag Ins32 = !if(!eq(vt.Size, 32),
                        (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1),
                        (ins VSrc_f16_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1));
  field dag InsVOPDX = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$imm, VGPR_32:$vsrc1X);
  let InsVOPDXDeferred = (ins VSrc_f32_Deferred:$src0X, ImmOpType:$immDeferred, VGPR_32:$vsrc1X);
  field dag InsVOPDY = (ins VSrc_f32_Deferred:$src0Y, ImmOpType:$imm, VGPR_32:$vsrc1Y);

  field string Asm32 = "$vdst, $src0, $imm, $src1";
  field string AsmVOPDX = "$vdstX, $src0X, $imm, $vsrc1X";
  let AsmVOPDXDeferred = "$vdstX, $src0X, $immDeferred, $vsrc1X";
  field string AsmVOPDY = "$vdstY, $src0Y, $imm, $vsrc1Y";
  field bit HasExt = 0;
  let IsSingle = 1;
}

def VOP_MADMK_F16 : VOP_MADMK <f16>;
// True16 flavour: destination and src1 are restricted to VGPR_32_Lo128.
def VOP_MADMK_F16_t16 : VOP_MADMK <f16> {
  let IsTrue16 = 1;
  let DstRC = VOPDstOperand<VGPR_32_Lo128>;
  let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPR_32_Lo128:$src1);
}
def VOP_MADMK_F32 : VOP_MADMK <f32>;

// Wraps the register class chosen by getVregSrcForVT in a RegisterOperand.
class getRegisterOperandForVT<ValueType VT> {
  RegisterOperand ret = RegisterOperand<getVregSrcForVT<VT>.ret>;
}

// FIXME: Remove src2_modifiers. It isn't used, so is wasting memory
// and processing time but it makes it easier to convert to mad.
//
// Profile with an explicit $src2 operand in every operand list (marked
// "stub argument" in the DPP/SDWA lists) while HasSrc2 and HasSrc2Mods are
// cleared. The instruction definitions that use this profile tie $src2 to
// $vdst via Constraints.
class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, vt0]> {
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT<Src2VT>.ret:$src2);
  let Ins64 = getIns64<Src0RC64, Src1RC64, getRegisterOperandForVT<Src2VT>.ret, 3,
                       0, HasModifiers, HasModifiers, HasOMod,
                       Src0Mod, Src1Mod, Src2Mod>.ret;
  let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                    getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));
  let InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, RegisterOperand<VGPR_32>, 3,
       0, HasModifiers, HasModifiers, HasOMod,
       Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel, 0/*IsVOP3P*/>.ret;
  // We need a dummy src2 tied to dst to track the use of that register for s_delay_alu
  let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X);
  let InsVOPDXDeferred =
    (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X,
         VGPR_32:$vsrc1X, VGPRSrc_32:$src2X);
  let InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y, VGPRSrc_32:$src2Y);
  // NOTE(review): the Y form selects on Src1VT for the $src0Y operand while
  // the X form above selects on Src0VT. Both source types are presumably vt1
  // for this profile, so the result should be the same - confirm against
  // VOPProfile before changing.
  let InsVOPDYDeferred =
    (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y,
         VGPR_32:$vsrc1Y, VGPRSrc_32:$src2Y);

  let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                     Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                     getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
                     dpp8:$dpp8, FI:$fi);
  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                     getVregSrcForVT<Src2VT>.ret:$src2, // stub argument
                     clampmod:$clamp, omod:$omod,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);
  // The assembly strings are generated for two source arguments, so $src2
  // does not appear in them.
  let Asm32 = getAsm32<1, 2, vt0>.ret;
  let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt0>.ret;
  let AsmDPP16 = getAsmDPP16<1, 2, HasModifiers, vt0>.ret;
  let AsmDPP8 = getAsmDPP8<1, 2, 0, vt0>.ret;
  let AsmSDWA = getAsmSDWA<1, 2, vt0>.ret;
  let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt0>.ret;
  let AsmVOP3Base =
      getAsmVOP3Base<2 /*NumSrcArgs*/, HasDst, HasClamp,
                     HasOpSel, HasOMod, IsVOP3P, HasModifiers,
                     HasModifiers, HasModifiers,
                     0 /*Src2HasMods*/, DstVT>.ret;
  let HasSrc2 = 0;
  let HasSrc2Mods = 0;

  let HasExt = 1;
  let HasExtDPP = 1;
  let HasExt32BitDPP = 1;
  let HasExtSDWA = 1;
  let HasExtSDWA9 = 0;
  let TieRegDPP = "$src2";
}

def VOP_MAC_F16 : VOP_MAC <f16>;
// True16 flavour of the f16 MAC profile: operand classes and modifier types
// are replaced by their _t16 / Lo128 counterparts.
def VOP_MAC_F16_t16 : VOP_MAC <f16> {
  let IsTrue16 = 1;
  let HasOpSel = 1;
  let AsmVOP3OpSel = getAsmVOP3OpSel<2/*NumSrcArgs*/, HasClamp, HasOMod,
                                     HasSrc0FloatMods, HasSrc1FloatMods, HasSrc2FloatMods>.ret;
  let DstRC = VOPDstOperand<VGPR_32_Lo128>;
  let DstRC64 = VOPDstOperand<VGPR_32>;
  let Src1RC32 = VGPRSrc_32_Lo128;
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, getVregSrcForVT_t16<Src2VT>.ret:$src2);
  let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret;
  let Src1DPP = getVregSrcForVT_t16<Src1VT>.ret;
  let Src2DPP = getVregSrcForVT_t16<Src2VT>.ret;
  let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
  let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
  let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
  let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                    Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                    getVregSrcForVT_t16<Src2VT>.ret:$src2, // stub argument
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP8 = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
                     Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
                     getVregSrcForVT_t16<Src2VT>.ret:$src2, // stub argument
                     dpp8:$dpp8, FI:$fi);
  let Src2Mod = FP32InputMods; // dummy unused modifiers
  let Src2RC64 = VGPRSrc_32; // stub argument
}
def VOP_MAC_F32 : VOP_MAC <f32>;
// Legacy f32 MAC profile: same as VOP_MAC_F32 but with the DPP extensions
// switched off.
let HasExtDPP = 0, HasExt32BitDPP = 0 in
def VOP_MAC_LEGACY_F32 : VOP_MAC <f32>;
// f64 MAC profile: no SDWA, no 32-bit DPP, 64-bit DPP enabled.
let HasExtSDWA = 0, HasExt32BitDPP = 0, HasExt64BitDPP = 1 in
def VOP_MAC_F64 : VOP_MAC <f64>;

// MAC-style profile with clamp, SDWA and op_sel disabled and IsPacked
// cleared. The defs below re-enable clamp.
class VOP_DOT_ACC<ValueType vt0, ValueType vt1> : VOP_MAC<vt0, vt1> {
  let HasClamp = 0;
  let HasExtSDWA = 0;
  let HasOpSel = 0;
  let IsPacked = 0;
}

def VOP_DOT_ACC_F32_V2F16 : VOP_DOT_ACC<f32, v2f16> {
  let Src0ModDPP = FPVRegInputMods;
  let Src1ModDPP = FPVRegInputMods;
  let HasClamp = 1;
}

def VOP_DOT_ACC_I32_I32 : VOP_DOT_ACC<i32, i32> {
  let HasExtVOP3DPP = 0;
  let HasSrc0Mods = 1;
  let HasSrc1Mods = 1;
  let HasClamp = 1;

  let Src0Mod = Int32InputMods;
  let Src1Mod = Int32InputMods;
  let Ins64 = getIns64<Src0RC64, Src1RC64, getRegisterOperandForVT<Src2VT>.ret,
                       3 /*NumSrcArgs*/, HasClamp, 1 /*HasModifiers*/,
                       1 /*HasSrc2Mods*/, HasOMod,
                       Src0Mod, Src1Mod, Src2Mod>.ret;
  let Asm64 = "$vdst, $src0, $src1$clamp";
}

// Write out to vcc or arbitrary SGPR.
// Two-source i32 profile (no src2: the third source type is untyped) with
// clamp enabled. The 64-bit and VOP3 DPP output lists carry a second, scalar
// destination ($sdst); the 32-bit, SDWA and DPP assembly strings instead
// spell that destination as a literal "vcc".
def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped], /*EnableClamp=*/1> {
  // Results: only the 64-bit form names $sdst explicitly, and both VOP3 DPP
  // forms reuse that list.
  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
  let OutsVOP3DPP = Outs64;
  let OutsVOP3DPP8 = Outs64;

  // DPP inputs: $old followed by the two sources, without modifier operands.
  let InsDPP = (ins DstRCDPP:$old,
                    Src0DPP:$src0,
                    Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));
  let InsDPP8 = (ins DstRCDPP:$old,
                     Src0DPP:$src0,
                     Src1DPP:$src1,
                     dpp8:$dpp8, FI:$fi);

  // Assembly operand strings, one per encoding variant.
  let Asm32 = "$vdst, vcc, $src0, $src1";
  let AsmVOP3Base = "$vdst, $sdst, $src0, $src1$clamp";
  let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP16 = AsmDPP#"$fi";
  let AsmDPP8 = "$vdst, vcc, $src0, $src1 $dpp8$fi";
}

// Write out to vcc or arbitrary SGPR and read in from vcc or
// arbitrary SGPR.
// Three-source profile (i32, i32, i1) with clamp enabled. As in the
// two-source variant, the 64-bit/VOP3 DPP output lists carry an explicit
// $sdst. The i1 source appears as $src2 only in the VOP3 assembly string;
// the 32-bit, SDWA and DPP forms omit it from their operand lists and print a
// literal "vcc" in its place.
def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableClamp=*/1> {
  // Feature flags.
  let HasSrc2Mods = 0;
  let HasExt = 1;
  let HasExtDPP = 1;
  let HasExt32BitDPP = 1;
  let HasExtSDWA = 1;
  let HasExtSDWA9 = 1;

  // Results.
  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst, VOPDstS64orS32:$sdst);
  let OutsVOP3DPP = Outs64;
  let OutsVOP3DPP8 = Outs64;

  // Suppress src2 implied by type since the 32-bit encoding uses an
  // implicit VCC use.
  let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);

  let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
                     Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1,
                     clampmod:$clamp,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);

  let InsDPP = (ins DstRCDPP:$old,
                    Src0DPP:$src0,
                    Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));
  let InsDPP8 = (ins DstRCDPP:$old,
                     Src0DPP:$src0,
                     Src1DPP:$src1,
                     dpp8:$dpp8, FI:$fi);

  // Assembly operand strings, one per encoding variant.
  let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
  let AsmVOP3Base = "$vdst, $sdst, $src0, $src1, $src2$clamp";
  let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP16 = AsmDPP#"$fi";
  let AsmDPP8 = "$vdst, vcc, $src0, $src1, vcc $dpp8$fi";
}

// Read in from vcc or arbitrary SGPR.
// Profile whose 32-bit form lists only $src0/$src1 (see the Ins32 comment
// below), while the SDWA/DPP assembly strings print a trailing literal "vcc"
// and the VOP3 string prints $src2. Source modifiers are enabled and are
// always floating-point style (FP16 or FP32 by src0 size for VOP3).
class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> {
  let Asm32 = "$vdst, $src0, $src1";
  let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
  let AsmDPP = "$vdst, $src0_modifiers, $src1_modifiers, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
  let AsmDPP8 = "$vdst, $src0, $src1, vcc $dpp8$fi";
  let AsmDPP16 = AsmDPP#"$fi";
  let AsmVOP3Base = "$vdst, $src0_modifiers, $src1_modifiers, $src2";

  let Outs32 = (outs DstRC:$vdst);
  let Outs64 = (outs DstRC:$vdst);

  // Suppress src2 implied by type since the 32-bit encoding uses an
  // implicit VCC use.
  let Ins32 = (ins VSrc_f32:$src0, Src1RC32:$src1);

  let HasModifiers = 1;

  // Select FP modifiers for VOP3
  let Src0Mod = !if(!eq(Src0VT.Size, 16), FP16InputMods, FP32InputMods);
  let Src1Mod = Src0Mod;

  let HasSrc0IntMods = 0;
  let HasSrc1IntMods = 0;
  let HasSrc0FloatMods = 1;
  let HasSrc1FloatMods = 1;
  // NOTE(review): the SDWA operand types are the f32 ones regardless of
  // ArgVT - confirm this is intended for the i16 profile below.
  let InsSDWA = (ins FP32SDWAInputMods:$src0_modifiers, SDWASrc_f32:$src0,
                     FP32SDWAInputMods:$src1_modifiers, SDWASrc_f32:$src1,
                     clampmod:$clamp,
                     dst_sel:$dst_sel, dst_unused:$dst_unused,
                     src0_sel:$src0_sel, src1_sel:$src1_sel);

  let InsDPP = (ins DstRCDPP:$old,
                    FPVRegInputMods:$src0_modifiers, Src0DPP:$src0,
                    FPVRegInputMods:$src1_modifiers, Src1DPP:$src1,
                    dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                    bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
  let InsDPP16 = !con(InsDPP, (ins FI:$fi));
  let InsDPP8 = (ins DstRCDPP:$old,
                     FPVRegInputMods:$src0_modifiers, Src0DPP:$src0,
                     FPVRegInputMods:$src1_modifiers, Src1DPP:$src1,
                     dpp8:$dpp8, FI:$fi);

  let Src0ModVOP3DPP = FPVRegInputMods;
  let Src1ModVOP3DPP = FPVRegInputMods;

  let HasExt = 1;
  let HasExtDPP = 1;
  let HasExt32BitDPP = 1;
  let HasExtSDWA = 1;
  let HasExtSDWA9 = 1;
}

// Concrete VOP2e_SGPR profiles for 32-bit and 16-bit data with an i1 third
// source.
def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>;
def VOP2e_I16_I16_I16_I1 : VOP2e_SGPR<[i16, i16, i16, i1]>;

// Profile for v_readlane_b32: the destination is an SReg_32, the 64-bit
// operand lists and asm string are the same as the 32-bit ones, and every
// extension (DPP, SDWA) is disabled.
def VOP_READLANE : VOPProfile<[i32, i32, i32, untyped]> {
  let Outs32 = (outs SReg_32:$vdst);
  let Outs64 = Outs32;
  let Ins32 = (ins VRegOrLds_32:$src0, SCSrc_b32:$src1);
  let Ins64 = Ins32;
  let Asm32 = " $vdst, $src0, $src1";
  let Asm64 = Asm32;

  let HasExt = 0;
  let HasExtDPP = 0;
  let HasExt32BitDPP = 0;
  let HasExt64BitDPP = 0;
  let HasExtSDWA = 0;
  let HasExtSDWA9 = 0;
}

// Profile for v_writelane_b32: the input list has an extra $vdst_in operand
// that is not printed in the asm string (the instruction definition ties it
// to $vdst). HasSrc2 is cleared and every extension is disabled.
def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> {
  let Outs32 = (outs VGPR_32:$vdst);
  let Outs64 = Outs32;
  let Ins32 = (ins SCSrc_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in);
  let Ins64 = Ins32;
  let Asm32 = " $vdst, $src0, $src1";
  let Asm64 = Asm32;
  let HasSrc2 = 0;
  let HasSrc2Mods = 0;

  let HasExt = 0;
  let HasExtDPP = 0;
  let HasExt32BitDPP = 0;
  let HasExt64BitDPP = 0;
  let HasExtSDWA = 0;
  let HasExtSDWA9 = 0;
}

//===----------------------------------------------------------------------===//
// VOP2 Instructions
//===----------------------------------------------------------------------===//

// The 16-bit conditional mask is restricted to isGFX11Plus; the 32-bit one is
// unrestricted here and is also a VOPD component (opcode 0x9).
let SubtargetPredicate = isGFX11Plus in
defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1>;
defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">;
let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in
def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>;

// Everything down to "End isCommutable = 1" is marked commutable.
let isCommutable = 1 in {
// Rematerializable arithmetic, min/max, shift and bitwise instructions. The
// *REV forms pass the non-reversed name as revOp.
let isReMaterializable = 1 in {
defm V_ADD_F32 : VOP2Inst_VOPD <"v_add_f32", VOP_F32_F32_F32, 0x4, "v_add_f32", any_fadd>;
defm V_SUB_F32 : VOP2Inst_VOPD <"v_sub_f32", VOP_F32_F32_F32, 0x5, "v_sub_f32", any_fsub>;
defm V_SUBREV_F32 : VOP2Inst_VOPD <"v_subrev_f32", VOP_F32_F32_F32, 0x6, "v_subrev_f32", null_frag, "v_sub_f32">;
defm V_MUL_LEGACY_F32 : VOP2Inst_VOPD <"v_mul_legacy_f32", VOP_F32_F32_F32, 0x7, "v_mul_dx9_zero_f32", AMDGPUfmul_legacy>;
defm V_MUL_F32 : VOP2Inst_VOPD <"v_mul_f32", VOP_F32_F32_F32, 0x3, "v_mul_f32", any_fmul>;
defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>;
defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>;
defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
defm V_MIN_F32 : VOP2Inst_VOPD <"v_min_f32", VOP_F32_F32_F32, 0xb, "v_min_f32", fminnum_like>;
defm V_MAX_F32 : VOP2Inst_VOPD <"v_max_f32", VOP_F32_F32_F32, 0xa, "v_max_f32", fmaxnum_like>;
defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, clshr_rev_32, "v_lshr_b32">;
defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, cashr_rev_32, "v_ashr_i32">;
defm V_LSHLREV_B32 : VOP2Inst_VOPD <"v_lshlrev_b32", VOP_I32_I32_I32, 0x11, "v_lshlrev_b32", clshl_rev_32, "v_lshl_b32">;
defm V_AND_B32 : VOP2Inst_VOPD <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, 0x12, "v_and_b32", and>;
defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
} // End isReMaterializable = 1

// MAC/MADAK f32 instructions: require HasMadMacF32Insts and are marked as not
// raising FP exceptions.
let mayRaiseFPException = 0 in {
let OtherPredicates = [HasMadMacF32Insts] in {
// The MAC forms tie $src2 to $vdst and exclude it from the encoding.
let Constraints = "$vdst = $src2", DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1 in {
defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>;

let SubtargetPredicate = isGFX6GFX7GFX10 in
defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_MAC_LEGACY_F32>;
} // End Constraints = "$vdst = $src2", DisableEncoding="$src2",
  //     isConvertibleToThreeAddress = 1

let isReMaterializable = 1 in
def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32, []>;
} // End OtherPredicates = [HasMadMacF32Insts]
} // End mayRaiseFPException = 0

// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
// (The trailing 1 is the GFX9Renamed argument of VOP2bInst.)
defm V_ADD_CO_U32 : VOP2bInst <"v_add_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_add_co_u32", 1>;
defm V_SUB_CO_U32 : VOP2bInst <"v_sub_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>;
defm V_SUBREV_CO_U32 : VOP2bInst <"v_subrev_co_u32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_co_u32", 1>;
defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_addc_u32", 1>;
defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;
defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32", 1>;


// Add/sub forms using the plain VOP2Inst multiclasses (so no VCC def is
// attached here), available only with HasAddNoCarryInsts.
let SubtargetPredicate = HasAddNoCarryInsts, isReMaterializable = 1 in {
defm V_ADD_U32 : VOP2Inst_VOPD <"v_add_u32", VOP_I32_I32_I32_ARITH, 0x10, "v_add_nc_u32", null_frag, "v_add_u32", 1>;
defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, "v_sub_u32", 1>;
}

} // End isCommutable = 1

// These are special and do not read the exec mask.
let isConvergent = 1, Uses = []<Register> in {
// The enclosing let supplies an empty Uses list for both lane-access pseudos
// (VOP2_Pseudo itself lists EXEC, plus MODE when the profile is floating
// point).
def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
  [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>;

// V_WRITELANE_B32 ties its result to the incoming $vdst_in operand, which is
// excluded from the encoding.
let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE,
  [(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]>;
} // End $vdst = $vdst_in, DisableEncoding $vdst_in
} // End isConvergent = 1

let isReMaterializable = 1 in {
defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_I32_I32_I32>;
defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_I32_I32_I32, add_ctpop>;
defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>;
defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>;
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp>;

// The pknorm conversions take f32 sources but are explicitly marked as not
// reading the mode register and not raising FP exceptions.
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>;
defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_u16_f32>;
}

defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_V2F16_F32_F32, AMDGPUpkrtz_f16_f32>;
defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_V2I16_I32_I32, AMDGPUpk_u16_u32>;
defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_V2I16_I32_I32, AMDGPUpk_i16_i32>;


let SubtargetPredicate = isGFX6GFX7 in {
defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>;
defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>;
} // End SubtargetPredicate = isGFX6GFX7

// Non-reversed shifts, defined only for GFX6/GFX7.
let isCommutable = 1 in {
let SubtargetPredicate = isGFX6GFX7 in {
defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, csrl_32>;
defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, csra_32>;
defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, cshl_32>;
} // End SubtargetPredicate = isGFX6GFX7
} // End isCommutable = 1
} // End isReMaterializable = 1

defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst"

// Pattern selecting Inst for a DivergentBinFrag<Op> node. The operand types
// are taken from Inst's profile (Pfl.Src0VT / Pfl.Src1VT). When the
// Commutable_REV view of Inst has IsOrig set, the operands are emitted in
// source order; otherwise they are swapped.
class DivergentBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
  GCNPat<
      (DivergentBinFrag<Op> Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
      !if(!cast<Commutable_REV>(Inst).IsOrig,
        (Inst $src0, $src1),
        (Inst $src1, $src0)
      )
  >;

// Same selection logic as DivergentBinOp, but the output instruction takes one
// extra trailing operand, which is always emitted as the constant 0.
// NOTE(review): going by the class name this is presumably the clamp bit -
// confirm against the e64 operand list.
class DivergentClampingBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
  GCNPat<
      (DivergentBinFrag<Op> Inst.Pfl.Src0VT:$src0, Inst.Pfl.Src1VT:$src1),
      !if(!cast<Commutable_REV>(Inst).IsOrig,
        (Inst $src0, $src1, 0),
        (Inst $src1, $src0, 0)
      )
  >;

// Divergent 32-bit shifts select the reversed-operand e64 instructions.
def : DivergentBinOp<csrl_32, V_LSHRREV_B32_e64>;
def : DivergentBinOp<csra_32, V_ASHRREV_I32_e64>;
def : DivergentBinOp<cshl_32, V_LSHLREV_B32_e64>;

// Divergent add/sub: the no-carry forms when HasAddNoCarryInsts holds, the
// carry-out forms on GFX6-GFX9.
let SubtargetPredicate = HasAddNoCarryInsts in {
  def : DivergentClampingBinOp<add, V_ADD_U32_e64>;
  def : DivergentClampingBinOp<sub, V_SUB_U32_e64>;
}

let SubtargetPredicate = isGFX6GFX7GFX8GFX9, Predicates = [isGFX6GFX7GFX8GFX9] in {
def : DivergentClampingBinOp<add, V_ADD_CO_U32_e64>;
def : DivergentClampingBinOp<sub, V_SUB_CO_U32_e64>;
}

def : DivergentBinOp<adde, V_ADDC_U32_e32>;
def : DivergentBinOp<sube, V_SUBB_U32_e32>;

// Pattern for a DivergentBinFrag<Op> on i64 operands: Inst is applied to the
// sub0 halves and, separately, to the sub1 halves of both sources, and the two
// i32 results are reassembled into a VReg_64 with REG_SEQUENCE.
class divergent_i64_BinOp <SDPatternOperator Op, Instruction Inst> :
  GCNPat<
      (DivergentBinFrag<Op> i64:$src0, i64:$src1),
      (REG_SEQUENCE VReg_64,
        (Inst
          (i32 (EXTRACT_SUBREG $src0, sub0)),
          (i32 (EXTRACT_SUBREG $src1, sub0))
        ), sub0,
        (Inst
          (i32 (EXTRACT_SUBREG $src0, sub1)),
          (i32 (EXTRACT_SUBREG $src1, sub1))
        ), sub1
      )
  >;

def : divergent_i64_BinOp <and, V_AND_B32_e64>;
def : divergent_i64_BinOp <or, V_OR_B32_e64>;
def : divergent_i64_BinOp <xor, V_XOR_B32_e64>;

//===----------------------------------------------------------------------===//
// 16-Bit Operand Instructions
//===----------------------------------------------------------------------===//

def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_I32> {
  // The ldexp.f16 intrinsic expects a i32 src1 operand, though the hardware
  // encoding treats src1 as an f16
  let Src1RC32 = RegisterOperand<VGPR_32_Lo128>;
  let Src1DPP = VGPR_32_Lo128;
  let Src1ModDPP = IntT16VRegInputMods;
}

let isReMaterializable = 1 in {
let FPDPRounding = 1 in {
  // Two mutually exclusive definitions of ldexp.f16: the plain one when
  // True16 instructions are not available, the _t16 one (with the profile
  // defined above) when they are.
  let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts] in
    defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>;
  let SubtargetPredicate = HasTrue16BitInsts in
    defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16, AMDGPUldexp>;
} // End FPDPRounding = 1
// FIXME VOP3 Only instructions. NFC using VOPProfile_True16 for these until a planned change to use a new register class for VOP3 encoded True16 instructions
defm V_LSHLREV_B16 : VOP2Inst_e64_t16 <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>;
defm V_LSHRREV_B16 : VOP2Inst_e64_t16 <"v_lshrrev_b16", VOP_I16_I16_I16, clshr_rev_16>;
defm V_ASHRREV_I16 : VOP2Inst_e64_t16 <"v_ashrrev_i16", VOP_I16_I16_I16, cashr_rev_16>;
let isCommutable = 1 in {
let FPDPRounding = 1 in {
defm V_ADD_F16 : VOP2Inst_t16 <"v_add_f16", VOP_F16_F16_F16, any_fadd>;
defm V_SUB_F16 : VOP2Inst_t16 <"v_sub_f16", VOP_F16_F16_F16, any_fsub>;
defm V_SUBREV_F16 : VOP2Inst_t16 <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">;
defm V_MUL_F16 : VOP2Inst_t16 <"v_mul_f16", VOP_F16_F16_F16, any_fmul>;
} // End FPDPRounding = 1
defm V_MUL_LO_U16 : VOP2Inst_e64_t16 <"v_mul_lo_u16", VOP_I16_I16_I16, mul>;
defm V_MAX_F16 : VOP2Inst_t16 <"v_max_f16", VOP_F16_F16_F16, fmaxnum_like>;
defm V_MIN_F16 : VOP2Inst_t16 <"v_min_f16", VOP_F16_F16_F16, fminnum_like>;
defm V_MAX_U16 : VOP2Inst_e64_t16 <"v_max_u16", VOP_I16_I16_I16, umax>;
defm V_MAX_I16 : VOP2Inst_e64_t16 <"v_max_i16", VOP_I16_I16_I16, smax>;
defm V_MIN_U16 : VOP2Inst_e64_t16 <"v_min_u16", VOP_I16_I16_I16, umin>;
defm V_MIN_I16 : VOP2Inst_e64_t16 <"v_min_i16", VOP_I16_I16_I16, smin>;
} // End isCommutable = 1
} // End isReMaterializable = 1

// 16-bit bitwise operations, defined only in their e64 / True16 form and only
// for GFX11+.
let SubtargetPredicate = isGFX11Plus in {
  let isCommutable = 1 in {
    defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, and>;
    defm V_OR_B16_t16 : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, or>;
    defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, xor>;
  } // End isCommutable = 1
} // End SubtargetPredicate = isGFX11Plus

let FPDPRounding = 1, isReMaterializable = 1 in {
let SubtargetPredicate = isGFX10Plus, OtherPredicates = [NotHasTrue16BitInsts] in {
def V_FMAMK_F16 : VOP2_Pseudo <"v_fmamk_f16", VOP_MADMK_F16, [], "">;
}
let SubtargetPredicate = HasTrue16BitInsts in {
def V_FMAMK_F16_t16 : VOP2_Pseudo <"v_fmamk_f16_t16", VOP_MADMK_F16_t16, [], "">;
}

let isCommutable = 1 in {
let SubtargetPredicate = isGFX10Plus, OtherPredicates = [NotHasTrue16BitInsts] in {
def V_FMAAK_F16 : VOP2_Pseudo <"v_fmaak_f16", VOP_MADAK_F16, [], "">;
}
let SubtargetPredicate = HasTrue16BitInsts in {
def V_FMAAK_F16_t16 : VOP2_Pseudo <"v_fmaak_f16_t16", VOP_MADAK_F16_t16, [], "">;
}
} // End isCommutable = 1
} // End FPDPRounding = 1, isReMaterializable = 1

// f16 FMAC: the destination is tied to $src2, which is left out of the
// encoding. As with FMAMK/FMAAK above there is one definition for targets
// without True16 instructions and a _t16 definition for targets with them.
let Constraints = "$vdst = $src2",
    DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1 in {
let SubtargetPredicate = isGFX10Plus, OtherPredicates = [NotHasTrue16BitInsts] in {
defm V_FMAC_F16 : VOP2Inst <"v_fmac_f16", VOP_MAC_F16>;
}
let SubtargetPredicate = HasTrue16BitInsts in {
defm V_FMAC_F16_t16 : VOP2Inst <"v_fmac_f16_t16", VOP_MAC_F16_t16>;
}
} // End FMAC Constraints

let SubtargetPredicate = Has16BitInsts in {
let isReMaterializable = 1 in {
let FPDPRounding = 1 in {
def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">;
} // End FPDPRounding = 1
let isCommutable = 1 in {
let mayRaiseFPException = 0 in {
def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16, [], "">;
}
// The inner predicate narrows the enclosing Has16BitInsts to GFX8/GFX9 for the
// 16-bit integer add/sub definitions.
let SubtargetPredicate = isGFX8GFX9 in {
  defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16_ARITH, add>;
  defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16_ARITH, sub>;
  defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16_ARITH, null_frag, "v_sub_u16">;
}
} // End isCommutable = 1
} // End isReMaterializable = 1

// FIXME: Missing FPDPRounding
let Constraints = "$vdst = $src2", DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1, isCommutable = 1 in {
defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>;
}
} // End SubtargetPredicate = Has16BitInsts


let SubtargetPredicate = HasDLInsts in {

let isReMaterializable = 1 in
defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32, xnor>;

// Divergent xnor selection. Two DAG shapes are matched for each width:
// not(xor(a, b)) and xor(not(a), b), both using the one-use xor fragment.
def : GCNPat<
  (i32 (DivergentUnaryFrag<not> (xor_oneuse i32:$src0, i32:$src1))),
  (i32 (V_XNOR_B32_e64 $src0, $src1))
>;

def : GCNPat<
  (i32 (DivergentBinFrag<xor_oneuse> (not i32:$src0), i32:$src1)),
  (i32 (V_XNOR_B32_e64 $src0, $src1))
>;

// The i64 variants apply V_XNOR_B32 to the sub0 and sub1 halves separately and
// rebuild a VReg_64 from the two results.
def : GCNPat<
  (i64 (DivergentUnaryFrag<not> (xor_oneuse i64:$src0, i64:$src1))),
  (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64
                            (i32 (EXTRACT_SUBREG $src0, sub0)),
                            (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0,
                     (i32 (V_XNOR_B32_e64
                            (i32 (EXTRACT_SUBREG $src0, sub1)),
                            (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1)
>;

def : GCNPat<
  (i64 (DivergentBinFrag<xor_oneuse> (not i64:$src0), i64:$src1)),
  (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64
                            (i32 (EXTRACT_SUBREG $src0, sub0)),
                            (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0,
                     (i32 (V_XNOR_B32_e64
                            (i32 (EXTRACT_SUBREG $src0, sub1)),
                            (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1)
>;

let Constraints = "$vdst = $src2",
    DisableEncoding = "$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1 in
defm V_FMAC_F32 : VOP2Inst_VOPD <"v_fmac_f32", VOP_MAC_F32, 0x0, "v_fmac_f32">;
} // End SubtargetPredicate = HasDLInsts

let SubtargetPredicate = HasFmaLegacy32 in {

let Constraints = "$vdst = $src2",
    DisableEncoding = "$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1 in
defm V_FMAC_LEGACY_F32 : VOP2Inst <"v_fmac_legacy_f32", VOP_MAC_LEGACY_F32>;

} // End SubtargetPredicate = HasFmaLegacy32

let SubtargetPredicate = HasFmacF64Inst,
    Constraints = "$vdst = $src2",
    DisableEncoding="$src2",
    isConvertibleToThreeAddress = 1,
    isCommutable = 1,
    SchedRW = [WriteDoubleAdd] in
defm V_FMAC_F64 : VOP2Inst <"v_fmac_f64", VOP_MAC_F64>;

// Accumulating dot-product instructions. All tie $vdst to $src2 and set
// IsDOT; each one is gated on its own HasDotNInsts predicate.
let Constraints = "$vdst = $src2",
      DisableEncoding="$src2",
      isConvertibleToThreeAddress = 1,
      isCommutable = 1,
      IsDOT = 1 in {
  let SubtargetPredicate = HasDot5Insts in
    defm V_DOT2C_F32_F16 : VOP2Inst_VOPD<"v_dot2c_f32_f16", VOP_DOT_ACC_F32_V2F16, 0xc, "v_dot2acc_f32_f16">;
  let SubtargetPredicate = HasDot6Insts in
    defm V_DOT4C_I32_I8 : VOP2Inst<"v_dot4c_i32_i8", VOP_DOT_ACC_I32_I32>;

  let SubtargetPredicate = HasDot4Insts in
    defm V_DOT2C_I32_I16 : VOP2Inst<"v_dot2c_i32_i16", VOP_DOT_ACC_I32_I32>;
  let SubtargetPredicate = HasDot3Insts in
    defm V_DOT8C_I32_I4 : VOP2Inst<"v_dot8c_i32_i4", VOP_DOT_ACC_I32_I32>;
}

// Select the e32 accumulating dot instructions for the dot nodes/intrinsics
// whose clamp operand is DSTCLAMP.NONE. Each pattern carries the same
// subtarget predicate as the instruction it selects.
let AddedComplexity = 30 in {
  def : GCNPat<
    (f32 (AMDGPUfdot2 v2f16:$src0, v2f16:$src1, f32:$src2, (i1 DSTCLAMP.NONE))),
    (f32 (V_DOT2C_F32_F16_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot5Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot4 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT4C_I32_I8_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot6Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot2 v2i16:$src0, v2i16:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT2C_I32_I16_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot4Insts;
  }
  def : GCNPat<
    (i32 (int_amdgcn_sdot8 i32:$src0, i32:$src1, i32:$src2, (i1 DSTCLAMP.NONE))),
    (i32 (V_DOT8C_I32_I4_e32 $src0, $src1, $src2))
  > {
    let SubtargetPredicate = HasDot3Insts;
  }
} // End AddedComplexity = 30

let SubtargetPredicate = HasFmaakFmamkF32Insts, isReMaterializable = 1 in {
def V_FMAMK_F32 : VOP2_Pseudo<"v_fmamk_f32", VOP_MADMK_F32, [], "">, VOPD_Component<0x2, "v_fmamk_f32">;

let isCommutable = 1 in
def V_FMAAK_F32 : VOP2_Pseudo<"v_fmaak_f32", VOP_MADAK_F32, [], "">, VOPD_Component<0x1, "v_fmaak_f32">;
}

let SubtargetPredicate = HasPkFmacF16Inst in {
defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>;
} // End SubtargetPredicate = HasPkFmacF16Inst

// Note: 16-bit instructions produce a 0 result in the high 16-bits
// on GFX8 and GFX9 and preserve high 16 bits on GFX10+
//
// Emits two anonymous patterns per (op, inst) pair: a zext-to-i32 of the i16
// op selects inst directly, and a zext-to-i64 places inst's result in sub0 and
// a V_MOV_B32 of 0 in sub1 of a VReg_64.
multiclass Arithmetic_i16_0Hi_Pats <SDPatternOperator op, Instruction inst> {

def : GCNPat<
  (i32 (zext (op i16:$src0, i16:$src1))),
  (inst VSrc_b16:$src0, VSrc_b16:$src1)
>;

def : GCNPat<
  (i64 (zext (op i16:$src0, i16:$src1))),
   (REG_SEQUENCE VReg_64,
     (inst $src0, $src1), sub0,
     (V_MOV_B32_e32 (i32 0)), sub1)
>;
}

// Extends an i1 to i16 with V_CNDMASK_B32_e64, using the constants 0 (src0)
// and 1 (src1), no source modifiers, and $src as the final operand.
class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
  (i16 (ext i1:$src)),
  (V_CNDMASK_B32_e64 (i32 0/*src0mod*/), (i32 0/*src0*/),
                     (i32 0/*src1mod*/), (i32 1/*src1*/),
                     $src)
>;

// and/or/xor on i16 and v2i16 select the 32-bit e64 bitwise instructions.
foreach vt = [i16, v2i16] in {
def : GCNPat <
  (and vt:$src0, vt:$src1),
  (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;

def : GCNPat <
  (or vt:$src0, vt:$src1),
  (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;

def : GCNPat <
  (xor vt:$src0, vt:$src1),
  (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
>;
}

let Predicates = [Has16BitInsts, isGFX8GFX9] in {

// Undo sub x, c -> add x, -c canonicalization since c is more likely
// an inline immediate than -c.
// TODO: Also do for 64-bit.
def : GCNPat<
  (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
  (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
>;

// Same rewrite when the i16 add is wrapped in a zext to i32.
def : GCNPat<
  (i32 (zext (add i16:$src0, (i16 NegSubInlineIntConst16:$src1)))),
  (V_SUB_U16_e64 VSrc_b16:$src0, NegSubInlineIntConst16:$src1)
>;

// Zero-extended i16 arithmetic, min/max and shifts select the 16-bit e64
// instruction directly (instantiations of Arithmetic_i16_0Hi_Pats).
defm : Arithmetic_i16_0Hi_Pats<add, V_ADD_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<mul, V_MUL_LO_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<sub, V_SUB_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<smin, V_MIN_I16_e64>;
defm : Arithmetic_i16_0Hi_Pats<smax, V_MAX_I16_e64>;
defm : Arithmetic_i16_0Hi_Pats<umin, V_MIN_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<umax, V_MAX_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<clshl_rev_16, V_LSHLREV_B16_e64>;
defm : Arithmetic_i16_0Hi_Pats<clshr_rev_16, V_LSHRREV_B16_e64>;
defm : Arithmetic_i16_0Hi_Pats<cashr_rev_16, V_ASHRREV_I16_e64>;

} // End Predicates = [Has16BitInsts, isGFX8GFX9]

let Predicates = [Has16BitInsts] in {

// i1 -> i16 zero/any extension shares one pattern class.
def : ZExt_i16_i1_Pat<zext>;
def : ZExt_i16_i1_Pat<anyext>;

// i1 -> i16 sign extension: same V_CNDMASK_B32_e64 form, but with the
// constant -1 as src1 instead of 1.
def : GCNPat <
  (i16 (sext i1:$src)),
  (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
                     /*src1mod*/(i32 0), /*src1*/(i32 -1), $src)
>;

} // End Predicates = [Has16BitInsts]


let SubtargetPredicate = HasIntClamp in {
// Set clamp bit for saturation.
def : VOPBinOpClampPat<uaddsat, V_ADD_CO_U32_e64, i32>;
def : VOPBinOpClampPat<usubsat, V_SUB_CO_U32_e64, i32>;
}

let SubtargetPredicate = HasAddNoCarryInsts, OtherPredicates = [HasIntClamp] in {
let AddedComplexity = 1 in { // Prefer over form with carry-out.
def : VOPBinOpClampPat<uaddsat, V_ADD_U32_e64, i32>;
def : VOPBinOpClampPat<usubsat, V_SUB_U32_e64, i32>;
}
}

let SubtargetPredicate = Has16BitInsts, OtherPredicates = [HasIntClamp] in {
def : VOPBinOpClampPat<uaddsat, V_ADD_U16_e64, i16>;
def : VOPBinOpClampPat<usubsat, V_SUB_U16_e64, i16>;
}

//===----------------------------------------------------------------------===//
// DPP Encodings
//===----------------------------------------------------------------------===//

// Encoding of the DPP form of a VOP2 instruction. hasSideEffects, Defs,
// SchedRW and Uses are copied from the pseudo ps. Field layout:
//   Inst{8-0}   fixed value 0xfa
//   Inst{16-9}  src1 (0 when the profile has no src1)
//   Inst{24-17} vdst (0 when the profile does not emit a destination)
//   Inst{30-25} op
//   Inst{31}    0
class VOP2_DPP<bits<6> op, VOP2_DPP_Pseudo ps,
               string opName = ps.OpName, VOPProfile p = ps.Pfl,
               bit IsDPP16 = 0> :
    VOP_DPP<opName, p, IsDPP16> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;

  bits<8> vdst;
  bits<8> src1;
  let Inst{8-0} = 0xfa;
  let Inst{16-9} = !if(p.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31} = 0x0;
}

// VOP2_DPP with IsDPP16 = 1. HasDPP16 is used as both the assembler and the
// subtarget predicate; OtherPredicates are inherited from the pseudo.
class Base_VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
                 string opName = ps.OpName, VOPProfile p = ps.Pfl> :
    VOP2_DPP<op, ps, opName, p, 1> {
  let AssemblerPredicate = HasDPP16;
  let SubtargetPredicate = HasDPP16;
  let OtherPredicates = ps.OtherPredicates;
}

// Base_VOP2_DPP16 additionally registered through SIMCInstr under the given
// subtarget (encoding family) value.
class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps, int subtarget,
                 string opName = ps.OpName, VOPProfile p = ps.Pfl> :
    Base_VOP2_DPP16<op, ps, opName, p>,
    SIMCInstr <ps.PseudoInstr, subtarget>;

// Encoding of the DPP8 form. Same layout as VOP2_DPP except that Inst{8-0} is
// taken from fi rather than being the constant 0xfa.
// NOTE(review): fi is not declared in this class; presumably it comes from
// VOP_DPP8 - confirm.
class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
                VOPProfile p = ps.Pfl> :
    VOP_DPP8<ps.OpName, p> {
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let SchedRW = ps.SchedRW;
  let Uses = ps.Uses;

  bits<8> vdst;
  bits<8> src1;

  let Inst{8-0} = fi;
  let Inst{16-9} = !if(p.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(p.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31} = 0x0;

  let OtherPredicates = ps.OtherPredicates;
}

//===----------------------------------------------------------------------===//
// GFX11.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
  //===------------------------------- VOP2 -------------------------------===//
  // MADK (VOP2_MADKe) real instruction for the pseudo named NAME.
  multiclass VOP2Only_Real_MADK_gfx11<bits<6> op> {
    def _gfx11 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX11>,
      VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }
  // As above, but the pseudo is looked up by opName and the asm string is
  // built from asmName plus the pseudo's operand string.
  multiclass VOP2Only_Real_MADK_gfx11_with_name<bits<6> op, string asmName,
                                                string opName = NAME> {
    def _gfx11 :
        VOP2_Real<!cast<VOP2_Pseudo>(opName), SIEncodingFamily.GFX11>,
        VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(opName).Pfl> {
      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName);
      let AsmString = asmName # ps.AsmOperands;
    }
  }
  // 32-bit VOP2 encoding of the NAME_e32 pseudo.
  multiclass VOP2_Real_e32_gfx11<bits<6> op> {
    def _e32_gfx11 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX11>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
  }
  // Same as VOP2_Real_e32_gfx11 with IsSingle = 1.
  multiclass VOP2Only_Real_e32_gfx11<bits<6> op> {
    let IsSingle = 1 in
      defm NAME: VOP2_Real_e32_gfx11<op>;
  }
  // VOP3 encoding of the NAME_e64 pseudo. The VOP3 opcode is the bit pattern
  // {0, 1, 0, 0} followed by the 6-bit VOP2 opcode.
  multiclass VOP2_Real_e64_gfx11<bits<6> op> {
    def _e64_gfx11 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX11>,
      VOP3e_gfx11<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
  // DPP16 real, emitted only when the e32 profile has HasExtDPP set.
  multiclass VOP2_Real_dpp_gfx11<bits<6> op> {
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_gfx11 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX11> {
      let DecoderNamespace = "DPPGFX11";
    }
  }
  // DPP8 real, emitted only when the e32 profile has HasExtDPP set.
  multiclass VOP2_Real_dpp8_gfx11<bits<6> op> {
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp8_gfx11 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
      let DecoderNamespace = "DPP8GFX11";
    }
  }

  //===------------------------- VOP2 (with name) -------------------------===//
  // The *_with_name variants take the pseudo's record name (opName) and the
  // mnemonic to print (asmName) separately, so a real instruction can be
  // defined under a different name than its pseudo.
  multiclass VOP2_Real_e32_with_name_gfx11<bits<6> op, string opName,
                                           string asmName, bit single = 0> {
    defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
    def _e32_gfx11 :
      VOP2_Real<ps, SIEncodingFamily.GFX11, asmName>,
      VOP2e<op{5-0}, ps.Pfl> {
        let AsmString = asmName # ps.AsmOperands;
        let IsSingle = single;
      }
  }
  multiclass VOP2_Real_e64_with_name_gfx11<bits<6> op, string opName,
                                           string asmName> {
    defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
    def _e64_gfx11 :
      VOP3_Real<ps, SIEncodingFamily.GFX11>,
      VOP3e_gfx11<{0, 1, 0, 0, op{5-0}}, ps.Pfl> {
        let AsmString = asmName # ps.AsmOperands;
      }
  }

  multiclass VOP2_Real_dpp_with_name_gfx11<bits<6> op, string opName,
                                           string asmName> {
    defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
    foreach _ = BoolToList<ps.Pfl.HasExtDPP>.ret in
    def _dpp_gfx11 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"),
                                SIEncodingFamily.GFX11> {
      let AsmString = asmName # ps.Pfl.AsmDPP16;
      let DecoderNamespace = "DPPGFX11";
    }
  }
  multiclass VOP2_Real_dpp8_with_name_gfx11<bits<6> op, string opName,
                                            string asmName> {
    defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
    foreach _ = BoolToList<ps.Pfl.HasExtDPP>.ret in
    def _dpp8_gfx11 : VOP2_DPP8<op, ps> {
      let AsmString = asmName # ps.Pfl.AsmDPP8;
      let DecoderNamespace = "DPP8GFX11";
    }
  }

  //===------------------------------ VOP2be ------------------------------===//
  // e32 real whose asm string has the ", vcc" operand text removed.
  multiclass VOP2be_Real_e32_gfx11<bits<6> op, string opName, string asmName> {
    defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
    def _e32_gfx11 :
      VOP2_Real<ps, SIEncodingFamily.GFX11>,
      VOP2e<op{5-0}, ps.Pfl> {
        let AsmString = asmName # !subst(", vcc", "", ps.AsmOperands);
      }
  }
  // Three DPP16 records per instruction, all guarded by HasExtDPP:
  //   _dpp      - ", vcc" removed from the asm string; SIMCInstr-registered.
  //   _dpp_w32  - "vcc" spelled "vcc_lo"; asm-parser only, wave32.
  //   _dpp_w64  - unmodified operand string; asm-parser only, wave64.
  multiclass VOP2be_Real_dpp_gfx11<bits<6> op, string opName, string asmName> {
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_gfx11 :
      VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX11, asmName> {
        string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
        let AsmString = asmName # !subst(", vcc", "", AsmDPP);
        let DecoderNamespace = "DPPGFX11";
      }
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_w32_gfx11 :
      Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
        string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
        let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave32;
      }
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_w64_gfx11 :
      Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
        string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
        let AsmString = asmName # AsmDPP;
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave64;
      }
  }
  // DPP8 counterpart of VOP2be_Real_dpp_gfx11, with the same three records.
  multiclass VOP2be_Real_dpp8_gfx11<bits<6> op, string opName, string asmName> {
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp8_gfx11 :
      VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
        string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
        let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
        let DecoderNamespace = "DPP8GFX11";
      }
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp8_w32_gfx11 :
      VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
        string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
        let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave32;
      }
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp8_w64_gfx11 :
      VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
        string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
        let AsmString = asmName # AsmDPP8;
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave64;
      }
  }

} // End AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11"

// We don't want to override separate decoderNamespaces within these
multiclass VOP2_Realtriple_e64_gfx11<bits<6> op> {
  defm NAME : VOP3_Realtriple_gfx11<{0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, NAME> ;
}
multiclass VOP2_Realtriple_e64_with_name_gfx11<bits<6> op, string opName,
                                               string asmName> {
  defm NAME : VOP3_Realtriple_with_name_gfx11<{0, 1, 0, 0, op{5-0}}, opName, asmName> ;
}

// The multiclasses below only combine the building blocks defined above.
multiclass VOP2be_Real_gfx11<bits<6> op, string opName, string asmName> :
  VOP2be_Real_e32_gfx11<op, opName, asmName>,
  VOP3be_Realtriple_gfx11<{0, 1, 0, 0, op{5-0}}, /*isSingle=*/ 0, opName, asmName>,
  VOP2be_Real_dpp_gfx11<op, opName, asmName>,
  VOP2be_Real_dpp8_gfx11<op, opName, asmName>;

// Only for CNDMASK
multiclass VOP2e_Real_gfx11<bits<6> op, string opName, string asmName> :
  VOP2_Real_e32_gfx11<op>,
  VOP2_Realtriple_e64_gfx11<op>,
  VOP2be_Real_dpp_gfx11<op, opName, asmName>,
  VOP2be_Real_dpp8_gfx11<op, opName, asmName>;

multiclass VOP2Only_Real_gfx11<bits<6> op> :
  VOP2Only_Real_e32_gfx11<op>,
  VOP2_Real_dpp_gfx11<op>,
  VOP2_Real_dpp8_gfx11<op>;

multiclass VOP2_Real_NO_VOP3_gfx11<bits<6> op> :
  VOP2_Real_e32_gfx11<op>, VOP2_Real_dpp_gfx11<op>, VOP2_Real_dpp8_gfx11<op>;

multiclass VOP2_Real_FULL_gfx11<bits<6> op> :
  VOP2_Realtriple_e64_gfx11<op>, VOP2_Real_NO_VOP3_gfx11<op>;

// In addition to the e32/dpp/dpp8 reals, this defines a MnemonicAlias from
// the pseudo's mnemonic to asmName, required on isGFX11Plus.
multiclass VOP2_Real_NO_VOP3_with_name_gfx11<bits<6> op, string opName,
                                           string asmName, bit isSingle = 0> {

  defm NAME : VOP2_Real_e32_with_name_gfx11<op, opName, asmName, isSingle>,
              VOP2_Real_dpp_with_name_gfx11<op, opName, asmName>,
              VOP2_Real_dpp8_with_name_gfx11<op, opName, asmName>;
  defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
  def _gfx11_alias : MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Plus]>;
}

multiclass VOP2_Real_FULL_with_name_gfx11<bits<6> op, string opName,
                                         string asmName> :
  VOP2_Realtriple_e64_with_name_gfx11<op, opName, asmName>,
  VOP2_Real_NO_VOP3_with_name_gfx11<op, opName, asmName>;

// Argument order differs from the other *_with_name multiclasses: asmName
// comes first and opName defaults to NAME.
multiclass VOP2_Real_FULL_t16_gfx11<bits<6> op, string asmName, string opName = NAME>
  : VOP2_Real_FULL_with_name_gfx11<op, opName, asmName>;

multiclass VOP2_Real_NO_DPP_gfx11<bits<6> op> :
  VOP2_Real_e32_gfx11<op>, VOP2_Real_e64_gfx11<op>;

multiclass VOP2_Real_NO_DPP_with_name_gfx11<bits<6> op, string opName,
                                           string asmName> {
  defm NAME : VOP2_Real_e32_with_name_gfx11<op, opName, asmName>,
              VOP2_Real_e64_with_name_gfx11<op, opName, asmName>;
  defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
  def _gfx11_alias : MnemonicAlias<ps.Mnemonic, asmName>, Requires<[isGFX11Plus]>;
}

// GFX11 real instructions. For the *_with_name / VOP2be / VOP2e forms the
// first string is the pseudo's record name and the second is the GFX11
// mnemonic.
defm V_CNDMASK_B32 : VOP2e_Real_gfx11<0x001, "V_CNDMASK_B32",
  "v_cndmask_b32">;
defm V_DOT2ACC_F32_F16 : VOP2_Real_NO_VOP3_with_name_gfx11<0x002,
  "V_DOT2C_F32_F16", "v_dot2acc_f32_f16", 1>;
defm V_FMAC_DX9_ZERO_F32 : VOP2_Real_NO_DPP_with_name_gfx11<0x006,
  "V_FMAC_LEGACY_F32", "v_fmac_dx9_zero_f32">;
defm V_MUL_DX9_ZERO_F32 : VOP2_Real_FULL_with_name_gfx11<0x007,
  "V_MUL_LEGACY_F32", "v_mul_dx9_zero_f32">;
defm V_LSHLREV_B32 : VOP2_Real_FULL_gfx11<0x018>;
defm V_LSHRREV_B32 : VOP2_Real_FULL_gfx11<0x019>;
defm V_ASHRREV_I32 : VOP2_Real_FULL_gfx11<0x01a>;
defm V_ADD_CO_CI_U32 :
  VOP2be_Real_gfx11<0x020, "V_ADDC_U32", "v_add_co_ci_u32">;
defm V_SUB_CO_CI_U32 :
  VOP2be_Real_gfx11<0x021, "V_SUBB_U32", "v_sub_co_ci_u32">;
defm V_SUBREV_CO_CI_U32 :
  VOP2be_Real_gfx11<0x022, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;

defm V_CVT_PK_RTZ_F16_F32 : VOP2_Real_FULL_with_name_gfx11<0x02f,
  "V_CVT_PKRTZ_F16_F32", "v_cvt_pk_rtz_f16_f32">;
defm V_PK_FMAC_F16 : VOP2Only_Real_gfx11<0x03c>;

// The _t16 pseudos are printed without the _t16 suffix.
defm V_ADD_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x032, "v_add_f16">;
defm V_SUB_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x033, "v_sub_f16">;
defm V_SUBREV_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x034, "v_subrev_f16">;
defm V_MUL_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x035, "v_mul_f16">;
defm V_FMAC_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x036, "v_fmac_f16">;
defm V_LDEXP_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03b, "v_ldexp_f16">;
defm V_MAX_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
defm V_MIN_F16_t16 : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;
defm V_FMAMK_F16_t16 : VOP2Only_Real_MADK_gfx11_with_name<0x037, "v_fmamk_f16">;
defm V_FMAAK_F16_t16 : VOP2Only_Real_MADK_gfx11_with_name<0x038, "v_fmaak_f16">;

// VOP3 only.
defm V_CNDMASK_B16 : VOP3Only_Realtriple_gfx11<0x25d>;
defm V_LDEXP_F32 : VOP3Only_Realtriple_gfx11<0x31c>;
defm V_BFM_B32 : VOP3Only_Realtriple_gfx11<0x31d>;
defm V_BCNT_U32_B32 : VOP3Only_Realtriple_gfx11<0x31e>;
defm V_MBCNT_LO_U32_B32 : VOP3Only_Realtriple_gfx11<0x31f>;
defm V_MBCNT_HI_U32_B32 : VOP3Only_Realtriple_gfx11<0x320>;
defm V_CVT_PKNORM_I16_F32 : VOP3Only_Realtriple_gfx11<0x321>;
defm V_CVT_PKNORM_U16_F32 : VOP3Only_Realtriple_gfx11<0x322>;
defm V_CVT_PK_U16_U32 : VOP3Only_Realtriple_gfx11<0x323>;
defm V_CVT_PK_I16_I32 : VOP3Only_Realtriple_gfx11<0x324>;
// The carry-out add/sub forms use the VOP3be variant of the multiclass.
defm V_ADD_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x300>;
defm V_SUB_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x301>;
defm V_SUBREV_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x302>;

// Alias instantiations pairing each e32 pseudo with its GFX11 real
// instruction. The carry-in add/sub entries also pass the GFX11 mnemonic.
// NOTE(review): VOP2eInstAliases / VOP2bInstAliases are defined outside this
// chunk; what aliases they emit is not visible here.
let SubtargetPredicate = isGFX11Plus in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx11>;

  defm : VOP2bInstAliases<
    V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx11, "v_add_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx11, "v_sub_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx11, "v_subrev_co_ci_u32">;
} // End SubtargetPredicate = isGFX11Plus

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
  //===------------------------------- VOP2 -------------------------------===//
  // MADK (VOP2_MADKe) real instruction for the pseudo named NAME.
  multiclass VOP2Only_Real_MADK_gfx10<bits<6> op> {
    def _gfx10 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX10>,
      VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }
  // As above, with the pseudo looked up by opName and printed as asmName.
  multiclass VOP2Only_Real_MADK_gfx10_with_name<bits<6> op, string opName,
                                                string asmName> {
    def _gfx10 :
        VOP2_Real<!cast<VOP2_Pseudo>(opName), SIEncodingFamily.GFX10>,
        VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(opName).Pfl> {
      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName);
      let AsmString = asmName # ps.AsmOperands;
    }
  }
  // 32-bit VOP2 encoding of the NAME_e32 pseudo.
  multiclass VOP2_Real_e32_gfx10<bits<6> op> {
    def _e32_gfx10 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX10>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
  }
  // VOP3 encoding of the NAME_e64 pseudo; the VOP3 opcode is {0, 1, 0, 0}
  // followed by the 6-bit VOP2 opcode.
  multiclass VOP2_Real_e64_gfx10<bits<6> op> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }
  // SDWA real, emitted only when the e32 profile has HasExtSDWA9 set.
  multiclass VOP2_Real_sdwa_gfx10<bits<6> op> {
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx10 :
      VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
      let DecoderNamespace = "SDWA10";
    }
  }
  // DPP16 real, emitted only when the e32 profile has HasExt32BitDPP set.
  // It is placed in the "SDWA10" decoder namespace, same as the SDWA reals.
  multiclass VOP2_Real_dpp_gfx10<bits<6> op> {
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP>.ret in
    def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10> {
      let DecoderNamespace = "SDWA10";
    }
  }
  // DPP8 real, emitted only when the e32 profile has HasExt32BitDPP set.
  multiclass VOP2_Real_dpp8_gfx10<bits<6> op> {
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP>.ret in
    def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
      let DecoderNamespace = "DPP8";
    }
  }

  //===------------------------- VOP2 (with name) -------------------------===//
  // The *_with_name variants look the pseudo up by opName and build the asm
  // string from asmName plus the pseudo's (or its profile's) operand string.
  multiclass VOP2_Real_e32_gfx10_with_name<bits<6> op, string opName,
                                           string asmName> {
    def _e32_gfx10 :
      VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> {
        VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
        let AsmString = asmName # ps.AsmOperands;
      }
  }
  multiclass VOP2_Real_e64_gfx10_with_name<bits<6> op, string opName,
                                           string asmName> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
      VOP3e_gfx10<{0, 1, 0, 0, op{5-0}},
                  !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
        VOP3_Pseudo ps = !cast<VOP3_Pseudo>(opName#"_e64");
        let AsmString = asmName # ps.AsmOperands;
      }
  }
  // The SDWA and DPP16 variants take this namespace from the enclosing let;
  // the DPP8 variant overrides it with "DPP8".
  let DecoderNamespace = "SDWA10" in {
    multiclass VOP2_Real_sdwa_gfx10_with_name<bits<6> op, string opName,
                                              string asmName> {
      foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
      def _sdwa_gfx10 :
        VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
        VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
          VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
          let AsmString = asmName # ps.AsmOperands;
        }
    }
    multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName,
                                             string asmName> {
      foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in
      def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX10> {
        VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
        let AsmString = asmName # ps.Pfl.AsmDPP16;
      }
    }
    multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName,
                                              string asmName> {
      foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in
      def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
        VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
        let AsmString = asmName # ps.Pfl.AsmDPP8;
        let DecoderNamespace = "DPP8";
      }
    }
  } // End DecoderNamespace = "SDWA10"

  //===------------------------------ VOP2be ------------------------------===//
  // e32 real whose asm string has the ", vcc" operand text removed.
  multiclass VOP2be_Real_e32_gfx10<bits<6> op, string opName, string asmName> {
    def _e32_gfx10 :
      VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.GFX10>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl> {
        VOP2_Pseudo Ps = !cast<VOP2_Pseudo>(opName#"_e32");
        let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
      }
  }
  // e64 real using the VOP3be encoding; the operand string is kept as is.
  multiclass VOP2be_Real_e64_gfx10<bits<6> op, string opName, string asmName> {
    def _e64_gfx10 :
      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.GFX10>,
      VOP3be_gfx10<{0, 1, 0, 0, op{5-0}},
                   !cast<VOP3_Pseudo>(opName#"_e64").Pfl> {
        VOP3_Pseudo Ps = !cast<VOP3_Pseudo>(opName#"_e64");
        let AsmString = asmName # Ps.AsmOperands;
      }
  }
  // Three SDWA records per instruction, all guarded by HasExtSDWA9:
  //   _sdwa      - ", vcc" removed from the asm string.
  //   _sdwa_w32  - "vcc" spelled "vcc_lo"; asm-parser only, wave32.
  //   _sdwa_w64  - unmodified operand string; asm-parser only, wave64.
  multiclass VOP2be_Real_sdwa_gfx10<bits<6> op, string opName, string asmName> {
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx10 :
      VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
        VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
        let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
        let DecoderNamespace = "SDWA10";
      }
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_w32_gfx10 :
      Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
        VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
        let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands);
        let isAsmParserOnly = 1;
        let DecoderNamespace = "SDWA10";
        let WaveSizePredicate = isWave32;
      }
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_w64_gfx10 :
      Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
      VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
        VOP2_SDWA_Pseudo Ps = !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa");
        let AsmString = asmName # Ps.AsmOperands;
        let isAsmParserOnly = 1;
        let DecoderNamespace = "SDWA10";
        let WaveSizePredicate = isWave64;
      }
  }
  multiclass VOP2be_Real_dpp_gfx10<bits<6> op, string opName, string asmName> {
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in
    def _dpp_gfx10 :
      VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX10, asmName> {
        string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
        let AsmString = asmName # !subst(", vcc", "", AsmDPP);
        let DecoderNamespace = "SDWA10";
      }
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in
    def _dpp_w32_gfx10 :
      Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
        string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
        let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP);
        let isAsmParserOnly = 1;
        let WaveSizePredicate = isWave32;
      }
    foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in
    def _dpp_w64_gfx10 :
      Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
        string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
        let AsmString = asmName # AsmDPP;
        let isAsmParserOnly = 
1; 1676 let WaveSizePredicate = isWave64; 1677 } 1678 } 1679 multiclass VOP2be_Real_dpp8_gfx10<bits<6> op, string opName, string asmName> { 1680 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in 1681 def _dpp8_gfx10 : 1682 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> { 1683 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 1684 let AsmString = asmName # !subst(", vcc", "", AsmDPP8); 1685 let DecoderNamespace = "DPP8"; 1686 } 1687 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in 1688 def _dpp8_w32_gfx10 : 1689 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> { 1690 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 1691 let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8); 1692 let isAsmParserOnly = 1; 1693 let WaveSizePredicate = isWave32; 1694 } 1695 foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in 1696 def _dpp8_w64_gfx10 : 1697 VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> { 1698 string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8; 1699 let AsmString = asmName # AsmDPP8; 1700 let isAsmParserOnly = 1; 1701 let WaveSizePredicate = isWave64; 1702 } 1703 } 1704 1705 //===----------------------------- VOP3Only -----------------------------===// 1706 multiclass VOP3Only_Real_gfx10<bits<10> op> { 1707 def _e64_gfx10 : 1708 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>, 1709 VOP3e_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> { 1710 let IsSingle = 1; 1711 } 1712 } 1713 1714 //===---------------------------- VOP3beOnly ----------------------------===// 1715 multiclass VOP3beOnly_Real_gfx10<bits<10> op> { 1716 def _e64_gfx10 : 1717 VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX10>, 1718 VOP3be_gfx10<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> { 1719 let IsSingle = 1; 1720 } 1721 } 1722} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" 1723 1724multiclass 
VOP2Only_Real_MADK_gfx10_gfx11<bits<6> op> : 1725 VOP2Only_Real_MADK_gfx10<op>, VOP2Only_Real_MADK_gfx11<op>; 1726 1727multiclass VOP2be_Real_gfx10<bits<6> op, string opName, string asmName> : 1728 VOP2be_Real_e32_gfx10<op, opName, asmName>, 1729 VOP2be_Real_e64_gfx10<op, opName, asmName>, 1730 VOP2be_Real_sdwa_gfx10<op, opName, asmName>, 1731 VOP2be_Real_dpp_gfx10<op, opName, asmName>, 1732 VOP2be_Real_dpp8_gfx10<op, opName, asmName>; 1733 1734multiclass VOP2e_Real_gfx10<bits<6> op, string opName, string asmName> : 1735 VOP2_Real_e32_gfx10<op>, 1736 VOP2_Real_e64_gfx10<op>, 1737 VOP2be_Real_sdwa_gfx10<op, opName, asmName>, 1738 VOP2be_Real_dpp_gfx10<op, opName, asmName>, 1739 VOP2be_Real_dpp8_gfx10<op, opName, asmName>; 1740 1741multiclass VOP2_Real_gfx10<bits<6> op> : 1742 VOP2_Real_e32_gfx10<op>, VOP2_Real_e64_gfx10<op>, 1743 VOP2_Real_sdwa_gfx10<op>, VOP2_Real_dpp_gfx10<op>, VOP2_Real_dpp8_gfx10<op>; 1744 1745multiclass VOP2_Real_gfx10_gfx11<bits<6> op> : 1746 VOP2_Real_gfx10<op>, VOP2_Real_FULL_gfx11<op>; 1747 1748multiclass VOP2_Real_with_name_gfx10<bits<6> op, string opName, 1749 string asmName> : 1750 VOP2_Real_e32_gfx10_with_name<op, opName, asmName>, 1751 VOP2_Real_e64_gfx10_with_name<op, opName, asmName>, 1752 VOP2_Real_sdwa_gfx10_with_name<op, opName, asmName>, 1753 VOP2_Real_dpp_gfx10_with_name<op, opName, asmName>, 1754 VOP2_Real_dpp8_gfx10_with_name<op, opName, asmName>; 1755 1756multiclass VOP2_Real_with_name_gfx10_gfx11<bits<6> op, string opName, 1757 string asmName> : 1758 VOP2_Real_with_name_gfx10<op, opName, asmName>, 1759 VOP2_Real_FULL_with_name_gfx11<op, opName, asmName>; 1760 1761// NB: Same opcode as v_mac_legacy_f32 1762let DecoderNamespace = "GFX10_B" in 1763defm V_FMAC_LEGACY_F32 : VOP2_Real_gfx10<0x006>; 1764 1765defm V_XNOR_B32 : VOP2_Real_gfx10_gfx11<0x01e>; 1766defm V_FMAC_F32 : VOP2_Real_gfx10_gfx11<0x02b>; 1767defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10_gfx11<0x02c>; 1768defm V_FMAAK_F32 : VOP2Only_Real_MADK_gfx10_gfx11<0x02d>; 
// GFX10-only VOP2 reals for the 16-bit instructions (VOP2 opcodes
// 0x032-0x03b). The FMAMK/FMAAK entries use the MADK (literal-carrying)
// encoding.
defm V_ADD_F16         : VOP2_Real_gfx10<0x032>;
defm V_SUB_F16         : VOP2_Real_gfx10<0x033>;
defm V_SUBREV_F16      : VOP2_Real_gfx10<0x034>;
defm V_MUL_F16         : VOP2_Real_gfx10<0x035>;
defm V_FMAC_F16        : VOP2_Real_gfx10<0x036>;
defm V_FMAMK_F16       : VOP2Only_Real_MADK_gfx10<0x037>;
defm V_FMAAK_F16       : VOP2Only_Real_MADK_gfx10<0x038>;
defm V_MAX_F16         : VOP2_Real_gfx10<0x039>;
defm V_MIN_F16         : VOP2_Real_gfx10<0x03a>;
defm V_LDEXP_F16       : VOP2_Real_gfx10<0x03b>;

// Only the e32 real is defined for V_PK_FMAC_F16, and it is marked IsSingle.
let IsSingle = 1 in {
  defm V_PK_FMAC_F16     : VOP2_Real_e32_gfx10<0x03c>;
}

// VOP2 no carry-in, carry-out.
// The reals are built from the V_ADD_U32 / V_SUB_U32 / V_SUBREV_U32 pseudos
// but are printed with the *_nc_u32 mnemonics.
defm V_ADD_NC_U32 :
  VOP2_Real_with_name_gfx10_gfx11<0x025, "V_ADD_U32", "v_add_nc_u32">;
defm V_SUB_NC_U32 :
  VOP2_Real_with_name_gfx10_gfx11<0x026, "V_SUB_U32", "v_sub_nc_u32">;
defm V_SUBREV_NC_U32 :
  VOP2_Real_with_name_gfx10_gfx11<0x027, "V_SUBREV_U32", "v_subrev_nc_u32">;

// VOP2 carry-in, carry-out.
// Built from the V_ADDC_U32 / V_SUBB_U32 / V_SUBBREV_U32 pseudos and printed
// with the *_co_ci_u32 mnemonics.
defm V_ADD_CO_CI_U32 :
  VOP2be_Real_gfx10<0x028, "V_ADDC_U32", "v_add_co_ci_u32">;
defm V_SUB_CO_CI_U32 :
  VOP2be_Real_gfx10<0x029, "V_SUBB_U32", "v_sub_co_ci_u32">;
defm V_SUBREV_CO_CI_U32 :
  VOP2be_Real_gfx10<0x02a, "V_SUBBREV_U32", "v_subrev_co_ci_u32">;

defm V_CNDMASK_B32 :
  VOP2e_Real_gfx10<0x001, "V_CNDMASK_B32", "v_cndmask_b32">;

// VOP3 only.
// These take the full 10-bit VOP3 opcode; no e32/SDWA/DPP reals are defined
// for them here.
defm V_BFM_B32            : VOP3Only_Real_gfx10<0x363>;
defm V_BCNT_U32_B32       : VOP3Only_Real_gfx10<0x364>;
defm V_MBCNT_LO_U32_B32   : VOP3Only_Real_gfx10<0x365>;
defm V_MBCNT_HI_U32_B32   : VOP3Only_Real_gfx10<0x366>;
defm V_LDEXP_F32          : VOP3Only_Real_gfx10<0x362>;
defm V_CVT_PKNORM_I16_F32 : VOP3Only_Real_gfx10<0x368>;
defm V_CVT_PKNORM_U16_F32 : VOP3Only_Real_gfx10<0x369>;
defm V_CVT_PK_U16_U32     : VOP3Only_Real_gfx10<0x36a>;
defm V_CVT_PK_I16_I32     : VOP3Only_Real_gfx10<0x36b>;

// VOP3 carry-out.
// GFX10 carry-out add/sub: VOP3be (e64) reals only, with full 10-bit VOP3
// opcodes.
defm V_ADD_CO_U32      : VOP3beOnly_Real_gfx10<0x30f>;
defm V_SUB_CO_U32      : VOP3beOnly_Real_gfx10<0x310>;
defm V_SUBREV_CO_U32   : VOP3beOnly_Real_gfx10<0x319>;

// Assembler aliases for the GFX10 reals that take the vcc-related operand.
// NOTE(review): VOP2eInstAliases / VOP2bInstAliases are defined elsewhere in
// this file; their exact alias set is not visible here.
let SubtargetPredicate = isGFX10Only in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx10>;

  defm : VOP2bInstAliases<
    V_ADDC_U32_e32, V_ADD_CO_CI_U32_e32_gfx10, "v_add_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBB_U32_e32, V_SUB_CO_CI_U32_e32_gfx10, "v_sub_co_ci_u32">;
  defm : VOP2bInstAliases<
    V_SUBBREV_U32_e32, V_SUBREV_CO_CI_U32_e32_gfx10, "v_subrev_co_ci_u32">;
} // End SubtargetPredicate = isGFX10Only

//===----------------------------------------------------------------------===//
// GFX6, GFX7, GFX10, GFX11
//===----------------------------------------------------------------------===//

// VOP2 fields of a DPP-encoded instruction. The src0 field (Inst{8-0}) holds
// the fixed value 0xfa; the remaining low 32 bits follow the VOP2 layout
// (src1, vdst, 6-bit opcode, encoding bit 31 = 0). The profile defaults to
// the one of the DPP pseudo.
class VOP2_DPPe <bits<6> op, VOP2_DPP_Pseudo ps, VOPProfile P = ps.Pfl> :
  VOP_DPPe <P> {
  bits<8> vdst;
  bits<8> src1;
  let Inst{8-0} = 0xfa; //dpp
  let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
  let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
  let Inst{30-25} = op;
  let Inst{31} = 0x0; //encoding
}

// Real definitions shared by GFX6 and GFX7 (encoding family SI).
let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in {
  // Plain VOP2 encoding of the pseudo called NAME (no _e32 suffix); used for
  // the lane instructions below.
  multiclass VOP2_Lane_Real_gfx6_gfx7<bits<6> op> {
    def _gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }
  // MADK (VOP2 + 32-bit literal) encoding of the pseudo called NAME.
  multiclass VOP2Only_Real_MADK_gfx6_gfx7<bits<6> op> {
    def _gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
      VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
  }
  // 32-bit VOP2 encoding; opName selects the pseudo and defaults to NAME.
  multiclass VOP2_Real_e32_gfx6_gfx7<bits<6> op, string opName = NAME> {
    def _e32_gfx6_gfx7 :
      VOP2_Real<!cast<VOP2_Pseudo>(opName#"_e32"), SIEncodingFamily.SI>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(opName#"_e32").Pfl>;
  }
  // VOP3 encoding; the VOP3 opcode is {1, 0, 0} prepended to the six VOP2
  // opcode bits (0x100 | op).
  multiclass VOP2_Real_e64_gfx6_gfx7<bits<6> op, string opName = NAME> {
    def _e64_gfx6_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.SI>,
      VOP3e_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(opName#"_e64").Pfl>;
  }
  // Same as VOP2_Real_e64_gfx6_gfx7 but with the VOP3be encoding class.
  multiclass VOP2be_Real_e64_gfx6_gfx7<bits<6> op, string opName = NAME> {
    def _e64_gfx6_gfx7 :
      VOP3_Real<!cast<VOP3_Pseudo>(opName#"_e64"), SIEncodingFamily.SI>,
      VOP3be_gfx6_gfx7<{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(opName#"_e64").Pfl>;
  }
} // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7"

// Combinators that emit reals for several generations from one opcode.
multiclass VOP2Only_Real_MADK_gfx6_gfx7_gfx10<bits<6> op> :
  VOP2Only_Real_MADK_gfx6_gfx7<op>, VOP2Only_Real_MADK_gfx10<op>;

multiclass VOP2_Real_gfx6_gfx7<bits<6> op> :
  VOP2_Real_e32_gfx6_gfx7<op>, VOP2_Real_e64_gfx6_gfx7<op>;

multiclass VOP2_Real_gfx6_gfx7_gfx10<bits<6> op> :
  VOP2_Real_gfx6_gfx7<op>, VOP2_Real_gfx10<op>;

multiclass VOP2_Real_gfx6_gfx7_gfx10_gfx11<bits<6> op> :
  VOP2_Real_gfx6_gfx7_gfx10<op>, VOP2_Real_FULL_gfx11<op>;

multiclass VOP2be_Real_gfx6_gfx7<bits<6> op> :
  VOP2_Real_e32_gfx6_gfx7<op>, VOP2be_Real_e64_gfx6_gfx7<op>;

// e32 + VOP3be e64 reals built from the opName pseudos but printed with the
// asmName mnemonic. The empty defm name keeps the generated record names
// based on the outer defm name.
multiclass VOP2be_Real_gfx6_gfx7_with_name<bits<6> op,
  string opName, string asmName>  {
  defvar ps32 = !cast<VOP2_Pseudo>(opName#"_e32");
  defvar ps64 = !cast<VOP3_Pseudo>(opName#"_e64");

  let AsmString = asmName # ps32.AsmOperands in {
    defm "" : VOP2_Real_e32_gfx6_gfx7<op, opName>;
  }

  let AsmString = asmName # ps64.AsmOperands in {
    defm "" : VOP2be_Real_e64_gfx6_gfx7<op, opName>;
  }
}

// Instructions that get VOP2 (e32 + e64) reals on GFX6/GFX7 only in this
// section.
defm V_CNDMASK_B32        : VOP2_Real_gfx6_gfx7<0x000>;
defm V_MIN_LEGACY_F32     : VOP2_Real_gfx6_gfx7<0x00d>;
defm V_MAX_LEGACY_F32     : VOP2_Real_gfx6_gfx7<0x00e>;
defm V_LSHR_B32           : VOP2_Real_gfx6_gfx7<0x015>;
defm V_ASHR_I32           : VOP2_Real_gfx6_gfx7<0x017>;
defm V_LSHL_B32           : VOP2_Real_gfx6_gfx7<0x019>;
defm V_BFM_B32            : VOP2_Real_gfx6_gfx7<0x01e>;
defm V_BCNT_U32_B32       : VOP2_Real_gfx6_gfx7<0x022>;
defm V_MBCNT_LO_U32_B32   : VOP2_Real_gfx6_gfx7<0x023>;
defm V_MBCNT_HI_U32_B32   : VOP2_Real_gfx6_gfx7<0x024>;
defm V_LDEXP_F32          : VOP2_Real_gfx6_gfx7<0x02b>;
defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_gfx6_gfx7<0x02c>;
defm V_CVT_PKNORM_I16_F32 : VOP2_Real_gfx6_gfx7<0x02d>;
defm V_CVT_PKNORM_U16_F32 : VOP2_Real_gfx6_gfx7<0x02e>;
defm V_CVT_PK_U16_U32     : VOP2_Real_gfx6_gfx7<0x030>;
defm V_CVT_PK_I16_I32     : VOP2_Real_gfx6_gfx7<0x031>;

// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in
// VI, but the VI instructions behave the same as the SI versions.
defm V_ADD_I32     : VOP2be_Real_gfx6_gfx7_with_name<0x025, "V_ADD_CO_U32", "v_add_i32">;
defm V_SUB_I32     : VOP2be_Real_gfx6_gfx7_with_name<0x026, "V_SUB_CO_U32", "v_sub_i32">;
defm V_SUBREV_I32  : VOP2be_Real_gfx6_gfx7_with_name<0x027, "V_SUBREV_CO_U32", "v_subrev_i32">;
defm V_ADDC_U32    : VOP2be_Real_gfx6_gfx7<0x028>;
defm V_SUBB_U32    : VOP2be_Real_gfx6_gfx7<0x029>;
defm V_SUBBREV_U32 : VOP2be_Real_gfx6_gfx7<0x02a>;

defm V_READLANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x001>;

// The real's input operand list is overridden for V_WRITELANE_B32 (it adds a
// $vdst_in operand after $src0 and $src1).
let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in {
  defm V_WRITELANE_B32 : VOP2_Lane_Real_gfx6_gfx7<0x002>;
} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)

// Assembler aliases for the GFX6/GFX7 reals; the add/sub aliases map the
// *_CO_U32 pseudos onto the v_*_i32 reals defined above.
let SubtargetPredicate = isGFX6GFX7 in {
  defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx6_gfx7>;
  defm : VOP2eInstAliases<V_ADD_CO_U32_e32, V_ADD_I32_e32_gfx6_gfx7>;
  defm : VOP2eInstAliases<V_SUB_CO_U32_e32, V_SUB_I32_e32_gfx6_gfx7>;
  defm : VOP2eInstAliases<V_SUBREV_CO_U32_e32, V_SUBREV_I32_e32_gfx6_gfx7>;

  def : VOP2e64InstAlias<V_ADD_CO_U32_e64, V_ADD_I32_e64_gfx6_gfx7>;
  def : VOP2e64InstAlias<V_SUB_CO_U32_e64, V_SUB_I32_e64_gfx6_gfx7>;
  def : VOP2e64InstAlias<V_SUBREV_CO_U32_e64, V_SUBREV_I32_e64_gfx6_gfx7>;
} // End SubtargetPredicate = isGFX6GFX7

// Instructions whose VOP2 opcode is shared by GFX6/GFX7 and GFX10 (and, for
// the *_gfx11 combinator, GFX11 as well).
defm V_ADD_F32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x003>;
defm V_SUB_F32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x004>;
defm V_SUBREV_F32         : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x005>;
defm V_MAC_LEGACY_F32     : VOP2_Real_gfx6_gfx7_gfx10<0x006>;
defm V_MUL_LEGACY_F32     : VOP2_Real_gfx6_gfx7_gfx10<0x007>;
defm V_MUL_F32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x008>;
defm V_MUL_I32_I24        : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x009>;
defm V_MUL_HI_I32_I24     : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00a>;
defm V_MUL_U32_U24        : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00b>;
defm V_MUL_HI_U32_U24     : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00c>;
defm V_MIN_F32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x00f>;
defm V_MAX_F32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x010>;
defm V_MIN_I32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x011>;
defm V_MAX_I32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x012>;
defm V_MIN_U32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x013>;
defm V_MAX_U32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x014>;
defm V_LSHRREV_B32        : VOP2_Real_gfx6_gfx7_gfx10<0x016>;
defm V_ASHRREV_I32        : VOP2_Real_gfx6_gfx7_gfx10<0x018>;
defm V_LSHLREV_B32        : VOP2_Real_gfx6_gfx7_gfx10<0x01a>;
defm V_AND_B32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x01b>;
defm V_OR_B32             : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x01c>;
defm V_XOR_B32            : VOP2_Real_gfx6_gfx7_gfx10_gfx11<0x01d>;
defm V_MAC_F32            : VOP2_Real_gfx6_gfx7_gfx10<0x01f>;
defm V_CVT_PKRTZ_F16_F32  : VOP2_Real_gfx6_gfx7_gfx10<0x02f>;
defm V_MADMK_F32          : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x020>;
defm V_MADAK_F32          : VOP2Only_Real_MADK_gfx6_gfx7_gfx10<0x021>;

//===----------------------------------------------------------------------===//
// GFX8, GFX9 (VI).
//===----------------------------------------------------------------------===//

// Real definitions shared by GFX8 and GFX9. Defs in this block default to
// DecoderNamespace = "GFX8" unless they override it.
let AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8" in {

// MADK (VOP2 + 32-bit literal) encoding of the pseudo called NAME, encoding
// family VI.
multiclass VOP2_Real_MADK_vi <bits<6> op> {
  def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
            VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
}

// MADK encoding for encoding family GFX940, decoded from the "GFX9"
// namespace.
multiclass VOP2_Real_MADK_gfx940 <bits<6> op> {
  def _gfx940 : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX940>,
                VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl> {
    let DecoderNamespace = "GFX9";
  }
}

// 32-bit VOP2 encoding of the NAME_e32 pseudo.
multiclass VOP2_Real_e32_vi <bits<6> op> {
  def _e32_vi :
    VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
}

// VOP3 encoding of the NAME_e64 pseudo; op is the full 10-bit VOP3 opcode.
multiclass VOP2_Real_e64_vi <bits<10> op> {
  def _e64_vi :
    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
    VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}

// Same as VOP2_Real_e64_vi, but the def is marked IsSingle; used for
// instructions that only get a VOP3 real in this section.
multiclass VOP2_Real_e64only_vi <bits<10> op> {
  def _e64_vi :
    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
    VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
      let IsSingle = 1;
    }
}

// e32 + e64 reals from one 6-bit VOP2 opcode; the VOP3 opcode is
// {0, 1, 0, 0} prepended to the VOP2 opcode bits (0x100 | op).
multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
  VOP2_Real_e32_vi<op>,
  VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;

} // End AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8"

// SDWA real (VOP2_SDWAe encoding), guarded by the HasExtSDWA flag of the
// _e32 pseudo's profile.
// NOTE(review): BoolToList is defined elsewhere; presumably it yields an empty
// list for false so that the def is skipped -- confirm.
multiclass VOP2_SDWA_Real <bits<6> op> {
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA>.ret in
  def _sdwa_vi :
    VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}

// SDWA9 real (VOP2_SDWA9Ae encoding), guarded by HasExtSDWA9.
multiclass VOP2_SDWA9_Real <bits<6> op> {
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
  def _sdwa_gfx9 :
    VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
    VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}

let AssemblerPredicate = isGFX8Only in {

// GFX8-only reals for the carry instructions: e32, VOP3be e64, SDWA and DPP.
// Pseudos are looked up by OpName and printed with the AsmName mnemonic.
multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName> {
  def _e32_vi :
    VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.VI>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> {
      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32");
      let AsmString = AsmName # ps.AsmOperands;
      let DecoderNamespace = "GFX8";
    }
  def _e64_vi :
    VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.VI>,
    VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
      VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
      let AsmString = AsmName # ps.AsmOperands;
      let DecoderNamespace = "GFX8";
    }
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA>.ret in
    def _sdwa_vi :
      VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
      VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
        VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
        let AsmString = AsmName # ps.AsmOperands;
      }
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_vi :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.VI>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
        VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
        let AsmString = AsmName # ps.AsmOperands;
      }
}
} // End AssemblerPredicate = isGFX8Only

let AssemblerPredicate = isGFX9Only in {

// GFX9-only counterpart of VOP2be_Real_e32e64_vi_only: same structure, but
// with encoding family GFX9, the SDWA9 encoding, decoder namespace "GFX9"
// for e32/e64 and "SDWA9" for the DPP def.
multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
  def _e32_gfx9 :
    VOP2_Real<!cast<VOP2_Pseudo>(OpName#"_e32"), SIEncodingFamily.GFX9>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(OpName#"_e32").Pfl> {
      VOP2_Pseudo ps = !cast<VOP2_Pseudo>(OpName#"_e32");
      let AsmString = AsmName # ps.AsmOperands;
      let DecoderNamespace = "GFX9";
    }
  def _e64_gfx9 :
    VOP3_Real<!cast<VOP3_Pseudo>(OpName#"_e64"), SIEncodingFamily.GFX9>,
    VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(OpName#"_e64").Pfl> {
      VOP3_Pseudo ps = !cast<VOP3_Pseudo>(OpName#"_e64");
      let AsmString = AsmName # ps.AsmOperands;
      let DecoderNamespace = "GFX9";
    }
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx9 :
      VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
      VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
        VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
        let AsmString = AsmName # ps.AsmOperands;
      }
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_gfx9 :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.GFX9>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
        VOP2_DPP_Pseudo ps = !cast<VOP2_DPP_Pseudo>(OpName#"_dpp");
        let AsmString = AsmName # ps.AsmOperands;
        let DecoderNamespace = "SDWA9";
      }
}

// GFX9-only e32/e64/SDWA9/DPP reals of a plain VOP2 instruction, looked up by
// NAME and printed with the pseudo's own mnemonic.
multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
  def _e32_gfx9 :
    VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX9>,
    VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>{
      let DecoderNamespace = "GFX9";
    }
  def _e64_gfx9 :
    VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX9>,
    VOP3e_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
      let DecoderNamespace = "GFX9";
    }
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
    def _sdwa_gfx9 :
      VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
      VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
      }
  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_gfx9 :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
        let DecoderNamespace = "SDWA9";
      }
}

} // AssemblerPredicate = isGFX9Only

// Full set of reals for a VOP2 instruction on GFX8/GFX9: e32 + e64, the GFX8
// SDWA real, the GFX9 SDWA9 real, and (when the profile has HasExtDPP) a DPP
// real.
multiclass VOP2_Real_e32e64_vi <bits<6> op> :
  Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {

  foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
    def _dpp_vi :
      VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
      VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
}

defm V_CNDMASK_B32        : VOP2_Real_e32e64_vi <0x0>;
defm V_ADD_F32            : VOP2_Real_e32e64_vi <0x1>;
defm V_SUB_F32            : VOP2_Real_e32e64_vi <0x2>;
defm V_SUBREV_F32         : VOP2_Real_e32e64_vi <0x3>;
// This VOP2 form is excluded on GFX90A; a VOP3-only real for GFX90A+ is
// defined further below.
let AssemblerPredicate = isGCN3ExcludingGFX90A in
defm V_MUL_LEGACY_F32     : VOP2_Real_e32e64_vi <0x4>;
defm V_MUL_F32            : VOP2_Real_e32e64_vi <0x5>;
defm V_MUL_I32_I24        : VOP2_Real_e32e64_vi <0x6>;
defm V_MUL_HI_I32_I24     : VOP2_Real_e32e64_vi <0x7>;
defm V_MUL_U32_U24        : VOP2_Real_e32e64_vi <0x8>;
defm V_MUL_HI_U32_U24     : VOP2_Real_e32e64_vi <0x9>;
defm V_MIN_F32            : VOP2_Real_e32e64_vi <0xa>;
defm V_MAX_F32            : VOP2_Real_e32e64_vi <0xb>;
defm V_MIN_I32            : VOP2_Real_e32e64_vi <0xc>;
defm V_MAX_I32            : VOP2_Real_e32e64_vi <0xd>;
defm V_MIN_U32            : VOP2_Real_e32e64_vi <0xe>;
defm V_MAX_U32            : VOP2_Real_e32e64_vi <0xf>;
defm V_LSHRREV_B32        : VOP2_Real_e32e64_vi <0x10>;
defm V_ASHRREV_I32        : VOP2_Real_e32e64_vi <0x11>;
defm V_LSHLREV_B32        : VOP2_Real_e32e64_vi <0x12>;
defm V_AND_B32            : VOP2_Real_e32e64_vi <0x13>;
defm V_OR_B32             : VOP2_Real_e32e64_vi <0x14>;
defm V_XOR_B32            : VOP2_Real_e32e64_vi <0x15>;
defm V_MAC_F32            : VOP2_Real_e32e64_vi <0x16>;
defm V_MADMK_F32          : VOP2_Real_MADK_vi <0x17>;
defm V_MADAK_F32          : VOP2_Real_MADK_vi <0x18>;

// GFX8-only carry add/sub reals (opcodes 0x19-0x1e), printed with the
// v_*_u32 mnemonics.
defm V_ADD_U32            : VOP2be_Real_e32e64_vi_only <0x19, "V_ADD_CO_U32", "v_add_u32">;
defm V_SUB_U32            : VOP2be_Real_e32e64_vi_only <0x1a, "V_SUB_CO_U32", "v_sub_u32">;
defm V_SUBREV_U32         : VOP2be_Real_e32e64_vi_only <0x1b, "V_SUBREV_CO_U32", "v_subrev_u32">;
defm V_ADDC_U32           : VOP2be_Real_e32e64_vi_only <0x1c, "V_ADDC_U32", "v_addc_u32">;
defm V_SUBB_U32           : VOP2be_Real_e32e64_vi_only <0x1d, "V_SUBB_U32", "v_subb_u32">;
defm V_SUBBREV_U32        : VOP2be_Real_e32e64_vi_only <0x1e, "V_SUBBREV_U32", "v_subbrev_u32">;

// GFX9-only reals for the same pseudos and opcodes (0x19-0x1e), printed with
// the v_*_co_u32 mnemonics.
defm V_ADD_CO_U32         : VOP2be_Real_e32e64_gfx9 <0x19, "V_ADD_CO_U32", "v_add_co_u32">;
defm V_SUB_CO_U32         : VOP2be_Real_e32e64_gfx9 <0x1a, "V_SUB_CO_U32", "v_sub_co_u32">;
defm V_SUBREV_CO_U32      : VOP2be_Real_e32e64_gfx9 <0x1b, "V_SUBREV_CO_U32", "v_subrev_co_u32">;
defm V_ADDC_CO_U32        : VOP2be_Real_e32e64_gfx9 <0x1c, "V_ADDC_U32", "v_addc_co_u32">;
defm V_SUBB_CO_U32        : VOP2be_Real_e32e64_gfx9 <0x1d, "V_SUBB_U32", "v_subb_co_u32">;
defm V_SUBBREV_CO_U32     : VOP2be_Real_e32e64_gfx9 <0x1e, "V_SUBBREV_U32", "v_subbrev_co_u32">;

// GFX9-only reals of the V_ADD_U32 / V_SUB_U32 / V_SUBREV_U32 pseudos.
defm V_ADD_U32            : VOP2_Real_e32e64_gfx9 <0x34>;
defm V_SUB_U32            : VOP2_Real_e32e64_gfx9 <0x35>;
defm V_SUBREV_U32         : VOP2_Real_e32e64_gfx9 <0x36>;

// Instructions that only get a VOP3 (e64) real here; opcodes are full 10-bit
// VOP3 opcodes.
defm V_BFM_B32            : VOP2_Real_e64only_vi <0x293>;
defm V_BCNT_U32_B32       : VOP2_Real_e64only_vi <0x28b>;
defm V_MBCNT_LO_U32_B32   : VOP2_Real_e64only_vi <0x28c>;
defm V_MBCNT_HI_U32_B32   : VOP2_Real_e64only_vi <0x28d>;
defm V_LDEXP_F32          : VOP2_Real_e64only_vi <0x288>;
defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64only_vi <0x1f0>;
defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64only_vi <0x294>;
defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64only_vi <0x295>;
defm V_CVT_PKRTZ_F16_F32  : VOP2_Real_e64only_vi <0x296>;
defm V_CVT_PK_U16_U32     : VOP2_Real_e64only_vi <0x297>;
defm V_CVT_PK_I16_I32     : VOP2_Real_e64only_vi <0x298>;

// 16-bit instructions (VOP2 opcodes 0x1f-0x33).
defm V_ADD_F16            : VOP2_Real_e32e64_vi <0x1f>;
defm V_SUB_F16            : VOP2_Real_e32e64_vi <0x20>;
defm V_SUBREV_F16         : VOP2_Real_e32e64_vi <0x21>;
defm V_MUL_F16            : VOP2_Real_e32e64_vi <0x22>;
defm V_MAC_F16            : VOP2_Real_e32e64_vi <0x23>;
defm V_MADMK_F16          : VOP2_Real_MADK_vi <0x24>;
defm V_MADAK_F16          : VOP2_Real_MADK_vi <0x25>;
defm V_ADD_U16            : VOP2_Real_e32e64_vi <0x26>;
defm V_SUB_U16            : VOP2_Real_e32e64_vi <0x27>;
defm V_SUBREV_U16         : VOP2_Real_e32e64_vi <0x28>;
defm V_MUL_LO_U16         : VOP2_Real_e32e64_vi <0x29>;
defm V_LSHLREV_B16        : VOP2_Real_e32e64_vi <0x2a>;
defm V_LSHRREV_B16        : VOP2_Real_e32e64_vi <0x2b>;
defm V_ASHRREV_I16        : VOP2_Real_e32e64_vi <0x2c>;
defm V_MAX_F16            : VOP2_Real_e32e64_vi <0x2d>;
defm V_MIN_F16            : VOP2_Real_e32e64_vi <0x2e>;
defm V_MAX_U16            : VOP2_Real_e32e64_vi <0x2f>;
defm V_MAX_I16            : VOP2_Real_e32e64_vi <0x30>;
defm V_MIN_U16            : VOP2_Real_e32e64_vi <0x31>;
defm V_MIN_I16            : VOP2_Real_e32e64_vi <0x32>;
defm V_LDEXP_F16          : VOP2_Real_e32e64_vi <0x33>;

let SubtargetPredicate = isGFX8GFX9 in {

// Aliases to simplify matching of floating-point instructions that
// are VOP2 on SI and VOP3 on VI.
// The alias accepts "name $dst, $src0, $src1" and expands to the VOP3 real
// with the remaining operands set to 0; one extra trailing 0 operand is
// supplied when the real's profile has HasOMod.
class SI2_VI3Alias <string name, VOP3_Real inst> : InstAlias <
  name#" $dst, $src0, $src1",
  !if(inst.Pfl.HasOMod,
      (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0),
      (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0))
>, PredicateControl {
  let UseInstAsmMatchConverter = 0;
  let AsmVariantName = AMDGPUAsmVariants.VOP3;
}

def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;

defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_vi>;

} // End SubtargetPredicate = isGFX8GFX9

let SubtargetPredicate = isGFX9Only in {

// Assembler aliases for the GFX9 v_*_co_u32 reals.
// NOTE(review): VOP2bInstAliases is defined elsewhere in this file; its exact
// alias set is not visible here.
defm : VOP2bInstAliases<V_ADD_U32_e32,     V_ADD_CO_U32_e32_gfx9,     "v_add_co_u32">;
defm : VOP2bInstAliases<V_ADDC_U32_e32,    V_ADDC_CO_U32_e32_gfx9,    "v_addc_co_u32">;
defm : VOP2bInstAliases<V_SUB_U32_e32,     V_SUB_CO_U32_e32_gfx9,     "v_sub_co_u32">;
defm : VOP2bInstAliases<V_SUBB_U32_e32,    V_SUBB_CO_U32_e32_gfx9,    "v_subb_co_u32">;
defm : VOP2bInstAliases<V_SUBREV_U32_e32,  V_SUBREV_CO_U32_e32_gfx9,  "v_subrev_co_u32">;
defm : VOP2bInstAliases<V_SUBBREV_U32_e32, V_SUBBREV_CO_U32_e32_gfx9, "v_subbrev_co_u32">;

} // End SubtargetPredicate = isGFX9Only

let SubtargetPredicate = HasDLInsts in {

defm V_FMAC_F32 : VOP2_Real_e32e64_vi <0x3b>;
defm V_XNOR_B32 : VOP2_Real_e32e64_vi <0x3d>;

} // End SubtargetPredicate = HasDLInsts

// Reals for encoding family GFX90A, defined with AssemblerPredicate =
// isGFX90APlus and decoder namespace "GFX90A" (the DPP def overrides the
// namespace with "SDWA9").
let AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A" in {
  multiclass VOP2_Real_e32_gfx90a <bits<6> op> {
    def _e32_gfx90a :
      VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.GFX90A>,
      VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
  }

  // op is the full 10-bit VOP3 opcode.
  multiclass VOP2_Real_e64_gfx90a <bits<10> op> {
    def _e64_gfx90a :
      VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX90A>,
      VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
  }

  // e32 + e64 from one VOP2 opcode (VOP3 opcode = 0x100 | op).
  multiclass Base_VOP2_Real_e32e64_gfx90a <bits<6> op> :
    VOP2_Real_e32_gfx90a<op>,
    VOP2_Real_e64_gfx90a<{0, 1, 0, 0, op{5-0}}>;

  // e32 + e64 plus a DPP real when the profile has HasExtDPP.
  multiclass VOP2_Real_e32e64_gfx90a <bits<6> op> :
    Base_VOP2_Real_e32e64_gfx90a<op> {

    foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
      def _dpp_gfx90a :
        VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX90A>,
        VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
          let DecoderNamespace = "SDWA9";
        }
  }
} // End AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A"

let SubtargetPredicate = HasFmacF64Inst in {
  defm V_FMAC_F64       : VOP2_Real_e32e64_gfx90a <0x4>;
} // End SubtargetPredicate = HasFmacF64Inst

// On GFX90A+ only the VOP3 (e64) real of V_MUL_LEGACY_F32 is defined here,
// with an explicit 10-bit VOP3 opcode.
let SubtargetPredicate = isGFX90APlus, IsSingle = 1 in {
  defm V_MUL_LEGACY_F32 : VOP2_Real_e64_gfx90a <0x2a1>;
} // End SubtargetPredicate = isGFX90APlus, IsSingle = 1

let SubtargetPredicate = HasFmaakFmamkF32Insts in {
defm V_FMAMK_F32        : VOP2_Real_MADK_gfx940 <0x17>;
defm V_FMAAK_F32        : VOP2_Real_MADK_gfx940 <0x18>;
} // End SubtargetPredicate = HasFmaakFmamkF32Insts

// Reals for the accumulating dot-product instructions: e32 + e64 plus an
// unconditional DPP def (no HasExtDPP guard and no SDWA reals).
multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : Base_VOP2_Real_e32e64_vi<op> {
  def _dpp_vi : VOP2_DPP<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
}

// GFX10 counterpart: e32, DPP16 and DPP8 reals (no e64 or SDWA).
multiclass VOP2_Real_DOT_ACC_gfx10<bits<6> op> :
  VOP2_Real_e32_gfx10<op>,
  VOP2_Real_dpp_gfx10<op>,
  VOP2_Real_dpp8_gfx10<op>;

let SubtargetPredicate = HasDot5Insts in {
  defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx9<0x37>;
  // NB: Opcode conflicts with V_DOT8C_I32_I4
  // This opcode exists in gfx 10.1* only
  defm V_DOT2C_F32_F16 : VOP2_Real_DOT_ACC_gfx10<0x02>;
} // End SubtargetPredicate = HasDot5Insts

let SubtargetPredicate = HasDot6Insts in {
  defm V_DOT4C_I32_I8  : VOP2_Real_DOT_ACC_gfx9<0x39>;
  defm V_DOT4C_I32_I8  : VOP2_Real_DOT_ACC_gfx10<0x0d>;
} // End SubtargetPredicate = HasDot6Insts

let SubtargetPredicate = HasDot4Insts in {
  defm V_DOT2C_I32_I16 : VOP2_Real_DOT_ACC_gfx9<0x38>;
} // End SubtargetPredicate = HasDot4Insts
let SubtargetPredicate = HasDot3Insts in {
  defm V_DOT8C_I32_I4  : VOP2_Real_DOT_ACC_gfx9<0x3a>;
} // End SubtargetPredicate = HasDot3Insts

let SubtargetPredicate = HasPkFmacF16Inst in {
defm V_PK_FMAC_F16 : VOP2_Real_e32_vi<0x3c>;
} // End SubtargetPredicate = HasPkFmacF16Inst

let SubtargetPredicate = HasDot3Insts in {
  // NB: Opcode conflicts with V_DOT2C_F32_F16
  // The conflicting def is put in the separate "GFX10_B" decoder namespace.
  let DecoderNamespace = "GFX10_B" in
  defm V_DOT8C_I32_I4 : VOP2_Real_DOT_ACC_gfx10<0x02>;
} // End SubtargetPredicate = HasDot3Insts