Lines Matching +full:vcc +full:- +full:p
1 //===-- VOP3Instructions.td - Vector Instruction Definitions --------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 // only VOP instruction that implicitly reads VCC.
43 class V_MUL_PROF<VOPProfile P> : VOP3_Profile<P> {
53 //===----------------------------------------------------------------------===//
55 //===----------------------------------------------------------------------===//
57 class VOP3Interp<string OpName, VOPProfile P, list<dag> pattern = []> :
58 VOP3_Pseudo<OpName, P, pattern> {
122 //===----------------------------------------------------------------------===//
124 //===----------------------------------------------------------------------===//
181 let Uses = [MODE, VCC, EXEC] in {
184 // if (vcc)
191 // if (vcc)
196 } // End Uses = [MODE, VCC, EXEC]
214 // XXX - No FPException seems suspect but manual doesn't say it does
255 // Double precision division pre-scale.
413 // Note: 16-bit instructions produce a 0 result in the high 16-bits
454 // When the inner operation is used multiple times, selecting 3-op
455 // instructions may still be beneficial -- if the other users can be
460 if (!N->isDivergent())
469 if (!Operands[i]->isDivergent() &&
474 if (ConstantBusUses > Subtarget->getConstantBusLimit(AMDGPU::V_ADD3_U32_e64))
490 // blocking folding SGPR->VGPR copies later.
493 const int ConstantBusLimit = Subtarget->getConstantBusLimit(AMDGPU::V_ADD3_U32_e64);
496 const RegisterBank *RegBank = RBI.getRegBank(Operands[i]->getReg(), MRI, TRI);
497 if (RegBank->getID() == AMDGPU::SGPRRegBankID) {
509 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
510 return C->getZExtValue() <= 4;
604 uint32_t V = N->getZExtValue();
605 return isPowerOf2_32(V - 1);
609 uint32_t V = N->getZExtValue();
610 return CurDAG->getTargetConstant(Log2_32(V - 1), SDLoc(N), MVT::i32);
659 // src0 is shifted left by 0-4 (use “0” to get ADD_U64).
675 // These instructions have non-standard use of op_sel. In particular they are
702 foreach Index = [0, -1] in {
780 // GISel-specific pattern that avoids creating a SGPR->VGPR copy if
800 // Handle cases where amdgpu-codegenprepare-mul24 made a mul24 instead of a normal mul.
807 // exclude pre-GFX9 where it was slow
837 return CurDAG->getTargetConstant(
838 N->getZExtValue() ? SISrcMods::OP_SEL_0 : SISrcMods::NONE,
882 // Undo sub x, c -> add x, -c canonicalization since c is more likely
883 // an inline immediate than -c.
911 def : DivFmasPat<f32, V_DIV_FMAS_F32_e64, VCC>;
912 def : DivFmasPat<f64, V_DIV_FMAS_F64_e64, VCC>;
920 class VOP3_DOT_Profile<VOPProfile P> : VOP3_Profile<P, VOP3_OPSEL> {
993 //===----------------------------------------------------------------------===//
995 //===----------------------------------------------------------------------===//
997 class getClampPat<VOPProfile P, SDPatternOperator node> {
998 dag ret3 = (P.DstVT (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2));
999 dag ret2 = (P.DstVT (node P.Src0VT:$src0, P.Src1VT:$src1));
1000 dag ret1 = (P.DstVT (node P.Src0VT:$src0));
1001 dag ret = !if(!eq(P.NumSrcArgs, 3), ret3,
1002 !if(!eq(P.NumSrcArgs, 2), ret2,
1006 class getClampRes<VOPProfile P, Instruction inst> {
1007 dag ret3 = (inst P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2, (i1 0));
1008 dag ret2 = (inst P.Src0VT:$src0, P.Src1VT:$src1, (i1 0));
1009 dag ret1 = (inst P.Src0VT:$src0, (i1 0));
1010 dag ret = !if(!eq(P.NumSrcArgs, 3), ret3,
1011 !if(!eq(P.NumSrcArgs, 2), ret2,
1033 //===----------------------------------------------------------------------===//
1034 // Target-specific instruction encodings.
1035 //===----------------------------------------------------------------------===//
1037 //===----------------------------------------------------------------------===//
1039 //===----------------------------------------------------------------------===//
1086 //===----------------------------------------------------------------------===//
1088 //===----------------------------------------------------------------------===//
1233 //===----------------------------------------------------------------------===//
1235 //===----------------------------------------------------------------------===//
1308 // TODO-GFX10: add MC tests for v_add/sub_nc_i16
1354 // FIXME-GFX10-OPSEL: Need to add "selective" opsel support to some of these
1367 //===----------------------------------------------------------------------===//
1369 //===----------------------------------------------------------------------===//
1375 VOP3e_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
1380 VOP3be_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
1395 //===----------------------------------------------------------------------===//
1397 //===----------------------------------------------------------------------===//
1403 VOP3e_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
1408 VOP3be_gfx6_gfx7<op{8-0}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
1476 //===----------------------------------------------------------------------===//
1478 //===----------------------------------------------------------------------===//