Lines Matching +full:smem +full:- +full:part
1 //===-- SIInstructions.td - SI Instruction Definitions --------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 // This file was originally auto-generated from a GPU register header file and
11 //===----------------------------------------------------------------------===//
18 [{ return !N->isDivergent(); }]>;
23 [{ return N->isDivergent(); }]>;
34 //===----------------------------------------------------------------------===//
36 //===----------------------------------------------------------------------===//
94 //===----------------------------------------------------------------------===//
96 //===----------------------------------------------------------------------===//
130 // 64-bit vector move instruction. This is mainly used by the
142 // 64-bit vector move with dpp. Expanded post-RA.
144 let Size = 16; // Requires two 8-byte v_mov_b32_dpp to complete.
147 // 64-bit scalar move immediate instruction. This is used to avoid subregs
168 // Pseudoinstruction for @llvm.amdgcn.strict.wwm. It is turned into a copy post-RA, so
426 // by making them non-duplicable, we observe better code generation results.
427 // So we choose to mark them non-duplicable in hope of getting better code
919 // use it in the sdata operand of SMEM instructions. We still need to
1073 (SI_KILL_I1_PSEUDO SCSrc_i1:$src, -1)
1088 (SI_DEMOTE_I1 SCSrc_i1:$src, -1)
1103 //===----------------------------------------------------------------------===//
1105 //===----------------------------------------------------------------------===//
1179 //===----------------------------------------------------------------------===//
1181 //===----------------------------------------------------------------------===//
1294 foreach Index = 0-1 in {
1307 foreach Index = 0-2 in {
1323 foreach Index = 0-3 in {
1339 foreach Index = 0-4 in {
1355 foreach Index = 0-5 in {
1371 foreach Index = 0-6 in {
1387 foreach Index = 0-7 in {
1403 foreach Index = 0-8 in {
1419 foreach Index = 0-9 in {
1435 foreach Index = 0-10 in {
1451 foreach Index = 0-11 in {
1467 foreach Index = 0-15 in {
1484 foreach Index = 0-31 in {
1504 // 16-bit bitcast
1520 // 32-bit bitcast
1553 // 64-bit bitcast
1604 // 96-bit bitcast
1608 // 128-bit bitcast
1671 // 160-bit bitcast
1677 // 192-bit bitcast
1693 // 224-bit bitcast
1699 // 256-bit bitcast
1760 // 288-bit bitcast
1766 // 320-bit bitcast
1772 // 352-bit bitcast
1778 // 384-bit bitcast
1784 // 512-bit bitcast
1840 // 1024-bit bitcast
2179 // V_MOV_B64_PSEUDO and S_MOV_B64_IMM_PSEUDO can be used with any 64-bit
2232 // XXX - Should this use a s_cmp to set SCC?
2234 // Set to sign-extended 64-bit value (true = -1, false = 0)
2266 /*src1mod*/(i32 0), /*src1*/(i32 -1), i1:$src0)
2287 //===----------------------------------------------------------------------===//
2289 //===----------------------------------------------------------------------===//
2300 auto *X = dyn_cast<ConstantSDNode>(N->getOperand(0)->getOperand(1));
2301 auto *NotX = dyn_cast<ConstantSDNode>(N->getOperand(1)->getOperand(1));
2303 ~(unsigned)X->getZExtValue() == (unsigned)NotX->getZExtValue();
2323 // 64-bit version
2335 // SHA-256 Ch function
2344 // 64-bit version
2433 //===----------------------------------------------------------------------===//
2435 //===----------------------------------------------------------------------===//
2452 //===----------------------------------------------------------------------===//
2454 //===----------------------------------------------------------------------===//
2563 // FIXME: We need to use COPY_TO_REGCLASS to work-around the fact that
2581 /*src1mod*/(i32 0), /*src1*/(i32 -1), $src), sub0,
2583 /*src1mod*/(i32 0), /*src1*/(i32 -1), $src), sub1)
2609 // these as 32 or 64-bit comparisons. When legalizing SGPR copies,
2641 (i1 (add i1:$src0, (i1 -1))),
2646 (i1 (sub i1:$src0, (i1 -1))),
2680 (i1 (add i1:$src0, (i1 -1))),
2685 (i1 (sub i1:$src0, (i1 -1))),
2692 (i32 (DivergentBinFrag<xor> i32:$src0, (i32 -1))),
2697 (i64 (DivergentBinFrag<xor> i64:$src0, (i64 -1))),
2756 /*src1mod*/(i32 0), /*src1*/(i32 -1),
2767 //===----------------------------------------------------------------------===//
2769 //===----------------------------------------------------------------------===//
2772 // zeros the high bits of the 32-bit register.
2840 return CurDAG->getTargetConstant(1ULL << N->getZExtValue(), SDLoc(N),
2849 // (trunc i32 (srl i32 $a, i32 $b)) ->
2965 let AddedComplexity = -5 in {
3020 } // End AddedComplexity = -5
3064 // On pre-gfx9 targets, v_max_*/v_min_* did not respect the denormal
3335 //===----------------------------------------------------------------------===//
3337 //===----------------------------------------------------------------------===//
3341 // V_FRACT is buggy on SI, so the F32 version is never used and (x-floor(x)) is
3347 // Convert floor(x) to (x - fract(x))
3375 // Undo sub x, c -> add x, -c canonicalization since c is more likely
3376 // an inline immediate than -c.
3377 // TODO: Also do for 64-bit.
3406 (vt (SHL (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)),
3411 (vt (ADD (vt (shl 1, vt:$a)), -1)),
3427 return CurDAG->getTargetConstant(llvm::popcount(N->getZExtValue()), SDLoc(N),
3437 // x & ((1 << y) - 1)
3439 (DivergentBinFrag<and> i32:$src, (add_oneuse (shl_oneuse 1, i32:$width), -1)),
3443 // x & ~(-1 << y)
3446 (xor_oneuse (shl_oneuse -1, i32:$width), -1)),
3450 // x & (-1 >> (bitwidth - y))
3452 (DivergentBinFrag<and> i32:$src, (srl_oneuse -1, (sub 32, i32:$width))),
3456 // x << (bitwidth - y) >> (bitwidth - y)
3469 // SHA-256 Ma patterns
3471 // ((x & z) | (y & (x | z))) -> BFI (XOR x, y), z, y
3631 // Convert a floating-point power of 2 to the integer exponent.
3633 const auto &APF = N->getValueAPF();
3636 return CurDAG->getTargetConstant(Log2, SDLoc(N), MVT::i32);
3639 // Check if a floating point value is a power of 2 floating-point
3651 // For f64 ldexp is always better than materializing a 64-bit
3653 return Exp != INT_MIN && (Exp < -1 || Exp > 2);
3664 // For f64 ldexp is always better than materializing a 64-bit
3666 return Exp != INT_MIN && (Exp < -1 || Exp > 2);
3690 // which we would need to be re-negated (which should never happen in
3712 // Returns -1 if the input is zero.
3719 // Returns -1 if the input is zero.
3805 foreach N = 0-3 in {
3843 // Integer multiply-add: arg0 * arg1 + arg2.
3845 // arg0 and arg1 are 32-bit integers (interpreted as signed or unsigned),
3846 // arg2 is a 64-bit integer. Result is a 64-bit integer and a 1-bit carry-out.
4018 let Inst{31-0} = 0x00000000;