Lines Matching +full:abs +full:- +full:flat

1 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 float f = (float)N->getValueAPF().convertToFloat();
15 float f = (float)N->getValueAPF().convertToFloat();
20 double d = (double)N->getValueAPF().convertToDouble();
25 double d = (double)N->getValueAPF().convertToDouble();
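// The four snippets above are bodies of fpimm PatLeaf checks that compare a
// floating-point immediate against a specific constant. A minimal sketch of
// that idiom, assuming the usual immFloat0/immFloat1-style naming (the def
// name and the compared constant below are illustrative, not quoted from the file):
def immFloat1_example : PatLeaf<(fpimm), [{
  float f = (float)N->getValueAPF().convertToFloat();
  return (f == 1.0f);
}]>;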
44 return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
52 // E.g. RegNames<3,"r">.ret -> ["r0", "r1", "r2"]
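// A sketch of how a helper with the behavior shown above can be written; the
// class body is assumed, only the name and the example result come from the
// comment on the previous line:
class RegNames_example<int n, string prefix> {
  list<string> ret = !foreach(i, !range(n), prefix # i);
}
// RegNames_example<3, "r">.ret yields ["r0", "r1", "r2"].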
63 //-----------------------------------
65 //-----------------------------------
323 //-----------------------------------
325 //-----------------------------------
338 //-----------------------------------
340 //-----------------------------------
436 //-----------------------------------
438 //-----------------------------------
569 //-----------------------------------
571 //-----------------------------------
664 F_MATH_2<"min.xorsign.abs.f32 \t$dst, $src0, $src1;",
668 F_MATH_2<"min.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
672 F_MATH_2<"min.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
676 F_MATH_2<"min.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
691 F_MATH_2<"max.xorsign.abs.f32 \t$dst, $src0, $src1;",
695 F_MATH_2<"max.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
699 F_MATH_2<"max.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
703 F_MATH_2<"max.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
917 // Abs
920 def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
922 def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
925 def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
929 // Abs, Neg bf16, bf16x2
932 def INT_NVVM_ABS_BF16 : F_MATH_1<"abs.bf16 \t$dst, $src0;", Int16Regs,
934 def INT_NVVM_ABS_BF16X2 : F_MATH_1<"abs.bf16x2 \t$dst, $src0;", Int32Regs,
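// Each def above continues onto its next source line with the source register
// class and the matching intrinsic. A sketch of one complete F_MATH_1
// definition, with the trailing arguments assumed from the naming pattern
// (the F_MATH_2 defs further up follow the same shape with two source operands):
def INT_NVVM_FABS_F_example : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
  Float32Regs, int_nvvm_fabs_f>;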
1183 // 1.0f / sqrt_approx -> rsqrt_approx
1190 // same for int_nvvm_sqrt_f when non-precision sqrt is requested
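// A sketch of the kind of pattern those two comments describe: fold the
// reciprocal of an approximate sqrt into the rsqrt.approx instruction. The
// leaf matching the 1.0f immediate and the instruction name are assumed, and
// any required predicates are omitted:
def : Pat<(fdiv immFloat1, (int_nvvm_sqrt_approx_f Float32Regs:$a)),
          (INT_NVVM_RSQRT_APPROX_F Float32Regs:$a)>;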
1526 //-----------------------------------
1528 //-----------------------------------
2169 // No need to define ".gpu"-scoped atomics. They do the same thing
2170 // as the regular, non-scoped atomics defined elsewhere.
2239 //-----------------------------------
2241 //-----------------------------------
2244 // read-only in a kernel.
2343 //-----------------------------------
2345 //-----------------------------------
2348 // non-coherent texture cache, and therefore the values read must be read-only
2421 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
2543 // ->
2561 //-----------------------------------
2563 // - Just ignore them in codegen
2564 //-----------------------------------
2769 //-----------------------------------
2771 //-----------------------------------
4394 //-----------------------------------
4396 //-----------------------------------
4483 //-----------------------------------
4485 //-----------------------------------
4552 //===- Handle Query -------------------------------------------------------===//
4568 //===- Surface Stores -----------------------------------------------------===//
6350 //-----------------------------------
6352 //-----------------------------------
6374 // TODO: Add a vector-read version of special registers
6448 // In addition to target-independent fields provided by WMMA_REGS, it adds
6449 // the fields commonly used to implement a specific PTX instruction -- register
6471 // List of register names for the fragment -- ["ra0", "ra1",...]
6474 // Generates "{{$r0, $r1, ..., $rN-1}}" for use in asm string construction.
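// A sketch of how an asm fragment of that shape can be generated from the
// register name list; the helper below is illustrative, only the output
// format comes from the comment above:
class RegSeqAsm_example<list<string> names> {
  string ret = "{{" # !interleave(!foreach(n, names, "$" # n), ", ") # "}}";
}
// RegSeqAsm_example<["ra0", "ra1"]>.ret yields "{{$ra0, $ra1}}".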
6478 // per-instruction predicates, but currently all fragments that can be used in
6481 // longer the case, we can concat all per-fragment predicates to enforce that
6484 // fp16 -> fp16/fp32 @ m16n16k16
6492 // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
6498 // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
6517 // b1 -> s32 @ m8n8k128(b1)
6521 // u4/s4 -> s32 @ m8n8k32 (u4/s4)
6576 // Common WMMA-related fields used for building patterns for all MMA instructions.
6582 // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
6597 // To match the right intrinsic, we need to build AS-constrained PatFrag.
6607 // Build AS-constrained pattern.
6638 // To match the right intrinsic, we need to build AS-constrained PatFrag.
6649 // Build AS-constrained pattern.
6813 // Build AS-constrained pattern.
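// A sketch of the AS-constrained PatFrag idea referenced in the comments
// above: wrap the memory intrinsic in a PatFrag whose predicate checks the
// node's address space. int_nvvm_wmma_load_example stands in for one concrete
// WMMA load intrinsic, and the predicate body is illustrative:
def wmma_load_a_shared_example :
  PatFrag<(ops node:$ptr, node:$stride),
          (int_nvvm_wmma_load_example node:$ptr, node:$stride), [{
  return cast<MemSDNode>(N)->getAddressSpace() == llvm::ADDRESS_SPACE_SHARED;
}]>;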
6841 // Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
6850 // Build intrinsic->instruction patterns for all MMA instructions.
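// One way to work around the !subst limitation noted above (a dag node cannot
// be substituted with a multi-node tree): keep the operand lists flat and
// re-head them with the intrinsic via !setdagop. All names below are
// placeholders, not the file's generators:
class IntrinsicDag_example<Intrinsic intr, dag flat_ops> {
  dag ret = !setdagop(flat_ops, intr);  // (ops a, b, ...) becomes (intr a, b, ...)
}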