Lines Matching +full:eq +full:- +full:level

1 //===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 float f = (float)N->getValueAPF().convertToFloat();
15 float f = (float)N->getValueAPF().convertToFloat();
20 double d = (double)N->getValueAPF().convertToDouble();
25 double d = (double)N->getValueAPF().convertToDouble();
44 return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
52 // E.g. RegNames<3,"r">.ret -> ["r0", "r1", "r2" ]
63 //-----------------------------------
65 //-----------------------------------
162 !eq(reg, "i32"): Int32Regs,
163 !eq(reg, "f32"): Float32Regs);
323 //-----------------------------------
325 //-----------------------------------
338 //-----------------------------------
340 //-----------------------------------
436 //-----------------------------------
438 //-----------------------------------
569 //-----------------------------------
571 //-----------------------------------
726 MIN_MAX_TUPLE<"_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_f16,
728 MIN_MAX_TUPLE<"_ftz_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16,
730 MIN_MAX_TUPLE<"_NaN_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16,
732 MIN_MAX_TUPLE<"_ftz_NaN_f16", !if(!eq(IntName, "min"),
734 MIN_MAX_TUPLE<"_xorsign_abs_f16", !if(!eq(IntName, "min"),
737 MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16", !if(!eq(IntName, "min"),
740 MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
743 MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
746 MIN_MAX_TUPLE<"_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2,
748 MIN_MAX_TUPLE<"_ftz_f16x2", !if(!eq(IntName, "min"),
750 MIN_MAX_TUPLE<"_NaN_f16x2", !if(!eq(IntName, "min"),
752 MIN_MAX_TUPLE<"_ftz_NaN_f16x2", !if(!eq(IntName, "min"),
754 MIN_MAX_TUPLE<"_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
757 MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
760 MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
763 MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
767 MIN_MAX_TUPLE<"_bf16", !if(!eq(IntName, "min"),
769 MIN_MAX_TUPLE<"_NaN_bf16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_bf16,
771 MIN_MAX_TUPLE<"_xorsign_abs_bf16", !if(!eq(IntName, "min"),
774 MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16", !if(!eq(IntName, "min"),
777 MIN_MAX_TUPLE<"_bf16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_bf16x2,
779 MIN_MAX_TUPLE<"_NaN_bf16x2", !if(!eq(IntName, "min"),
781 MIN_MAX_TUPLE<"_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
784 MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
1183 // 1.0f / sqrt_approx -> rsqrt_approx
1190 // same for int_nvvm_sqrt_f when non-precision sqrt is requested
1526 //-----------------------------------
1528 //-----------------------------------
1548 Requires<!if(!or(!eq(TypeStr, ".f16"), !eq(TypeStr, ".bf16")), [Predicate<"false">], Pred)>;
2113 defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
2114 # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
2127 defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
2128 # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
2169 // No need to define ".gpu"-scoped atomics. They do the same thing
2170 // as the regular, non-scoped atomics defined elsewhere.
2239 //-----------------------------------
2241 //-----------------------------------
2244 // read-only in a kernel.
2343 //-----------------------------------
2345 //-----------------------------------
2348 // non-coherent texture cache, and therefore the values read must be read-only
2421 // FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
2543 // ->
2561 //-----------------------------------
2563 // - Just ignore them in codegen
2564 //-----------------------------------
2769 //-----------------------------------
2771 //-----------------------------------
2827 TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
2829 TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
2831 TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
2916 : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
2918 : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
2920 : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
2996 TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
2998 TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3000 TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3089 : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
3091 : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
3093 : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3176 : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3178 : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3180 : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3265 : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3267 : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3269 : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3323 : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
3326 : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
3329 : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
3427 : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
3429 : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
3431 : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
3499 : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32",
3502 : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32",
3505 : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32",
3578 : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3580 : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3582 : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3651 : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32",
3654 : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32",
3657 : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32",
3729 : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3731 : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3733 : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3797 : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32",
3800 : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32",
3803 : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32",
3846 : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
3849 : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
3852 : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
4394 //-----------------------------------
4396 //-----------------------------------
4483 //-----------------------------------
4485 //-----------------------------------
4552 //===- Handle Query -------------------------------------------------------===//
4568 //===- Surface Stores -----------------------------------------------------===//
6350 //-----------------------------------
6352 //-----------------------------------
6374 // TODO Add read vector-version of special registers
6448 // In addition to target-independent fields provided by WMMA_REGS, it adds
6449 // the fields commonly used to implement specific PTX instruction -- register
6455 !eq(ptx_elt_type, "f16") : Int32Regs,
6456 !eq(ptx_elt_type, "f32") : Float32Regs,
6457 !eq(ptx_elt_type, "f64") : Float64Regs,
6458 !eq(ptx_elt_type, "bf16") : Int32Regs,
6459 !eq(ptx_elt_type, "tf32") : Int32Regs,
6460 !eq(ptx_elt_type, "s32") : Int32Regs,
6461 !eq(ptx_elt_type, "b16") : Int32Regs,
6462 !eq(ptx_elt_type, "s8") : Int32Regs,
6463 !eq(ptx_elt_type, "u8") : Int32Regs,
6464 !eq(ptx_elt_type, "s4") : Int32Regs,
6465 !eq(ptx_elt_type, "u4") : Int32Regs,
6466 !eq(ptx_elt_type, "b1") : Int32Regs);
6471 // List of register names for the fragment -- ["ra0", "ra1",...]
6474 // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
6478 // per-instruction predicates, but currently all fragments that can be used in
6481 // longer the case, we can concat all per-fragment predicates to enforce that
6484 // fp16 -> fp16/fp32 @ m16n16k16
6485 !and(!eq(geom, "m16n16k16"),
6486 !or(!eq(ptx_elt_type, "f16"),
6487 !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<60>],
6489 !and(!eq(geom,"m8n8k4"),
6490 !eq(ptx_elt_type, "f64")) : [hasSM<80>, hasPTX<70>],
6492 // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
6493 !and(!or(!eq(geom, "m8n32k16"),
6494 !eq(geom, "m32n8k16")),
6495 !or(!eq(ptx_elt_type, "f16"),
6496 !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<61>],
6498 // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
6499 !and(!or(!eq(geom,"m16n16k16"),
6500 !eq(geom,"m8n32k16"),
6501 !eq(geom,"m32n8k16")),
6502 !or(!eq(ptx_elt_type, "u8"),
6503 !eq(ptx_elt_type, "s8"),
6504 !eq(ptx_elt_type, "s32"))) : [hasSM<72>, hasPTX<63>],
6506 !and(!or(!eq(geom,"m16n16k16"),
6507 !eq(geom,"m8n32k16"),
6508 !eq(geom,"m32n8k16")),
6509 !eq(ptx_elt_type, "bf16")) : [hasSM<80>, hasPTX<70>],
6511 !and(!eq(geom,"m16n16k8"),
6512 !eq(ptx_elt_type, "tf32")) : [hasSM<80>, hasPTX<70>],
6514 !and(!eq(geom,"m16n16k8"),
6515 !eq(ptx_elt_type, "f32")) : [hasSM<80>, hasPTX<70>],
6517 // b1 -> s32 @ m8n8k128(b1)
6519 !eq(geom,"m8n8k128")) : [hasSM<75>, hasPTX<63>],
6521 // u4/s4 -> s32 @ m8n8k32 (u4/s4)
6523 !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<63>],
6525 !or(!eq(geom,"m16n8k8"),
6526 !eq(geom,"m8n8k16")) : [hasSM<75>, hasPTX<65>],
6529 !eq(geom, "m8n8k4")) : [hasSM<70>, hasPTX<64>],
6532 !and(!eq(op,"mma"),
6533 !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<65>],
6535 !and(!eq(ptx_elt_type,"f64"),
6536 !eq(geom, "m8n8k4")) : [hasSM<80>, hasPTX<70>],
6538 !and(!eq(op,"mma"),
6539 !or(!eq(geom, "m16n8k16"),
6540 !eq(geom, "m16n8k4"),
6541 !eq(geom, "m16n8k32"),
6542 !eq(geom, "m16n8k64"),
6543 !eq(geom, "m8n8k128"),
6544 !eq(geom, "m16n8k128"),
6545 !eq(geom, "m16n8k256"))) : [hasSM<80>, hasPTX<70>],
6547 !and(!eq(op,"ldmatrix"),
6548 !eq(ptx_elt_type,"b16"),
6549 !eq(geom, "m8n8")) : [hasSM<75>, hasPTX<65>]);
6576 // Common WMMA-related fields used for building patterns for all MMA instructions.
6582 // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
6597 // To match the right intrinsic, we need to build AS-constrained PatFrag.
6604 !cond(!eq(Space, ".shared"): AS_match.shared,
6605 !eq(Space, ".global"): AS_match.global,
6607 // Build AS-constrained pattern.
6638 // To match the right intrinsic, we need to build AS-constrained PatFrag.
6646 !cond(!eq(Space, ".shared"): AS_match.shared,
6647 !eq(Space, ".global"): AS_match.global,
6649 // Build AS-constrained pattern.
6690 !if(!eq(b1op, ".and.popc"), [hasSM<80>,hasPTX<71>],[])
6705 !eq(FragA.ptx_elt_type, "f16") : "." # FragD.ptx_elt_type
6811 !cond(!eq(Space, ".shared"): AS_match.shared,
6813 // Build AS-constrained pattern.
6841 // Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
6850 // Build intrinsic->instruction patterns for all MMA instructions.