xref: /freebsd/contrib/llvm-project/clang/lib/Headers/f16cintrin.h (revision 81ad626541db97eb356e2c1d4a20eb2a26a766ab)
/*===---- f16cintrin.h - F16C intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
90b57cec5SDimitry Andric 
100b57cec5SDimitry Andric #if !defined __IMMINTRIN_H
110b57cec5SDimitry Andric #error "Never use <f16cintrin.h> directly; include <immintrin.h> instead."
120b57cec5SDimitry Andric #endif
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #ifndef __F16CINTRIN_H
150b57cec5SDimitry Andric #define __F16CINTRIN_H
160b57cec5SDimitry Andric 
/* Define the default attributes for the functions in this file.
 * The two variants differ only in the __min_vector_width__ they request
 * (128 vs. 256). */
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(256)))
220b57cec5SDimitry Andric 
/* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,
 * but that's because icc can emulate these without f16c using a library call.
 * Since we don't do that, let's leave these in f16cintrin.h.
 */
270b57cec5SDimitry Andric 
280b57cec5SDimitry Andric /// Converts a 16-bit half-precision float value into a 32-bit float
290b57cec5SDimitry Andric ///    value.
300b57cec5SDimitry Andric ///
310b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
320b57cec5SDimitry Andric ///
330b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
340b57cec5SDimitry Andric ///
350b57cec5SDimitry Andric /// \param __a
360b57cec5SDimitry Andric ///    A 16-bit half-precision float value.
370b57cec5SDimitry Andric /// \returns The converted 32-bit float value.
380b57cec5SDimitry Andric static __inline float __DEFAULT_FN_ATTRS128
_cvtsh_ss(unsigned short __a)390b57cec5SDimitry Andric _cvtsh_ss(unsigned short __a)
400b57cec5SDimitry Andric {
410b57cec5SDimitry Andric   __v8hi __v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
420b57cec5SDimitry Andric   __v4sf __r = __builtin_ia32_vcvtph2ps(__v);
430b57cec5SDimitry Andric   return __r[0];
440b57cec5SDimitry Andric }
450b57cec5SDimitry Andric 
/// Converts a 32-bit single-precision float value to a 16-bit
///    half-precision float value.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned short _cvtss_sh(float a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 32-bit single-precision float value to be converted to a 16-bit
///    half-precision float value.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns The converted 16-bit half-precision float value.
/* The input is placed in element 0 of a 4-element vector whose other elements
 * are zero; only element 0 of the converted result is returned. */
#define _cvtss_sh(a, imm) __extension__ ({ \
  (unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph((__v4sf){(a), 0, 0, 0}, \
                                                     (imm)))[0]); })
710b57cec5SDimitry Andric 
/// Converts a 128-bit vector containing 32-bit float values into a
///    128-bit vector containing 16-bit half-precision float values.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_cvtps_ph(__m128 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 128-bit vector containing 32-bit float values.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector containing converted 16-bit half-precision float
///    values. The lower 64 bits are used to store the converted 16-bit
///    half-precision floating-point values.
#define _mm_cvtps_ph(a, imm) \
  ((__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)))
970b57cec5SDimitry Andric 
980b57cec5SDimitry Andric /// Converts a 128-bit vector containing 16-bit half-precision float
990b57cec5SDimitry Andric ///    values into a 128-bit vector containing 32-bit float values.
1000b57cec5SDimitry Andric ///
1010b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1020b57cec5SDimitry Andric ///
1030b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
1040b57cec5SDimitry Andric ///
1050b57cec5SDimitry Andric /// \param __a
1060b57cec5SDimitry Andric ///    A 128-bit vector containing 16-bit half-precision float values. The lower
1070b57cec5SDimitry Andric ///    64 bits are used in the conversion.
1080b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing converted float values.
1090b57cec5SDimitry Andric static __inline __m128 __DEFAULT_FN_ATTRS128
_mm_cvtph_ps(__m128i __a)1100b57cec5SDimitry Andric _mm_cvtph_ps(__m128i __a)
1110b57cec5SDimitry Andric {
1120b57cec5SDimitry Andric   return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
1130b57cec5SDimitry Andric }
1140b57cec5SDimitry Andric 
/// Converts a 256-bit vector of [8 x float] into a 128-bit vector
///    containing 16-bit half-precision float values.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm256_cvtps_ph(__m256 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the <c> VCVTPS2PH </c> instruction.
///
/// \param a
///    A 256-bit vector containing 32-bit single-precision float values to be
///    converted to 16-bit half-precision float values.
/// \param imm
///    An immediate value controlling rounding using bits [2:0]: \n
///    000: Nearest \n
///    001: Down \n
///    010: Up \n
///    011: Truncate \n
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector containing the converted 16-bit half-precision
///    float values.
#define _mm256_cvtps_ph(a, imm) \
  ((__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)))
1400b57cec5SDimitry Andric 
1410b57cec5SDimitry Andric /// Converts a 128-bit vector containing 16-bit half-precision float
1420b57cec5SDimitry Andric ///    values into a 256-bit vector of [8 x float].
1430b57cec5SDimitry Andric ///
1440b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1450b57cec5SDimitry Andric ///
1460b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VCVTPH2PS </c> instruction.
1470b57cec5SDimitry Andric ///
1480b57cec5SDimitry Andric /// \param __a
1490b57cec5SDimitry Andric ///    A 128-bit vector containing 16-bit half-precision float values to be
1500b57cec5SDimitry Andric ///    converted to 32-bit single-precision float values.
1510b57cec5SDimitry Andric /// \returns A vector of [8 x float] containing the converted 32-bit
1520b57cec5SDimitry Andric ///    single-precision float values.
1530b57cec5SDimitry Andric static __inline __m256 __DEFAULT_FN_ATTRS256
_mm256_cvtph_ps(__m128i __a)1540b57cec5SDimitry Andric _mm256_cvtph_ps(__m128i __a)
1550b57cec5SDimitry Andric {
1560b57cec5SDimitry Andric   return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
1570b57cec5SDimitry Andric }
1580b57cec5SDimitry Andric 
1590b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS128
1600b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS256
1610b57cec5SDimitry Andric 
1620b57cec5SDimitry Andric #endif /* __F16CINTRIN_H */
163