/* xref: /freebsd/contrib/llvm-project/clang/lib/Headers/pmmintrin.h (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad) */
/*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
90b57cec5SDimitry Andric 
100b57cec5SDimitry Andric #ifndef __PMMINTRIN_H
110b57cec5SDimitry Andric #define __PMMINTRIN_H
120b57cec5SDimitry Andric 
13349cc55cSDimitry Andric #if !defined(__i386__) && !defined(__x86_64__)
14349cc55cSDimitry Andric #error "This header is only meant to be used on x86 and x64 architecture"
15349cc55cSDimitry Andric #endif
16349cc55cSDimitry Andric 
170b57cec5SDimitry Andric #include <emmintrin.h>
180b57cec5SDimitry Andric 
/* Default attributes for every intrinsic defined in this file: always inline
 * the wrapper, emit no debug info for it, compile its body with SSE3 enabled
 * (and EVEX512 disabled), and declare a minimum vector width of 128 bits.
 * The macro is #undef'd again at the end of the header. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("sse3,no-evex512"), __min_vector_width__(128)))
230b57cec5SDimitry Andric 
240b57cec5SDimitry Andric /// Loads data from an unaligned memory location to elements in a 128-bit
250b57cec5SDimitry Andric ///    vector.
260b57cec5SDimitry Andric ///
270b57cec5SDimitry Andric ///    If the address of the data is not 16-byte aligned, the instruction may
280b57cec5SDimitry Andric ///    read two adjacent aligned blocks of memory to retrieve the requested
290b57cec5SDimitry Andric ///    data.
300b57cec5SDimitry Andric ///
310b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
320b57cec5SDimitry Andric ///
330b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VLDDQU </c> instruction.
340b57cec5SDimitry Andric ///
350b57cec5SDimitry Andric /// \param __p
360b57cec5SDimitry Andric ///    A pointer to a 128-bit integer vector containing integer values.
370b57cec5SDimitry Andric /// \returns A 128-bit vector containing the moved values.
380b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
3981ad6265SDimitry Andric _mm_lddqu_si128(__m128i_u const *__p)
400b57cec5SDimitry Andric {
410b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_lddqu((char const *)__p);
420b57cec5SDimitry Andric }
430b57cec5SDimitry Andric 
440b57cec5SDimitry Andric /// Adds the even-indexed values and subtracts the odd-indexed values of
450b57cec5SDimitry Andric ///    two 128-bit vectors of [4 x float].
460b57cec5SDimitry Andric ///
470b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
480b57cec5SDimitry Andric ///
490b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VADDSUBPS </c> instruction.
500b57cec5SDimitry Andric ///
510b57cec5SDimitry Andric /// \param __a
520b57cec5SDimitry Andric ///    A 128-bit vector of [4 x float] containing the left source operand.
530b57cec5SDimitry Andric /// \param __b
540b57cec5SDimitry Andric ///    A 128-bit vector of [4 x float] containing the right source operand.
550b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the alternating sums and
560b57cec5SDimitry Andric ///    differences of both operands.
570b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS
580b57cec5SDimitry Andric _mm_addsub_ps(__m128 __a, __m128 __b)
590b57cec5SDimitry Andric {
600b57cec5SDimitry Andric   return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b);
610b57cec5SDimitry Andric }
620b57cec5SDimitry Andric 
630b57cec5SDimitry Andric /// Horizontally adds the adjacent pairs of values contained in two
640b57cec5SDimitry Andric ///    128-bit vectors of [4 x float].
650b57cec5SDimitry Andric ///
660b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
670b57cec5SDimitry Andric ///
680b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VHADDPS </c> instruction.
690b57cec5SDimitry Andric ///
700b57cec5SDimitry Andric /// \param __a
710b57cec5SDimitry Andric ///    A 128-bit vector of [4 x float] containing one of the source operands.
720b57cec5SDimitry Andric ///    The horizontal sums of the values are stored in the lower bits of the
730b57cec5SDimitry Andric ///    destination.
740b57cec5SDimitry Andric /// \param __b
750b57cec5SDimitry Andric ///    A 128-bit vector of [4 x float] containing one of the source operands.
760b57cec5SDimitry Andric ///    The horizontal sums of the values are stored in the upper bits of the
770b57cec5SDimitry Andric ///    destination.
780b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the horizontal sums of
790b57cec5SDimitry Andric ///    both operands.
800b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS
810b57cec5SDimitry Andric _mm_hadd_ps(__m128 __a, __m128 __b)
820b57cec5SDimitry Andric {
830b57cec5SDimitry Andric   return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b);
840b57cec5SDimitry Andric }
850b57cec5SDimitry Andric 
860b57cec5SDimitry Andric /// Horizontally subtracts the adjacent pairs of values contained in two
870b57cec5SDimitry Andric ///    128-bit vectors of [4 x float].
880b57cec5SDimitry Andric ///
890b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
900b57cec5SDimitry Andric ///
910b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VHSUBPS </c> instruction.
920b57cec5SDimitry Andric ///
930b57cec5SDimitry Andric /// \param __a
940b57cec5SDimitry Andric ///    A 128-bit vector of [4 x float] containing one of the source operands.
950b57cec5SDimitry Andric ///    The horizontal differences between the values are stored in the lower
960b57cec5SDimitry Andric ///    bits of the destination.
970b57cec5SDimitry Andric /// \param __b
980b57cec5SDimitry Andric ///    A 128-bit vector of [4 x float] containing one of the source operands.
990b57cec5SDimitry Andric ///    The horizontal differences between the values are stored in the upper
1000b57cec5SDimitry Andric ///    bits of the destination.
1010b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the horizontal
1020b57cec5SDimitry Andric ///    differences of both operands.
1030b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS
1040b57cec5SDimitry Andric _mm_hsub_ps(__m128 __a, __m128 __b)
1050b57cec5SDimitry Andric {
1060b57cec5SDimitry Andric   return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b);
1070b57cec5SDimitry Andric }
1080b57cec5SDimitry Andric 
1090b57cec5SDimitry Andric /// Moves and duplicates odd-indexed values from a 128-bit vector
1100b57cec5SDimitry Andric ///    of [4 x float] to float values stored in a 128-bit vector of
1110b57cec5SDimitry Andric ///    [4 x float].
1120b57cec5SDimitry Andric ///
1130b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1140b57cec5SDimitry Andric ///
1150b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction.
1160b57cec5SDimitry Andric ///
1170b57cec5SDimitry Andric /// \param __a
1180b57cec5SDimitry Andric ///    A 128-bit vector of [4 x float]. \n
1190b57cec5SDimitry Andric ///    Bits [127:96] of the source are written to bits [127:96] and [95:64] of
1200b57cec5SDimitry Andric ///    the destination. \n
1210b57cec5SDimitry Andric ///    Bits [63:32] of the source are written to bits [63:32] and [31:0] of the
1220b57cec5SDimitry Andric ///    destination.
1230b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
1240b57cec5SDimitry Andric ///    values.
1250b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS
1260b57cec5SDimitry Andric _mm_movehdup_ps(__m128 __a)
1270b57cec5SDimitry Andric {
1280b57cec5SDimitry Andric   return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3);
1290b57cec5SDimitry Andric }
1300b57cec5SDimitry Andric 
1310b57cec5SDimitry Andric /// Duplicates even-indexed values from a 128-bit vector of
1320b57cec5SDimitry Andric ///    [4 x float] to float values stored in a 128-bit vector of [4 x float].
1330b57cec5SDimitry Andric ///
1340b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1350b57cec5SDimitry Andric ///
1360b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction.
1370b57cec5SDimitry Andric ///
1380b57cec5SDimitry Andric /// \param __a
1390b57cec5SDimitry Andric ///    A 128-bit vector of [4 x float] \n
1400b57cec5SDimitry Andric ///    Bits [95:64] of the source are written to bits [127:96] and [95:64] of
1410b57cec5SDimitry Andric ///    the destination. \n
1420b57cec5SDimitry Andric ///    Bits [31:0] of the source are written to bits [63:32] and [31:0] of the
1430b57cec5SDimitry Andric ///    destination.
1440b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated
1450b57cec5SDimitry Andric ///    values.
1460b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS
1470b57cec5SDimitry Andric _mm_moveldup_ps(__m128 __a)
1480b57cec5SDimitry Andric {
1490b57cec5SDimitry Andric   return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2);
1500b57cec5SDimitry Andric }
1510b57cec5SDimitry Andric 
1520b57cec5SDimitry Andric /// Adds the even-indexed values and subtracts the odd-indexed values of
1530b57cec5SDimitry Andric ///    two 128-bit vectors of [2 x double].
1540b57cec5SDimitry Andric ///
1550b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1560b57cec5SDimitry Andric ///
1570b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VADDSUBPD </c> instruction.
1580b57cec5SDimitry Andric ///
1590b57cec5SDimitry Andric /// \param __a
1600b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing the left source operand.
1610b57cec5SDimitry Andric /// \param __b
1620b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing the right source operand.
1630b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the alternating sums
1640b57cec5SDimitry Andric ///    and differences of both operands.
1650b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
1660b57cec5SDimitry Andric _mm_addsub_pd(__m128d __a, __m128d __b)
1670b57cec5SDimitry Andric {
1680b57cec5SDimitry Andric   return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);
1690b57cec5SDimitry Andric }
1700b57cec5SDimitry Andric 
1710b57cec5SDimitry Andric /// Horizontally adds the pairs of values contained in two 128-bit
1720b57cec5SDimitry Andric ///    vectors of [2 x double].
1730b57cec5SDimitry Andric ///
1740b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1750b57cec5SDimitry Andric ///
1760b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VHADDPD </c> instruction.
1770b57cec5SDimitry Andric ///
1780b57cec5SDimitry Andric /// \param __a
1790b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the source operands.
1800b57cec5SDimitry Andric ///    The horizontal sum of the values is stored in the lower bits of the
1810b57cec5SDimitry Andric ///    destination.
1820b57cec5SDimitry Andric /// \param __b
1830b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the source operands.
1840b57cec5SDimitry Andric ///    The horizontal sum of the values is stored in the upper bits of the
1850b57cec5SDimitry Andric ///    destination.
1860b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the horizontal sums of
1870b57cec5SDimitry Andric ///    both operands.
1880b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
1890b57cec5SDimitry Andric _mm_hadd_pd(__m128d __a, __m128d __b)
1900b57cec5SDimitry Andric {
1910b57cec5SDimitry Andric   return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b);
1920b57cec5SDimitry Andric }
1930b57cec5SDimitry Andric 
1940b57cec5SDimitry Andric /// Horizontally subtracts the pairs of values contained in two 128-bit
1950b57cec5SDimitry Andric ///    vectors of [2 x double].
1960b57cec5SDimitry Andric ///
1970b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1980b57cec5SDimitry Andric ///
1990b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VHSUBPD </c> instruction.
2000b57cec5SDimitry Andric ///
2010b57cec5SDimitry Andric /// \param __a
2020b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the source operands.
2030b57cec5SDimitry Andric ///    The horizontal difference of the values is stored in the lower bits of
2040b57cec5SDimitry Andric ///    the destination.
2050b57cec5SDimitry Andric /// \param __b
2060b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double] containing one of the source operands.
2070b57cec5SDimitry Andric ///    The horizontal difference of the values is stored in the upper bits of
2080b57cec5SDimitry Andric ///    the destination.
2090b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the horizontal
2100b57cec5SDimitry Andric ///    differences of both operands.
2110b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
2120b57cec5SDimitry Andric _mm_hsub_pd(__m128d __a, __m128d __b)
2130b57cec5SDimitry Andric {
2140b57cec5SDimitry Andric   return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b);
2150b57cec5SDimitry Andric }
2160b57cec5SDimitry Andric 
/// Moves and duplicates one double-precision value to double-precision
///    values stored in a 128-bit vector of [2 x double].
///
///    This is a macro that forwards its argument unchanged to
///    <c> _mm_load1_pd </c>.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_loaddup_pd(double const *dp);
/// \endcode
///
/// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.
///
/// \param dp
///    A pointer to a double-precision value to be moved and duplicated.
/// \returns A 128-bit vector of [2 x double] containing the moved and
///    duplicated values.
#define _mm_loaddup_pd(dp) _mm_load1_pd(dp)
2330b57cec5SDimitry Andric 
2340b57cec5SDimitry Andric /// Moves and duplicates the double-precision value in the lower bits of
2350b57cec5SDimitry Andric ///    a 128-bit vector of [2 x double] to double-precision values stored in a
2360b57cec5SDimitry Andric ///    128-bit vector of [2 x double].
2370b57cec5SDimitry Andric ///
2380b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2390b57cec5SDimitry Andric ///
2400b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction.
2410b57cec5SDimitry Andric ///
2420b57cec5SDimitry Andric /// \param __a
2430b57cec5SDimitry Andric ///    A 128-bit vector of [2 x double]. Bits [63:0] are written to bits
2440b57cec5SDimitry Andric ///    [127:64] and [63:0] of the destination.
2450b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved and
2460b57cec5SDimitry Andric ///    duplicated values.
2470b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS
2480b57cec5SDimitry Andric _mm_movedup_pd(__m128d __a)
2490b57cec5SDimitry Andric {
2500b57cec5SDimitry Andric   return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
2510b57cec5SDimitry Andric }
2520b57cec5SDimitry Andric 
2530b57cec5SDimitry Andric /// Establishes a linear address memory range to be monitored and puts
2540b57cec5SDimitry Andric ///    the processor in the monitor event pending state. Data stored in the
2550b57cec5SDimitry Andric ///    monitored address range causes the processor to exit the pending state.
2560b57cec5SDimitry Andric ///
25706c3fb27SDimitry Andric /// The \c MONITOR instruction can be used in kernel mode, and in other modes
25806c3fb27SDimitry Andric /// if MSR <c> C001_0015h[MonMwaitUserEn] </c> is set.
25906c3fb27SDimitry Andric ///
2600b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2610b57cec5SDimitry Andric ///
26206c3fb27SDimitry Andric /// This intrinsic corresponds to the \c MONITOR instruction.
2630b57cec5SDimitry Andric ///
2640b57cec5SDimitry Andric /// \param __p
2650b57cec5SDimitry Andric ///    The memory range to be monitored. The size of the range is determined by
2660b57cec5SDimitry Andric ///    CPUID function 0000_0005h.
2670b57cec5SDimitry Andric /// \param __extensions
2680b57cec5SDimitry Andric ///    Optional extensions for the monitoring state.
2690b57cec5SDimitry Andric /// \param __hints
2700b57cec5SDimitry Andric ///    Optional hints for the monitoring state.
2710b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS
2720b57cec5SDimitry Andric _mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)
2730b57cec5SDimitry Andric {
274480093f4SDimitry Andric   __builtin_ia32_monitor(__p, __extensions, __hints);
2750b57cec5SDimitry Andric }
2760b57cec5SDimitry Andric 
27706c3fb27SDimitry Andric /// Used with the \c MONITOR instruction to wait while the processor is in
2780b57cec5SDimitry Andric ///    the monitor event pending state. Data stored in the monitored address
27906c3fb27SDimitry Andric ///    range, or an interrupt, causes the processor to exit the pending state.
28006c3fb27SDimitry Andric ///
28106c3fb27SDimitry Andric /// The \c MWAIT instruction can be used in kernel mode, and in other modes if
28206c3fb27SDimitry Andric /// MSR <c> C001_0015h[MonMwaitUserEn] </c> is set.
2830b57cec5SDimitry Andric ///
2840b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
2850b57cec5SDimitry Andric ///
28606c3fb27SDimitry Andric /// This intrinsic corresponds to the \c MWAIT instruction.
2870b57cec5SDimitry Andric ///
2880b57cec5SDimitry Andric /// \param __extensions
28906c3fb27SDimitry Andric ///    Optional extensions for the monitoring state, which can vary by
2900b57cec5SDimitry Andric ///    processor.
2910b57cec5SDimitry Andric /// \param __hints
29206c3fb27SDimitry Andric ///    Optional hints for the monitoring state, which can vary by processor.
2930b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS
2940b57cec5SDimitry Andric _mm_mwait(unsigned __extensions, unsigned __hints)
2950b57cec5SDimitry Andric {
2960b57cec5SDimitry Andric   __builtin_ia32_mwait(__extensions, __hints);
2970b57cec5SDimitry Andric }
2980b57cec5SDimitry Andric 
2990b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS
3000b57cec5SDimitry Andric 
3010b57cec5SDimitry Andric #endif /* __PMMINTRIN_H */
302