10b57cec5SDimitry Andric /*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------=== 20b57cec5SDimitry Andric * 30b57cec5SDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric * 70b57cec5SDimitry Andric *===-----------------------------------------------------------------------=== 80b57cec5SDimitry Andric */ 90b57cec5SDimitry Andric 100b57cec5SDimitry Andric #ifndef __PMMINTRIN_H 110b57cec5SDimitry Andric #define __PMMINTRIN_H 120b57cec5SDimitry Andric 13349cc55cSDimitry Andric #if !defined(__i386__) && !defined(__x86_64__) 14349cc55cSDimitry Andric #error "This header is only meant to be used on x86 and x64 architecture" 15349cc55cSDimitry Andric #endif 16349cc55cSDimitry Andric 170b57cec5SDimitry Andric #include <emmintrin.h> 180b57cec5SDimitry Andric 190b57cec5SDimitry Andric /* Define the default attributes for the functions in this file. */ 200b57cec5SDimitry Andric #define __DEFAULT_FN_ATTRS \ 21*5f757f3fSDimitry Andric __attribute__((__always_inline__, __nodebug__, \ 22*5f757f3fSDimitry Andric __target__("sse3,no-evex512"), __min_vector_width__(128))) 230b57cec5SDimitry Andric 240b57cec5SDimitry Andric /// Loads data from an unaligned memory location to elements in a 128-bit 250b57cec5SDimitry Andric /// vector. 260b57cec5SDimitry Andric /// 270b57cec5SDimitry Andric /// If the address of the data is not 16-byte aligned, the instruction may 280b57cec5SDimitry Andric /// read two adjacent aligned blocks of memory to retrieve the requested 290b57cec5SDimitry Andric /// data. 300b57cec5SDimitry Andric /// 310b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 320b57cec5SDimitry Andric /// 330b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VLDDQU </c> instruction. 340b57cec5SDimitry Andric /// 350b57cec5SDimitry Andric /// \param __p 360b57cec5SDimitry Andric /// A pointer to a 128-bit integer vector containing integer values. 370b57cec5SDimitry Andric /// \returns A 128-bit vector containing the moved values. 380b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS 3981ad6265SDimitry Andric _mm_lddqu_si128(__m128i_u const *__p) 400b57cec5SDimitry Andric { 410b57cec5SDimitry Andric return (__m128i)__builtin_ia32_lddqu((char const *)__p); 420b57cec5SDimitry Andric } 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric /// Adds the even-indexed values and subtracts the odd-indexed values of 450b57cec5SDimitry Andric /// two 128-bit vectors of [4 x float]. 460b57cec5SDimitry Andric /// 470b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 480b57cec5SDimitry Andric /// 490b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VADDSUBPS </c> instruction. 500b57cec5SDimitry Andric /// 510b57cec5SDimitry Andric /// \param __a 520b57cec5SDimitry Andric /// A 128-bit vector of [4 x float] containing the left source operand. 530b57cec5SDimitry Andric /// \param __b 540b57cec5SDimitry Andric /// A 128-bit vector of [4 x float] containing the right source operand. 550b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the alternating sums and 560b57cec5SDimitry Andric /// differences of both operands. 570b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS 580b57cec5SDimitry Andric _mm_addsub_ps(__m128 __a, __m128 __b) 590b57cec5SDimitry Andric { 600b57cec5SDimitry Andric return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b); 610b57cec5SDimitry Andric } 620b57cec5SDimitry Andric 630b57cec5SDimitry Andric /// Horizontally adds the adjacent pairs of values contained in two 640b57cec5SDimitry Andric /// 128-bit vectors of [4 x float]. 650b57cec5SDimitry Andric /// 660b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 670b57cec5SDimitry Andric /// 680b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VHADDPS </c> instruction. 690b57cec5SDimitry Andric /// 700b57cec5SDimitry Andric /// \param __a 710b57cec5SDimitry Andric /// A 128-bit vector of [4 x float] containing one of the source operands. 720b57cec5SDimitry Andric /// The horizontal sums of the values are stored in the lower bits of the 730b57cec5SDimitry Andric /// destination. 740b57cec5SDimitry Andric /// \param __b 750b57cec5SDimitry Andric /// A 128-bit vector of [4 x float] containing one of the source operands. 760b57cec5SDimitry Andric /// The horizontal sums of the values are stored in the upper bits of the 770b57cec5SDimitry Andric /// destination. 780b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the horizontal sums of 790b57cec5SDimitry Andric /// both operands. 800b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS 810b57cec5SDimitry Andric _mm_hadd_ps(__m128 __a, __m128 __b) 820b57cec5SDimitry Andric { 830b57cec5SDimitry Andric return __builtin_ia32_haddps((__v4sf)__a, (__v4sf)__b); 840b57cec5SDimitry Andric } 850b57cec5SDimitry Andric 860b57cec5SDimitry Andric /// Horizontally subtracts the adjacent pairs of values contained in two 870b57cec5SDimitry Andric /// 128-bit vectors of [4 x float]. 880b57cec5SDimitry Andric /// 890b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 900b57cec5SDimitry Andric /// 910b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VHSUBPS </c> instruction. 920b57cec5SDimitry Andric /// 930b57cec5SDimitry Andric /// \param __a 940b57cec5SDimitry Andric /// A 128-bit vector of [4 x float] containing one of the source operands. 950b57cec5SDimitry Andric /// The horizontal differences between the values are stored in the lower 960b57cec5SDimitry Andric /// bits of the destination. 970b57cec5SDimitry Andric /// \param __b 980b57cec5SDimitry Andric /// A 128-bit vector of [4 x float] containing one of the source operands. 990b57cec5SDimitry Andric /// The horizontal differences between the values are stored in the upper 1000b57cec5SDimitry Andric /// bits of the destination. 1010b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the horizontal 1020b57cec5SDimitry Andric /// differences of both operands. 1030b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS 1040b57cec5SDimitry Andric _mm_hsub_ps(__m128 __a, __m128 __b) 1050b57cec5SDimitry Andric { 1060b57cec5SDimitry Andric return __builtin_ia32_hsubps((__v4sf)__a, (__v4sf)__b); 1070b57cec5SDimitry Andric } 1080b57cec5SDimitry Andric 1090b57cec5SDimitry Andric /// Moves and duplicates odd-indexed values from a 128-bit vector 1100b57cec5SDimitry Andric /// of [4 x float] to float values stored in a 128-bit vector of 1110b57cec5SDimitry Andric /// [4 x float]. 1120b57cec5SDimitry Andric /// 1130b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1140b57cec5SDimitry Andric /// 1150b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVSHDUP </c> instruction. 1160b57cec5SDimitry Andric /// 1170b57cec5SDimitry Andric /// \param __a 1180b57cec5SDimitry Andric /// A 128-bit vector of [4 x float]. \n 1190b57cec5SDimitry Andric /// Bits [127:96] of the source are written to bits [127:96] and [95:64] of 1200b57cec5SDimitry Andric /// the destination. \n 1210b57cec5SDimitry Andric /// Bits [63:32] of the source are written to bits [63:32] and [31:0] of the 1220b57cec5SDimitry Andric /// destination. 1230b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated 1240b57cec5SDimitry Andric /// values. 1250b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS 1260b57cec5SDimitry Andric _mm_movehdup_ps(__m128 __a) 1270b57cec5SDimitry Andric { 1280b57cec5SDimitry Andric return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3); 1290b57cec5SDimitry Andric } 1300b57cec5SDimitry Andric 1310b57cec5SDimitry Andric /// Duplicates even-indexed values from a 128-bit vector of 1320b57cec5SDimitry Andric /// [4 x float] to float values stored in a 128-bit vector of [4 x float]. 1330b57cec5SDimitry Andric /// 1340b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1350b57cec5SDimitry Andric /// 1360b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVSLDUP </c> instruction. 1370b57cec5SDimitry Andric /// 1380b57cec5SDimitry Andric /// \param __a 1390b57cec5SDimitry Andric /// A 128-bit vector of [4 x float] \n 1400b57cec5SDimitry Andric /// Bits [95:64] of the source are written to bits [127:96] and [95:64] of 1410b57cec5SDimitry Andric /// the destination. \n 1420b57cec5SDimitry Andric /// Bits [31:0] of the source are written to bits [63:32] and [31:0] of the 1430b57cec5SDimitry Andric /// destination. 1440b57cec5SDimitry Andric /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated 1450b57cec5SDimitry Andric /// values. 1460b57cec5SDimitry Andric static __inline__ __m128 __DEFAULT_FN_ATTRS 1470b57cec5SDimitry Andric _mm_moveldup_ps(__m128 __a) 1480b57cec5SDimitry Andric { 1490b57cec5SDimitry Andric return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2); 1500b57cec5SDimitry Andric } 1510b57cec5SDimitry Andric 1520b57cec5SDimitry Andric /// Adds the even-indexed values and subtracts the odd-indexed values of 1530b57cec5SDimitry Andric /// two 128-bit vectors of [2 x double]. 1540b57cec5SDimitry Andric /// 1550b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1560b57cec5SDimitry Andric /// 1570b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VADDSUBPD </c> instruction. 1580b57cec5SDimitry Andric /// 1590b57cec5SDimitry Andric /// \param __a 1600b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the left source operand. 1610b57cec5SDimitry Andric /// \param __b 1620b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing the right source operand. 1630b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the alternating sums 1640b57cec5SDimitry Andric /// and differences of both operands. 1650b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1660b57cec5SDimitry Andric _mm_addsub_pd(__m128d __a, __m128d __b) 1670b57cec5SDimitry Andric { 1680b57cec5SDimitry Andric return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b); 1690b57cec5SDimitry Andric } 1700b57cec5SDimitry Andric 1710b57cec5SDimitry Andric /// Horizontally adds the pairs of values contained in two 128-bit 1720b57cec5SDimitry Andric /// vectors of [2 x double]. 1730b57cec5SDimitry Andric /// 1740b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1750b57cec5SDimitry Andric /// 1760b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VHADDPD </c> instruction. 1770b57cec5SDimitry Andric /// 1780b57cec5SDimitry Andric /// \param __a 1790b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 1800b57cec5SDimitry Andric /// The horizontal sum of the values is stored in the lower bits of the 1810b57cec5SDimitry Andric /// destination. 1820b57cec5SDimitry Andric /// \param __b 1830b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 1840b57cec5SDimitry Andric /// The horizontal sum of the values is stored in the upper bits of the 1850b57cec5SDimitry Andric /// destination. 1860b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the horizontal sums of 1870b57cec5SDimitry Andric /// both operands. 1880b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 1890b57cec5SDimitry Andric _mm_hadd_pd(__m128d __a, __m128d __b) 1900b57cec5SDimitry Andric { 1910b57cec5SDimitry Andric return __builtin_ia32_haddpd((__v2df)__a, (__v2df)__b); 1920b57cec5SDimitry Andric } 1930b57cec5SDimitry Andric 1940b57cec5SDimitry Andric /// Horizontally subtracts the pairs of values contained in two 128-bit 1950b57cec5SDimitry Andric /// vectors of [2 x double]. 1960b57cec5SDimitry Andric /// 1970b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 1980b57cec5SDimitry Andric /// 1990b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VHSUBPD </c> instruction. 2000b57cec5SDimitry Andric /// 2010b57cec5SDimitry Andric /// \param __a 2020b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 2030b57cec5SDimitry Andric /// The horizontal difference of the values is stored in the lower bits of 2040b57cec5SDimitry Andric /// the destination. 2050b57cec5SDimitry Andric /// \param __b 2060b57cec5SDimitry Andric /// A 128-bit vector of [2 x double] containing one of the source operands. 2070b57cec5SDimitry Andric /// The horizontal difference of the values is stored in the upper bits of 2080b57cec5SDimitry Andric /// the destination. 2090b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the horizontal 2100b57cec5SDimitry Andric /// differences of both operands. 2110b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 2120b57cec5SDimitry Andric _mm_hsub_pd(__m128d __a, __m128d __b) 2130b57cec5SDimitry Andric { 2140b57cec5SDimitry Andric return __builtin_ia32_hsubpd((__v2df)__a, (__v2df)__b); 2150b57cec5SDimitry Andric } 2160b57cec5SDimitry Andric 2170b57cec5SDimitry Andric /// Moves and duplicates one double-precision value to double-precision 2180b57cec5SDimitry Andric /// values stored in a 128-bit vector of [2 x double]. 2190b57cec5SDimitry Andric /// 2200b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2210b57cec5SDimitry Andric /// 2220b57cec5SDimitry Andric /// \code 2230b57cec5SDimitry Andric /// __m128d _mm_loaddup_pd(double const *dp); 2240b57cec5SDimitry Andric /// \endcode 2250b57cec5SDimitry Andric /// 2260b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction. 2270b57cec5SDimitry Andric /// 2280b57cec5SDimitry Andric /// \param dp 2290b57cec5SDimitry Andric /// A pointer to a double-precision value to be moved and duplicated. 2300b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved and 2310b57cec5SDimitry Andric /// duplicated values. 2320b57cec5SDimitry Andric #define _mm_loaddup_pd(dp) _mm_load1_pd(dp) 2330b57cec5SDimitry Andric 2340b57cec5SDimitry Andric /// Moves and duplicates the double-precision value in the lower bits of 2350b57cec5SDimitry Andric /// a 128-bit vector of [2 x double] to double-precision values stored in a 2360b57cec5SDimitry Andric /// 128-bit vector of [2 x double]. 2370b57cec5SDimitry Andric /// 2380b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2390b57cec5SDimitry Andric /// 2400b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> VMOVDDUP </c> instruction. 2410b57cec5SDimitry Andric /// 2420b57cec5SDimitry Andric /// \param __a 2430b57cec5SDimitry Andric /// A 128-bit vector of [2 x double]. Bits [63:0] are written to bits 2440b57cec5SDimitry Andric /// [127:64] and [63:0] of the destination. 2450b57cec5SDimitry Andric /// \returns A 128-bit vector of [2 x double] containing the moved and 2460b57cec5SDimitry Andric /// duplicated values. 2470b57cec5SDimitry Andric static __inline__ __m128d __DEFAULT_FN_ATTRS 2480b57cec5SDimitry Andric _mm_movedup_pd(__m128d __a) 2490b57cec5SDimitry Andric { 2500b57cec5SDimitry Andric return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); 2510b57cec5SDimitry Andric } 2520b57cec5SDimitry Andric 2530b57cec5SDimitry Andric /// Establishes a linear address memory range to be monitored and puts 2540b57cec5SDimitry Andric /// the processor in the monitor event pending state. Data stored in the 2550b57cec5SDimitry Andric /// monitored address range causes the processor to exit the pending state. 2560b57cec5SDimitry Andric /// 25706c3fb27SDimitry Andric /// The \c MONITOR instruction can be used in kernel mode, and in other modes 25806c3fb27SDimitry Andric /// if MSR <c> C001_0015h[MonMwaitUserEn] </c> is set. 25906c3fb27SDimitry Andric /// 2600b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2610b57cec5SDimitry Andric /// 26206c3fb27SDimitry Andric /// This intrinsic corresponds to the \c MONITOR instruction. 2630b57cec5SDimitry Andric /// 2640b57cec5SDimitry Andric /// \param __p 2650b57cec5SDimitry Andric /// The memory range to be monitored. The size of the range is determined by 2660b57cec5SDimitry Andric /// CPUID function 0000_0005h. 2670b57cec5SDimitry Andric /// \param __extensions 2680b57cec5SDimitry Andric /// Optional extensions for the monitoring state. 2690b57cec5SDimitry Andric /// \param __hints 2700b57cec5SDimitry Andric /// Optional hints for the monitoring state. 2710b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 2720b57cec5SDimitry Andric _mm_monitor(void const *__p, unsigned __extensions, unsigned __hints) 2730b57cec5SDimitry Andric { 274480093f4SDimitry Andric __builtin_ia32_monitor(__p, __extensions, __hints); 2750b57cec5SDimitry Andric } 2760b57cec5SDimitry Andric 27706c3fb27SDimitry Andric /// Used with the \c MONITOR instruction to wait while the processor is in 2780b57cec5SDimitry Andric /// the monitor event pending state. Data stored in the monitored address 27906c3fb27SDimitry Andric /// range, or an interrupt, causes the processor to exit the pending state. 28006c3fb27SDimitry Andric /// 28106c3fb27SDimitry Andric /// The \c MWAIT instruction can be used in kernel mode, and in other modes if 28206c3fb27SDimitry Andric /// MSR <c> C001_0015h[MonMwaitUserEn] </c> is set. 2830b57cec5SDimitry Andric /// 2840b57cec5SDimitry Andric /// \headerfile <x86intrin.h> 2850b57cec5SDimitry Andric /// 28606c3fb27SDimitry Andric /// This intrinsic corresponds to the \c MWAIT instruction. 2870b57cec5SDimitry Andric /// 2880b57cec5SDimitry Andric /// \param __extensions 28906c3fb27SDimitry Andric /// Optional extensions for the monitoring state, which can vary by 2900b57cec5SDimitry Andric /// processor. 2910b57cec5SDimitry Andric /// \param __hints 29206c3fb27SDimitry Andric /// Optional hints for the monitoring state, which can vary by processor. 2930b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS 2940b57cec5SDimitry Andric _mm_mwait(unsigned __extensions, unsigned __hints) 2950b57cec5SDimitry Andric { 2960b57cec5SDimitry Andric __builtin_ia32_mwait(__extensions, __hints); 2970b57cec5SDimitry Andric } 2980b57cec5SDimitry Andric 2990b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS 3000b57cec5SDimitry Andric 3010b57cec5SDimitry Andric #endif /* __PMMINTRIN_H */ 302