/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __AMMINTRIN_H
#define __AMMINTRIN_H

#if !defined(__i386__) && !defined(__x86_64__)
#error "This header is only meant to be used on x86 and x64 architecture"
#endif

#include <pmmintrin.h>

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a"), __min_vector_width__(128)))

/// Extracts the specified bits from the lower 64 bits of the 128-bit
///    integer vector operand at the index \a idx and of the length \a len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
///
/// \param x
///    The value from which bits are extracted.
/// \param len
///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
///    are zero, the length is interpreted as 64.
/// \param idx
///    Bits [5:0] specify the index of the least significant bit; the other
///    bits are ignored. If the sum of the index and length is greater than 64,
///    the result is undefined. If the length and index are both zero, bits
///    [63:0] of parameter \a x are extracted. If the length is zero but the
///    index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector whose lower 64 bits contain the bits
///    extracted from the source operand.
#define _mm_extracti_si64(x, len, idx) \
  ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \
                                  (char)(len), (char)(idx)))

/// Extracts the specified bits from the lower 64 bits of the 128-bit
///    integer vector operand at the index and of the length specified by
///    \a __y.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
///
/// \param __x
///    The value from which bits are extracted.
/// \param __y
///    Specifies the index of the least significant bit at [13:8] and the
///    length at [5:0]; all other bits are ignored. If bits [5:0] are zero, the
///    length is interpreted as 64. If the sum of the index and length is
///    greater than 64, the result is undefined. If the length and index are
///    both zero, bits [63:0] of parameter \a __x are extracted. If the length
///    is zero but the index is non-zero, the result is undefined.
/// \returns A 128-bit vector whose lower 64 bits contain the bits extracted
///    from the source operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_extract_si64(__m128i __x, __m128i __y)
{
  return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
}

/// Inserts bits of a specified length from the source integer vector
///    \a y into the lower 64 bits of the destination integer vector \a x at
///    the index \a idx and of the length \a len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len,
///                          const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
///
/// \param x
///    The destination operand where bits will be inserted. The inserted bits
///    are defined by the length \a len and by the index \a idx specifying the
///    least significant bit.
/// \param y
///    The source operand containing the bits to be extracted. The extracted
///    bits are the least significant bits of operand \a y of length \a len.
/// \param len
///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
///    are zero, the length is interpreted as 64.
/// \param idx
///    Bits [5:0] specify the index of the least significant bit; the other
///    bits are ignored. If the sum of the index and length is greater than 64,
///    the result is undefined. If the length and index are both zero, bits
///    [63:0] of parameter \a y are inserted into parameter \a x. If the length
///    is zero but the index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector containing the original lower 64-bits of
///    destination operand \a x with the specified bitfields replaced by the
///    lower bits of source operand \a y. The upper 64 bits of the return value
///    are undefined.
#define _mm_inserti_si64(x, y, len, idx) \
  ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \
                                    (__v2di)(__m128i)(y), \
                                    (char)(len), (char)(idx)))

/// Inserts bits of a specified length from the source integer vector
///    \a __y into the lower 64 bits of the destination integer vector \a __x
///    at the index and of the length specified by \a __y.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
///
/// \param __x
///    The destination operand where bits will be inserted. The inserted bits
///    are defined by the length and by the index of the least significant bit
///    specified by operand \a __y.
/// \param __y
///    The source operand containing the bits to be extracted. The extracted
///    bits are the least significant bits of operand \a __y with length
///    specified by bits [69:64]. These are inserted into the destination at the
///    index specified by bits [77:72]; all other bits are ignored. If bits
///    [69:64] are zero, the length is interpreted as 64. If the sum of the
///    index and length is greater than 64, the result is undefined. If the
///    length and index are both zero, bits [63:0] of parameter \a __y are
///    inserted into parameter \a __x. If the length is zero but the index is
///    non-zero, the result is undefined.
/// \returns A 128-bit integer vector containing the original lower 64-bits of
///    destination operand \a __x with the specified bitfields replaced by the
///    lower bits of source operand \a __y. The upper 64 bits of the return
///    value are undefined.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_insert_si64(__m128i __x, __m128i __y)
{
  return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
}

/// Stores a 64-bit double-precision value in a 64-bit memory location.
///    To minimize caching, the data is flagged as non-temporal (unlikely to be
///    used again soon).
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MOVNTSD </c> instruction.
///
/// \param __p
///    The 64-bit memory location used to store the register value.
/// \param __a
///    The 64-bit double-precision floating-point register value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_sd(void *__p, __m128d __a)
{
  /* The public parameter is void *; the builtin is handed a double *. */
  __builtin_ia32_movntsd((double *)__p, (__v2df)__a);
}

/// Stores a 32-bit single-precision floating-point value in a 32-bit
///    memory location. To minimize caching, the data is flagged as
///    non-temporal (unlikely to be used again soon).
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MOVNTSS </c> instruction.
///
/// \param __p
///    The 32-bit memory location used to store the register value.
/// \param __a
///    The 32-bit single-precision floating-point register value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_ss(void *__p, __m128 __a)
{
  /* The public parameter is void *; the builtin is handed a float *. */
  __builtin_ia32_movntss((float *)__p, (__v4sf)__a);
}

#undef __DEFAULT_FN_ATTRS

#endif /* __AMMINTRIN_H */