xref: /freebsd/contrib/llvm-project/clang/lib/Headers/ammintrin.h (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
90b57cec5SDimitry Andric 
100b57cec5SDimitry Andric #ifndef __AMMINTRIN_H
110b57cec5SDimitry Andric #define __AMMINTRIN_H
120b57cec5SDimitry Andric 
13349cc55cSDimitry Andric #if !defined(__i386__) && !defined(__x86_64__)
14349cc55cSDimitry Andric #error "This header is only meant to be used on x86 and x64 architecture"
15349cc55cSDimitry Andric #endif
16349cc55cSDimitry Andric 
170b57cec5SDimitry Andric #include <pmmintrin.h>
180b57cec5SDimitry Andric 
/* Attribute set applied to every inline function defined in this file:
 * always-inline, no-debug, the "sse4a" target feature, and a minimum vector
 * width of 128. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("sse4a"), \
                 __min_vector_width__(128)))
210b57cec5SDimitry Andric 
/// Pulls a bit field out of the lower 64 bits of a 128-bit integer vector,
///    where the field starts at bit \a idx and is \a len bits long.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
///
/// \param x
///    The source vector; the field is taken from its lower 64 bits.
/// \param len
///    The field length, read from bits [5:0]; all higher bits are ignored.
///    A value of zero in bits [5:0] is interpreted as a length of 64.
/// \param idx
///    The position of the field's least significant bit, read from bits
///    [5:0]; all higher bits are ignored. The result is undefined when the
///    sum of the index and length is greater than 64, and also when the
///    length is zero while the index is non-zero. When the length and index
///    are both zero, bits [63:0] of parameter \a x are extracted.
/// \returns A 128-bit integer vector whose lower 64 bits hold the bits
///    extracted from the source operand.
#define _mm_extracti_si64(x, len, idx) \
  ((__m128i)__builtin_ia32_extrqi( \
      (__v2di)(__m128i)(x), \
      (char)(len), \
      (char)(idx)))
490b57cec5SDimitry Andric 
500b57cec5SDimitry Andric /// Extracts the specified bits from the lower 64 bits of the 128-bit
510b57cec5SDimitry Andric ///    integer vector operand at the index and of the length specified by
520b57cec5SDimitry Andric ///    \a __y.
530b57cec5SDimitry Andric ///
540b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
550b57cec5SDimitry Andric ///
560b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
570b57cec5SDimitry Andric ///
580b57cec5SDimitry Andric /// \param __x
590b57cec5SDimitry Andric ///    The value from which bits are extracted.
600b57cec5SDimitry Andric /// \param __y
610b57cec5SDimitry Andric ///    Specifies the index of the least significant bit at [13:8] and the
620b57cec5SDimitry Andric ///    length at [5:0]; all other bits are ignored. If bits [5:0] are zero, the
630b57cec5SDimitry Andric ///    length is interpreted as 64. If the sum of the index and length is
640b57cec5SDimitry Andric ///    greater than 64, the result is undefined. If the length and index are
650b57cec5SDimitry Andric ///    both zero, bits [63:0] of parameter \a __x are extracted. If the length
660b57cec5SDimitry Andric ///    is zero but the index is non-zero, the result is undefined.
670b57cec5SDimitry Andric /// \returns A 128-bit vector whose lower 64 bits contain the bits extracted
680b57cec5SDimitry Andric ///    from the source operand.
690b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
700b57cec5SDimitry Andric _mm_extract_si64(__m128i __x, __m128i __y)
710b57cec5SDimitry Andric {
720b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
730b57cec5SDimitry Andric }
740b57cec5SDimitry Andric 
/// Copies the low \a len bits of the source integer vector \a y into the
///    lower 64 bits of the destination integer vector \a x, placing them at
///    bit position \a idx.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len,
///                          const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
///
/// \param x
///    The destination operand that receives the inserted bits. The bits
///    replaced are selected by the length \a len and by the index \a idx of
///    the least significant bit.
/// \param y
///    The source operand supplying the bits to insert: its least significant
///    \a len bits.
/// \param len
///    The field length, read from bits [5:0]; all higher bits are ignored.
///    A value of zero in bits [5:0] is interpreted as a length of 64.
/// \param idx
///    The position of the field's least significant bit, read from bits
///    [5:0]; all higher bits are ignored. The result is undefined when the
///    sum of the index and length is greater than 64, and also when the
///    length is zero while the index is non-zero. When the length and index
///    are both zero, bits [63:0] of parameter \a y are inserted into
///    parameter \a x.
/// \returns A 128-bit integer vector containing the original lower 64 bits of
///    destination operand \a x with the specified bit field replaced by the
///    lower bits of source operand \a y. The upper 64 bits of the return
///    value are undefined.
#define _mm_inserti_si64(x, y, len, idx) \
  ((__m128i)__builtin_ia32_insertqi( \
      (__v2di)(__m128i)(x), \
      (__v2di)(__m128i)(y), \
      (char)(len), \
      (char)(idx)))
1120b57cec5SDimitry Andric 
1130b57cec5SDimitry Andric /// Inserts bits of a specified length from the source integer vector
1140b57cec5SDimitry Andric ///    \a __y into the lower 64 bits of the destination integer vector \a __x
1150b57cec5SDimitry Andric ///    at the index and of the length specified by \a __y.
1160b57cec5SDimitry Andric ///
1170b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1180b57cec5SDimitry Andric ///
1190b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
1200b57cec5SDimitry Andric ///
1210b57cec5SDimitry Andric /// \param __x
1220b57cec5SDimitry Andric ///    The destination operand where bits will be inserted. The inserted bits
1230b57cec5SDimitry Andric ///    are defined by the length and by the index of the least significant bit
1240b57cec5SDimitry Andric ///    specified by operand \a __y.
1250b57cec5SDimitry Andric /// \param __y
1260b57cec5SDimitry Andric ///    The source operand containing the bits to be extracted. The extracted
1270b57cec5SDimitry Andric ///    bits are the least significant bits of operand \a __y with length
1280b57cec5SDimitry Andric ///    specified by bits [69:64]. These are inserted into the destination at the
1290b57cec5SDimitry Andric ///    index specified by bits [77:72]; all other bits are ignored. If bits
1300b57cec5SDimitry Andric ///    [69:64] are zero, the length is interpreted as 64. If the sum of the
1310b57cec5SDimitry Andric ///    index and length is greater than 64, the result is undefined. If the
1320b57cec5SDimitry Andric ///    length and index are both zero, bits [63:0] of parameter \a __y are
1330b57cec5SDimitry Andric ///    inserted into parameter \a __x. If the length is zero but the index is
1340b57cec5SDimitry Andric ///    non-zero, the result is undefined.
1350b57cec5SDimitry Andric /// \returns A 128-bit integer vector containing the original lower 64-bits of
1360b57cec5SDimitry Andric ///    destination operand \a __x with the specified bitfields replaced by the
1370b57cec5SDimitry Andric ///    lower bits of source operand \a __y. The upper 64 bits of the return
1380b57cec5SDimitry Andric ///    value are undefined.
1390b57cec5SDimitry Andric static __inline__ __m128i __DEFAULT_FN_ATTRS
1400b57cec5SDimitry Andric _mm_insert_si64(__m128i __x, __m128i __y)
1410b57cec5SDimitry Andric {
1420b57cec5SDimitry Andric   return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
1430b57cec5SDimitry Andric }
1440b57cec5SDimitry Andric 
1450b57cec5SDimitry Andric /// Stores a 64-bit double-precision value in a 64-bit memory location.
1460b57cec5SDimitry Andric ///    To minimize caching, the data is flagged as non-temporal (unlikely to be
1470b57cec5SDimitry Andric ///    used again soon).
1480b57cec5SDimitry Andric ///
1490b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1500b57cec5SDimitry Andric ///
1510b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVNTSD </c> instruction.
1520b57cec5SDimitry Andric ///
1530b57cec5SDimitry Andric /// \param __p
1540b57cec5SDimitry Andric ///    The 64-bit memory location used to store the register value.
1550b57cec5SDimitry Andric /// \param __a
1560b57cec5SDimitry Andric ///    The 64-bit double-precision floating-point register value to be stored.
1570b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS
158*5f757f3fSDimitry Andric _mm_stream_sd(void *__p, __m128d __a)
1590b57cec5SDimitry Andric {
160*5f757f3fSDimitry Andric   __builtin_ia32_movntsd((double *)__p, (__v2df)__a);
1610b57cec5SDimitry Andric }
1620b57cec5SDimitry Andric 
1630b57cec5SDimitry Andric /// Stores a 32-bit single-precision floating-point value in a 32-bit
1640b57cec5SDimitry Andric ///    memory location. To minimize caching, the data is flagged as
1650b57cec5SDimitry Andric ///    non-temporal (unlikely to be used again soon).
1660b57cec5SDimitry Andric ///
1670b57cec5SDimitry Andric /// \headerfile <x86intrin.h>
1680b57cec5SDimitry Andric ///
1690b57cec5SDimitry Andric /// This intrinsic corresponds to the <c> MOVNTSS </c> instruction.
1700b57cec5SDimitry Andric ///
1710b57cec5SDimitry Andric /// \param __p
1720b57cec5SDimitry Andric ///    The 32-bit memory location used to store the register value.
1730b57cec5SDimitry Andric /// \param __a
1740b57cec5SDimitry Andric ///    The 32-bit single-precision floating-point register value to be stored.
1750b57cec5SDimitry Andric static __inline__ void __DEFAULT_FN_ATTRS
176*5f757f3fSDimitry Andric _mm_stream_ss(void *__p, __m128 __a)
1770b57cec5SDimitry Andric {
178*5f757f3fSDimitry Andric   __builtin_ia32_movntss((float *)__p, (__v4sf)__a);
1790b57cec5SDimitry Andric }
1800b57cec5SDimitry Andric 
1810b57cec5SDimitry Andric #undef __DEFAULT_FN_ATTRS
1820b57cec5SDimitry Andric 
1830b57cec5SDimitry Andric #endif /* __AMMINTRIN_H */
184