/*===---- ammintrin.h - SSE4a intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __AMMINTRIN_H
#define __AMMINTRIN_H

#include <pmmintrin.h>

/* Define the default attributes for the functions in this file. The macro is
 * private to this header and is removed again by the #undef at the bottom. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("sse4a"),         \
                 __min_vector_width__(128)))

/// Extracts the specified bits from the lower 64 bits of the 128-bit
///    integer vector operand at the index \a idx and of the length \a len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_extracti_si64(__m128i x, const int len, const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
///
/// This intrinsic is provided as a macro; \a len and \a idx are each
///    converted to \c char before being handed to the builtin.
///
/// \param x
///    The value from which bits are extracted.
/// \param len
///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
///    are zero, the length is interpreted as 64.
/// \param idx
///    Bits [5:0] specify the index of the least significant bit; the other
///    bits are ignored. If the sum of the index and length is greater than 64,
///    the result is undefined. If the length and index are both zero, bits
///    [63:0] of parameter \a x are extracted. If the length is zero but the
///    index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector whose lower 64 bits contain the bits
///    extracted from the source operand.
#define _mm_extracti_si64(x, len, idx) \
  ((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(x), \
                                  (char)(len), (char)(idx)))

/// Extracts the specified bits from the lower 64 bits of the 128-bit
///    integer vector operand at the index and of the length specified by
///    \a __y.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> EXTRQ </c> instruction.
///
/// \param __x
///    The value from which bits are extracted.
/// \param __y
///    Specifies the index of the least significant bit at [13:8] and the
///    length at [5:0]; all other bits are ignored. If bits [5:0] are zero, the
///    length is interpreted as 64. If the sum of the index and length is
///    greater than 64, the result is undefined. If the length and index are
///    both zero, bits [63:0] of parameter \a __x are extracted. If the length
///    is zero but the index is non-zero, the result is undefined.
/// \returns A 128-bit vector whose lower 64 bits contain the bits extracted
///    from the source operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_extract_si64(__m128i __x, __m128i __y)
{
  return (__m128i)__builtin_ia32_extrq((__v2di)__x, (__v16qi)__y);
}

/// Inserts bits of a specified length from the source integer vector
///    \a y into the lower 64 bits of the destination integer vector \a x at
///    the index \a idx and of the length \a len.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_inserti_si64(__m128i x, __m128i y, const int len,
/// const int idx);
/// \endcode
///
/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
///
/// This intrinsic is provided as a macro; \a len and \a idx are each
///    converted to \c char before being handed to the builtin.
///
/// \param x
///    The destination operand where bits will be inserted. The inserted bits
///    are defined by the length \a len and by the index \a idx specifying the
///    least significant bit.
/// \param y
///    The source operand containing the bits to be extracted. The extracted
///    bits are the least significant bits of operand \a y of length \a len.
/// \param len
///    Bits [5:0] specify the length; the other bits are ignored. If bits [5:0]
///    are zero, the length is interpreted as 64.
/// \param idx
///    Bits [5:0] specify the index of the least significant bit; the other
///    bits are ignored. If the sum of the index and length is greater than 64,
///    the result is undefined. If the length and index are both zero, bits
///    [63:0] of parameter \a y are inserted into parameter \a x. If the length
///    is zero but the index is non-zero, the result is undefined.
/// \returns A 128-bit integer vector containing the original lower 64 bits of
///    destination operand \a x with the specified bitfields replaced by the
///    lower bits of source operand \a y. The upper 64 bits of the return value
///    are undefined.
#define _mm_inserti_si64(x, y, len, idx) \
  ((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(x), \
                                    (__v2di)(__m128i)(y), \
                                    (char)(len), (char)(idx)))

/// Inserts bits of a specified length from the source integer vector
///    \a __y into the lower 64 bits of the destination integer vector \a __x
///    at the index and of the length specified by \a __y.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> INSERTQ </c> instruction.
///
/// \param __x
///    The destination operand where bits will be inserted. The inserted bits
///    are defined by the length and by the index of the least significant bit
///    specified by operand \a __y.
/// \param __y
///    The source operand containing the bits to be extracted. The extracted
///    bits are the least significant bits of operand \a __y with length
///    specified by bits [69:64]. These are inserted into the destination at the
///    index specified by bits [77:72]; all other bits are ignored. If bits
///    [69:64] are zero, the length is interpreted as 64. If the sum of the
///    index and length is greater than 64, the result is undefined. If the
///    length and index are both zero, bits [63:0] of parameter \a __y are
///    inserted into parameter \a __x. If the length is zero but the index is
///    non-zero, the result is undefined.
/// \returns A 128-bit integer vector containing the original lower 64 bits of
///    destination operand \a __x with the specified bitfields replaced by the
///    lower bits of source operand \a __y. The upper 64 bits of the return
///    value are undefined.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_insert_si64(__m128i __x, __m128i __y)
{
  return (__m128i)__builtin_ia32_insertq((__v2di)__x, (__v2di)__y);
}

/// Stores a 64-bit double-precision value in a 64-bit memory location.
///    To minimize caching, the data is flagged as non-temporal (unlikely to be
///    used again soon).
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MOVNTSD </c> instruction.
///
/// \param __p
///    The 64-bit memory location used to store the register value.
/// \param __a
///    The 64-bit double-precision floating-point register value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_sd(double *__p, __m128d __a)
{
  __builtin_ia32_movntsd(__p, (__v2df)__a);
}

/// Stores a 32-bit single-precision floating-point value in a 32-bit
///    memory location. To minimize caching, the data is flagged as
///    non-temporal (unlikely to be used again soon).
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MOVNTSS </c> instruction.
///
/// \param __p
///    The 32-bit memory location used to store the register value.
/// \param __a
///    The 32-bit single-precision floating-point register value to be stored.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_ss(float *__p, __m128 __a)
{
  __builtin_ia32_movntss(__p, (__v4sf)__a);
}

#undef __DEFAULT_FN_ATTRS

#endif /* __AMMINTRIN_H */