1 /*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10 #ifndef __IMMINTRIN_H 11 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead." 12 #endif 13 14 #ifndef __AVX512VLINTRIN_H 15 #define __AVX512VLINTRIN_H 16 17 #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128))) 18 #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256))) 19 20 typedef short __v2hi __attribute__((__vector_size__(4))); 21 typedef char __v4qi __attribute__((__vector_size__(4))); 22 typedef char __v2qi __attribute__((__vector_size__(2))); 23 24 /* Integer compare */ 25 26 #define _mm_cmpeq_epi32_mask(A, B) \ 27 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ) 28 #define _mm_mask_cmpeq_epi32_mask(k, A, B) \ 29 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ) 30 #define _mm_cmpge_epi32_mask(A, B) \ 31 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE) 32 #define _mm_mask_cmpge_epi32_mask(k, A, B) \ 33 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE) 34 #define _mm_cmpgt_epi32_mask(A, B) \ 35 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT) 36 #define _mm_mask_cmpgt_epi32_mask(k, A, B) \ 37 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT) 38 #define _mm_cmple_epi32_mask(A, B) \ 39 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE) 40 #define _mm_mask_cmple_epi32_mask(k, A, B) \ 41 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE) 42 #define _mm_cmplt_epi32_mask(A, B) \ 43 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT) 44 #define _mm_mask_cmplt_epi32_mask(k, A, B) \ 45 _mm_mask_cmp_epi32_mask((k), (A), (B), 
_MM_CMPINT_LT) 46 #define _mm_cmpneq_epi32_mask(A, B) \ 47 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE) 48 #define _mm_mask_cmpneq_epi32_mask(k, A, B) \ 49 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE) 50 51 #define _mm256_cmpeq_epi32_mask(A, B) \ 52 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ) 53 #define _mm256_mask_cmpeq_epi32_mask(k, A, B) \ 54 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ) 55 #define _mm256_cmpge_epi32_mask(A, B) \ 56 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE) 57 #define _mm256_mask_cmpge_epi32_mask(k, A, B) \ 58 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE) 59 #define _mm256_cmpgt_epi32_mask(A, B) \ 60 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT) 61 #define _mm256_mask_cmpgt_epi32_mask(k, A, B) \ 62 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT) 63 #define _mm256_cmple_epi32_mask(A, B) \ 64 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE) 65 #define _mm256_mask_cmple_epi32_mask(k, A, B) \ 66 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE) 67 #define _mm256_cmplt_epi32_mask(A, B) \ 68 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT) 69 #define _mm256_mask_cmplt_epi32_mask(k, A, B) \ 70 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT) 71 #define _mm256_cmpneq_epi32_mask(A, B) \ 72 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE) 73 #define _mm256_mask_cmpneq_epi32_mask(k, A, B) \ 74 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE) 75 76 #define _mm_cmpeq_epu32_mask(A, B) \ 77 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ) 78 #define _mm_mask_cmpeq_epu32_mask(k, A, B) \ 79 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ) 80 #define _mm_cmpge_epu32_mask(A, B) \ 81 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE) 82 #define _mm_mask_cmpge_epu32_mask(k, A, B) \ 83 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE) 84 #define _mm_cmpgt_epu32_mask(A, B) \ 85 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT) 86 #define _mm_mask_cmpgt_epu32_mask(k, A, B) \ 87 _mm_mask_cmp_epu32_mask((k), (A), 
(B), _MM_CMPINT_GT) 88 #define _mm_cmple_epu32_mask(A, B) \ 89 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE) 90 #define _mm_mask_cmple_epu32_mask(k, A, B) \ 91 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE) 92 #define _mm_cmplt_epu32_mask(A, B) \ 93 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT) 94 #define _mm_mask_cmplt_epu32_mask(k, A, B) \ 95 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT) 96 #define _mm_cmpneq_epu32_mask(A, B) \ 97 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE) 98 #define _mm_mask_cmpneq_epu32_mask(k, A, B) \ 99 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE) 100 101 #define _mm256_cmpeq_epu32_mask(A, B) \ 102 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ) 103 #define _mm256_mask_cmpeq_epu32_mask(k, A, B) \ 104 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ) 105 #define _mm256_cmpge_epu32_mask(A, B) \ 106 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE) 107 #define _mm256_mask_cmpge_epu32_mask(k, A, B) \ 108 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE) 109 #define _mm256_cmpgt_epu32_mask(A, B) \ 110 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT) 111 #define _mm256_mask_cmpgt_epu32_mask(k, A, B) \ 112 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT) 113 #define _mm256_cmple_epu32_mask(A, B) \ 114 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE) 115 #define _mm256_mask_cmple_epu32_mask(k, A, B) \ 116 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE) 117 #define _mm256_cmplt_epu32_mask(A, B) \ 118 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT) 119 #define _mm256_mask_cmplt_epu32_mask(k, A, B) \ 120 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT) 121 #define _mm256_cmpneq_epu32_mask(A, B) \ 122 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE) 123 #define _mm256_mask_cmpneq_epu32_mask(k, A, B) \ 124 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE) 125 126 #define _mm_cmpeq_epi64_mask(A, B) \ 127 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ) 128 #define _mm_mask_cmpeq_epi64_mask(k, A, B) \ 129 
_mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ) 130 #define _mm_cmpge_epi64_mask(A, B) \ 131 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE) 132 #define _mm_mask_cmpge_epi64_mask(k, A, B) \ 133 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE) 134 #define _mm_cmpgt_epi64_mask(A, B) \ 135 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT) 136 #define _mm_mask_cmpgt_epi64_mask(k, A, B) \ 137 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT) 138 #define _mm_cmple_epi64_mask(A, B) \ 139 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE) 140 #define _mm_mask_cmple_epi64_mask(k, A, B) \ 141 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE) 142 #define _mm_cmplt_epi64_mask(A, B) \ 143 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT) 144 #define _mm_mask_cmplt_epi64_mask(k, A, B) \ 145 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT) 146 #define _mm_cmpneq_epi64_mask(A, B) \ 147 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE) 148 #define _mm_mask_cmpneq_epi64_mask(k, A, B) \ 149 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE) 150 151 #define _mm256_cmpeq_epi64_mask(A, B) \ 152 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ) 153 #define _mm256_mask_cmpeq_epi64_mask(k, A, B) \ 154 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ) 155 #define _mm256_cmpge_epi64_mask(A, B) \ 156 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE) 157 #define _mm256_mask_cmpge_epi64_mask(k, A, B) \ 158 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE) 159 #define _mm256_cmpgt_epi64_mask(A, B) \ 160 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT) 161 #define _mm256_mask_cmpgt_epi64_mask(k, A, B) \ 162 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT) 163 #define _mm256_cmple_epi64_mask(A, B) \ 164 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE) 165 #define _mm256_mask_cmple_epi64_mask(k, A, B) \ 166 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE) 167 #define _mm256_cmplt_epi64_mask(A, B) \ 168 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT) 169 #define 
_mm256_mask_cmplt_epi64_mask(k, A, B) \ 170 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT) 171 #define _mm256_cmpneq_epi64_mask(A, B) \ 172 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE) 173 #define _mm256_mask_cmpneq_epi64_mask(k, A, B) \ 174 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE) 175 176 #define _mm_cmpeq_epu64_mask(A, B) \ 177 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ) 178 #define _mm_mask_cmpeq_epu64_mask(k, A, B) \ 179 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ) 180 #define _mm_cmpge_epu64_mask(A, B) \ 181 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE) 182 #define _mm_mask_cmpge_epu64_mask(k, A, B) \ 183 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE) 184 #define _mm_cmpgt_epu64_mask(A, B) \ 185 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT) 186 #define _mm_mask_cmpgt_epu64_mask(k, A, B) \ 187 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT) 188 #define _mm_cmple_epu64_mask(A, B) \ 189 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE) 190 #define _mm_mask_cmple_epu64_mask(k, A, B) \ 191 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE) 192 #define _mm_cmplt_epu64_mask(A, B) \ 193 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT) 194 #define _mm_mask_cmplt_epu64_mask(k, A, B) \ 195 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT) 196 #define _mm_cmpneq_epu64_mask(A, B) \ 197 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE) 198 #define _mm_mask_cmpneq_epu64_mask(k, A, B) \ 199 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) 200 201 #define _mm256_cmpeq_epu64_mask(A, B) \ 202 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ) 203 #define _mm256_mask_cmpeq_epu64_mask(k, A, B) \ 204 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ) 205 #define _mm256_cmpge_epu64_mask(A, B) \ 206 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE) 207 #define _mm256_mask_cmpge_epu64_mask(k, A, B) \ 208 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE) 209 #define _mm256_cmpgt_epu64_mask(A, B) \ 210 _mm256_cmp_epu64_mask((A), (B), 
_MM_CMPINT_GT) 211 #define _mm256_mask_cmpgt_epu64_mask(k, A, B) \ 212 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT) 213 #define _mm256_cmple_epu64_mask(A, B) \ 214 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE) 215 #define _mm256_mask_cmple_epu64_mask(k, A, B) \ 216 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE) 217 #define _mm256_cmplt_epu64_mask(A, B) \ 218 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT) 219 #define _mm256_mask_cmplt_epu64_mask(k, A, B) \ 220 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT) 221 #define _mm256_cmpneq_epu64_mask(A, B) \ 222 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE) 223 #define _mm256_mask_cmpneq_epu64_mask(k, A, B) \ 224 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) 225 226 static __inline__ __m256i __DEFAULT_FN_ATTRS256 227 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 228 { 229 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 230 (__v8si)_mm256_add_epi32(__A, __B), 231 (__v8si)__W); 232 } 233 234 static __inline__ __m256i __DEFAULT_FN_ATTRS256 235 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B) 236 { 237 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 238 (__v8si)_mm256_add_epi32(__A, __B), 239 (__v8si)_mm256_setzero_si256()); 240 } 241 242 static __inline__ __m256i __DEFAULT_FN_ATTRS256 243 _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 244 { 245 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 246 (__v4di)_mm256_add_epi64(__A, __B), 247 (__v4di)__W); 248 } 249 250 static __inline__ __m256i __DEFAULT_FN_ATTRS256 251 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B) 252 { 253 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 254 (__v4di)_mm256_add_epi64(__A, __B), 255 (__v4di)_mm256_setzero_si256()); 256 } 257 258 static __inline__ __m256i __DEFAULT_FN_ATTRS256 259 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 260 { 261 return 
(__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 262 (__v8si)_mm256_sub_epi32(__A, __B), 263 (__v8si)__W); 264 } 265 266 static __inline__ __m256i __DEFAULT_FN_ATTRS256 267 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B) 268 { 269 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 270 (__v8si)_mm256_sub_epi32(__A, __B), 271 (__v8si)_mm256_setzero_si256()); 272 } 273 274 static __inline__ __m256i __DEFAULT_FN_ATTRS256 275 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 276 { 277 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 278 (__v4di)_mm256_sub_epi64(__A, __B), 279 (__v4di)__W); 280 } 281 282 static __inline__ __m256i __DEFAULT_FN_ATTRS256 283 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B) 284 { 285 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 286 (__v4di)_mm256_sub_epi64(__A, __B), 287 (__v4di)_mm256_setzero_si256()); 288 } 289 290 static __inline__ __m128i __DEFAULT_FN_ATTRS128 291 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 292 { 293 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 294 (__v4si)_mm_add_epi32(__A, __B), 295 (__v4si)__W); 296 } 297 298 static __inline__ __m128i __DEFAULT_FN_ATTRS128 299 _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B) 300 { 301 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 302 (__v4si)_mm_add_epi32(__A, __B), 303 (__v4si)_mm_setzero_si128()); 304 } 305 306 static __inline__ __m128i __DEFAULT_FN_ATTRS128 307 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 308 { 309 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 310 (__v2di)_mm_add_epi64(__A, __B), 311 (__v2di)__W); 312 } 313 314 static __inline__ __m128i __DEFAULT_FN_ATTRS128 315 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) 316 { 317 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 318 (__v2di)_mm_add_epi64(__A, __B), 319 (__v2di)_mm_setzero_si128()); 320 } 321 322 static 
__inline__ __m128i __DEFAULT_FN_ATTRS128 323 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 324 { 325 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 326 (__v4si)_mm_sub_epi32(__A, __B), 327 (__v4si)__W); 328 } 329 330 static __inline__ __m128i __DEFAULT_FN_ATTRS128 331 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B) 332 { 333 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 334 (__v4si)_mm_sub_epi32(__A, __B), 335 (__v4si)_mm_setzero_si128()); 336 } 337 338 static __inline__ __m128i __DEFAULT_FN_ATTRS128 339 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 340 { 341 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 342 (__v2di)_mm_sub_epi64(__A, __B), 343 (__v2di)__W); 344 } 345 346 static __inline__ __m128i __DEFAULT_FN_ATTRS128 347 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) 348 { 349 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 350 (__v2di)_mm_sub_epi64(__A, __B), 351 (__v2di)_mm_setzero_si128()); 352 } 353 354 static __inline__ __m256i __DEFAULT_FN_ATTRS256 355 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 356 { 357 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 358 (__v4di)_mm256_mul_epi32(__X, __Y), 359 (__v4di)__W); 360 } 361 362 static __inline__ __m256i __DEFAULT_FN_ATTRS256 363 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y) 364 { 365 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 366 (__v4di)_mm256_mul_epi32(__X, __Y), 367 (__v4di)_mm256_setzero_si256()); 368 } 369 370 static __inline__ __m128i __DEFAULT_FN_ATTRS128 371 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 372 { 373 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 374 (__v2di)_mm_mul_epi32(__X, __Y), 375 (__v2di)__W); 376 } 377 378 static __inline__ __m128i __DEFAULT_FN_ATTRS128 379 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y) 380 { 381 return 
(__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 382 (__v2di)_mm_mul_epi32(__X, __Y), 383 (__v2di)_mm_setzero_si128()); 384 } 385 386 static __inline__ __m256i __DEFAULT_FN_ATTRS256 387 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 388 { 389 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 390 (__v4di)_mm256_mul_epu32(__X, __Y), 391 (__v4di)__W); 392 } 393 394 static __inline__ __m256i __DEFAULT_FN_ATTRS256 395 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y) 396 { 397 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 398 (__v4di)_mm256_mul_epu32(__X, __Y), 399 (__v4di)_mm256_setzero_si256()); 400 } 401 402 static __inline__ __m128i __DEFAULT_FN_ATTRS128 403 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 404 { 405 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 406 (__v2di)_mm_mul_epu32(__X, __Y), 407 (__v2di)__W); 408 } 409 410 static __inline__ __m128i __DEFAULT_FN_ATTRS128 411 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y) 412 { 413 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 414 (__v2di)_mm_mul_epu32(__X, __Y), 415 (__v2di)_mm_setzero_si128()); 416 } 417 418 static __inline__ __m256i __DEFAULT_FN_ATTRS256 419 _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) 420 { 421 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 422 (__v8si)_mm256_mullo_epi32(__A, __B), 423 (__v8si)_mm256_setzero_si256()); 424 } 425 426 static __inline__ __m256i __DEFAULT_FN_ATTRS256 427 _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) 428 { 429 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 430 (__v8si)_mm256_mullo_epi32(__A, __B), 431 (__v8si)__W); 432 } 433 434 static __inline__ __m128i __DEFAULT_FN_ATTRS128 435 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B) 436 { 437 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 438 (__v4si)_mm_mullo_epi32(__A, __B), 439 
(__v4si)_mm_setzero_si128()); 440 } 441 442 static __inline__ __m128i __DEFAULT_FN_ATTRS128 443 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) 444 { 445 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 446 (__v4si)_mm_mullo_epi32(__A, __B), 447 (__v4si)__W); 448 } 449 450 static __inline__ __m256i __DEFAULT_FN_ATTRS256 451 _mm256_and_epi32(__m256i __a, __m256i __b) 452 { 453 return (__m256i)((__v8su)__a & (__v8su)__b); 454 } 455 456 static __inline__ __m256i __DEFAULT_FN_ATTRS256 457 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 458 { 459 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 460 (__v8si)_mm256_and_epi32(__A, __B), 461 (__v8si)__W); 462 } 463 464 static __inline__ __m256i __DEFAULT_FN_ATTRS256 465 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B) 466 { 467 return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B); 468 } 469 470 static __inline__ __m128i __DEFAULT_FN_ATTRS128 471 _mm_and_epi32(__m128i __a, __m128i __b) 472 { 473 return (__m128i)((__v4su)__a & (__v4su)__b); 474 } 475 476 static __inline__ __m128i __DEFAULT_FN_ATTRS128 477 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 478 { 479 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 480 (__v4si)_mm_and_epi32(__A, __B), 481 (__v4si)__W); 482 } 483 484 static __inline__ __m128i __DEFAULT_FN_ATTRS128 485 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B) 486 { 487 return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B); 488 } 489 490 static __inline__ __m256i __DEFAULT_FN_ATTRS256 491 _mm256_andnot_epi32(__m256i __A, __m256i __B) 492 { 493 return (__m256i)(~(__v8su)__A & (__v8su)__B); 494 } 495 496 static __inline__ __m256i __DEFAULT_FN_ATTRS256 497 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 498 { 499 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 500 (__v8si)_mm256_andnot_epi32(__A, __B), 501 
(__v8si)__W); 502 } 503 504 static __inline__ __m256i __DEFAULT_FN_ATTRS256 505 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B) 506 { 507 return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(), 508 __U, __A, __B); 509 } 510 511 static __inline__ __m128i __DEFAULT_FN_ATTRS128 512 _mm_andnot_epi32(__m128i __A, __m128i __B) 513 { 514 return (__m128i)(~(__v4su)__A & (__v4su)__B); 515 } 516 517 static __inline__ __m128i __DEFAULT_FN_ATTRS128 518 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 519 { 520 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 521 (__v4si)_mm_andnot_epi32(__A, __B), 522 (__v4si)__W); 523 } 524 525 static __inline__ __m128i __DEFAULT_FN_ATTRS128 526 _mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B) 527 { 528 return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B); 529 } 530 531 static __inline__ __m256i __DEFAULT_FN_ATTRS256 532 _mm256_or_epi32(__m256i __a, __m256i __b) 533 { 534 return (__m256i)((__v8su)__a | (__v8su)__b); 535 } 536 537 static __inline__ __m256i __DEFAULT_FN_ATTRS256 538 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 539 { 540 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 541 (__v8si)_mm256_or_epi32(__A, __B), 542 (__v8si)__W); 543 } 544 545 static __inline__ __m256i __DEFAULT_FN_ATTRS256 546 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B) 547 { 548 return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B); 549 } 550 551 static __inline__ __m128i __DEFAULT_FN_ATTRS128 552 _mm_or_epi32(__m128i __a, __m128i __b) 553 { 554 return (__m128i)((__v4su)__a | (__v4su)__b); 555 } 556 557 static __inline__ __m128i __DEFAULT_FN_ATTRS128 558 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 559 { 560 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 561 (__v4si)_mm_or_epi32(__A, __B), 562 (__v4si)__W); 563 } 564 565 static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 566 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B) 567 { 568 return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B); 569 } 570 571 static __inline__ __m256i __DEFAULT_FN_ATTRS256 572 _mm256_xor_epi32(__m256i __a, __m256i __b) 573 { 574 return (__m256i)((__v8su)__a ^ (__v8su)__b); 575 } 576 577 static __inline__ __m256i __DEFAULT_FN_ATTRS256 578 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 579 { 580 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 581 (__v8si)_mm256_xor_epi32(__A, __B), 582 (__v8si)__W); 583 } 584 585 static __inline__ __m256i __DEFAULT_FN_ATTRS256 586 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B) 587 { 588 return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B); 589 } 590 591 static __inline__ __m128i __DEFAULT_FN_ATTRS128 592 _mm_xor_epi32(__m128i __a, __m128i __b) 593 { 594 return (__m128i)((__v4su)__a ^ (__v4su)__b); 595 } 596 597 static __inline__ __m128i __DEFAULT_FN_ATTRS128 598 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 599 { 600 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 601 (__v4si)_mm_xor_epi32(__A, __B), 602 (__v4si)__W); 603 } 604 605 static __inline__ __m128i __DEFAULT_FN_ATTRS128 606 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B) 607 { 608 return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B); 609 } 610 611 static __inline__ __m256i __DEFAULT_FN_ATTRS256 612 _mm256_and_epi64(__m256i __a, __m256i __b) 613 { 614 return (__m256i)((__v4du)__a & (__v4du)__b); 615 } 616 617 static __inline__ __m256i __DEFAULT_FN_ATTRS256 618 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 619 { 620 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 621 (__v4di)_mm256_and_epi64(__A, __B), 622 (__v4di)__W); 623 } 624 625 static __inline__ __m256i __DEFAULT_FN_ATTRS256 626 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i 
__B) 627 { 628 return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B); 629 } 630 631 static __inline__ __m128i __DEFAULT_FN_ATTRS128 632 _mm_and_epi64(__m128i __a, __m128i __b) 633 { 634 return (__m128i)((__v2du)__a & (__v2du)__b); 635 } 636 637 static __inline__ __m128i __DEFAULT_FN_ATTRS128 638 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 639 { 640 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 641 (__v2di)_mm_and_epi64(__A, __B), 642 (__v2di)__W); 643 } 644 645 static __inline__ __m128i __DEFAULT_FN_ATTRS128 646 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B) 647 { 648 return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B); 649 } 650 651 static __inline__ __m256i __DEFAULT_FN_ATTRS256 652 _mm256_andnot_epi64(__m256i __A, __m256i __B) 653 { 654 return (__m256i)(~(__v4du)__A & (__v4du)__B); 655 } 656 657 static __inline__ __m256i __DEFAULT_FN_ATTRS256 658 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 659 { 660 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 661 (__v4di)_mm256_andnot_epi64(__A, __B), 662 (__v4di)__W); 663 } 664 665 static __inline__ __m256i __DEFAULT_FN_ATTRS256 666 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B) 667 { 668 return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(), 669 __U, __A, __B); 670 } 671 672 static __inline__ __m128i __DEFAULT_FN_ATTRS128 673 _mm_andnot_epi64(__m128i __A, __m128i __B) 674 { 675 return (__m128i)(~(__v2du)__A & (__v2du)__B); 676 } 677 678 static __inline__ __m128i __DEFAULT_FN_ATTRS128 679 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 680 { 681 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 682 (__v2di)_mm_andnot_epi64(__A, __B), 683 (__v2di)__W); 684 } 685 686 static __inline__ __m128i __DEFAULT_FN_ATTRS128 687 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B) 688 { 689 return 
(__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B); 690 } 691 692 static __inline__ __m256i __DEFAULT_FN_ATTRS256 693 _mm256_or_epi64(__m256i __a, __m256i __b) 694 { 695 return (__m256i)((__v4du)__a | (__v4du)__b); 696 } 697 698 static __inline__ __m256i __DEFAULT_FN_ATTRS256 699 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 700 { 701 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 702 (__v4di)_mm256_or_epi64(__A, __B), 703 (__v4di)__W); 704 } 705 706 static __inline__ __m256i __DEFAULT_FN_ATTRS256 707 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B) 708 { 709 return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B); 710 } 711 712 static __inline__ __m128i __DEFAULT_FN_ATTRS128 713 _mm_or_epi64(__m128i __a, __m128i __b) 714 { 715 return (__m128i)((__v2du)__a | (__v2du)__b); 716 } 717 718 static __inline__ __m128i __DEFAULT_FN_ATTRS128 719 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 720 { 721 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 722 (__v2di)_mm_or_epi64(__A, __B), 723 (__v2di)__W); 724 } 725 726 static __inline__ __m128i __DEFAULT_FN_ATTRS128 727 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B) 728 { 729 return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B); 730 } 731 732 static __inline__ __m256i __DEFAULT_FN_ATTRS256 733 _mm256_xor_epi64(__m256i __a, __m256i __b) 734 { 735 return (__m256i)((__v4du)__a ^ (__v4du)__b); 736 } 737 738 static __inline__ __m256i __DEFAULT_FN_ATTRS256 739 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 740 { 741 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 742 (__v4di)_mm256_xor_epi64(__A, __B), 743 (__v4di)__W); 744 } 745 746 static __inline__ __m256i __DEFAULT_FN_ATTRS256 747 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B) 748 { 749 return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B); 750 } 751 752 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 753 _mm_xor_epi64(__m128i __a, __m128i __b) 754 { 755 return (__m128i)((__v2du)__a ^ (__v2du)__b); 756 } 757 758 static __inline__ __m128i __DEFAULT_FN_ATTRS128 759 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, 760 __m128i __B) 761 { 762 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 763 (__v2di)_mm_xor_epi64(__A, __B), 764 (__v2di)__W); 765 } 766 767 static __inline__ __m128i __DEFAULT_FN_ATTRS128 768 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B) 769 { 770 return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B); 771 } 772 773 #define _mm_cmp_epi32_mask(a, b, p) \ 774 (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ 775 (__v4si)(__m128i)(b), (int)(p), \ 776 (__mmask8)-1) 777 778 #define _mm_mask_cmp_epi32_mask(m, a, b, p) \ 779 (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ 780 (__v4si)(__m128i)(b), (int)(p), \ 781 (__mmask8)(m)) 782 783 #define _mm_cmp_epu32_mask(a, b, p) \ 784 (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ 785 (__v4si)(__m128i)(b), (int)(p), \ 786 (__mmask8)-1) 787 788 #define _mm_mask_cmp_epu32_mask(m, a, b, p) \ 789 (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ 790 (__v4si)(__m128i)(b), (int)(p), \ 791 (__mmask8)(m)) 792 793 #define _mm256_cmp_epi32_mask(a, b, p) \ 794 (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ 795 (__v8si)(__m256i)(b), (int)(p), \ 796 (__mmask8)-1) 797 798 #define _mm256_mask_cmp_epi32_mask(m, a, b, p) \ 799 (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ 800 (__v8si)(__m256i)(b), (int)(p), \ 801 (__mmask8)(m)) 802 803 #define _mm256_cmp_epu32_mask(a, b, p) \ 804 (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ 805 (__v8si)(__m256i)(b), (int)(p), \ 806 (__mmask8)-1) 807 808 #define _mm256_mask_cmp_epu32_mask(m, a, b, p) \ 809 (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ 810 (__v8si)(__m256i)(b), 
(int)(p), \ 811 (__mmask8)(m)) 812 813 #define _mm_cmp_epi64_mask(a, b, p) \ 814 (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ 815 (__v2di)(__m128i)(b), (int)(p), \ 816 (__mmask8)-1) 817 818 #define _mm_mask_cmp_epi64_mask(m, a, b, p) \ 819 (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ 820 (__v2di)(__m128i)(b), (int)(p), \ 821 (__mmask8)(m)) 822 823 #define _mm_cmp_epu64_mask(a, b, p) \ 824 (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ 825 (__v2di)(__m128i)(b), (int)(p), \ 826 (__mmask8)-1) 827 828 #define _mm_mask_cmp_epu64_mask(m, a, b, p) \ 829 (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ 830 (__v2di)(__m128i)(b), (int)(p), \ 831 (__mmask8)(m)) 832 833 #define _mm256_cmp_epi64_mask(a, b, p) \ 834 (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ 835 (__v4di)(__m256i)(b), (int)(p), \ 836 (__mmask8)-1) 837 838 #define _mm256_mask_cmp_epi64_mask(m, a, b, p) \ 839 (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ 840 (__v4di)(__m256i)(b), (int)(p), \ 841 (__mmask8)(m)) 842 843 #define _mm256_cmp_epu64_mask(a, b, p) \ 844 (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ 845 (__v4di)(__m256i)(b), (int)(p), \ 846 (__mmask8)-1) 847 848 #define _mm256_mask_cmp_epu64_mask(m, a, b, p) \ 849 (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ 850 (__v4di)(__m256i)(b), (int)(p), \ 851 (__mmask8)(m)) 852 853 #define _mm256_cmp_ps_mask(a, b, p) \ 854 (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ 855 (__v8sf)(__m256)(b), (int)(p), \ 856 (__mmask8)-1) 857 858 #define _mm256_mask_cmp_ps_mask(m, a, b, p) \ 859 (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ 860 (__v8sf)(__m256)(b), (int)(p), \ 861 (__mmask8)(m)) 862 863 #define _mm256_cmp_pd_mask(a, b, p) \ 864 (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ 865 (__v4df)(__m256d)(b), (int)(p), \ 866 (__mmask8)-1) 867 868 #define _mm256_mask_cmp_pd_mask(m, a, b, p) \ 869 
(__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ 870 (__v4df)(__m256d)(b), (int)(p), \ 871 (__mmask8)(m)) 872 873 #define _mm_cmp_ps_mask(a, b, p) \ 874 (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ 875 (__v4sf)(__m128)(b), (int)(p), \ 876 (__mmask8)-1) 877 878 #define _mm_mask_cmp_ps_mask(m, a, b, p) \ 879 (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ 880 (__v4sf)(__m128)(b), (int)(p), \ 881 (__mmask8)(m)) 882 883 #define _mm_cmp_pd_mask(a, b, p) \ 884 (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ 885 (__v2df)(__m128d)(b), (int)(p), \ 886 (__mmask8)-1) 887 888 #define _mm_mask_cmp_pd_mask(m, a, b, p) \ 889 (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ 890 (__v2df)(__m128d)(b), (int)(p), \ 891 (__mmask8)(m)) 892 893 static __inline__ __m128d __DEFAULT_FN_ATTRS128 894 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 895 { 896 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 897 __builtin_ia32_vfmaddpd ((__v2df) __A, 898 (__v2df) __B, 899 (__v2df) __C), 900 (__v2df) __A); 901 } 902 903 static __inline__ __m128d __DEFAULT_FN_ATTRS128 904 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 905 { 906 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 907 __builtin_ia32_vfmaddpd ((__v2df) __A, 908 (__v2df) __B, 909 (__v2df) __C), 910 (__v2df) __C); 911 } 912 913 static __inline__ __m128d __DEFAULT_FN_ATTRS128 914 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 915 { 916 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 917 __builtin_ia32_vfmaddpd ((__v2df) __A, 918 (__v2df) __B, 919 (__v2df) __C), 920 (__v2df)_mm_setzero_pd()); 921 } 922 923 static __inline__ __m128d __DEFAULT_FN_ATTRS128 924 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 925 { 926 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 927 __builtin_ia32_vfmaddpd ((__v2df) __A, 928 (__v2df) __B, 929 
-(__v2df) __C), 930 (__v2df) __A); 931 } 932 933 static __inline__ __m128d __DEFAULT_FN_ATTRS128 934 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 935 { 936 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 937 __builtin_ia32_vfmaddpd ((__v2df) __A, 938 (__v2df) __B, 939 -(__v2df) __C), 940 (__v2df)_mm_setzero_pd()); 941 } 942 943 static __inline__ __m128d __DEFAULT_FN_ATTRS128 944 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 945 { 946 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 947 __builtin_ia32_vfmaddpd (-(__v2df) __A, 948 (__v2df) __B, 949 (__v2df) __C), 950 (__v2df) __C); 951 } 952 953 static __inline__ __m128d __DEFAULT_FN_ATTRS128 954 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 955 { 956 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 957 __builtin_ia32_vfmaddpd (-(__v2df) __A, 958 (__v2df) __B, 959 (__v2df) __C), 960 (__v2df)_mm_setzero_pd()); 961 } 962 963 static __inline__ __m128d __DEFAULT_FN_ATTRS128 964 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 965 { 966 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 967 __builtin_ia32_vfmaddpd (-(__v2df) __A, 968 (__v2df) __B, 969 -(__v2df) __C), 970 (__v2df)_mm_setzero_pd()); 971 } 972 973 static __inline__ __m256d __DEFAULT_FN_ATTRS256 974 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 975 { 976 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 977 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 978 (__v4df) __B, 979 (__v4df) __C), 980 (__v4df) __A); 981 } 982 983 static __inline__ __m256d __DEFAULT_FN_ATTRS256 984 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 985 { 986 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 987 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 988 (__v4df) __B, 989 (__v4df) __C), 990 (__v4df) __C); 991 } 992 993 static __inline__ __m256d __DEFAULT_FN_ATTRS256 
994 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 995 { 996 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 997 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 998 (__v4df) __B, 999 (__v4df) __C), 1000 (__v4df)_mm256_setzero_pd()); 1001 } 1002 1003 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1004 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1005 { 1006 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1007 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1008 (__v4df) __B, 1009 -(__v4df) __C), 1010 (__v4df) __A); 1011 } 1012 1013 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1014 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1015 { 1016 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1017 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1018 (__v4df) __B, 1019 -(__v4df) __C), 1020 (__v4df)_mm256_setzero_pd()); 1021 } 1022 1023 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1024 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1025 { 1026 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1027 __builtin_ia32_vfmaddpd256 (-(__v4df) __A, 1028 (__v4df) __B, 1029 (__v4df) __C), 1030 (__v4df) __C); 1031 } 1032 1033 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1034 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1035 { 1036 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1037 __builtin_ia32_vfmaddpd256 (-(__v4df) __A, 1038 (__v4df) __B, 1039 (__v4df) __C), 1040 (__v4df)_mm256_setzero_pd()); 1041 } 1042 1043 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1044 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1045 { 1046 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1047 __builtin_ia32_vfmaddpd256 (-(__v4df) __A, 1048 (__v4df) __B, 1049 -(__v4df) __C), 1050 (__v4df)_mm256_setzero_pd()); 1051 } 1052 1053 static __inline__ __m128 
__DEFAULT_FN_ATTRS128 1054 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1055 { 1056 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1057 __builtin_ia32_vfmaddps ((__v4sf) __A, 1058 (__v4sf) __B, 1059 (__v4sf) __C), 1060 (__v4sf) __A); 1061 } 1062 1063 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1064 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1065 { 1066 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1067 __builtin_ia32_vfmaddps ((__v4sf) __A, 1068 (__v4sf) __B, 1069 (__v4sf) __C), 1070 (__v4sf) __C); 1071 } 1072 1073 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1074 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1075 { 1076 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1077 __builtin_ia32_vfmaddps ((__v4sf) __A, 1078 (__v4sf) __B, 1079 (__v4sf) __C), 1080 (__v4sf)_mm_setzero_ps()); 1081 } 1082 1083 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1084 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1085 { 1086 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1087 __builtin_ia32_vfmaddps ((__v4sf) __A, 1088 (__v4sf) __B, 1089 -(__v4sf) __C), 1090 (__v4sf) __A); 1091 } 1092 1093 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1094 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1095 { 1096 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1097 __builtin_ia32_vfmaddps ((__v4sf) __A, 1098 (__v4sf) __B, 1099 -(__v4sf) __C), 1100 (__v4sf)_mm_setzero_ps()); 1101 } 1102 1103 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1104 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1105 { 1106 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1107 __builtin_ia32_vfmaddps (-(__v4sf) __A, 1108 (__v4sf) __B, 1109 (__v4sf) __C), 1110 (__v4sf) __C); 1111 } 1112 1113 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1114 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 
__C) 1115 { 1116 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1117 __builtin_ia32_vfmaddps (-(__v4sf) __A, 1118 (__v4sf) __B, 1119 (__v4sf) __C), 1120 (__v4sf)_mm_setzero_ps()); 1121 } 1122 1123 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1124 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1125 { 1126 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1127 __builtin_ia32_vfmaddps (-(__v4sf) __A, 1128 (__v4sf) __B, 1129 -(__v4sf) __C), 1130 (__v4sf)_mm_setzero_ps()); 1131 } 1132 1133 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1134 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1135 { 1136 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1137 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1138 (__v8sf) __B, 1139 (__v8sf) __C), 1140 (__v8sf) __A); 1141 } 1142 1143 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1144 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1145 { 1146 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1147 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1148 (__v8sf) __B, 1149 (__v8sf) __C), 1150 (__v8sf) __C); 1151 } 1152 1153 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1154 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1155 { 1156 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1157 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1158 (__v8sf) __B, 1159 (__v8sf) __C), 1160 (__v8sf)_mm256_setzero_ps()); 1161 } 1162 1163 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1164 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1165 { 1166 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1167 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1168 (__v8sf) __B, 1169 -(__v8sf) __C), 1170 (__v8sf) __A); 1171 } 1172 1173 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1174 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1175 { 1176 return (__m256) 
__builtin_ia32_selectps_256((__mmask8) __U, 1177 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1178 (__v8sf) __B, 1179 -(__v8sf) __C), 1180 (__v8sf)_mm256_setzero_ps()); 1181 } 1182 1183 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1184 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1185 { 1186 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1187 __builtin_ia32_vfmaddps256 (-(__v8sf) __A, 1188 (__v8sf) __B, 1189 (__v8sf) __C), 1190 (__v8sf) __C); 1191 } 1192 1193 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1194 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1195 { 1196 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1197 __builtin_ia32_vfmaddps256 (-(__v8sf) __A, 1198 (__v8sf) __B, 1199 (__v8sf) __C), 1200 (__v8sf)_mm256_setzero_ps()); 1201 } 1202 1203 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1204 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1205 { 1206 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1207 __builtin_ia32_vfmaddps256 (-(__v8sf) __A, 1208 (__v8sf) __B, 1209 -(__v8sf) __C), 1210 (__v8sf)_mm256_setzero_ps()); 1211 } 1212 1213 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1214 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1215 { 1216 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1217 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1218 (__v2df) __B, 1219 (__v2df) __C), 1220 (__v2df) __A); 1221 } 1222 1223 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1224 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1225 { 1226 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1227 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1228 (__v2df) __B, 1229 (__v2df) __C), 1230 (__v2df) __C); 1231 } 1232 1233 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1234 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1235 { 1236 return (__m128d) 
__builtin_ia32_selectpd_128((__mmask8) __U, 1237 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1238 (__v2df) __B, 1239 (__v2df) __C), 1240 (__v2df)_mm_setzero_pd()); 1241 } 1242 1243 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1244 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1245 { 1246 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1247 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1248 (__v2df) __B, 1249 -(__v2df) __C), 1250 (__v2df) __A); 1251 } 1252 1253 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1254 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1255 { 1256 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1257 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1258 (__v2df) __B, 1259 -(__v2df) __C), 1260 (__v2df)_mm_setzero_pd()); 1261 } 1262 1263 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1264 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1265 { 1266 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1267 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1268 (__v4df) __B, 1269 (__v4df) __C), 1270 (__v4df) __A); 1271 } 1272 1273 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1274 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1275 { 1276 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1277 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1278 (__v4df) __B, 1279 (__v4df) __C), 1280 (__v4df) __C); 1281 } 1282 1283 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1284 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1285 { 1286 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1287 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1288 (__v4df) __B, 1289 (__v4df) __C), 1290 (__v4df)_mm256_setzero_pd()); 1291 } 1292 1293 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1294 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1295 { 1296 return 
(__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1297 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1298 (__v4df) __B, 1299 -(__v4df) __C), 1300 (__v4df) __A); 1301 } 1302 1303 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1304 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1305 { 1306 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1307 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1308 (__v4df) __B, 1309 -(__v4df) __C), 1310 (__v4df)_mm256_setzero_pd()); 1311 } 1312 1313 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1314 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1315 { 1316 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1317 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1318 (__v4sf) __B, 1319 (__v4sf) __C), 1320 (__v4sf) __A); 1321 } 1322 1323 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1324 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1325 { 1326 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1327 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1328 (__v4sf) __B, 1329 (__v4sf) __C), 1330 (__v4sf) __C); 1331 } 1332 1333 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1334 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1335 { 1336 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1337 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1338 (__v4sf) __B, 1339 (__v4sf) __C), 1340 (__v4sf)_mm_setzero_ps()); 1341 } 1342 1343 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1344 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1345 { 1346 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1347 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1348 (__v4sf) __B, 1349 -(__v4sf) __C), 1350 (__v4sf) __A); 1351 } 1352 1353 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1354 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1355 { 1356 return (__m128) 
__builtin_ia32_selectps_128((__mmask8) __U, 1357 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1358 (__v4sf) __B, 1359 -(__v4sf) __C), 1360 (__v4sf)_mm_setzero_ps()); 1361 } 1362 1363 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1364 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, 1365 __m256 __C) 1366 { 1367 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1368 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1369 (__v8sf) __B, 1370 (__v8sf) __C), 1371 (__v8sf) __A); 1372 } 1373 1374 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1375 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1376 { 1377 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1378 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1379 (__v8sf) __B, 1380 (__v8sf) __C), 1381 (__v8sf) __C); 1382 } 1383 1384 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1385 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1386 { 1387 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1388 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1389 (__v8sf) __B, 1390 (__v8sf) __C), 1391 (__v8sf)_mm256_setzero_ps()); 1392 } 1393 1394 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1395 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1396 { 1397 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1398 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1399 (__v8sf) __B, 1400 -(__v8sf) __C), 1401 (__v8sf) __A); 1402 } 1403 1404 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1405 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1406 { 1407 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1408 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1409 (__v8sf) __B, 1410 -(__v8sf) __C), 1411 (__v8sf)_mm256_setzero_ps()); 1412 } 1413 1414 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1415 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1416 { 1417 return (__m128d) 
__builtin_ia32_selectpd_128((__mmask8) __U, 1418 __builtin_ia32_vfmaddpd ((__v2df) __A, 1419 (__v2df) __B, 1420 -(__v2df) __C), 1421 (__v2df) __C); 1422 } 1423 1424 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1425 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1426 { 1427 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1428 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1429 (__v4df) __B, 1430 -(__v4df) __C), 1431 (__v4df) __C); 1432 } 1433 1434 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1435 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1436 { 1437 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1438 __builtin_ia32_vfmaddps ((__v4sf) __A, 1439 (__v4sf) __B, 1440 -(__v4sf) __C), 1441 (__v4sf) __C); 1442 } 1443 1444 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1445 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1446 { 1447 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1448 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1449 (__v8sf) __B, 1450 -(__v8sf) __C), 1451 (__v8sf) __C); 1452 } 1453 1454 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1455 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1456 { 1457 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1458 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1459 (__v2df) __B, 1460 -(__v2df) __C), 1461 (__v2df) __C); 1462 } 1463 1464 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1465 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1466 { 1467 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1468 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1469 (__v4df) __B, 1470 -(__v4df) __C), 1471 (__v4df) __C); 1472 } 1473 1474 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1475 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1476 { 1477 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1478 
__builtin_ia32_vfmaddsubps ((__v4sf) __A, 1479 (__v4sf) __B, 1480 -(__v4sf) __C), 1481 (__v4sf) __C); 1482 } 1483 1484 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1485 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1486 { 1487 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1488 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1489 (__v8sf) __B, 1490 -(__v8sf) __C), 1491 (__v8sf) __C); 1492 } 1493 1494 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1495 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1496 { 1497 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1498 __builtin_ia32_vfmaddpd ((__v2df) __A, 1499 -(__v2df) __B, 1500 (__v2df) __C), 1501 (__v2df) __A); 1502 } 1503 1504 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1505 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1506 { 1507 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1508 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1509 -(__v4df) __B, 1510 (__v4df) __C), 1511 (__v4df) __A); 1512 } 1513 1514 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1515 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1516 { 1517 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1518 __builtin_ia32_vfmaddps ((__v4sf) __A, 1519 -(__v4sf) __B, 1520 (__v4sf) __C), 1521 (__v4sf) __A); 1522 } 1523 1524 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1525 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1526 { 1527 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1528 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1529 -(__v8sf) __B, 1530 (__v8sf) __C), 1531 (__v8sf) __A); 1532 } 1533 1534 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1535 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1536 { 1537 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1538 __builtin_ia32_vfmaddpd ((__v2df) __A, 1539 -(__v2df) __B, 1540 -(__v2df) 
__C), 1541 (__v2df) __A); 1542 } 1543 1544 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1545 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1546 { 1547 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1548 __builtin_ia32_vfmaddpd ((__v2df) __A, 1549 -(__v2df) __B, 1550 -(__v2df) __C), 1551 (__v2df) __C); 1552 } 1553 1554 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1555 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1556 { 1557 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1558 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1559 -(__v4df) __B, 1560 -(__v4df) __C), 1561 (__v4df) __A); 1562 } 1563 1564 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1565 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1566 { 1567 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1568 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1569 -(__v4df) __B, 1570 -(__v4df) __C), 1571 (__v4df) __C); 1572 } 1573 1574 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1575 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1576 { 1577 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1578 __builtin_ia32_vfmaddps ((__v4sf) __A, 1579 -(__v4sf) __B, 1580 -(__v4sf) __C), 1581 (__v4sf) __A); 1582 } 1583 1584 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1585 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1586 { 1587 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1588 __builtin_ia32_vfmaddps ((__v4sf) __A, 1589 -(__v4sf) __B, 1590 -(__v4sf) __C), 1591 (__v4sf) __C); 1592 } 1593 1594 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1595 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1596 { 1597 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1598 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1599 -(__v8sf) __B, 1600 -(__v8sf) __C), 1601 (__v8sf) __A); 1602 } 1603 1604 static __inline__ __m256 
__DEFAULT_FN_ATTRS256 1605 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1606 { 1607 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1608 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1609 -(__v8sf) __B, 1610 -(__v8sf) __C), 1611 (__v8sf) __C); 1612 } 1613 1614 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1615 _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 1616 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 1617 (__v2df)_mm_add_pd(__A, __B), 1618 (__v2df)__W); 1619 } 1620 1621 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1622 _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) { 1623 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 1624 (__v2df)_mm_add_pd(__A, __B), 1625 (__v2df)_mm_setzero_pd()); 1626 } 1627 1628 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1629 _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 1630 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 1631 (__v4df)_mm256_add_pd(__A, __B), 1632 (__v4df)__W); 1633 } 1634 1635 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1636 _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) { 1637 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 1638 (__v4df)_mm256_add_pd(__A, __B), 1639 (__v4df)_mm256_setzero_pd()); 1640 } 1641 1642 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1643 _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 1644 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1645 (__v4sf)_mm_add_ps(__A, __B), 1646 (__v4sf)__W); 1647 } 1648 1649 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1650 _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) { 1651 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1652 (__v4sf)_mm_add_ps(__A, __B), 1653 (__v4sf)_mm_setzero_ps()); 1654 } 1655 1656 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1657 _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 1658 return 
(__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1659 (__v8sf)_mm256_add_ps(__A, __B), 1660 (__v8sf)__W); 1661 } 1662 1663 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1664 _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) { 1665 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1666 (__v8sf)_mm256_add_ps(__A, __B), 1667 (__v8sf)_mm256_setzero_ps()); 1668 } 1669 1670 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1671 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) { 1672 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 1673 (__v4si) __W, 1674 (__v4si) __A); 1675 } 1676 1677 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1678 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) { 1679 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 1680 (__v8si) __W, 1681 (__v8si) __A); 1682 } 1683 1684 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1685 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) { 1686 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 1687 (__v2df) __W, 1688 (__v2df) __A); 1689 } 1690 1691 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1692 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) { 1693 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 1694 (__v4df) __W, 1695 (__v4df) __A); 1696 } 1697 1698 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1699 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) { 1700 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 1701 (__v4sf) __W, 1702 (__v4sf) __A); 1703 } 1704 1705 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1706 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) { 1707 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 1708 (__v8sf) __W, 1709 (__v8sf) __A); 1710 } 1711 1712 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1713 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) { 1714 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 
1715 (__v2di) __W, 1716 (__v2di) __A); 1717 } 1718 1719 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1720 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) { 1721 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 1722 (__v4di) __W, 1723 (__v4di) __A); 1724 } 1725 1726 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1727 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) { 1728 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, 1729 (__v2df) __W, 1730 (__mmask8) __U); 1731 } 1732 1733 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1734 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A) { 1735 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, 1736 (__v2df) 1737 _mm_setzero_pd (), 1738 (__mmask8) __U); 1739 } 1740 1741 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1742 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) { 1743 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, 1744 (__v4df) __W, 1745 (__mmask8) __U); 1746 } 1747 1748 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1749 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) { 1750 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, 1751 (__v4df) 1752 _mm256_setzero_pd (), 1753 (__mmask8) __U); 1754 } 1755 1756 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1757 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 1758 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, 1759 (__v2di) __W, 1760 (__mmask8) __U); 1761 } 1762 1763 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1764 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) { 1765 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, 1766 (__v2di) 1767 _mm_setzero_si128 (), 1768 (__mmask8) __U); 1769 } 1770 1771 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1772 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 1773 return (__m256i) __builtin_ia32_compressdi256_mask 
((__v4di) __A, 1774 (__v4di) __W, 1775 (__mmask8) __U); 1776 } 1777 /* maskz variant: compressdi256 builtin receives (__A, zero vector, __U). */ 1778 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1779 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) { 1780 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, 1781 (__v4di) 1782 _mm256_setzero_si256 (), 1783 (__mmask8) __U); 1784 } 1785 /* mask variant: compresssf128 builtin receives (__A, __W, __U). */ 1786 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1787 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) { 1788 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, 1789 (__v4sf) __W, 1790 (__mmask8) __U); 1791 } 1792 /* maskz variant: compresssf128 builtin receives (__A, zero vector, __U). */ 1793 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1794 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A) { 1795 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, 1796 (__v4sf) 1797 _mm_setzero_ps (), 1798 (__mmask8) __U); 1799 } 1800 /* mask variant: compresssf256 builtin receives (__A, __W, __U). */ 1801 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1802 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) { 1803 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, 1804 (__v8sf) __W, 1805 (__mmask8) __U); 1806 } 1807 /* maskz variant: compresssf256 builtin receives (__A, zero vector, __U). */ 1808 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1809 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) { 1810 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, 1811 (__v8sf) 1812 _mm256_setzero_ps (), 1813 (__mmask8) __U); 1814 } 1815 /* mask variant: compresssi128 builtin receives (__A, __W, __U). */ 1816 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1817 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { 1818 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, 1819 (__v4si) __W, 1820 (__mmask8) __U); 1821 } 1822 /* maskz variant: compresssi128 builtin receives (__A, zero vector, __U). */ 1823 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1824 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) { 1825 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, 1826 (__v4si) 1827 _mm_setzero_si128 (), 1828 (__mmask8) __U); 1829 } 1830 /* mask variant: 256-bit epi32 compress; the builtin call itself follows this line. */ 1831 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1832 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { 1833 return (__m256i) 
__builtin_ia32_compresssi256_mask ((__v8si) __A, 1834 (__v8si) __W, 1835 (__mmask8) __U); 1836 } 1837 /* maskz variant: compresssi256 builtin receives (__A, zero vector, __U). */ 1838 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1839 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) { 1840 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, 1841 (__v8si) 1842 _mm256_setzero_si256 (), 1843 (__mmask8) __U); 1844 } 1845 /* Compress-store, 128-bit pd: returns nothing; builtin receives (__P cast to __v2df *, __A, __U). */ 1846 static __inline__ void __DEFAULT_FN_ATTRS128 1847 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) { 1848 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P, 1849 (__v2df) __A, 1850 (__mmask8) __U); 1851 } 1852 /* Compress-store, 256-bit pd: builtin receives (__P cast to __v4df *, __A, __U). */ 1853 static __inline__ void __DEFAULT_FN_ATTRS256 1854 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) { 1855 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P, 1856 (__v4df) __A, 1857 (__mmask8) __U); 1858 } 1859 /* Compress-store, 128-bit epi64: builtin receives (__P cast to __v2di *, __A, __U). */ 1860 static __inline__ void __DEFAULT_FN_ATTRS128 1861 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) { 1862 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P, 1863 (__v2di) __A, 1864 (__mmask8) __U); 1865 } 1866 /* Compress-store, 256-bit epi64: builtin receives (__P cast to __v4di *, __A, __U). */ 1867 static __inline__ void __DEFAULT_FN_ATTRS256 1868 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) { 1869 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P, 1870 (__v4di) __A, 1871 (__mmask8) __U); 1872 } 1873 /* Compress-store, 128-bit ps: builtin receives (__P cast to __v4sf *, __A, __U). */ 1874 static __inline__ void __DEFAULT_FN_ATTRS128 1875 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) { 1876 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P, 1877 (__v4sf) __A, 1878 (__mmask8) __U); 1879 } 1880 /* Compress-store, 256-bit ps: builtin receives (__P cast to __v8sf *, __A, __U). */ 1881 static __inline__ void __DEFAULT_FN_ATTRS256 1882 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) { 1883 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P, 1884 (__v8sf) __A, 1885 (__mmask8) __U); 1886 } 1887 /* Compress-store, 128-bit epi32: builtin receives __P cast to __v4si * first; remaining operands follow this line. */ 1888 static __inline__ void __DEFAULT_FN_ATTRS128 1889 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) { 1890 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P, 1891 
(__v4si) __A, 1892 (__mmask8) __U); 1893 } 1894 /* Compress-store, 256-bit epi32: returns nothing; builtin receives (__P cast to __v8si *, __A, __U). */ 1895 static __inline__ void __DEFAULT_FN_ATTRS256 1896 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) { 1897 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P, 1898 (__v8si) __A, 1899 (__mmask8) __U); 1900 } 1901 /* mask variant: select builtin receives (__U, _mm_cvtepi32_pd(__A), __W). */ 1902 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1903 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) { 1904 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 1905 (__v2df)_mm_cvtepi32_pd(__A), 1906 (__v2df)__W); 1907 } 1908 /* maskz variant: select builtin receives (__U, _mm_cvtepi32_pd(__A), zero vector). */ 1909 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1910 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { 1911 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 1912 (__v2df)_mm_cvtepi32_pd(__A), 1913 (__v2df)_mm_setzero_pd()); 1914 } 1915 /* mask variant: takes a __m128i source, returns __m256d; select builtin receives (__U, _mm256_cvtepi32_pd(__A), __W). */ 1916 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1917 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) { 1918 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 1919 (__v4df)_mm256_cvtepi32_pd(__A), 1920 (__v4df)__W); 1921 } 1922 /* maskz variant: select builtin receives (__U, _mm256_cvtepi32_pd(__A), zero vector). */ 1923 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1924 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { 1925 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 1926 (__v4df)_mm256_cvtepi32_pd(__A), 1927 (__v4df)_mm256_setzero_pd()); 1928 } 1929 /* mask variant: select builtin receives (__U, _mm_cvtepi32_ps(__A), __W). */ 1930 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1931 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) { 1932 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1933 (__v4sf)_mm_cvtepi32_ps(__A), 1934 (__v4sf)__W); 1935 } 1936 /* maskz variant: select builtin receives (__U, _mm_cvtepi32_ps(__A), zero vector). */ 1937 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1938 _mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) { 1939 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1940 (__v4sf)_mm_cvtepi32_ps(__A), 1941 (__v4sf)_mm_setzero_ps()); 1942 } 1943 /* mask variant: 256-bit epi32 -> ps; the select call follows this line. */ 1944 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1945 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) { 1946 return 
(__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1947 (__v8sf)_mm256_cvtepi32_ps(__A), 1948 (__v8sf)__W); 1949 } 1950 /* maskz variant: select builtin receives (__U, _mm256_cvtepi32_ps(__A), zero vector). */ 1951 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1952 _mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) { 1953 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1954 (__v8sf)_mm256_cvtepi32_ps(__A), 1955 (__v8sf)_mm256_setzero_ps()); 1956 } 1957 /* mask variant: uses the dedicated cvtpd2dq128 masked builtin (not a select) with (__A, __W, __U). */ 1958 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1959 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { 1960 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, 1961 (__v4si) __W, 1962 (__mmask8) __U); 1963 } 1964 /* maskz variant: cvtpd2dq128 masked builtin receives (__A, zero vector, __U). */ 1965 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1966 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) { 1967 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, 1968 (__v4si) 1969 _mm_setzero_si128 (), 1970 (__mmask8) __U); 1971 } 1972 /* mask variant: 256-bit source, 128-bit result; 128-bit select builtin receives (__U, _mm256_cvtpd_epi32(__A), __W). */ 1973 static __inline__ __m128i __DEFAULT_FN_ATTRS256 1974 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { 1975 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 1976 (__v4si)_mm256_cvtpd_epi32(__A), 1977 (__v4si)__W); 1978 } 1979 /* maskz variant: 128-bit select builtin receives (__U, _mm256_cvtpd_epi32(__A), zero vector). */ 1980 static __inline__ __m128i __DEFAULT_FN_ATTRS256 1981 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) { 1982 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 1983 (__v4si)_mm256_cvtpd_epi32(__A), 1984 (__v4si)_mm_setzero_si128()); 1985 } 1986 /* mask variant: cvtpd2ps masked builtin receives (__A, __W, __U). */ 1987 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1988 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) { 1989 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, 1990 (__v4sf) __W, 1991 (__mmask8) __U); 1992 } 1993 /* maskz variant: cvtpd2ps masked builtin receives (__A, zero vector, __U). */ 1994 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1995 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) { 1996 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, 1997 (__v4sf) 1998 _mm_setzero_ps (), 1999 (__mmask8) __U); 2000 } 2001 /* mask variant: 256-bit pd -> 128-bit ps; the select call follows this line. */ 2002 static __inline__ __m128 __DEFAULT_FN_ATTRS256 2003 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) { 2004 
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2005 (__v4sf)_mm256_cvtpd_ps(__A), 2006 (__v4sf)__W); 2007 } 2008 /* maskz variant: 128-bit select builtin receives (__U, _mm256_cvtpd_ps(__A), zero vector). */ 2009 static __inline__ __m128 __DEFAULT_FN_ATTRS256 2010 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) { 2011 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2012 (__v4sf)_mm256_cvtpd_ps(__A), 2013 (__v4sf)_mm_setzero_ps()); 2014 } 2015 /* Unmasked form: cvtpd2udq128 masked builtin receives (__A, zero vector, (__mmask8) -1). */ 2016 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2017 _mm_cvtpd_epu32 (__m128d __A) { 2018 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2019 (__v4si) 2020 _mm_setzero_si128 (), 2021 (__mmask8) -1); 2022 } 2023 /* mask variant: cvtpd2udq128 masked builtin receives (__A, __W, __U). */ 2024 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2025 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { 2026 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2027 (__v4si) __W, 2028 (__mmask8) __U); 2029 } 2030 /* maskz variant: cvtpd2udq128 masked builtin receives (__A, zero vector, __U). */ 2031 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2032 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) { 2033 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2034 (__v4si) 2035 _mm_setzero_si128 (), 2036 (__mmask8) __U); 2037 } 2038 /* Unmasked form: 256-bit source, 128-bit result; cvtpd2udq256 masked builtin receives (__A, zero vector, (__mmask8) -1). */ 2039 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2040 _mm256_cvtpd_epu32 (__m256d __A) { 2041 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2042 (__v4si) 2043 _mm_setzero_si128 (), 2044 (__mmask8) -1); 2045 } 2046 /* mask variant: cvtpd2udq256 masked builtin receives (__A, __W, __U). */ 2047 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2048 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { 2049 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2050 (__v4si) __W, 2051 (__mmask8) __U); 2052 } 2053 /* maskz variant: cvtpd2udq256 masked builtin receives (__A, zero vector, __U). */ 2054 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2055 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) { 2056 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2057 (__v4si) 2058 _mm_setzero_si128 (), 2059 (__mmask8) __U); 2060 } 2061 /* mask variant: 128-bit ps -> epi32; the select call follows this line. */ 2062 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2063 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) { 2064 return 
(__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2065 (__v4si)_mm_cvtps_epi32(__A), 2066 (__v4si)__W); 2067 } 2068 /* maskz variant: select builtin receives (__U, _mm_cvtps_epi32(__A), zero vector). */ 2069 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2070 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) { 2071 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2072 (__v4si)_mm_cvtps_epi32(__A), 2073 (__v4si)_mm_setzero_si128()); 2074 } 2075 /* mask variant: select builtin receives (__U, _mm256_cvtps_epi32(__A), __W). */ 2076 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2077 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { 2078 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2079 (__v8si)_mm256_cvtps_epi32(__A), 2080 (__v8si)__W); 2081 } 2082 /* maskz variant: select builtin receives (__U, _mm256_cvtps_epi32(__A), zero vector). */ 2083 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2084 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) { 2085 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2086 (__v8si)_mm256_cvtps_epi32(__A), 2087 (__v8si)_mm256_setzero_si256()); 2088 } 2089 /* mask variant: select builtin receives (__U, _mm_cvtps_pd(__A), __W). */ 2090 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2091 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) { 2092 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2093 (__v2df)_mm_cvtps_pd(__A), 2094 (__v2df)__W); 2095 } 2096 /* maskz variant: select builtin receives (__U, _mm_cvtps_pd(__A), zero vector). */ 2097 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2098 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { 2099 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2100 (__v2df)_mm_cvtps_pd(__A), 2101 (__v2df)_mm_setzero_pd()); 2102 } 2103 /* mask variant: __m128 source, __m256d result; select builtin receives (__U, _mm256_cvtps_pd(__A), __W). */ 2104 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2105 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) { 2106 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2107 (__v4df)_mm256_cvtps_pd(__A), 2108 (__v4df)__W); 2109 } 2110 /* maskz variant: select builtin receives (__U, _mm256_cvtps_pd(__A), zero vector). */ 2111 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2112 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { 2113 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2114 (__v4df)_mm256_cvtps_pd(__A), 2115 (__v4df)_mm256_setzero_pd()); 2116 } 2117 /* Unmasked form: 128-bit ps -> epu32; the masked-builtin call follows this line. */ 2118 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2119 _mm_cvtps_epu32 (__m128 __A) { 2120 return (__m128i) 
__builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2121 (__v4si) 2122 _mm_setzero_si128 (), 2123 (__mmask8) -1); 2124 } 2125 /* mask variant: cvtps2udq128 masked builtin receives (__A, __W, __U). */ 2126 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2127 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { 2128 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2129 (__v4si) __W, 2130 (__mmask8) __U); 2131 } 2132 /* maskz variant: cvtps2udq128 masked builtin receives (__A, zero vector, __U). */ 2133 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2134 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) { 2135 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2136 (__v4si) 2137 _mm_setzero_si128 (), 2138 (__mmask8) __U); 2139 } 2140 /* Unmasked form: cvtps2udq256 masked builtin receives (__A, zero vector, (__mmask8) -1). */ 2141 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2142 _mm256_cvtps_epu32 (__m256 __A) { 2143 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2144 (__v8si) 2145 _mm256_setzero_si256 (), 2146 (__mmask8) -1); 2147 } 2148 /* mask variant: cvtps2udq256 masked builtin receives (__A, __W, __U). */ 2149 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2150 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) { 2151 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2152 (__v8si) __W, 2153 (__mmask8) __U); 2154 } 2155 /* maskz variant: cvtps2udq256 masked builtin receives (__A, zero vector, __U). */ 2156 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2157 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) { 2158 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2159 (__v8si) 2160 _mm256_setzero_si256 (), 2161 (__mmask8) __U); 2162 } 2163 /* mask variant: cvttpd2dq128 masked builtin receives (__A, __W, __U). */ 2164 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2165 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { 2166 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, 2167 (__v4si) __W, 2168 (__mmask8) __U); 2169 } 2170 /* maskz variant: cvttpd2dq128 masked builtin receives (__A, zero vector, __U). */ 2171 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2172 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) { 2173 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, 2174 (__v4si) 2175 _mm_setzero_si128 (), 2176 (__mmask8) __U); 2177 } 2178 /* mask variant: 256-bit source, 128-bit result; the select call follows this line. */ 2179 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2180 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { 2181 
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2182 (__v4si)_mm256_cvttpd_epi32(__A), 2183 (__v4si)__W); 2184 } 2185 /* maskz variant: 128-bit select builtin receives (__U, _mm256_cvttpd_epi32(__A), zero vector). */ 2186 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2187 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) { 2188 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2189 (__v4si)_mm256_cvttpd_epi32(__A), 2190 (__v4si)_mm_setzero_si128()); 2191 } 2192 /* Unmasked form: cvttpd2udq128 masked builtin receives (__A, zero vector, (__mmask8) -1). */ 2193 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2194 _mm_cvttpd_epu32 (__m128d __A) { 2195 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2196 (__v4si) 2197 _mm_setzero_si128 (), 2198 (__mmask8) -1); 2199 } 2200 /* mask variant: cvttpd2udq128 masked builtin receives (__A, __W, __U). */ 2201 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2202 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { 2203 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2204 (__v4si) __W, 2205 (__mmask8) __U); 2206 } 2207 /* maskz variant: cvttpd2udq128 masked builtin receives (__A, zero vector, __U). */ 2208 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2209 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) { 2210 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2211 (__v4si) 2212 _mm_setzero_si128 (), 2213 (__mmask8) __U); 2214 } 2215 /* Unmasked form: 256-bit source, 128-bit result; cvttpd2udq256 masked builtin receives (__A, zero vector, (__mmask8) -1). */ 2216 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2217 _mm256_cvttpd_epu32 (__m256d __A) { 2218 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2219 (__v4si) 2220 _mm_setzero_si128 (), 2221 (__mmask8) -1); 2222 } 2223 /* mask variant: cvttpd2udq256 masked builtin receives (__A, __W, __U). */ 2224 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2225 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { 2226 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2227 (__v4si) __W, 2228 (__mmask8) __U); 2229 } 2230 /* maskz variant: cvttpd2udq256 masked builtin receives (__A, zero vector, __U). */ 2231 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2232 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) { 2233 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2234 (__v4si) 2235 _mm_setzero_si128 (), 2236 (__mmask8) __U); 2237 } 2238 /* mask variant: 128-bit ps -> epi32 via _mm_cvttps_epi32; the function body follows this line. */ 2239 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2240 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) 
{ 2241 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2242 (__v4si)_mm_cvttps_epi32(__A), 2243 (__v4si)__W); 2244 } 2245 /* maskz variant: select builtin receives (__U, _mm_cvttps_epi32(__A), zero vector). */ 2246 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2247 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) { 2248 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2249 (__v4si)_mm_cvttps_epi32(__A), 2250 (__v4si)_mm_setzero_si128()); 2251 } 2252 /* mask variant: select builtin receives (__U, _mm256_cvttps_epi32(__A), __W). */ 2253 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2254 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { 2255 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2256 (__v8si)_mm256_cvttps_epi32(__A), 2257 (__v8si)__W); 2258 } 2259 /* maskz variant: select builtin receives (__U, _mm256_cvttps_epi32(__A), zero vector). */ 2260 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2261 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) { 2262 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2263 (__v8si)_mm256_cvttps_epi32(__A), 2264 (__v8si)_mm256_setzero_si256()); 2265 } 2266 /* Unmasked form: cvttps2udq128 masked builtin receives (__A, zero vector, (__mmask8) -1). */ 2267 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2268 _mm_cvttps_epu32 (__m128 __A) { 2269 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2270 (__v4si) 2271 _mm_setzero_si128 (), 2272 (__mmask8) -1); 2273 } 2274 /* mask variant: cvttps2udq128 masked builtin receives (__A, __W, __U). */ 2275 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2276 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { 2277 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2278 (__v4si) __W, 2279 (__mmask8) __U); 2280 } 2281 /* maskz variant: cvttps2udq128 masked builtin receives (__A, zero vector, __U). */ 2282 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2283 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) { 2284 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2285 (__v4si) 2286 _mm_setzero_si128 (), 2287 (__mmask8) __U); 2288 } 2289 /* Unmasked form: cvttps2udq256 masked builtin receives (__A, zero vector, (__mmask8) -1). */ 2290 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2291 _mm256_cvttps_epu32 (__m256 __A) { 2292 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2293 (__v8si) 2294 _mm256_setzero_si256 (), 2295 (__mmask8) -1); 2296 } 2297 /* mask variant: 256-bit ps -> epu32; the parameter list and body continue past this line. */ 2298 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2299 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, 
__m256 __A) { 2300 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2301 (__v8si) __W, 2302 (__mmask8) __U); 2303 } 2304 /* maskz variant: cvttps2udq256 masked builtin receives (__A, zero vector, __U). */ 2305 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2306 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) { 2307 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2308 (__v8si) 2309 _mm256_setzero_si256 (), 2310 (__mmask8) __U); 2311 } 2312 /* No target builtin here: shuffles out elements 0 and 1 of __A viewed as __v4su, then converts that pair to __v2df with __builtin_convertvector. */ 2313 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2314 _mm_cvtepu32_pd (__m128i __A) { 2315 return (__m128d) __builtin_convertvector( 2316 __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df); 2317 } 2318 /* mask variant: select builtin receives (__U, _mm_cvtepu32_pd(__A), __W). */ 2319 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2320 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) { 2321 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 2322 (__v2df)_mm_cvtepu32_pd(__A), 2323 (__v2df)__W); 2324 } 2325 /* maskz variant: select builtin receives (__U, _mm_cvtepu32_pd(__A), zero vector). */ 2326 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2327 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { 2328 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 2329 (__v2df)_mm_cvtepu32_pd(__A), 2330 (__v2df)_mm_setzero_pd()); 2331 } 2332 /* Converts __A viewed as __v4su to __v4df with __builtin_convertvector. */ 2333 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2334 _mm256_cvtepu32_pd (__m128i __A) { 2335 return (__m256d)__builtin_convertvector((__v4su)__A, __v4df); 2336 } 2337 /* mask variant: select builtin receives (__U, _mm256_cvtepu32_pd(__A), __W). */ 2338 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2339 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) { 2340 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 2341 (__v4df)_mm256_cvtepu32_pd(__A), 2342 (__v4df)__W); 2343 } 2344 /* maskz variant: select builtin receives (__U, _mm256_cvtepu32_pd(__A), zero vector). */ 2345 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2346 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { 2347 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 2348 (__v4df)_mm256_cvtepu32_pd(__A), 2349 (__v4df)_mm256_setzero_pd()); 2350 } 2351 /* Converts __A viewed as __v4su to __v4sf with __builtin_convertvector. */ 2352 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2353 _mm_cvtepu32_ps (__m128i __A) { 2354 return (__m128)__builtin_convertvector((__v4su)__A, __v4sf); 2355 } 2356 /* Next definition (the 128-bit mask variant of cvtepu32_ps) starts with the `static` below and continues past this line. */ 2357 static 
__inline__ __m128 __DEFAULT_FN_ATTRS128 2358 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) { 2359 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2360 (__v4sf)_mm_cvtepu32_ps(__A), 2361 (__v4sf)__W); 2362 } 2363 /* maskz variant: select builtin receives (__U, _mm_cvtepu32_ps(__A), zero vector). */ 2364 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2365 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) { 2366 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2367 (__v4sf)_mm_cvtepu32_ps(__A), 2368 (__v4sf)_mm_setzero_ps()); 2369 } 2370 /* Converts __A viewed as __v8su to __v8sf with __builtin_convertvector. */ 2371 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2372 _mm256_cvtepu32_ps (__m256i __A) { 2373 return (__m256)__builtin_convertvector((__v8su)__A, __v8sf); 2374 } 2375 /* mask variant: select builtin receives (__U, _mm256_cvtepu32_ps(__A), __W). */ 2376 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2377 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) { 2378 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2379 (__v8sf)_mm256_cvtepu32_ps(__A), 2380 (__v8sf)__W); 2381 } 2382 /* maskz variant: select builtin receives (__U, _mm256_cvtepu32_ps(__A), zero vector). */ 2383 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2384 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) { 2385 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2386 (__v8sf)_mm256_cvtepu32_ps(__A), 2387 (__v8sf)_mm256_setzero_ps()); 2388 } 2389 /* mask variant: select builtin receives (__U, _mm_div_pd(__A, __B), __W). */ 2390 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2391 _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2392 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2393 (__v2df)_mm_div_pd(__A, __B), 2394 (__v2df)__W); 2395 } 2396 /* maskz variant: select builtin receives (__U, _mm_div_pd(__A, __B), zero vector). */ 2397 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2398 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2399 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2400 (__v2df)_mm_div_pd(__A, __B), 2401 (__v2df)_mm_setzero_pd()); 2402 } 2403 /* mask variant: select builtin receives (__U, _mm256_div_pd(__A, __B), __W). */ 2404 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2405 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2406 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2407 (__v4df)_mm256_div_pd(__A, __B), 2408 (__v4df)__W); 2409 } 2410 /* Next definition (the 256-bit maskz variant of div_pd) starts here and continues past this line. */ 2411 static __inline__ __m256d 
__DEFAULT_FN_ATTRS256 2412 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2413 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2414 (__v4df)_mm256_div_pd(__A, __B), 2415 (__v4df)_mm256_setzero_pd()); 2416 } 2417 /* mask variant: select builtin receives (__U, _mm_div_ps(__A, __B), __W). */ 2418 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2419 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2420 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2421 (__v4sf)_mm_div_ps(__A, __B), 2422 (__v4sf)__W); 2423 } 2424 /* maskz variant: select builtin receives (__U, _mm_div_ps(__A, __B), zero vector). */ 2425 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2426 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2427 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2428 (__v4sf)_mm_div_ps(__A, __B), 2429 (__v4sf)_mm_setzero_ps()); 2430 } 2431 /* mask variant: select builtin receives (__U, _mm256_div_ps(__A, __B), __W). */ 2432 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2433 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2434 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2435 (__v8sf)_mm256_div_ps(__A, __B), 2436 (__v8sf)__W); 2437 } 2438 /* maskz variant: select builtin receives (__U, _mm256_div_ps(__A, __B), zero vector). */ 2439 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2440 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2441 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2442 (__v8sf)_mm256_div_ps(__A, __B), 2443 (__v8sf)_mm256_setzero_ps()); 2444 } 2445 /* mask variant: expanddf128 builtin receives (__A, __W, __U). */ 2446 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2447 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) { 2448 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, 2449 (__v2df) __W, 2450 (__mmask8) __U); 2451 } 2452 /* maskz variant: expanddf128 builtin receives (__A, zero vector, __U). */ 2453 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2454 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A) { 2455 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, 2456 (__v2df) 2457 _mm_setzero_pd (), 2458 (__mmask8) __U); 2459 } 2460 /* mask variant: expanddf256 builtin receives (__A, __W, __U); the closing brace follows this line. */ 2461 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2462 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) { 2463 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, 2464 (__v4df) __W, 2465 (__mmask8) __U); 
2466 } 2467 /* maskz variant: expanddf256 builtin receives (__A, zero vector, __U). */ 2468 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2469 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) { 2470 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, 2471 (__v4df) 2472 _mm256_setzero_pd (), 2473 (__mmask8) __U); 2474 } 2475 /* mask variant: expanddi128 builtin receives (__A, __W, __U). */ 2476 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2477 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 2478 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, 2479 (__v2di) __W, 2480 (__mmask8) __U); 2481 } 2482 /* maskz variant: expanddi128 builtin receives (__A, zero vector, __U). */ 2483 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2484 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) { 2485 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, 2486 (__v2di) 2487 _mm_setzero_si128 (), 2488 (__mmask8) __U); 2489 } 2490 /* mask variant: expanddi256 builtin receives (__A, __W, __U). */ 2491 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2492 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 2493 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, 2494 (__v4di) __W, 2495 (__mmask8) __U); 2496 } 2497 /* maskz variant: expanddi256 builtin receives (__A, zero vector, __U). */ 2498 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2499 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) { 2500 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, 2501 (__v4di) 2502 _mm256_setzero_si256 (), 2503 (__mmask8) __U); 2504 } 2505 /* mask variant: expandloaddf128 builtin receives (__P cast to const __v2df *, __W, __U). */ 2506 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2507 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) { 2508 return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P, 2509 (__v2df) __W, 2510 (__mmask8) 2511 __U); 2512 } 2513 /* maskz variant: expandloaddf128 builtin receives (__P cast to const __v2df *, zero vector, __U). */ 2514 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2515 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { 2516 return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P, 2517 (__v2df) 2518 _mm_setzero_pd (), 2519 (__mmask8) 2520 __U); 2521 } 2522 /* mask variant: 256-bit pd expand-load; the builtin call follows this line. */ 2523 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2524 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) { 2525 return (__m256d) 
__builtin_ia32_expandloaddf256_mask ((const __v4df *) __P, 2526 (__v4df) __W, 2527 (__mmask8) 2528 __U); 2529 } 2530 /* maskz variant: expandloaddf256 builtin receives (__P cast to const __v4df *, zero vector, __U). */ 2531 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2532 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { 2533 return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P, 2534 (__v4df) 2535 _mm256_setzero_pd (), 2536 (__mmask8) 2537 __U); 2538 } 2539 /* mask variant: expandloaddi128 builtin receives (__P cast to const __v2di *, __W, __U). */ 2540 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2541 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) { 2542 return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P, 2543 (__v2di) __W, 2544 (__mmask8) 2545 __U); 2546 } 2547 /* maskz variant: expandloaddi128 builtin receives (__P cast to const __v2di *, zero vector, __U). */ 2548 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2549 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { 2550 return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P, 2551 (__v2di) 2552 _mm_setzero_si128 (), 2553 (__mmask8) 2554 __U); 2555 } 2556 /* mask variant: expandloaddi256 builtin receives (__P cast to const __v4di *, __W, __U). */ 2557 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2558 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U, 2559 void const *__P) { 2560 return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P, 2561 (__v4di) __W, 2562 (__mmask8) 2563 __U); 2564 } 2565 /* maskz variant: expandloaddi256 builtin receives (__P cast to const __v4di *, zero vector, __U). */ 2566 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2567 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { 2568 return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P, 2569 (__v4di) 2570 _mm256_setzero_si256 (), 2571 (__mmask8) 2572 __U); 2573 } 2574 /* mask variant: expandloadsf128 builtin receives (__P cast to const __v4sf *, __W, __U). */ 2575 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2576 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) { 2577 return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P, 2578 (__v4sf) __W, 2579 (__mmask8) __U); 2580 } 2581 /* maskz variant: expandloadsf128 builtin receives __P cast to const __v4sf * first; remaining operands follow this line. */ 2582 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2583 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { 2584 return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P, 2585 (__v4sf) 2586 
_mm_setzero_ps (), 2587 (__mmask8) 2588 __U); 2589 } 2590 /* mask variant: expandloadsf256 builtin receives (__P cast to const __v8sf *, __W, __U). */ 2591 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2592 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) { 2593 return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P, 2594 (__v8sf) __W, 2595 (__mmask8) __U); 2596 } 2597 /* maskz variant: expandloadsf256 builtin receives (__P cast to const __v8sf *, zero vector, __U). */ 2598 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2599 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { 2600 return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P, 2601 (__v8sf) 2602 _mm256_setzero_ps (), 2603 (__mmask8) 2604 __U); 2605 } 2606 /* mask variant: expandloadsi128 builtin receives (__P cast to const __v4si *, __W, __U). */ 2607 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2608 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) { 2609 return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P, 2610 (__v4si) __W, 2611 (__mmask8) 2612 __U); 2613 } 2614 /* maskz variant: expandloadsi128 builtin receives (__P cast to const __v4si *, zero vector, __U). */ 2615 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2616 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { 2617 return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P, 2618 (__v4si) 2619 _mm_setzero_si128 (), 2620 (__mmask8) __U); 2621 } 2622 /* mask variant: expandloadsi256 builtin receives (__P cast to const __v8si *, __W, __U). */ 2623 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2624 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U, 2625 void const *__P) { 2626 return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P, 2627 (__v8si) __W, 2628 (__mmask8) 2629 __U); 2630 } 2631 /* maskz variant: expandloadsi256 builtin receives (__P cast to const __v8si *, zero vector, __U). */ 2632 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2633 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { 2634 return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P, 2635 (__v8si) 2636 _mm256_setzero_si256 (), 2637 (__mmask8) 2638 __U); 2639 } 2640 /* mask variant: expandsf128 builtin receives (__A, __W, __U). */ 2641 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2642 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) { 2643 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, 2644 (__v4sf) __W, 2645 (__mmask8) __U); 2646 } 2647 /* Next definition (the 128-bit maskz variant of expand_ps) starts here and continues past this line. */ 2648 static __inline__ __m128 
__DEFAULT_FN_ATTRS128 2649 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A) { 2650 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, 2651 (__v4sf) 2652 _mm_setzero_ps (), 2653 (__mmask8) __U); 2654 } 2655 /* mask variant: expandsf256 builtin receives (__A, __W, __U). */ 2656 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2657 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) { 2658 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, 2659 (__v8sf) __W, 2660 (__mmask8) __U); 2661 } 2662 /* maskz variant: expandsf256 builtin receives (__A, zero vector, __U). */ 2663 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2664 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) { 2665 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, 2666 (__v8sf) 2667 _mm256_setzero_ps (), 2668 (__mmask8) __U); 2669 } 2670 /* mask variant: expandsi128 builtin receives (__A, __W, __U). */ 2671 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2672 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { 2673 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, 2674 (__v4si) __W, 2675 (__mmask8) __U); 2676 } 2677 /* maskz variant: expandsi128 builtin receives (__A, zero vector, __U). */ 2678 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2679 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) { 2680 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, 2681 (__v4si) 2682 _mm_setzero_si128 (), 2683 (__mmask8) __U); 2684 } 2685 /* mask variant: expandsi256 builtin receives (__A, __W, __U). */ 2686 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2687 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { 2688 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, 2689 (__v8si) __W, 2690 (__mmask8) __U); 2691 } 2692 /* maskz variant: expandsi256 builtin receives (__A, zero vector, __U). */ 2693 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2694 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) { 2695 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, 2696 (__v8si) 2697 _mm256_setzero_si256 (), 2698 (__mmask8) __U); 2699 } 2700 /* Unmasked form: getexppd128 masked builtin receives (__A, zero vector, (__mmask8) -1). */ 2701 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2702 _mm_getexp_pd (__m128d __A) { 2703 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2704 (__v2df) 2705 _mm_setzero_pd (), 2706 (__mmask8) -1); 2707 } 2708 /* Next definition (the 128-bit mask variant of getexp_pd) starts here and continues past this line. */ 2709 static __inline__ __m128d __DEFAULT_FN_ATTRS128 
2710 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) { 2711 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2712 (__v2df) __W, 2713 (__mmask8) __U); 2714 } 2715 /* maskz variant: getexppd128 masked builtin receives (__A, zero vector, __U). */ 2716 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2717 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) { 2718 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2719 (__v2df) 2720 _mm_setzero_pd (), 2721 (__mmask8) __U); 2722 } 2723 /* Unmasked form: getexppd256 masked builtin receives (__A, zero vector, (__mmask8) -1). */ 2724 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2725 _mm256_getexp_pd (__m256d __A) { 2726 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 2727 (__v4df) 2728 _mm256_setzero_pd (), 2729 (__mmask8) -1); 2730 } 2731 /* mask variant: getexppd256 masked builtin receives (__A, __W, __U). */ 2732 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2733 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) { 2734 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 2735 (__v4df) __W, 2736 (__mmask8) __U); 2737 } 2738 /* maskz variant: getexppd256 masked builtin receives (__A, zero vector, __U). */ 2739 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2740 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) { 2741 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 2742 (__v4df) 2743 _mm256_setzero_pd (), 2744 (__mmask8) __U); 2745 } 2746 /* Unmasked form: getexpps128 masked builtin receives (__A, zero vector, (__mmask8) -1). */ 2747 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2748 _mm_getexp_ps (__m128 __A) { 2749 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 2750 (__v4sf) 2751 _mm_setzero_ps (), 2752 (__mmask8) -1); 2753 } 2754 /* mask variant: getexpps128 masked builtin receives (__A, __W, __U). */ 2755 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2756 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) { 2757 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 2758 (__v4sf) __W, 2759 (__mmask8) __U); 2760 } 2761 /* maskz variant: getexpps128 masked builtin receives (__A, zero vector, __U). */ 2762 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2763 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) { 2764 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 2765 (__v4sf) 2766 _mm_setzero_ps (), 2767 (__mmask8) __U); 2768 } 2769 /* Unmasked form: 256-bit getexp_ps; the masked-builtin call follows this line. */ 2770 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2771 _mm256_getexp_ps (__m256 __A) { 2772 return (__m256) 
__builtin_ia32_getexpps256_mask ((__v8sf) __A, 2773 (__v8sf) 2774 _mm256_setzero_ps (), 2775 (__mmask8) -1); 2776 } 2777 2778 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2779 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) { 2780 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 2781 (__v8sf) __W, 2782 (__mmask8) __U); 2783 } 2784 2785 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2786 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) { 2787 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 2788 (__v8sf) 2789 _mm256_setzero_ps (), 2790 (__mmask8) __U); 2791 } 2792 2793 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2794 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2795 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2796 (__v2df)_mm_max_pd(__A, __B), 2797 (__v2df)__W); 2798 } 2799 2800 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2801 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2802 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2803 (__v2df)_mm_max_pd(__A, __B), 2804 (__v2df)_mm_setzero_pd()); 2805 } 2806 2807 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2808 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2809 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2810 (__v4df)_mm256_max_pd(__A, __B), 2811 (__v4df)__W); 2812 } 2813 2814 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2815 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2816 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2817 (__v4df)_mm256_max_pd(__A, __B), 2818 (__v4df)_mm256_setzero_pd()); 2819 } 2820 2821 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2822 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2823 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2824 (__v4sf)_mm_max_ps(__A, __B), 2825 (__v4sf)__W); 2826 } 2827 2828 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2829 
_mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2830 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2831 (__v4sf)_mm_max_ps(__A, __B), 2832 (__v4sf)_mm_setzero_ps()); 2833 } 2834 2835 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2836 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2837 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2838 (__v8sf)_mm256_max_ps(__A, __B), 2839 (__v8sf)__W); 2840 } 2841 2842 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2843 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2844 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2845 (__v8sf)_mm256_max_ps(__A, __B), 2846 (__v8sf)_mm256_setzero_ps()); 2847 } 2848 2849 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2850 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2851 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2852 (__v2df)_mm_min_pd(__A, __B), 2853 (__v2df)__W); 2854 } 2855 2856 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2857 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2858 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2859 (__v2df)_mm_min_pd(__A, __B), 2860 (__v2df)_mm_setzero_pd()); 2861 } 2862 2863 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2864 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2865 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2866 (__v4df)_mm256_min_pd(__A, __B), 2867 (__v4df)__W); 2868 } 2869 2870 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2871 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2872 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2873 (__v4df)_mm256_min_pd(__A, __B), 2874 (__v4df)_mm256_setzero_pd()); 2875 } 2876 2877 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2878 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2879 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2880 (__v4sf)_mm_min_ps(__A, __B), 
2881 (__v4sf)__W); 2882 } 2883 2884 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2885 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2886 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2887 (__v4sf)_mm_min_ps(__A, __B), 2888 (__v4sf)_mm_setzero_ps()); 2889 } 2890 2891 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2892 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2893 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2894 (__v8sf)_mm256_min_ps(__A, __B), 2895 (__v8sf)__W); 2896 } 2897 2898 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2899 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2900 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2901 (__v8sf)_mm256_min_ps(__A, __B), 2902 (__v8sf)_mm256_setzero_ps()); 2903 } 2904 2905 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2906 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2907 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2908 (__v2df)_mm_mul_pd(__A, __B), 2909 (__v2df)__W); 2910 } 2911 2912 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2913 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2914 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2915 (__v2df)_mm_mul_pd(__A, __B), 2916 (__v2df)_mm_setzero_pd()); 2917 } 2918 2919 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2920 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2921 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2922 (__v4df)_mm256_mul_pd(__A, __B), 2923 (__v4df)__W); 2924 } 2925 2926 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2927 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2928 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2929 (__v4df)_mm256_mul_pd(__A, __B), 2930 (__v4df)_mm256_setzero_pd()); 2931 } 2932 2933 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2934 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2935 
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2936 (__v4sf)_mm_mul_ps(__A, __B), 2937 (__v4sf)__W); 2938 } 2939 2940 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2941 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2942 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2943 (__v4sf)_mm_mul_ps(__A, __B), 2944 (__v4sf)_mm_setzero_ps()); 2945 } 2946 2947 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2948 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2949 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2950 (__v8sf)_mm256_mul_ps(__A, __B), 2951 (__v8sf)__W); 2952 } 2953 2954 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2955 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2956 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2957 (__v8sf)_mm256_mul_ps(__A, __B), 2958 (__v8sf)_mm256_setzero_ps()); 2959 } 2960 2961 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2962 _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) { 2963 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2964 (__v4si)_mm_abs_epi32(__A), 2965 (__v4si)__W); 2966 } 2967 2968 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2969 _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) { 2970 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2971 (__v4si)_mm_abs_epi32(__A), 2972 (__v4si)_mm_setzero_si128()); 2973 } 2974 2975 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2976 _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) { 2977 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2978 (__v8si)_mm256_abs_epi32(__A), 2979 (__v8si)__W); 2980 } 2981 2982 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2983 _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) { 2984 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2985 (__v8si)_mm256_abs_epi32(__A), 2986 (__v8si)_mm256_setzero_si256()); 2987 } 2988 2989 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2990 _mm_abs_epi64 (__m128i __A) { 
2991 return (__m128i)__builtin_ia32_pabsq128((__v2di)__A); 2992 } 2993 2994 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2995 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 2996 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 2997 (__v2di)_mm_abs_epi64(__A), 2998 (__v2di)__W); 2999 } 3000 3001 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3002 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) { 3003 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 3004 (__v2di)_mm_abs_epi64(__A), 3005 (__v2di)_mm_setzero_si128()); 3006 } 3007 3008 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3009 _mm256_abs_epi64 (__m256i __A) { 3010 return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A); 3011 } 3012 3013 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3014 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 3015 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 3016 (__v4di)_mm256_abs_epi64(__A), 3017 (__v4di)__W); 3018 } 3019 3020 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3021 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) { 3022 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 3023 (__v4di)_mm256_abs_epi64(__A), 3024 (__v4di)_mm256_setzero_si256()); 3025 } 3026 3027 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3028 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) { 3029 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3030 (__v4si)_mm_max_epi32(__A, __B), 3031 (__v4si)_mm_setzero_si128()); 3032 } 3033 3034 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3035 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3036 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3037 (__v4si)_mm_max_epi32(__A, __B), 3038 (__v4si)__W); 3039 } 3040 3041 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3042 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) { 3043 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3044 (__v8si)_mm256_max_epi32(__A, 
__B), 3045 (__v8si)_mm256_setzero_si256()); 3046 } 3047 3048 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3049 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3050 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3051 (__v8si)_mm256_max_epi32(__A, __B), 3052 (__v8si)__W); 3053 } 3054 3055 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3056 _mm_max_epi64 (__m128i __A, __m128i __B) { 3057 return (__m128i)__builtin_ia32_pmaxsq128((__v2di)__A, (__v2di)__B); 3058 } 3059 3060 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3061 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { 3062 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3063 (__v2di)_mm_max_epi64(__A, __B), 3064 (__v2di)_mm_setzero_si128()); 3065 } 3066 3067 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3068 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3069 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3070 (__v2di)_mm_max_epi64(__A, __B), 3071 (__v2di)__W); 3072 } 3073 3074 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3075 _mm256_max_epi64 (__m256i __A, __m256i __B) { 3076 return (__m256i)__builtin_ia32_pmaxsq256((__v4di)__A, (__v4di)__B); 3077 } 3078 3079 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3080 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { 3081 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3082 (__v4di)_mm256_max_epi64(__A, __B), 3083 (__v4di)_mm256_setzero_si256()); 3084 } 3085 3086 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3087 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3088 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3089 (__v4di)_mm256_max_epi64(__A, __B), 3090 (__v4di)__W); 3091 } 3092 3093 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3094 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) { 3095 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3096 (__v4si)_mm_max_epu32(__A, 
__B), 3097 (__v4si)_mm_setzero_si128()); 3098 } 3099 3100 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3101 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3102 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3103 (__v4si)_mm_max_epu32(__A, __B), 3104 (__v4si)__W); 3105 } 3106 3107 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3108 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) { 3109 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3110 (__v8si)_mm256_max_epu32(__A, __B), 3111 (__v8si)_mm256_setzero_si256()); 3112 } 3113 3114 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3115 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3116 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3117 (__v8si)_mm256_max_epu32(__A, __B), 3118 (__v8si)__W); 3119 } 3120 3121 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3122 _mm_max_epu64 (__m128i __A, __m128i __B) { 3123 return (__m128i)__builtin_ia32_pmaxuq128((__v2di)__A, (__v2di)__B); 3124 } 3125 3126 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3127 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { 3128 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3129 (__v2di)_mm_max_epu64(__A, __B), 3130 (__v2di)_mm_setzero_si128()); 3131 } 3132 3133 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3134 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3135 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3136 (__v2di)_mm_max_epu64(__A, __B), 3137 (__v2di)__W); 3138 } 3139 3140 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3141 _mm256_max_epu64 (__m256i __A, __m256i __B) { 3142 return (__m256i)__builtin_ia32_pmaxuq256((__v4di)__A, (__v4di)__B); 3143 } 3144 3145 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3146 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { 3147 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3148 (__v4di)_mm256_max_epu64(__A, 
__B), 3149 (__v4di)_mm256_setzero_si256()); 3150 } 3151 3152 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3153 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3154 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3155 (__v4di)_mm256_max_epu64(__A, __B), 3156 (__v4di)__W); 3157 } 3158 3159 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3160 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) { 3161 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3162 (__v4si)_mm_min_epi32(__A, __B), 3163 (__v4si)_mm_setzero_si128()); 3164 } 3165 3166 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3167 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3168 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3169 (__v4si)_mm_min_epi32(__A, __B), 3170 (__v4si)__W); 3171 } 3172 3173 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3174 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) { 3175 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3176 (__v8si)_mm256_min_epi32(__A, __B), 3177 (__v8si)_mm256_setzero_si256()); 3178 } 3179 3180 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3181 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3182 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3183 (__v8si)_mm256_min_epi32(__A, __B), 3184 (__v8si)__W); 3185 } 3186 3187 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3188 _mm_min_epi64 (__m128i __A, __m128i __B) { 3189 return (__m128i)__builtin_ia32_pminsq128((__v2di)__A, (__v2di)__B); 3190 } 3191 3192 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3193 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3194 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3195 (__v2di)_mm_min_epi64(__A, __B), 3196 (__v2di)__W); 3197 } 3198 3199 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3200 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { 3201 return 
(__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3202 (__v2di)_mm_min_epi64(__A, __B), 3203 (__v2di)_mm_setzero_si128()); 3204 } 3205 3206 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3207 _mm256_min_epi64 (__m256i __A, __m256i __B) { 3208 return (__m256i)__builtin_ia32_pminsq256((__v4di)__A, (__v4di)__B); 3209 } 3210 3211 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3212 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3213 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3214 (__v4di)_mm256_min_epi64(__A, __B), 3215 (__v4di)__W); 3216 } 3217 3218 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3219 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { 3220 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3221 (__v4di)_mm256_min_epi64(__A, __B), 3222 (__v4di)_mm256_setzero_si256()); 3223 } 3224 3225 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3226 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) { 3227 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3228 (__v4si)_mm_min_epu32(__A, __B), 3229 (__v4si)_mm_setzero_si128()); 3230 } 3231 3232 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3233 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3234 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3235 (__v4si)_mm_min_epu32(__A, __B), 3236 (__v4si)__W); 3237 } 3238 3239 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3240 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) { 3241 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3242 (__v8si)_mm256_min_epu32(__A, __B), 3243 (__v8si)_mm256_setzero_si256()); 3244 } 3245 3246 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3247 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3248 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3249 (__v8si)_mm256_min_epu32(__A, __B), 3250 (__v8si)__W); 3251 } 3252 3253 static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 3254 _mm_min_epu64 (__m128i __A, __m128i __B) { 3255 return (__m128i)__builtin_ia32_pminuq128((__v2di)__A, (__v2di)__B); 3256 } 3257 3258 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3259 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3260 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3261 (__v2di)_mm_min_epu64(__A, __B), 3262 (__v2di)__W); 3263 } 3264 3265 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3266 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { 3267 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3268 (__v2di)_mm_min_epu64(__A, __B), 3269 (__v2di)_mm_setzero_si128()); 3270 } 3271 3272 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3273 _mm256_min_epu64 (__m256i __A, __m256i __B) { 3274 return (__m256i)__builtin_ia32_pminuq256((__v4di)__A, (__v4di)__B); 3275 } 3276 3277 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3278 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3279 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3280 (__v4di)_mm256_min_epu64(__A, __B), 3281 (__v4di)__W); 3282 } 3283 3284 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3285 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { 3286 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3287 (__v4di)_mm256_min_epu64(__A, __B), 3288 (__v4di)_mm256_setzero_si256()); 3289 } 3290 3291 #define _mm_roundscale_pd(A, imm) \ 3292 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3293 (int)(imm), \ 3294 (__v2df)_mm_setzero_pd(), \ 3295 (__mmask8)-1) 3296 3297 3298 #define _mm_mask_roundscale_pd(W, U, A, imm) \ 3299 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3300 (int)(imm), \ 3301 (__v2df)(__m128d)(W), \ 3302 (__mmask8)(U)) 3303 3304 3305 #define _mm_maskz_roundscale_pd(U, A, imm) \ 3306 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3307 (int)(imm), \ 3308 (__v2df)_mm_setzero_pd(), \ 
3309 (__mmask8)(U)) 3310 3311 3312 #define _mm256_roundscale_pd(A, imm) \ 3313 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3314 (int)(imm), \ 3315 (__v4df)_mm256_setzero_pd(), \ 3316 (__mmask8)-1) 3317 3318 3319 #define _mm256_mask_roundscale_pd(W, U, A, imm) \ 3320 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3321 (int)(imm), \ 3322 (__v4df)(__m256d)(W), \ 3323 (__mmask8)(U)) 3324 3325 3326 #define _mm256_maskz_roundscale_pd(U, A, imm) \ 3327 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3328 (int)(imm), \ 3329 (__v4df)_mm256_setzero_pd(), \ 3330 (__mmask8)(U)) 3331 3332 #define _mm_roundscale_ps(A, imm) \ 3333 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3334 (__v4sf)_mm_setzero_ps(), \ 3335 (__mmask8)-1) 3336 3337 3338 #define _mm_mask_roundscale_ps(W, U, A, imm) \ 3339 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3340 (__v4sf)(__m128)(W), \ 3341 (__mmask8)(U)) 3342 3343 3344 #define _mm_maskz_roundscale_ps(U, A, imm) \ 3345 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3346 (__v4sf)_mm_setzero_ps(), \ 3347 (__mmask8)(U)) 3348 3349 #define _mm256_roundscale_ps(A, imm) \ 3350 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3351 (__v8sf)_mm256_setzero_ps(), \ 3352 (__mmask8)-1) 3353 3354 #define _mm256_mask_roundscale_ps(W, U, A, imm) \ 3355 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3356 (__v8sf)(__m256)(W), \ 3357 (__mmask8)(U)) 3358 3359 3360 #define _mm256_maskz_roundscale_ps(U, A, imm) \ 3361 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3362 (__v8sf)_mm256_setzero_ps(), \ 3363 (__mmask8)(U)) 3364 3365 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3366 _mm_scalef_pd (__m128d __A, __m128d __B) { 3367 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3368 (__v2df) __B, 3369 (__v2df) 
3370 _mm_setzero_pd (), 3371 (__mmask8) -1); 3372 } 3373 3374 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3375 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A, 3376 __m128d __B) { 3377 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3378 (__v2df) __B, 3379 (__v2df) __W, 3380 (__mmask8) __U); 3381 } 3382 3383 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3384 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) { 3385 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3386 (__v2df) __B, 3387 (__v2df) 3388 _mm_setzero_pd (), 3389 (__mmask8) __U); 3390 } 3391 3392 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3393 _mm256_scalef_pd (__m256d __A, __m256d __B) { 3394 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3395 (__v4df) __B, 3396 (__v4df) 3397 _mm256_setzero_pd (), 3398 (__mmask8) -1); 3399 } 3400 3401 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3402 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A, 3403 __m256d __B) { 3404 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3405 (__v4df) __B, 3406 (__v4df) __W, 3407 (__mmask8) __U); 3408 } 3409 3410 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3411 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) { 3412 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3413 (__v4df) __B, 3414 (__v4df) 3415 _mm256_setzero_pd (), 3416 (__mmask8) __U); 3417 } 3418 3419 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3420 _mm_scalef_ps (__m128 __A, __m128 __B) { 3421 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3422 (__v4sf) __B, 3423 (__v4sf) 3424 _mm_setzero_ps (), 3425 (__mmask8) -1); 3426 } 3427 3428 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3429 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3430 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3431 (__v4sf) __B, 3432 (__v4sf) __W, 3433 (__mmask8) __U); 3434 } 3435 3436 static __inline__ 
__m128 __DEFAULT_FN_ATTRS128 3437 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) { 3438 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3439 (__v4sf) __B, 3440 (__v4sf) 3441 _mm_setzero_ps (), 3442 (__mmask8) __U); 3443 } 3444 3445 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3446 _mm256_scalef_ps (__m256 __A, __m256 __B) { 3447 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3448 (__v8sf) __B, 3449 (__v8sf) 3450 _mm256_setzero_ps (), 3451 (__mmask8) -1); 3452 } 3453 3454 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3455 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A, 3456 __m256 __B) { 3457 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3458 (__v8sf) __B, 3459 (__v8sf) __W, 3460 (__mmask8) __U); 3461 } 3462 3463 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3464 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { 3465 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3466 (__v8sf) __B, 3467 (__v8sf) 3468 _mm256_setzero_ps (), 3469 (__mmask8) __U); 3470 } 3471 3472 #define _mm_i64scatter_pd(addr, index, v1, scale) \ 3473 __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)-1, \ 3474 (__v2di)(__m128i)(index), \ 3475 (__v2df)(__m128d)(v1), (int)(scale)) 3476 3477 #define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \ 3478 __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)(mask), \ 3479 (__v2di)(__m128i)(index), \ 3480 (__v2df)(__m128d)(v1), (int)(scale)) 3481 3482 #define _mm_i64scatter_epi64(addr, index, v1, scale) \ 3483 __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)-1, \ 3484 (__v2di)(__m128i)(index), \ 3485 (__v2di)(__m128i)(v1), (int)(scale)) 3486 3487 #define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ 3488 __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)(mask), \ 3489 (__v2di)(__m128i)(index), \ 3490 (__v2di)(__m128i)(v1), (int)(scale)) 3491 3492 #define _mm256_i64scatter_pd(addr, index, v1, scale) \ 3493 
__builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)-1, \ 3494 (__v4di)(__m256i)(index), \ 3495 (__v4df)(__m256d)(v1), (int)(scale)) 3496 3497 #define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \ 3498 __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)(mask), \ 3499 (__v4di)(__m256i)(index), \ 3500 (__v4df)(__m256d)(v1), (int)(scale)) 3501 3502 #define _mm256_i64scatter_epi64(addr, index, v1, scale) \ 3503 __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)-1, \ 3504 (__v4di)(__m256i)(index), \ 3505 (__v4di)(__m256i)(v1), (int)(scale)) 3506 3507 #define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ 3508 __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)(mask), \ 3509 (__v4di)(__m256i)(index), \ 3510 (__v4di)(__m256i)(v1), (int)(scale)) 3511 3512 #define _mm_i64scatter_ps(addr, index, v1, scale) \ 3513 __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)-1, \ 3514 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3515 (int)(scale)) 3516 3517 #define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \ 3518 __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)(mask), \ 3519 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3520 (int)(scale)) 3521 3522 #define _mm_i64scatter_epi32(addr, index, v1, scale) \ 3523 __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)-1, \ 3524 (__v2di)(__m128i)(index), \ 3525 (__v4si)(__m128i)(v1), (int)(scale)) 3526 3527 #define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ 3528 __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)(mask), \ 3529 (__v2di)(__m128i)(index), \ 3530 (__v4si)(__m128i)(v1), (int)(scale)) 3531 3532 #define _mm256_i64scatter_ps(addr, index, v1, scale) \ 3533 __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)-1, \ 3534 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ 3535 (int)(scale)) 3536 3537 #define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \ 3538 __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)(mask), \ 3539 
(__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ 3540 (int)(scale)) 3541 3542 #define _mm256_i64scatter_epi32(addr, index, v1, scale) \ 3543 __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)-1, \ 3544 (__v4di)(__m256i)(index), \ 3545 (__v4si)(__m128i)(v1), (int)(scale)) 3546 3547 #define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ 3548 __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)(mask), \ 3549 (__v4di)(__m256i)(index), \ 3550 (__v4si)(__m128i)(v1), (int)(scale)) 3551 3552 #define _mm_i32scatter_pd(addr, index, v1, scale) \ 3553 __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)-1, \ 3554 (__v4si)(__m128i)(index), \ 3555 (__v2df)(__m128d)(v1), (int)(scale)) 3556 3557 #define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \ 3558 __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)(mask), \ 3559 (__v4si)(__m128i)(index), \ 3560 (__v2df)(__m128d)(v1), (int)(scale)) 3561 3562 #define _mm_i32scatter_epi64(addr, index, v1, scale) \ 3563 __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)-1, \ 3564 (__v4si)(__m128i)(index), \ 3565 (__v2di)(__m128i)(v1), (int)(scale)) 3566 3567 #define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ 3568 __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)(mask), \ 3569 (__v4si)(__m128i)(index), \ 3570 (__v2di)(__m128i)(v1), (int)(scale)) 3571 3572 #define _mm256_i32scatter_pd(addr, index, v1, scale) \ 3573 __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)-1, \ 3574 (__v4si)(__m128i)(index), \ 3575 (__v4df)(__m256d)(v1), (int)(scale)) 3576 3577 #define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \ 3578 __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)(mask), \ 3579 (__v4si)(__m128i)(index), \ 3580 (__v4df)(__m256d)(v1), (int)(scale)) 3581 3582 #define _mm256_i32scatter_epi64(addr, index, v1, scale) \ 3583 __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)-1, \ 3584 (__v4si)(__m128i)(index), \ 3585 (__v4di)(__m256i)(v1), (int)(scale)) 3586 3587 
#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ 3588 __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)(mask), \ 3589 (__v4si)(__m128i)(index), \ 3590 (__v4di)(__m256i)(v1), (int)(scale)) 3591 3592 #define _mm_i32scatter_ps(addr, index, v1, scale) \ 3593 __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)-1, \ 3594 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3595 (int)(scale)) 3596 3597 #define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \ 3598 __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)(mask), \ 3599 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3600 (int)(scale)) 3601 3602 #define _mm_i32scatter_epi32(addr, index, v1, scale) \ 3603 __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)-1, \ 3604 (__v4si)(__m128i)(index), \ 3605 (__v4si)(__m128i)(v1), (int)(scale)) 3606 3607 #define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \ 3608 __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)(mask), \ 3609 (__v4si)(__m128i)(index), \ 3610 (__v4si)(__m128i)(v1), (int)(scale)) 3611 3612 #define _mm256_i32scatter_ps(addr, index, v1, scale) \ 3613 __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)-1, \ 3614 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ 3615 (int)(scale)) 3616 3617 #define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \ 3618 __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)(mask), \ 3619 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ 3620 (int)(scale)) 3621 3622 #define _mm256_i32scatter_epi32(addr, index, v1, scale) \ 3623 __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)-1, \ 3624 (__v8si)(__m256i)(index), \ 3625 (__v8si)(__m256i)(v1), (int)(scale)) 3626 3627 #define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \ 3628 __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)(mask), \ 3629 (__v8si)(__m256i)(index), \ 3630 (__v8si)(__m256i)(v1), (int)(scale)) 3631 3632 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3633 
_mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) { 3634 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3635 (__v2df)_mm_sqrt_pd(__A), 3636 (__v2df)__W); 3637 } 3638 3639 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3640 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) { 3641 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3642 (__v2df)_mm_sqrt_pd(__A), 3643 (__v2df)_mm_setzero_pd()); 3644 } 3645 3646 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3647 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) { 3648 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3649 (__v4df)_mm256_sqrt_pd(__A), 3650 (__v4df)__W); 3651 } 3652 3653 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3654 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) { 3655 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3656 (__v4df)_mm256_sqrt_pd(__A), 3657 (__v4df)_mm256_setzero_pd()); 3658 } 3659 3660 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3661 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) { 3662 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3663 (__v4sf)_mm_sqrt_ps(__A), 3664 (__v4sf)__W); 3665 } 3666 3667 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3668 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) { 3669 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3670 (__v4sf)_mm_sqrt_ps(__A), 3671 (__v4sf)_mm_setzero_ps()); 3672 } 3673 3674 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3675 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) { 3676 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3677 (__v8sf)_mm256_sqrt_ps(__A), 3678 (__v8sf)__W); 3679 } 3680 3681 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3682 _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) { 3683 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3684 (__v8sf)_mm256_sqrt_ps(__A), 3685 (__v8sf)_mm256_setzero_ps()); 3686 } 3687 3688 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3689 _mm_mask_sub_pd(__m128d __W, 
__mmask8 __U, __m128d __A, __m128d __B) { 3690 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3691 (__v2df)_mm_sub_pd(__A, __B), 3692 (__v2df)__W); 3693 } 3694 3695 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3696 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) { 3697 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3698 (__v2df)_mm_sub_pd(__A, __B), 3699 (__v2df)_mm_setzero_pd()); 3700 } 3701 3702 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3703 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 3704 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3705 (__v4df)_mm256_sub_pd(__A, __B), 3706 (__v4df)__W); 3707 } 3708 3709 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3710 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) { 3711 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3712 (__v4df)_mm256_sub_pd(__A, __B), 3713 (__v4df)_mm256_setzero_pd()); 3714 } 3715 3716 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3717 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3718 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3719 (__v4sf)_mm_sub_ps(__A, __B), 3720 (__v4sf)__W); 3721 } 3722 3723 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3724 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) { 3725 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3726 (__v4sf)_mm_sub_ps(__A, __B), 3727 (__v4sf)_mm_setzero_ps()); 3728 } 3729 3730 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3731 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 3732 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3733 (__v8sf)_mm256_sub_ps(__A, __B), 3734 (__v8sf)__W); 3735 } 3736 3737 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3738 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) { 3739 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3740 (__v8sf)_mm256_sub_ps(__A, __B), 3741 (__v8sf)_mm256_setzero_ps()); 3742 } 
3743 3744 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3745 _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) { 3746 return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I, 3747 (__v4si)__B); 3748 } 3749 3750 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3751 _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, 3752 __m128i __B) { 3753 return (__m128i)__builtin_ia32_selectd_128(__U, 3754 (__v4si)_mm_permutex2var_epi32(__A, __I, __B), 3755 (__v4si)__A); 3756 } 3757 3758 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3759 _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, 3760 __m128i __B) { 3761 return (__m128i)__builtin_ia32_selectd_128(__U, 3762 (__v4si)_mm_permutex2var_epi32(__A, __I, __B), 3763 (__v4si)__I); 3764 } 3765 3766 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3767 _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, 3768 __m128i __B) { 3769 return (__m128i)__builtin_ia32_selectd_128(__U, 3770 (__v4si)_mm_permutex2var_epi32(__A, __I, __B), 3771 (__v4si)_mm_setzero_si128()); 3772 } 3773 3774 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3775 _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) { 3776 return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I, 3777 (__v8si) __B); 3778 } 3779 3780 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3781 _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, 3782 __m256i __B) { 3783 return (__m256i)__builtin_ia32_selectd_256(__U, 3784 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), 3785 (__v8si)__A); 3786 } 3787 3788 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3789 _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, 3790 __m256i __B) { 3791 return (__m256i)__builtin_ia32_selectd_256(__U, 3792 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), 3793 (__v8si)__I); 3794 } 3795 3796 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3797 
_mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, 3798 __m256i __B) { 3799 return (__m256i)__builtin_ia32_selectd_256(__U, 3800 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), 3801 (__v8si)_mm256_setzero_si256()); 3802 } 3803 3804 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3805 _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) { 3806 return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I, 3807 (__v2df)__B); 3808 } 3809 3810 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3811 _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) { 3812 return (__m128d)__builtin_ia32_selectpd_128(__U, 3813 (__v2df)_mm_permutex2var_pd(__A, __I, __B), 3814 (__v2df)__A); 3815 } 3816 3817 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3818 _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) { 3819 return (__m128d)__builtin_ia32_selectpd_128(__U, 3820 (__v2df)_mm_permutex2var_pd(__A, __I, __B), 3821 (__v2df)(__m128d)__I); 3822 } 3823 3824 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3825 _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) { 3826 return (__m128d)__builtin_ia32_selectpd_128(__U, 3827 (__v2df)_mm_permutex2var_pd(__A, __I, __B), 3828 (__v2df)_mm_setzero_pd()); 3829 } 3830 3831 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3832 _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) { 3833 return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I, 3834 (__v4df)__B); 3835 } 3836 3837 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3838 _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, 3839 __m256d __B) { 3840 return (__m256d)__builtin_ia32_selectpd_256(__U, 3841 (__v4df)_mm256_permutex2var_pd(__A, __I, __B), 3842 (__v4df)__A); 3843 } 3844 3845 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3846 _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U, 3847 __m256d __B) { 3848 return 
(__m256d)__builtin_ia32_selectpd_256(__U, 3849 (__v4df)_mm256_permutex2var_pd(__A, __I, __B), 3850 (__v4df)(__m256d)__I); 3851 } 3852 3853 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3854 _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, 3855 __m256d __B) { 3856 return (__m256d)__builtin_ia32_selectpd_256(__U, 3857 (__v4df)_mm256_permutex2var_pd(__A, __I, __B), 3858 (__v4df)_mm256_setzero_pd()); 3859 } 3860 3861 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3862 _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) { 3863 return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I, 3864 (__v4sf)__B); 3865 } 3866 3867 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3868 _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) { 3869 return (__m128)__builtin_ia32_selectps_128(__U, 3870 (__v4sf)_mm_permutex2var_ps(__A, __I, __B), 3871 (__v4sf)__A); 3872 } 3873 3874 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3875 _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) { 3876 return (__m128)__builtin_ia32_selectps_128(__U, 3877 (__v4sf)_mm_permutex2var_ps(__A, __I, __B), 3878 (__v4sf)(__m128)__I); 3879 } 3880 3881 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3882 _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) { 3883 return (__m128)__builtin_ia32_selectps_128(__U, 3884 (__v4sf)_mm_permutex2var_ps(__A, __I, __B), 3885 (__v4sf)_mm_setzero_ps()); 3886 } 3887 3888 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3889 _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) { 3890 return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I, 3891 (__v8sf) __B); 3892 } 3893 3894 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3895 _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) { 3896 return (__m256)__builtin_ia32_selectps_256(__U, 3897 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), 3898 (__v8sf)__A); 3899 } 3900 3901 static 
__inline__ __m256 __DEFAULT_FN_ATTRS256 3902 _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U, 3903 __m256 __B) { 3904 return (__m256)__builtin_ia32_selectps_256(__U, 3905 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), 3906 (__v8sf)(__m256)__I); 3907 } 3908 3909 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3910 _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, 3911 __m256 __B) { 3912 return (__m256)__builtin_ia32_selectps_256(__U, 3913 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), 3914 (__v8sf)_mm256_setzero_ps()); 3915 } 3916 3917 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3918 _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) { 3919 return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I, 3920 (__v2di)__B); 3921 } 3922 3923 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3924 _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, 3925 __m128i __B) { 3926 return (__m128i)__builtin_ia32_selectq_128(__U, 3927 (__v2di)_mm_permutex2var_epi64(__A, __I, __B), 3928 (__v2di)__A); 3929 } 3930 3931 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3932 _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U, 3933 __m128i __B) { 3934 return (__m128i)__builtin_ia32_selectq_128(__U, 3935 (__v2di)_mm_permutex2var_epi64(__A, __I, __B), 3936 (__v2di)__I); 3937 } 3938 3939 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3940 _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, 3941 __m128i __B) { 3942 return (__m128i)__builtin_ia32_selectq_128(__U, 3943 (__v2di)_mm_permutex2var_epi64(__A, __I, __B), 3944 (__v2di)_mm_setzero_si128()); 3945 } 3946 3947 3948 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3949 _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) { 3950 return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I, 3951 (__v4di) __B); 3952 } 3953 3954 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3955 
_mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, 3956 __m256i __B) { 3957 return (__m256i)__builtin_ia32_selectq_256(__U, 3958 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), 3959 (__v4di)__A); 3960 } 3961 3962 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3963 _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, 3964 __m256i __B) { 3965 return (__m256i)__builtin_ia32_selectq_256(__U, 3966 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), 3967 (__v4di)__I); 3968 } 3969 3970 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3971 _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, 3972 __m256i __B) { 3973 return (__m256i)__builtin_ia32_selectq_256(__U, 3974 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), 3975 (__v4di)_mm256_setzero_si256()); 3976 } 3977 3978 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3979 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A) 3980 { 3981 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 3982 (__v4si)_mm_cvtepi8_epi32(__A), 3983 (__v4si)__W); 3984 } 3985 3986 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3987 _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A) 3988 { 3989 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 3990 (__v4si)_mm_cvtepi8_epi32(__A), 3991 (__v4si)_mm_setzero_si128()); 3992 } 3993 3994 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3995 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A) 3996 { 3997 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 3998 (__v8si)_mm256_cvtepi8_epi32(__A), 3999 (__v8si)__W); 4000 } 4001 4002 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4003 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A) 4004 { 4005 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4006 (__v8si)_mm256_cvtepi8_epi32(__A), 4007 (__v8si)_mm256_setzero_si256()); 4008 } 4009 4010 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4011 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, 
__m128i __A) 4012 { 4013 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4014 (__v2di)_mm_cvtepi8_epi64(__A), 4015 (__v2di)__W); 4016 } 4017 4018 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4019 _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) 4020 { 4021 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4022 (__v2di)_mm_cvtepi8_epi64(__A), 4023 (__v2di)_mm_setzero_si128()); 4024 } 4025 4026 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4027 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4028 { 4029 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4030 (__v4di)_mm256_cvtepi8_epi64(__A), 4031 (__v4di)__W); 4032 } 4033 4034 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4035 _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) 4036 { 4037 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4038 (__v4di)_mm256_cvtepi8_epi64(__A), 4039 (__v4di)_mm256_setzero_si256()); 4040 } 4041 4042 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4043 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X) 4044 { 4045 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4046 (__v2di)_mm_cvtepi32_epi64(__X), 4047 (__v2di)__W); 4048 } 4049 4050 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4051 _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) 4052 { 4053 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4054 (__v2di)_mm_cvtepi32_epi64(__X), 4055 (__v2di)_mm_setzero_si128()); 4056 } 4057 4058 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4059 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X) 4060 { 4061 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4062 (__v4di)_mm256_cvtepi32_epi64(__X), 4063 (__v4di)__W); 4064 } 4065 4066 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4067 _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) 4068 { 4069 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4070 (__v4di)_mm256_cvtepi32_epi64(__X), 4071 
(__v4di)_mm256_setzero_si256()); 4072 } 4073 4074 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4075 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4076 { 4077 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4078 (__v4si)_mm_cvtepi16_epi32(__A), 4079 (__v4si)__W); 4080 } 4081 4082 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4083 _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A) 4084 { 4085 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4086 (__v4si)_mm_cvtepi16_epi32(__A), 4087 (__v4si)_mm_setzero_si128()); 4088 } 4089 4090 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4091 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4092 { 4093 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4094 (__v8si)_mm256_cvtepi16_epi32(__A), 4095 (__v8si)__W); 4096 } 4097 4098 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4099 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A) 4100 { 4101 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4102 (__v8si)_mm256_cvtepi16_epi32(__A), 4103 (__v8si)_mm256_setzero_si256()); 4104 } 4105 4106 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4107 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4108 { 4109 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4110 (__v2di)_mm_cvtepi16_epi64(__A), 4111 (__v2di)__W); 4112 } 4113 4114 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4115 _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) 4116 { 4117 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4118 (__v2di)_mm_cvtepi16_epi64(__A), 4119 (__v2di)_mm_setzero_si128()); 4120 } 4121 4122 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4123 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4124 { 4125 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4126 (__v4di)_mm256_cvtepi16_epi64(__A), 4127 (__v4di)__W); 4128 } 4129 4130 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4131 
_mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) 4132 { 4133 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4134 (__v4di)_mm256_cvtepi16_epi64(__A), 4135 (__v4di)_mm256_setzero_si256()); 4136 } 4137 4138 4139 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4140 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4141 { 4142 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4143 (__v4si)_mm_cvtepu8_epi32(__A), 4144 (__v4si)__W); 4145 } 4146 4147 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4148 _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) 4149 { 4150 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4151 (__v4si)_mm_cvtepu8_epi32(__A), 4152 (__v4si)_mm_setzero_si128()); 4153 } 4154 4155 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4156 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4157 { 4158 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4159 (__v8si)_mm256_cvtepu8_epi32(__A), 4160 (__v8si)__W); 4161 } 4162 4163 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4164 _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) 4165 { 4166 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4167 (__v8si)_mm256_cvtepu8_epi32(__A), 4168 (__v8si)_mm256_setzero_si256()); 4169 } 4170 4171 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4172 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4173 { 4174 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4175 (__v2di)_mm_cvtepu8_epi64(__A), 4176 (__v2di)__W); 4177 } 4178 4179 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4180 _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) 4181 { 4182 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4183 (__v2di)_mm_cvtepu8_epi64(__A), 4184 (__v2di)_mm_setzero_si128()); 4185 } 4186 4187 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4188 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4189 { 4190 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 
4191 (__v4di)_mm256_cvtepu8_epi64(__A), 4192 (__v4di)__W); 4193 } 4194 4195 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4196 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) 4197 { 4198 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4199 (__v4di)_mm256_cvtepu8_epi64(__A), 4200 (__v4di)_mm256_setzero_si256()); 4201 } 4202 4203 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4204 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X) 4205 { 4206 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4207 (__v2di)_mm_cvtepu32_epi64(__X), 4208 (__v2di)__W); 4209 } 4210 4211 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4212 _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) 4213 { 4214 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4215 (__v2di)_mm_cvtepu32_epi64(__X), 4216 (__v2di)_mm_setzero_si128()); 4217 } 4218 4219 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4220 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X) 4221 { 4222 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4223 (__v4di)_mm256_cvtepu32_epi64(__X), 4224 (__v4di)__W); 4225 } 4226 4227 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4228 _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) 4229 { 4230 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4231 (__v4di)_mm256_cvtepu32_epi64(__X), 4232 (__v4di)_mm256_setzero_si256()); 4233 } 4234 4235 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4236 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4237 { 4238 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4239 (__v4si)_mm_cvtepu16_epi32(__A), 4240 (__v4si)__W); 4241 } 4242 4243 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4244 _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) 4245 { 4246 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4247 (__v4si)_mm_cvtepu16_epi32(__A), 4248 (__v4si)_mm_setzero_si128()); 4249 } 4250 4251 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4252 
_mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4253 { 4254 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4255 (__v8si)_mm256_cvtepu16_epi32(__A), 4256 (__v8si)__W); 4257 } 4258 4259 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4260 _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) 4261 { 4262 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4263 (__v8si)_mm256_cvtepu16_epi32(__A), 4264 (__v8si)_mm256_setzero_si256()); 4265 } 4266 4267 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4268 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4269 { 4270 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4271 (__v2di)_mm_cvtepu16_epi64(__A), 4272 (__v2di)__W); 4273 } 4274 4275 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4276 _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) 4277 { 4278 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4279 (__v2di)_mm_cvtepu16_epi64(__A), 4280 (__v2di)_mm_setzero_si128()); 4281 } 4282 4283 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4284 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4285 { 4286 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4287 (__v4di)_mm256_cvtepu16_epi64(__A), 4288 (__v4di)__W); 4289 } 4290 4291 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4292 _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) 4293 { 4294 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4295 (__v4di)_mm256_cvtepu16_epi64(__A), 4296 (__v4di)_mm256_setzero_si256()); 4297 } 4298 4299 4300 #define _mm_rol_epi32(a, b) \ 4301 (__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b)) 4302 4303 #define _mm_mask_rol_epi32(w, u, a, b) \ 4304 (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 4305 (__v4si)_mm_rol_epi32((a), (b)), \ 4306 (__v4si)(__m128i)(w)) 4307 4308 #define _mm_maskz_rol_epi32(u, a, b) \ 4309 (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 4310 (__v4si)_mm_rol_epi32((a), (b)), \ 4311 
(__v4si)_mm_setzero_si128()) 4312 4313 #define _mm256_rol_epi32(a, b) \ 4314 (__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b)) 4315 4316 #define _mm256_mask_rol_epi32(w, u, a, b) \ 4317 (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 4318 (__v8si)_mm256_rol_epi32((a), (b)), \ 4319 (__v8si)(__m256i)(w)) 4320 4321 #define _mm256_maskz_rol_epi32(u, a, b) \ 4322 (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 4323 (__v8si)_mm256_rol_epi32((a), (b)), \ 4324 (__v8si)_mm256_setzero_si256()) 4325 4326 #define _mm_rol_epi64(a, b) \ 4327 (__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b)) 4328 4329 #define _mm_mask_rol_epi64(w, u, a, b) \ 4330 (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 4331 (__v2di)_mm_rol_epi64((a), (b)), \ 4332 (__v2di)(__m128i)(w)) 4333 4334 #define _mm_maskz_rol_epi64(u, a, b) \ 4335 (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 4336 (__v2di)_mm_rol_epi64((a), (b)), \ 4337 (__v2di)_mm_setzero_si128()) 4338 4339 #define _mm256_rol_epi64(a, b) \ 4340 (__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b)) 4341 4342 #define _mm256_mask_rol_epi64(w, u, a, b) \ 4343 (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 4344 (__v4di)_mm256_rol_epi64((a), (b)), \ 4345 (__v4di)(__m256i)(w)) 4346 4347 #define _mm256_maskz_rol_epi64(u, a, b) \ 4348 (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 4349 (__v4di)_mm256_rol_epi64((a), (b)), \ 4350 (__v4di)_mm256_setzero_si256()) 4351 4352 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4353 _mm_rolv_epi32 (__m128i __A, __m128i __B) 4354 { 4355 return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B); 4356 } 4357 4358 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4359 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4360 { 4361 return (__m128i)__builtin_ia32_selectd_128(__U, 4362 (__v4si)_mm_rolv_epi32(__A, __B), 4363 (__v4si)__W); 4364 } 4365 4366 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4367 
_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 4368 { 4369 return (__m128i)__builtin_ia32_selectd_128(__U, 4370 (__v4si)_mm_rolv_epi32(__A, __B), 4371 (__v4si)_mm_setzero_si128()); 4372 } 4373 4374 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4375 _mm256_rolv_epi32 (__m256i __A, __m256i __B) 4376 { 4377 return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B); 4378 } 4379 4380 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4381 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 4382 { 4383 return (__m256i)__builtin_ia32_selectd_256(__U, 4384 (__v8si)_mm256_rolv_epi32(__A, __B), 4385 (__v8si)__W); 4386 } 4387 4388 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4389 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 4390 { 4391 return (__m256i)__builtin_ia32_selectd_256(__U, 4392 (__v8si)_mm256_rolv_epi32(__A, __B), 4393 (__v8si)_mm256_setzero_si256()); 4394 } 4395 4396 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4397 _mm_rolv_epi64 (__m128i __A, __m128i __B) 4398 { 4399 return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B); 4400 } 4401 4402 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4403 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4404 { 4405 return (__m128i)__builtin_ia32_selectq_128(__U, 4406 (__v2di)_mm_rolv_epi64(__A, __B), 4407 (__v2di)__W); 4408 } 4409 4410 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4411 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 4412 { 4413 return (__m128i)__builtin_ia32_selectq_128(__U, 4414 (__v2di)_mm_rolv_epi64(__A, __B), 4415 (__v2di)_mm_setzero_si128()); 4416 } 4417 4418 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4419 _mm256_rolv_epi64 (__m256i __A, __m256i __B) 4420 { 4421 return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B); 4422 } 4423 4424 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4425 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 
4426 { 4427 return (__m256i)__builtin_ia32_selectq_256(__U, 4428 (__v4di)_mm256_rolv_epi64(__A, __B), 4429 (__v4di)__W); 4430 } 4431 4432 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4433 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 4434 { 4435 return (__m256i)__builtin_ia32_selectq_256(__U, 4436 (__v4di)_mm256_rolv_epi64(__A, __B), 4437 (__v4di)_mm256_setzero_si256()); 4438 } 4439 4440 #define _mm_ror_epi32(a, b) \ 4441 (__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b)) 4442 4443 #define _mm_mask_ror_epi32(w, u, a, b) \ 4444 (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 4445 (__v4si)_mm_ror_epi32((a), (b)), \ 4446 (__v4si)(__m128i)(w)) 4447 4448 #define _mm_maskz_ror_epi32(u, a, b) \ 4449 (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 4450 (__v4si)_mm_ror_epi32((a), (b)), \ 4451 (__v4si)_mm_setzero_si128()) 4452 4453 #define _mm256_ror_epi32(a, b) \ 4454 (__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b)) 4455 4456 #define _mm256_mask_ror_epi32(w, u, a, b) \ 4457 (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 4458 (__v8si)_mm256_ror_epi32((a), (b)), \ 4459 (__v8si)(__m256i)(w)) 4460 4461 #define _mm256_maskz_ror_epi32(u, a, b) \ 4462 (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 4463 (__v8si)_mm256_ror_epi32((a), (b)), \ 4464 (__v8si)_mm256_setzero_si256()) 4465 4466 #define _mm_ror_epi64(a, b) \ 4467 (__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b)) 4468 4469 #define _mm_mask_ror_epi64(w, u, a, b) \ 4470 (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 4471 (__v2di)_mm_ror_epi64((a), (b)), \ 4472 (__v2di)(__m128i)(w)) 4473 4474 #define _mm_maskz_ror_epi64(u, a, b) \ 4475 (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 4476 (__v2di)_mm_ror_epi64((a), (b)), \ 4477 (__v2di)_mm_setzero_si128()) 4478 4479 #define _mm256_ror_epi64(a, b) \ 4480 (__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b)) 4481 4482 #define _mm256_mask_ror_epi64(w, u, a, b) \ 4483 
(__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 4484 (__v4di)_mm256_ror_epi64((a), (b)), \ 4485 (__v4di)(__m256i)(w)) 4486 4487 #define _mm256_maskz_ror_epi64(u, a, b) \ 4488 (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 4489 (__v4di)_mm256_ror_epi64((a), (b)), \ 4490 (__v4di)_mm256_setzero_si256()) 4491 4492 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4493 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4494 { 4495 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4496 (__v4si)_mm_sll_epi32(__A, __B), 4497 (__v4si)__W); 4498 } 4499 4500 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4501 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B) 4502 { 4503 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4504 (__v4si)_mm_sll_epi32(__A, __B), 4505 (__v4si)_mm_setzero_si128()); 4506 } 4507 4508 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4509 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 4510 { 4511 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4512 (__v8si)_mm256_sll_epi32(__A, __B), 4513 (__v8si)__W); 4514 } 4515 4516 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4517 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B) 4518 { 4519 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4520 (__v8si)_mm256_sll_epi32(__A, __B), 4521 (__v8si)_mm256_setzero_si256()); 4522 } 4523 4524 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4525 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 4526 { 4527 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4528 (__v4si)_mm_slli_epi32(__A, __B), 4529 (__v4si)__W); 4530 } 4531 4532 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4533 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B) 4534 { 4535 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4536 (__v4si)_mm_slli_epi32(__A, __B), 4537 (__v4si)_mm_setzero_si128()); 4538 } 4539 4540 static __inline__ 
__m256i __DEFAULT_FN_ATTRS256 4541 _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 4542 { 4543 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4544 (__v8si)_mm256_slli_epi32(__A, __B), 4545 (__v8si)__W); 4546 } 4547 4548 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4549 _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) 4550 { 4551 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4552 (__v8si)_mm256_slli_epi32(__A, __B), 4553 (__v8si)_mm256_setzero_si256()); 4554 } 4555 4556 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4557 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4558 { 4559 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4560 (__v2di)_mm_sll_epi64(__A, __B), 4561 (__v2di)__W); 4562 } 4563 4564 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4565 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B) 4566 { 4567 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4568 (__v2di)_mm_sll_epi64(__A, __B), 4569 (__v2di)_mm_setzero_si128()); 4570 } 4571 4572 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4573 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 4574 { 4575 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4576 (__v4di)_mm256_sll_epi64(__A, __B), 4577 (__v4di)__W); 4578 } 4579 4580 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4581 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B) 4582 { 4583 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4584 (__v4di)_mm256_sll_epi64(__A, __B), 4585 (__v4di)_mm256_setzero_si256()); 4586 } 4587 4588 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4589 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 4590 { 4591 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4592 (__v2di)_mm_slli_epi64(__A, __B), 4593 (__v2di)__W); 4594 } 4595 4596 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4597 
_mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B) 4598 { 4599 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4600 (__v2di)_mm_slli_epi64(__A, __B), 4601 (__v2di)_mm_setzero_si128()); 4602 } 4603 4604 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4605 _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 4606 { 4607 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4608 (__v4di)_mm256_slli_epi64(__A, __B), 4609 (__v4di)__W); 4610 } 4611 4612 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4613 _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) 4614 { 4615 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4616 (__v4di)_mm256_slli_epi64(__A, __B), 4617 (__v4di)_mm256_setzero_si256()); 4618 } 4619 4620 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4621 _mm_rorv_epi32 (__m128i __A, __m128i __B) 4622 { 4623 return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B); 4624 } 4625 4626 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4627 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4628 { 4629 return (__m128i)__builtin_ia32_selectd_128(__U, 4630 (__v4si)_mm_rorv_epi32(__A, __B), 4631 (__v4si)__W); 4632 } 4633 4634 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4635 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 4636 { 4637 return (__m128i)__builtin_ia32_selectd_128(__U, 4638 (__v4si)_mm_rorv_epi32(__A, __B), 4639 (__v4si)_mm_setzero_si128()); 4640 } 4641 4642 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4643 _mm256_rorv_epi32 (__m256i __A, __m256i __B) 4644 { 4645 return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B); 4646 } 4647 4648 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4649 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 4650 { 4651 return (__m256i)__builtin_ia32_selectd_256(__U, 4652 (__v8si)_mm256_rorv_epi32(__A, __B), 4653 (__v8si)__W); 4654 } 4655 4656 static __inline__ 
__m256i __DEFAULT_FN_ATTRS256 4657 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 4658 { 4659 return (__m256i)__builtin_ia32_selectd_256(__U, 4660 (__v8si)_mm256_rorv_epi32(__A, __B), 4661 (__v8si)_mm256_setzero_si256()); 4662 } 4663 4664 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4665 _mm_rorv_epi64 (__m128i __A, __m128i __B) 4666 { 4667 return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B); 4668 } 4669 4670 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4671 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4672 { 4673 return (__m128i)__builtin_ia32_selectq_128(__U, 4674 (__v2di)_mm_rorv_epi64(__A, __B), 4675 (__v2di)__W); 4676 } 4677 4678 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4679 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 4680 { 4681 return (__m128i)__builtin_ia32_selectq_128(__U, 4682 (__v2di)_mm_rorv_epi64(__A, __B), 4683 (__v2di)_mm_setzero_si128()); 4684 } 4685 4686 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4687 _mm256_rorv_epi64 (__m256i __A, __m256i __B) 4688 { 4689 return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B); 4690 } 4691 4692 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4693 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 4694 { 4695 return (__m256i)__builtin_ia32_selectq_256(__U, 4696 (__v4di)_mm256_rorv_epi64(__A, __B), 4697 (__v4di)__W); 4698 } 4699 4700 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4701 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 4702 { 4703 return (__m256i)__builtin_ia32_selectq_256(__U, 4704 (__v4di)_mm256_rorv_epi64(__A, __B), 4705 (__v4di)_mm256_setzero_si256()); 4706 } 4707 4708 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4709 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4710 { 4711 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4712 (__v2di)_mm_sllv_epi64(__X, __Y), 4713 (__v2di)__W); 4714 } 4715 4716 static 
__inline__ __m128i __DEFAULT_FN_ATTRS128 4717 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 4718 { 4719 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4720 (__v2di)_mm_sllv_epi64(__X, __Y), 4721 (__v2di)_mm_setzero_si128()); 4722 } 4723 4724 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4725 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4726 { 4727 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4728 (__v4di)_mm256_sllv_epi64(__X, __Y), 4729 (__v4di)__W); 4730 } 4731 4732 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4733 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) 4734 { 4735 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4736 (__v4di)_mm256_sllv_epi64(__X, __Y), 4737 (__v4di)_mm256_setzero_si256()); 4738 } 4739 4740 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4741 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4742 { 4743 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4744 (__v4si)_mm_sllv_epi32(__X, __Y), 4745 (__v4si)__W); 4746 } 4747 4748 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4749 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 4750 { 4751 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4752 (__v4si)_mm_sllv_epi32(__X, __Y), 4753 (__v4si)_mm_setzero_si128()); 4754 } 4755 4756 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4757 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4758 { 4759 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4760 (__v8si)_mm256_sllv_epi32(__X, __Y), 4761 (__v8si)__W); 4762 } 4763 4764 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4765 _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 4766 { 4767 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4768 (__v8si)_mm256_sllv_epi32(__X, __Y), 4769 (__v8si)_mm256_setzero_si256()); 4770 } 4771 4772 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4773 
_mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4774 { 4775 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4776 (__v2di)_mm_srlv_epi64(__X, __Y), 4777 (__v2di)__W); 4778 } 4779 4780 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4781 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 4782 { 4783 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4784 (__v2di)_mm_srlv_epi64(__X, __Y), 4785 (__v2di)_mm_setzero_si128()); 4786 } 4787 4788 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4789 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4790 { 4791 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4792 (__v4di)_mm256_srlv_epi64(__X, __Y), 4793 (__v4di)__W); 4794 } 4795 4796 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4797 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) 4798 { 4799 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4800 (__v4di)_mm256_srlv_epi64(__X, __Y), 4801 (__v4di)_mm256_setzero_si256()); 4802 } 4803 4804 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4805 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4806 { 4807 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4808 (__v4si)_mm_srlv_epi32(__X, __Y), 4809 (__v4si)__W); 4810 } 4811 4812 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4813 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 4814 { 4815 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4816 (__v4si)_mm_srlv_epi32(__X, __Y), 4817 (__v4si)_mm_setzero_si128()); 4818 } 4819 4820 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4821 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4822 { 4823 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4824 (__v8si)_mm256_srlv_epi32(__X, __Y), 4825 (__v8si)__W); 4826 } 4827 4828 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4829 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 4830 
{ 4831 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4832 (__v8si)_mm256_srlv_epi32(__X, __Y), 4833 (__v8si)_mm256_setzero_si256()); 4834 } 4835 4836 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4837 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4838 { 4839 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4840 (__v4si)_mm_srl_epi32(__A, __B), 4841 (__v4si)__W); 4842 } 4843 4844 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4845 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B) 4846 { 4847 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4848 (__v4si)_mm_srl_epi32(__A, __B), 4849 (__v4si)_mm_setzero_si128()); 4850 } 4851 4852 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4853 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 4854 { 4855 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4856 (__v8si)_mm256_srl_epi32(__A, __B), 4857 (__v8si)__W); 4858 } 4859 4860 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4861 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B) 4862 { 4863 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4864 (__v8si)_mm256_srl_epi32(__A, __B), 4865 (__v8si)_mm256_setzero_si256()); 4866 } 4867 4868 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4869 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 4870 { 4871 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4872 (__v4si)_mm_srli_epi32(__A, __B), 4873 (__v4si)__W); 4874 } 4875 4876 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4877 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B) 4878 { 4879 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4880 (__v4si)_mm_srli_epi32(__A, __B), 4881 (__v4si)_mm_setzero_si128()); 4882 } 4883 4884 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4885 _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 4886 { 4887 return 
(__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4888 (__v8si)_mm256_srli_epi32(__A, __B), 4889 (__v8si)__W); 4890 } 4891 4892 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4893 _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) 4894 { 4895 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4896 (__v8si)_mm256_srli_epi32(__A, __B), 4897 (__v8si)_mm256_setzero_si256()); 4898 } 4899 4900 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4901 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4902 { 4903 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4904 (__v2di)_mm_srl_epi64(__A, __B), 4905 (__v2di)__W); 4906 } 4907 4908 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4909 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B) 4910 { 4911 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4912 (__v2di)_mm_srl_epi64(__A, __B), 4913 (__v2di)_mm_setzero_si128()); 4914 } 4915 4916 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4917 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 4918 { 4919 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4920 (__v4di)_mm256_srl_epi64(__A, __B), 4921 (__v4di)__W); 4922 } 4923 4924 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4925 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B) 4926 { 4927 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4928 (__v4di)_mm256_srl_epi64(__A, __B), 4929 (__v4di)_mm256_setzero_si256()); 4930 } 4931 4932 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4933 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 4934 { 4935 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4936 (__v2di)_mm_srli_epi64(__A, __B), 4937 (__v2di)__W); 4938 } 4939 4940 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4941 _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B) 4942 { 4943 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4944 
(__v2di)_mm_srli_epi64(__A, __B), 4945 (__v2di)_mm_setzero_si128()); 4946 } 4947 4948 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4949 _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 4950 { 4951 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4952 (__v4di)_mm256_srli_epi64(__A, __B), 4953 (__v4di)__W); 4954 } 4955 4956 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4957 _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) 4958 { 4959 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4960 (__v4di)_mm256_srli_epi64(__A, __B), 4961 (__v4di)_mm256_setzero_si256()); 4962 } 4963 4964 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4965 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4966 { 4967 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4968 (__v4si)_mm_srav_epi32(__X, __Y), 4969 (__v4si)__W); 4970 } 4971 4972 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4973 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 4974 { 4975 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4976 (__v4si)_mm_srav_epi32(__X, __Y), 4977 (__v4si)_mm_setzero_si128()); 4978 } 4979 4980 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4981 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4982 { 4983 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4984 (__v8si)_mm256_srav_epi32(__X, __Y), 4985 (__v8si)__W); 4986 } 4987 4988 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4989 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 4990 { 4991 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4992 (__v8si)_mm256_srav_epi32(__X, __Y), 4993 (__v8si)_mm256_setzero_si256()); 4994 } 4995 4996 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4997 _mm_srav_epi64(__m128i __X, __m128i __Y) 4998 { 4999 return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y); 5000 } 5001 5002 static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 5003 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 5004 { 5005 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5006 (__v2di)_mm_srav_epi64(__X, __Y), 5007 (__v2di)__W); 5008 } 5009 5010 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5011 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 5012 { 5013 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5014 (__v2di)_mm_srav_epi64(__X, __Y), 5015 (__v2di)_mm_setzero_si128()); 5016 } 5017 5018 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5019 _mm256_srav_epi64(__m256i __X, __m256i __Y) 5020 { 5021 return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y); 5022 } 5023 5024 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5025 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 5026 { 5027 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5028 (__v4di)_mm256_srav_epi64(__X, __Y), 5029 (__v4di)__W); 5030 } 5031 5032 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5033 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y) 5034 { 5035 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5036 (__v4di)_mm256_srav_epi64(__X, __Y), 5037 (__v4di)_mm256_setzero_si256()); 5038 } 5039 5040 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5041 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 5042 { 5043 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 5044 (__v4si) __A, 5045 (__v4si) __W); 5046 } 5047 5048 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5049 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A) 5050 { 5051 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 5052 (__v4si) __A, 5053 (__v4si) _mm_setzero_si128 ()); 5054 } 5055 5056 5057 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5058 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A) 5059 { 5060 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 5061 (__v8si) __A, 5062 (__v8si) __W); 
5063 } 5064 5065 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5066 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A) 5067 { 5068 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 5069 (__v8si) __A, 5070 (__v8si) _mm256_setzero_si256 ()); 5071 } 5072 5073 static __inline __m128i __DEFAULT_FN_ATTRS128 5074 _mm_load_epi32 (void const *__P) 5075 { 5076 return *(const __m128i *) __P; 5077 } 5078 5079 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5080 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P) 5081 { 5082 return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P, 5083 (__v4si) __W, 5084 (__mmask8) 5085 __U); 5086 } 5087 5088 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5089 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P) 5090 { 5091 return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P, 5092 (__v4si) 5093 _mm_setzero_si128 (), 5094 (__mmask8) 5095 __U); 5096 } 5097 5098 static __inline __m256i __DEFAULT_FN_ATTRS256 5099 _mm256_load_epi32 (void const *__P) 5100 { 5101 return *(const __m256i *) __P; 5102 } 5103 5104 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5105 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P) 5106 { 5107 return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P, 5108 (__v8si) __W, 5109 (__mmask8) 5110 __U); 5111 } 5112 5113 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5114 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P) 5115 { 5116 return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P, 5117 (__v8si) 5118 _mm256_setzero_si256 (), 5119 (__mmask8) 5120 __U); 5121 } 5122 5123 static __inline void __DEFAULT_FN_ATTRS128 5124 _mm_store_epi32 (void *__P, __m128i __A) 5125 { 5126 *(__m128i *) __P = __A; 5127 } 5128 5129 static __inline__ void __DEFAULT_FN_ATTRS128 5130 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A) 5131 { 5132 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P, 5133 (__v4si) 
__A, 5134 (__mmask8) __U); 5135 } 5136 5137 static __inline void __DEFAULT_FN_ATTRS256 5138 _mm256_store_epi32 (void *__P, __m256i __A) 5139 { 5140 *(__m256i *) __P = __A; 5141 } 5142 5143 static __inline__ void __DEFAULT_FN_ATTRS256 5144 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A) 5145 { 5146 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P, 5147 (__v8si) __A, 5148 (__mmask8) __U); 5149 } 5150 5151 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5152 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 5153 { 5154 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 5155 (__v2di) __A, 5156 (__v2di) __W); 5157 } 5158 5159 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5160 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A) 5161 { 5162 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 5163 (__v2di) __A, 5164 (__v2di) _mm_setzero_si128 ()); 5165 } 5166 5167 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5168 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 5169 { 5170 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 5171 (__v4di) __A, 5172 (__v4di) __W); 5173 } 5174 5175 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5176 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A) 5177 { 5178 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 5179 (__v4di) __A, 5180 (__v4di) _mm256_setzero_si256 ()); 5181 } 5182 5183 static __inline __m128i __DEFAULT_FN_ATTRS128 5184 _mm_load_epi64 (void const *__P) 5185 { 5186 return *(const __m128i *) __P; 5187 } 5188 5189 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5190 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P) 5191 { 5192 return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P, 5193 (__v2di) __W, 5194 (__mmask8) 5195 __U); 5196 } 5197 5198 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5199 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P) 5200 { 5201 return (__m128i) __builtin_ia32_movdqa64load128_mask 
((const __v2di *) __P, 5202 (__v2di) 5203 _mm_setzero_si128 (), 5204 (__mmask8) 5205 __U); 5206 } 5207 5208 static __inline __m256i __DEFAULT_FN_ATTRS256 5209 _mm256_load_epi64 (void const *__P) 5210 { 5211 return *(const __m256i *) __P; 5212 } 5213 5214 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5215 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P) 5216 { 5217 return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P, 5218 (__v4di) __W, 5219 (__mmask8) 5220 __U); 5221 } 5222 5223 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5224 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P) 5225 { 5226 return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P, 5227 (__v4di) 5228 _mm256_setzero_si256 (), 5229 (__mmask8) 5230 __U); 5231 } 5232 5233 static __inline void __DEFAULT_FN_ATTRS128 5234 _mm_store_epi64 (void *__P, __m128i __A) 5235 { 5236 *(__m128i *) __P = __A; 5237 } 5238 5239 static __inline__ void __DEFAULT_FN_ATTRS128 5240 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A) 5241 { 5242 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P, 5243 (__v2di) __A, 5244 (__mmask8) __U); 5245 } 5246 5247 static __inline void __DEFAULT_FN_ATTRS256 5248 _mm256_store_epi64 (void *__P, __m256i __A) 5249 { 5250 *(__m256i *) __P = __A; 5251 } 5252 5253 static __inline__ void __DEFAULT_FN_ATTRS256 5254 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A) 5255 { 5256 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P, 5257 (__v4di) __A, 5258 (__mmask8) __U); 5259 } 5260 5261 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5262 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A) 5263 { 5264 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5265 (__v2df)_mm_movedup_pd(__A), 5266 (__v2df)__W); 5267 } 5268 5269 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5270 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A) 5271 { 5272 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 
5273 (__v2df)_mm_movedup_pd(__A), 5274 (__v2df)_mm_setzero_pd()); 5275 } 5276 5277 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5278 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A) 5279 { 5280 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5281 (__v4df)_mm256_movedup_pd(__A), 5282 (__v4df)__W); 5283 } 5284 5285 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5286 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A) 5287 { 5288 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5289 (__v4df)_mm256_movedup_pd(__A), 5290 (__v4df)_mm256_setzero_pd()); 5291 } 5292 5293 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5294 _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) 5295 { 5296 return (__m128i)__builtin_ia32_selectd_128(__M, 5297 (__v4si) _mm_set1_epi32(__A), 5298 (__v4si)__O); 5299 } 5300 5301 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5302 _mm_maskz_set1_epi32( __mmask8 __M, int __A) 5303 { 5304 return (__m128i)__builtin_ia32_selectd_128(__M, 5305 (__v4si) _mm_set1_epi32(__A), 5306 (__v4si)_mm_setzero_si128()); 5307 } 5308 5309 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5310 _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) 5311 { 5312 return (__m256i)__builtin_ia32_selectd_256(__M, 5313 (__v8si) _mm256_set1_epi32(__A), 5314 (__v8si)__O); 5315 } 5316 5317 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5318 _mm256_maskz_set1_epi32( __mmask8 __M, int __A) 5319 { 5320 return (__m256i)__builtin_ia32_selectd_256(__M, 5321 (__v8si) _mm256_set1_epi32(__A), 5322 (__v8si)_mm256_setzero_si256()); 5323 } 5324 5325 5326 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5327 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) 5328 { 5329 return (__m128i) __builtin_ia32_selectq_128(__M, 5330 (__v2di) _mm_set1_epi64x(__A), 5331 (__v2di) __O); 5332 } 5333 5334 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5335 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A) 5336 { 5337 return (__m128i) 
__builtin_ia32_selectq_128(__M, 5338 (__v2di) _mm_set1_epi64x(__A), 5339 (__v2di) _mm_setzero_si128()); 5340 } 5341 5342 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5343 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A) 5344 { 5345 return (__m256i) __builtin_ia32_selectq_256(__M, 5346 (__v4di) _mm256_set1_epi64x(__A), 5347 (__v4di) __O) ; 5348 } 5349 5350 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5351 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) 5352 { 5353 return (__m256i) __builtin_ia32_selectq_256(__M, 5354 (__v4di) _mm256_set1_epi64x(__A), 5355 (__v4di) _mm256_setzero_si256()); 5356 } 5357 5358 #define _mm_fixupimm_pd(A, B, C, imm) \ 5359 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ 5360 (__v2df)(__m128d)(B), \ 5361 (__v2di)(__m128i)(C), (int)(imm), \ 5362 (__mmask8)-1) 5363 5364 #define _mm_mask_fixupimm_pd(A, U, B, C, imm) \ 5365 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ 5366 (__v2df)(__m128d)(B), \ 5367 (__v2di)(__m128i)(C), (int)(imm), \ 5368 (__mmask8)(U)) 5369 5370 #define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \ 5371 (__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \ 5372 (__v2df)(__m128d)(B), \ 5373 (__v2di)(__m128i)(C), \ 5374 (int)(imm), (__mmask8)(U)) 5375 5376 #define _mm256_fixupimm_pd(A, B, C, imm) \ 5377 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ 5378 (__v4df)(__m256d)(B), \ 5379 (__v4di)(__m256i)(C), (int)(imm), \ 5380 (__mmask8)-1) 5381 5382 #define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \ 5383 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ 5384 (__v4df)(__m256d)(B), \ 5385 (__v4di)(__m256i)(C), (int)(imm), \ 5386 (__mmask8)(U)) 5387 5388 #define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \ 5389 (__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \ 5390 (__v4df)(__m256d)(B), \ 5391 (__v4di)(__m256i)(C), \ 5392 (int)(imm), (__mmask8)(U)) 5393 5394 #define _mm_fixupimm_ps(A, B, C, imm) \ 
5395 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ 5396 (__v4sf)(__m128)(B), \ 5397 (__v4si)(__m128i)(C), (int)(imm), \ 5398 (__mmask8)-1) 5399 5400 #define _mm_mask_fixupimm_ps(A, U, B, C, imm) \ 5401 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ 5402 (__v4sf)(__m128)(B), \ 5403 (__v4si)(__m128i)(C), (int)(imm), \ 5404 (__mmask8)(U)) 5405 5406 #define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \ 5407 (__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \ 5408 (__v4sf)(__m128)(B), \ 5409 (__v4si)(__m128i)(C), (int)(imm), \ 5410 (__mmask8)(U)) 5411 5412 #define _mm256_fixupimm_ps(A, B, C, imm) \ 5413 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ 5414 (__v8sf)(__m256)(B), \ 5415 (__v8si)(__m256i)(C), (int)(imm), \ 5416 (__mmask8)-1) 5417 5418 #define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \ 5419 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ 5420 (__v8sf)(__m256)(B), \ 5421 (__v8si)(__m256i)(C), (int)(imm), \ 5422 (__mmask8)(U)) 5423 5424 #define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \ 5425 (__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \ 5426 (__v8sf)(__m256)(B), \ 5427 (__v8si)(__m256i)(C), (int)(imm), \ 5428 (__mmask8)(U)) 5429 5430 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5431 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P) 5432 { 5433 return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P, 5434 (__v2df) __W, 5435 (__mmask8) __U); 5436 } 5437 5438 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5439 _mm_maskz_load_pd (__mmask8 __U, void const *__P) 5440 { 5441 return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P, 5442 (__v2df) 5443 _mm_setzero_pd (), 5444 (__mmask8) __U); 5445 } 5446 5447 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5448 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P) 5449 { 5450 return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P, 5451 (__v4df) __W, 
5452 (__mmask8) __U); 5453 } 5454 5455 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5456 _mm256_maskz_load_pd (__mmask8 __U, void const *__P) 5457 { 5458 return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P, 5459 (__v4df) 5460 _mm256_setzero_pd (), 5461 (__mmask8) __U); 5462 } 5463 5464 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5465 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P) 5466 { 5467 return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P, 5468 (__v4sf) __W, 5469 (__mmask8) __U); 5470 } 5471 5472 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5473 _mm_maskz_load_ps (__mmask8 __U, void const *__P) 5474 { 5475 return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P, 5476 (__v4sf) 5477 _mm_setzero_ps (), 5478 (__mmask8) __U); 5479 } 5480 5481 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5482 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P) 5483 { 5484 return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P, 5485 (__v8sf) __W, 5486 (__mmask8) __U); 5487 } 5488 5489 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5490 _mm256_maskz_load_ps (__mmask8 __U, void const *__P) 5491 { 5492 return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P, 5493 (__v8sf) 5494 _mm256_setzero_ps (), 5495 (__mmask8) __U); 5496 } 5497 5498 static __inline __m128i __DEFAULT_FN_ATTRS128 5499 _mm_loadu_epi64 (void const *__P) 5500 { 5501 struct __loadu_epi64 { 5502 __m128i_u __v; 5503 } __attribute__((__packed__, __may_alias__)); 5504 return ((const struct __loadu_epi64*)__P)->__v; 5505 } 5506 5507 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5508 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) 5509 { 5510 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P, 5511 (__v2di) __W, 5512 (__mmask8) __U); 5513 } 5514 5515 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5516 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 5517 { 5518 return 
(__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P, 5519 (__v2di) 5520 _mm_setzero_si128 (), 5521 (__mmask8) __U); 5522 } 5523 5524 static __inline __m256i __DEFAULT_FN_ATTRS256 5525 _mm256_loadu_epi64 (void const *__P) 5526 { 5527 struct __loadu_epi64 { 5528 __m256i_u __v; 5529 } __attribute__((__packed__, __may_alias__)); 5530 return ((const struct __loadu_epi64*)__P)->__v; 5531 } 5532 5533 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5534 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) 5535 { 5536 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P, 5537 (__v4di) __W, 5538 (__mmask8) __U); 5539 } 5540 5541 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5542 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 5543 { 5544 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P, 5545 (__v4di) 5546 _mm256_setzero_si256 (), 5547 (__mmask8) __U); 5548 } 5549 5550 static __inline __m128i __DEFAULT_FN_ATTRS128 5551 _mm_loadu_epi32 (void const *__P) 5552 { 5553 struct __loadu_epi32 { 5554 __m128i_u __v; 5555 } __attribute__((__packed__, __may_alias__)); 5556 return ((const struct __loadu_epi32*)__P)->__v; 5557 } 5558 5559 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5560 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) 5561 { 5562 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P, 5563 (__v4si) __W, 5564 (__mmask8) __U); 5565 } 5566 5567 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5568 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P) 5569 { 5570 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P, 5571 (__v4si) 5572 _mm_setzero_si128 (), 5573 (__mmask8) __U); 5574 } 5575 5576 static __inline __m256i __DEFAULT_FN_ATTRS256 5577 _mm256_loadu_epi32 (void const *__P) 5578 { 5579 struct __loadu_epi32 { 5580 __m256i_u __v; 5581 } __attribute__((__packed__, __may_alias__)); 5582 return ((const struct __loadu_epi32*)__P)->__v; 5583 
} 5584 5585 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5586 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P) 5587 { 5588 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P, 5589 (__v8si) __W, 5590 (__mmask8) __U); 5591 } 5592 5593 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5594 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P) 5595 { 5596 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P, 5597 (__v8si) 5598 _mm256_setzero_si256 (), 5599 (__mmask8) __U); 5600 } 5601 5602 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5603 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P) 5604 { 5605 return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P, 5606 (__v2df) __W, 5607 (__mmask8) __U); 5608 } 5609 5610 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5611 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P) 5612 { 5613 return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P, 5614 (__v2df) 5615 _mm_setzero_pd (), 5616 (__mmask8) __U); 5617 } 5618 5619 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5620 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P) 5621 { 5622 return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P, 5623 (__v4df) __W, 5624 (__mmask8) __U); 5625 } 5626 5627 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5628 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P) 5629 { 5630 return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P, 5631 (__v4df) 5632 _mm256_setzero_pd (), 5633 (__mmask8) __U); 5634 } 5635 5636 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5637 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P) 5638 { 5639 return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P, 5640 (__v4sf) __W, 5641 (__mmask8) __U); 5642 } 5643 5644 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5645 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P) 5646 { 5647 return (__m128) 
__builtin_ia32_loadups128_mask ((const __v4sf *) __P, 5648 (__v4sf) 5649 _mm_setzero_ps (), 5650 (__mmask8) __U); 5651 } 5652 5653 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5654 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P) 5655 { 5656 return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P, 5657 (__v8sf) __W, 5658 (__mmask8) __U); 5659 } 5660 5661 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5662 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P) 5663 { 5664 return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P, 5665 (__v8sf) 5666 _mm256_setzero_ps (), 5667 (__mmask8) __U); 5668 } 5669 5670 static __inline__ void __DEFAULT_FN_ATTRS128 5671 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A) 5672 { 5673 __builtin_ia32_storeapd128_mask ((__v2df *) __P, 5674 (__v2df) __A, 5675 (__mmask8) __U); 5676 } 5677 5678 static __inline__ void __DEFAULT_FN_ATTRS256 5679 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A) 5680 { 5681 __builtin_ia32_storeapd256_mask ((__v4df *) __P, 5682 (__v4df) __A, 5683 (__mmask8) __U); 5684 } 5685 5686 static __inline__ void __DEFAULT_FN_ATTRS128 5687 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A) 5688 { 5689 __builtin_ia32_storeaps128_mask ((__v4sf *) __P, 5690 (__v4sf) __A, 5691 (__mmask8) __U); 5692 } 5693 5694 static __inline__ void __DEFAULT_FN_ATTRS256 5695 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A) 5696 { 5697 __builtin_ia32_storeaps256_mask ((__v8sf *) __P, 5698 (__v8sf) __A, 5699 (__mmask8) __U); 5700 } 5701 5702 static __inline void __DEFAULT_FN_ATTRS128 5703 _mm_storeu_epi64 (void *__P, __m128i __A) 5704 { 5705 struct __storeu_epi64 { 5706 __m128i_u __v; 5707 } __attribute__((__packed__, __may_alias__)); 5708 ((struct __storeu_epi64*)__P)->__v = __A; 5709 } 5710 5711 static __inline__ void __DEFAULT_FN_ATTRS128 5712 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A) 5713 { 5714 __builtin_ia32_storedqudi128_mask ((__v2di *) __P, 
5715 (__v2di) __A, 5716 (__mmask8) __U); 5717 } 5718 5719 static __inline void __DEFAULT_FN_ATTRS256 5720 _mm256_storeu_epi64 (void *__P, __m256i __A) 5721 { 5722 struct __storeu_epi64 { 5723 __m256i_u __v; 5724 } __attribute__((__packed__, __may_alias__)); 5725 ((struct __storeu_epi64*)__P)->__v = __A; 5726 } 5727 5728 static __inline__ void __DEFAULT_FN_ATTRS256 5729 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A) 5730 { 5731 __builtin_ia32_storedqudi256_mask ((__v4di *) __P, 5732 (__v4di) __A, 5733 (__mmask8) __U); 5734 } 5735 5736 static __inline void __DEFAULT_FN_ATTRS128 5737 _mm_storeu_epi32 (void *__P, __m128i __A) 5738 { 5739 struct __storeu_epi32 { 5740 __m128i_u __v; 5741 } __attribute__((__packed__, __may_alias__)); 5742 ((struct __storeu_epi32*)__P)->__v = __A; 5743 } 5744 5745 static __inline__ void __DEFAULT_FN_ATTRS128 5746 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A) 5747 { 5748 __builtin_ia32_storedqusi128_mask ((__v4si *) __P, 5749 (__v4si) __A, 5750 (__mmask8) __U); 5751 } 5752 5753 static __inline void __DEFAULT_FN_ATTRS256 5754 _mm256_storeu_epi32 (void *__P, __m256i __A) 5755 { 5756 struct __storeu_epi32 { 5757 __m256i_u __v; 5758 } __attribute__((__packed__, __may_alias__)); 5759 ((struct __storeu_epi32*)__P)->__v = __A; 5760 } 5761 5762 static __inline__ void __DEFAULT_FN_ATTRS256 5763 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A) 5764 { 5765 __builtin_ia32_storedqusi256_mask ((__v8si *) __P, 5766 (__v8si) __A, 5767 (__mmask8) __U); 5768 } 5769 5770 static __inline__ void __DEFAULT_FN_ATTRS128 5771 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A) 5772 { 5773 __builtin_ia32_storeupd128_mask ((__v2df *) __P, 5774 (__v2df) __A, 5775 (__mmask8) __U); 5776 } 5777 5778 static __inline__ void __DEFAULT_FN_ATTRS256 5779 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A) 5780 { 5781 __builtin_ia32_storeupd256_mask ((__v4df *) __P, 5782 (__v4df) __A, 5783 (__mmask8) __U); 5784 } 
5785 5786 static __inline__ void __DEFAULT_FN_ATTRS128 5787 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A) 5788 { 5789 __builtin_ia32_storeups128_mask ((__v4sf *) __P, 5790 (__v4sf) __A, 5791 (__mmask8) __U); 5792 } 5793 5794 static __inline__ void __DEFAULT_FN_ATTRS256 5795 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A) 5796 { 5797 __builtin_ia32_storeups256_mask ((__v8sf *) __P, 5798 (__v8sf) __A, 5799 (__mmask8) __U); 5800 } 5801 5802 5803 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5804 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 5805 { 5806 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5807 (__v2df)_mm_unpackhi_pd(__A, __B), 5808 (__v2df)__W); 5809 } 5810 5811 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5812 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) 5813 { 5814 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5815 (__v2df)_mm_unpackhi_pd(__A, __B), 5816 (__v2df)_mm_setzero_pd()); 5817 } 5818 5819 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5820 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) 5821 { 5822 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5823 (__v4df)_mm256_unpackhi_pd(__A, __B), 5824 (__v4df)__W); 5825 } 5826 5827 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5828 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) 5829 { 5830 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5831 (__v4df)_mm256_unpackhi_pd(__A, __B), 5832 (__v4df)_mm256_setzero_pd()); 5833 } 5834 5835 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5836 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 5837 { 5838 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 5839 (__v4sf)_mm_unpackhi_ps(__A, __B), 5840 (__v4sf)__W); 5841 } 5842 5843 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5844 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) 5845 { 5846 return 
(__m128)__builtin_ia32_selectps_128((__mmask8)__U, 5847 (__v4sf)_mm_unpackhi_ps(__A, __B), 5848 (__v4sf)_mm_setzero_ps()); 5849 } 5850 5851 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5852 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 5853 { 5854 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 5855 (__v8sf)_mm256_unpackhi_ps(__A, __B), 5856 (__v8sf)__W); 5857 } 5858 5859 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5860 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) 5861 { 5862 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 5863 (__v8sf)_mm256_unpackhi_ps(__A, __B), 5864 (__v8sf)_mm256_setzero_ps()); 5865 } 5866 5867 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5868 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 5869 { 5870 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5871 (__v2df)_mm_unpacklo_pd(__A, __B), 5872 (__v2df)__W); 5873 } 5874 5875 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5876 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) 5877 { 5878 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5879 (__v2df)_mm_unpacklo_pd(__A, __B), 5880 (__v2df)_mm_setzero_pd()); 5881 } 5882 5883 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5884 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) 5885 { 5886 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5887 (__v4df)_mm256_unpacklo_pd(__A, __B), 5888 (__v4df)__W); 5889 } 5890 5891 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5892 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) 5893 { 5894 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5895 (__v4df)_mm256_unpacklo_pd(__A, __B), 5896 (__v4df)_mm256_setzero_pd()); 5897 } 5898 5899 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5900 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 5901 { 5902 return 
(__m128)__builtin_ia32_selectps_128((__mmask8)__U, 5903 (__v4sf)_mm_unpacklo_ps(__A, __B), 5904 (__v4sf)__W); 5905 } 5906 5907 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5908 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) 5909 { 5910 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 5911 (__v4sf)_mm_unpacklo_ps(__A, __B), 5912 (__v4sf)_mm_setzero_ps()); 5913 } 5914 5915 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5916 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 5917 { 5918 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 5919 (__v8sf)_mm256_unpacklo_ps(__A, __B), 5920 (__v8sf)__W); 5921 } 5922 5923 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5924 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) 5925 { 5926 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 5927 (__v8sf)_mm256_unpacklo_ps(__A, __B), 5928 (__v8sf)_mm256_setzero_ps()); 5929 } 5930 5931 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5932 _mm_rcp14_pd (__m128d __A) 5933 { 5934 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 5935 (__v2df) 5936 _mm_setzero_pd (), 5937 (__mmask8) -1); 5938 } 5939 5940 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5941 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A) 5942 { 5943 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 5944 (__v2df) __W, 5945 (__mmask8) __U); 5946 } 5947 5948 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5949 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A) 5950 { 5951 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 5952 (__v2df) 5953 _mm_setzero_pd (), 5954 (__mmask8) __U); 5955 } 5956 5957 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5958 _mm256_rcp14_pd (__m256d __A) 5959 { 5960 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 5961 (__v4df) 5962 _mm256_setzero_pd (), 5963 (__mmask8) -1); 5964 } 5965 5966 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5967 _mm256_mask_rcp14_pd 
(__m256d __W, __mmask8 __U, __m256d __A) 5968 { 5969 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 5970 (__v4df) __W, 5971 (__mmask8) __U); 5972 } 5973 5974 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5975 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A) 5976 { 5977 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 5978 (__v4df) 5979 _mm256_setzero_pd (), 5980 (__mmask8) __U); 5981 } 5982 5983 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5984 _mm_rcp14_ps (__m128 __A) 5985 { 5986 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 5987 (__v4sf) 5988 _mm_setzero_ps (), 5989 (__mmask8) -1); 5990 } 5991 5992 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5993 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A) 5994 { 5995 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 5996 (__v4sf) __W, 5997 (__mmask8) __U); 5998 } 5999 6000 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6001 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A) 6002 { 6003 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 6004 (__v4sf) 6005 _mm_setzero_ps (), 6006 (__mmask8) __U); 6007 } 6008 6009 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6010 _mm256_rcp14_ps (__m256 __A) 6011 { 6012 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6013 (__v8sf) 6014 _mm256_setzero_ps (), 6015 (__mmask8) -1); 6016 } 6017 6018 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6019 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A) 6020 { 6021 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6022 (__v8sf) __W, 6023 (__mmask8) __U); 6024 } 6025 6026 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6027 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A) 6028 { 6029 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6030 (__v8sf) 6031 _mm256_setzero_ps (), 6032 (__mmask8) __U); 6033 } 6034 6035 #define _mm_mask_permute_pd(W, U, X, C) \ 6036 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6037 
(__v2df)_mm_permute_pd((X), (C)), \ 6038 (__v2df)(__m128d)(W)) 6039 6040 #define _mm_maskz_permute_pd(U, X, C) \ 6041 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6042 (__v2df)_mm_permute_pd((X), (C)), \ 6043 (__v2df)_mm_setzero_pd()) 6044 6045 #define _mm256_mask_permute_pd(W, U, X, C) \ 6046 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6047 (__v4df)_mm256_permute_pd((X), (C)), \ 6048 (__v4df)(__m256d)(W)) 6049 6050 #define _mm256_maskz_permute_pd(U, X, C) \ 6051 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6052 (__v4df)_mm256_permute_pd((X), (C)), \ 6053 (__v4df)_mm256_setzero_pd()) 6054 6055 #define _mm_mask_permute_ps(W, U, X, C) \ 6056 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6057 (__v4sf)_mm_permute_ps((X), (C)), \ 6058 (__v4sf)(__m128)(W)) 6059 6060 #define _mm_maskz_permute_ps(U, X, C) \ 6061 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6062 (__v4sf)_mm_permute_ps((X), (C)), \ 6063 (__v4sf)_mm_setzero_ps()) 6064 6065 #define _mm256_mask_permute_ps(W, U, X, C) \ 6066 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6067 (__v8sf)_mm256_permute_ps((X), (C)), \ 6068 (__v8sf)(__m256)(W)) 6069 6070 #define _mm256_maskz_permute_ps(U, X, C) \ 6071 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6072 (__v8sf)_mm256_permute_ps((X), (C)), \ 6073 (__v8sf)_mm256_setzero_ps()) 6074 6075 static __inline__ __m128d __DEFAULT_FN_ATTRS128 6076 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) 6077 { 6078 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6079 (__v2df)_mm_permutevar_pd(__A, __C), 6080 (__v2df)__W); 6081 } 6082 6083 static __inline__ __m128d __DEFAULT_FN_ATTRS128 6084 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) 6085 { 6086 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6087 (__v2df)_mm_permutevar_pd(__A, __C), 6088 (__v2df)_mm_setzero_pd()); 6089 } 6090 6091 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6092 
_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) 6093 { 6094 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6095 (__v4df)_mm256_permutevar_pd(__A, __C), 6096 (__v4df)__W); 6097 } 6098 6099 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6100 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) 6101 { 6102 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6103 (__v4df)_mm256_permutevar_pd(__A, __C), 6104 (__v4df)_mm256_setzero_pd()); 6105 } 6106 6107 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6108 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) 6109 { 6110 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6111 (__v4sf)_mm_permutevar_ps(__A, __C), 6112 (__v4sf)__W); 6113 } 6114 6115 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6116 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) 6117 { 6118 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6119 (__v4sf)_mm_permutevar_ps(__A, __C), 6120 (__v4sf)_mm_setzero_ps()); 6121 } 6122 6123 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6124 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) 6125 { 6126 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6127 (__v8sf)_mm256_permutevar_ps(__A, __C), 6128 (__v8sf)__W); 6129 } 6130 6131 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6132 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) 6133 { 6134 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6135 (__v8sf)_mm256_permutevar_ps(__A, __C), 6136 (__v8sf)_mm256_setzero_ps()); 6137 } 6138 6139 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6140 _mm_test_epi32_mask (__m128i __A, __m128i __B) 6141 { 6142 return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 6143 } 6144 6145 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6146 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) 6147 { 6148 return 
_mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B), 6149 _mm_setzero_si128()); 6150 } 6151 6152 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6153 _mm256_test_epi32_mask (__m256i __A, __m256i __B) 6154 { 6155 return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B), 6156 _mm256_setzero_si256()); 6157 } 6158 6159 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6160 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 6161 { 6162 return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B), 6163 _mm256_setzero_si256()); 6164 } 6165 6166 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6167 _mm_test_epi64_mask (__m128i __A, __m128i __B) 6168 { 6169 return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 6170 } 6171 6172 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6173 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) 6174 { 6175 return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B), 6176 _mm_setzero_si128()); 6177 } 6178 6179 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6180 _mm256_test_epi64_mask (__m256i __A, __m256i __B) 6181 { 6182 return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B), 6183 _mm256_setzero_si256()); 6184 } 6185 6186 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6187 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) 6188 { 6189 return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B), 6190 _mm256_setzero_si256()); 6191 } 6192 6193 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6194 _mm_testn_epi32_mask (__m128i __A, __m128i __B) 6195 { 6196 return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 6197 } 6198 6199 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6200 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) 6201 { 6202 return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B), 6203 _mm_setzero_si128()); 6204 } 6205 6206 static __inline__ __mmask8 
__DEFAULT_FN_ATTRS256 6207 _mm256_testn_epi32_mask (__m256i __A, __m256i __B) 6208 { 6209 return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B), 6210 _mm256_setzero_si256()); 6211 } 6212 6213 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6214 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 6215 { 6216 return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B), 6217 _mm256_setzero_si256()); 6218 } 6219 6220 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6221 _mm_testn_epi64_mask (__m128i __A, __m128i __B) 6222 { 6223 return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 6224 } 6225 6226 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6227 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) 6228 { 6229 return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B), 6230 _mm_setzero_si128()); 6231 } 6232 6233 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6234 _mm256_testn_epi64_mask (__m256i __A, __m256i __B) 6235 { 6236 return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B), 6237 _mm256_setzero_si256()); 6238 } 6239 6240 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6241 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) 6242 { 6243 return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B), 6244 _mm256_setzero_si256()); 6245 } 6246 6247 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6248 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6249 { 6250 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6251 (__v4si)_mm_unpackhi_epi32(__A, __B), 6252 (__v4si)__W); 6253 } 6254 6255 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6256 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6257 { 6258 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6259 (__v4si)_mm_unpackhi_epi32(__A, __B), 6260 (__v4si)_mm_setzero_si128()); 6261 } 6262 6263 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6264 
_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6265 { 6266 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6267 (__v8si)_mm256_unpackhi_epi32(__A, __B), 6268 (__v8si)__W); 6269 } 6270 6271 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6272 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) 6273 { 6274 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6275 (__v8si)_mm256_unpackhi_epi32(__A, __B), 6276 (__v8si)_mm256_setzero_si256()); 6277 } 6278 6279 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6280 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6281 { 6282 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6283 (__v2di)_mm_unpackhi_epi64(__A, __B), 6284 (__v2di)__W); 6285 } 6286 6287 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6288 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6289 { 6290 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6291 (__v2di)_mm_unpackhi_epi64(__A, __B), 6292 (__v2di)_mm_setzero_si128()); 6293 } 6294 6295 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6296 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6297 { 6298 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6299 (__v4di)_mm256_unpackhi_epi64(__A, __B), 6300 (__v4di)__W); 6301 } 6302 6303 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6304 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) 6305 { 6306 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6307 (__v4di)_mm256_unpackhi_epi64(__A, __B), 6308 (__v4di)_mm256_setzero_si256()); 6309 } 6310 6311 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6312 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6313 { 6314 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6315 (__v4si)_mm_unpacklo_epi32(__A, __B), 6316 (__v4si)__W); 6317 } 6318 6319 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6320 
_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6321 { 6322 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6323 (__v4si)_mm_unpacklo_epi32(__A, __B), 6324 (__v4si)_mm_setzero_si128()); 6325 } 6326 6327 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6328 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6329 { 6330 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6331 (__v8si)_mm256_unpacklo_epi32(__A, __B), 6332 (__v8si)__W); 6333 } 6334 6335 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6336 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) 6337 { 6338 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6339 (__v8si)_mm256_unpacklo_epi32(__A, __B), 6340 (__v8si)_mm256_setzero_si256()); 6341 } 6342 6343 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6344 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6345 { 6346 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6347 (__v2di)_mm_unpacklo_epi64(__A, __B), 6348 (__v2di)__W); 6349 } 6350 6351 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6352 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6353 { 6354 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6355 (__v2di)_mm_unpacklo_epi64(__A, __B), 6356 (__v2di)_mm_setzero_si128()); 6357 } 6358 6359 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6360 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6361 { 6362 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6363 (__v4di)_mm256_unpacklo_epi64(__A, __B), 6364 (__v4di)__W); 6365 } 6366 6367 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6368 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) 6369 { 6370 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6371 (__v4di)_mm256_unpacklo_epi64(__A, __B), 6372 (__v4di)_mm256_setzero_si256()); 6373 } 6374 6375 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6376 
_mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6377 { 6378 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6379 (__v4si)_mm_sra_epi32(__A, __B), 6380 (__v4si)__W); 6381 } 6382 6383 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6384 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6385 { 6386 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6387 (__v4si)_mm_sra_epi32(__A, __B), 6388 (__v4si)_mm_setzero_si128()); 6389 } 6390 6391 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6392 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 6393 { 6394 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6395 (__v8si)_mm256_sra_epi32(__A, __B), 6396 (__v8si)__W); 6397 } 6398 6399 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6400 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) 6401 { 6402 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6403 (__v8si)_mm256_sra_epi32(__A, __B), 6404 (__v8si)_mm256_setzero_si256()); 6405 } 6406 6407 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6408 _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 6409 { 6410 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6411 (__v4si)_mm_srai_epi32(__A, __B), 6412 (__v4si)__W); 6413 } 6414 6415 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6416 _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B) 6417 { 6418 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6419 (__v4si)_mm_srai_epi32(__A, __B), 6420 (__v4si)_mm_setzero_si128()); 6421 } 6422 6423 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6424 _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 6425 { 6426 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6427 (__v8si)_mm256_srai_epi32(__A, __B), 6428 (__v8si)__W); 6429 } 6430 6431 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6432 _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned 
int __B) 6433 { 6434 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6435 (__v8si)_mm256_srai_epi32(__A, __B), 6436 (__v8si)_mm256_setzero_si256()); 6437 } 6438 6439 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6440 _mm_sra_epi64(__m128i __A, __m128i __B) 6441 { 6442 return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B); 6443 } 6444 6445 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6446 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6447 { 6448 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6449 (__v2di)_mm_sra_epi64(__A, __B), \ 6450 (__v2di)__W); 6451 } 6452 6453 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6454 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6455 { 6456 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6457 (__v2di)_mm_sra_epi64(__A, __B), \ 6458 (__v2di)_mm_setzero_si128()); 6459 } 6460 6461 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6462 _mm256_sra_epi64(__m256i __A, __m128i __B) 6463 { 6464 return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B); 6465 } 6466 6467 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6468 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 6469 { 6470 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6471 (__v4di)_mm256_sra_epi64(__A, __B), \ 6472 (__v4di)__W); 6473 } 6474 6475 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6476 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) 6477 { 6478 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6479 (__v4di)_mm256_sra_epi64(__A, __B), \ 6480 (__v4di)_mm256_setzero_si256()); 6481 } 6482 6483 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6484 _mm_srai_epi64(__m128i __A, unsigned int __imm) 6485 { 6486 return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, __imm); 6487 } 6488 6489 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6490 _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm) 
6491 { 6492 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6493 (__v2di)_mm_srai_epi64(__A, __imm), \ 6494 (__v2di)__W); 6495 } 6496 6497 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6498 _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm) 6499 { 6500 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6501 (__v2di)_mm_srai_epi64(__A, __imm), \ 6502 (__v2di)_mm_setzero_si128()); 6503 } 6504 6505 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6506 _mm256_srai_epi64(__m256i __A, unsigned int __imm) 6507 { 6508 return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, __imm); 6509 } 6510 6511 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6512 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, 6513 unsigned int __imm) 6514 { 6515 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6516 (__v4di)_mm256_srai_epi64(__A, __imm), \ 6517 (__v4di)__W); 6518 } 6519 6520 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6521 _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm) 6522 { 6523 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6524 (__v4di)_mm256_srai_epi64(__A, __imm), \ 6525 (__v4di)_mm256_setzero_si256()); 6526 } 6527 6528 #define _mm_ternarylogic_epi32(A, B, C, imm) \ 6529 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \ 6530 (__v4si)(__m128i)(B), \ 6531 (__v4si)(__m128i)(C), (int)(imm), \ 6532 (__mmask8)-1) 6533 6534 #define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \ 6535 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \ 6536 (__v4si)(__m128i)(B), \ 6537 (__v4si)(__m128i)(C), (int)(imm), \ 6538 (__mmask8)(U)) 6539 6540 #define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \ 6541 (__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \ 6542 (__v4si)(__m128i)(B), \ 6543 (__v4si)(__m128i)(C), (int)(imm), \ 6544 (__mmask8)(U)) 6545 6546 #define _mm256_ternarylogic_epi32(A, B, C, imm) \ 6547 
(__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \ 6548 (__v8si)(__m256i)(B), \ 6549 (__v8si)(__m256i)(C), (int)(imm), \ 6550 (__mmask8)-1) 6551 6552 #define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \ 6553 (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \ 6554 (__v8si)(__m256i)(B), \ 6555 (__v8si)(__m256i)(C), (int)(imm), \ 6556 (__mmask8)(U)) 6557 6558 #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \ 6559 (__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \ 6560 (__v8si)(__m256i)(B), \ 6561 (__v8si)(__m256i)(C), (int)(imm), \ 6562 (__mmask8)(U)) 6563 6564 #define _mm_ternarylogic_epi64(A, B, C, imm) \ 6565 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \ 6566 (__v2di)(__m128i)(B), \ 6567 (__v2di)(__m128i)(C), (int)(imm), \ 6568 (__mmask8)-1) 6569 6570 #define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \ 6571 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \ 6572 (__v2di)(__m128i)(B), \ 6573 (__v2di)(__m128i)(C), (int)(imm), \ 6574 (__mmask8)(U)) 6575 6576 #define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \ 6577 (__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \ 6578 (__v2di)(__m128i)(B), \ 6579 (__v2di)(__m128i)(C), (int)(imm), \ 6580 (__mmask8)(U)) 6581 6582 #define _mm256_ternarylogic_epi64(A, B, C, imm) \ 6583 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \ 6584 (__v4di)(__m256i)(B), \ 6585 (__v4di)(__m256i)(C), (int)(imm), \ 6586 (__mmask8)-1) 6587 6588 #define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \ 6589 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \ 6590 (__v4di)(__m256i)(B), \ 6591 (__v4di)(__m256i)(C), (int)(imm), \ 6592 (__mmask8)(U)) 6593 6594 #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \ 6595 (__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \ 6596 (__v4di)(__m256i)(B), \ 6597 (__v4di)(__m256i)(C), (int)(imm), \ 6598 (__mmask8)(U)) 6599 6600 6601 6602 #define 
_mm256_shuffle_f32x4(A, B, imm) \ 6603 (__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \ 6604 (__v8sf)(__m256)(B), (int)(imm)) 6605 6606 #define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \ 6607 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6608 (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \ 6609 (__v8sf)(__m256)(W)) 6610 6611 #define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \ 6612 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6613 (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \ 6614 (__v8sf)_mm256_setzero_ps()) 6615 6616 #define _mm256_shuffle_f64x2(A, B, imm) \ 6617 (__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \ 6618 (__v4df)(__m256d)(B), (int)(imm)) 6619 6620 #define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \ 6621 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6622 (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \ 6623 (__v4df)(__m256d)(W)) 6624 6625 #define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \ 6626 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6627 (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \ 6628 (__v4df)_mm256_setzero_pd()) 6629 6630 #define _mm256_shuffle_i32x4(A, B, imm) \ 6631 (__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \ 6632 (__v8si)(__m256i)(B), (int)(imm)) 6633 6634 #define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \ 6635 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 6636 (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \ 6637 (__v8si)(__m256i)(W)) 6638 6639 #define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \ 6640 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 6641 (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \ 6642 (__v8si)_mm256_setzero_si256()) 6643 6644 #define _mm256_shuffle_i64x2(A, B, imm) \ 6645 (__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \ 6646 (__v4di)(__m256i)(B), (int)(imm)) 6647 6648 #define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \ 6649 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 6650 
(__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \ 6651 (__v4di)(__m256i)(W)) 6652 6653 6654 #define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \ 6655 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 6656 (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \ 6657 (__v4di)_mm256_setzero_si256()) 6658 6659 #define _mm_mask_shuffle_pd(W, U, A, B, M) \ 6660 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6661 (__v2df)_mm_shuffle_pd((A), (B), (M)), \ 6662 (__v2df)(__m128d)(W)) 6663 6664 #define _mm_maskz_shuffle_pd(U, A, B, M) \ 6665 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6666 (__v2df)_mm_shuffle_pd((A), (B), (M)), \ 6667 (__v2df)_mm_setzero_pd()) 6668 6669 #define _mm256_mask_shuffle_pd(W, U, A, B, M) \ 6670 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6671 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ 6672 (__v4df)(__m256d)(W)) 6673 6674 #define _mm256_maskz_shuffle_pd(U, A, B, M) \ 6675 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6676 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ 6677 (__v4df)_mm256_setzero_pd()) 6678 6679 #define _mm_mask_shuffle_ps(W, U, A, B, M) \ 6680 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6681 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ 6682 (__v4sf)(__m128)(W)) 6683 6684 #define _mm_maskz_shuffle_ps(U, A, B, M) \ 6685 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6686 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ 6687 (__v4sf)_mm_setzero_ps()) 6688 6689 #define _mm256_mask_shuffle_ps(W, U, A, B, M) \ 6690 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6691 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ 6692 (__v8sf)(__m256)(W)) 6693 6694 #define _mm256_maskz_shuffle_ps(U, A, B, M) \ 6695 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6696 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ 6697 (__v8sf)_mm256_setzero_ps()) 6698 6699 static __inline__ __m128d __DEFAULT_FN_ATTRS128 6700 _mm_rsqrt14_pd (__m128d __A) 6701 { 6702 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 6703 
(__v2df) 6704 _mm_setzero_pd (), 6705 (__mmask8) -1); 6706 } 6707 6708 static __inline__ __m128d __DEFAULT_FN_ATTRS128 6709 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A) 6710 { 6711 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 6712 (__v2df) __W, 6713 (__mmask8) __U); 6714 } 6715 6716 static __inline__ __m128d __DEFAULT_FN_ATTRS128 6717 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A) 6718 { 6719 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 6720 (__v2df) 6721 _mm_setzero_pd (), 6722 (__mmask8) __U); 6723 } 6724 6725 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6726 _mm256_rsqrt14_pd (__m256d __A) 6727 { 6728 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 6729 (__v4df) 6730 _mm256_setzero_pd (), 6731 (__mmask8) -1); 6732 } 6733 6734 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6735 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A) 6736 { 6737 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 6738 (__v4df) __W, 6739 (__mmask8) __U); 6740 } 6741 6742 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6743 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A) 6744 { 6745 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 6746 (__v4df) 6747 _mm256_setzero_pd (), 6748 (__mmask8) __U); 6749 } 6750 6751 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6752 _mm_rsqrt14_ps (__m128 __A) 6753 { 6754 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 6755 (__v4sf) 6756 _mm_setzero_ps (), 6757 (__mmask8) -1); 6758 } 6759 6760 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6761 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A) 6762 { 6763 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 6764 (__v4sf) __W, 6765 (__mmask8) __U); 6766 } 6767 6768 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6769 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A) 6770 { 6771 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 6772 (__v4sf) 
6773 _mm_setzero_ps (), 6774 (__mmask8) __U); 6775 } 6776 6777 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6778 _mm256_rsqrt14_ps (__m256 __A) 6779 { 6780 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 6781 (__v8sf) 6782 _mm256_setzero_ps (), 6783 (__mmask8) -1); 6784 } 6785 6786 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6787 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A) 6788 { 6789 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 6790 (__v8sf) __W, 6791 (__mmask8) __U); 6792 } 6793 6794 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6795 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A) 6796 { 6797 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 6798 (__v8sf) 6799 _mm256_setzero_ps (), 6800 (__mmask8) __U); 6801 } 6802 6803 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6804 _mm256_broadcast_f32x4(__m128 __A) 6805 { 6806 return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 6807 0, 1, 2, 3, 0, 1, 2, 3); 6808 } 6809 6810 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6811 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) 6812 { 6813 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, 6814 (__v8sf)_mm256_broadcast_f32x4(__A), 6815 (__v8sf)__O); 6816 } 6817 6818 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6819 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A) 6820 { 6821 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, 6822 (__v8sf)_mm256_broadcast_f32x4(__A), 6823 (__v8sf)_mm256_setzero_ps()); 6824 } 6825 6826 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6827 _mm256_broadcast_i32x4(__m128i __A) 6828 { 6829 return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 6830 0, 1, 2, 3, 0, 1, 2, 3); 6831 } 6832 6833 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6834 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) 6835 { 6836 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 6837 
(__v8si)_mm256_broadcast_i32x4(__A), 6838 (__v8si)__O); 6839 } 6840 6841 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6842 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) 6843 { 6844 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 6845 (__v8si)_mm256_broadcast_i32x4(__A), 6846 (__v8si)_mm256_setzero_si256()); 6847 } 6848 6849 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6850 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A) 6851 { 6852 return (__m256d)__builtin_ia32_selectpd_256(__M, 6853 (__v4df) _mm256_broadcastsd_pd(__A), 6854 (__v4df) __O); 6855 } 6856 6857 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6858 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) 6859 { 6860 return (__m256d)__builtin_ia32_selectpd_256(__M, 6861 (__v4df) _mm256_broadcastsd_pd(__A), 6862 (__v4df) _mm256_setzero_pd()); 6863 } 6864 6865 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6866 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A) 6867 { 6868 return (__m128)__builtin_ia32_selectps_128(__M, 6869 (__v4sf) _mm_broadcastss_ps(__A), 6870 (__v4sf) __O); 6871 } 6872 6873 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6874 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) 6875 { 6876 return (__m128)__builtin_ia32_selectps_128(__M, 6877 (__v4sf) _mm_broadcastss_ps(__A), 6878 (__v4sf) _mm_setzero_ps()); 6879 } 6880 6881 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6882 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A) 6883 { 6884 return (__m256)__builtin_ia32_selectps_256(__M, 6885 (__v8sf) _mm256_broadcastss_ps(__A), 6886 (__v8sf) __O); 6887 } 6888 6889 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6890 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) 6891 { 6892 return (__m256)__builtin_ia32_selectps_256(__M, 6893 (__v8sf) _mm256_broadcastss_ps(__A), 6894 (__v8sf) _mm256_setzero_ps()); 6895 } 6896 6897 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6898 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 
__M, __m128i __A) 6899 { 6900 return (__m128i)__builtin_ia32_selectd_128(__M, 6901 (__v4si) _mm_broadcastd_epi32(__A), 6902 (__v4si) __O); 6903 } 6904 6905 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6906 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) 6907 { 6908 return (__m128i)__builtin_ia32_selectd_128(__M, 6909 (__v4si) _mm_broadcastd_epi32(__A), 6910 (__v4si) _mm_setzero_si128()); 6911 } 6912 6913 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6914 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A) 6915 { 6916 return (__m256i)__builtin_ia32_selectd_256(__M, 6917 (__v8si) _mm256_broadcastd_epi32(__A), 6918 (__v8si) __O); 6919 } 6920 6921 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6922 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) 6923 { 6924 return (__m256i)__builtin_ia32_selectd_256(__M, 6925 (__v8si) _mm256_broadcastd_epi32(__A), 6926 (__v8si) _mm256_setzero_si256()); 6927 } 6928 6929 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6930 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A) 6931 { 6932 return (__m128i)__builtin_ia32_selectq_128(__M, 6933 (__v2di) _mm_broadcastq_epi64(__A), 6934 (__v2di) __O); 6935 } 6936 6937 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6938 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 6939 { 6940 return (__m128i)__builtin_ia32_selectq_128(__M, 6941 (__v2di) _mm_broadcastq_epi64(__A), 6942 (__v2di) _mm_setzero_si128()); 6943 } 6944 6945 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6946 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A) 6947 { 6948 return (__m256i)__builtin_ia32_selectq_256(__M, 6949 (__v4di) _mm256_broadcastq_epi64(__A), 6950 (__v4di) __O); 6951 } 6952 6953 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6954 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 6955 { 6956 return (__m256i)__builtin_ia32_selectq_256(__M, 6957 (__v4di) _mm256_broadcastq_epi64(__A), 6958 (__v4di) _mm256_setzero_si256()); 6959 } 
6960 6961 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6962 _mm_cvtsepi32_epi8 (__m128i __A) 6963 { 6964 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 6965 (__v16qi)_mm_undefined_si128(), 6966 (__mmask8) -1); 6967 } 6968 6969 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6970 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 6971 { 6972 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 6973 (__v16qi) __O, __M); 6974 } 6975 6976 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6977 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A) 6978 { 6979 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 6980 (__v16qi) _mm_setzero_si128 (), 6981 __M); 6982 } 6983 6984 static __inline__ void __DEFAULT_FN_ATTRS128 6985 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 6986 { 6987 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 6988 } 6989 6990 static __inline__ __m128i __DEFAULT_FN_ATTRS256 6991 _mm256_cvtsepi32_epi8 (__m256i __A) 6992 { 6993 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 6994 (__v16qi)_mm_undefined_si128(), 6995 (__mmask8) -1); 6996 } 6997 6998 static __inline__ __m128i __DEFAULT_FN_ATTRS256 6999 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7000 { 7001 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 7002 (__v16qi) __O, __M); 7003 } 7004 7005 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7006 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A) 7007 { 7008 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 7009 (__v16qi) _mm_setzero_si128 (), 7010 __M); 7011 } 7012 7013 static __inline__ void __DEFAULT_FN_ATTRS256 7014 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7015 { 7016 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); 7017 } 7018 7019 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7020 _mm_cvtsepi32_epi16 (__m128i __A) 7021 { 
7022 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7023 (__v8hi)_mm_setzero_si128 (), 7024 (__mmask8) -1); 7025 } 7026 7027 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7028 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7029 { 7030 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7031 (__v8hi)__O, 7032 __M); 7033 } 7034 7035 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7036 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A) 7037 { 7038 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7039 (__v8hi) _mm_setzero_si128 (), 7040 __M); 7041 } 7042 7043 static __inline__ void __DEFAULT_FN_ATTRS128 7044 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7045 { 7046 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 7047 } 7048 7049 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7050 _mm256_cvtsepi32_epi16 (__m256i __A) 7051 { 7052 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7053 (__v8hi)_mm_undefined_si128(), 7054 (__mmask8) -1); 7055 } 7056 7057 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7058 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7059 { 7060 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7061 (__v8hi) __O, __M); 7062 } 7063 7064 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7065 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A) 7066 { 7067 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7068 (__v8hi) _mm_setzero_si128 (), 7069 __M); 7070 } 7071 7072 static __inline__ void __DEFAULT_FN_ATTRS256 7073 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7074 { 7075 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 7076 } 7077 7078 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7079 _mm_cvtsepi64_epi8 (__m128i __A) 7080 { 7081 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7082 (__v16qi)_mm_undefined_si128(), 
7083 (__mmask8) -1); 7084 } 7085 7086 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7087 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7088 { 7089 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7090 (__v16qi) __O, __M); 7091 } 7092 7093 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7094 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A) 7095 { 7096 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7097 (__v16qi) _mm_setzero_si128 (), 7098 __M); 7099 } 7100 7101 static __inline__ void __DEFAULT_FN_ATTRS128 7102 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7103 { 7104 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 7105 } 7106 7107 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7108 _mm256_cvtsepi64_epi8 (__m256i __A) 7109 { 7110 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7111 (__v16qi)_mm_undefined_si128(), 7112 (__mmask8) -1); 7113 } 7114 7115 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7116 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7117 { 7118 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7119 (__v16qi) __O, __M); 7120 } 7121 7122 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7123 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A) 7124 { 7125 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7126 (__v16qi) _mm_setzero_si128 (), 7127 __M); 7128 } 7129 7130 static __inline__ void __DEFAULT_FN_ATTRS256 7131 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7132 { 7133 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 7134 } 7135 7136 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7137 _mm_cvtsepi64_epi32 (__m128i __A) 7138 { 7139 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7140 (__v4si)_mm_undefined_si128(), 7141 (__mmask8) -1); 7142 } 7143 7144 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7145 
_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7146 { 7147 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7148 (__v4si) __O, __M); 7149 } 7150 7151 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7152 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A) 7153 { 7154 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7155 (__v4si) _mm_setzero_si128 (), 7156 __M); 7157 } 7158 7159 static __inline__ void __DEFAULT_FN_ATTRS128 7160 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 7161 { 7162 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 7163 } 7164 7165 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7166 _mm256_cvtsepi64_epi32 (__m256i __A) 7167 { 7168 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7169 (__v4si)_mm_undefined_si128(), 7170 (__mmask8) -1); 7171 } 7172 7173 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7174 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 7175 { 7176 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7177 (__v4si)__O, 7178 __M); 7179 } 7180 7181 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7182 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A) 7183 { 7184 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7185 (__v4si) _mm_setzero_si128 (), 7186 __M); 7187 } 7188 7189 static __inline__ void __DEFAULT_FN_ATTRS256 7190 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 7191 { 7192 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 7193 } 7194 7195 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7196 _mm_cvtsepi64_epi16 (__m128i __A) 7197 { 7198 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7199 (__v8hi)_mm_undefined_si128(), 7200 (__mmask8) -1); 7201 } 7202 7203 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7204 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7205 { 7206 return (__m128i) 
__builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7207 (__v8hi) __O, __M); 7208 } 7209 7210 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7211 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A) 7212 { 7213 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7214 (__v8hi) _mm_setzero_si128 (), 7215 __M); 7216 } 7217 7218 static __inline__ void __DEFAULT_FN_ATTRS128 7219 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7220 { 7221 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 7222 } 7223 7224 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7225 _mm256_cvtsepi64_epi16 (__m256i __A) 7226 { 7227 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7228 (__v8hi)_mm_undefined_si128(), 7229 (__mmask8) -1); 7230 } 7231 7232 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7233 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7234 { 7235 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7236 (__v8hi) __O, __M); 7237 } 7238 7239 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7240 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A) 7241 { 7242 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7243 (__v8hi) _mm_setzero_si128 (), 7244 __M); 7245 } 7246 7247 static __inline__ void __DEFAULT_FN_ATTRS256 7248 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7249 { 7250 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 7251 } 7252 7253 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7254 _mm_cvtusepi32_epi8 (__m128i __A) 7255 { 7256 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7257 (__v16qi)_mm_undefined_si128(), 7258 (__mmask8) -1); 7259 } 7260 7261 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7262 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7263 { 7264 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7265 (__v16qi) __O, 7266 __M); 7267 } 7268 7269 static 
__inline__ __m128i __DEFAULT_FN_ATTRS128 7270 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A) 7271 { 7272 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7273 (__v16qi) _mm_setzero_si128 (), 7274 __M); 7275 } 7276 7277 static __inline__ void __DEFAULT_FN_ATTRS128 7278 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7279 { 7280 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 7281 } 7282 7283 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7284 _mm256_cvtusepi32_epi8 (__m256i __A) 7285 { 7286 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7287 (__v16qi)_mm_undefined_si128(), 7288 (__mmask8) -1); 7289 } 7290 7291 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7292 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7293 { 7294 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7295 (__v16qi) __O, 7296 __M); 7297 } 7298 7299 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7300 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A) 7301 { 7302 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7303 (__v16qi) _mm_setzero_si128 (), 7304 __M); 7305 } 7306 7307 static __inline__ void __DEFAULT_FN_ATTRS256 7308 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7309 { 7310 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M); 7311 } 7312 7313 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7314 _mm_cvtusepi32_epi16 (__m128i __A) 7315 { 7316 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7317 (__v8hi)_mm_undefined_si128(), 7318 (__mmask8) -1); 7319 } 7320 7321 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7322 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7323 { 7324 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7325 (__v8hi) __O, __M); 7326 } 7327 7328 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7329 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, 
__m128i __A) 7330 { 7331 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7332 (__v8hi) _mm_setzero_si128 (), 7333 __M); 7334 } 7335 7336 static __inline__ void __DEFAULT_FN_ATTRS128 7337 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7338 { 7339 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 7340 } 7341 7342 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7343 _mm256_cvtusepi32_epi16 (__m256i __A) 7344 { 7345 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7346 (__v8hi) _mm_undefined_si128(), 7347 (__mmask8) -1); 7348 } 7349 7350 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7351 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7352 { 7353 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7354 (__v8hi) __O, __M); 7355 } 7356 7357 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7358 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A) 7359 { 7360 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7361 (__v8hi) _mm_setzero_si128 (), 7362 __M); 7363 } 7364 7365 static __inline__ void __DEFAULT_FN_ATTRS256 7366 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7367 { 7368 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 7369 } 7370 7371 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7372 _mm_cvtusepi64_epi8 (__m128i __A) 7373 { 7374 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 7375 (__v16qi)_mm_undefined_si128(), 7376 (__mmask8) -1); 7377 } 7378 7379 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7380 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7381 { 7382 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 7383 (__v16qi) __O, 7384 __M); 7385 } 7386 7387 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7388 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A) 7389 { 7390 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) 
__A, 7391 (__v16qi) _mm_setzero_si128 (), 7392 __M); 7393 } 7394 7395 static __inline__ void __DEFAULT_FN_ATTRS128 7396 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7397 { 7398 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 7399 } 7400 7401 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7402 _mm256_cvtusepi64_epi8 (__m256i __A) 7403 { 7404 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7405 (__v16qi)_mm_undefined_si128(), 7406 (__mmask8) -1); 7407 } 7408 7409 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7410 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7411 { 7412 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7413 (__v16qi) __O, 7414 __M); 7415 } 7416 7417 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7418 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A) 7419 { 7420 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7421 (__v16qi) _mm_setzero_si128 (), 7422 __M); 7423 } 7424 7425 static __inline__ void __DEFAULT_FN_ATTRS256 7426 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7427 { 7428 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 7429 } 7430 7431 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7432 _mm_cvtusepi64_epi32 (__m128i __A) 7433 { 7434 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7435 (__v4si)_mm_undefined_si128(), 7436 (__mmask8) -1); 7437 } 7438 7439 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7440 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7441 { 7442 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7443 (__v4si) __O, __M); 7444 } 7445 7446 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7447 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A) 7448 { 7449 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7450 (__v4si) _mm_setzero_si128 (), 7451 __M); 7452 } 7453 7454 static 
__inline__ void __DEFAULT_FN_ATTRS128 7455 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 7456 { 7457 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 7458 } 7459 7460 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7461 _mm256_cvtusepi64_epi32 (__m256i __A) 7462 { 7463 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7464 (__v4si)_mm_undefined_si128(), 7465 (__mmask8) -1); 7466 } 7467 7468 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7469 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 7470 { 7471 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7472 (__v4si) __O, __M); 7473 } 7474 7475 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7476 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A) 7477 { 7478 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7479 (__v4si) _mm_setzero_si128 (), 7480 __M); 7481 } 7482 7483 static __inline__ void __DEFAULT_FN_ATTRS256 7484 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 7485 { 7486 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 7487 } 7488 7489 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7490 _mm_cvtusepi64_epi16 (__m128i __A) 7491 { 7492 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7493 (__v8hi)_mm_undefined_si128(), 7494 (__mmask8) -1); 7495 } 7496 7497 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7498 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7499 { 7500 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7501 (__v8hi) __O, __M); 7502 } 7503 7504 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7505 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A) 7506 { 7507 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7508 (__v8hi) _mm_setzero_si128 (), 7509 __M); 7510 } 7511 7512 static __inline__ void __DEFAULT_FN_ATTRS128 7513 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, 
__mmask8 __M, __m128i __A) 7514 { 7515 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 7516 } 7517 7518 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7519 _mm256_cvtusepi64_epi16 (__m256i __A) 7520 { 7521 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7522 (__v8hi)_mm_undefined_si128(), 7523 (__mmask8) -1); 7524 } 7525 7526 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7527 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7528 { 7529 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7530 (__v8hi) __O, __M); 7531 } 7532 7533 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7534 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A) 7535 { 7536 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7537 (__v8hi) _mm_setzero_si128 (), 7538 __M); 7539 } 7540 7541 static __inline__ void __DEFAULT_FN_ATTRS256 7542 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7543 { 7544 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 7545 } 7546 7547 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7548 _mm_cvtepi32_epi8 (__m128i __A) 7549 { 7550 return (__m128i)__builtin_shufflevector( 7551 __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, 7552 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); 7553 } 7554 7555 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7556 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7557 { 7558 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 7559 (__v16qi) __O, __M); 7560 } 7561 7562 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7563 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A) 7564 { 7565 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 7566 (__v16qi) 7567 _mm_setzero_si128 (), 7568 __M); 7569 } 7570 7571 static __inline__ void __DEFAULT_FN_ATTRS128 7572 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7573 { 7574 
__builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 7575 } 7576 7577 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7578 _mm256_cvtepi32_epi8 (__m256i __A) 7579 { 7580 return (__m128i)__builtin_shufflevector( 7581 __builtin_convertvector((__v8si)__A, __v8qi), 7582 (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 7583 12, 13, 14, 15); 7584 } 7585 7586 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7587 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7588 { 7589 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 7590 (__v16qi) __O, __M); 7591 } 7592 7593 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7594 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A) 7595 { 7596 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 7597 (__v16qi) _mm_setzero_si128 (), 7598 __M); 7599 } 7600 7601 static __inline__ void __DEFAULT_FN_ATTRS256 7602 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7603 { 7604 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); 7605 } 7606 7607 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7608 _mm_cvtepi32_epi16 (__m128i __A) 7609 { 7610 return (__m128i)__builtin_shufflevector( 7611 __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, 7612 2, 3, 4, 5, 6, 7); 7613 } 7614 7615 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7616 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7617 { 7618 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 7619 (__v8hi) __O, __M); 7620 } 7621 7622 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7623 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A) 7624 { 7625 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 7626 (__v8hi) _mm_setzero_si128 (), 7627 __M); 7628 } 7629 7630 static __inline__ void __DEFAULT_FN_ATTRS128 7631 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7632 { 7633 
__builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 7634 } 7635 7636 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7637 _mm256_cvtepi32_epi16 (__m256i __A) 7638 { 7639 return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi); 7640 } 7641 7642 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7643 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7644 { 7645 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 7646 (__v8hi) __O, __M); 7647 } 7648 7649 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7650 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A) 7651 { 7652 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 7653 (__v8hi) _mm_setzero_si128 (), 7654 __M); 7655 } 7656 7657 static __inline__ void __DEFAULT_FN_ATTRS256 7658 _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7659 { 7660 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 7661 } 7662 7663 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7664 _mm_cvtepi64_epi8 (__m128i __A) 7665 { 7666 return (__m128i)__builtin_shufflevector( 7667 __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3, 7668 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); 7669 } 7670 7671 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7672 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7673 { 7674 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 7675 (__v16qi) __O, __M); 7676 } 7677 7678 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7679 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A) 7680 { 7681 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 7682 (__v16qi) _mm_setzero_si128 (), 7683 __M); 7684 } 7685 7686 static __inline__ void __DEFAULT_FN_ATTRS128 7687 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7688 { 7689 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 7690 } 7691 7692 static __inline__ __m128i __DEFAULT_FN_ATTRS256 
7693 _mm256_cvtepi64_epi8 (__m256i __A) 7694 { 7695 return (__m128i)__builtin_shufflevector( 7696 __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, 7697 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); 7698 } 7699 7700 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7701 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7702 { 7703 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 7704 (__v16qi) __O, __M); 7705 } 7706 7707 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7708 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A) 7709 { 7710 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 7711 (__v16qi) _mm_setzero_si128 (), 7712 __M); 7713 } 7714 7715 static __inline__ void __DEFAULT_FN_ATTRS256 7716 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7717 { 7718 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 7719 } 7720 7721 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7722 _mm_cvtepi64_epi32 (__m128i __A) 7723 { 7724 return (__m128i)__builtin_shufflevector( 7725 __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3); 7726 } 7727 7728 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7729 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7730 { 7731 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 7732 (__v4si) __O, __M); 7733 } 7734 7735 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7736 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A) 7737 { 7738 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 7739 (__v4si) _mm_setzero_si128 (), 7740 __M); 7741 } 7742 7743 static __inline__ void __DEFAULT_FN_ATTRS128 7744 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 7745 { 7746 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 7747 } 7748 7749 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7750 _mm256_cvtepi64_epi32 (__m256i __A) 7751 { 7752 return 
(__m128i)__builtin_convertvector((__v4di)__A, __v4si); 7753 } 7754 7755 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7756 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 7757 { 7758 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 7759 (__v4si)_mm256_cvtepi64_epi32(__A), 7760 (__v4si)__O); 7761 } 7762 7763 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7764 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A) 7765 { 7766 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 7767 (__v4si)_mm256_cvtepi64_epi32(__A), 7768 (__v4si)_mm_setzero_si128()); 7769 } 7770 7771 static __inline__ void __DEFAULT_FN_ATTRS256 7772 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 7773 { 7774 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 7775 } 7776 7777 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7778 _mm_cvtepi64_epi16 (__m128i __A) 7779 { 7780 return (__m128i)__builtin_shufflevector( 7781 __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3, 7782 3, 3, 3, 3); 7783 } 7784 7785 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7786 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7787 { 7788 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 7789 (__v8hi)__O, 7790 __M); 7791 } 7792 7793 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7794 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A) 7795 { 7796 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 7797 (__v8hi) _mm_setzero_si128 (), 7798 __M); 7799 } 7800 7801 static __inline__ void __DEFAULT_FN_ATTRS128 7802 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7803 { 7804 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 7805 } 7806 7807 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7808 _mm256_cvtepi64_epi16 (__m256i __A) 7809 { 7810 return (__m128i)__builtin_shufflevector( 7811 __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 
0, 0}, 0, 1, 7812 2, 3, 4, 5, 6, 7); 7813 } 7814 7815 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7816 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7817 { 7818 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 7819 (__v8hi) __O, __M); 7820 } 7821 7822 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7823 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A) 7824 { 7825 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 7826 (__v8hi) _mm_setzero_si128 (), 7827 __M); 7828 } 7829 7830 static __inline__ void __DEFAULT_FN_ATTRS256 7831 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7832 { 7833 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 7834 } 7835 7836 #define _mm256_extractf32x4_ps(A, imm) \ 7837 (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ 7838 (int)(imm), \ 7839 (__v4sf)_mm_undefined_ps(), \ 7840 (__mmask8)-1) 7841 7842 #define _mm256_mask_extractf32x4_ps(W, U, A, imm) \ 7843 (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ 7844 (int)(imm), \ 7845 (__v4sf)(__m128)(W), \ 7846 (__mmask8)(U)) 7847 7848 #define _mm256_maskz_extractf32x4_ps(U, A, imm) \ 7849 (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ 7850 (int)(imm), \ 7851 (__v4sf)_mm_setzero_ps(), \ 7852 (__mmask8)(U)) 7853 7854 #define _mm256_extracti32x4_epi32(A, imm) \ 7855 (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ 7856 (int)(imm), \ 7857 (__v4si)_mm_undefined_si128(), \ 7858 (__mmask8)-1) 7859 7860 #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \ 7861 (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ 7862 (int)(imm), \ 7863 (__v4si)(__m128i)(W), \ 7864 (__mmask8)(U)) 7865 7866 #define _mm256_maskz_extracti32x4_epi32(U, A, imm) \ 7867 (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ 7868 (int)(imm), \ 7869 (__v4si)_mm_setzero_si128(), \ 7870 (__mmask8)(U)) 7871 7872 #define 
_mm256_insertf32x4(A, B, imm) \ 7873 (__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \ 7874 (__v4sf)(__m128)(B), (int)(imm)) 7875 7876 #define _mm256_mask_insertf32x4(W, U, A, B, imm) \ 7877 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 7878 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ 7879 (__v8sf)(__m256)(W)) 7880 7881 #define _mm256_maskz_insertf32x4(U, A, B, imm) \ 7882 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 7883 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ 7884 (__v8sf)_mm256_setzero_ps()) 7885 7886 #define _mm256_inserti32x4(A, B, imm) \ 7887 (__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \ 7888 (__v4si)(__m128i)(B), (int)(imm)) 7889 7890 #define _mm256_mask_inserti32x4(W, U, A, B, imm) \ 7891 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 7892 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ 7893 (__v8si)(__m256i)(W)) 7894 7895 #define _mm256_maskz_inserti32x4(U, A, B, imm) \ 7896 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 7897 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ 7898 (__v8si)_mm256_setzero_si256()) 7899 7900 #define _mm_getmant_pd(A, B, C) \ 7901 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 7902 (int)(((C)<<2) | (B)), \ 7903 (__v2df)_mm_setzero_pd(), \ 7904 (__mmask8)-1) 7905 7906 #define _mm_mask_getmant_pd(W, U, A, B, C) \ 7907 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 7908 (int)(((C)<<2) | (B)), \ 7909 (__v2df)(__m128d)(W), \ 7910 (__mmask8)(U)) 7911 7912 #define _mm_maskz_getmant_pd(U, A, B, C) \ 7913 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 7914 (int)(((C)<<2) | (B)), \ 7915 (__v2df)_mm_setzero_pd(), \ 7916 (__mmask8)(U)) 7917 7918 #define _mm256_getmant_pd(A, B, C) \ 7919 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 7920 (int)(((C)<<2) | (B)), \ 7921 (__v4df)_mm256_setzero_pd(), \ 7922 (__mmask8)-1) 7923 7924 #define _mm256_mask_getmant_pd(W, U, A, B, C) \ 7925 
(__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 7926 (int)(((C)<<2) | (B)), \ 7927 (__v4df)(__m256d)(W), \ 7928 (__mmask8)(U)) 7929 7930 #define _mm256_maskz_getmant_pd(U, A, B, C) \ 7931 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 7932 (int)(((C)<<2) | (B)), \ 7933 (__v4df)_mm256_setzero_pd(), \ 7934 (__mmask8)(U)) 7935 7936 #define _mm_getmant_ps(A, B, C) \ 7937 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 7938 (int)(((C)<<2) | (B)), \ 7939 (__v4sf)_mm_setzero_ps(), \ 7940 (__mmask8)-1) 7941 7942 #define _mm_mask_getmant_ps(W, U, A, B, C) \ 7943 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 7944 (int)(((C)<<2) | (B)), \ 7945 (__v4sf)(__m128)(W), \ 7946 (__mmask8)(U)) 7947 7948 #define _mm_maskz_getmant_ps(U, A, B, C) \ 7949 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 7950 (int)(((C)<<2) | (B)), \ 7951 (__v4sf)_mm_setzero_ps(), \ 7952 (__mmask8)(U)) 7953 7954 #define _mm256_getmant_ps(A, B, C) \ 7955 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 7956 (int)(((C)<<2) | (B)), \ 7957 (__v8sf)_mm256_setzero_ps(), \ 7958 (__mmask8)-1) 7959 7960 #define _mm256_mask_getmant_ps(W, U, A, B, C) \ 7961 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 7962 (int)(((C)<<2) | (B)), \ 7963 (__v8sf)(__m256)(W), \ 7964 (__mmask8)(U)) 7965 7966 #define _mm256_maskz_getmant_ps(U, A, B, C) \ 7967 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 7968 (int)(((C)<<2) | (B)), \ 7969 (__v8sf)_mm256_setzero_ps(), \ 7970 (__mmask8)(U)) 7971 7972 #define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \ 7973 (__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \ 7974 (void const *)(addr), \ 7975 (__v2di)(__m128i)(index), \ 7976 (__mmask8)(mask), (int)(scale)) 7977 7978 #define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \ 7979 (__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \ 7980 (void const 
*)(addr), \ 7981 (__v2di)(__m128i)(index), \ 7982 (__mmask8)(mask), (int)(scale)) 7983 7984 #define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \ 7985 (__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \ 7986 (void const *)(addr), \ 7987 (__v4di)(__m256i)(index), \ 7988 (__mmask8)(mask), (int)(scale)) 7989 7990 #define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \ 7991 (__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \ 7992 (void const *)(addr), \ 7993 (__v4di)(__m256i)(index), \ 7994 (__mmask8)(mask), (int)(scale)) 7995 7996 #define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \ 7997 (__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \ 7998 (void const *)(addr), \ 7999 (__v2di)(__m128i)(index), \ 8000 (__mmask8)(mask), (int)(scale)) 8001 8002 #define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \ 8003 (__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \ 8004 (void const *)(addr), \ 8005 (__v2di)(__m128i)(index), \ 8006 (__mmask8)(mask), (int)(scale)) 8007 8008 #define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \ 8009 (__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \ 8010 (void const *)(addr), \ 8011 (__v4di)(__m256i)(index), \ 8012 (__mmask8)(mask), (int)(scale)) 8013 8014 #define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \ 8015 (__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \ 8016 (void const *)(addr), \ 8017 (__v4di)(__m256i)(index), \ 8018 (__mmask8)(mask), (int)(scale)) 8019 8020 #define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \ 8021 (__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \ 8022 (void const *)(addr), \ 8023 (__v4si)(__m128i)(index), \ 8024 (__mmask8)(mask), (int)(scale)) 8025 8026 #define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \ 8027 (__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \ 8028 (void 
const *)(addr), \ 8029 (__v4si)(__m128i)(index), \ 8030 (__mmask8)(mask), (int)(scale)) 8031 8032 #define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \ 8033 (__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \ 8034 (void const *)(addr), \ 8035 (__v4si)(__m128i)(index), \ 8036 (__mmask8)(mask), (int)(scale)) 8037 8038 #define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \ 8039 (__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \ 8040 (void const *)(addr), \ 8041 (__v4si)(__m128i)(index), \ 8042 (__mmask8)(mask), (int)(scale)) 8043 8044 #define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \ 8045 (__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \ 8046 (void const *)(addr), \ 8047 (__v4si)(__m128i)(index), \ 8048 (__mmask8)(mask), (int)(scale)) 8049 8050 #define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \ 8051 (__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \ 8052 (void const *)(addr), \ 8053 (__v4si)(__m128i)(index), \ 8054 (__mmask8)(mask), (int)(scale)) 8055 8056 #define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \ 8057 (__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \ 8058 (void const *)(addr), \ 8059 (__v8si)(__m256i)(index), \ 8060 (__mmask8)(mask), (int)(scale)) 8061 8062 #define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \ 8063 (__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \ 8064 (void const *)(addr), \ 8065 (__v8si)(__m256i)(index), \ 8066 (__mmask8)(mask), (int)(scale)) 8067 8068 #define _mm256_permutex_pd(X, C) \ 8069 (__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C)) 8070 8071 #define _mm256_mask_permutex_pd(W, U, X, C) \ 8072 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 8073 (__v4df)_mm256_permutex_pd((X), (C)), \ 8074 (__v4df)(__m256d)(W)) 8075 8076 #define _mm256_maskz_permutex_pd(U, X, C) \ 8077 
(__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 8078 (__v4df)_mm256_permutex_pd((X), (C)), \ 8079 (__v4df)_mm256_setzero_pd()) 8080 8081 #define _mm256_permutex_epi64(X, C) \ 8082 (__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C)) 8083 8084 #define _mm256_mask_permutex_epi64(W, U, X, C) \ 8085 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8086 (__v4di)_mm256_permutex_epi64((X), (C)), \ 8087 (__v4di)(__m256i)(W)) 8088 8089 #define _mm256_maskz_permutex_epi64(U, X, C) \ 8090 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8091 (__v4di)_mm256_permutex_epi64((X), (C)), \ 8092 (__v4di)_mm256_setzero_si256()) 8093 8094 static __inline__ __m256d __DEFAULT_FN_ATTRS256 8095 _mm256_permutexvar_pd (__m256i __X, __m256d __Y) 8096 { 8097 return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X); 8098 } 8099 8100 static __inline__ __m256d __DEFAULT_FN_ATTRS256 8101 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X, 8102 __m256d __Y) 8103 { 8104 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 8105 (__v4df)_mm256_permutexvar_pd(__X, __Y), 8106 (__v4df)__W); 8107 } 8108 8109 static __inline__ __m256d __DEFAULT_FN_ATTRS256 8110 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y) 8111 { 8112 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 8113 (__v4df)_mm256_permutexvar_pd(__X, __Y), 8114 (__v4df)_mm256_setzero_pd()); 8115 } 8116 8117 static __inline__ __m256i __DEFAULT_FN_ATTRS256 8118 _mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y) 8119 { 8120 return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X); 8121 } 8122 8123 static __inline__ __m256i __DEFAULT_FN_ATTRS256 8124 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y) 8125 { 8126 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 8127 (__v4di)_mm256_permutexvar_epi64(__X, __Y), 8128 (__v4di)_mm256_setzero_si256()); 8129 } 8130 8131 static __inline__ __m256i __DEFAULT_FN_ATTRS256 8132 
_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X, 8133 __m256i __Y) 8134 { 8135 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 8136 (__v4di)_mm256_permutexvar_epi64(__X, __Y), 8137 (__v4di)__W); 8138 } 8139 8140 #define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A)) 8141 8142 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8143 _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) 8144 { 8145 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8146 (__v8sf)_mm256_permutexvar_ps(__X, __Y), 8147 (__v8sf)__W); 8148 } 8149 8150 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8151 _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) 8152 { 8153 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8154 (__v8sf)_mm256_permutexvar_ps(__X, __Y), 8155 (__v8sf)_mm256_setzero_ps()); 8156 } 8157 8158 #define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A)) 8159 8160 static __inline__ __m256i __DEFAULT_FN_ATTRS256 8161 _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, 8162 __m256i __Y) 8163 { 8164 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 8165 (__v8si)_mm256_permutexvar_epi32(__X, __Y), 8166 (__v8si)__W); 8167 } 8168 8169 static __inline__ __m256i __DEFAULT_FN_ATTRS256 8170 _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) 8171 { 8172 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 8173 (__v8si)_mm256_permutexvar_epi32(__X, __Y), 8174 (__v8si)_mm256_setzero_si256()); 8175 } 8176 8177 #define _mm_alignr_epi32(A, B, imm) \ 8178 (__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \ 8179 (__v4si)(__m128i)(B), (int)(imm)) 8180 8181 #define _mm_mask_alignr_epi32(W, U, A, B, imm) \ 8182 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8183 (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ 8184 (__v4si)(__m128i)(W)) 8185 8186 #define _mm_maskz_alignr_epi32(U, A, B, imm) \ 8187 
(__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8188 (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ 8189 (__v4si)_mm_setzero_si128()) 8190 8191 #define _mm256_alignr_epi32(A, B, imm) \ 8192 (__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \ 8193 (__v8si)(__m256i)(B), (int)(imm)) 8194 8195 #define _mm256_mask_alignr_epi32(W, U, A, B, imm) \ 8196 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8197 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ 8198 (__v8si)(__m256i)(W)) 8199 8200 #define _mm256_maskz_alignr_epi32(U, A, B, imm) \ 8201 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8202 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ 8203 (__v8si)_mm256_setzero_si256()) 8204 8205 #define _mm_alignr_epi64(A, B, imm) \ 8206 (__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \ 8207 (__v2di)(__m128i)(B), (int)(imm)) 8208 8209 #define _mm_mask_alignr_epi64(W, U, A, B, imm) \ 8210 (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 8211 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ 8212 (__v2di)(__m128i)(W)) 8213 8214 #define _mm_maskz_alignr_epi64(U, A, B, imm) \ 8215 (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 8216 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ 8217 (__v2di)_mm_setzero_si128()) 8218 8219 #define _mm256_alignr_epi64(A, B, imm) \ 8220 (__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \ 8221 (__v4di)(__m256i)(B), (int)(imm)) 8222 8223 #define _mm256_mask_alignr_epi64(W, U, A, B, imm) \ 8224 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8225 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ 8226 (__v4di)(__m256i)(W)) 8227 8228 #define _mm256_maskz_alignr_epi64(U, A, B, imm) \ 8229 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8230 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ 8231 (__v4di)_mm256_setzero_si256()) 8232 8233 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8234 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A) 8235 { 8236 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8237 
(__v4sf)_mm_movehdup_ps(__A), 8238 (__v4sf)__W); 8239 } 8240 8241 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8242 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A) 8243 { 8244 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8245 (__v4sf)_mm_movehdup_ps(__A), 8246 (__v4sf)_mm_setzero_ps()); 8247 } 8248 8249 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8250 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A) 8251 { 8252 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8253 (__v8sf)_mm256_movehdup_ps(__A), 8254 (__v8sf)__W); 8255 } 8256 8257 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8258 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A) 8259 { 8260 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8261 (__v8sf)_mm256_movehdup_ps(__A), 8262 (__v8sf)_mm256_setzero_ps()); 8263 } 8264 8265 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8266 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A) 8267 { 8268 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8269 (__v4sf)_mm_moveldup_ps(__A), 8270 (__v4sf)__W); 8271 } 8272 8273 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8274 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A) 8275 { 8276 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8277 (__v4sf)_mm_moveldup_ps(__A), 8278 (__v4sf)_mm_setzero_ps()); 8279 } 8280 8281 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8282 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A) 8283 { 8284 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8285 (__v8sf)_mm256_moveldup_ps(__A), 8286 (__v8sf)__W); 8287 } 8288 8289 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8290 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) 8291 { 8292 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8293 (__v8sf)_mm256_moveldup_ps(__A), 8294 (__v8sf)_mm256_setzero_ps()); 8295 } 8296 8297 #define _mm256_mask_shuffle_epi32(W, U, A, I) \ 8298 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8299 
(__v8si)_mm256_shuffle_epi32((A), (I)), \ 8300 (__v8si)(__m256i)(W)) 8301 8302 #define _mm256_maskz_shuffle_epi32(U, A, I) \ 8303 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8304 (__v8si)_mm256_shuffle_epi32((A), (I)), \ 8305 (__v8si)_mm256_setzero_si256()) 8306 8307 #define _mm_mask_shuffle_epi32(W, U, A, I) \ 8308 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8309 (__v4si)_mm_shuffle_epi32((A), (I)), \ 8310 (__v4si)(__m128i)(W)) 8311 8312 #define _mm_maskz_shuffle_epi32(U, A, I) \ 8313 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8314 (__v4si)_mm_shuffle_epi32((A), (I)), \ 8315 (__v4si)_mm_setzero_si128()) 8316 8317 static __inline__ __m128d __DEFAULT_FN_ATTRS128 8318 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) 8319 { 8320 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 8321 (__v2df) __A, 8322 (__v2df) __W); 8323 } 8324 8325 static __inline__ __m128d __DEFAULT_FN_ATTRS128 8326 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A) 8327 { 8328 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 8329 (__v2df) __A, 8330 (__v2df) _mm_setzero_pd ()); 8331 } 8332 8333 static __inline__ __m256d __DEFAULT_FN_ATTRS256 8334 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A) 8335 { 8336 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 8337 (__v4df) __A, 8338 (__v4df) __W); 8339 } 8340 8341 static __inline__ __m256d __DEFAULT_FN_ATTRS256 8342 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A) 8343 { 8344 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 8345 (__v4df) __A, 8346 (__v4df) _mm256_setzero_pd ()); 8347 } 8348 8349 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8350 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A) 8351 { 8352 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 8353 (__v4sf) __A, 8354 (__v4sf) __W); 8355 } 8356 8357 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8358 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A) 8359 { 8360 return (__m128) 
__builtin_ia32_selectps_128 ((__mmask8) __U, 8361 (__v4sf) __A, 8362 (__v4sf) _mm_setzero_ps ()); 8363 } 8364 8365 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8366 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A) 8367 { 8368 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 8369 (__v8sf) __A, 8370 (__v8sf) __W); 8371 } 8372 8373 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8374 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A) 8375 { 8376 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 8377 (__v8sf) __A, 8378 (__v8sf) _mm256_setzero_ps ()); 8379 } 8380 8381 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8382 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A) 8383 { 8384 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, 8385 (__v4sf) __W, 8386 (__mmask8) __U); 8387 } 8388 8389 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8390 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A) 8391 { 8392 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, 8393 (__v4sf) 8394 _mm_setzero_ps (), 8395 (__mmask8) __U); 8396 } 8397 8398 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8399 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A) 8400 { 8401 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, 8402 (__v8sf) __W, 8403 (__mmask8) __U); 8404 } 8405 8406 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8407 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A) 8408 { 8409 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, 8410 (__v8sf) 8411 _mm256_setzero_ps (), 8412 (__mmask8) __U); 8413 } 8414 8415 #define _mm_mask_cvt_roundps_ph(W, U, A, I) \ 8416 (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ 8417 (__v8hi)(__m128i)(W), \ 8418 (__mmask8)(U)) 8419 8420 #define _mm_maskz_cvt_roundps_ph(U, A, I) \ 8421 (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ 8422 (__v8hi)_mm_setzero_si128(), \ 8423 (__mmask8)(U)) 8424 8425 #define _mm_mask_cvtps_ph 
_mm_mask_cvt_roundps_ph 8426 #define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph 8427 8428 #define _mm256_mask_cvt_roundps_ph(W, U, A, I) \ 8429 (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ 8430 (__v8hi)(__m128i)(W), \ 8431 (__mmask8)(U)) 8432 8433 #define _mm256_maskz_cvt_roundps_ph(U, A, I) \ 8434 (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ 8435 (__v8hi)_mm_setzero_si128(), \ 8436 (__mmask8)(U)) 8437 8438 #define _mm256_mask_cvtps_ph _mm256_mask_cvt_roundps_ph 8439 #define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph 8440 8441 8442 #undef __DEFAULT_FN_ATTRS128 8443 #undef __DEFAULT_FN_ATTRS256 8444 8445 #endif /* __AVX512VLINTRIN_H */ 8446