1 /*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10 #ifndef __IMMINTRIN_H 11 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead." 12 #endif 13 14 #ifndef __AVX512VLINTRIN_H 15 #define __AVX512VLINTRIN_H 16 17 #define __DEFAULT_FN_ATTRS128 \ 18 __attribute__((__always_inline__, __nodebug__, \ 19 __target__("avx512vl,no-evex512"), \ 20 __min_vector_width__(128))) 21 #define __DEFAULT_FN_ATTRS256 \ 22 __attribute__((__always_inline__, __nodebug__, \ 23 __target__("avx512vl,no-evex512"), \ 24 __min_vector_width__(256))) 25 26 typedef short __v2hi __attribute__((__vector_size__(4))); 27 typedef char __v4qi __attribute__((__vector_size__(4))); 28 typedef char __v2qi __attribute__((__vector_size__(2))); 29 30 /* Integer compare */ 31 32 #define _mm_cmpeq_epi32_mask(A, B) \ 33 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ) 34 #define _mm_mask_cmpeq_epi32_mask(k, A, B) \ 35 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ) 36 #define _mm_cmpge_epi32_mask(A, B) \ 37 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE) 38 #define _mm_mask_cmpge_epi32_mask(k, A, B) \ 39 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE) 40 #define _mm_cmpgt_epi32_mask(A, B) \ 41 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT) 42 #define _mm_mask_cmpgt_epi32_mask(k, A, B) \ 43 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT) 44 #define _mm_cmple_epi32_mask(A, B) \ 45 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE) 46 #define _mm_mask_cmple_epi32_mask(k, A, B) \ 47 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE) 48 #define _mm_cmplt_epi32_mask(A, B) \ 49 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT) 50 #define _mm_mask_cmplt_epi32_mask(k, A, B) \ 
51 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT) 52 #define _mm_cmpneq_epi32_mask(A, B) \ 53 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE) 54 #define _mm_mask_cmpneq_epi32_mask(k, A, B) \ 55 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE) 56 57 #define _mm256_cmpeq_epi32_mask(A, B) \ 58 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ) 59 #define _mm256_mask_cmpeq_epi32_mask(k, A, B) \ 60 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ) 61 #define _mm256_cmpge_epi32_mask(A, B) \ 62 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE) 63 #define _mm256_mask_cmpge_epi32_mask(k, A, B) \ 64 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE) 65 #define _mm256_cmpgt_epi32_mask(A, B) \ 66 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT) 67 #define _mm256_mask_cmpgt_epi32_mask(k, A, B) \ 68 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT) 69 #define _mm256_cmple_epi32_mask(A, B) \ 70 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE) 71 #define _mm256_mask_cmple_epi32_mask(k, A, B) \ 72 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE) 73 #define _mm256_cmplt_epi32_mask(A, B) \ 74 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT) 75 #define _mm256_mask_cmplt_epi32_mask(k, A, B) \ 76 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT) 77 #define _mm256_cmpneq_epi32_mask(A, B) \ 78 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE) 79 #define _mm256_mask_cmpneq_epi32_mask(k, A, B) \ 80 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE) 81 82 #define _mm_cmpeq_epu32_mask(A, B) \ 83 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ) 84 #define _mm_mask_cmpeq_epu32_mask(k, A, B) \ 85 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ) 86 #define _mm_cmpge_epu32_mask(A, B) \ 87 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE) 88 #define _mm_mask_cmpge_epu32_mask(k, A, B) \ 89 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE) 90 #define _mm_cmpgt_epu32_mask(A, B) \ 91 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT) 92 #define _mm_mask_cmpgt_epu32_mask(k, A, B) 
\ 93 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT) 94 #define _mm_cmple_epu32_mask(A, B) \ 95 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE) 96 #define _mm_mask_cmple_epu32_mask(k, A, B) \ 97 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE) 98 #define _mm_cmplt_epu32_mask(A, B) \ 99 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT) 100 #define _mm_mask_cmplt_epu32_mask(k, A, B) \ 101 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT) 102 #define _mm_cmpneq_epu32_mask(A, B) \ 103 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE) 104 #define _mm_mask_cmpneq_epu32_mask(k, A, B) \ 105 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE) 106 107 #define _mm256_cmpeq_epu32_mask(A, B) \ 108 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ) 109 #define _mm256_mask_cmpeq_epu32_mask(k, A, B) \ 110 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ) 111 #define _mm256_cmpge_epu32_mask(A, B) \ 112 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE) 113 #define _mm256_mask_cmpge_epu32_mask(k, A, B) \ 114 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE) 115 #define _mm256_cmpgt_epu32_mask(A, B) \ 116 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT) 117 #define _mm256_mask_cmpgt_epu32_mask(k, A, B) \ 118 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT) 119 #define _mm256_cmple_epu32_mask(A, B) \ 120 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE) 121 #define _mm256_mask_cmple_epu32_mask(k, A, B) \ 122 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE) 123 #define _mm256_cmplt_epu32_mask(A, B) \ 124 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT) 125 #define _mm256_mask_cmplt_epu32_mask(k, A, B) \ 126 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT) 127 #define _mm256_cmpneq_epu32_mask(A, B) \ 128 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE) 129 #define _mm256_mask_cmpneq_epu32_mask(k, A, B) \ 130 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE) 131 132 #define _mm_cmpeq_epi64_mask(A, B) \ 133 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ) 134 
#define _mm_mask_cmpeq_epi64_mask(k, A, B) \ 135 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ) 136 #define _mm_cmpge_epi64_mask(A, B) \ 137 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE) 138 #define _mm_mask_cmpge_epi64_mask(k, A, B) \ 139 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE) 140 #define _mm_cmpgt_epi64_mask(A, B) \ 141 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT) 142 #define _mm_mask_cmpgt_epi64_mask(k, A, B) \ 143 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT) 144 #define _mm_cmple_epi64_mask(A, B) \ 145 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE) 146 #define _mm_mask_cmple_epi64_mask(k, A, B) \ 147 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE) 148 #define _mm_cmplt_epi64_mask(A, B) \ 149 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT) 150 #define _mm_mask_cmplt_epi64_mask(k, A, B) \ 151 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT) 152 #define _mm_cmpneq_epi64_mask(A, B) \ 153 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE) 154 #define _mm_mask_cmpneq_epi64_mask(k, A, B) \ 155 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE) 156 157 #define _mm256_cmpeq_epi64_mask(A, B) \ 158 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ) 159 #define _mm256_mask_cmpeq_epi64_mask(k, A, B) \ 160 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ) 161 #define _mm256_cmpge_epi64_mask(A, B) \ 162 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE) 163 #define _mm256_mask_cmpge_epi64_mask(k, A, B) \ 164 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE) 165 #define _mm256_cmpgt_epi64_mask(A, B) \ 166 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT) 167 #define _mm256_mask_cmpgt_epi64_mask(k, A, B) \ 168 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT) 169 #define _mm256_cmple_epi64_mask(A, B) \ 170 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE) 171 #define _mm256_mask_cmple_epi64_mask(k, A, B) \ 172 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE) 173 #define _mm256_cmplt_epi64_mask(A, B) \ 174 _mm256_cmp_epi64_mask((A), 
(B), _MM_CMPINT_LT) 175 #define _mm256_mask_cmplt_epi64_mask(k, A, B) \ 176 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT) 177 #define _mm256_cmpneq_epi64_mask(A, B) \ 178 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE) 179 #define _mm256_mask_cmpneq_epi64_mask(k, A, B) \ 180 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE) 181 182 #define _mm_cmpeq_epu64_mask(A, B) \ 183 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ) 184 #define _mm_mask_cmpeq_epu64_mask(k, A, B) \ 185 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ) 186 #define _mm_cmpge_epu64_mask(A, B) \ 187 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE) 188 #define _mm_mask_cmpge_epu64_mask(k, A, B) \ 189 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE) 190 #define _mm_cmpgt_epu64_mask(A, B) \ 191 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT) 192 #define _mm_mask_cmpgt_epu64_mask(k, A, B) \ 193 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT) 194 #define _mm_cmple_epu64_mask(A, B) \ 195 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE) 196 #define _mm_mask_cmple_epu64_mask(k, A, B) \ 197 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE) 198 #define _mm_cmplt_epu64_mask(A, B) \ 199 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT) 200 #define _mm_mask_cmplt_epu64_mask(k, A, B) \ 201 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT) 202 #define _mm_cmpneq_epu64_mask(A, B) \ 203 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE) 204 #define _mm_mask_cmpneq_epu64_mask(k, A, B) \ 205 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) 206 207 #define _mm256_cmpeq_epu64_mask(A, B) \ 208 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ) 209 #define _mm256_mask_cmpeq_epu64_mask(k, A, B) \ 210 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ) 211 #define _mm256_cmpge_epu64_mask(A, B) \ 212 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE) 213 #define _mm256_mask_cmpge_epu64_mask(k, A, B) \ 214 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE) 215 #define _mm256_cmpgt_epu64_mask(A, B) \ 216 
_mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GT) 217 #define _mm256_mask_cmpgt_epu64_mask(k, A, B) \ 218 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT) 219 #define _mm256_cmple_epu64_mask(A, B) \ 220 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE) 221 #define _mm256_mask_cmple_epu64_mask(k, A, B) \ 222 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE) 223 #define _mm256_cmplt_epu64_mask(A, B) \ 224 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT) 225 #define _mm256_mask_cmplt_epu64_mask(k, A, B) \ 226 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT) 227 #define _mm256_cmpneq_epu64_mask(A, B) \ 228 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE) 229 #define _mm256_mask_cmpneq_epu64_mask(k, A, B) \ 230 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) 231 232 static __inline__ __m256i __DEFAULT_FN_ATTRS256 233 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 234 { 235 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 236 (__v8si)_mm256_add_epi32(__A, __B), 237 (__v8si)__W); 238 } 239 240 static __inline__ __m256i __DEFAULT_FN_ATTRS256 241 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B) 242 { 243 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 244 (__v8si)_mm256_add_epi32(__A, __B), 245 (__v8si)_mm256_setzero_si256()); 246 } 247 248 static __inline__ __m256i __DEFAULT_FN_ATTRS256 249 _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 250 { 251 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 252 (__v4di)_mm256_add_epi64(__A, __B), 253 (__v4di)__W); 254 } 255 256 static __inline__ __m256i __DEFAULT_FN_ATTRS256 257 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B) 258 { 259 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 260 (__v4di)_mm256_add_epi64(__A, __B), 261 (__v4di)_mm256_setzero_si256()); 262 } 263 264 static __inline__ __m256i __DEFAULT_FN_ATTRS256 265 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i 
__B) 266 { 267 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 268 (__v8si)_mm256_sub_epi32(__A, __B), 269 (__v8si)__W); 270 } 271 272 static __inline__ __m256i __DEFAULT_FN_ATTRS256 273 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B) 274 { 275 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 276 (__v8si)_mm256_sub_epi32(__A, __B), 277 (__v8si)_mm256_setzero_si256()); 278 } 279 280 static __inline__ __m256i __DEFAULT_FN_ATTRS256 281 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 282 { 283 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 284 (__v4di)_mm256_sub_epi64(__A, __B), 285 (__v4di)__W); 286 } 287 288 static __inline__ __m256i __DEFAULT_FN_ATTRS256 289 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B) 290 { 291 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 292 (__v4di)_mm256_sub_epi64(__A, __B), 293 (__v4di)_mm256_setzero_si256()); 294 } 295 296 static __inline__ __m128i __DEFAULT_FN_ATTRS128 297 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 298 { 299 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 300 (__v4si)_mm_add_epi32(__A, __B), 301 (__v4si)__W); 302 } 303 304 static __inline__ __m128i __DEFAULT_FN_ATTRS128 305 _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B) 306 { 307 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 308 (__v4si)_mm_add_epi32(__A, __B), 309 (__v4si)_mm_setzero_si128()); 310 } 311 312 static __inline__ __m128i __DEFAULT_FN_ATTRS128 313 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 314 { 315 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 316 (__v2di)_mm_add_epi64(__A, __B), 317 (__v2di)__W); 318 } 319 320 static __inline__ __m128i __DEFAULT_FN_ATTRS128 321 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) 322 { 323 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 324 (__v2di)_mm_add_epi64(__A, __B), 325 (__v2di)_mm_setzero_si128()); 
326 } 327 328 static __inline__ __m128i __DEFAULT_FN_ATTRS128 329 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 330 { 331 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 332 (__v4si)_mm_sub_epi32(__A, __B), 333 (__v4si)__W); 334 } 335 336 static __inline__ __m128i __DEFAULT_FN_ATTRS128 337 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B) 338 { 339 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 340 (__v4si)_mm_sub_epi32(__A, __B), 341 (__v4si)_mm_setzero_si128()); 342 } 343 344 static __inline__ __m128i __DEFAULT_FN_ATTRS128 345 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 346 { 347 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 348 (__v2di)_mm_sub_epi64(__A, __B), 349 (__v2di)__W); 350 } 351 352 static __inline__ __m128i __DEFAULT_FN_ATTRS128 353 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) 354 { 355 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 356 (__v2di)_mm_sub_epi64(__A, __B), 357 (__v2di)_mm_setzero_si128()); 358 } 359 360 static __inline__ __m256i __DEFAULT_FN_ATTRS256 361 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 362 { 363 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 364 (__v4di)_mm256_mul_epi32(__X, __Y), 365 (__v4di)__W); 366 } 367 368 static __inline__ __m256i __DEFAULT_FN_ATTRS256 369 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y) 370 { 371 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 372 (__v4di)_mm256_mul_epi32(__X, __Y), 373 (__v4di)_mm256_setzero_si256()); 374 } 375 376 static __inline__ __m128i __DEFAULT_FN_ATTRS128 377 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 378 { 379 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 380 (__v2di)_mm_mul_epi32(__X, __Y), 381 (__v2di)__W); 382 } 383 384 static __inline__ __m128i __DEFAULT_FN_ATTRS128 385 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y) 386 { 387 return 
(__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 388 (__v2di)_mm_mul_epi32(__X, __Y), 389 (__v2di)_mm_setzero_si128()); 390 } 391 392 static __inline__ __m256i __DEFAULT_FN_ATTRS256 393 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 394 { 395 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 396 (__v4di)_mm256_mul_epu32(__X, __Y), 397 (__v4di)__W); 398 } 399 400 static __inline__ __m256i __DEFAULT_FN_ATTRS256 401 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y) 402 { 403 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 404 (__v4di)_mm256_mul_epu32(__X, __Y), 405 (__v4di)_mm256_setzero_si256()); 406 } 407 408 static __inline__ __m128i __DEFAULT_FN_ATTRS128 409 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 410 { 411 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 412 (__v2di)_mm_mul_epu32(__X, __Y), 413 (__v2di)__W); 414 } 415 416 static __inline__ __m128i __DEFAULT_FN_ATTRS128 417 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y) 418 { 419 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 420 (__v2di)_mm_mul_epu32(__X, __Y), 421 (__v2di)_mm_setzero_si128()); 422 } 423 424 static __inline__ __m256i __DEFAULT_FN_ATTRS256 425 _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) 426 { 427 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 428 (__v8si)_mm256_mullo_epi32(__A, __B), 429 (__v8si)_mm256_setzero_si256()); 430 } 431 432 static __inline__ __m256i __DEFAULT_FN_ATTRS256 433 _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) 434 { 435 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 436 (__v8si)_mm256_mullo_epi32(__A, __B), 437 (__v8si)__W); 438 } 439 440 static __inline__ __m128i __DEFAULT_FN_ATTRS128 441 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B) 442 { 443 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 444 (__v4si)_mm_mullo_epi32(__A, __B), 445 
(__v4si)_mm_setzero_si128()); 446 } 447 448 static __inline__ __m128i __DEFAULT_FN_ATTRS128 449 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) 450 { 451 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 452 (__v4si)_mm_mullo_epi32(__A, __B), 453 (__v4si)__W); 454 } 455 456 static __inline__ __m256i __DEFAULT_FN_ATTRS256 457 _mm256_and_epi32(__m256i __a, __m256i __b) 458 { 459 return (__m256i)((__v8su)__a & (__v8su)__b); 460 } 461 462 static __inline__ __m256i __DEFAULT_FN_ATTRS256 463 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 464 { 465 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 466 (__v8si)_mm256_and_epi32(__A, __B), 467 (__v8si)__W); 468 } 469 470 static __inline__ __m256i __DEFAULT_FN_ATTRS256 471 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B) 472 { 473 return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B); 474 } 475 476 static __inline__ __m128i __DEFAULT_FN_ATTRS128 477 _mm_and_epi32(__m128i __a, __m128i __b) 478 { 479 return (__m128i)((__v4su)__a & (__v4su)__b); 480 } 481 482 static __inline__ __m128i __DEFAULT_FN_ATTRS128 483 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 484 { 485 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 486 (__v4si)_mm_and_epi32(__A, __B), 487 (__v4si)__W); 488 } 489 490 static __inline__ __m128i __DEFAULT_FN_ATTRS128 491 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B) 492 { 493 return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B); 494 } 495 496 static __inline__ __m256i __DEFAULT_FN_ATTRS256 497 _mm256_andnot_epi32(__m256i __A, __m256i __B) 498 { 499 return (__m256i)(~(__v8su)__A & (__v8su)__B); 500 } 501 502 static __inline__ __m256i __DEFAULT_FN_ATTRS256 503 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 504 { 505 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 506 (__v8si)_mm256_andnot_epi32(__A, __B), 507 
(__v8si)__W); 508 } 509 510 static __inline__ __m256i __DEFAULT_FN_ATTRS256 511 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B) 512 { 513 return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(), 514 __U, __A, __B); 515 } 516 517 static __inline__ __m128i __DEFAULT_FN_ATTRS128 518 _mm_andnot_epi32(__m128i __A, __m128i __B) 519 { 520 return (__m128i)(~(__v4su)__A & (__v4su)__B); 521 } 522 523 static __inline__ __m128i __DEFAULT_FN_ATTRS128 524 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 525 { 526 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 527 (__v4si)_mm_andnot_epi32(__A, __B), 528 (__v4si)__W); 529 } 530 531 static __inline__ __m128i __DEFAULT_FN_ATTRS128 532 _mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B) 533 { 534 return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B); 535 } 536 537 static __inline__ __m256i __DEFAULT_FN_ATTRS256 538 _mm256_or_epi32(__m256i __a, __m256i __b) 539 { 540 return (__m256i)((__v8su)__a | (__v8su)__b); 541 } 542 543 static __inline__ __m256i __DEFAULT_FN_ATTRS256 544 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 545 { 546 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 547 (__v8si)_mm256_or_epi32(__A, __B), 548 (__v8si)__W); 549 } 550 551 static __inline__ __m256i __DEFAULT_FN_ATTRS256 552 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B) 553 { 554 return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B); 555 } 556 557 static __inline__ __m128i __DEFAULT_FN_ATTRS128 558 _mm_or_epi32(__m128i __a, __m128i __b) 559 { 560 return (__m128i)((__v4su)__a | (__v4su)__b); 561 } 562 563 static __inline__ __m128i __DEFAULT_FN_ATTRS128 564 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 565 { 566 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 567 (__v4si)_mm_or_epi32(__A, __B), 568 (__v4si)__W); 569 } 570 571 static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 572 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B) 573 { 574 return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B); 575 } 576 577 static __inline__ __m256i __DEFAULT_FN_ATTRS256 578 _mm256_xor_epi32(__m256i __a, __m256i __b) 579 { 580 return (__m256i)((__v8su)__a ^ (__v8su)__b); 581 } 582 583 static __inline__ __m256i __DEFAULT_FN_ATTRS256 584 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 585 { 586 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 587 (__v8si)_mm256_xor_epi32(__A, __B), 588 (__v8si)__W); 589 } 590 591 static __inline__ __m256i __DEFAULT_FN_ATTRS256 592 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B) 593 { 594 return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B); 595 } 596 597 static __inline__ __m128i __DEFAULT_FN_ATTRS128 598 _mm_xor_epi32(__m128i __a, __m128i __b) 599 { 600 return (__m128i)((__v4su)__a ^ (__v4su)__b); 601 } 602 603 static __inline__ __m128i __DEFAULT_FN_ATTRS128 604 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 605 { 606 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 607 (__v4si)_mm_xor_epi32(__A, __B), 608 (__v4si)__W); 609 } 610 611 static __inline__ __m128i __DEFAULT_FN_ATTRS128 612 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B) 613 { 614 return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B); 615 } 616 617 static __inline__ __m256i __DEFAULT_FN_ATTRS256 618 _mm256_and_epi64(__m256i __a, __m256i __b) 619 { 620 return (__m256i)((__v4du)__a & (__v4du)__b); 621 } 622 623 static __inline__ __m256i __DEFAULT_FN_ATTRS256 624 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 625 { 626 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 627 (__v4di)_mm256_and_epi64(__A, __B), 628 (__v4di)__W); 629 } 630 631 static __inline__ __m256i __DEFAULT_FN_ATTRS256 632 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i 
__B) 633 { 634 return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B); 635 } 636 637 static __inline__ __m128i __DEFAULT_FN_ATTRS128 638 _mm_and_epi64(__m128i __a, __m128i __b) 639 { 640 return (__m128i)((__v2du)__a & (__v2du)__b); 641 } 642 643 static __inline__ __m128i __DEFAULT_FN_ATTRS128 644 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 645 { 646 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 647 (__v2di)_mm_and_epi64(__A, __B), 648 (__v2di)__W); 649 } 650 651 static __inline__ __m128i __DEFAULT_FN_ATTRS128 652 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B) 653 { 654 return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B); 655 } 656 657 static __inline__ __m256i __DEFAULT_FN_ATTRS256 658 _mm256_andnot_epi64(__m256i __A, __m256i __B) 659 { 660 return (__m256i)(~(__v4du)__A & (__v4du)__B); 661 } 662 663 static __inline__ __m256i __DEFAULT_FN_ATTRS256 664 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 665 { 666 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 667 (__v4di)_mm256_andnot_epi64(__A, __B), 668 (__v4di)__W); 669 } 670 671 static __inline__ __m256i __DEFAULT_FN_ATTRS256 672 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B) 673 { 674 return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(), 675 __U, __A, __B); 676 } 677 678 static __inline__ __m128i __DEFAULT_FN_ATTRS128 679 _mm_andnot_epi64(__m128i __A, __m128i __B) 680 { 681 return (__m128i)(~(__v2du)__A & (__v2du)__B); 682 } 683 684 static __inline__ __m128i __DEFAULT_FN_ATTRS128 685 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 686 { 687 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 688 (__v2di)_mm_andnot_epi64(__A, __B), 689 (__v2di)__W); 690 } 691 692 static __inline__ __m128i __DEFAULT_FN_ATTRS128 693 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B) 694 { 695 return 
(__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B); 696 } 697 698 static __inline__ __m256i __DEFAULT_FN_ATTRS256 699 _mm256_or_epi64(__m256i __a, __m256i __b) 700 { 701 return (__m256i)((__v4du)__a | (__v4du)__b); 702 } 703 704 static __inline__ __m256i __DEFAULT_FN_ATTRS256 705 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 706 { 707 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 708 (__v4di)_mm256_or_epi64(__A, __B), 709 (__v4di)__W); 710 } 711 712 static __inline__ __m256i __DEFAULT_FN_ATTRS256 713 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B) 714 { 715 return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B); 716 } 717 718 static __inline__ __m128i __DEFAULT_FN_ATTRS128 719 _mm_or_epi64(__m128i __a, __m128i __b) 720 { 721 return (__m128i)((__v2du)__a | (__v2du)__b); 722 } 723 724 static __inline__ __m128i __DEFAULT_FN_ATTRS128 725 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 726 { 727 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 728 (__v2di)_mm_or_epi64(__A, __B), 729 (__v2di)__W); 730 } 731 732 static __inline__ __m128i __DEFAULT_FN_ATTRS128 733 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B) 734 { 735 return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B); 736 } 737 738 static __inline__ __m256i __DEFAULT_FN_ATTRS256 739 _mm256_xor_epi64(__m256i __a, __m256i __b) 740 { 741 return (__m256i)((__v4du)__a ^ (__v4du)__b); 742 } 743 744 static __inline__ __m256i __DEFAULT_FN_ATTRS256 745 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 746 { 747 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 748 (__v4di)_mm256_xor_epi64(__A, __B), 749 (__v4di)__W); 750 } 751 752 static __inline__ __m256i __DEFAULT_FN_ATTRS256 753 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B) 754 { 755 return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B); 756 } 757 758 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 759 _mm_xor_epi64(__m128i __a, __m128i __b) 760 { 761 return (__m128i)((__v2du)__a ^ (__v2du)__b); 762 } 763 764 static __inline__ __m128i __DEFAULT_FN_ATTRS128 765 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, 766 __m128i __B) 767 { 768 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 769 (__v2di)_mm_xor_epi64(__A, __B), 770 (__v2di)__W); 771 } 772 773 static __inline__ __m128i __DEFAULT_FN_ATTRS128 774 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B) 775 { 776 return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B); 777 } 778 779 #define _mm_cmp_epi32_mask(a, b, p) \ 780 ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ 781 (__v4si)(__m128i)(b), (int)(p), \ 782 (__mmask8)-1)) 783 784 #define _mm_mask_cmp_epi32_mask(m, a, b, p) \ 785 ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ 786 (__v4si)(__m128i)(b), (int)(p), \ 787 (__mmask8)(m))) 788 789 #define _mm_cmp_epu32_mask(a, b, p) \ 790 ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ 791 (__v4si)(__m128i)(b), (int)(p), \ 792 (__mmask8)-1)) 793 794 #define _mm_mask_cmp_epu32_mask(m, a, b, p) \ 795 ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ 796 (__v4si)(__m128i)(b), (int)(p), \ 797 (__mmask8)(m))) 798 799 #define _mm256_cmp_epi32_mask(a, b, p) \ 800 ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ 801 (__v8si)(__m256i)(b), (int)(p), \ 802 (__mmask8)-1)) 803 804 #define _mm256_mask_cmp_epi32_mask(m, a, b, p) \ 805 ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ 806 (__v8si)(__m256i)(b), (int)(p), \ 807 (__mmask8)(m))) 808 809 #define _mm256_cmp_epu32_mask(a, b, p) \ 810 ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ 811 (__v8si)(__m256i)(b), (int)(p), \ 812 (__mmask8)-1)) 813 814 #define _mm256_mask_cmp_epu32_mask(m, a, b, p) \ 815 ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ 816 
(__v8si)(__m256i)(b), (int)(p), \ 817 (__mmask8)(m))) 818 819 #define _mm_cmp_epi64_mask(a, b, p) \ 820 ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ 821 (__v2di)(__m128i)(b), (int)(p), \ 822 (__mmask8)-1)) 823 824 #define _mm_mask_cmp_epi64_mask(m, a, b, p) \ 825 ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ 826 (__v2di)(__m128i)(b), (int)(p), \ 827 (__mmask8)(m))) 828 829 #define _mm_cmp_epu64_mask(a, b, p) \ 830 ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ 831 (__v2di)(__m128i)(b), (int)(p), \ 832 (__mmask8)-1)) 833 834 #define _mm_mask_cmp_epu64_mask(m, a, b, p) \ 835 ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ 836 (__v2di)(__m128i)(b), (int)(p), \ 837 (__mmask8)(m))) 838 839 #define _mm256_cmp_epi64_mask(a, b, p) \ 840 ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ 841 (__v4di)(__m256i)(b), (int)(p), \ 842 (__mmask8)-1)) 843 844 #define _mm256_mask_cmp_epi64_mask(m, a, b, p) \ 845 ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ 846 (__v4di)(__m256i)(b), (int)(p), \ 847 (__mmask8)(m))) 848 849 #define _mm256_cmp_epu64_mask(a, b, p) \ 850 ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ 851 (__v4di)(__m256i)(b), (int)(p), \ 852 (__mmask8)-1)) 853 854 #define _mm256_mask_cmp_epu64_mask(m, a, b, p) \ 855 ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ 856 (__v4di)(__m256i)(b), (int)(p), \ 857 (__mmask8)(m))) 858 859 #define _mm256_cmp_ps_mask(a, b, p) \ 860 ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ 861 (__v8sf)(__m256)(b), (int)(p), \ 862 (__mmask8)-1)) 863 864 #define _mm256_mask_cmp_ps_mask(m, a, b, p) \ 865 ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ 866 (__v8sf)(__m256)(b), (int)(p), \ 867 (__mmask8)(m))) 868 869 #define _mm256_cmp_pd_mask(a, b, p) \ 870 ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ 871 (__v4df)(__m256d)(b), (int)(p), \ 872 (__mmask8)-1)) 873 874 #define 
_mm256_mask_cmp_pd_mask(m, a, b, p) \ 875 ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ 876 (__v4df)(__m256d)(b), (int)(p), \ 877 (__mmask8)(m))) 878 879 #define _mm_cmp_ps_mask(a, b, p) \ 880 ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ 881 (__v4sf)(__m128)(b), (int)(p), \ 882 (__mmask8)-1)) 883 884 #define _mm_mask_cmp_ps_mask(m, a, b, p) \ 885 ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ 886 (__v4sf)(__m128)(b), (int)(p), \ 887 (__mmask8)(m))) 888 889 #define _mm_cmp_pd_mask(a, b, p) \ 890 ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ 891 (__v2df)(__m128d)(b), (int)(p), \ 892 (__mmask8)-1)) 893 894 #define _mm_mask_cmp_pd_mask(m, a, b, p) \ 895 ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ 896 (__v2df)(__m128d)(b), (int)(p), \ 897 (__mmask8)(m))) 898 899 static __inline__ __m128d __DEFAULT_FN_ATTRS128 900 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 901 { 902 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 903 __builtin_ia32_vfmaddpd ((__v2df) __A, 904 (__v2df) __B, 905 (__v2df) __C), 906 (__v2df) __A); 907 } 908 909 static __inline__ __m128d __DEFAULT_FN_ATTRS128 910 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 911 { 912 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 913 __builtin_ia32_vfmaddpd ((__v2df) __A, 914 (__v2df) __B, 915 (__v2df) __C), 916 (__v2df) __C); 917 } 918 919 static __inline__ __m128d __DEFAULT_FN_ATTRS128 920 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 921 { 922 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 923 __builtin_ia32_vfmaddpd ((__v2df) __A, 924 (__v2df) __B, 925 (__v2df) __C), 926 (__v2df)_mm_setzero_pd()); 927 } 928 929 static __inline__ __m128d __DEFAULT_FN_ATTRS128 930 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 931 { 932 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 933 
__builtin_ia32_vfmaddpd ((__v2df) __A, 934 (__v2df) __B, 935 -(__v2df) __C), 936 (__v2df) __A); 937 } 938 939 static __inline__ __m128d __DEFAULT_FN_ATTRS128 940 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 941 { 942 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 943 __builtin_ia32_vfmaddpd ((__v2df) __A, 944 (__v2df) __B, 945 -(__v2df) __C), 946 (__v2df)_mm_setzero_pd()); 947 } 948 949 static __inline__ __m128d __DEFAULT_FN_ATTRS128 950 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 951 { 952 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 953 __builtin_ia32_vfmaddpd (-(__v2df) __A, 954 (__v2df) __B, 955 (__v2df) __C), 956 (__v2df) __C); 957 } 958 959 static __inline__ __m128d __DEFAULT_FN_ATTRS128 960 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 961 { 962 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 963 __builtin_ia32_vfmaddpd (-(__v2df) __A, 964 (__v2df) __B, 965 (__v2df) __C), 966 (__v2df)_mm_setzero_pd()); 967 } 968 969 static __inline__ __m128d __DEFAULT_FN_ATTRS128 970 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 971 { 972 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 973 __builtin_ia32_vfmaddpd (-(__v2df) __A, 974 (__v2df) __B, 975 -(__v2df) __C), 976 (__v2df)_mm_setzero_pd()); 977 } 978 979 static __inline__ __m256d __DEFAULT_FN_ATTRS256 980 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 981 { 982 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 983 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 984 (__v4df) __B, 985 (__v4df) __C), 986 (__v4df) __A); 987 } 988 989 static __inline__ __m256d __DEFAULT_FN_ATTRS256 990 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 991 { 992 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 993 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 994 (__v4df) __B, 995 (__v4df) __C), 996 (__v4df) __C); 
997 } 998 999 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1000 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1001 { 1002 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1003 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1004 (__v4df) __B, 1005 (__v4df) __C), 1006 (__v4df)_mm256_setzero_pd()); 1007 } 1008 1009 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1010 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1011 { 1012 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1013 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1014 (__v4df) __B, 1015 -(__v4df) __C), 1016 (__v4df) __A); 1017 } 1018 1019 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1020 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1021 { 1022 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1023 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1024 (__v4df) __B, 1025 -(__v4df) __C), 1026 (__v4df)_mm256_setzero_pd()); 1027 } 1028 1029 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1030 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1031 { 1032 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1033 __builtin_ia32_vfmaddpd256 (-(__v4df) __A, 1034 (__v4df) __B, 1035 (__v4df) __C), 1036 (__v4df) __C); 1037 } 1038 1039 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1040 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1041 { 1042 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1043 __builtin_ia32_vfmaddpd256 (-(__v4df) __A, 1044 (__v4df) __B, 1045 (__v4df) __C), 1046 (__v4df)_mm256_setzero_pd()); 1047 } 1048 1049 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1050 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1051 { 1052 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1053 __builtin_ia32_vfmaddpd256 (-(__v4df) __A, 1054 (__v4df) __B, 1055 -(__v4df) __C), 1056 
(__v4df)_mm256_setzero_pd()); 1057 } 1058 1059 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1060 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1061 { 1062 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1063 __builtin_ia32_vfmaddps ((__v4sf) __A, 1064 (__v4sf) __B, 1065 (__v4sf) __C), 1066 (__v4sf) __A); 1067 } 1068 1069 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1070 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1071 { 1072 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1073 __builtin_ia32_vfmaddps ((__v4sf) __A, 1074 (__v4sf) __B, 1075 (__v4sf) __C), 1076 (__v4sf) __C); 1077 } 1078 1079 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1080 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1081 { 1082 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1083 __builtin_ia32_vfmaddps ((__v4sf) __A, 1084 (__v4sf) __B, 1085 (__v4sf) __C), 1086 (__v4sf)_mm_setzero_ps()); 1087 } 1088 1089 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1090 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1091 { 1092 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1093 __builtin_ia32_vfmaddps ((__v4sf) __A, 1094 (__v4sf) __B, 1095 -(__v4sf) __C), 1096 (__v4sf) __A); 1097 } 1098 1099 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1100 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1101 { 1102 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1103 __builtin_ia32_vfmaddps ((__v4sf) __A, 1104 (__v4sf) __B, 1105 -(__v4sf) __C), 1106 (__v4sf)_mm_setzero_ps()); 1107 } 1108 1109 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1110 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1111 { 1112 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1113 __builtin_ia32_vfmaddps (-(__v4sf) __A, 1114 (__v4sf) __B, 1115 (__v4sf) __C), 1116 (__v4sf) __C); 1117 } 1118 1119 static __inline__ __m128 __DEFAULT_FN_ATTRS128 
1120 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1121 { 1122 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1123 __builtin_ia32_vfmaddps (-(__v4sf) __A, 1124 (__v4sf) __B, 1125 (__v4sf) __C), 1126 (__v4sf)_mm_setzero_ps()); 1127 } 1128 1129 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1130 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1131 { 1132 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1133 __builtin_ia32_vfmaddps (-(__v4sf) __A, 1134 (__v4sf) __B, 1135 -(__v4sf) __C), 1136 (__v4sf)_mm_setzero_ps()); 1137 } 1138 1139 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1140 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1141 { 1142 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1143 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1144 (__v8sf) __B, 1145 (__v8sf) __C), 1146 (__v8sf) __A); 1147 } 1148 1149 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1150 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1151 { 1152 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1153 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1154 (__v8sf) __B, 1155 (__v8sf) __C), 1156 (__v8sf) __C); 1157 } 1158 1159 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1160 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1161 { 1162 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1163 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1164 (__v8sf) __B, 1165 (__v8sf) __C), 1166 (__v8sf)_mm256_setzero_ps()); 1167 } 1168 1169 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1170 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1171 { 1172 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1173 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1174 (__v8sf) __B, 1175 -(__v8sf) __C), 1176 (__v8sf) __A); 1177 } 1178 1179 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1180 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, 
__m256 __B, __m256 __C) 1181 { 1182 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1183 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1184 (__v8sf) __B, 1185 -(__v8sf) __C), 1186 (__v8sf)_mm256_setzero_ps()); 1187 } 1188 1189 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1190 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1191 { 1192 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1193 __builtin_ia32_vfmaddps256 (-(__v8sf) __A, 1194 (__v8sf) __B, 1195 (__v8sf) __C), 1196 (__v8sf) __C); 1197 } 1198 1199 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1200 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1201 { 1202 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1203 __builtin_ia32_vfmaddps256 (-(__v8sf) __A, 1204 (__v8sf) __B, 1205 (__v8sf) __C), 1206 (__v8sf)_mm256_setzero_ps()); 1207 } 1208 1209 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1210 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1211 { 1212 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1213 __builtin_ia32_vfmaddps256 (-(__v8sf) __A, 1214 (__v8sf) __B, 1215 -(__v8sf) __C), 1216 (__v8sf)_mm256_setzero_ps()); 1217 } 1218 1219 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1220 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1221 { 1222 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1223 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1224 (__v2df) __B, 1225 (__v2df) __C), 1226 (__v2df) __A); 1227 } 1228 1229 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1230 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1231 { 1232 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1233 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1234 (__v2df) __B, 1235 (__v2df) __C), 1236 (__v2df) __C); 1237 } 1238 1239 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1240 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d 
__C) 1241 { 1242 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1243 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1244 (__v2df) __B, 1245 (__v2df) __C), 1246 (__v2df)_mm_setzero_pd()); 1247 } 1248 1249 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1250 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1251 { 1252 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1253 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1254 (__v2df) __B, 1255 -(__v2df) __C), 1256 (__v2df) __A); 1257 } 1258 1259 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1260 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1261 { 1262 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1263 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1264 (__v2df) __B, 1265 -(__v2df) __C), 1266 (__v2df)_mm_setzero_pd()); 1267 } 1268 1269 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1270 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1271 { 1272 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1273 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1274 (__v4df) __B, 1275 (__v4df) __C), 1276 (__v4df) __A); 1277 } 1278 1279 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1280 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1281 { 1282 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1283 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1284 (__v4df) __B, 1285 (__v4df) __C), 1286 (__v4df) __C); 1287 } 1288 1289 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1290 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1291 { 1292 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1293 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1294 (__v4df) __B, 1295 (__v4df) __C), 1296 (__v4df)_mm256_setzero_pd()); 1297 } 1298 1299 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1300 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, 
__m256d __C) 1301 { 1302 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1303 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1304 (__v4df) __B, 1305 -(__v4df) __C), 1306 (__v4df) __A); 1307 } 1308 1309 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1310 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1311 { 1312 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1313 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1314 (__v4df) __B, 1315 -(__v4df) __C), 1316 (__v4df)_mm256_setzero_pd()); 1317 } 1318 1319 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1320 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1321 { 1322 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1323 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1324 (__v4sf) __B, 1325 (__v4sf) __C), 1326 (__v4sf) __A); 1327 } 1328 1329 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1330 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1331 { 1332 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1333 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1334 (__v4sf) __B, 1335 (__v4sf) __C), 1336 (__v4sf) __C); 1337 } 1338 1339 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1340 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1341 { 1342 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1343 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1344 (__v4sf) __B, 1345 (__v4sf) __C), 1346 (__v4sf)_mm_setzero_ps()); 1347 } 1348 1349 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1350 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1351 { 1352 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1353 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1354 (__v4sf) __B, 1355 -(__v4sf) __C), 1356 (__v4sf) __A); 1357 } 1358 1359 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1360 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1361 { 1362 return (__m128) 
__builtin_ia32_selectps_128((__mmask8) __U, 1363 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1364 (__v4sf) __B, 1365 -(__v4sf) __C), 1366 (__v4sf)_mm_setzero_ps()); 1367 } 1368 1369 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1370 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, 1371 __m256 __C) 1372 { 1373 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1374 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1375 (__v8sf) __B, 1376 (__v8sf) __C), 1377 (__v8sf) __A); 1378 } 1379 1380 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1381 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1382 { 1383 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1384 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1385 (__v8sf) __B, 1386 (__v8sf) __C), 1387 (__v8sf) __C); 1388 } 1389 1390 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1391 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1392 { 1393 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1394 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1395 (__v8sf) __B, 1396 (__v8sf) __C), 1397 (__v8sf)_mm256_setzero_ps()); 1398 } 1399 1400 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1401 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1402 { 1403 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1404 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1405 (__v8sf) __B, 1406 -(__v8sf) __C), 1407 (__v8sf) __A); 1408 } 1409 1410 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1411 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1412 { 1413 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1414 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1415 (__v8sf) __B, 1416 -(__v8sf) __C), 1417 (__v8sf)_mm256_setzero_ps()); 1418 } 1419 1420 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1421 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1422 { 1423 return (__m128d) 
__builtin_ia32_selectpd_128((__mmask8) __U, 1424 __builtin_ia32_vfmaddpd ((__v2df) __A, 1425 (__v2df) __B, 1426 -(__v2df) __C), 1427 (__v2df) __C); 1428 } 1429 1430 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1431 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1432 { 1433 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1434 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1435 (__v4df) __B, 1436 -(__v4df) __C), 1437 (__v4df) __C); 1438 } 1439 1440 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1441 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1442 { 1443 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1444 __builtin_ia32_vfmaddps ((__v4sf) __A, 1445 (__v4sf) __B, 1446 -(__v4sf) __C), 1447 (__v4sf) __C); 1448 } 1449 1450 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1451 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1452 { 1453 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1454 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1455 (__v8sf) __B, 1456 -(__v8sf) __C), 1457 (__v8sf) __C); 1458 } 1459 1460 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1461 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1462 { 1463 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1464 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1465 (__v2df) __B, 1466 -(__v2df) __C), 1467 (__v2df) __C); 1468 } 1469 1470 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1471 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1472 { 1473 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1474 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1475 (__v4df) __B, 1476 -(__v4df) __C), 1477 (__v4df) __C); 1478 } 1479 1480 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1481 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1482 { 1483 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1484 
__builtin_ia32_vfmaddsubps ((__v4sf) __A, 1485 (__v4sf) __B, 1486 -(__v4sf) __C), 1487 (__v4sf) __C); 1488 } 1489 1490 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1491 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1492 { 1493 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1494 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1495 (__v8sf) __B, 1496 -(__v8sf) __C), 1497 (__v8sf) __C); 1498 } 1499 1500 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1501 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1502 { 1503 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1504 __builtin_ia32_vfmaddpd ((__v2df) __A, 1505 -(__v2df) __B, 1506 (__v2df) __C), 1507 (__v2df) __A); 1508 } 1509 1510 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1511 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1512 { 1513 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1514 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1515 -(__v4df) __B, 1516 (__v4df) __C), 1517 (__v4df) __A); 1518 } 1519 1520 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1521 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1522 { 1523 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1524 __builtin_ia32_vfmaddps ((__v4sf) __A, 1525 -(__v4sf) __B, 1526 (__v4sf) __C), 1527 (__v4sf) __A); 1528 } 1529 1530 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1531 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1532 { 1533 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1534 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1535 -(__v8sf) __B, 1536 (__v8sf) __C), 1537 (__v8sf) __A); 1538 } 1539 1540 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1541 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1542 { 1543 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1544 __builtin_ia32_vfmaddpd ((__v2df) __A, 1545 -(__v2df) __B, 1546 -(__v2df) 
__C), 1547 (__v2df) __A); 1548 } 1549 1550 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1551 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1552 { 1553 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1554 __builtin_ia32_vfmaddpd ((__v2df) __A, 1555 -(__v2df) __B, 1556 -(__v2df) __C), 1557 (__v2df) __C); 1558 } 1559 1560 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1561 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1562 { 1563 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1564 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1565 -(__v4df) __B, 1566 -(__v4df) __C), 1567 (__v4df) __A); 1568 } 1569 1570 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1571 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1572 { 1573 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1574 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1575 -(__v4df) __B, 1576 -(__v4df) __C), 1577 (__v4df) __C); 1578 } 1579 1580 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1581 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1582 { 1583 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1584 __builtin_ia32_vfmaddps ((__v4sf) __A, 1585 -(__v4sf) __B, 1586 -(__v4sf) __C), 1587 (__v4sf) __A); 1588 } 1589 1590 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1591 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1592 { 1593 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1594 __builtin_ia32_vfmaddps ((__v4sf) __A, 1595 -(__v4sf) __B, 1596 -(__v4sf) __C), 1597 (__v4sf) __C); 1598 } 1599 1600 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1601 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1602 { 1603 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1604 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1605 -(__v8sf) __B, 1606 -(__v8sf) __C), 1607 (__v8sf) __A); 1608 } 1609 1610 static __inline__ __m256 
__DEFAULT_FN_ATTRS256 1611 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1612 { 1613 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1614 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1615 -(__v8sf) __B, 1616 -(__v8sf) __C), 1617 (__v8sf) __C); 1618 } 1619 1620 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1621 _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 1622 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 1623 (__v2df)_mm_add_pd(__A, __B), 1624 (__v2df)__W); 1625 } 1626 1627 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1628 _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) { 1629 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 1630 (__v2df)_mm_add_pd(__A, __B), 1631 (__v2df)_mm_setzero_pd()); 1632 } 1633 1634 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1635 _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 1636 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 1637 (__v4df)_mm256_add_pd(__A, __B), 1638 (__v4df)__W); 1639 } 1640 1641 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1642 _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) { 1643 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 1644 (__v4df)_mm256_add_pd(__A, __B), 1645 (__v4df)_mm256_setzero_pd()); 1646 } 1647 1648 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1649 _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 1650 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1651 (__v4sf)_mm_add_ps(__A, __B), 1652 (__v4sf)__W); 1653 } 1654 1655 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1656 _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) { 1657 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1658 (__v4sf)_mm_add_ps(__A, __B), 1659 (__v4sf)_mm_setzero_ps()); 1660 } 1661 1662 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1663 _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 1664 return 
(__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1665 (__v8sf)_mm256_add_ps(__A, __B), 1666 (__v8sf)__W); 1667 } 1668 1669 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1670 _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) { 1671 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1672 (__v8sf)_mm256_add_ps(__A, __B), 1673 (__v8sf)_mm256_setzero_ps()); 1674 } 1675 1676 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1677 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) { 1678 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 1679 (__v4si) __W, 1680 (__v4si) __A); 1681 } 1682 1683 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1684 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) { 1685 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 1686 (__v8si) __W, 1687 (__v8si) __A); 1688 } 1689 1690 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1691 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) { 1692 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 1693 (__v2df) __W, 1694 (__v2df) __A); 1695 } 1696 1697 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1698 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) { 1699 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 1700 (__v4df) __W, 1701 (__v4df) __A); 1702 } 1703 1704 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1705 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) { 1706 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 1707 (__v4sf) __W, 1708 (__v4sf) __A); 1709 } 1710 1711 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1712 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) { 1713 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 1714 (__v8sf) __W, 1715 (__v8sf) __A); 1716 } 1717 1718 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1719 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) { 1720 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 
1721 (__v2di) __W, 1722 (__v2di) __A); 1723 } 1724 1725 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1726 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) { 1727 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 1728 (__v4di) __W, 1729 (__v4di) __A); 1730 } 1731 1732 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1733 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) { 1734 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, 1735 (__v2df) __W, 1736 (__mmask8) __U); 1737 } 1738 1739 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1740 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A) { 1741 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, 1742 (__v2df) 1743 _mm_setzero_pd (), 1744 (__mmask8) __U); 1745 } 1746 1747 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1748 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) { 1749 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, 1750 (__v4df) __W, 1751 (__mmask8) __U); 1752 } 1753 1754 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1755 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) { 1756 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, 1757 (__v4df) 1758 _mm256_setzero_pd (), 1759 (__mmask8) __U); 1760 } 1761 1762 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1763 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 1764 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, 1765 (__v2di) __W, 1766 (__mmask8) __U); 1767 } 1768 1769 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1770 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) { 1771 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, 1772 (__v2di) 1773 _mm_setzero_si128 (), 1774 (__mmask8) __U); 1775 } 1776 1777 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1778 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 1779 return (__m256i) __builtin_ia32_compressdi256_mask 
((__v4di) __A, 1780 (__v4di) __W, 1781 (__mmask8) __U); 1782 } 1783 1784 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1785 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) { 1786 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, 1787 (__v4di) 1788 _mm256_setzero_si256 (), 1789 (__mmask8) __U); 1790 } 1791 1792 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1793 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) { 1794 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, 1795 (__v4sf) __W, 1796 (__mmask8) __U); 1797 } 1798 1799 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1800 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A) { 1801 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, 1802 (__v4sf) 1803 _mm_setzero_ps (), 1804 (__mmask8) __U); 1805 } 1806 1807 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1808 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) { 1809 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, 1810 (__v8sf) __W, 1811 (__mmask8) __U); 1812 } 1813 1814 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1815 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) { 1816 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, 1817 (__v8sf) 1818 _mm256_setzero_ps (), 1819 (__mmask8) __U); 1820 } 1821 1822 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1823 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { 1824 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, 1825 (__v4si) __W, 1826 (__mmask8) __U); 1827 } 1828 1829 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1830 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) { 1831 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, 1832 (__v4si) 1833 _mm_setzero_si128 (), 1834 (__mmask8) __U); 1835 } 1836 1837 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1838 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { 1839 return (__m256i) 
__builtin_ia32_compresssi256_mask ((__v8si) __A, 1840 (__v8si) __W, 1841 (__mmask8) __U); 1842 } 1843 1844 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1845 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) { 1846 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, 1847 (__v8si) 1848 _mm256_setzero_si256 (), 1849 (__mmask8) __U); 1850 } 1851 1852 static __inline__ void __DEFAULT_FN_ATTRS128 1853 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) { 1854 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P, 1855 (__v2df) __A, 1856 (__mmask8) __U); 1857 } 1858 1859 static __inline__ void __DEFAULT_FN_ATTRS256 1860 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) { 1861 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P, 1862 (__v4df) __A, 1863 (__mmask8) __U); 1864 } 1865 1866 static __inline__ void __DEFAULT_FN_ATTRS128 1867 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) { 1868 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P, 1869 (__v2di) __A, 1870 (__mmask8) __U); 1871 } 1872 1873 static __inline__ void __DEFAULT_FN_ATTRS256 1874 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) { 1875 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P, 1876 (__v4di) __A, 1877 (__mmask8) __U); 1878 } 1879 1880 static __inline__ void __DEFAULT_FN_ATTRS128 1881 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) { 1882 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P, 1883 (__v4sf) __A, 1884 (__mmask8) __U); 1885 } 1886 1887 static __inline__ void __DEFAULT_FN_ATTRS256 1888 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) { 1889 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P, 1890 (__v8sf) __A, 1891 (__mmask8) __U); 1892 } 1893 1894 static __inline__ void __DEFAULT_FN_ATTRS128 1895 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) { 1896 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P, 1897 
(__v4si) __A, 1898 (__mmask8) __U); 1899 } 1900 1901 static __inline__ void __DEFAULT_FN_ATTRS256 1902 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) { 1903 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P, 1904 (__v8si) __A, 1905 (__mmask8) __U); 1906 } 1907 1908 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1909 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) { 1910 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 1911 (__v2df)_mm_cvtepi32_pd(__A), 1912 (__v2df)__W); 1913 } 1914 1915 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1916 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { 1917 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 1918 (__v2df)_mm_cvtepi32_pd(__A), 1919 (__v2df)_mm_setzero_pd()); 1920 } 1921 1922 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1923 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) { 1924 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 1925 (__v4df)_mm256_cvtepi32_pd(__A), 1926 (__v4df)__W); 1927 } 1928 1929 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1930 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { 1931 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 1932 (__v4df)_mm256_cvtepi32_pd(__A), 1933 (__v4df)_mm256_setzero_pd()); 1934 } 1935 1936 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1937 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) { 1938 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1939 (__v4sf)_mm_cvtepi32_ps(__A), 1940 (__v4sf)__W); 1941 } 1942 1943 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1944 _mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) { 1945 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1946 (__v4sf)_mm_cvtepi32_ps(__A), 1947 (__v4sf)_mm_setzero_ps()); 1948 } 1949 1950 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1951 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) { 1952 return 
(__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1953 (__v8sf)_mm256_cvtepi32_ps(__A), 1954 (__v8sf)__W); 1955 } 1956 1957 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1958 _mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) { 1959 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1960 (__v8sf)_mm256_cvtepi32_ps(__A), 1961 (__v8sf)_mm256_setzero_ps()); 1962 } 1963 1964 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1965 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { 1966 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, 1967 (__v4si) __W, 1968 (__mmask8) __U); 1969 } 1970 1971 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1972 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) { 1973 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, 1974 (__v4si) 1975 _mm_setzero_si128 (), 1976 (__mmask8) __U); 1977 } 1978 1979 static __inline__ __m128i __DEFAULT_FN_ATTRS256 1980 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { 1981 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 1982 (__v4si)_mm256_cvtpd_epi32(__A), 1983 (__v4si)__W); 1984 } 1985 1986 static __inline__ __m128i __DEFAULT_FN_ATTRS256 1987 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) { 1988 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 1989 (__v4si)_mm256_cvtpd_epi32(__A), 1990 (__v4si)_mm_setzero_si128()); 1991 } 1992 1993 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1994 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) { 1995 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, 1996 (__v4sf) __W, 1997 (__mmask8) __U); 1998 } 1999 2000 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2001 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) { 2002 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, 2003 (__v4sf) 2004 _mm_setzero_ps (), 2005 (__mmask8) __U); 2006 } 2007 2008 static __inline__ __m128 __DEFAULT_FN_ATTRS256 2009 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) { 2010 
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2011 (__v4sf)_mm256_cvtpd_ps(__A), 2012 (__v4sf)__W); 2013 } 2014 2015 static __inline__ __m128 __DEFAULT_FN_ATTRS256 2016 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) { 2017 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2018 (__v4sf)_mm256_cvtpd_ps(__A), 2019 (__v4sf)_mm_setzero_ps()); 2020 } 2021 2022 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2023 _mm_cvtpd_epu32 (__m128d __A) { 2024 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2025 (__v4si) 2026 _mm_setzero_si128 (), 2027 (__mmask8) -1); 2028 } 2029 2030 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2031 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { 2032 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2033 (__v4si) __W, 2034 (__mmask8) __U); 2035 } 2036 2037 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2038 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) { 2039 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2040 (__v4si) 2041 _mm_setzero_si128 (), 2042 (__mmask8) __U); 2043 } 2044 2045 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2046 _mm256_cvtpd_epu32 (__m256d __A) { 2047 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2048 (__v4si) 2049 _mm_setzero_si128 (), 2050 (__mmask8) -1); 2051 } 2052 2053 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2054 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { 2055 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2056 (__v4si) __W, 2057 (__mmask8) __U); 2058 } 2059 2060 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2061 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) { 2062 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2063 (__v4si) 2064 _mm_setzero_si128 (), 2065 (__mmask8) __U); 2066 } 2067 2068 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2069 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) { 2070 return 
(__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2071 (__v4si)_mm_cvtps_epi32(__A), 2072 (__v4si)__W); 2073 } 2074 2075 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2076 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) { 2077 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2078 (__v4si)_mm_cvtps_epi32(__A), 2079 (__v4si)_mm_setzero_si128()); 2080 } 2081 2082 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2083 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { 2084 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2085 (__v8si)_mm256_cvtps_epi32(__A), 2086 (__v8si)__W); 2087 } 2088 2089 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2090 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) { 2091 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2092 (__v8si)_mm256_cvtps_epi32(__A), 2093 (__v8si)_mm256_setzero_si256()); 2094 } 2095 2096 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2097 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) { 2098 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2099 (__v2df)_mm_cvtps_pd(__A), 2100 (__v2df)__W); 2101 } 2102 2103 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2104 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { 2105 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2106 (__v2df)_mm_cvtps_pd(__A), 2107 (__v2df)_mm_setzero_pd()); 2108 } 2109 2110 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2111 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) { 2112 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2113 (__v4df)_mm256_cvtps_pd(__A), 2114 (__v4df)__W); 2115 } 2116 2117 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2118 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { 2119 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2120 (__v4df)_mm256_cvtps_pd(__A), 2121 (__v4df)_mm256_setzero_pd()); 2122 } 2123 2124 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2125 _mm_cvtps_epu32 (__m128 __A) { 2126 return (__m128i) 
__builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2127 (__v4si) 2128 _mm_setzero_si128 (), 2129 (__mmask8) -1); 2130 } 2131 2132 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2133 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { 2134 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2135 (__v4si) __W, 2136 (__mmask8) __U); 2137 } 2138 2139 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2140 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) { 2141 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2142 (__v4si) 2143 _mm_setzero_si128 (), 2144 (__mmask8) __U); 2145 } 2146 2147 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2148 _mm256_cvtps_epu32 (__m256 __A) { 2149 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2150 (__v8si) 2151 _mm256_setzero_si256 (), 2152 (__mmask8) -1); 2153 } 2154 2155 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2156 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) { 2157 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2158 (__v8si) __W, 2159 (__mmask8) __U); 2160 } 2161 2162 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2163 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) { 2164 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2165 (__v8si) 2166 _mm256_setzero_si256 (), 2167 (__mmask8) __U); 2168 } 2169 2170 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2171 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { 2172 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, 2173 (__v4si) __W, 2174 (__mmask8) __U); 2175 } 2176 2177 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2178 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) { 2179 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, 2180 (__v4si) 2181 _mm_setzero_si128 (), 2182 (__mmask8) __U); 2183 } 2184 2185 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2186 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { 2187 
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2188 (__v4si)_mm256_cvttpd_epi32(__A), 2189 (__v4si)__W); 2190 } 2191 2192 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2193 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) { 2194 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2195 (__v4si)_mm256_cvttpd_epi32(__A), 2196 (__v4si)_mm_setzero_si128()); 2197 } 2198 2199 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2200 _mm_cvttpd_epu32 (__m128d __A) { 2201 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2202 (__v4si) 2203 _mm_setzero_si128 (), 2204 (__mmask8) -1); 2205 } 2206 2207 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2208 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { 2209 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2210 (__v4si) __W, 2211 (__mmask8) __U); 2212 } 2213 2214 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2215 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) { 2216 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2217 (__v4si) 2218 _mm_setzero_si128 (), 2219 (__mmask8) __U); 2220 } 2221 2222 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2223 _mm256_cvttpd_epu32 (__m256d __A) { 2224 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2225 (__v4si) 2226 _mm_setzero_si128 (), 2227 (__mmask8) -1); 2228 } 2229 2230 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2231 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { 2232 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2233 (__v4si) __W, 2234 (__mmask8) __U); 2235 } 2236 2237 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2238 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) { 2239 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2240 (__v4si) 2241 _mm_setzero_si128 (), 2242 (__mmask8) __U); 2243 } 2244 2245 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2246 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) 
{ 2247 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2248 (__v4si)_mm_cvttps_epi32(__A), 2249 (__v4si)__W); 2250 } 2251 2252 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2253 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) { 2254 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2255 (__v4si)_mm_cvttps_epi32(__A), 2256 (__v4si)_mm_setzero_si128()); 2257 } 2258 2259 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2260 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { 2261 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2262 (__v8si)_mm256_cvttps_epi32(__A), 2263 (__v8si)__W); 2264 } 2265 2266 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2267 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) { 2268 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2269 (__v8si)_mm256_cvttps_epi32(__A), 2270 (__v8si)_mm256_setzero_si256()); 2271 } 2272 2273 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2274 _mm_cvttps_epu32 (__m128 __A) { 2275 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2276 (__v4si) 2277 _mm_setzero_si128 (), 2278 (__mmask8) -1); 2279 } 2280 2281 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2282 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { 2283 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2284 (__v4si) __W, 2285 (__mmask8) __U); 2286 } 2287 2288 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2289 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) { 2290 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2291 (__v4si) 2292 _mm_setzero_si128 (), 2293 (__mmask8) __U); 2294 } 2295 2296 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2297 _mm256_cvttps_epu32 (__m256 __A) { 2298 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2299 (__v8si) 2300 _mm256_setzero_si256 (), 2301 (__mmask8) -1); 2302 } 2303 2304 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2305 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, 
__m256 __A) { 2306 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2307 (__v8si) __W, 2308 (__mmask8) __U); 2309 } 2310 2311 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2312 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) { 2313 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2314 (__v8si) 2315 _mm256_setzero_si256 (), 2316 (__mmask8) __U); 2317 } 2318 2319 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2320 _mm_cvtepu32_pd (__m128i __A) { 2321 return (__m128d) __builtin_convertvector( 2322 __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df); 2323 } 2324 2325 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2326 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) { 2327 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 2328 (__v2df)_mm_cvtepu32_pd(__A), 2329 (__v2df)__W); 2330 } 2331 2332 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2333 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { 2334 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 2335 (__v2df)_mm_cvtepu32_pd(__A), 2336 (__v2df)_mm_setzero_pd()); 2337 } 2338 2339 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2340 _mm256_cvtepu32_pd (__m128i __A) { 2341 return (__m256d)__builtin_convertvector((__v4su)__A, __v4df); 2342 } 2343 2344 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2345 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) { 2346 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 2347 (__v4df)_mm256_cvtepu32_pd(__A), 2348 (__v4df)__W); 2349 } 2350 2351 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2352 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { 2353 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 2354 (__v4df)_mm256_cvtepu32_pd(__A), 2355 (__v4df)_mm256_setzero_pd()); 2356 } 2357 2358 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2359 _mm_cvtepu32_ps (__m128i __A) { 2360 return (__m128)__builtin_convertvector((__v4su)__A, __v4sf); 2361 } 2362 2363 static 
__inline__ __m128 __DEFAULT_FN_ATTRS128 2364 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) { 2365 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2366 (__v4sf)_mm_cvtepu32_ps(__A), 2367 (__v4sf)__W); 2368 } 2369 2370 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2371 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) { 2372 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2373 (__v4sf)_mm_cvtepu32_ps(__A), 2374 (__v4sf)_mm_setzero_ps()); 2375 } 2376 2377 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2378 _mm256_cvtepu32_ps (__m256i __A) { 2379 return (__m256)__builtin_convertvector((__v8su)__A, __v8sf); 2380 } 2381 2382 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2383 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) { 2384 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2385 (__v8sf)_mm256_cvtepu32_ps(__A), 2386 (__v8sf)__W); 2387 } 2388 2389 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2390 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) { 2391 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2392 (__v8sf)_mm256_cvtepu32_ps(__A), 2393 (__v8sf)_mm256_setzero_ps()); 2394 } 2395 2396 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2397 _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2398 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2399 (__v2df)_mm_div_pd(__A, __B), 2400 (__v2df)__W); 2401 } 2402 2403 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2404 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2405 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2406 (__v2df)_mm_div_pd(__A, __B), 2407 (__v2df)_mm_setzero_pd()); 2408 } 2409 2410 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2411 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2412 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2413 (__v4df)_mm256_div_pd(__A, __B), 2414 (__v4df)__W); 2415 } 2416 2417 static __inline__ __m256d 
__DEFAULT_FN_ATTRS256 2418 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2419 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2420 (__v4df)_mm256_div_pd(__A, __B), 2421 (__v4df)_mm256_setzero_pd()); 2422 } 2423 2424 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2425 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2426 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2427 (__v4sf)_mm_div_ps(__A, __B), 2428 (__v4sf)__W); 2429 } 2430 2431 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2432 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2433 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2434 (__v4sf)_mm_div_ps(__A, __B), 2435 (__v4sf)_mm_setzero_ps()); 2436 } 2437 2438 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2439 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2440 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2441 (__v8sf)_mm256_div_ps(__A, __B), 2442 (__v8sf)__W); 2443 } 2444 2445 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2446 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2447 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2448 (__v8sf)_mm256_div_ps(__A, __B), 2449 (__v8sf)_mm256_setzero_ps()); 2450 } 2451 2452 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2453 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) { 2454 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, 2455 (__v2df) __W, 2456 (__mmask8) __U); 2457 } 2458 2459 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2460 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A) { 2461 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, 2462 (__v2df) 2463 _mm_setzero_pd (), 2464 (__mmask8) __U); 2465 } 2466 2467 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2468 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) { 2469 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, 2470 (__v4df) __W, 2471 (__mmask8) __U); 
2472 } 2473 2474 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2475 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) { 2476 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, 2477 (__v4df) 2478 _mm256_setzero_pd (), 2479 (__mmask8) __U); 2480 } 2481 2482 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2483 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 2484 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, 2485 (__v2di) __W, 2486 (__mmask8) __U); 2487 } 2488 2489 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2490 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) { 2491 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, 2492 (__v2di) 2493 _mm_setzero_si128 (), 2494 (__mmask8) __U); 2495 } 2496 2497 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2498 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 2499 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, 2500 (__v4di) __W, 2501 (__mmask8) __U); 2502 } 2503 2504 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2505 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) { 2506 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, 2507 (__v4di) 2508 _mm256_setzero_si256 (), 2509 (__mmask8) __U); 2510 } 2511 2512 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2513 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) { 2514 return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P, 2515 (__v2df) __W, 2516 (__mmask8) 2517 __U); 2518 } 2519 2520 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2521 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { 2522 return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P, 2523 (__v2df) 2524 _mm_setzero_pd (), 2525 (__mmask8) 2526 __U); 2527 } 2528 2529 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2530 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) { 2531 return (__m256d) 
__builtin_ia32_expandloaddf256_mask ((const __v4df *) __P, 2532 (__v4df) __W, 2533 (__mmask8) 2534 __U); 2535 } 2536 2537 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2538 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { 2539 return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P, 2540 (__v4df) 2541 _mm256_setzero_pd (), 2542 (__mmask8) 2543 __U); 2544 } 2545 2546 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2547 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) { 2548 return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P, 2549 (__v2di) __W, 2550 (__mmask8) 2551 __U); 2552 } 2553 2554 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2555 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { 2556 return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P, 2557 (__v2di) 2558 _mm_setzero_si128 (), 2559 (__mmask8) 2560 __U); 2561 } 2562 2563 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2564 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U, 2565 void const *__P) { 2566 return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P, 2567 (__v4di) __W, 2568 (__mmask8) 2569 __U); 2570 } 2571 2572 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2573 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { 2574 return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P, 2575 (__v4di) 2576 _mm256_setzero_si256 (), 2577 (__mmask8) 2578 __U); 2579 } 2580 2581 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2582 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) { 2583 return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P, 2584 (__v4sf) __W, 2585 (__mmask8) __U); 2586 } 2587 2588 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2589 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { 2590 return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P, 2591 (__v4sf) 2592 
_mm_setzero_ps (), 2593 (__mmask8) 2594 __U); 2595 } 2596 2597 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2598 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) { 2599 return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P, 2600 (__v8sf) __W, 2601 (__mmask8) __U); 2602 } 2603 2604 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2605 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { 2606 return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P, 2607 (__v8sf) 2608 _mm256_setzero_ps (), 2609 (__mmask8) 2610 __U); 2611 } 2612 2613 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2614 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) { 2615 return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P, 2616 (__v4si) __W, 2617 (__mmask8) 2618 __U); 2619 } 2620 2621 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2622 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { 2623 return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P, 2624 (__v4si) 2625 _mm_setzero_si128 (), 2626 (__mmask8) __U); 2627 } 2628 2629 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2630 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U, 2631 void const *__P) { 2632 return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P, 2633 (__v8si) __W, 2634 (__mmask8) 2635 __U); 2636 } 2637 2638 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2639 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { 2640 return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P, 2641 (__v8si) 2642 _mm256_setzero_si256 (), 2643 (__mmask8) 2644 __U); 2645 } 2646 2647 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2648 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) { 2649 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, 2650 (__v4sf) __W, 2651 (__mmask8) __U); 2652 } 2653 2654 static __inline__ __m128 
__DEFAULT_FN_ATTRS128 2655 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A) { 2656 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, 2657 (__v4sf) 2658 _mm_setzero_ps (), 2659 (__mmask8) __U); 2660 } 2661 2662 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2663 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) { 2664 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, 2665 (__v8sf) __W, 2666 (__mmask8) __U); 2667 } 2668 2669 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2670 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) { 2671 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, 2672 (__v8sf) 2673 _mm256_setzero_ps (), 2674 (__mmask8) __U); 2675 } 2676 2677 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2678 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { 2679 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, 2680 (__v4si) __W, 2681 (__mmask8) __U); 2682 } 2683 2684 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2685 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) { 2686 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, 2687 (__v4si) 2688 _mm_setzero_si128 (), 2689 (__mmask8) __U); 2690 } 2691 2692 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2693 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { 2694 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, 2695 (__v8si) __W, 2696 (__mmask8) __U); 2697 } 2698 2699 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2700 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) { 2701 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, 2702 (__v8si) 2703 _mm256_setzero_si256 (), 2704 (__mmask8) __U); 2705 } 2706 2707 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2708 _mm_getexp_pd (__m128d __A) { 2709 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2710 (__v2df) 2711 _mm_setzero_pd (), 2712 (__mmask8) -1); 2713 } 2714 2715 static __inline__ __m128d __DEFAULT_FN_ATTRS128 
2716 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) { 2717 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2718 (__v2df) __W, 2719 (__mmask8) __U); 2720 } 2721 2722 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2723 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) { 2724 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2725 (__v2df) 2726 _mm_setzero_pd (), 2727 (__mmask8) __U); 2728 } 2729 2730 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2731 _mm256_getexp_pd (__m256d __A) { 2732 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 2733 (__v4df) 2734 _mm256_setzero_pd (), 2735 (__mmask8) -1); 2736 } 2737 2738 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2739 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) { 2740 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 2741 (__v4df) __W, 2742 (__mmask8) __U); 2743 } 2744 2745 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2746 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) { 2747 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 2748 (__v4df) 2749 _mm256_setzero_pd (), 2750 (__mmask8) __U); 2751 } 2752 2753 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2754 _mm_getexp_ps (__m128 __A) { 2755 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 2756 (__v4sf) 2757 _mm_setzero_ps (), 2758 (__mmask8) -1); 2759 } 2760 2761 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2762 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) { 2763 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 2764 (__v4sf) __W, 2765 (__mmask8) __U); 2766 } 2767 2768 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2769 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) { 2770 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 2771 (__v4sf) 2772 _mm_setzero_ps (), 2773 (__mmask8) __U); 2774 } 2775 2776 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2777 _mm256_getexp_ps (__m256 __A) { 2778 return (__m256) 
__builtin_ia32_getexpps256_mask ((__v8sf) __A, 2779 (__v8sf) 2780 _mm256_setzero_ps (), 2781 (__mmask8) -1); 2782 } 2783 2784/* 256-bit float getexp, mask form: forwards __A, the caller's __W and __U to __builtin_ia32_getexpps256_mask. */ static __inline__ __m256 __DEFAULT_FN_ATTRS256 2785 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) { 2786 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 2787 (__v8sf) __W, 2788 (__mmask8) __U); 2789 } 2790 2791/* maskz form: _mm256_setzero_ps() takes the place of __W. */ static __inline__ __m256 __DEFAULT_FN_ATTRS256 2792 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) { 2793 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 2794 (__v8sf) 2795 _mm256_setzero_ps (), 2796 (__mmask8) __U); 2797 } 2798 2799/* Masked max wrappers: each computes the unmasked max (_mm_max_pd, _mm256_max_pd, ...) of __A and __B, then hands that result and a second vector to a __builtin_ia32_select* builtin together with mask __U. The _mask_ forms pass the caller's __W as that second vector. */ static __inline__ __m128d __DEFAULT_FN_ATTRS128 2800 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2801 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2802 (__v2df)_mm_max_pd(__A, __B), 2803 (__v2df)__W); 2804 } 2805 2806/* maskz form: the second select operand is _mm_setzero_pd(). */ static __inline__ __m128d __DEFAULT_FN_ATTRS128 2807 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2808 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2809 (__v2df)_mm_max_pd(__A, __B), 2810 (__v2df)_mm_setzero_pd()); 2811 } 2812 2813/* 256-bit double: _mm256_max_pd result and __W go through __builtin_ia32_selectpd_256. */ static __inline__ __m256d __DEFAULT_FN_ATTRS256 2814 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2815 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2816 (__v4df)_mm256_max_pd(__A, __B), 2817 (__v4df)__W); 2818 } 2819 2820/* maskz form: the second select operand is _mm256_setzero_pd(). */ static __inline__ __m256d __DEFAULT_FN_ATTRS256 2821 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2822 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2823 (__v4df)_mm256_max_pd(__A, __B), 2824 (__v4df)_mm256_setzero_pd()); 2825 } 2826 2827/* 128-bit float: _mm_max_ps result and __W go through __builtin_ia32_selectps_128. */ static __inline__ __m128 __DEFAULT_FN_ATTRS128 2828 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2829 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2830 (__v4sf)_mm_max_ps(__A, __B), 2831 (__v4sf)__W); 2832 } 2833 2834 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2835 
_mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2836 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2837 (__v4sf)_mm_max_ps(__A, __B), 2838 (__v4sf)_mm_setzero_ps()); 2839 } 2840 2841/* 256-bit float max, mask form: _mm256_max_ps(__A, __B) and the caller's __W are passed to __builtin_ia32_selectps_256 with mask __U. */ static __inline__ __m256 __DEFAULT_FN_ATTRS256 2842 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2843 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2844 (__v8sf)_mm256_max_ps(__A, __B), 2845 (__v8sf)__W); 2846 } 2847 2848/* maskz form: the second select operand is _mm256_setzero_ps(). */ static __inline__ __m256 __DEFAULT_FN_ATTRS256 2849 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2850 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2851 (__v8sf)_mm256_max_ps(__A, __B), 2852 (__v8sf)_mm256_setzero_ps()); 2853 } 2854 2855/* Masked min wrappers: same shape as the max wrappers, built on _mm_min_pd / _mm256_min_pd / _mm_min_ps. _mask_ forms pass __W as the second select operand, _maskz_ forms pass a zero vector. */ static __inline__ __m128d __DEFAULT_FN_ATTRS128 2856 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2857 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2858 (__v2df)_mm_min_pd(__A, __B), 2859 (__v2df)__W); 2860 } 2861 2862/* maskz form: the second select operand is _mm_setzero_pd(). */ static __inline__ __m128d __DEFAULT_FN_ATTRS128 2863 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2864 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2865 (__v2df)_mm_min_pd(__A, __B), 2866 (__v2df)_mm_setzero_pd()); 2867 } 2868 2869/* 256-bit double: _mm256_min_pd result and __W go through __builtin_ia32_selectpd_256. */ static __inline__ __m256d __DEFAULT_FN_ATTRS256 2870 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2871 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2872 (__v4df)_mm256_min_pd(__A, __B), 2873 (__v4df)__W); 2874 } 2875 2876/* maskz form: the second select operand is _mm256_setzero_pd(). */ static __inline__ __m256d __DEFAULT_FN_ATTRS256 2877 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2878 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2879 (__v4df)_mm256_min_pd(__A, __B), 2880 (__v4df)_mm256_setzero_pd()); 2881 } 2882 2883 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2884 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2885 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2886 (__v4sf)_mm_min_ps(__A, __B), 
2887 (__v4sf)__W); 2888 } 2889 2890/* 128-bit float min, maskz form: _mm_min_ps(__A, __B) and _mm_setzero_ps() are passed to __builtin_ia32_selectps_128 with mask __U. */ static __inline__ __m128 __DEFAULT_FN_ATTRS128 2891 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2892 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2893 (__v4sf)_mm_min_ps(__A, __B), 2894 (__v4sf)_mm_setzero_ps()); 2895 } 2896 2897/* 256-bit float min, mask form: the second select operand is the caller's __W. */ static __inline__ __m256 __DEFAULT_FN_ATTRS256 2898 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2899 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2900 (__v8sf)_mm256_min_ps(__A, __B), 2901 (__v8sf)__W); 2902 } 2903 2904/* maskz form: the second select operand is _mm256_setzero_ps(). */ static __inline__ __m256 __DEFAULT_FN_ATTRS256 2905 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2906 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2907 (__v8sf)_mm256_min_ps(__A, __B), 2908 (__v8sf)_mm256_setzero_ps()); 2909 } 2910 2911/* Masked multiply wrappers: the unmasked product (_mm_mul_pd, _mm256_mul_pd, ...) is computed first, then passed with __W (mask forms) or a zero vector (maskz forms) to the matching __builtin_ia32_select* builtin under mask __U. */ static __inline__ __m128d __DEFAULT_FN_ATTRS128 2912 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2913 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2914 (__v2df)_mm_mul_pd(__A, __B), 2915 (__v2df)__W); 2916 } 2917 2918/* maskz form: the second select operand is _mm_setzero_pd(). */ static __inline__ __m128d __DEFAULT_FN_ATTRS128 2919 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2920 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2921 (__v2df)_mm_mul_pd(__A, __B), 2922 (__v2df)_mm_setzero_pd()); 2923 } 2924 2925/* 256-bit double: _mm256_mul_pd result and __W go through __builtin_ia32_selectpd_256. */ static __inline__ __m256d __DEFAULT_FN_ATTRS256 2926 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2927 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2928 (__v4df)_mm256_mul_pd(__A, __B), 2929 (__v4df)__W); 2930 } 2931 2932/* maskz form: the second select operand is _mm256_setzero_pd(). */ static __inline__ __m256d __DEFAULT_FN_ATTRS256 2933 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2934 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2935 (__v4df)_mm256_mul_pd(__A, __B), 2936 (__v4df)_mm256_setzero_pd()); 2937 } 2938 2939 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2940 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2941 
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2942 (__v4sf)_mm_mul_ps(__A, __B), 2943 (__v4sf)__W); 2944 } 2945 2946/* 128-bit float multiply, maskz form: _mm_mul_ps(__A, __B) and _mm_setzero_ps() are passed to __builtin_ia32_selectps_128 with mask __U. */ static __inline__ __m128 __DEFAULT_FN_ATTRS128 2947 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2948 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2949 (__v4sf)_mm_mul_ps(__A, __B), 2950 (__v4sf)_mm_setzero_ps()); 2951 } 2952 2953/* 256-bit float multiply, mask form: the second select operand is the caller's __W. */ static __inline__ __m256 __DEFAULT_FN_ATTRS256 2954 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2955 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2956 (__v8sf)_mm256_mul_ps(__A, __B), 2957 (__v8sf)__W); 2958 } 2959 2960/* maskz form: the second select operand is _mm256_setzero_ps(). */ static __inline__ __m256 __DEFAULT_FN_ATTRS256 2961 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2962 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2963 (__v8sf)_mm256_mul_ps(__A, __B), 2964 (__v8sf)_mm256_setzero_ps()); 2965 } 2966 2967/* Masked abs on __v4si lanes: _mm_abs_epi32(__A) and __W are passed to __builtin_ia32_selectd_128 with mask __U. */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 2968 _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) { 2969 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2970 (__v4si)_mm_abs_epi32(__A), 2971 (__v4si)__W); 2972 } 2973 2974/* maskz form: the second select operand is _mm_setzero_si128(). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 2975 _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) { 2976 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2977 (__v4si)_mm_abs_epi32(__A), 2978 (__v4si)_mm_setzero_si128()); 2979 } 2980 2981/* 256-bit counterpart: _mm256_abs_epi32(__A) and __W go through __builtin_ia32_selectd_256. */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 2982 _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) { 2983 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2984 (__v8si)_mm256_abs_epi32(__A), 2985 (__v8si)__W); 2986 } 2987 2988/* maskz form: the second select operand is _mm256_setzero_si256(). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 2989 _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) { 2990 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2991 (__v8si)_mm256_abs_epi32(__A), 2992 (__v8si)_mm256_setzero_si256()); 2993 } 2994 2995 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2996 _mm_abs_epi64 (__m128i __A) { 
2997 return (__m128i)__builtin_elementwise_abs((__v2di)__A); 2998 } 2999 3000/* Masked 64-bit abs, 128-bit: _mm_abs_epi64(__A) and the caller's __W are passed to __builtin_ia32_selectq_128 with mask __U. */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 3001 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 3002 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 3003 (__v2di)_mm_abs_epi64(__A), 3004 (__v2di)__W); 3005 } 3006 3007/* maskz form: the second select operand is _mm_setzero_si128(). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 3008 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) { 3009 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 3010 (__v2di)_mm_abs_epi64(__A), 3011 (__v2di)_mm_setzero_si128()); 3012 } 3013 3014/* Unmasked 256-bit abs: applies __builtin_elementwise_abs to __A viewed as __v4di. */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 3015 _mm256_abs_epi64 (__m256i __A) { 3016 return (__m256i)__builtin_elementwise_abs((__v4di)__A); 3017 } 3018 3019/* mask form: _mm256_abs_epi64(__A) and __W go through __builtin_ia32_selectq_256. */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 3020 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 3021 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 3022 (__v4di)_mm256_abs_epi64(__A), 3023 (__v4di)__W); 3024 } 3025 3026/* maskz form: the second select operand is _mm256_setzero_si256(). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 3027 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) { 3028 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 3029 (__v4di)_mm256_abs_epi64(__A), 3030 (__v4di)_mm256_setzero_si256()); 3031 } 3032 3033/* Masked max_epi32 wrappers (mask parameter is named __M here): _mm_max_epi32(__A, __B) is passed with a zero vector (maskz) or __W (mask) to __builtin_ia32_selectd_128. */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 3034 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) { 3035 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3036 (__v4si)_mm_max_epi32(__A, __B), 3037 (__v4si)_mm_setzero_si128()); 3038 } 3039 3040/* mask form: the second select operand is the caller's __W. */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 3041 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3042 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3043 (__v4si)_mm_max_epi32(__A, __B), 3044 (__v4si)__W); 3045 } 3046 3047 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3048 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) { 3049 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3050 
(__v8si)_mm256_max_epi32(__A, __B), 3051 (__v8si)_mm256_setzero_si256()); 3052 } 3053 3054/* 256-bit max_epi32, mask form: _mm256_max_epi32(__A, __B) and the caller's __W are passed to __builtin_ia32_selectd_256 with mask __M. */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 3055 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3056 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3057 (__v8si)_mm256_max_epi32(__A, __B), 3058 (__v8si)__W); 3059 } 3060 3061/* Unmasked 128-bit max on operands viewed as __v2di, via __builtin_elementwise_max. */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 3062 _mm_max_epi64 (__m128i __A, __m128i __B) { 3063 return (__m128i)__builtin_elementwise_max((__v2di)__A, (__v2di)__B); 3064 } 3065 3066/* maskz form: _mm_max_epi64 result and _mm_setzero_si128() go through __builtin_ia32_selectq_128. */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 3067 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { 3068 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3069 (__v2di)_mm_max_epi64(__A, __B), 3070 (__v2di)_mm_setzero_si128()); 3071 } 3072 3073/* mask form: the second select operand is the caller's __W. */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 3074 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3075 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3076 (__v2di)_mm_max_epi64(__A, __B), 3077 (__v2di)__W); 3078 } 3079 3080/* Unmasked 256-bit max on operands viewed as __v4di, via __builtin_elementwise_max. */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 3081 _mm256_max_epi64 (__m256i __A, __m256i __B) { 3082 return (__m256i)__builtin_elementwise_max((__v4di)__A, (__v4di)__B); 3083 } 3084 3085/* maskz form: _mm256_max_epi64 result and _mm256_setzero_si256() go through __builtin_ia32_selectq_256. */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 3086 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { 3087 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3088 (__v4di)_mm256_max_epi64(__A, __B), 3089 (__v4di)_mm256_setzero_si256()); 3090 } 3091 3092/* mask form: the second select operand is the caller's __W. */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 3093 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3094 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3095 (__v4di)_mm256_max_epi64(__A, __B), 3096 (__v4di)__W); 3097 } 3098 3099 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3100 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) { 3101 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 
3102 (__v4si)_mm_max_epu32(__A, __B), 3103 (__v4si)_mm_setzero_si128()); 3104 } 3105 3106/* 128-bit max_epu32, mask form: _mm_max_epu32(__A, __B) and the caller's __W are passed to __builtin_ia32_selectd_128 with mask __M. */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 3107 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3108 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3109 (__v4si)_mm_max_epu32(__A, __B), 3110 (__v4si)__W); 3111 } 3112 3113/* 256-bit maskz form: _mm256_max_epu32 result and _mm256_setzero_si256() go through __builtin_ia32_selectd_256. */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 3114 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) { 3115 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3116 (__v8si)_mm256_max_epu32(__A, __B), 3117 (__v8si)_mm256_setzero_si256()); 3118 } 3119 3120/* 256-bit mask form: the second select operand is the caller's __W. */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 3121 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3122 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3123 (__v8si)_mm256_max_epu32(__A, __B), 3124 (__v8si)__W); 3125 } 3126 3127/* Unmasked 128-bit max via __builtin_elementwise_max on operands cast to __v2du (presumably the unsigned 64-bit vector type; its typedef is outside this chunk). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 3128 _mm_max_epu64 (__m128i __A, __m128i __B) { 3129 return (__m128i)__builtin_elementwise_max((__v2du)__A, (__v2du)__B); 3130 } 3131 3132/* maskz form: _mm_max_epu64 result and _mm_setzero_si128() go through __builtin_ia32_selectq_128. */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 3133 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { 3134 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3135 (__v2di)_mm_max_epu64(__A, __B), 3136 (__v2di)_mm_setzero_si128()); 3137 } 3138 3139/* mask form: the second select operand is the caller's __W. */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 3140 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3141 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3142 (__v2di)_mm_max_epu64(__A, __B), 3143 (__v2di)__W); 3144 } 3145 3146/* Unmasked 256-bit max via __builtin_elementwise_max on operands cast to __v4du. */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 3147 _mm256_max_epu64 (__m256i __A, __m256i __B) { 3148 return (__m256i)__builtin_elementwise_max((__v4du)__A, (__v4du)__B); 3149 } 3150 3151 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3152 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { 3153 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3154 
(__v4di)_mm256_max_epu64(__A, __B), 3155 (__v4di)_mm256_setzero_si256()); 3156 } 3157 3158/* 256-bit max_epu64, mask form: _mm256_max_epu64(__A, __B) and the caller's __W are passed to __builtin_ia32_selectq_256 with mask __M. */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 3159 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3160 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3161 (__v4di)_mm256_max_epu64(__A, __B), 3162 (__v4di)__W); 3163 } 3164 3165/* Masked min_epi32 wrappers: the unmasked _mm_min_epi32 / _mm256_min_epi32 result is passed with a zero vector (maskz) or __W (mask) to __builtin_ia32_selectd_128 / _256 under mask __M. */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 3166 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) { 3167 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3168 (__v4si)_mm_min_epi32(__A, __B), 3169 (__v4si)_mm_setzero_si128()); 3170 } 3171 3172/* mask form: the second select operand is the caller's __W. */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 3173 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3174 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3175 (__v4si)_mm_min_epi32(__A, __B), 3176 (__v4si)__W); 3177 } 3178 3179/* 256-bit maskz form: the second select operand is _mm256_setzero_si256(). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 3180 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) { 3181 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3182 (__v8si)_mm256_min_epi32(__A, __B), 3183 (__v8si)_mm256_setzero_si256()); 3184 } 3185 3186/* 256-bit mask form: the second select operand is the caller's __W. */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 3187 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3188 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3189 (__v8si)_mm256_min_epi32(__A, __B), 3190 (__v8si)__W); 3191 } 3192 3193/* Unmasked 128-bit min on operands viewed as __v2di, via __builtin_elementwise_min. */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 3194 _mm_min_epi64 (__m128i __A, __m128i __B) { 3195 return (__m128i)__builtin_elementwise_min((__v2di)__A, (__v2di)__B); 3196 } 3197 3198/* mask form: _mm_min_epi64 result and __W go through __builtin_ia32_selectq_128. */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 3199 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3200 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3201 (__v2di)_mm_min_epi64(__A, __B), 3202 (__v2di)__W); 3203 } 3204 3205 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3206 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, 
__m128i __B) { 3207 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3208 (__v2di)_mm_min_epi64(__A, __B), 3209 (__v2di)_mm_setzero_si128()); 3210 } 3211 3212/* Unmasked 256-bit min on operands viewed as __v4di, via __builtin_elementwise_min. */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 3213 _mm256_min_epi64 (__m256i __A, __m256i __B) { 3214 return (__m256i)__builtin_elementwise_min((__v4di)__A, (__v4di)__B); 3215 } 3216 3217/* mask form: _mm256_min_epi64 result and the caller's __W are passed to __builtin_ia32_selectq_256 with mask __M. */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 3218 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3219 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3220 (__v4di)_mm256_min_epi64(__A, __B), 3221 (__v4di)__W); 3222 } 3223 3224/* maskz form: the second select operand is _mm256_setzero_si256(). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 3225 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { 3226 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3227 (__v4di)_mm256_min_epi64(__A, __B), 3228 (__v4di)_mm256_setzero_si256()); 3229 } 3230 3231/* Masked min_epu32 wrappers: the unmasked _mm_min_epu32 / _mm256_min_epu32 result is passed with a zero vector (maskz) or __W (mask) to __builtin_ia32_selectd_128 / _256 under mask __M. */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 3232 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) { 3233 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3234 (__v4si)_mm_min_epu32(__A, __B), 3235 (__v4si)_mm_setzero_si128()); 3236 } 3237 3238/* mask form: the second select operand is the caller's __W. */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 3239 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3240 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3241 (__v4si)_mm_min_epu32(__A, __B), 3242 (__v4si)__W); 3243 } 3244 3245/* 256-bit maskz form: the second select operand is _mm256_setzero_si256(). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 3246 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) { 3247 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3248 (__v8si)_mm256_min_epu32(__A, __B), 3249 (__v8si)_mm256_setzero_si256()); 3250 } 3251 3252/* 256-bit mask form: the second select operand is the caller's __W. */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 3253 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3254 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3255 (__v8si)_mm256_min_epu32(__A, __B), 3256 (__v8si)__W); 3257 } 3258 3259 static 
__inline__ __m128i __DEFAULT_FN_ATTRS128 3260 _mm_min_epu64 (__m128i __A, __m128i __B) { 3261 return (__m128i)__builtin_elementwise_min((__v2du)__A, (__v2du)__B); 3262 } 3263 3264/* Masked 128-bit min_epu64, mask form: _mm_min_epu64(__A, __B) and the caller's __W are passed to __builtin_ia32_selectq_128 with mask __M. */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 3265 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3266 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3267 (__v2di)_mm_min_epu64(__A, __B), 3268 (__v2di)__W); 3269 } 3270 3271/* maskz form: the second select operand is _mm_setzero_si128(). */ static __inline__ __m128i __DEFAULT_FN_ATTRS128 3272 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { 3273 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3274 (__v2di)_mm_min_epu64(__A, __B), 3275 (__v2di)_mm_setzero_si128()); 3276 } 3277 3278/* Unmasked 256-bit min via __builtin_elementwise_min on operands cast to __v4du (presumably the unsigned 64-bit vector type; its typedef is outside this chunk). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 3279 _mm256_min_epu64 (__m256i __A, __m256i __B) { 3280 return (__m256i)__builtin_elementwise_min((__v4du)__A, (__v4du)__B); 3281 } 3282 3283/* mask form: _mm256_min_epu64 result and __W go through __builtin_ia32_selectq_256. */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 3284 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3285 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3286 (__v4di)_mm256_min_epu64(__A, __B), 3287 (__v4di)__W); 3288 } 3289 3290/* maskz form: the second select operand is _mm256_setzero_si256(). */ static __inline__ __m256i __DEFAULT_FN_ATTRS256 3291 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { 3292 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3293 (__v4di)_mm256_min_epu64(__A, __B), 3294 (__v4di)_mm256_setzero_si256()); 3295 } 3296 3297/* roundscale_pd macros: each expands to __builtin_ia32_rndscalepd_128_mask(A, (int)imm, passthrough, mask). They are macros rather than functions, presumably because imm must be a compile-time constant (confirm against the builtin's definition). The unmasked form passes a zero vector and (__mmask8)-1. */ #define _mm_roundscale_pd(A, imm) \ 3298 ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3299 (int)(imm), \ 3300 (__v2df)_mm_setzero_pd(), \ 3301 (__mmask8)-1)) 3302 3303 3304/* mask form: W is the third builtin operand and U the mask. */ #define _mm_mask_roundscale_pd(W, U, A, imm) \ 3305 ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3306 (int)(imm), \ 3307 (__v2df)(__m128d)(W), \ 3308 (__mmask8)(U))) 3309 3310 3311 #define _mm_maskz_roundscale_pd(U, A, imm) \ 3312 ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3313 (int)(imm), \ 3314 
(__v2df)_mm_setzero_pd(), \ 3315 (__mmask8)(U))) 3316 3317 3318/* 256-bit roundscale_pd macros: expand to __builtin_ia32_rndscalepd_256_mask(A, (int)imm, passthrough, mask). The unmasked form passes _mm256_setzero_pd() and (__mmask8)-1. */ #define _mm256_roundscale_pd(A, imm) \ 3319 ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3320 (int)(imm), \ 3321 (__v4df)_mm256_setzero_pd(), \ 3322 (__mmask8)-1)) 3323 3324 3325/* mask form: W is the third builtin operand and U the mask. */ #define _mm256_mask_roundscale_pd(W, U, A, imm) \ 3326 ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3327 (int)(imm), \ 3328 (__v4df)(__m256d)(W), \ 3329 (__mmask8)(U))) 3330 3331 3332/* maskz form: a zero vector is the third builtin operand and U the mask. */ #define _mm256_maskz_roundscale_pd(U, A, imm) \ 3333 ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3334 (int)(imm), \ 3335 (__v4df)_mm256_setzero_pd(), \ 3336 (__mmask8)(U))) 3337 3338/* roundscale_ps macros, 128-bit: same operand layout, expanding to __builtin_ia32_rndscaleps_128_mask on __v4sf. */ #define _mm_roundscale_ps(A, imm) \ 3339 ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3340 (__v4sf)_mm_setzero_ps(), \ 3341 (__mmask8)-1)) 3342 3343 3344 #define _mm_mask_roundscale_ps(W, U, A, imm) \ 3345 ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3346 (__v4sf)(__m128)(W), \ 3347 (__mmask8)(U))) 3348 3349 3350 #define _mm_maskz_roundscale_ps(U, A, imm) \ 3351 ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3352 (__v4sf)_mm_setzero_ps(), \ 3353 (__mmask8)(U))) 3354 3355/* roundscale_ps macros, 256-bit: expand to __builtin_ia32_rndscaleps_256_mask on __v8sf. */ #define _mm256_roundscale_ps(A, imm) \ 3356 ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3357 (__v8sf)_mm256_setzero_ps(), \ 3358 (__mmask8)-1)) 3359 3360 #define _mm256_mask_roundscale_ps(W, U, A, imm) \ 3361 ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3362 (__v8sf)(__m256)(W), \ 3363 (__mmask8)(U))) 3364 3365 3366 #define _mm256_maskz_roundscale_ps(U, A, imm) \ 3367 ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3368 (__v8sf)_mm256_setzero_ps(), \ 3369 (__mmask8)(U))) 3370 3371 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3372 _mm_scalef_pd (__m128d __A, __m128d __B) { 3373 return (__m128d) __builtin_ia32_scalefpd128_mask 
((__v2df) __A, 3374 (__v2df) __B, 3375 (__v2df) 3376 _mm_setzero_pd (), 3377 (__mmask8) -1); 3378 } 3379 3380/* scalef on two doubles, mask form: forwards __A, __B, the caller's __W and mask __U to __builtin_ia32_scalefpd128_mask. */ static __inline__ __m128d __DEFAULT_FN_ATTRS128 3381 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A, 3382 __m128d __B) { 3383 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3384 (__v2df) __B, 3385 (__v2df) __W, 3386 (__mmask8) __U); 3387 } 3388 3389/* maskz form: _mm_setzero_pd() takes the place of __W. */ static __inline__ __m128d __DEFAULT_FN_ATTRS128 3390 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) { 3391 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3392 (__v2df) __B, 3393 (__v2df) 3394 _mm_setzero_pd (), 3395 (__mmask8) __U); 3396 } 3397 3398/* Unmasked 256-bit double scalef: zero vector plus all-ones mask (__mmask8)-1 passed to __builtin_ia32_scalefpd256_mask. */ static __inline__ __m256d __DEFAULT_FN_ATTRS256 3399 _mm256_scalef_pd (__m256d __A, __m256d __B) { 3400 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3401 (__v4df) __B, 3402 (__v4df) 3403 _mm256_setzero_pd (), 3404 (__mmask8) -1); 3405 } 3406 3407/* mask form: the caller's __W and __U are forwarded to the same 256-bit builtin. */ static __inline__ __m256d __DEFAULT_FN_ATTRS256 3408 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A, 3409 __m256d __B) { 3410 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3411 (__v4df) __B, 3412 (__v4df) __W, 3413 (__mmask8) __U); 3414 } 3415 3416/* maskz form: _mm256_setzero_pd() takes the place of __W. */ static __inline__ __m256d __DEFAULT_FN_ATTRS256 3417 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) { 3418 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3419 (__v4df) __B, 3420 (__v4df) 3421 _mm256_setzero_pd (), 3422 (__mmask8) __U); 3423 } 3424 3425/* Unmasked 128-bit float scalef: zero vector plus all-ones mask passed to __builtin_ia32_scalefps128_mask. */ static __inline__ __m128 __DEFAULT_FN_ATTRS128 3426 _mm_scalef_ps (__m128 __A, __m128 __B) { 3427 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3428 (__v4sf) __B, 3429 (__v4sf) 3430 _mm_setzero_ps (), 3431 (__mmask8) -1); 3432 } 3433 3434 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3435 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3436 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3437 (__v4sf) __B, 3438 (__v4sf) __W, 3439 
(__mmask8) __U); 3440 } 3441 3442/* 128-bit float scalef, maskz form: forwards __A, __B, _mm_setzero_ps() and mask __U to __builtin_ia32_scalefps128_mask. */ static __inline__ __m128 __DEFAULT_FN_ATTRS128 3443 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) { 3444 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3445 (__v4sf) __B, 3446 (__v4sf) 3447 _mm_setzero_ps (), 3448 (__mmask8) __U); 3449 } 3450 3451/* Unmasked 256-bit float scalef: zero vector plus all-ones mask (__mmask8)-1 passed to __builtin_ia32_scalefps256_mask. */ static __inline__ __m256 __DEFAULT_FN_ATTRS256 3452 _mm256_scalef_ps (__m256 __A, __m256 __B) { 3453 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3454 (__v8sf) __B, 3455 (__v8sf) 3456 _mm256_setzero_ps (), 3457 (__mmask8) -1); 3458 } 3459 3460/* mask form: the caller's __W and __U are forwarded to the same 256-bit builtin. */ static __inline__ __m256 __DEFAULT_FN_ATTRS256 3461 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A, 3462 __m256 __B) { 3463 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3464 (__v8sf) __B, 3465 (__v8sf) __W, 3466 (__mmask8) __U); 3467 } 3468 3469/* maskz form: _mm256_setzero_ps() takes the place of __W. */ static __inline__ __m256 __DEFAULT_FN_ATTRS256 3470 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { 3471 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3472 (__v8sf) __B, 3473 (__v8sf) 3474 _mm256_setzero_ps (), 3475 (__mmask8) __U); 3476 } 3477 3478/* Scatter macros with an index vector cast to __v2di / __v4di. Each expands to a __builtin_ia32_scatterdiv* call taking (addr cast to void *, mask, index, v1, (int)scale). The non-mask variants pass the all-ones mask (__mmask8)-1. */ #define _mm_i64scatter_pd(addr, index, v1, scale) \ 3479 __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)-1, \ 3480 (__v2di)(__m128i)(index), \ 3481 (__v2df)(__m128d)(v1), (int)(scale)) 3482 3483/* mask variant: the caller's mask is cast to __mmask8 and passed through. */ #define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \ 3484 __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)(mask), \ 3485 (__v2di)(__m128i)(index), \ 3486 (__v2df)(__m128d)(v1), (int)(scale)) 3487 3488/* Same shape with v1 cast to __v2di, expanding to __builtin_ia32_scatterdiv2di. */ #define _mm_i64scatter_epi64(addr, index, v1, scale) \ 3489 __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)-1, \ 3490 (__v2di)(__m128i)(index), \ 3491 (__v2di)(__m128i)(v1), (int)(scale)) 3492 3493 #define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ 3494 __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)(mask), \ 3495 (__v2di)(__m128i)(index), \ 3496 (__v2di)(__m128i)(v1), (int)(scale)) 3497 3498 #define 
_mm256_i64scatter_pd(addr, index, v1, scale) \ 3499 __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)-1, \ 3500 (__v4di)(__m256i)(index), \ 3501 (__v4df)(__m256d)(v1), (int)(scale)) 3502 3503/* mask variant of the __v4di-index, __v4df-data scatter: expands to __builtin_ia32_scatterdiv4df with the caller's mask cast to __mmask8. */ #define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \ 3504 __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)(mask), \ 3505 (__v4di)(__m256i)(index), \ 3506 (__v4df)(__m256d)(v1), (int)(scale)) 3507 3508/* __v4di index, __v4di data: expands to __builtin_ia32_scatterdiv4di. The non-mask variant passes (__mmask8)-1. */ #define _mm256_i64scatter_epi64(addr, index, v1, scale) \ 3509 __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)-1, \ 3510 (__v4di)(__m256i)(index), \ 3511 (__v4di)(__m256i)(v1), (int)(scale)) 3512 3513 #define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ 3514 __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)(mask), \ 3515 (__v4di)(__m256i)(index), \ 3516 (__v4di)(__m256i)(v1), (int)(scale)) 3517 3518/* __v2di index with v1 cast to the 128-bit __v4sf: expands to __builtin_ia32_scatterdiv4sf. */ #define _mm_i64scatter_ps(addr, index, v1, scale) \ 3519 __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)-1, \ 3520 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3521 (int)(scale)) 3522 3523 #define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \ 3524 __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)(mask), \ 3525 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3526 (int)(scale)) 3527 3528/* __v2di index with v1 cast to __v4si: expands to __builtin_ia32_scatterdiv4si. */ #define _mm_i64scatter_epi32(addr, index, v1, scale) \ 3529 __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)-1, \ 3530 (__v2di)(__m128i)(index), \ 3531 (__v4si)(__m128i)(v1), (int)(scale)) 3532 3533 #define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ 3534 __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)(mask), \ 3535 (__v2di)(__m128i)(index), \ 3536 (__v4si)(__m128i)(v1), (int)(scale)) 3537 3538/* 256-bit (__v4di) index, but v1 is cast to the 128-bit __v4sf: four indices pair with four floats. The builtin is named __builtin_ia32_scatterdiv8sf. */ #define _mm256_i64scatter_ps(addr, index, v1, scale) \ 3539 __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)-1, \ 3540 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ 3541 (int)(scale)) 3542 3543 #define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \ 3544 
__builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)(mask), \ 3545 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ 3546 (int)(scale)) 3547 3548/* 256-bit (__v4di) index with v1 cast to the 128-bit __v4si: expands to __builtin_ia32_scatterdiv8si. The non-mask variant passes (__mmask8)-1. */ #define _mm256_i64scatter_epi32(addr, index, v1, scale) \ 3549 __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)-1, \ 3550 (__v4di)(__m256i)(index), \ 3551 (__v4si)(__m128i)(v1), (int)(scale)) 3552 3553/* mask variant: the caller's mask is cast to __mmask8 and passed through. */ #define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ 3554 __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)(mask), \ 3555 (__v4di)(__m256i)(index), \ 3556 (__v4si)(__m128i)(v1), (int)(scale)) 3557 3558/* Scatter macros with an index vector cast to __v4si / __v8si. Each expands to a __builtin_ia32_scattersiv* call taking (addr cast to void *, mask, index, v1, (int)scale). Here: __v4si index, __v2df data. */ #define _mm_i32scatter_pd(addr, index, v1, scale) \ 3559 __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)-1, \ 3560 (__v4si)(__m128i)(index), \ 3561 (__v2df)(__m128d)(v1), (int)(scale)) 3562 3563 #define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \ 3564 __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)(mask), \ 3565 (__v4si)(__m128i)(index), \ 3566 (__v2df)(__m128d)(v1), (int)(scale)) 3567 3568/* __v4si index, __v2di data: expands to __builtin_ia32_scattersiv2di. */ #define _mm_i32scatter_epi64(addr, index, v1, scale) \ 3569 __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)-1, \ 3570 (__v4si)(__m128i)(index), \ 3571 (__v2di)(__m128i)(v1), (int)(scale)) 3572 3573 #define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ 3574 __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)(mask), \ 3575 (__v4si)(__m128i)(index), \ 3576 (__v2di)(__m128i)(v1), (int)(scale)) 3577 3578/* 128-bit (__v4si) index with 256-bit __v4df data: expands to __builtin_ia32_scattersiv4df. */ #define _mm256_i32scatter_pd(addr, index, v1, scale) \ 3579 __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)-1, \ 3580 (__v4si)(__m128i)(index), \ 3581 (__v4df)(__m256d)(v1), (int)(scale)) 3582 3583 #define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \ 3584 __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)(mask), \ 3585 (__v4si)(__m128i)(index), \ 3586 (__v4df)(__m256d)(v1), (int)(scale)) 3587 3588 #define _mm256_i32scatter_epi64(addr, index, v1, scale) \ 3589 __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)-1, \ 3590 
(__v4si)(__m128i)(index), \ 3591 (__v4di)(__m256i)(v1), (int)(scale)) 3592 3593/* mask variant of the __v4si-index, __v4di-data scatter: expands to __builtin_ia32_scattersiv4di with the caller's mask cast to __mmask8. */ #define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ 3594 __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)(mask), \ 3595 (__v4si)(__m128i)(index), \ 3596 (__v4di)(__m256i)(v1), (int)(scale)) 3597 3598/* __v4si index, __v4sf data: expands to __builtin_ia32_scattersiv4sf. The non-mask variant passes the all-ones mask (__mmask8)-1. */ #define _mm_i32scatter_ps(addr, index, v1, scale) \ 3599 __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)-1, \ 3600 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3601 (int)(scale)) 3602 3603 #define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \ 3604 __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)(mask), \ 3605 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3606 (int)(scale)) 3607 3608/* __v4si index, __v4si data: expands to __builtin_ia32_scattersiv4si. */ #define _mm_i32scatter_epi32(addr, index, v1, scale) \ 3609 __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)-1, \ 3610 (__v4si)(__m128i)(index), \ 3611 (__v4si)(__m128i)(v1), (int)(scale)) 3612 3613 #define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \ 3614 __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)(mask), \ 3615 (__v4si)(__m128i)(index), \ 3616 (__v4si)(__m128i)(v1), (int)(scale)) 3617 3618/* __v8si index, __v8sf data: expands to __builtin_ia32_scattersiv8sf. */ #define _mm256_i32scatter_ps(addr, index, v1, scale) \ 3619 __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)-1, \ 3620 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ 3621 (int)(scale)) 3622 3623 #define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \ 3624 __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)(mask), \ 3625 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ 3626 (int)(scale)) 3627 3628/* __v8si index, __v8si data: expands to __builtin_ia32_scattersiv8si. */ #define _mm256_i32scatter_epi32(addr, index, v1, scale) \ 3629 __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)-1, \ 3630 (__v8si)(__m256i)(index), \ 3631 (__v8si)(__m256i)(v1), (int)(scale)) 3632 3633 #define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \ 3634 __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)(mask), \ 3635 (__v8si)(__m256i)(index), \ 3636 (__v8si)(__m256i)(v1), (int)(scale)) 
3637 3638/* Masked sqrt wrappers: the unmasked square root (_mm_sqrt_pd, _mm256_sqrt_pd, _mm_sqrt_ps, _mm256_sqrt_ps) of __A is computed first, then passed with __W (mask forms) or a zero vector (maskz forms) to the matching __builtin_ia32_select* builtin under mask __U. */ static __inline__ __m128d __DEFAULT_FN_ATTRS128 3639 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) { 3640 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3641 (__v2df)_mm_sqrt_pd(__A), 3642 (__v2df)__W); 3643 } 3644 3645/* maskz form: the second select operand is _mm_setzero_pd(). */ static __inline__ __m128d __DEFAULT_FN_ATTRS128 3646 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) { 3647 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3648 (__v2df)_mm_sqrt_pd(__A), 3649 (__v2df)_mm_setzero_pd()); 3650 } 3651 3652/* 256-bit double: _mm256_sqrt_pd result and __W go through __builtin_ia32_selectpd_256. */ static __inline__ __m256d __DEFAULT_FN_ATTRS256 3653 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) { 3654 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3655 (__v4df)_mm256_sqrt_pd(__A), 3656 (__v4df)__W); 3657 } 3658 3659/* maskz form: the second select operand is _mm256_setzero_pd(). */ static __inline__ __m256d __DEFAULT_FN_ATTRS256 3660 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) { 3661 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3662 (__v4df)_mm256_sqrt_pd(__A), 3663 (__v4df)_mm256_setzero_pd()); 3664 } 3665 3666/* 128-bit float: _mm_sqrt_ps result and __W go through __builtin_ia32_selectps_128. */ static __inline__ __m128 __DEFAULT_FN_ATTRS128 3667 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) { 3668 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3669 (__v4sf)_mm_sqrt_ps(__A), 3670 (__v4sf)__W); 3671 } 3672 3673/* maskz form: the second select operand is _mm_setzero_ps(). */ static __inline__ __m128 __DEFAULT_FN_ATTRS128 3674 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) { 3675 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3676 (__v4sf)_mm_sqrt_ps(__A), 3677 (__v4sf)_mm_setzero_ps()); 3678 } 3679 3680/* 256-bit float: _mm256_sqrt_ps result and __W go through __builtin_ia32_selectps_256. */ static __inline__ __m256 __DEFAULT_FN_ATTRS256 3681 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) { 3682 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3683 (__v8sf)_mm256_sqrt_ps(__A), 3684 (__v8sf)__W); 3685 } 3686 3687/* maskz form: the second select operand is _mm256_setzero_ps(). */ static __inline__ __m256 __DEFAULT_FN_ATTRS256 3688 _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) { 3689 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3690 (__v8sf)_mm256_sqrt_ps(__A), 3691 (__v8sf)_mm256_setzero_ps()); 3692 } 3693 3694 static __inline__ __m128d 
__DEFAULT_FN_ATTRS128 3695 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 3696 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3697 (__v2df)_mm_sub_pd(__A, __B), 3698 (__v2df)__W); 3699 } 3700 3701 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3702 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) { 3703 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3704 (__v2df)_mm_sub_pd(__A, __B), 3705 (__v2df)_mm_setzero_pd()); 3706 } 3707 3708 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3709 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 3710 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3711 (__v4df)_mm256_sub_pd(__A, __B), 3712 (__v4df)__W); 3713 } 3714 3715 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3716 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) { 3717 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3718 (__v4df)_mm256_sub_pd(__A, __B), 3719 (__v4df)_mm256_setzero_pd()); 3720 } 3721 3722 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3723 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3724 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3725 (__v4sf)_mm_sub_ps(__A, __B), 3726 (__v4sf)__W); 3727 } 3728 3729 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3730 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) { 3731 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3732 (__v4sf)_mm_sub_ps(__A, __B), 3733 (__v4sf)_mm_setzero_ps()); 3734 } 3735 3736 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3737 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 3738 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3739 (__v8sf)_mm256_sub_ps(__A, __B), 3740 (__v8sf)__W); 3741 } 3742 3743 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3744 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) { 3745 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3746 
(__v8sf)_mm256_sub_ps(__A, __B), 3747 (__v8sf)_mm256_setzero_ps()); 3748 } 3749 3750 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3751 _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) { 3752 return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I, 3753 (__v4si)__B); 3754 } 3755 3756 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3757 _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, 3758 __m128i __B) { 3759 return (__m128i)__builtin_ia32_selectd_128(__U, 3760 (__v4si)_mm_permutex2var_epi32(__A, __I, __B), 3761 (__v4si)__A); 3762 } 3763 3764 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3765 _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, 3766 __m128i __B) { 3767 return (__m128i)__builtin_ia32_selectd_128(__U, 3768 (__v4si)_mm_permutex2var_epi32(__A, __I, __B), 3769 (__v4si)__I); 3770 } 3771 3772 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3773 _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, 3774 __m128i __B) { 3775 return (__m128i)__builtin_ia32_selectd_128(__U, 3776 (__v4si)_mm_permutex2var_epi32(__A, __I, __B), 3777 (__v4si)_mm_setzero_si128()); 3778 } 3779 3780 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3781 _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) { 3782 return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I, 3783 (__v8si) __B); 3784 } 3785 3786 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3787 _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, 3788 __m256i __B) { 3789 return (__m256i)__builtin_ia32_selectd_256(__U, 3790 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), 3791 (__v8si)__A); 3792 } 3793 3794 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3795 _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, 3796 __m256i __B) { 3797 return (__m256i)__builtin_ia32_selectd_256(__U, 3798 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), 3799 (__v8si)__I); 3800 } 3801 3802 static 
__inline__ __m256i __DEFAULT_FN_ATTRS256 3803 _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, 3804 __m256i __B) { 3805 return (__m256i)__builtin_ia32_selectd_256(__U, 3806 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), 3807 (__v8si)_mm256_setzero_si256()); 3808 } 3809 3810 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3811 _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) { 3812 return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I, 3813 (__v2df)__B); 3814 } 3815 3816 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3817 _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) { 3818 return (__m128d)__builtin_ia32_selectpd_128(__U, 3819 (__v2df)_mm_permutex2var_pd(__A, __I, __B), 3820 (__v2df)__A); 3821 } 3822 3823 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3824 _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) { 3825 return (__m128d)__builtin_ia32_selectpd_128(__U, 3826 (__v2df)_mm_permutex2var_pd(__A, __I, __B), 3827 (__v2df)(__m128d)__I); 3828 } 3829 3830 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3831 _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) { 3832 return (__m128d)__builtin_ia32_selectpd_128(__U, 3833 (__v2df)_mm_permutex2var_pd(__A, __I, __B), 3834 (__v2df)_mm_setzero_pd()); 3835 } 3836 3837 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3838 _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) { 3839 return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I, 3840 (__v4df)__B); 3841 } 3842 3843 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3844 _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, 3845 __m256d __B) { 3846 return (__m256d)__builtin_ia32_selectpd_256(__U, 3847 (__v4df)_mm256_permutex2var_pd(__A, __I, __B), 3848 (__v4df)__A); 3849 } 3850 3851 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3852 _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 
__U, 3853 __m256d __B) { 3854 return (__m256d)__builtin_ia32_selectpd_256(__U, 3855 (__v4df)_mm256_permutex2var_pd(__A, __I, __B), 3856 (__v4df)(__m256d)__I); 3857 } 3858 3859 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3860 _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, 3861 __m256d __B) { 3862 return (__m256d)__builtin_ia32_selectpd_256(__U, 3863 (__v4df)_mm256_permutex2var_pd(__A, __I, __B), 3864 (__v4df)_mm256_setzero_pd()); 3865 } 3866 3867 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3868 _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) { 3869 return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I, 3870 (__v4sf)__B); 3871 } 3872 3873 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3874 _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) { 3875 return (__m128)__builtin_ia32_selectps_128(__U, 3876 (__v4sf)_mm_permutex2var_ps(__A, __I, __B), 3877 (__v4sf)__A); 3878 } 3879 3880 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3881 _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) { 3882 return (__m128)__builtin_ia32_selectps_128(__U, 3883 (__v4sf)_mm_permutex2var_ps(__A, __I, __B), 3884 (__v4sf)(__m128)__I); 3885 } 3886 3887 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3888 _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) { 3889 return (__m128)__builtin_ia32_selectps_128(__U, 3890 (__v4sf)_mm_permutex2var_ps(__A, __I, __B), 3891 (__v4sf)_mm_setzero_ps()); 3892 } 3893 3894 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3895 _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) { 3896 return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I, 3897 (__v8sf) __B); 3898 } 3899 3900 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3901 _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) { 3902 return (__m256)__builtin_ia32_selectps_256(__U, 3903 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), 3904 
(__v8sf)__A); 3905 } 3906 3907 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3908 _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U, 3909 __m256 __B) { 3910 return (__m256)__builtin_ia32_selectps_256(__U, 3911 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), 3912 (__v8sf)(__m256)__I); 3913 } 3914 3915 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3916 _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, 3917 __m256 __B) { 3918 return (__m256)__builtin_ia32_selectps_256(__U, 3919 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), 3920 (__v8sf)_mm256_setzero_ps()); 3921 } 3922 3923 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3924 _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) { 3925 return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I, 3926 (__v2di)__B); 3927 } 3928 3929 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3930 _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, 3931 __m128i __B) { 3932 return (__m128i)__builtin_ia32_selectq_128(__U, 3933 (__v2di)_mm_permutex2var_epi64(__A, __I, __B), 3934 (__v2di)__A); 3935 } 3936 3937 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3938 _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U, 3939 __m128i __B) { 3940 return (__m128i)__builtin_ia32_selectq_128(__U, 3941 (__v2di)_mm_permutex2var_epi64(__A, __I, __B), 3942 (__v2di)__I); 3943 } 3944 3945 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3946 _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, 3947 __m128i __B) { 3948 return (__m128i)__builtin_ia32_selectq_128(__U, 3949 (__v2di)_mm_permutex2var_epi64(__A, __I, __B), 3950 (__v2di)_mm_setzero_si128()); 3951 } 3952 3953 3954 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3955 _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) { 3956 return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I, 3957 (__v4di) __B); 3958 } 3959 3960 static __inline__ __m256i __DEFAULT_FN_ATTRS256 
3961 _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, 3962 __m256i __B) { 3963 return (__m256i)__builtin_ia32_selectq_256(__U, 3964 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), 3965 (__v4di)__A); 3966 } 3967 3968 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3969 _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, 3970 __m256i __B) { 3971 return (__m256i)__builtin_ia32_selectq_256(__U, 3972 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), 3973 (__v4di)__I); 3974 } 3975 3976 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3977 _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, 3978 __m256i __B) { 3979 return (__m256i)__builtin_ia32_selectq_256(__U, 3980 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), 3981 (__v4di)_mm256_setzero_si256()); 3982 } 3983 3984 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3985 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A) 3986 { 3987 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 3988 (__v4si)_mm_cvtepi8_epi32(__A), 3989 (__v4si)__W); 3990 } 3991 3992 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3993 _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A) 3994 { 3995 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 3996 (__v4si)_mm_cvtepi8_epi32(__A), 3997 (__v4si)_mm_setzero_si128()); 3998 } 3999 4000 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4001 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A) 4002 { 4003 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4004 (__v8si)_mm256_cvtepi8_epi32(__A), 4005 (__v8si)__W); 4006 } 4007 4008 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4009 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A) 4010 { 4011 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4012 (__v8si)_mm256_cvtepi8_epi32(__A), 4013 (__v8si)_mm256_setzero_si256()); 4014 } 4015 4016 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4017 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 
__U, __m128i __A) 4018 { 4019 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4020 (__v2di)_mm_cvtepi8_epi64(__A), 4021 (__v2di)__W); 4022 } 4023 4024 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4025 _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) 4026 { 4027 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4028 (__v2di)_mm_cvtepi8_epi64(__A), 4029 (__v2di)_mm_setzero_si128()); 4030 } 4031 4032 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4033 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4034 { 4035 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4036 (__v4di)_mm256_cvtepi8_epi64(__A), 4037 (__v4di)__W); 4038 } 4039 4040 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4041 _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) 4042 { 4043 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4044 (__v4di)_mm256_cvtepi8_epi64(__A), 4045 (__v4di)_mm256_setzero_si256()); 4046 } 4047 4048 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4049 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X) 4050 { 4051 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4052 (__v2di)_mm_cvtepi32_epi64(__X), 4053 (__v2di)__W); 4054 } 4055 4056 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4057 _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) 4058 { 4059 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4060 (__v2di)_mm_cvtepi32_epi64(__X), 4061 (__v2di)_mm_setzero_si128()); 4062 } 4063 4064 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4065 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X) 4066 { 4067 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4068 (__v4di)_mm256_cvtepi32_epi64(__X), 4069 (__v4di)__W); 4070 } 4071 4072 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4073 _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) 4074 { 4075 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4076 (__v4di)_mm256_cvtepi32_epi64(__X), 4077 
(__v4di)_mm256_setzero_si256()); 4078 } 4079 4080 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4081 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4082 { 4083 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4084 (__v4si)_mm_cvtepi16_epi32(__A), 4085 (__v4si)__W); 4086 } 4087 4088 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4089 _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A) 4090 { 4091 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4092 (__v4si)_mm_cvtepi16_epi32(__A), 4093 (__v4si)_mm_setzero_si128()); 4094 } 4095 4096 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4097 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4098 { 4099 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4100 (__v8si)_mm256_cvtepi16_epi32(__A), 4101 (__v8si)__W); 4102 } 4103 4104 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4105 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A) 4106 { 4107 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4108 (__v8si)_mm256_cvtepi16_epi32(__A), 4109 (__v8si)_mm256_setzero_si256()); 4110 } 4111 4112 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4113 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4114 { 4115 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4116 (__v2di)_mm_cvtepi16_epi64(__A), 4117 (__v2di)__W); 4118 } 4119 4120 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4121 _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) 4122 { 4123 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4124 (__v2di)_mm_cvtepi16_epi64(__A), 4125 (__v2di)_mm_setzero_si128()); 4126 } 4127 4128 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4129 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4130 { 4131 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4132 (__v4di)_mm256_cvtepi16_epi64(__A), 4133 (__v4di)__W); 4134 } 4135 4136 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4137 
_mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) 4138 { 4139 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4140 (__v4di)_mm256_cvtepi16_epi64(__A), 4141 (__v4di)_mm256_setzero_si256()); 4142 } 4143 4144 4145 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4146 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4147 { 4148 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4149 (__v4si)_mm_cvtepu8_epi32(__A), 4150 (__v4si)__W); 4151 } 4152 4153 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4154 _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) 4155 { 4156 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4157 (__v4si)_mm_cvtepu8_epi32(__A), 4158 (__v4si)_mm_setzero_si128()); 4159 } 4160 4161 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4162 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4163 { 4164 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4165 (__v8si)_mm256_cvtepu8_epi32(__A), 4166 (__v8si)__W); 4167 } 4168 4169 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4170 _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) 4171 { 4172 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4173 (__v8si)_mm256_cvtepu8_epi32(__A), 4174 (__v8si)_mm256_setzero_si256()); 4175 } 4176 4177 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4178 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4179 { 4180 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4181 (__v2di)_mm_cvtepu8_epi64(__A), 4182 (__v2di)__W); 4183 } 4184 4185 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4186 _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) 4187 { 4188 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4189 (__v2di)_mm_cvtepu8_epi64(__A), 4190 (__v2di)_mm_setzero_si128()); 4191 } 4192 4193 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4194 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4195 { 4196 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 
4197 (__v4di)_mm256_cvtepu8_epi64(__A), 4198 (__v4di)__W); 4199 } 4200 4201 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4202 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) 4203 { 4204 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4205 (__v4di)_mm256_cvtepu8_epi64(__A), 4206 (__v4di)_mm256_setzero_si256()); 4207 } 4208 4209 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4210 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X) 4211 { 4212 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4213 (__v2di)_mm_cvtepu32_epi64(__X), 4214 (__v2di)__W); 4215 } 4216 4217 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4218 _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) 4219 { 4220 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4221 (__v2di)_mm_cvtepu32_epi64(__X), 4222 (__v2di)_mm_setzero_si128()); 4223 } 4224 4225 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4226 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X) 4227 { 4228 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4229 (__v4di)_mm256_cvtepu32_epi64(__X), 4230 (__v4di)__W); 4231 } 4232 4233 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4234 _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) 4235 { 4236 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4237 (__v4di)_mm256_cvtepu32_epi64(__X), 4238 (__v4di)_mm256_setzero_si256()); 4239 } 4240 4241 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4242 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4243 { 4244 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4245 (__v4si)_mm_cvtepu16_epi32(__A), 4246 (__v4si)__W); 4247 } 4248 4249 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4250 _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) 4251 { 4252 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4253 (__v4si)_mm_cvtepu16_epi32(__A), 4254 (__v4si)_mm_setzero_si128()); 4255 } 4256 4257 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4258 
_mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4259 { 4260 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4261 (__v8si)_mm256_cvtepu16_epi32(__A), 4262 (__v8si)__W); 4263 } 4264 4265 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4266 _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) 4267 { 4268 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4269 (__v8si)_mm256_cvtepu16_epi32(__A), 4270 (__v8si)_mm256_setzero_si256()); 4271 } 4272 4273 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4274 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4275 { 4276 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4277 (__v2di)_mm_cvtepu16_epi64(__A), 4278 (__v2di)__W); 4279 } 4280 4281 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4282 _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) 4283 { 4284 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4285 (__v2di)_mm_cvtepu16_epi64(__A), 4286 (__v2di)_mm_setzero_si128()); 4287 } 4288 4289 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4290 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4291 { 4292 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4293 (__v4di)_mm256_cvtepu16_epi64(__A), 4294 (__v4di)__W); 4295 } 4296 4297 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4298 _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) 4299 { 4300 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4301 (__v4di)_mm256_cvtepu16_epi64(__A), 4302 (__v4di)_mm256_setzero_si256()); 4303 } 4304 4305 4306 #define _mm_rol_epi32(a, b) \ 4307 ((__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b))) 4308 4309 #define _mm_mask_rol_epi32(w, u, a, b) \ 4310 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 4311 (__v4si)_mm_rol_epi32((a), (b)), \ 4312 (__v4si)(__m128i)(w))) 4313 4314 #define _mm_maskz_rol_epi32(u, a, b) \ 4315 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 4316 (__v4si)_mm_rol_epi32((a), (b)), \ 4317 
(__v4si)_mm_setzero_si128())) 4318 4319 #define _mm256_rol_epi32(a, b) \ 4320 ((__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b))) 4321 4322 #define _mm256_mask_rol_epi32(w, u, a, b) \ 4323 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 4324 (__v8si)_mm256_rol_epi32((a), (b)), \ 4325 (__v8si)(__m256i)(w))) 4326 4327 #define _mm256_maskz_rol_epi32(u, a, b) \ 4328 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 4329 (__v8si)_mm256_rol_epi32((a), (b)), \ 4330 (__v8si)_mm256_setzero_si256())) 4331 4332 #define _mm_rol_epi64(a, b) \ 4333 ((__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b))) 4334 4335 #define _mm_mask_rol_epi64(w, u, a, b) \ 4336 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 4337 (__v2di)_mm_rol_epi64((a), (b)), \ 4338 (__v2di)(__m128i)(w))) 4339 4340 #define _mm_maskz_rol_epi64(u, a, b) \ 4341 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 4342 (__v2di)_mm_rol_epi64((a), (b)), \ 4343 (__v2di)_mm_setzero_si128())) 4344 4345 #define _mm256_rol_epi64(a, b) \ 4346 ((__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b))) 4347 4348 #define _mm256_mask_rol_epi64(w, u, a, b) \ 4349 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 4350 (__v4di)_mm256_rol_epi64((a), (b)), \ 4351 (__v4di)(__m256i)(w))) 4352 4353 #define _mm256_maskz_rol_epi64(u, a, b) \ 4354 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 4355 (__v4di)_mm256_rol_epi64((a), (b)), \ 4356 (__v4di)_mm256_setzero_si256())) 4357 4358 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4359 _mm_rolv_epi32 (__m128i __A, __m128i __B) 4360 { 4361 return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B); 4362 } 4363 4364 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4365 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4366 { 4367 return (__m128i)__builtin_ia32_selectd_128(__U, 4368 (__v4si)_mm_rolv_epi32(__A, __B), 4369 (__v4si)__W); 4370 } 4371 4372 static __inline__ __m128i __DEFAULT_FN_ATTRS128 
4373 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 4374 { 4375 return (__m128i)__builtin_ia32_selectd_128(__U, 4376 (__v4si)_mm_rolv_epi32(__A, __B), 4377 (__v4si)_mm_setzero_si128()); 4378 } 4379 4380 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4381 _mm256_rolv_epi32 (__m256i __A, __m256i __B) 4382 { 4383 return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B); 4384 } 4385 4386 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4387 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 4388 { 4389 return (__m256i)__builtin_ia32_selectd_256(__U, 4390 (__v8si)_mm256_rolv_epi32(__A, __B), 4391 (__v8si)__W); 4392 } 4393 4394 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4395 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 4396 { 4397 return (__m256i)__builtin_ia32_selectd_256(__U, 4398 (__v8si)_mm256_rolv_epi32(__A, __B), 4399 (__v8si)_mm256_setzero_si256()); 4400 } 4401 4402 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4403 _mm_rolv_epi64 (__m128i __A, __m128i __B) 4404 { 4405 return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B); 4406 } 4407 4408 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4409 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4410 { 4411 return (__m128i)__builtin_ia32_selectq_128(__U, 4412 (__v2di)_mm_rolv_epi64(__A, __B), 4413 (__v2di)__W); 4414 } 4415 4416 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4417 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 4418 { 4419 return (__m128i)__builtin_ia32_selectq_128(__U, 4420 (__v2di)_mm_rolv_epi64(__A, __B), 4421 (__v2di)_mm_setzero_si128()); 4422 } 4423 4424 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4425 _mm256_rolv_epi64 (__m256i __A, __m256i __B) 4426 { 4427 return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B); 4428 } 4429 4430 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4431 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i 
__B) 4432 { 4433 return (__m256i)__builtin_ia32_selectq_256(__U, 4434 (__v4di)_mm256_rolv_epi64(__A, __B), 4435 (__v4di)__W); 4436 } 4437 4438 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4439 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 4440 { 4441 return (__m256i)__builtin_ia32_selectq_256(__U, 4442 (__v4di)_mm256_rolv_epi64(__A, __B), 4443 (__v4di)_mm256_setzero_si256()); 4444 } 4445 4446 #define _mm_ror_epi32(a, b) \ 4447 ((__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b))) 4448 4449 #define _mm_mask_ror_epi32(w, u, a, b) \ 4450 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 4451 (__v4si)_mm_ror_epi32((a), (b)), \ 4452 (__v4si)(__m128i)(w))) 4453 4454 #define _mm_maskz_ror_epi32(u, a, b) \ 4455 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 4456 (__v4si)_mm_ror_epi32((a), (b)), \ 4457 (__v4si)_mm_setzero_si128())) 4458 4459 #define _mm256_ror_epi32(a, b) \ 4460 ((__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b))) 4461 4462 #define _mm256_mask_ror_epi32(w, u, a, b) \ 4463 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 4464 (__v8si)_mm256_ror_epi32((a), (b)), \ 4465 (__v8si)(__m256i)(w))) 4466 4467 #define _mm256_maskz_ror_epi32(u, a, b) \ 4468 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 4469 (__v8si)_mm256_ror_epi32((a), (b)), \ 4470 (__v8si)_mm256_setzero_si256())) 4471 4472 #define _mm_ror_epi64(a, b) \ 4473 ((__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b))) 4474 4475 #define _mm_mask_ror_epi64(w, u, a, b) \ 4476 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 4477 (__v2di)_mm_ror_epi64((a), (b)), \ 4478 (__v2di)(__m128i)(w))) 4479 4480 #define _mm_maskz_ror_epi64(u, a, b) \ 4481 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 4482 (__v2di)_mm_ror_epi64((a), (b)), \ 4483 (__v2di)_mm_setzero_si128())) 4484 4485 #define _mm256_ror_epi64(a, b) \ 4486 ((__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b))) 4487 4488 #define 
_mm256_mask_ror_epi64(w, u, a, b) \ 4489 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 4490 (__v4di)_mm256_ror_epi64((a), (b)), \ 4491 (__v4di)(__m256i)(w))) 4492 4493 #define _mm256_maskz_ror_epi64(u, a, b) \ 4494 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 4495 (__v4di)_mm256_ror_epi64((a), (b)), \ 4496 (__v4di)_mm256_setzero_si256())) 4497 4498 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4499 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4500 { 4501 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4502 (__v4si)_mm_sll_epi32(__A, __B), 4503 (__v4si)__W); 4504 } 4505 4506 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4507 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B) 4508 { 4509 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4510 (__v4si)_mm_sll_epi32(__A, __B), 4511 (__v4si)_mm_setzero_si128()); 4512 } 4513 4514 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4515 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 4516 { 4517 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4518 (__v8si)_mm256_sll_epi32(__A, __B), 4519 (__v8si)__W); 4520 } 4521 4522 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4523 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B) 4524 { 4525 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4526 (__v8si)_mm256_sll_epi32(__A, __B), 4527 (__v8si)_mm256_setzero_si256()); 4528 } 4529 4530 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4531 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 4532 { 4533 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4534 (__v4si)_mm_slli_epi32(__A, (int)__B), 4535 (__v4si)__W); 4536 } 4537 4538 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4539 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B) 4540 { 4541 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4542 (__v4si)_mm_slli_epi32(__A, (int)__B), 4543 
(__v4si)_mm_setzero_si128()); 4544 } 4545 4546 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4547 _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 4548 { 4549 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4550 (__v8si)_mm256_slli_epi32(__A, (int)__B), 4551 (__v8si)__W); 4552 } 4553 4554 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4555 _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) 4556 { 4557 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4558 (__v8si)_mm256_slli_epi32(__A, (int)__B), 4559 (__v8si)_mm256_setzero_si256()); 4560 } 4561 4562 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4563 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4564 { 4565 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4566 (__v2di)_mm_sll_epi64(__A, __B), 4567 (__v2di)__W); 4568 } 4569 4570 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4571 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B) 4572 { 4573 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4574 (__v2di)_mm_sll_epi64(__A, __B), 4575 (__v2di)_mm_setzero_si128()); 4576 } 4577 4578 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4579 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 4580 { 4581 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4582 (__v4di)_mm256_sll_epi64(__A, __B), 4583 (__v4di)__W); 4584 } 4585 4586 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4587 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B) 4588 { 4589 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4590 (__v4di)_mm256_sll_epi64(__A, __B), 4591 (__v4di)_mm256_setzero_si256()); 4592 } 4593 4594 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4595 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 4596 { 4597 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4598 (__v2di)_mm_slli_epi64(__A, (int)__B), 4599 (__v2di)__W); 4600 } 
4601 4602 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4603 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B) 4604 { 4605 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4606 (__v2di)_mm_slli_epi64(__A, (int)__B), 4607 (__v2di)_mm_setzero_si128()); 4608 } 4609 4610 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4611 _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 4612 { 4613 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4614 (__v4di)_mm256_slli_epi64(__A, (int)__B), 4615 (__v4di)__W); 4616 } 4617 4618 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4619 _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) 4620 { 4621 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4622 (__v4di)_mm256_slli_epi64(__A, (int)__B), 4623 (__v4di)_mm256_setzero_si256()); 4624 } 4625 4626 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4627 _mm_rorv_epi32 (__m128i __A, __m128i __B) 4628 { 4629 return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B); 4630 } 4631 4632 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4633 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4634 { 4635 return (__m128i)__builtin_ia32_selectd_128(__U, 4636 (__v4si)_mm_rorv_epi32(__A, __B), 4637 (__v4si)__W); 4638 } 4639 4640 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4641 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 4642 { 4643 return (__m128i)__builtin_ia32_selectd_128(__U, 4644 (__v4si)_mm_rorv_epi32(__A, __B), 4645 (__v4si)_mm_setzero_si128()); 4646 } 4647 4648 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4649 _mm256_rorv_epi32 (__m256i __A, __m256i __B) 4650 { 4651 return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B); 4652 } 4653 4654 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4655 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 4656 { 4657 return (__m256i)__builtin_ia32_selectd_256(__U, 4658 
(__v8si)_mm256_rorv_epi32(__A, __B), 4659 (__v8si)__W); 4660 } 4661 4662 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4663 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 4664 { 4665 return (__m256i)__builtin_ia32_selectd_256(__U, 4666 (__v8si)_mm256_rorv_epi32(__A, __B), 4667 (__v8si)_mm256_setzero_si256()); 4668 } 4669 4670 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4671 _mm_rorv_epi64 (__m128i __A, __m128i __B) 4672 { 4673 return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B); 4674 } 4675 4676 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4677 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4678 { 4679 return (__m128i)__builtin_ia32_selectq_128(__U, 4680 (__v2di)_mm_rorv_epi64(__A, __B), 4681 (__v2di)__W); 4682 } 4683 4684 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4685 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 4686 { 4687 return (__m128i)__builtin_ia32_selectq_128(__U, 4688 (__v2di)_mm_rorv_epi64(__A, __B), 4689 (__v2di)_mm_setzero_si128()); 4690 } 4691 4692 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4693 _mm256_rorv_epi64 (__m256i __A, __m256i __B) 4694 { 4695 return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B); 4696 } 4697 4698 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4699 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 4700 { 4701 return (__m256i)__builtin_ia32_selectq_256(__U, 4702 (__v4di)_mm256_rorv_epi64(__A, __B), 4703 (__v4di)__W); 4704 } 4705 4706 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4707 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 4708 { 4709 return (__m256i)__builtin_ia32_selectq_256(__U, 4710 (__v4di)_mm256_rorv_epi64(__A, __B), 4711 (__v4di)_mm256_setzero_si256()); 4712 } 4713 4714 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4715 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4716 { 4717 return 
(__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4718 (__v2di)_mm_sllv_epi64(__X, __Y), 4719 (__v2di)__W); 4720 } 4721 4722 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4723 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 4724 { 4725 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4726 (__v2di)_mm_sllv_epi64(__X, __Y), 4727 (__v2di)_mm_setzero_si128()); 4728 } 4729 4730 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4731 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4732 { 4733 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4734 (__v4di)_mm256_sllv_epi64(__X, __Y), 4735 (__v4di)__W); 4736 } 4737 4738 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4739 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) 4740 { 4741 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4742 (__v4di)_mm256_sllv_epi64(__X, __Y), 4743 (__v4di)_mm256_setzero_si256()); 4744 } 4745 4746 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4747 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4748 { 4749 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4750 (__v4si)_mm_sllv_epi32(__X, __Y), 4751 (__v4si)__W); 4752 } 4753 4754 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4755 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 4756 { 4757 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4758 (__v4si)_mm_sllv_epi32(__X, __Y), 4759 (__v4si)_mm_setzero_si128()); 4760 } 4761 4762 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4763 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4764 { 4765 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4766 (__v8si)_mm256_sllv_epi32(__X, __Y), 4767 (__v8si)__W); 4768 } 4769 4770 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4771 _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 4772 { 4773 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4774 
(__v8si)_mm256_sllv_epi32(__X, __Y), 4775 (__v8si)_mm256_setzero_si256()); 4776 } 4777 4778 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4779 _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4780 { 4781 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4782 (__v2di)_mm_srlv_epi64(__X, __Y), 4783 (__v2di)__W); 4784 } 4785 4786 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4787 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 4788 { 4789 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4790 (__v2di)_mm_srlv_epi64(__X, __Y), 4791 (__v2di)_mm_setzero_si128()); 4792 } 4793 4794 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4795 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4796 { 4797 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4798 (__v4di)_mm256_srlv_epi64(__X, __Y), 4799 (__v4di)__W); 4800 } 4801 4802 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4803 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) 4804 { 4805 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4806 (__v4di)_mm256_srlv_epi64(__X, __Y), 4807 (__v4di)_mm256_setzero_si256()); 4808 } 4809 4810 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4811 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4812 { 4813 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4814 (__v4si)_mm_srlv_epi32(__X, __Y), 4815 (__v4si)__W); 4816 } 4817 4818 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4819 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 4820 { 4821 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4822 (__v4si)_mm_srlv_epi32(__X, __Y), 4823 (__v4si)_mm_setzero_si128()); 4824 } 4825 4826 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4827 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4828 { 4829 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4830 (__v8si)_mm256_srlv_epi32(__X, __Y), 4831 
(__v8si)__W); 4832 } 4833 4834 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4835 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 4836 { 4837 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4838 (__v8si)_mm256_srlv_epi32(__X, __Y), 4839 (__v8si)_mm256_setzero_si256()); 4840 } 4841 4842 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4843 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4844 { 4845 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4846 (__v4si)_mm_srl_epi32(__A, __B), 4847 (__v4si)__W); 4848 } 4849 4850 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4851 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B) 4852 { 4853 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4854 (__v4si)_mm_srl_epi32(__A, __B), 4855 (__v4si)_mm_setzero_si128()); 4856 } 4857 4858 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4859 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 4860 { 4861 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4862 (__v8si)_mm256_srl_epi32(__A, __B), 4863 (__v8si)__W); 4864 } 4865 4866 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4867 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B) 4868 { 4869 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4870 (__v8si)_mm256_srl_epi32(__A, __B), 4871 (__v8si)_mm256_setzero_si256()); 4872 } 4873 4874 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4875 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 4876 { 4877 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4878 (__v4si)_mm_srli_epi32(__A, (int)__B), 4879 (__v4si)__W); 4880 } 4881 4882 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4883 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B) 4884 { 4885 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4886 (__v4si)_mm_srli_epi32(__A, (int)__B), 4887 (__v4si)_mm_setzero_si128()); 4888 } 4889 4890 static __inline__ 
__m256i __DEFAULT_FN_ATTRS256 4891 _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 4892 { 4893 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4894 (__v8si)_mm256_srli_epi32(__A, (int)__B), 4895 (__v8si)__W); 4896 } 4897 4898 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4899 _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) 4900 { 4901 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4902 (__v8si)_mm256_srli_epi32(__A, (int)__B), 4903 (__v8si)_mm256_setzero_si256()); 4904 } 4905 4906 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4907 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4908 { 4909 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4910 (__v2di)_mm_srl_epi64(__A, __B), 4911 (__v2di)__W); 4912 } 4913 4914 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4915 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B) 4916 { 4917 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4918 (__v2di)_mm_srl_epi64(__A, __B), 4919 (__v2di)_mm_setzero_si128()); 4920 } 4921 4922 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4923 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 4924 { 4925 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4926 (__v4di)_mm256_srl_epi64(__A, __B), 4927 (__v4di)__W); 4928 } 4929 4930 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4931 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B) 4932 { 4933 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4934 (__v4di)_mm256_srl_epi64(__A, __B), 4935 (__v4di)_mm256_setzero_si256()); 4936 } 4937 4938 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4939 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 4940 { 4941 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4942 (__v2di)_mm_srli_epi64(__A, (int)__B), 4943 (__v2di)__W); 4944 } 4945 4946 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4947 
_mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B) 4948 { 4949 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4950 (__v2di)_mm_srli_epi64(__A, (int)__B), 4951 (__v2di)_mm_setzero_si128()); 4952 } 4953 4954 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4955 _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 4956 { 4957 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4958 (__v4di)_mm256_srli_epi64(__A, (int)__B), 4959 (__v4di)__W); 4960 } 4961 4962 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4963 _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) 4964 { 4965 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4966 (__v4di)_mm256_srli_epi64(__A, (int)__B), 4967 (__v4di)_mm256_setzero_si256()); 4968 } 4969 4970 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4971 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4972 { 4973 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4974 (__v4si)_mm_srav_epi32(__X, __Y), 4975 (__v4si)__W); 4976 } 4977 4978 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4979 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 4980 { 4981 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4982 (__v4si)_mm_srav_epi32(__X, __Y), 4983 (__v4si)_mm_setzero_si128()); 4984 } 4985 4986 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4987 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4988 { 4989 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4990 (__v8si)_mm256_srav_epi32(__X, __Y), 4991 (__v8si)__W); 4992 } 4993 4994 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4995 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 4996 { 4997 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4998 (__v8si)_mm256_srav_epi32(__X, __Y), 4999 (__v8si)_mm256_setzero_si256()); 5000 } 5001 5002 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5003 _mm_srav_epi64(__m128i 
__X, __m128i __Y) 5004 { 5005 return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y); 5006 } 5007 5008 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5009 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 5010 { 5011 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5012 (__v2di)_mm_srav_epi64(__X, __Y), 5013 (__v2di)__W); 5014 } 5015 5016 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5017 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 5018 { 5019 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5020 (__v2di)_mm_srav_epi64(__X, __Y), 5021 (__v2di)_mm_setzero_si128()); 5022 } 5023 5024 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5025 _mm256_srav_epi64(__m256i __X, __m256i __Y) 5026 { 5027 return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y); 5028 } 5029 5030 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5031 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 5032 { 5033 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5034 (__v4di)_mm256_srav_epi64(__X, __Y), 5035 (__v4di)__W); 5036 } 5037 5038 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5039 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y) 5040 { 5041 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5042 (__v4di)_mm256_srav_epi64(__X, __Y), 5043 (__v4di)_mm256_setzero_si256()); 5044 } 5045 5046 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5047 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 5048 { 5049 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 5050 (__v4si) __A, 5051 (__v4si) __W); 5052 } 5053 5054 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5055 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A) 5056 { 5057 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 5058 (__v4si) __A, 5059 (__v4si) _mm_setzero_si128 ()); 5060 } 5061 5062 5063 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5064 _mm256_mask_mov_epi32 (__m256i 
__W, __mmask8 __U, __m256i __A) 5065 { 5066 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 5067 (__v8si) __A, 5068 (__v8si) __W); 5069 } 5070 5071 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5072 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A) 5073 { 5074 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 5075 (__v8si) __A, 5076 (__v8si) _mm256_setzero_si256 ()); 5077 } 5078 5079 static __inline __m128i __DEFAULT_FN_ATTRS128 5080 _mm_load_epi32 (void const *__P) 5081 { 5082 return *(const __m128i *) __P; 5083 } 5084 5085 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5086 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P) 5087 { 5088 return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P, 5089 (__v4si) __W, 5090 (__mmask8) 5091 __U); 5092 } 5093 5094 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5095 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P) 5096 { 5097 return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P, 5098 (__v4si) 5099 _mm_setzero_si128 (), 5100 (__mmask8) 5101 __U); 5102 } 5103 5104 static __inline __m256i __DEFAULT_FN_ATTRS256 5105 _mm256_load_epi32 (void const *__P) 5106 { 5107 return *(const __m256i *) __P; 5108 } 5109 5110 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5111 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P) 5112 { 5113 return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P, 5114 (__v8si) __W, 5115 (__mmask8) 5116 __U); 5117 } 5118 5119 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5120 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P) 5121 { 5122 return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P, 5123 (__v8si) 5124 _mm256_setzero_si256 (), 5125 (__mmask8) 5126 __U); 5127 } 5128 5129 static __inline void __DEFAULT_FN_ATTRS128 5130 _mm_store_epi32 (void *__P, __m128i __A) 5131 { 5132 *(__m128i *) __P = __A; 5133 } 5134 5135 static __inline__ void __DEFAULT_FN_ATTRS128 
5136 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A) 5137 { 5138 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P, 5139 (__v4si) __A, 5140 (__mmask8) __U); 5141 } 5142 5143 static __inline void __DEFAULT_FN_ATTRS256 5144 _mm256_store_epi32 (void *__P, __m256i __A) 5145 { 5146 *(__m256i *) __P = __A; 5147 } 5148 5149 static __inline__ void __DEFAULT_FN_ATTRS256 5150 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A) 5151 { 5152 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P, 5153 (__v8si) __A, 5154 (__mmask8) __U); 5155 } 5156 5157 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5158 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 5159 { 5160 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 5161 (__v2di) __A, 5162 (__v2di) __W); 5163 } 5164 5165 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5166 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A) 5167 { 5168 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 5169 (__v2di) __A, 5170 (__v2di) _mm_setzero_si128 ()); 5171 } 5172 5173 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5174 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 5175 { 5176 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 5177 (__v4di) __A, 5178 (__v4di) __W); 5179 } 5180 5181 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5182 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A) 5183 { 5184 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 5185 (__v4di) __A, 5186 (__v4di) _mm256_setzero_si256 ()); 5187 } 5188 5189 static __inline __m128i __DEFAULT_FN_ATTRS128 5190 _mm_load_epi64 (void const *__P) 5191 { 5192 return *(const __m128i *) __P; 5193 } 5194 5195 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5196 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P) 5197 { 5198 return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P, 5199 (__v2di) __W, 5200 (__mmask8) 5201 __U); 5202 } 5203 5204 static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 5205 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P) 5206 { 5207 return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P, 5208 (__v2di) 5209 _mm_setzero_si128 (), 5210 (__mmask8) 5211 __U); 5212 } 5213 5214 static __inline __m256i __DEFAULT_FN_ATTRS256 5215 _mm256_load_epi64 (void const *__P) 5216 { 5217 return *(const __m256i *) __P; 5218 } 5219 5220 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5221 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P) 5222 { 5223 return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P, 5224 (__v4di) __W, 5225 (__mmask8) 5226 __U); 5227 } 5228 5229 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5230 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P) 5231 { 5232 return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P, 5233 (__v4di) 5234 _mm256_setzero_si256 (), 5235 (__mmask8) 5236 __U); 5237 } 5238 5239 static __inline void __DEFAULT_FN_ATTRS128 5240 _mm_store_epi64 (void *__P, __m128i __A) 5241 { 5242 *(__m128i *) __P = __A; 5243 } 5244 5245 static __inline__ void __DEFAULT_FN_ATTRS128 5246 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A) 5247 { 5248 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P, 5249 (__v2di) __A, 5250 (__mmask8) __U); 5251 } 5252 5253 static __inline void __DEFAULT_FN_ATTRS256 5254 _mm256_store_epi64 (void *__P, __m256i __A) 5255 { 5256 *(__m256i *) __P = __A; 5257 } 5258 5259 static __inline__ void __DEFAULT_FN_ATTRS256 5260 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A) 5261 { 5262 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P, 5263 (__v4di) __A, 5264 (__mmask8) __U); 5265 } 5266 5267 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5268 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A) 5269 { 5270 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5271 (__v2df)_mm_movedup_pd(__A), 5272 (__v2df)__W); 5273 } 5274 5275 static __inline__ __m128d 
__DEFAULT_FN_ATTRS128 5276 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A) 5277 { 5278 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5279 (__v2df)_mm_movedup_pd(__A), 5280 (__v2df)_mm_setzero_pd()); 5281 } 5282 5283 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5284 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A) 5285 { 5286 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5287 (__v4df)_mm256_movedup_pd(__A), 5288 (__v4df)__W); 5289 } 5290 5291 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5292 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A) 5293 { 5294 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5295 (__v4df)_mm256_movedup_pd(__A), 5296 (__v4df)_mm256_setzero_pd()); 5297 } 5298 5299 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5300 _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) 5301 { 5302 return (__m128i)__builtin_ia32_selectd_128(__M, 5303 (__v4si) _mm_set1_epi32(__A), 5304 (__v4si)__O); 5305 } 5306 5307 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5308 _mm_maskz_set1_epi32( __mmask8 __M, int __A) 5309 { 5310 return (__m128i)__builtin_ia32_selectd_128(__M, 5311 (__v4si) _mm_set1_epi32(__A), 5312 (__v4si)_mm_setzero_si128()); 5313 } 5314 5315 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5316 _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) 5317 { 5318 return (__m256i)__builtin_ia32_selectd_256(__M, 5319 (__v8si) _mm256_set1_epi32(__A), 5320 (__v8si)__O); 5321 } 5322 5323 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5324 _mm256_maskz_set1_epi32( __mmask8 __M, int __A) 5325 { 5326 return (__m256i)__builtin_ia32_selectd_256(__M, 5327 (__v8si) _mm256_set1_epi32(__A), 5328 (__v8si)_mm256_setzero_si256()); 5329 } 5330 5331 5332 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5333 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) 5334 { 5335 return (__m128i) __builtin_ia32_selectq_128(__M, 5336 (__v2di) _mm_set1_epi64x(__A), 5337 (__v2di) __O); 5338 } 5339 5340 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 5341 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A) 5342 { 5343 return (__m128i) __builtin_ia32_selectq_128(__M, 5344 (__v2di) _mm_set1_epi64x(__A), 5345 (__v2di) _mm_setzero_si128()); 5346 } 5347 5348 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5349 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A) 5350 { 5351 return (__m256i) __builtin_ia32_selectq_256(__M, 5352 (__v4di) _mm256_set1_epi64x(__A), 5353 (__v4di) __O) ; 5354 } 5355 5356 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5357 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) 5358 { 5359 return (__m256i) __builtin_ia32_selectq_256(__M, 5360 (__v4di) _mm256_set1_epi64x(__A), 5361 (__v4di) _mm256_setzero_si256()); 5362 } 5363 5364 #define _mm_fixupimm_pd(A, B, C, imm) \ 5365 ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ 5366 (__v2df)(__m128d)(B), \ 5367 (__v2di)(__m128i)(C), (int)(imm), \ 5368 (__mmask8)-1)) 5369 5370 #define _mm_mask_fixupimm_pd(A, U, B, C, imm) \ 5371 ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ 5372 (__v2df)(__m128d)(B), \ 5373 (__v2di)(__m128i)(C), (int)(imm), \ 5374 (__mmask8)(U))) 5375 5376 #define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \ 5377 ((__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \ 5378 (__v2df)(__m128d)(B), \ 5379 (__v2di)(__m128i)(C), \ 5380 (int)(imm), (__mmask8)(U))) 5381 5382 #define _mm256_fixupimm_pd(A, B, C, imm) \ 5383 ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ 5384 (__v4df)(__m256d)(B), \ 5385 (__v4di)(__m256i)(C), (int)(imm), \ 5386 (__mmask8)-1)) 5387 5388 #define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \ 5389 ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ 5390 (__v4df)(__m256d)(B), \ 5391 (__v4di)(__m256i)(C), (int)(imm), \ 5392 (__mmask8)(U))) 5393 5394 #define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \ 5395 ((__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \ 
5396 (__v4df)(__m256d)(B), \ 5397 (__v4di)(__m256i)(C), \ 5398 (int)(imm), (__mmask8)(U))) 5399 5400 #define _mm_fixupimm_ps(A, B, C, imm) \ 5401 ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ 5402 (__v4sf)(__m128)(B), \ 5403 (__v4si)(__m128i)(C), (int)(imm), \ 5404 (__mmask8)-1)) 5405 5406 #define _mm_mask_fixupimm_ps(A, U, B, C, imm) \ 5407 ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ 5408 (__v4sf)(__m128)(B), \ 5409 (__v4si)(__m128i)(C), (int)(imm), \ 5410 (__mmask8)(U))) 5411 5412 #define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \ 5413 ((__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \ 5414 (__v4sf)(__m128)(B), \ 5415 (__v4si)(__m128i)(C), (int)(imm), \ 5416 (__mmask8)(U))) 5417 5418 #define _mm256_fixupimm_ps(A, B, C, imm) \ 5419 ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ 5420 (__v8sf)(__m256)(B), \ 5421 (__v8si)(__m256i)(C), (int)(imm), \ 5422 (__mmask8)-1)) 5423 5424 #define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \ 5425 ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ 5426 (__v8sf)(__m256)(B), \ 5427 (__v8si)(__m256i)(C), (int)(imm), \ 5428 (__mmask8)(U))) 5429 5430 #define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \ 5431 ((__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \ 5432 (__v8sf)(__m256)(B), \ 5433 (__v8si)(__m256i)(C), (int)(imm), \ 5434 (__mmask8)(U))) 5435 5436 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5437 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P) 5438 { 5439 return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P, 5440 (__v2df) __W, 5441 (__mmask8) __U); 5442 } 5443 5444 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5445 _mm_maskz_load_pd (__mmask8 __U, void const *__P) 5446 { 5447 return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P, 5448 (__v2df) 5449 _mm_setzero_pd (), 5450 (__mmask8) __U); 5451 } 5452 5453 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5454 
_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P) 5455 { 5456 return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P, 5457 (__v4df) __W, 5458 (__mmask8) __U); 5459 } 5460 5461 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5462 _mm256_maskz_load_pd (__mmask8 __U, void const *__P) 5463 { 5464 return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P, 5465 (__v4df) 5466 _mm256_setzero_pd (), 5467 (__mmask8) __U); 5468 } 5469 5470 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5471 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P) 5472 { 5473 return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P, 5474 (__v4sf) __W, 5475 (__mmask8) __U); 5476 } 5477 5478 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5479 _mm_maskz_load_ps (__mmask8 __U, void const *__P) 5480 { 5481 return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P, 5482 (__v4sf) 5483 _mm_setzero_ps (), 5484 (__mmask8) __U); 5485 } 5486 5487 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5488 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P) 5489 { 5490 return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P, 5491 (__v8sf) __W, 5492 (__mmask8) __U); 5493 } 5494 5495 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5496 _mm256_maskz_load_ps (__mmask8 __U, void const *__P) 5497 { 5498 return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P, 5499 (__v8sf) 5500 _mm256_setzero_ps (), 5501 (__mmask8) __U); 5502 } 5503 5504 static __inline __m128i __DEFAULT_FN_ATTRS128 5505 _mm_loadu_epi64 (void const *__P) 5506 { 5507 struct __loadu_epi64 { 5508 __m128i_u __v; 5509 } __attribute__((__packed__, __may_alias__)); 5510 return ((const struct __loadu_epi64*)__P)->__v; 5511 } 5512 5513 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5514 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) 5515 { 5516 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P, 5517 (__v2di) __W, 
5518 (__mmask8) __U); 5519 } 5520 5521 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5522 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 5523 { 5524 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P, 5525 (__v2di) 5526 _mm_setzero_si128 (), 5527 (__mmask8) __U); 5528 } 5529 5530 static __inline __m256i __DEFAULT_FN_ATTRS256 5531 _mm256_loadu_epi64 (void const *__P) 5532 { 5533 struct __loadu_epi64 { 5534 __m256i_u __v; 5535 } __attribute__((__packed__, __may_alias__)); 5536 return ((const struct __loadu_epi64*)__P)->__v; 5537 } 5538 5539 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5540 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) 5541 { 5542 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P, 5543 (__v4di) __W, 5544 (__mmask8) __U); 5545 } 5546 5547 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5548 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 5549 { 5550 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P, 5551 (__v4di) 5552 _mm256_setzero_si256 (), 5553 (__mmask8) __U); 5554 } 5555 5556 static __inline __m128i __DEFAULT_FN_ATTRS128 5557 _mm_loadu_epi32 (void const *__P) 5558 { 5559 struct __loadu_epi32 { 5560 __m128i_u __v; 5561 } __attribute__((__packed__, __may_alias__)); 5562 return ((const struct __loadu_epi32*)__P)->__v; 5563 } 5564 5565 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5566 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) 5567 { 5568 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P, 5569 (__v4si) __W, 5570 (__mmask8) __U); 5571 } 5572 5573 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5574 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P) 5575 { 5576 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P, 5577 (__v4si) 5578 _mm_setzero_si128 (), 5579 (__mmask8) __U); 5580 } 5581 5582 static __inline __m256i __DEFAULT_FN_ATTRS256 5583 _mm256_loadu_epi32 (void const 
*__P) 5584 { 5585 struct __loadu_epi32 { 5586 __m256i_u __v; 5587 } __attribute__((__packed__, __may_alias__)); 5588 return ((const struct __loadu_epi32*)__P)->__v; 5589 } 5590 5591 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5592 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P) 5593 { 5594 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P, 5595 (__v8si) __W, 5596 (__mmask8) __U); 5597 } 5598 5599 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5600 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P) 5601 { 5602 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P, 5603 (__v8si) 5604 _mm256_setzero_si256 (), 5605 (__mmask8) __U); 5606 } 5607 5608 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5609 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P) 5610 { 5611 return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P, 5612 (__v2df) __W, 5613 (__mmask8) __U); 5614 } 5615 5616 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5617 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P) 5618 { 5619 return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P, 5620 (__v2df) 5621 _mm_setzero_pd (), 5622 (__mmask8) __U); 5623 } 5624 5625 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5626 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P) 5627 { 5628 return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P, 5629 (__v4df) __W, 5630 (__mmask8) __U); 5631 } 5632 5633 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5634 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P) 5635 { 5636 return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P, 5637 (__v4df) 5638 _mm256_setzero_pd (), 5639 (__mmask8) __U); 5640 } 5641 5642 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5643 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P) 5644 { 5645 return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P, 5646 (__v4sf) __W, 5647 
(__mmask8) __U); 5648 } 5649 5650 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5651 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P) 5652 { 5653 return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P, 5654 (__v4sf) 5655 _mm_setzero_ps (), 5656 (__mmask8) __U); 5657 } 5658 5659 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5660 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P) 5661 { 5662 return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P, 5663 (__v8sf) __W, 5664 (__mmask8) __U); 5665 } 5666 5667 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5668 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P) 5669 { 5670 return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P, 5671 (__v8sf) 5672 _mm256_setzero_ps (), 5673 (__mmask8) __U); 5674 } 5675 5676 static __inline__ void __DEFAULT_FN_ATTRS128 5677 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A) 5678 { 5679 __builtin_ia32_storeapd128_mask ((__v2df *) __P, 5680 (__v2df) __A, 5681 (__mmask8) __U); 5682 } 5683 5684 static __inline__ void __DEFAULT_FN_ATTRS256 5685 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A) 5686 { 5687 __builtin_ia32_storeapd256_mask ((__v4df *) __P, 5688 (__v4df) __A, 5689 (__mmask8) __U); 5690 } 5691 5692 static __inline__ void __DEFAULT_FN_ATTRS128 5693 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A) 5694 { 5695 __builtin_ia32_storeaps128_mask ((__v4sf *) __P, 5696 (__v4sf) __A, 5697 (__mmask8) __U); 5698 } 5699 5700 static __inline__ void __DEFAULT_FN_ATTRS256 5701 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A) 5702 { 5703 __builtin_ia32_storeaps256_mask ((__v8sf *) __P, 5704 (__v8sf) __A, 5705 (__mmask8) __U); 5706 } 5707 5708 static __inline void __DEFAULT_FN_ATTRS128 5709 _mm_storeu_epi64 (void *__P, __m128i __A) 5710 { 5711 struct __storeu_epi64 { 5712 __m128i_u __v; 5713 } __attribute__((__packed__, __may_alias__)); 5714 ((struct __storeu_epi64*)__P)->__v = __A; 5715 } 5716 5717 static 
__inline__ void __DEFAULT_FN_ATTRS128 5718 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A) 5719 { 5720 __builtin_ia32_storedqudi128_mask ((__v2di *) __P, 5721 (__v2di) __A, 5722 (__mmask8) __U); 5723 } 5724 5725 static __inline void __DEFAULT_FN_ATTRS256 5726 _mm256_storeu_epi64 (void *__P, __m256i __A) 5727 { 5728 struct __storeu_epi64 { 5729 __m256i_u __v; 5730 } __attribute__((__packed__, __may_alias__)); 5731 ((struct __storeu_epi64*)__P)->__v = __A; 5732 } 5733 5734 static __inline__ void __DEFAULT_FN_ATTRS256 5735 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A) 5736 { 5737 __builtin_ia32_storedqudi256_mask ((__v4di *) __P, 5738 (__v4di) __A, 5739 (__mmask8) __U); 5740 } 5741 5742 static __inline void __DEFAULT_FN_ATTRS128 5743 _mm_storeu_epi32 (void *__P, __m128i __A) 5744 { 5745 struct __storeu_epi32 { 5746 __m128i_u __v; 5747 } __attribute__((__packed__, __may_alias__)); 5748 ((struct __storeu_epi32*)__P)->__v = __A; 5749 } 5750 5751 static __inline__ void __DEFAULT_FN_ATTRS128 5752 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A) 5753 { 5754 __builtin_ia32_storedqusi128_mask ((__v4si *) __P, 5755 (__v4si) __A, 5756 (__mmask8) __U); 5757 } 5758 5759 static __inline void __DEFAULT_FN_ATTRS256 5760 _mm256_storeu_epi32 (void *__P, __m256i __A) 5761 { 5762 struct __storeu_epi32 { 5763 __m256i_u __v; 5764 } __attribute__((__packed__, __may_alias__)); 5765 ((struct __storeu_epi32*)__P)->__v = __A; 5766 } 5767 5768 static __inline__ void __DEFAULT_FN_ATTRS256 5769 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A) 5770 { 5771 __builtin_ia32_storedqusi256_mask ((__v8si *) __P, 5772 (__v8si) __A, 5773 (__mmask8) __U); 5774 } 5775 5776 static __inline__ void __DEFAULT_FN_ATTRS128 5777 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A) 5778 { 5779 __builtin_ia32_storeupd128_mask ((__v2df *) __P, 5780 (__v2df) __A, 5781 (__mmask8) __U); 5782 } 5783 5784 static __inline__ void __DEFAULT_FN_ATTRS256 5785 
_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A) 5786 { 5787 __builtin_ia32_storeupd256_mask ((__v4df *) __P, 5788 (__v4df) __A, 5789 (__mmask8) __U); 5790 } 5791 5792 static __inline__ void __DEFAULT_FN_ATTRS128 5793 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A) 5794 { 5795 __builtin_ia32_storeups128_mask ((__v4sf *) __P, 5796 (__v4sf) __A, 5797 (__mmask8) __U); 5798 } 5799 5800 static __inline__ void __DEFAULT_FN_ATTRS256 5801 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A) 5802 { 5803 __builtin_ia32_storeups256_mask ((__v8sf *) __P, 5804 (__v8sf) __A, 5805 (__mmask8) __U); 5806 } 5807 5808 5809 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5810 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 5811 { 5812 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5813 (__v2df)_mm_unpackhi_pd(__A, __B), 5814 (__v2df)__W); 5815 } 5816 5817 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5818 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) 5819 { 5820 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5821 (__v2df)_mm_unpackhi_pd(__A, __B), 5822 (__v2df)_mm_setzero_pd()); 5823 } 5824 5825 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5826 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) 5827 { 5828 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5829 (__v4df)_mm256_unpackhi_pd(__A, __B), 5830 (__v4df)__W); 5831 } 5832 5833 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5834 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) 5835 { 5836 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5837 (__v4df)_mm256_unpackhi_pd(__A, __B), 5838 (__v4df)_mm256_setzero_pd()); 5839 } 5840 5841 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5842 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 5843 { 5844 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 5845 (__v4sf)_mm_unpackhi_ps(__A, __B), 5846 
(__v4sf)__W); 5847 } 5848 5849 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5850 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) 5851 { 5852 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 5853 (__v4sf)_mm_unpackhi_ps(__A, __B), 5854 (__v4sf)_mm_setzero_ps()); 5855 } 5856 5857 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5858 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 5859 { 5860 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 5861 (__v8sf)_mm256_unpackhi_ps(__A, __B), 5862 (__v8sf)__W); 5863 } 5864 5865 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5866 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) 5867 { 5868 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 5869 (__v8sf)_mm256_unpackhi_ps(__A, __B), 5870 (__v8sf)_mm256_setzero_ps()); 5871 } 5872 5873 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5874 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 5875 { 5876 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5877 (__v2df)_mm_unpacklo_pd(__A, __B), 5878 (__v2df)__W); 5879 } 5880 5881 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5882 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) 5883 { 5884 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5885 (__v2df)_mm_unpacklo_pd(__A, __B), 5886 (__v2df)_mm_setzero_pd()); 5887 } 5888 5889 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5890 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) 5891 { 5892 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5893 (__v4df)_mm256_unpacklo_pd(__A, __B), 5894 (__v4df)__W); 5895 } 5896 5897 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5898 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) 5899 { 5900 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5901 (__v4df)_mm256_unpacklo_pd(__A, __B), 5902 (__v4df)_mm256_setzero_pd()); 5903 } 5904 5905 static __inline__ __m128 
__DEFAULT_FN_ATTRS128 5906 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 5907 { 5908 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 5909 (__v4sf)_mm_unpacklo_ps(__A, __B), 5910 (__v4sf)__W); 5911 } 5912 5913 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5914 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) 5915 { 5916 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 5917 (__v4sf)_mm_unpacklo_ps(__A, __B), 5918 (__v4sf)_mm_setzero_ps()); 5919 } 5920 5921 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5922 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 5923 { 5924 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 5925 (__v8sf)_mm256_unpacklo_ps(__A, __B), 5926 (__v8sf)__W); 5927 } 5928 5929 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5930 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) 5931 { 5932 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 5933 (__v8sf)_mm256_unpacklo_ps(__A, __B), 5934 (__v8sf)_mm256_setzero_ps()); 5935 } 5936 5937 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5938 _mm_rcp14_pd (__m128d __A) 5939 { 5940 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 5941 (__v2df) 5942 _mm_setzero_pd (), 5943 (__mmask8) -1); 5944 } 5945 5946 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5947 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A) 5948 { 5949 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 5950 (__v2df) __W, 5951 (__mmask8) __U); 5952 } 5953 5954 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5955 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A) 5956 { 5957 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 5958 (__v2df) 5959 _mm_setzero_pd (), 5960 (__mmask8) __U); 5961 } 5962 5963 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5964 _mm256_rcp14_pd (__m256d __A) 5965 { 5966 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 5967 (__v4df) 5968 _mm256_setzero_pd (), 
5969 (__mmask8) -1); 5970 } 5971 5972 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5973 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A) 5974 { 5975 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 5976 (__v4df) __W, 5977 (__mmask8) __U); 5978 } 5979 5980 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5981 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A) 5982 { 5983 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 5984 (__v4df) 5985 _mm256_setzero_pd (), 5986 (__mmask8) __U); 5987 } 5988 5989 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5990 _mm_rcp14_ps (__m128 __A) 5991 { 5992 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 5993 (__v4sf) 5994 _mm_setzero_ps (), 5995 (__mmask8) -1); 5996 } 5997 5998 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5999 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A) 6000 { 6001 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 6002 (__v4sf) __W, 6003 (__mmask8) __U); 6004 } 6005 6006 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6007 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A) 6008 { 6009 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 6010 (__v4sf) 6011 _mm_setzero_ps (), 6012 (__mmask8) __U); 6013 } 6014 6015 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6016 _mm256_rcp14_ps (__m256 __A) 6017 { 6018 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6019 (__v8sf) 6020 _mm256_setzero_ps (), 6021 (__mmask8) -1); 6022 } 6023 6024 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6025 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A) 6026 { 6027 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6028 (__v8sf) __W, 6029 (__mmask8) __U); 6030 } 6031 6032 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6033 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A) 6034 { 6035 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6036 (__v8sf) 6037 _mm256_setzero_ps (), 6038 (__mmask8) __U); 6039 } 6040 6041 
#define _mm_mask_permute_pd(W, U, X, C) \ 6042 ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6043 (__v2df)_mm_permute_pd((X), (C)), \ 6044 (__v2df)(__m128d)(W))) 6045 6046 #define _mm_maskz_permute_pd(U, X, C) \ 6047 ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6048 (__v2df)_mm_permute_pd((X), (C)), \ 6049 (__v2df)_mm_setzero_pd())) 6050 6051 #define _mm256_mask_permute_pd(W, U, X, C) \ 6052 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6053 (__v4df)_mm256_permute_pd((X), (C)), \ 6054 (__v4df)(__m256d)(W))) 6055 6056 #define _mm256_maskz_permute_pd(U, X, C) \ 6057 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6058 (__v4df)_mm256_permute_pd((X), (C)), \ 6059 (__v4df)_mm256_setzero_pd())) 6060 6061 #define _mm_mask_permute_ps(W, U, X, C) \ 6062 ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6063 (__v4sf)_mm_permute_ps((X), (C)), \ 6064 (__v4sf)(__m128)(W))) 6065 6066 #define _mm_maskz_permute_ps(U, X, C) \ 6067 ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6068 (__v4sf)_mm_permute_ps((X), (C)), \ 6069 (__v4sf)_mm_setzero_ps())) 6070 6071 #define _mm256_mask_permute_ps(W, U, X, C) \ 6072 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6073 (__v8sf)_mm256_permute_ps((X), (C)), \ 6074 (__v8sf)(__m256)(W))) 6075 6076 #define _mm256_maskz_permute_ps(U, X, C) \ 6077 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6078 (__v8sf)_mm256_permute_ps((X), (C)), \ 6079 (__v8sf)_mm256_setzero_ps())) 6080 6081 static __inline__ __m128d __DEFAULT_FN_ATTRS128 6082 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) 6083 { 6084 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6085 (__v2df)_mm_permutevar_pd(__A, __C), 6086 (__v2df)__W); 6087 } 6088 6089 static __inline__ __m128d __DEFAULT_FN_ATTRS128 6090 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) 6091 { 6092 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6093 (__v2df)_mm_permutevar_pd(__A, __C), 6094 
(__v2df)_mm_setzero_pd()); 6095 } 6096 6097 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6098 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) 6099 { 6100 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6101 (__v4df)_mm256_permutevar_pd(__A, __C), 6102 (__v4df)__W); 6103 } 6104 6105 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6106 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) 6107 { 6108 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6109 (__v4df)_mm256_permutevar_pd(__A, __C), 6110 (__v4df)_mm256_setzero_pd()); 6111 } 6112 6113 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6114 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) 6115 { 6116 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6117 (__v4sf)_mm_permutevar_ps(__A, __C), 6118 (__v4sf)__W); 6119 } 6120 6121 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6122 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) 6123 { 6124 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6125 (__v4sf)_mm_permutevar_ps(__A, __C), 6126 (__v4sf)_mm_setzero_ps()); 6127 } 6128 6129 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6130 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) 6131 { 6132 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6133 (__v8sf)_mm256_permutevar_ps(__A, __C), 6134 (__v8sf)__W); 6135 } 6136 6137 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6138 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) 6139 { 6140 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6141 (__v8sf)_mm256_permutevar_ps(__A, __C), 6142 (__v8sf)_mm256_setzero_ps()); 6143 } 6144 6145 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6146 _mm_test_epi32_mask (__m128i __A, __m128i __B) 6147 { 6148 return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 6149 } 6150 6151 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6152 
_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) 6153 { 6154 return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B), 6155 _mm_setzero_si128()); 6156 } 6157 6158 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6159 _mm256_test_epi32_mask (__m256i __A, __m256i __B) 6160 { 6161 return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B), 6162 _mm256_setzero_si256()); 6163 } 6164 6165 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6166 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 6167 { 6168 return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B), 6169 _mm256_setzero_si256()); 6170 } 6171 6172 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6173 _mm_test_epi64_mask (__m128i __A, __m128i __B) 6174 { 6175 return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 6176 } 6177 6178 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6179 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) 6180 { 6181 return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B), 6182 _mm_setzero_si128()); 6183 } 6184 6185 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6186 _mm256_test_epi64_mask (__m256i __A, __m256i __B) 6187 { 6188 return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B), 6189 _mm256_setzero_si256()); 6190 } 6191 6192 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6193 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) 6194 { 6195 return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B), 6196 _mm256_setzero_si256()); 6197 } 6198 6199 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6200 _mm_testn_epi32_mask (__m128i __A, __m128i __B) 6201 { 6202 return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 6203 } 6204 6205 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6206 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) 6207 { 6208 return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, 
__B), 6209 _mm_setzero_si128()); 6210 } 6211 6212 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6213 _mm256_testn_epi32_mask (__m256i __A, __m256i __B) 6214 { 6215 return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B), 6216 _mm256_setzero_si256()); 6217 } 6218 6219 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6220 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 6221 { 6222 return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B), 6223 _mm256_setzero_si256()); 6224 } 6225 6226 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6227 _mm_testn_epi64_mask (__m128i __A, __m128i __B) 6228 { 6229 return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 6230 } 6231 6232 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6233 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) 6234 { 6235 return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B), 6236 _mm_setzero_si128()); 6237 } 6238 6239 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6240 _mm256_testn_epi64_mask (__m256i __A, __m256i __B) 6241 { 6242 return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B), 6243 _mm256_setzero_si256()); 6244 } 6245 6246 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6247 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) 6248 { 6249 return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B), 6250 _mm256_setzero_si256()); 6251 } 6252 6253 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6254 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6255 { 6256 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6257 (__v4si)_mm_unpackhi_epi32(__A, __B), 6258 (__v4si)__W); 6259 } 6260 6261 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6262 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6263 { 6264 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6265 (__v4si)_mm_unpackhi_epi32(__A, __B), 6266 
(__v4si)_mm_setzero_si128()); 6267 } 6268 6269 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6270 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6271 { 6272 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6273 (__v8si)_mm256_unpackhi_epi32(__A, __B), 6274 (__v8si)__W); 6275 } 6276 6277 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6278 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) 6279 { 6280 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6281 (__v8si)_mm256_unpackhi_epi32(__A, __B), 6282 (__v8si)_mm256_setzero_si256()); 6283 } 6284 6285 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6286 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6287 { 6288 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6289 (__v2di)_mm_unpackhi_epi64(__A, __B), 6290 (__v2di)__W); 6291 } 6292 6293 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6294 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6295 { 6296 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6297 (__v2di)_mm_unpackhi_epi64(__A, __B), 6298 (__v2di)_mm_setzero_si128()); 6299 } 6300 6301 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6302 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6303 { 6304 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6305 (__v4di)_mm256_unpackhi_epi64(__A, __B), 6306 (__v4di)__W); 6307 } 6308 6309 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6310 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) 6311 { 6312 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6313 (__v4di)_mm256_unpackhi_epi64(__A, __B), 6314 (__v4di)_mm256_setzero_si256()); 6315 } 6316 6317 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6318 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6319 { 6320 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6321 (__v4si)_mm_unpacklo_epi32(__A, 
__B), 6322 (__v4si)__W); 6323 } 6324 6325 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6326 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6327 { 6328 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6329 (__v4si)_mm_unpacklo_epi32(__A, __B), 6330 (__v4si)_mm_setzero_si128()); 6331 } 6332 6333 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6334 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6335 { 6336 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6337 (__v8si)_mm256_unpacklo_epi32(__A, __B), 6338 (__v8si)__W); 6339 } 6340 6341 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6342 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) 6343 { 6344 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6345 (__v8si)_mm256_unpacklo_epi32(__A, __B), 6346 (__v8si)_mm256_setzero_si256()); 6347 } 6348 6349 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6350 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6351 { 6352 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6353 (__v2di)_mm_unpacklo_epi64(__A, __B), 6354 (__v2di)__W); 6355 } 6356 6357 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6358 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6359 { 6360 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6361 (__v2di)_mm_unpacklo_epi64(__A, __B), 6362 (__v2di)_mm_setzero_si128()); 6363 } 6364 6365 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6366 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6367 { 6368 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6369 (__v4di)_mm256_unpacklo_epi64(__A, __B), 6370 (__v4di)__W); 6371 } 6372 6373 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6374 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) 6375 { 6376 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6377 (__v4di)_mm256_unpacklo_epi64(__A, __B), 6378 
(__v4di)_mm256_setzero_si256()); 6379 } 6380 6381 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6382 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6383 { 6384 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6385 (__v4si)_mm_sra_epi32(__A, __B), 6386 (__v4si)__W); 6387 } 6388 6389 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6390 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6391 { 6392 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6393 (__v4si)_mm_sra_epi32(__A, __B), 6394 (__v4si)_mm_setzero_si128()); 6395 } 6396 6397 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6398 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 6399 { 6400 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6401 (__v8si)_mm256_sra_epi32(__A, __B), 6402 (__v8si)__W); 6403 } 6404 6405 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6406 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) 6407 { 6408 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6409 (__v8si)_mm256_sra_epi32(__A, __B), 6410 (__v8si)_mm256_setzero_si256()); 6411 } 6412 6413 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6414 _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 6415 { 6416 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6417 (__v4si)_mm_srai_epi32(__A, (int)__B), 6418 (__v4si)__W); 6419 } 6420 6421 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6422 _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B) 6423 { 6424 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6425 (__v4si)_mm_srai_epi32(__A, (int)__B), 6426 (__v4si)_mm_setzero_si128()); 6427 } 6428 6429 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6430 _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 6431 { 6432 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6433 (__v8si)_mm256_srai_epi32(__A, (int)__B), 6434 (__v8si)__W); 6435 } 6436 
6437 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6438 _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B) 6439 { 6440 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6441 (__v8si)_mm256_srai_epi32(__A, (int)__B), 6442 (__v8si)_mm256_setzero_si256()); 6443 } 6444 6445 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6446 _mm_sra_epi64(__m128i __A, __m128i __B) 6447 { 6448 return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B); 6449 } 6450 6451 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6452 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6453 { 6454 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6455 (__v2di)_mm_sra_epi64(__A, __B), \ 6456 (__v2di)__W); 6457 } 6458 6459 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6460 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6461 { 6462 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6463 (__v2di)_mm_sra_epi64(__A, __B), \ 6464 (__v2di)_mm_setzero_si128()); 6465 } 6466 6467 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6468 _mm256_sra_epi64(__m256i __A, __m128i __B) 6469 { 6470 return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B); 6471 } 6472 6473 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6474 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 6475 { 6476 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6477 (__v4di)_mm256_sra_epi64(__A, __B), \ 6478 (__v4di)__W); 6479 } 6480 6481 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6482 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) 6483 { 6484 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6485 (__v4di)_mm256_sra_epi64(__A, __B), \ 6486 (__v4di)_mm256_setzero_si256()); 6487 } 6488 6489 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6490 _mm_srai_epi64(__m128i __A, unsigned int __imm) 6491 { 6492 return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, (int)__imm); 6493 } 6494 6495 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 6496 _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm) 6497 { 6498 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6499 (__v2di)_mm_srai_epi64(__A, __imm), \ 6500 (__v2di)__W); 6501 } 6502 6503 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6504 _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm) 6505 { 6506 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6507 (__v2di)_mm_srai_epi64(__A, __imm), \ 6508 (__v2di)_mm_setzero_si128()); 6509 } 6510 6511 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6512 _mm256_srai_epi64(__m256i __A, unsigned int __imm) 6513 { 6514 return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, (int)__imm); 6515 } 6516 6517 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6518 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, 6519 unsigned int __imm) 6520 { 6521 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6522 (__v4di)_mm256_srai_epi64(__A, __imm), \ 6523 (__v4di)__W); 6524 } 6525 6526 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6527 _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm) 6528 { 6529 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6530 (__v4di)_mm256_srai_epi64(__A, __imm), \ 6531 (__v4di)_mm256_setzero_si256()); 6532 } 6533 6534 #define _mm_ternarylogic_epi32(A, B, C, imm) \ 6535 ((__m128i)__builtin_ia32_pternlogd128_mask( \ 6536 (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \ 6537 (unsigned char)(imm), (__mmask8)-1)) 6538 6539 #define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \ 6540 ((__m128i)__builtin_ia32_pternlogd128_mask( \ 6541 (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \ 6542 (unsigned char)(imm), (__mmask8)(U))) 6543 6544 #define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \ 6545 ((__m128i)__builtin_ia32_pternlogd128_maskz( \ 6546 (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \ 
6547 (unsigned char)(imm), (__mmask8)(U))) 6548 6549 #define _mm256_ternarylogic_epi32(A, B, C, imm) \ 6550 ((__m256i)__builtin_ia32_pternlogd256_mask( \ 6551 (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \ 6552 (unsigned char)(imm), (__mmask8)-1)) 6553 6554 #define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \ 6555 ((__m256i)__builtin_ia32_pternlogd256_mask( \ 6556 (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \ 6557 (unsigned char)(imm), (__mmask8)(U))) 6558 6559 #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \ 6560 ((__m256i)__builtin_ia32_pternlogd256_maskz( \ 6561 (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \ 6562 (unsigned char)(imm), (__mmask8)(U))) 6563 6564 #define _mm_ternarylogic_epi64(A, B, C, imm) \ 6565 ((__m128i)__builtin_ia32_pternlogq128_mask( \ 6566 (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \ 6567 (unsigned char)(imm), (__mmask8)-1)) 6568 6569 #define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \ 6570 ((__m128i)__builtin_ia32_pternlogq128_mask( \ 6571 (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \ 6572 (unsigned char)(imm), (__mmask8)(U))) 6573 6574 #define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \ 6575 ((__m128i)__builtin_ia32_pternlogq128_maskz( \ 6576 (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \ 6577 (unsigned char)(imm), (__mmask8)(U))) 6578 6579 #define _mm256_ternarylogic_epi64(A, B, C, imm) \ 6580 ((__m256i)__builtin_ia32_pternlogq256_mask( \ 6581 (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), \ 6582 (unsigned char)(imm), (__mmask8)-1)) 6583 6584 #define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \ 6585 ((__m256i)__builtin_ia32_pternlogq256_mask( \ 6586 (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), \ 6587 (unsigned char)(imm), (__mmask8)(U))) 6588 6589 #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \ 6590 
((__m256i)__builtin_ia32_pternlogq256_maskz( \ 6591 (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), \ 6592 (unsigned char)(imm), (__mmask8)(U))) 6593 6594 #define _mm256_shuffle_f32x4(A, B, imm) \ 6595 ((__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \ 6596 (__v8sf)(__m256)(B), (int)(imm))) 6597 6598 #define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \ 6599 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6600 (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \ 6601 (__v8sf)(__m256)(W))) 6602 6603 #define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \ 6604 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6605 (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \ 6606 (__v8sf)_mm256_setzero_ps())) 6607 6608 #define _mm256_shuffle_f64x2(A, B, imm) \ 6609 ((__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \ 6610 (__v4df)(__m256d)(B), (int)(imm))) 6611 6612 #define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \ 6613 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6614 (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \ 6615 (__v4df)(__m256d)(W))) 6616 6617 #define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \ 6618 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6619 (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \ 6620 (__v4df)_mm256_setzero_pd())) 6621 6622 #define _mm256_shuffle_i32x4(A, B, imm) \ 6623 ((__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \ 6624 (__v8si)(__m256i)(B), (int)(imm))) 6625 6626 #define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \ 6627 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 6628 (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \ 6629 (__v8si)(__m256i)(W))) 6630 6631 #define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \ 6632 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 6633 (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \ 6634 (__v8si)_mm256_setzero_si256())) 6635 6636 #define _mm256_shuffle_i64x2(A, B, imm) \ 6637 
((__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \ 6638 (__v4di)(__m256i)(B), (int)(imm))) 6639 6640 #define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \ 6641 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 6642 (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \ 6643 (__v4di)(__m256i)(W))) 6644 6645 6646 #define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \ 6647 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 6648 (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \ 6649 (__v4di)_mm256_setzero_si256())) 6650 6651 #define _mm_mask_shuffle_pd(W, U, A, B, M) \ 6652 ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6653 (__v2df)_mm_shuffle_pd((A), (B), (M)), \ 6654 (__v2df)(__m128d)(W))) 6655 6656 #define _mm_maskz_shuffle_pd(U, A, B, M) \ 6657 ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6658 (__v2df)_mm_shuffle_pd((A), (B), (M)), \ 6659 (__v2df)_mm_setzero_pd())) 6660 6661 #define _mm256_mask_shuffle_pd(W, U, A, B, M) \ 6662 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6663 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ 6664 (__v4df)(__m256d)(W))) 6665 6666 #define _mm256_maskz_shuffle_pd(U, A, B, M) \ 6667 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6668 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ 6669 (__v4df)_mm256_setzero_pd())) 6670 6671 #define _mm_mask_shuffle_ps(W, U, A, B, M) \ 6672 ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6673 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ 6674 (__v4sf)(__m128)(W))) 6675 6676 #define _mm_maskz_shuffle_ps(U, A, B, M) \ 6677 ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6678 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ 6679 (__v4sf)_mm_setzero_ps())) 6680 6681 #define _mm256_mask_shuffle_ps(W, U, A, B, M) \ 6682 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6683 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ 6684 (__v8sf)(__m256)(W))) 6685 6686 #define _mm256_maskz_shuffle_ps(U, A, B, M) \ 6687 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6688 
(__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ 6689 (__v8sf)_mm256_setzero_ps())) 6690 6691 static __inline__ __m128d __DEFAULT_FN_ATTRS128 6692 _mm_rsqrt14_pd (__m128d __A) 6693 { 6694 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 6695 (__v2df) 6696 _mm_setzero_pd (), 6697 (__mmask8) -1); 6698 } 6699 6700 static __inline__ __m128d __DEFAULT_FN_ATTRS128 6701 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A) 6702 { 6703 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 6704 (__v2df) __W, 6705 (__mmask8) __U); 6706 } 6707 6708 static __inline__ __m128d __DEFAULT_FN_ATTRS128 6709 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A) 6710 { 6711 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 6712 (__v2df) 6713 _mm_setzero_pd (), 6714 (__mmask8) __U); 6715 } 6716 6717 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6718 _mm256_rsqrt14_pd (__m256d __A) 6719 { 6720 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 6721 (__v4df) 6722 _mm256_setzero_pd (), 6723 (__mmask8) -1); 6724 } 6725 6726 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6727 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A) 6728 { 6729 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 6730 (__v4df) __W, 6731 (__mmask8) __U); 6732 } 6733 6734 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6735 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A) 6736 { 6737 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 6738 (__v4df) 6739 _mm256_setzero_pd (), 6740 (__mmask8) __U); 6741 } 6742 6743 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6744 _mm_rsqrt14_ps (__m128 __A) 6745 { 6746 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 6747 (__v4sf) 6748 _mm_setzero_ps (), 6749 (__mmask8) -1); 6750 } 6751 6752 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6753 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A) 6754 { 6755 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) 
__A, 6756 (__v4sf) __W, 6757 (__mmask8) __U); 6758 } 6759 6760 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6761 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A) 6762 { 6763 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 6764 (__v4sf) 6765 _mm_setzero_ps (), 6766 (__mmask8) __U); 6767 } 6768 6769 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6770 _mm256_rsqrt14_ps (__m256 __A) 6771 { 6772 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 6773 (__v8sf) 6774 _mm256_setzero_ps (), 6775 (__mmask8) -1); 6776 } 6777 6778 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6779 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A) 6780 { 6781 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 6782 (__v8sf) __W, 6783 (__mmask8) __U); 6784 } 6785 6786 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6787 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A) 6788 { 6789 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 6790 (__v8sf) 6791 _mm256_setzero_ps (), 6792 (__mmask8) __U); 6793 } 6794 6795 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6796 _mm256_broadcast_f32x4(__m128 __A) 6797 { 6798 return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 6799 0, 1, 2, 3, 0, 1, 2, 3); 6800 } 6801 6802 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6803 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) 6804 { 6805 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, 6806 (__v8sf)_mm256_broadcast_f32x4(__A), 6807 (__v8sf)__O); 6808 } 6809 6810 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6811 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A) 6812 { 6813 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, 6814 (__v8sf)_mm256_broadcast_f32x4(__A), 6815 (__v8sf)_mm256_setzero_ps()); 6816 } 6817 6818 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6819 _mm256_broadcast_i32x4(__m128i __A) 6820 { 6821 return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 6822 0, 1, 2, 3, 0, 1, 
2, 3); 6823 } 6824 6825 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6826 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) 6827 { 6828 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 6829 (__v8si)_mm256_broadcast_i32x4(__A), 6830 (__v8si)__O); 6831 } 6832 6833 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6834 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) 6835 { 6836 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 6837 (__v8si)_mm256_broadcast_i32x4(__A), 6838 (__v8si)_mm256_setzero_si256()); 6839 } 6840 6841 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6842 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A) 6843 { 6844 return (__m256d)__builtin_ia32_selectpd_256(__M, 6845 (__v4df) _mm256_broadcastsd_pd(__A), 6846 (__v4df) __O); 6847 } 6848 6849 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6850 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) 6851 { 6852 return (__m256d)__builtin_ia32_selectpd_256(__M, 6853 (__v4df) _mm256_broadcastsd_pd(__A), 6854 (__v4df) _mm256_setzero_pd()); 6855 } 6856 6857 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6858 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A) 6859 { 6860 return (__m128)__builtin_ia32_selectps_128(__M, 6861 (__v4sf) _mm_broadcastss_ps(__A), 6862 (__v4sf) __O); 6863 } 6864 6865 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6866 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) 6867 { 6868 return (__m128)__builtin_ia32_selectps_128(__M, 6869 (__v4sf) _mm_broadcastss_ps(__A), 6870 (__v4sf) _mm_setzero_ps()); 6871 } 6872 6873 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6874 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A) 6875 { 6876 return (__m256)__builtin_ia32_selectps_256(__M, 6877 (__v8sf) _mm256_broadcastss_ps(__A), 6878 (__v8sf) __O); 6879 } 6880 6881 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6882 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) 6883 { 6884 return 
(__m256)__builtin_ia32_selectps_256(__M, 6885 (__v8sf) _mm256_broadcastss_ps(__A), 6886 (__v8sf) _mm256_setzero_ps()); 6887 } 6888 6889 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6890 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 6891 { 6892 return (__m128i)__builtin_ia32_selectd_128(__M, 6893 (__v4si) _mm_broadcastd_epi32(__A), 6894 (__v4si) __O); 6895 } 6896 6897 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6898 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) 6899 { 6900 return (__m128i)__builtin_ia32_selectd_128(__M, 6901 (__v4si) _mm_broadcastd_epi32(__A), 6902 (__v4si) _mm_setzero_si128()); 6903 } 6904 6905 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6906 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A) 6907 { 6908 return (__m256i)__builtin_ia32_selectd_256(__M, 6909 (__v8si) _mm256_broadcastd_epi32(__A), 6910 (__v8si) __O); 6911 } 6912 6913 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6914 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) 6915 { 6916 return (__m256i)__builtin_ia32_selectd_256(__M, 6917 (__v8si) _mm256_broadcastd_epi32(__A), 6918 (__v8si) _mm256_setzero_si256()); 6919 } 6920 6921 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6922 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A) 6923 { 6924 return (__m128i)__builtin_ia32_selectq_128(__M, 6925 (__v2di) _mm_broadcastq_epi64(__A), 6926 (__v2di) __O); 6927 } 6928 6929 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6930 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 6931 { 6932 return (__m128i)__builtin_ia32_selectq_128(__M, 6933 (__v2di) _mm_broadcastq_epi64(__A), 6934 (__v2di) _mm_setzero_si128()); 6935 } 6936 6937 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6938 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A) 6939 { 6940 return (__m256i)__builtin_ia32_selectq_256(__M, 6941 (__v4di) _mm256_broadcastq_epi64(__A), 6942 (__v4di) __O); 6943 } 6944 6945 static __inline__ 
__m256i __DEFAULT_FN_ATTRS256 6946 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 6947 { 6948 return (__m256i)__builtin_ia32_selectq_256(__M, 6949 (__v4di) _mm256_broadcastq_epi64(__A), 6950 (__v4di) _mm256_setzero_si256()); 6951 } 6952 6953 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6954 _mm_cvtsepi32_epi8 (__m128i __A) 6955 { 6956 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 6957 (__v16qi)_mm_undefined_si128(), 6958 (__mmask8) -1); 6959 } 6960 6961 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6962 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 6963 { 6964 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 6965 (__v16qi) __O, __M); 6966 } 6967 6968 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6969 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A) 6970 { 6971 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 6972 (__v16qi) _mm_setzero_si128 (), 6973 __M); 6974 } 6975 6976 static __inline__ void __DEFAULT_FN_ATTRS128 6977 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 6978 { 6979 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 6980 } 6981 6982 static __inline__ __m128i __DEFAULT_FN_ATTRS256 6983 _mm256_cvtsepi32_epi8 (__m256i __A) 6984 { 6985 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 6986 (__v16qi)_mm_undefined_si128(), 6987 (__mmask8) -1); 6988 } 6989 6990 static __inline__ __m128i __DEFAULT_FN_ATTRS256 6991 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 6992 { 6993 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 6994 (__v16qi) __O, __M); 6995 } 6996 6997 static __inline__ __m128i __DEFAULT_FN_ATTRS256 6998 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A) 6999 { 7000 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 7001 (__v16qi) _mm_setzero_si128 (), 7002 __M); 7003 } 7004 7005 static __inline__ void __DEFAULT_FN_ATTRS256 7006 
_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7007 { 7008 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); 7009 } 7010 7011 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7012 _mm_cvtsepi32_epi16 (__m128i __A) 7013 { 7014 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7015 (__v8hi)_mm_setzero_si128 (), 7016 (__mmask8) -1); 7017 } 7018 7019 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7020 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7021 { 7022 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7023 (__v8hi)__O, 7024 __M); 7025 } 7026 7027 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7028 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A) 7029 { 7030 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7031 (__v8hi) _mm_setzero_si128 (), 7032 __M); 7033 } 7034 7035 static __inline__ void __DEFAULT_FN_ATTRS128 7036 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7037 { 7038 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 7039 } 7040 7041 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7042 _mm256_cvtsepi32_epi16 (__m256i __A) 7043 { 7044 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7045 (__v8hi)_mm_undefined_si128(), 7046 (__mmask8) -1); 7047 } 7048 7049 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7050 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7051 { 7052 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7053 (__v8hi) __O, __M); 7054 } 7055 7056 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7057 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A) 7058 { 7059 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7060 (__v8hi) _mm_setzero_si128 (), 7061 __M); 7062 } 7063 7064 static __inline__ void __DEFAULT_FN_ATTRS256 7065 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7066 { 7067 
__builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 7068 } 7069 7070 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7071 _mm_cvtsepi64_epi8 (__m128i __A) 7072 { 7073 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7074 (__v16qi)_mm_undefined_si128(), 7075 (__mmask8) -1); 7076 } 7077 7078 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7079 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7080 { 7081 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7082 (__v16qi) __O, __M); 7083 } 7084 7085 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7086 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A) 7087 { 7088 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7089 (__v16qi) _mm_setzero_si128 (), 7090 __M); 7091 } 7092 7093 static __inline__ void __DEFAULT_FN_ATTRS128 7094 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7095 { 7096 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 7097 } 7098 7099 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7100 _mm256_cvtsepi64_epi8 (__m256i __A) 7101 { 7102 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7103 (__v16qi)_mm_undefined_si128(), 7104 (__mmask8) -1); 7105 } 7106 7107 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7108 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7109 { 7110 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7111 (__v16qi) __O, __M); 7112 } 7113 7114 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7115 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A) 7116 { 7117 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7118 (__v16qi) _mm_setzero_si128 (), 7119 __M); 7120 } 7121 7122 static __inline__ void __DEFAULT_FN_ATTRS256 7123 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7124 { 7125 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 7126 } 7127 7128 static __inline__ 
__m128i __DEFAULT_FN_ATTRS128 7129 _mm_cvtsepi64_epi32 (__m128i __A) 7130 { 7131 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7132 (__v4si)_mm_undefined_si128(), 7133 (__mmask8) -1); 7134 } 7135 7136 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7137 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7138 { 7139 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7140 (__v4si) __O, __M); 7141 } 7142 7143 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7144 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A) 7145 { 7146 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7147 (__v4si) _mm_setzero_si128 (), 7148 __M); 7149 } 7150 7151 static __inline__ void __DEFAULT_FN_ATTRS128 7152 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 7153 { 7154 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 7155 } 7156 7157 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7158 _mm256_cvtsepi64_epi32 (__m256i __A) 7159 { 7160 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7161 (__v4si)_mm_undefined_si128(), 7162 (__mmask8) -1); 7163 } 7164 7165 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7166 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 7167 { 7168 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7169 (__v4si)__O, 7170 __M); 7171 } 7172 7173 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7174 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A) 7175 { 7176 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7177 (__v4si) _mm_setzero_si128 (), 7178 __M); 7179 } 7180 7181 static __inline__ void __DEFAULT_FN_ATTRS256 7182 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 7183 { 7184 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 7185 } 7186 7187 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7188 _mm_cvtsepi64_epi16 (__m128i __A) 7189 { 7190 return (__m128i) 
__builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7191 (__v8hi)_mm_undefined_si128(), 7192 (__mmask8) -1); 7193 } 7194 7195 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7196 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7197 { 7198 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7199 (__v8hi) __O, __M); 7200 } 7201 7202 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7203 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A) 7204 { 7205 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7206 (__v8hi) _mm_setzero_si128 (), 7207 __M); 7208 } 7209 7210 static __inline__ void __DEFAULT_FN_ATTRS128 7211 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7212 { 7213 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 7214 } 7215 7216 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7217 _mm256_cvtsepi64_epi16 (__m256i __A) 7218 { 7219 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7220 (__v8hi)_mm_undefined_si128(), 7221 (__mmask8) -1); 7222 } 7223 7224 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7225 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7226 { 7227 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7228 (__v8hi) __O, __M); 7229 } 7230 7231 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7232 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A) 7233 { 7234 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7235 (__v8hi) _mm_setzero_si128 (), 7236 __M); 7237 } 7238 7239 static __inline__ void __DEFAULT_FN_ATTRS256 7240 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7241 { 7242 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 7243 } 7244 7245 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7246 _mm_cvtusepi32_epi8 (__m128i __A) 7247 { 7248 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7249 (__v16qi)_mm_undefined_si128(), 7250 (__mmask8) -1); 
7251 } 7252 7253 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7254 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7255 { 7256 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7257 (__v16qi) __O, 7258 __M); 7259 } 7260 7261 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7262 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A) 7263 { 7264 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7265 (__v16qi) _mm_setzero_si128 (), 7266 __M); 7267 } 7268 7269 static __inline__ void __DEFAULT_FN_ATTRS128 7270 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7271 { 7272 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 7273 } 7274 7275 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7276 _mm256_cvtusepi32_epi8 (__m256i __A) 7277 { 7278 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7279 (__v16qi)_mm_undefined_si128(), 7280 (__mmask8) -1); 7281 } 7282 7283 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7284 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7285 { 7286 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7287 (__v16qi) __O, 7288 __M); 7289 } 7290 7291 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7292 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A) 7293 { 7294 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7295 (__v16qi) _mm_setzero_si128 (), 7296 __M); 7297 } 7298 7299 static __inline__ void __DEFAULT_FN_ATTRS256 7300 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7301 { 7302 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M); 7303 } 7304 7305 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7306 _mm_cvtusepi32_epi16 (__m128i __A) 7307 { 7308 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7309 (__v8hi)_mm_undefined_si128(), 7310 (__mmask8) -1); 7311 } 7312 7313 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7314 
_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7315 { 7316 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7317 (__v8hi) __O, __M); 7318 } 7319 7320 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7321 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A) 7322 { 7323 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7324 (__v8hi) _mm_setzero_si128 (), 7325 __M); 7326 } 7327 7328 static __inline__ void __DEFAULT_FN_ATTRS128 7329 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7330 { 7331 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 7332 } 7333 7334 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7335 _mm256_cvtusepi32_epi16 (__m256i __A) 7336 { 7337 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7338 (__v8hi) _mm_undefined_si128(), 7339 (__mmask8) -1); 7340 } 7341 7342 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7343 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7344 { 7345 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7346 (__v8hi) __O, __M); 7347 } 7348 7349 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7350 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A) 7351 { 7352 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7353 (__v8hi) _mm_setzero_si128 (), 7354 __M); 7355 } 7356 7357 static __inline__ void __DEFAULT_FN_ATTRS256 7358 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7359 { 7360 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 7361 } 7362 7363 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7364 _mm_cvtusepi64_epi8 (__m128i __A) 7365 { 7366 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 7367 (__v16qi)_mm_undefined_si128(), 7368 (__mmask8) -1); 7369 } 7370 7371 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7372 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7373 { 7374 return 
(__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 7375 (__v16qi) __O, 7376 __M); 7377 } 7378 7379 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7380 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A) 7381 { 7382 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 7383 (__v16qi) _mm_setzero_si128 (), 7384 __M); 7385 } 7386 7387 static __inline__ void __DEFAULT_FN_ATTRS128 7388 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7389 { 7390 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 7391 } 7392 7393 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7394 _mm256_cvtusepi64_epi8 (__m256i __A) 7395 { 7396 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7397 (__v16qi)_mm_undefined_si128(), 7398 (__mmask8) -1); 7399 } 7400 7401 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7402 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7403 { 7404 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7405 (__v16qi) __O, 7406 __M); 7407 } 7408 7409 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7410 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A) 7411 { 7412 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7413 (__v16qi) _mm_setzero_si128 (), 7414 __M); 7415 } 7416 7417 static __inline__ void __DEFAULT_FN_ATTRS256 7418 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7419 { 7420 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 7421 } 7422 7423 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7424 _mm_cvtusepi64_epi32 (__m128i __A) 7425 { 7426 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7427 (__v4si)_mm_undefined_si128(), 7428 (__mmask8) -1); 7429 } 7430 7431 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7432 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7433 { 7434 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7435 (__v4si) __O, 
__M); 7436 } 7437 7438 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7439 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A) 7440 { 7441 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7442 (__v4si) _mm_setzero_si128 (), 7443 __M); 7444 } 7445 7446 static __inline__ void __DEFAULT_FN_ATTRS128 7447 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 7448 { 7449 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 7450 } 7451 7452 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7453 _mm256_cvtusepi64_epi32 (__m256i __A) 7454 { 7455 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7456 (__v4si)_mm_undefined_si128(), 7457 (__mmask8) -1); 7458 } 7459 7460 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7461 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 7462 { 7463 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7464 (__v4si) __O, __M); 7465 } 7466 7467 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7468 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A) 7469 { 7470 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7471 (__v4si) _mm_setzero_si128 (), 7472 __M); 7473 } 7474 7475 static __inline__ void __DEFAULT_FN_ATTRS256 7476 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 7477 { 7478 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 7479 } 7480 7481 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7482 _mm_cvtusepi64_epi16 (__m128i __A) 7483 { 7484 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7485 (__v8hi)_mm_undefined_si128(), 7486 (__mmask8) -1); 7487 } 7488 7489 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7490 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7491 { 7492 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7493 (__v8hi) __O, __M); 7494 } 7495 7496 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7497 
_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A) 7498 { 7499 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7500 (__v8hi) _mm_setzero_si128 (), 7501 __M); 7502 } 7503 7504 static __inline__ void __DEFAULT_FN_ATTRS128 7505 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7506 { 7507 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 7508 } 7509 7510 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7511 _mm256_cvtusepi64_epi16 (__m256i __A) 7512 { 7513 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7514 (__v8hi)_mm_undefined_si128(), 7515 (__mmask8) -1); 7516 } 7517 7518 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7519 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7520 { 7521 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7522 (__v8hi) __O, __M); 7523 } 7524 7525 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7526 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A) 7527 { 7528 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7529 (__v8hi) _mm_setzero_si128 (), 7530 __M); 7531 } 7532 7533 static __inline__ void __DEFAULT_FN_ATTRS256 7534 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7535 { 7536 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 7537 } 7538 7539 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7540 _mm_cvtepi32_epi8 (__m128i __A) 7541 { 7542 return (__m128i)__builtin_shufflevector( 7543 __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, 7544 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); 7545 } 7546 7547 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7548 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7549 { 7550 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 7551 (__v16qi) __O, __M); 7552 } 7553 7554 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7555 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A) 
7556 { 7557 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 7558 (__v16qi) 7559 _mm_setzero_si128 (), 7560 __M); 7561 } 7562 7563 static __inline__ void __DEFAULT_FN_ATTRS128 7564 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7565 { 7566 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 7567 } 7568 7569 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7570 _mm256_cvtepi32_epi8 (__m256i __A) 7571 { 7572 return (__m128i)__builtin_shufflevector( 7573 __builtin_convertvector((__v8si)__A, __v8qi), 7574 (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 7575 12, 13, 14, 15); 7576 } 7577 7578 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7579 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7580 { 7581 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 7582 (__v16qi) __O, __M); 7583 } 7584 7585 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7586 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A) 7587 { 7588 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 7589 (__v16qi) _mm_setzero_si128 (), 7590 __M); 7591 } 7592 7593 static __inline__ void __DEFAULT_FN_ATTRS256 7594 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7595 { 7596 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); 7597 } 7598 7599 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7600 _mm_cvtepi32_epi16 (__m128i __A) 7601 { 7602 return (__m128i)__builtin_shufflevector( 7603 __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, 7604 2, 3, 4, 5, 6, 7); 7605 } 7606 7607 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7608 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7609 { 7610 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 7611 (__v8hi) __O, __M); 7612 } 7613 7614 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7615 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A) 7616 { 7617 return 
(__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 7618 (__v8hi) _mm_setzero_si128 (), 7619 __M); 7620 } 7621 7622 static __inline__ void __DEFAULT_FN_ATTRS128 7623 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7624 { 7625 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 7626 } 7627 7628 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7629 _mm256_cvtepi32_epi16 (__m256i __A) 7630 { 7631 return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi); 7632 } 7633 7634 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7635 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7636 { 7637 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 7638 (__v8hi) __O, __M); 7639 } 7640 7641 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7642 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A) 7643 { 7644 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 7645 (__v8hi) _mm_setzero_si128 (), 7646 __M); 7647 } 7648 7649 static __inline__ void __DEFAULT_FN_ATTRS256 7650 _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7651 { 7652 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 7653 } 7654 7655 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7656 _mm_cvtepi64_epi8 (__m128i __A) 7657 { 7658 return (__m128i)__builtin_shufflevector( 7659 __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3, 7660 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); 7661 } 7662 7663 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7664 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7665 { 7666 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 7667 (__v16qi) __O, __M); 7668 } 7669 7670 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7671 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A) 7672 { 7673 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 7674 (__v16qi) _mm_setzero_si128 (), 7675 __M); 7676 } 7677 7678 static __inline__ 
void __DEFAULT_FN_ATTRS128 7679 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7680 { 7681 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 7682 } 7683 7684 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7685 _mm256_cvtepi64_epi8 (__m256i __A) 7686 { 7687 return (__m128i)__builtin_shufflevector( 7688 __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, 7689 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); 7690 } 7691 7692 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7693 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7694 { 7695 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 7696 (__v16qi) __O, __M); 7697 } 7698 7699 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7700 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A) 7701 { 7702 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 7703 (__v16qi) _mm_setzero_si128 (), 7704 __M); 7705 } 7706 7707 static __inline__ void __DEFAULT_FN_ATTRS256 7708 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7709 { 7710 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 7711 } 7712 7713 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7714 _mm_cvtepi64_epi32 (__m128i __A) 7715 { 7716 return (__m128i)__builtin_shufflevector( 7717 __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3); 7718 } 7719 7720 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7721 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7722 { 7723 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 7724 (__v4si) __O, __M); 7725 } 7726 7727 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7728 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A) 7729 { 7730 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 7731 (__v4si) _mm_setzero_si128 (), 7732 __M); 7733 } 7734 7735 static __inline__ void __DEFAULT_FN_ATTRS128 7736 _mm_mask_cvtepi64_storeu_epi32 (void * 
__P, __mmask8 __M, __m128i __A) 7737 { 7738 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 7739 } 7740 7741 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7742 _mm256_cvtepi64_epi32 (__m256i __A) 7743 { 7744 return (__m128i)__builtin_convertvector((__v4di)__A, __v4si); 7745 } 7746 7747 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7748 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 7749 { 7750 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 7751 (__v4si)_mm256_cvtepi64_epi32(__A), 7752 (__v4si)__O); 7753 } 7754 7755 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7756 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A) 7757 { 7758 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 7759 (__v4si)_mm256_cvtepi64_epi32(__A), 7760 (__v4si)_mm_setzero_si128()); 7761 } 7762 7763 static __inline__ void __DEFAULT_FN_ATTRS256 7764 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 7765 { 7766 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 7767 } 7768 7769 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7770 _mm_cvtepi64_epi16 (__m128i __A) 7771 { 7772 return (__m128i)__builtin_shufflevector( 7773 __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3, 7774 3, 3, 3, 3); 7775 } 7776 7777 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7778 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7779 { 7780 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 7781 (__v8hi)__O, 7782 __M); 7783 } 7784 7785 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7786 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A) 7787 { 7788 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 7789 (__v8hi) _mm_setzero_si128 (), 7790 __M); 7791 } 7792 7793 static __inline__ void __DEFAULT_FN_ATTRS128 7794 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7795 { 7796 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, 
(__v2di) __A, __M); 7797 } 7798 7799 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7800 _mm256_cvtepi64_epi16 (__m256i __A) 7801 { 7802 return (__m128i)__builtin_shufflevector( 7803 __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, 7804 2, 3, 4, 5, 6, 7); 7805 } 7806 7807 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7808 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7809 { 7810 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 7811 (__v8hi) __O, __M); 7812 } 7813 7814 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7815 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A) 7816 { 7817 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 7818 (__v8hi) _mm_setzero_si128 (), 7819 __M); 7820 } 7821 7822 static __inline__ void __DEFAULT_FN_ATTRS256 7823 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7824 { 7825 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 7826 } 7827 7828 #define _mm256_extractf32x4_ps(A, imm) \ 7829 ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ 7830 (int)(imm), \ 7831 (__v4sf)_mm_undefined_ps(), \ 7832 (__mmask8)-1)) 7833 7834 #define _mm256_mask_extractf32x4_ps(W, U, A, imm) \ 7835 ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ 7836 (int)(imm), \ 7837 (__v4sf)(__m128)(W), \ 7838 (__mmask8)(U))) 7839 7840 #define _mm256_maskz_extractf32x4_ps(U, A, imm) \ 7841 ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ 7842 (int)(imm), \ 7843 (__v4sf)_mm_setzero_ps(), \ 7844 (__mmask8)(U))) 7845 7846 #define _mm256_extracti32x4_epi32(A, imm) \ 7847 ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ 7848 (int)(imm), \ 7849 (__v4si)_mm_undefined_si128(), \ 7850 (__mmask8)-1)) 7851 7852 #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \ 7853 ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ 7854 (int)(imm), \ 7855 (__v4si)(__m128i)(W), 
\ 7856 (__mmask8)(U))) 7857 7858 #define _mm256_maskz_extracti32x4_epi32(U, A, imm) \ 7859 ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ 7860 (int)(imm), \ 7861 (__v4si)_mm_setzero_si128(), \ 7862 (__mmask8)(U))) 7863 7864 #define _mm256_insertf32x4(A, B, imm) \ 7865 ((__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \ 7866 (__v4sf)(__m128)(B), (int)(imm))) 7867 7868 #define _mm256_mask_insertf32x4(W, U, A, B, imm) \ 7869 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 7870 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ 7871 (__v8sf)(__m256)(W))) 7872 7873 #define _mm256_maskz_insertf32x4(U, A, B, imm) \ 7874 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 7875 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ 7876 (__v8sf)_mm256_setzero_ps())) 7877 7878 #define _mm256_inserti32x4(A, B, imm) \ 7879 ((__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \ 7880 (__v4si)(__m128i)(B), (int)(imm))) 7881 7882 #define _mm256_mask_inserti32x4(W, U, A, B, imm) \ 7883 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 7884 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ 7885 (__v8si)(__m256i)(W))) 7886 7887 #define _mm256_maskz_inserti32x4(U, A, B, imm) \ 7888 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 7889 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ 7890 (__v8si)_mm256_setzero_si256())) 7891 7892 #define _mm_getmant_pd(A, B, C) \ 7893 ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 7894 (int)(((C)<<2) | (B)), \ 7895 (__v2df)_mm_setzero_pd(), \ 7896 (__mmask8)-1)) 7897 7898 #define _mm_mask_getmant_pd(W, U, A, B, C) \ 7899 ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 7900 (int)(((C)<<2) | (B)), \ 7901 (__v2df)(__m128d)(W), \ 7902 (__mmask8)(U))) 7903 7904 #define _mm_maskz_getmant_pd(U, A, B, C) \ 7905 ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 7906 (int)(((C)<<2) | (B)), \ 7907 (__v2df)_mm_setzero_pd(), \ 7908 (__mmask8)(U))) 7909 7910 #define 
_mm256_getmant_pd(A, B, C) \ 7911 ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 7912 (int)(((C)<<2) | (B)), \ 7913 (__v4df)_mm256_setzero_pd(), \ 7914 (__mmask8)-1)) 7915 7916 #define _mm256_mask_getmant_pd(W, U, A, B, C) \ 7917 ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 7918 (int)(((C)<<2) | (B)), \ 7919 (__v4df)(__m256d)(W), \ 7920 (__mmask8)(U))) 7921 7922 #define _mm256_maskz_getmant_pd(U, A, B, C) \ 7923 ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 7924 (int)(((C)<<2) | (B)), \ 7925 (__v4df)_mm256_setzero_pd(), \ 7926 (__mmask8)(U))) 7927 7928 #define _mm_getmant_ps(A, B, C) \ 7929 ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 7930 (int)(((C)<<2) | (B)), \ 7931 (__v4sf)_mm_setzero_ps(), \ 7932 (__mmask8)-1)) 7933 7934 #define _mm_mask_getmant_ps(W, U, A, B, C) \ 7935 ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 7936 (int)(((C)<<2) | (B)), \ 7937 (__v4sf)(__m128)(W), \ 7938 (__mmask8)(U))) 7939 7940 #define _mm_maskz_getmant_ps(U, A, B, C) \ 7941 ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 7942 (int)(((C)<<2) | (B)), \ 7943 (__v4sf)_mm_setzero_ps(), \ 7944 (__mmask8)(U))) 7945 7946 #define _mm256_getmant_ps(A, B, C) \ 7947 ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 7948 (int)(((C)<<2) | (B)), \ 7949 (__v8sf)_mm256_setzero_ps(), \ 7950 (__mmask8)-1)) 7951 7952 #define _mm256_mask_getmant_ps(W, U, A, B, C) \ 7953 ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 7954 (int)(((C)<<2) | (B)), \ 7955 (__v8sf)(__m256)(W), \ 7956 (__mmask8)(U))) 7957 7958 #define _mm256_maskz_getmant_ps(U, A, B, C) \ 7959 ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 7960 (int)(((C)<<2) | (B)), \ 7961 (__v8sf)_mm256_setzero_ps(), \ 7962 (__mmask8)(U))) 7963 7964 #define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \ 7965 ((__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \ 7966 
(void const *)(addr), \ 7967 (__v2di)(__m128i)(index), \ 7968 (__mmask8)(mask), (int)(scale))) 7969 7970 #define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \ 7971 ((__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \ 7972 (void const *)(addr), \ 7973 (__v2di)(__m128i)(index), \ 7974 (__mmask8)(mask), (int)(scale))) 7975 7976 #define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \ 7977 ((__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \ 7978 (void const *)(addr), \ 7979 (__v4di)(__m256i)(index), \ 7980 (__mmask8)(mask), (int)(scale))) 7981 7982 #define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \ 7983 ((__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \ 7984 (void const *)(addr), \ 7985 (__v4di)(__m256i)(index), \ 7986 (__mmask8)(mask), (int)(scale))) 7987 7988 #define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \ 7989 ((__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \ 7990 (void const *)(addr), \ 7991 (__v2di)(__m128i)(index), \ 7992 (__mmask8)(mask), (int)(scale))) 7993 7994 #define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \ 7995 ((__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \ 7996 (void const *)(addr), \ 7997 (__v2di)(__m128i)(index), \ 7998 (__mmask8)(mask), (int)(scale))) 7999 8000 #define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \ 8001 ((__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \ 8002 (void const *)(addr), \ 8003 (__v4di)(__m256i)(index), \ 8004 (__mmask8)(mask), (int)(scale))) 8005 8006 #define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \ 8007 ((__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \ 8008 (void const *)(addr), \ 8009 (__v4di)(__m256i)(index), \ 8010 (__mmask8)(mask), (int)(scale))) 8011 8012 #define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \ 8013 
((__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \ 8014 (void const *)(addr), \ 8015 (__v4si)(__m128i)(index), \ 8016 (__mmask8)(mask), (int)(scale))) 8017 8018 #define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \ 8019 ((__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \ 8020 (void const *)(addr), \ 8021 (__v4si)(__m128i)(index), \ 8022 (__mmask8)(mask), (int)(scale))) 8023 8024 #define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \ 8025 ((__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \ 8026 (void const *)(addr), \ 8027 (__v4si)(__m128i)(index), \ 8028 (__mmask8)(mask), (int)(scale))) 8029 8030 #define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \ 8031 ((__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \ 8032 (void const *)(addr), \ 8033 (__v4si)(__m128i)(index), \ 8034 (__mmask8)(mask), (int)(scale))) 8035 8036 #define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \ 8037 ((__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \ 8038 (void const *)(addr), \ 8039 (__v4si)(__m128i)(index), \ 8040 (__mmask8)(mask), (int)(scale))) 8041 8042 #define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \ 8043 ((__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \ 8044 (void const *)(addr), \ 8045 (__v4si)(__m128i)(index), \ 8046 (__mmask8)(mask), (int)(scale))) 8047 8048 #define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \ 8049 ((__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \ 8050 (void const *)(addr), \ 8051 (__v8si)(__m256i)(index), \ 8052 (__mmask8)(mask), (int)(scale))) 8053 8054 #define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \ 8055 ((__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \ 8056 (void const *)(addr), \ 8057 (__v8si)(__m256i)(index), \ 8058 (__mmask8)(mask), (int)(scale))) 8059 8060 #define _mm256_permutex_pd(X, C) \ 8061 
((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C))) 8062 8063 #define _mm256_mask_permutex_pd(W, U, X, C) \ 8064 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 8065 (__v4df)_mm256_permutex_pd((X), (C)), \ 8066 (__v4df)(__m256d)(W))) 8067 8068 #define _mm256_maskz_permutex_pd(U, X, C) \ 8069 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 8070 (__v4df)_mm256_permutex_pd((X), (C)), \ 8071 (__v4df)_mm256_setzero_pd())) 8072 8073 #define _mm256_permutex_epi64(X, C) \ 8074 ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C))) 8075 8076 #define _mm256_mask_permutex_epi64(W, U, X, C) \ 8077 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8078 (__v4di)_mm256_permutex_epi64((X), (C)), \ 8079 (__v4di)(__m256i)(W))) 8080 8081 #define _mm256_maskz_permutex_epi64(U, X, C) \ 8082 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8083 (__v4di)_mm256_permutex_epi64((X), (C)), \ 8084 (__v4di)_mm256_setzero_si256())) 8085 8086 static __inline__ __m256d __DEFAULT_FN_ATTRS256 8087 _mm256_permutexvar_pd (__m256i __X, __m256d __Y) 8088 { 8089 return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X); 8090 } 8091 8092 static __inline__ __m256d __DEFAULT_FN_ATTRS256 8093 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X, 8094 __m256d __Y) 8095 { 8096 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 8097 (__v4df)_mm256_permutexvar_pd(__X, __Y), 8098 (__v4df)__W); 8099 } 8100 8101 static __inline__ __m256d __DEFAULT_FN_ATTRS256 8102 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y) 8103 { 8104 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 8105 (__v4df)_mm256_permutexvar_pd(__X, __Y), 8106 (__v4df)_mm256_setzero_pd()); 8107 } 8108 8109 static __inline__ __m256i __DEFAULT_FN_ATTRS256 8110 _mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y) 8111 { 8112 return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X); 8113 } 8114 8115 static __inline__ __m256i 
__DEFAULT_FN_ATTRS256 8116 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y) 8117 { 8118 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 8119 (__v4di)_mm256_permutexvar_epi64(__X, __Y), 8120 (__v4di)_mm256_setzero_si256()); 8121 } 8122 8123 static __inline__ __m256i __DEFAULT_FN_ATTRS256 8124 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X, 8125 __m256i __Y) 8126 { 8127 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 8128 (__v4di)_mm256_permutexvar_epi64(__X, __Y), 8129 (__v4di)__W); 8130 } 8131 8132 #define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A)) 8133 8134 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8135 _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) 8136 { 8137 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8138 (__v8sf)_mm256_permutexvar_ps(__X, __Y), 8139 (__v8sf)__W); 8140 } 8141 8142 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8143 _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) 8144 { 8145 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8146 (__v8sf)_mm256_permutexvar_ps(__X, __Y), 8147 (__v8sf)_mm256_setzero_ps()); 8148 } 8149 8150 #define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A)) 8151 8152 static __inline__ __m256i __DEFAULT_FN_ATTRS256 8153 _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, 8154 __m256i __Y) 8155 { 8156 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 8157 (__v8si)_mm256_permutexvar_epi32(__X, __Y), 8158 (__v8si)__W); 8159 } 8160 8161 static __inline__ __m256i __DEFAULT_FN_ATTRS256 8162 _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) 8163 { 8164 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 8165 (__v8si)_mm256_permutexvar_epi32(__X, __Y), 8166 (__v8si)_mm256_setzero_si256()); 8167 } 8168 8169 #define _mm_alignr_epi32(A, B, imm) \ 8170 
((__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \ 8171 (__v4si)(__m128i)(B), (int)(imm))) 8172 8173 #define _mm_mask_alignr_epi32(W, U, A, B, imm) \ 8174 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8175 (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ 8176 (__v4si)(__m128i)(W))) 8177 8178 #define _mm_maskz_alignr_epi32(U, A, B, imm) \ 8179 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8180 (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ 8181 (__v4si)_mm_setzero_si128())) 8182 8183 #define _mm256_alignr_epi32(A, B, imm) \ 8184 ((__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \ 8185 (__v8si)(__m256i)(B), (int)(imm))) 8186 8187 #define _mm256_mask_alignr_epi32(W, U, A, B, imm) \ 8188 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8189 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ 8190 (__v8si)(__m256i)(W))) 8191 8192 #define _mm256_maskz_alignr_epi32(U, A, B, imm) \ 8193 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8194 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ 8195 (__v8si)_mm256_setzero_si256())) 8196 8197 #define _mm_alignr_epi64(A, B, imm) \ 8198 ((__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \ 8199 (__v2di)(__m128i)(B), (int)(imm))) 8200 8201 #define _mm_mask_alignr_epi64(W, U, A, B, imm) \ 8202 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 8203 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ 8204 (__v2di)(__m128i)(W))) 8205 8206 #define _mm_maskz_alignr_epi64(U, A, B, imm) \ 8207 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 8208 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ 8209 (__v2di)_mm_setzero_si128())) 8210 8211 #define _mm256_alignr_epi64(A, B, imm) \ 8212 ((__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \ 8213 (__v4di)(__m256i)(B), (int)(imm))) 8214 8215 #define _mm256_mask_alignr_epi64(W, U, A, B, imm) \ 8216 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8217 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ 8218 (__v4di)(__m256i)(W))) 8219 8220 #define 
_mm256_maskz_alignr_epi64(U, A, B, imm) \ 8221 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8222 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ 8223 (__v4di)_mm256_setzero_si256())) 8224 8225 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8226 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A) 8227 { 8228 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8229 (__v4sf)_mm_movehdup_ps(__A), 8230 (__v4sf)__W); 8231 } 8232 8233 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8234 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A) 8235 { 8236 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8237 (__v4sf)_mm_movehdup_ps(__A), 8238 (__v4sf)_mm_setzero_ps()); 8239 } 8240 8241 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8242 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A) 8243 { 8244 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8245 (__v8sf)_mm256_movehdup_ps(__A), 8246 (__v8sf)__W); 8247 } 8248 8249 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8250 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A) 8251 { 8252 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8253 (__v8sf)_mm256_movehdup_ps(__A), 8254 (__v8sf)_mm256_setzero_ps()); 8255 } 8256 8257 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8258 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A) 8259 { 8260 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8261 (__v4sf)_mm_moveldup_ps(__A), 8262 (__v4sf)__W); 8263 } 8264 8265 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8266 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A) 8267 { 8268 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8269 (__v4sf)_mm_moveldup_ps(__A), 8270 (__v4sf)_mm_setzero_ps()); 8271 } 8272 8273 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8274 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A) 8275 { 8276 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8277 (__v8sf)_mm256_moveldup_ps(__A), 8278 (__v8sf)__W); 8279 } 8280 8281 
static __inline__ __m256 __DEFAULT_FN_ATTRS256 8282 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) 8283 { 8284 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8285 (__v8sf)_mm256_moveldup_ps(__A), 8286 (__v8sf)_mm256_setzero_ps()); 8287 } 8288 8289 #define _mm256_mask_shuffle_epi32(W, U, A, I) \ 8290 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8291 (__v8si)_mm256_shuffle_epi32((A), (I)), \ 8292 (__v8si)(__m256i)(W))) 8293 8294 #define _mm256_maskz_shuffle_epi32(U, A, I) \ 8295 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8296 (__v8si)_mm256_shuffle_epi32((A), (I)), \ 8297 (__v8si)_mm256_setzero_si256())) 8298 8299 #define _mm_mask_shuffle_epi32(W, U, A, I) \ 8300 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8301 (__v4si)_mm_shuffle_epi32((A), (I)), \ 8302 (__v4si)(__m128i)(W))) 8303 8304 #define _mm_maskz_shuffle_epi32(U, A, I) \ 8305 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8306 (__v4si)_mm_shuffle_epi32((A), (I)), \ 8307 (__v4si)_mm_setzero_si128())) 8308 8309 static __inline__ __m128d __DEFAULT_FN_ATTRS128 8310 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) 8311 { 8312 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 8313 (__v2df) __A, 8314 (__v2df) __W); 8315 } 8316 8317 static __inline__ __m128d __DEFAULT_FN_ATTRS128 8318 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A) 8319 { 8320 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 8321 (__v2df) __A, 8322 (__v2df) _mm_setzero_pd ()); 8323 } 8324 8325 static __inline__ __m256d __DEFAULT_FN_ATTRS256 8326 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A) 8327 { 8328 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 8329 (__v4df) __A, 8330 (__v4df) __W); 8331 } 8332 8333 static __inline__ __m256d __DEFAULT_FN_ATTRS256 8334 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A) 8335 { 8336 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 8337 (__v4df) __A, 8338 (__v4df) _mm256_setzero_pd ()); 8339 } 
8340 8341 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8342 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A) 8343 { 8344 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 8345 (__v4sf) __A, 8346 (__v4sf) __W); 8347 } 8348 8349 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8350 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A) 8351 { 8352 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 8353 (__v4sf) __A, 8354 (__v4sf) _mm_setzero_ps ()); 8355 } 8356 8357 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8358 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A) 8359 { 8360 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 8361 (__v8sf) __A, 8362 (__v8sf) __W); 8363 } 8364 8365 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8366 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A) 8367 { 8368 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 8369 (__v8sf) __A, 8370 (__v8sf) _mm256_setzero_ps ()); 8371 } 8372 8373 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8374 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A) 8375 { 8376 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, 8377 (__v4sf) __W, 8378 (__mmask8) __U); 8379 } 8380 8381 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8382 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A) 8383 { 8384 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, 8385 (__v4sf) 8386 _mm_setzero_ps (), 8387 (__mmask8) __U); 8388 } 8389 8390 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8391 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A) 8392 { 8393 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, 8394 (__v8sf) __W, 8395 (__mmask8) __U); 8396 } 8397 8398 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8399 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A) 8400 { 8401 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, 8402 (__v8sf) 8403 _mm256_setzero_ps (), 8404 (__mmask8) __U); 8405 } 8406 8407 #define _mm_mask_cvt_roundps_ph(W, 
U, A, I) \ 8408 ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ 8409 (__v8hi)(__m128i)(W), \ 8410 (__mmask8)(U))) 8411 8412 #define _mm_maskz_cvt_roundps_ph(U, A, I) \ 8413 ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ 8414 (__v8hi)_mm_setzero_si128(), \ 8415 (__mmask8)(U))) 8416 8417 #define _mm_mask_cvtps_ph _mm_mask_cvt_roundps_ph 8418 #define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph 8419 8420 #define _mm256_mask_cvt_roundps_ph(W, U, A, I) \ 8421 ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ 8422 (__v8hi)(__m128i)(W), \ 8423 (__mmask8)(U))) 8424 8425 #define _mm256_maskz_cvt_roundps_ph(U, A, I) \ 8426 ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ 8427 (__v8hi)_mm_setzero_si128(), \ 8428 (__mmask8)(U))) 8429 8430 #define _mm256_mask_cvtps_ph _mm256_mask_cvt_roundps_ph 8431 #define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph 8432 8433 8434 #undef __DEFAULT_FN_ATTRS128 8435 #undef __DEFAULT_FN_ATTRS256 8436 8437 #endif /* __AVX512VLINTRIN_H */ 8438