1 /*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10 #ifndef __IMMINTRIN_H 11 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead." 12 #endif 13 14 #ifndef __AVX512VLINTRIN_H 15 #define __AVX512VLINTRIN_H 16 17 #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128))) 18 #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256))) 19 20 typedef short __v2hi __attribute__((__vector_size__(4))); 21 typedef char __v4qi __attribute__((__vector_size__(4))); 22 typedef char __v2qi __attribute__((__vector_size__(2))); 23 24 /* Integer compare */ 25 26 #define _mm_cmpeq_epi32_mask(A, B) \ 27 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ) 28 #define _mm_mask_cmpeq_epi32_mask(k, A, B) \ 29 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ) 30 #define _mm_cmpge_epi32_mask(A, B) \ 31 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE) 32 #define _mm_mask_cmpge_epi32_mask(k, A, B) \ 33 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE) 34 #define _mm_cmpgt_epi32_mask(A, B) \ 35 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT) 36 #define _mm_mask_cmpgt_epi32_mask(k, A, B) \ 37 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT) 38 #define _mm_cmple_epi32_mask(A, B) \ 39 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE) 40 #define _mm_mask_cmple_epi32_mask(k, A, B) \ 41 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE) 42 #define _mm_cmplt_epi32_mask(A, B) \ 43 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT) 44 #define _mm_mask_cmplt_epi32_mask(k, A, B) \ 45 _mm_mask_cmp_epi32_mask((k), (A), (B), 
_MM_CMPINT_LT) 46 #define _mm_cmpneq_epi32_mask(A, B) \ 47 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE) 48 #define _mm_mask_cmpneq_epi32_mask(k, A, B) \ 49 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE) 50 51 #define _mm256_cmpeq_epi32_mask(A, B) \ 52 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ) 53 #define _mm256_mask_cmpeq_epi32_mask(k, A, B) \ 54 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ) 55 #define _mm256_cmpge_epi32_mask(A, B) \ 56 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE) 57 #define _mm256_mask_cmpge_epi32_mask(k, A, B) \ 58 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE) 59 #define _mm256_cmpgt_epi32_mask(A, B) \ 60 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT) 61 #define _mm256_mask_cmpgt_epi32_mask(k, A, B) \ 62 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT) 63 #define _mm256_cmple_epi32_mask(A, B) \ 64 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE) 65 #define _mm256_mask_cmple_epi32_mask(k, A, B) \ 66 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE) 67 #define _mm256_cmplt_epi32_mask(A, B) \ 68 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT) 69 #define _mm256_mask_cmplt_epi32_mask(k, A, B) \ 70 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT) 71 #define _mm256_cmpneq_epi32_mask(A, B) \ 72 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE) 73 #define _mm256_mask_cmpneq_epi32_mask(k, A, B) \ 74 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE) 75 76 #define _mm_cmpeq_epu32_mask(A, B) \ 77 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ) 78 #define _mm_mask_cmpeq_epu32_mask(k, A, B) \ 79 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ) 80 #define _mm_cmpge_epu32_mask(A, B) \ 81 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE) 82 #define _mm_mask_cmpge_epu32_mask(k, A, B) \ 83 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE) 84 #define _mm_cmpgt_epu32_mask(A, B) \ 85 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT) 86 #define _mm_mask_cmpgt_epu32_mask(k, A, B) \ 87 _mm_mask_cmp_epu32_mask((k), (A), 
(B), _MM_CMPINT_GT) 88 #define _mm_cmple_epu32_mask(A, B) \ 89 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE) 90 #define _mm_mask_cmple_epu32_mask(k, A, B) \ 91 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE) 92 #define _mm_cmplt_epu32_mask(A, B) \ 93 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT) 94 #define _mm_mask_cmplt_epu32_mask(k, A, B) \ 95 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT) 96 #define _mm_cmpneq_epu32_mask(A, B) \ 97 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE) 98 #define _mm_mask_cmpneq_epu32_mask(k, A, B) \ 99 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE) 100 101 #define _mm256_cmpeq_epu32_mask(A, B) \ 102 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ) 103 #define _mm256_mask_cmpeq_epu32_mask(k, A, B) \ 104 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ) 105 #define _mm256_cmpge_epu32_mask(A, B) \ 106 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE) 107 #define _mm256_mask_cmpge_epu32_mask(k, A, B) \ 108 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE) 109 #define _mm256_cmpgt_epu32_mask(A, B) \ 110 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT) 111 #define _mm256_mask_cmpgt_epu32_mask(k, A, B) \ 112 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT) 113 #define _mm256_cmple_epu32_mask(A, B) \ 114 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE) 115 #define _mm256_mask_cmple_epu32_mask(k, A, B) \ 116 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE) 117 #define _mm256_cmplt_epu32_mask(A, B) \ 118 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT) 119 #define _mm256_mask_cmplt_epu32_mask(k, A, B) \ 120 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT) 121 #define _mm256_cmpneq_epu32_mask(A, B) \ 122 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE) 123 #define _mm256_mask_cmpneq_epu32_mask(k, A, B) \ 124 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE) 125 126 #define _mm_cmpeq_epi64_mask(A, B) \ 127 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ) 128 #define _mm_mask_cmpeq_epi64_mask(k, A, B) \ 129 
_mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ) 130 #define _mm_cmpge_epi64_mask(A, B) \ 131 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE) 132 #define _mm_mask_cmpge_epi64_mask(k, A, B) \ 133 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE) 134 #define _mm_cmpgt_epi64_mask(A, B) \ 135 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT) 136 #define _mm_mask_cmpgt_epi64_mask(k, A, B) \ 137 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT) 138 #define _mm_cmple_epi64_mask(A, B) \ 139 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE) 140 #define _mm_mask_cmple_epi64_mask(k, A, B) \ 141 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE) 142 #define _mm_cmplt_epi64_mask(A, B) \ 143 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT) 144 #define _mm_mask_cmplt_epi64_mask(k, A, B) \ 145 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT) 146 #define _mm_cmpneq_epi64_mask(A, B) \ 147 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE) 148 #define _mm_mask_cmpneq_epi64_mask(k, A, B) \ 149 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE) 150 151 #define _mm256_cmpeq_epi64_mask(A, B) \ 152 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ) 153 #define _mm256_mask_cmpeq_epi64_mask(k, A, B) \ 154 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ) 155 #define _mm256_cmpge_epi64_mask(A, B) \ 156 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE) 157 #define _mm256_mask_cmpge_epi64_mask(k, A, B) \ 158 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE) 159 #define _mm256_cmpgt_epi64_mask(A, B) \ 160 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT) 161 #define _mm256_mask_cmpgt_epi64_mask(k, A, B) \ 162 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT) 163 #define _mm256_cmple_epi64_mask(A, B) \ 164 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE) 165 #define _mm256_mask_cmple_epi64_mask(k, A, B) \ 166 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE) 167 #define _mm256_cmplt_epi64_mask(A, B) \ 168 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT) 169 #define 
_mm256_mask_cmplt_epi64_mask(k, A, B) \ 170 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT) 171 #define _mm256_cmpneq_epi64_mask(A, B) \ 172 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE) 173 #define _mm256_mask_cmpneq_epi64_mask(k, A, B) \ 174 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE) 175 176 #define _mm_cmpeq_epu64_mask(A, B) \ 177 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ) 178 #define _mm_mask_cmpeq_epu64_mask(k, A, B) \ 179 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ) 180 #define _mm_cmpge_epu64_mask(A, B) \ 181 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE) 182 #define _mm_mask_cmpge_epu64_mask(k, A, B) \ 183 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE) 184 #define _mm_cmpgt_epu64_mask(A, B) \ 185 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT) 186 #define _mm_mask_cmpgt_epu64_mask(k, A, B) \ 187 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT) 188 #define _mm_cmple_epu64_mask(A, B) \ 189 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE) 190 #define _mm_mask_cmple_epu64_mask(k, A, B) \ 191 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE) 192 #define _mm_cmplt_epu64_mask(A, B) \ 193 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT) 194 #define _mm_mask_cmplt_epu64_mask(k, A, B) \ 195 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT) 196 #define _mm_cmpneq_epu64_mask(A, B) \ 197 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE) 198 #define _mm_mask_cmpneq_epu64_mask(k, A, B) \ 199 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) 200 201 #define _mm256_cmpeq_epu64_mask(A, B) \ 202 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ) 203 #define _mm256_mask_cmpeq_epu64_mask(k, A, B) \ 204 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ) 205 #define _mm256_cmpge_epu64_mask(A, B) \ 206 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE) 207 #define _mm256_mask_cmpge_epu64_mask(k, A, B) \ 208 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE) 209 #define _mm256_cmpgt_epu64_mask(A, B) \ 210 _mm256_cmp_epu64_mask((A), (B), 
_MM_CMPINT_GT) 211 #define _mm256_mask_cmpgt_epu64_mask(k, A, B) \ 212 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT) 213 #define _mm256_cmple_epu64_mask(A, B) \ 214 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE) 215 #define _mm256_mask_cmple_epu64_mask(k, A, B) \ 216 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE) 217 #define _mm256_cmplt_epu64_mask(A, B) \ 218 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT) 219 #define _mm256_mask_cmplt_epu64_mask(k, A, B) \ 220 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT) 221 #define _mm256_cmpneq_epu64_mask(A, B) \ 222 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE) 223 #define _mm256_mask_cmpneq_epu64_mask(k, A, B) \ 224 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) 225 226 static __inline__ __m256i __DEFAULT_FN_ATTRS256 227 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 228 { 229 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 230 (__v8si)_mm256_add_epi32(__A, __B), 231 (__v8si)__W); 232 } 233 234 static __inline__ __m256i __DEFAULT_FN_ATTRS256 235 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B) 236 { 237 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 238 (__v8si)_mm256_add_epi32(__A, __B), 239 (__v8si)_mm256_setzero_si256()); 240 } 241 242 static __inline__ __m256i __DEFAULT_FN_ATTRS256 243 _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 244 { 245 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 246 (__v4di)_mm256_add_epi64(__A, __B), 247 (__v4di)__W); 248 } 249 250 static __inline__ __m256i __DEFAULT_FN_ATTRS256 251 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B) 252 { 253 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 254 (__v4di)_mm256_add_epi64(__A, __B), 255 (__v4di)_mm256_setzero_si256()); 256 } 257 258 static __inline__ __m256i __DEFAULT_FN_ATTRS256 259 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 260 { 261 return 
(__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 262 (__v8si)_mm256_sub_epi32(__A, __B), 263 (__v8si)__W); 264 } 265 266 static __inline__ __m256i __DEFAULT_FN_ATTRS256 267 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B) 268 { 269 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 270 (__v8si)_mm256_sub_epi32(__A, __B), 271 (__v8si)_mm256_setzero_si256()); 272 } 273 274 static __inline__ __m256i __DEFAULT_FN_ATTRS256 275 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 276 { 277 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 278 (__v4di)_mm256_sub_epi64(__A, __B), 279 (__v4di)__W); 280 } 281 282 static __inline__ __m256i __DEFAULT_FN_ATTRS256 283 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B) 284 { 285 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 286 (__v4di)_mm256_sub_epi64(__A, __B), 287 (__v4di)_mm256_setzero_si256()); 288 } 289 290 static __inline__ __m128i __DEFAULT_FN_ATTRS128 291 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 292 { 293 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 294 (__v4si)_mm_add_epi32(__A, __B), 295 (__v4si)__W); 296 } 297 298 static __inline__ __m128i __DEFAULT_FN_ATTRS128 299 _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B) 300 { 301 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 302 (__v4si)_mm_add_epi32(__A, __B), 303 (__v4si)_mm_setzero_si128()); 304 } 305 306 static __inline__ __m128i __DEFAULT_FN_ATTRS128 307 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 308 { 309 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 310 (__v2di)_mm_add_epi64(__A, __B), 311 (__v2di)__W); 312 } 313 314 static __inline__ __m128i __DEFAULT_FN_ATTRS128 315 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) 316 { 317 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 318 (__v2di)_mm_add_epi64(__A, __B), 319 (__v2di)_mm_setzero_si128()); 320 } 321 322 static 
__inline__ __m128i __DEFAULT_FN_ATTRS128 323 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 324 { 325 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 326 (__v4si)_mm_sub_epi32(__A, __B), 327 (__v4si)__W); 328 } 329 330 static __inline__ __m128i __DEFAULT_FN_ATTRS128 331 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B) 332 { 333 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 334 (__v4si)_mm_sub_epi32(__A, __B), 335 (__v4si)_mm_setzero_si128()); 336 } 337 338 static __inline__ __m128i __DEFAULT_FN_ATTRS128 339 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 340 { 341 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 342 (__v2di)_mm_sub_epi64(__A, __B), 343 (__v2di)__W); 344 } 345 346 static __inline__ __m128i __DEFAULT_FN_ATTRS128 347 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) 348 { 349 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 350 (__v2di)_mm_sub_epi64(__A, __B), 351 (__v2di)_mm_setzero_si128()); 352 } 353 354 static __inline__ __m256i __DEFAULT_FN_ATTRS256 355 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 356 { 357 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 358 (__v4di)_mm256_mul_epi32(__X, __Y), 359 (__v4di)__W); 360 } 361 362 static __inline__ __m256i __DEFAULT_FN_ATTRS256 363 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y) 364 { 365 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 366 (__v4di)_mm256_mul_epi32(__X, __Y), 367 (__v4di)_mm256_setzero_si256()); 368 } 369 370 static __inline__ __m128i __DEFAULT_FN_ATTRS128 371 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 372 { 373 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 374 (__v2di)_mm_mul_epi32(__X, __Y), 375 (__v2di)__W); 376 } 377 378 static __inline__ __m128i __DEFAULT_FN_ATTRS128 379 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y) 380 { 381 return 
(__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 382 (__v2di)_mm_mul_epi32(__X, __Y), 383 (__v2di)_mm_setzero_si128()); 384 } 385 386 static __inline__ __m256i __DEFAULT_FN_ATTRS256 387 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 388 { 389 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 390 (__v4di)_mm256_mul_epu32(__X, __Y), 391 (__v4di)__W); 392 } 393 394 static __inline__ __m256i __DEFAULT_FN_ATTRS256 395 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y) 396 { 397 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 398 (__v4di)_mm256_mul_epu32(__X, __Y), 399 (__v4di)_mm256_setzero_si256()); 400 } 401 402 static __inline__ __m128i __DEFAULT_FN_ATTRS128 403 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 404 { 405 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 406 (__v2di)_mm_mul_epu32(__X, __Y), 407 (__v2di)__W); 408 } 409 410 static __inline__ __m128i __DEFAULT_FN_ATTRS128 411 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y) 412 { 413 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 414 (__v2di)_mm_mul_epu32(__X, __Y), 415 (__v2di)_mm_setzero_si128()); 416 } 417 418 static __inline__ __m256i __DEFAULT_FN_ATTRS256 419 _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) 420 { 421 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 422 (__v8si)_mm256_mullo_epi32(__A, __B), 423 (__v8si)_mm256_setzero_si256()); 424 } 425 426 static __inline__ __m256i __DEFAULT_FN_ATTRS256 427 _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) 428 { 429 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 430 (__v8si)_mm256_mullo_epi32(__A, __B), 431 (__v8si)__W); 432 } 433 434 static __inline__ __m128i __DEFAULT_FN_ATTRS128 435 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B) 436 { 437 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 438 (__v4si)_mm_mullo_epi32(__A, __B), 439 
(__v4si)_mm_setzero_si128()); 440 } 441 442 static __inline__ __m128i __DEFAULT_FN_ATTRS128 443 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) 444 { 445 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 446 (__v4si)_mm_mullo_epi32(__A, __B), 447 (__v4si)__W); 448 } 449 450 static __inline__ __m256i __DEFAULT_FN_ATTRS256 451 _mm256_and_epi32(__m256i __a, __m256i __b) 452 { 453 return (__m256i)((__v8su)__a & (__v8su)__b); 454 } 455 456 static __inline__ __m256i __DEFAULT_FN_ATTRS256 457 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 458 { 459 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 460 (__v8si)_mm256_and_epi32(__A, __B), 461 (__v8si)__W); 462 } 463 464 static __inline__ __m256i __DEFAULT_FN_ATTRS256 465 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B) 466 { 467 return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B); 468 } 469 470 static __inline__ __m128i __DEFAULT_FN_ATTRS128 471 _mm_and_epi32(__m128i __a, __m128i __b) 472 { 473 return (__m128i)((__v4su)__a & (__v4su)__b); 474 } 475 476 static __inline__ __m128i __DEFAULT_FN_ATTRS128 477 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 478 { 479 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 480 (__v4si)_mm_and_epi32(__A, __B), 481 (__v4si)__W); 482 } 483 484 static __inline__ __m128i __DEFAULT_FN_ATTRS128 485 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B) 486 { 487 return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B); 488 } 489 490 static __inline__ __m256i __DEFAULT_FN_ATTRS256 491 _mm256_andnot_epi32(__m256i __A, __m256i __B) 492 { 493 return (__m256i)(~(__v8su)__A & (__v8su)__B); 494 } 495 496 static __inline__ __m256i __DEFAULT_FN_ATTRS256 497 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 498 { 499 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 500 (__v8si)_mm256_andnot_epi32(__A, __B), 501 
(__v8si)__W); 502 } 503 504 static __inline__ __m256i __DEFAULT_FN_ATTRS256 505 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B) 506 { 507 return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(), 508 __U, __A, __B); 509 } 510 511 static __inline__ __m128i __DEFAULT_FN_ATTRS128 512 _mm_andnot_epi32(__m128i __A, __m128i __B) 513 { 514 return (__m128i)(~(__v4su)__A & (__v4su)__B); 515 } 516 517 static __inline__ __m128i __DEFAULT_FN_ATTRS128 518 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 519 { 520 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 521 (__v4si)_mm_andnot_epi32(__A, __B), 522 (__v4si)__W); 523 } 524 525 static __inline__ __m128i __DEFAULT_FN_ATTRS128 526 _mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B) 527 { 528 return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B); 529 } 530 531 static __inline__ __m256i __DEFAULT_FN_ATTRS256 532 _mm256_or_epi32(__m256i __a, __m256i __b) 533 { 534 return (__m256i)((__v8su)__a | (__v8su)__b); 535 } 536 537 static __inline__ __m256i __DEFAULT_FN_ATTRS256 538 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 539 { 540 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 541 (__v8si)_mm256_or_epi32(__A, __B), 542 (__v8si)__W); 543 } 544 545 static __inline__ __m256i __DEFAULT_FN_ATTRS256 546 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B) 547 { 548 return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B); 549 } 550 551 static __inline__ __m128i __DEFAULT_FN_ATTRS128 552 _mm_or_epi32(__m128i __a, __m128i __b) 553 { 554 return (__m128i)((__v4su)__a | (__v4su)__b); 555 } 556 557 static __inline__ __m128i __DEFAULT_FN_ATTRS128 558 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 559 { 560 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 561 (__v4si)_mm_or_epi32(__A, __B), 562 (__v4si)__W); 563 } 564 565 static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 566 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B) 567 { 568 return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B); 569 } 570 571 static __inline__ __m256i __DEFAULT_FN_ATTRS256 572 _mm256_xor_epi32(__m256i __a, __m256i __b) 573 { 574 return (__m256i)((__v8su)__a ^ (__v8su)__b); 575 } 576 577 static __inline__ __m256i __DEFAULT_FN_ATTRS256 578 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 579 { 580 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 581 (__v8si)_mm256_xor_epi32(__A, __B), 582 (__v8si)__W); 583 } 584 585 static __inline__ __m256i __DEFAULT_FN_ATTRS256 586 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B) 587 { 588 return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B); 589 } 590 591 static __inline__ __m128i __DEFAULT_FN_ATTRS128 592 _mm_xor_epi32(__m128i __a, __m128i __b) 593 { 594 return (__m128i)((__v4su)__a ^ (__v4su)__b); 595 } 596 597 static __inline__ __m128i __DEFAULT_FN_ATTRS128 598 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 599 { 600 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 601 (__v4si)_mm_xor_epi32(__A, __B), 602 (__v4si)__W); 603 } 604 605 static __inline__ __m128i __DEFAULT_FN_ATTRS128 606 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B) 607 { 608 return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B); 609 } 610 611 static __inline__ __m256i __DEFAULT_FN_ATTRS256 612 _mm256_and_epi64(__m256i __a, __m256i __b) 613 { 614 return (__m256i)((__v4du)__a & (__v4du)__b); 615 } 616 617 static __inline__ __m256i __DEFAULT_FN_ATTRS256 618 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 619 { 620 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 621 (__v4di)_mm256_and_epi64(__A, __B), 622 (__v4di)__W); 623 } 624 625 static __inline__ __m256i __DEFAULT_FN_ATTRS256 626 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i 
__B) 627 { 628 return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B); 629 } 630 631 static __inline__ __m128i __DEFAULT_FN_ATTRS128 632 _mm_and_epi64(__m128i __a, __m128i __b) 633 { 634 return (__m128i)((__v2du)__a & (__v2du)__b); 635 } 636 637 static __inline__ __m128i __DEFAULT_FN_ATTRS128 638 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 639 { 640 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 641 (__v2di)_mm_and_epi64(__A, __B), 642 (__v2di)__W); 643 } 644 645 static __inline__ __m128i __DEFAULT_FN_ATTRS128 646 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B) 647 { 648 return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B); 649 } 650 651 static __inline__ __m256i __DEFAULT_FN_ATTRS256 652 _mm256_andnot_epi64(__m256i __A, __m256i __B) 653 { 654 return (__m256i)(~(__v4du)__A & (__v4du)__B); 655 } 656 657 static __inline__ __m256i __DEFAULT_FN_ATTRS256 658 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 659 { 660 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 661 (__v4di)_mm256_andnot_epi64(__A, __B), 662 (__v4di)__W); 663 } 664 665 static __inline__ __m256i __DEFAULT_FN_ATTRS256 666 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B) 667 { 668 return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(), 669 __U, __A, __B); 670 } 671 672 static __inline__ __m128i __DEFAULT_FN_ATTRS128 673 _mm_andnot_epi64(__m128i __A, __m128i __B) 674 { 675 return (__m128i)(~(__v2du)__A & (__v2du)__B); 676 } 677 678 static __inline__ __m128i __DEFAULT_FN_ATTRS128 679 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 680 { 681 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 682 (__v2di)_mm_andnot_epi64(__A, __B), 683 (__v2di)__W); 684 } 685 686 static __inline__ __m128i __DEFAULT_FN_ATTRS128 687 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B) 688 { 689 return 
(__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B); 690 } 691 692 static __inline__ __m256i __DEFAULT_FN_ATTRS256 693 _mm256_or_epi64(__m256i __a, __m256i __b) 694 { 695 return (__m256i)((__v4du)__a | (__v4du)__b); 696 } 697 698 static __inline__ __m256i __DEFAULT_FN_ATTRS256 699 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 700 { 701 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 702 (__v4di)_mm256_or_epi64(__A, __B), 703 (__v4di)__W); 704 } 705 706 static __inline__ __m256i __DEFAULT_FN_ATTRS256 707 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B) 708 { 709 return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B); 710 } 711 712 static __inline__ __m128i __DEFAULT_FN_ATTRS128 713 _mm_or_epi64(__m128i __a, __m128i __b) 714 { 715 return (__m128i)((__v2du)__a | (__v2du)__b); 716 } 717 718 static __inline__ __m128i __DEFAULT_FN_ATTRS128 719 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 720 { 721 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 722 (__v2di)_mm_or_epi64(__A, __B), 723 (__v2di)__W); 724 } 725 726 static __inline__ __m128i __DEFAULT_FN_ATTRS128 727 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B) 728 { 729 return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B); 730 } 731 732 static __inline__ __m256i __DEFAULT_FN_ATTRS256 733 _mm256_xor_epi64(__m256i __a, __m256i __b) 734 { 735 return (__m256i)((__v4du)__a ^ (__v4du)__b); 736 } 737 738 static __inline__ __m256i __DEFAULT_FN_ATTRS256 739 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 740 { 741 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 742 (__v4di)_mm256_xor_epi64(__A, __B), 743 (__v4di)__W); 744 } 745 746 static __inline__ __m256i __DEFAULT_FN_ATTRS256 747 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B) 748 { 749 return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B); 750 } 751 752 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 753 _mm_xor_epi64(__m128i __a, __m128i __b) 754 { 755 return (__m128i)((__v2du)__a ^ (__v2du)__b); 756 } 757 758 static __inline__ __m128i __DEFAULT_FN_ATTRS128 759 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, 760 __m128i __B) 761 { 762 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 763 (__v2di)_mm_xor_epi64(__A, __B), 764 (__v2di)__W); 765 } 766 767 static __inline__ __m128i __DEFAULT_FN_ATTRS128 768 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B) 769 { 770 return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B); 771 } 772 773 #define _mm_cmp_epi32_mask(a, b, p) \ 774 ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ 775 (__v4si)(__m128i)(b), (int)(p), \ 776 (__mmask8)-1)) 777 778 #define _mm_mask_cmp_epi32_mask(m, a, b, p) \ 779 ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ 780 (__v4si)(__m128i)(b), (int)(p), \ 781 (__mmask8)(m))) 782 783 #define _mm_cmp_epu32_mask(a, b, p) \ 784 ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ 785 (__v4si)(__m128i)(b), (int)(p), \ 786 (__mmask8)-1)) 787 788 #define _mm_mask_cmp_epu32_mask(m, a, b, p) \ 789 ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ 790 (__v4si)(__m128i)(b), (int)(p), \ 791 (__mmask8)(m))) 792 793 #define _mm256_cmp_epi32_mask(a, b, p) \ 794 ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ 795 (__v8si)(__m256i)(b), (int)(p), \ 796 (__mmask8)-1)) 797 798 #define _mm256_mask_cmp_epi32_mask(m, a, b, p) \ 799 ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ 800 (__v8si)(__m256i)(b), (int)(p), \ 801 (__mmask8)(m))) 802 803 #define _mm256_cmp_epu32_mask(a, b, p) \ 804 ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ 805 (__v8si)(__m256i)(b), (int)(p), \ 806 (__mmask8)-1)) 807 808 #define _mm256_mask_cmp_epu32_mask(m, a, b, p) \ 809 ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ 810 
(__v8si)(__m256i)(b), (int)(p), \ 811 (__mmask8)(m))) 812 813 #define _mm_cmp_epi64_mask(a, b, p) \ 814 ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ 815 (__v2di)(__m128i)(b), (int)(p), \ 816 (__mmask8)-1)) 817 818 #define _mm_mask_cmp_epi64_mask(m, a, b, p) \ 819 ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ 820 (__v2di)(__m128i)(b), (int)(p), \ 821 (__mmask8)(m))) 822 823 #define _mm_cmp_epu64_mask(a, b, p) \ 824 ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ 825 (__v2di)(__m128i)(b), (int)(p), \ 826 (__mmask8)-1)) 827 828 #define _mm_mask_cmp_epu64_mask(m, a, b, p) \ 829 ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ 830 (__v2di)(__m128i)(b), (int)(p), \ 831 (__mmask8)(m))) 832 833 #define _mm256_cmp_epi64_mask(a, b, p) \ 834 ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ 835 (__v4di)(__m256i)(b), (int)(p), \ 836 (__mmask8)-1)) 837 838 #define _mm256_mask_cmp_epi64_mask(m, a, b, p) \ 839 ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ 840 (__v4di)(__m256i)(b), (int)(p), \ 841 (__mmask8)(m))) 842 843 #define _mm256_cmp_epu64_mask(a, b, p) \ 844 ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ 845 (__v4di)(__m256i)(b), (int)(p), \ 846 (__mmask8)-1)) 847 848 #define _mm256_mask_cmp_epu64_mask(m, a, b, p) \ 849 ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ 850 (__v4di)(__m256i)(b), (int)(p), \ 851 (__mmask8)(m))) 852 853 #define _mm256_cmp_ps_mask(a, b, p) \ 854 ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ 855 (__v8sf)(__m256)(b), (int)(p), \ 856 (__mmask8)-1)) 857 858 #define _mm256_mask_cmp_ps_mask(m, a, b, p) \ 859 ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ 860 (__v8sf)(__m256)(b), (int)(p), \ 861 (__mmask8)(m))) 862 863 #define _mm256_cmp_pd_mask(a, b, p) \ 864 ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ 865 (__v4df)(__m256d)(b), (int)(p), \ 866 (__mmask8)-1)) 867 868 #define 
_mm256_mask_cmp_pd_mask(m, a, b, p) \ 869 ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ 870 (__v4df)(__m256d)(b), (int)(p), \ 871 (__mmask8)(m))) 872 873 #define _mm_cmp_ps_mask(a, b, p) \ 874 ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ 875 (__v4sf)(__m128)(b), (int)(p), \ 876 (__mmask8)-1)) 877 878 #define _mm_mask_cmp_ps_mask(m, a, b, p) \ 879 ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ 880 (__v4sf)(__m128)(b), (int)(p), \ 881 (__mmask8)(m))) 882 883 #define _mm_cmp_pd_mask(a, b, p) \ 884 ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ 885 (__v2df)(__m128d)(b), (int)(p), \ 886 (__mmask8)-1)) 887 888 #define _mm_mask_cmp_pd_mask(m, a, b, p) \ 889 ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ 890 (__v2df)(__m128d)(b), (int)(p), \ 891 (__mmask8)(m))) 892 893 static __inline__ __m128d __DEFAULT_FN_ATTRS128 894 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 895 { 896 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 897 __builtin_ia32_vfmaddpd ((__v2df) __A, 898 (__v2df) __B, 899 (__v2df) __C), 900 (__v2df) __A); 901 } 902 903 static __inline__ __m128d __DEFAULT_FN_ATTRS128 904 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 905 { 906 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 907 __builtin_ia32_vfmaddpd ((__v2df) __A, 908 (__v2df) __B, 909 (__v2df) __C), 910 (__v2df) __C); 911 } 912 913 static __inline__ __m128d __DEFAULT_FN_ATTRS128 914 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 915 { 916 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 917 __builtin_ia32_vfmaddpd ((__v2df) __A, 918 (__v2df) __B, 919 (__v2df) __C), 920 (__v2df)_mm_setzero_pd()); 921 } 922 923 static __inline__ __m128d __DEFAULT_FN_ATTRS128 924 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 925 { 926 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 927 
__builtin_ia32_vfmaddpd ((__v2df) __A, 928 (__v2df) __B, 929 -(__v2df) __C), 930 (__v2df) __A); 931 } 932 933 static __inline__ __m128d __DEFAULT_FN_ATTRS128 934 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 935 { 936 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 937 __builtin_ia32_vfmaddpd ((__v2df) __A, 938 (__v2df) __B, 939 -(__v2df) __C), 940 (__v2df)_mm_setzero_pd()); 941 } 942 943 static __inline__ __m128d __DEFAULT_FN_ATTRS128 944 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 945 { 946 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 947 __builtin_ia32_vfmaddpd (-(__v2df) __A, 948 (__v2df) __B, 949 (__v2df) __C), 950 (__v2df) __C); 951 } 952 953 static __inline__ __m128d __DEFAULT_FN_ATTRS128 954 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 955 { 956 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 957 __builtin_ia32_vfmaddpd (-(__v2df) __A, 958 (__v2df) __B, 959 (__v2df) __C), 960 (__v2df)_mm_setzero_pd()); 961 } 962 963 static __inline__ __m128d __DEFAULT_FN_ATTRS128 964 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 965 { 966 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 967 __builtin_ia32_vfmaddpd (-(__v2df) __A, 968 (__v2df) __B, 969 -(__v2df) __C), 970 (__v2df)_mm_setzero_pd()); 971 } 972 973 static __inline__ __m256d __DEFAULT_FN_ATTRS256 974 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 975 { 976 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 977 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 978 (__v4df) __B, 979 (__v4df) __C), 980 (__v4df) __A); 981 } 982 983 static __inline__ __m256d __DEFAULT_FN_ATTRS256 984 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 985 { 986 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 987 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 988 (__v4df) __B, 989 (__v4df) __C), 990 (__v4df) __C); 
991 } 992 993 static __inline__ __m256d __DEFAULT_FN_ATTRS256 994 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 995 { 996 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 997 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 998 (__v4df) __B, 999 (__v4df) __C), 1000 (__v4df)_mm256_setzero_pd()); 1001 } 1002 1003 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1004 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1005 { 1006 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1007 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1008 (__v4df) __B, 1009 -(__v4df) __C), 1010 (__v4df) __A); 1011 } 1012 1013 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1014 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1015 { 1016 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1017 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1018 (__v4df) __B, 1019 -(__v4df) __C), 1020 (__v4df)_mm256_setzero_pd()); 1021 } 1022 1023 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1024 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1025 { 1026 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1027 __builtin_ia32_vfmaddpd256 (-(__v4df) __A, 1028 (__v4df) __B, 1029 (__v4df) __C), 1030 (__v4df) __C); 1031 } 1032 1033 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1034 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1035 { 1036 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1037 __builtin_ia32_vfmaddpd256 (-(__v4df) __A, 1038 (__v4df) __B, 1039 (__v4df) __C), 1040 (__v4df)_mm256_setzero_pd()); 1041 } 1042 1043 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1044 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1045 { 1046 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1047 __builtin_ia32_vfmaddpd256 (-(__v4df) __A, 1048 (__v4df) __B, 1049 -(__v4df) __C), 1050 
(__v4df)_mm256_setzero_pd()); 1051 } 1052 1053 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1054 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1055 { 1056 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1057 __builtin_ia32_vfmaddps ((__v4sf) __A, 1058 (__v4sf) __B, 1059 (__v4sf) __C), 1060 (__v4sf) __A); 1061 } 1062 1063 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1064 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1065 { 1066 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1067 __builtin_ia32_vfmaddps ((__v4sf) __A, 1068 (__v4sf) __B, 1069 (__v4sf) __C), 1070 (__v4sf) __C); 1071 } 1072 1073 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1074 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1075 { 1076 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1077 __builtin_ia32_vfmaddps ((__v4sf) __A, 1078 (__v4sf) __B, 1079 (__v4sf) __C), 1080 (__v4sf)_mm_setzero_ps()); 1081 } 1082 1083 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1084 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1085 { 1086 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1087 __builtin_ia32_vfmaddps ((__v4sf) __A, 1088 (__v4sf) __B, 1089 -(__v4sf) __C), 1090 (__v4sf) __A); 1091 } 1092 1093 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1094 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1095 { 1096 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1097 __builtin_ia32_vfmaddps ((__v4sf) __A, 1098 (__v4sf) __B, 1099 -(__v4sf) __C), 1100 (__v4sf)_mm_setzero_ps()); 1101 } 1102 1103 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1104 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1105 { 1106 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1107 __builtin_ia32_vfmaddps (-(__v4sf) __A, 1108 (__v4sf) __B, 1109 (__v4sf) __C), 1110 (__v4sf) __C); 1111 } 1112 1113 static __inline__ __m128 __DEFAULT_FN_ATTRS128 
1114 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1115 { 1116 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1117 __builtin_ia32_vfmaddps (-(__v4sf) __A, 1118 (__v4sf) __B, 1119 (__v4sf) __C), 1120 (__v4sf)_mm_setzero_ps()); 1121 } 1122 1123 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1124 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1125 { 1126 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1127 __builtin_ia32_vfmaddps (-(__v4sf) __A, 1128 (__v4sf) __B, 1129 -(__v4sf) __C), 1130 (__v4sf)_mm_setzero_ps()); 1131 } 1132 1133 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1134 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1135 { 1136 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1137 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1138 (__v8sf) __B, 1139 (__v8sf) __C), 1140 (__v8sf) __A); 1141 } 1142 1143 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1144 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1145 { 1146 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1147 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1148 (__v8sf) __B, 1149 (__v8sf) __C), 1150 (__v8sf) __C); 1151 } 1152 1153 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1154 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1155 { 1156 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1157 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1158 (__v8sf) __B, 1159 (__v8sf) __C), 1160 (__v8sf)_mm256_setzero_ps()); 1161 } 1162 1163 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1164 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1165 { 1166 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1167 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1168 (__v8sf) __B, 1169 -(__v8sf) __C), 1170 (__v8sf) __A); 1171 } 1172 1173 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1174 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, 
__m256 __B, __m256 __C) 1175 { 1176 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1177 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1178 (__v8sf) __B, 1179 -(__v8sf) __C), 1180 (__v8sf)_mm256_setzero_ps()); 1181 } 1182 1183 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1184 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1185 { 1186 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1187 __builtin_ia32_vfmaddps256 (-(__v8sf) __A, 1188 (__v8sf) __B, 1189 (__v8sf) __C), 1190 (__v8sf) __C); 1191 } 1192 1193 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1194 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1195 { 1196 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1197 __builtin_ia32_vfmaddps256 (-(__v8sf) __A, 1198 (__v8sf) __B, 1199 (__v8sf) __C), 1200 (__v8sf)_mm256_setzero_ps()); 1201 } 1202 1203 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1204 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1205 { 1206 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1207 __builtin_ia32_vfmaddps256 (-(__v8sf) __A, 1208 (__v8sf) __B, 1209 -(__v8sf) __C), 1210 (__v8sf)_mm256_setzero_ps()); 1211 } 1212 1213 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1214 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1215 { 1216 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1217 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1218 (__v2df) __B, 1219 (__v2df) __C), 1220 (__v2df) __A); 1221 } 1222 1223 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1224 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1225 { 1226 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1227 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1228 (__v2df) __B, 1229 (__v2df) __C), 1230 (__v2df) __C); 1231 } 1232 1233 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1234 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d 
__C) 1235 { 1236 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1237 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1238 (__v2df) __B, 1239 (__v2df) __C), 1240 (__v2df)_mm_setzero_pd()); 1241 } 1242 1243 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1244 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1245 { 1246 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1247 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1248 (__v2df) __B, 1249 -(__v2df) __C), 1250 (__v2df) __A); 1251 } 1252 1253 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1254 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1255 { 1256 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1257 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1258 (__v2df) __B, 1259 -(__v2df) __C), 1260 (__v2df)_mm_setzero_pd()); 1261 } 1262 1263 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1264 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1265 { 1266 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1267 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1268 (__v4df) __B, 1269 (__v4df) __C), 1270 (__v4df) __A); 1271 } 1272 1273 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1274 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1275 { 1276 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1277 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1278 (__v4df) __B, 1279 (__v4df) __C), 1280 (__v4df) __C); 1281 } 1282 1283 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1284 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1285 { 1286 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1287 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1288 (__v4df) __B, 1289 (__v4df) __C), 1290 (__v4df)_mm256_setzero_pd()); 1291 } 1292 1293 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1294 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, 
__m256d __C) 1295 { 1296 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1297 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1298 (__v4df) __B, 1299 -(__v4df) __C), 1300 (__v4df) __A); 1301 } 1302 1303 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1304 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1305 { 1306 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1307 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1308 (__v4df) __B, 1309 -(__v4df) __C), 1310 (__v4df)_mm256_setzero_pd()); 1311 } 1312 1313 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1314 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1315 { 1316 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1317 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1318 (__v4sf) __B, 1319 (__v4sf) __C), 1320 (__v4sf) __A); 1321 } 1322 1323 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1324 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1325 { 1326 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1327 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1328 (__v4sf) __B, 1329 (__v4sf) __C), 1330 (__v4sf) __C); 1331 } 1332 1333 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1334 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1335 { 1336 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1337 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1338 (__v4sf) __B, 1339 (__v4sf) __C), 1340 (__v4sf)_mm_setzero_ps()); 1341 } 1342 1343 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1344 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1345 { 1346 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1347 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1348 (__v4sf) __B, 1349 -(__v4sf) __C), 1350 (__v4sf) __A); 1351 } 1352 1353 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1354 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1355 { 1356 return (__m128) 
__builtin_ia32_selectps_128((__mmask8) __U, 1357 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1358 (__v4sf) __B, 1359 -(__v4sf) __C), 1360 (__v4sf)_mm_setzero_ps()); 1361 } 1362 1363 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1364 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, 1365 __m256 __C) 1366 { 1367 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1368 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1369 (__v8sf) __B, 1370 (__v8sf) __C), 1371 (__v8sf) __A); 1372 } 1373 1374 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1375 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1376 { 1377 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1378 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1379 (__v8sf) __B, 1380 (__v8sf) __C), 1381 (__v8sf) __C); 1382 } 1383 1384 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1385 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1386 { 1387 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1388 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1389 (__v8sf) __B, 1390 (__v8sf) __C), 1391 (__v8sf)_mm256_setzero_ps()); 1392 } 1393 1394 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1395 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1396 { 1397 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1398 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1399 (__v8sf) __B, 1400 -(__v8sf) __C), 1401 (__v8sf) __A); 1402 } 1403 1404 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1405 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1406 { 1407 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1408 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1409 (__v8sf) __B, 1410 -(__v8sf) __C), 1411 (__v8sf)_mm256_setzero_ps()); 1412 } 1413 1414 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1415 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1416 { 1417 return (__m128d) 
__builtin_ia32_selectpd_128((__mmask8) __U, 1418 __builtin_ia32_vfmaddpd ((__v2df) __A, 1419 (__v2df) __B, 1420 -(__v2df) __C), 1421 (__v2df) __C); 1422 } 1423 1424 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1425 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1426 { 1427 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1428 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1429 (__v4df) __B, 1430 -(__v4df) __C), 1431 (__v4df) __C); 1432 } 1433 1434 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1435 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1436 { 1437 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1438 __builtin_ia32_vfmaddps ((__v4sf) __A, 1439 (__v4sf) __B, 1440 -(__v4sf) __C), 1441 (__v4sf) __C); 1442 } 1443 1444 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1445 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1446 { 1447 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1448 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1449 (__v8sf) __B, 1450 -(__v8sf) __C), 1451 (__v8sf) __C); 1452 } 1453 1454 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1455 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1456 { 1457 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1458 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1459 (__v2df) __B, 1460 -(__v2df) __C), 1461 (__v2df) __C); 1462 } 1463 1464 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1465 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1466 { 1467 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1468 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1469 (__v4df) __B, 1470 -(__v4df) __C), 1471 (__v4df) __C); 1472 } 1473 1474 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1475 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1476 { 1477 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1478 
__builtin_ia32_vfmaddsubps ((__v4sf) __A, 1479 (__v4sf) __B, 1480 -(__v4sf) __C), 1481 (__v4sf) __C); 1482 } 1483 1484 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1485 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1486 { 1487 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1488 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1489 (__v8sf) __B, 1490 -(__v8sf) __C), 1491 (__v8sf) __C); 1492 } 1493 1494 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1495 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1496 { 1497 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1498 __builtin_ia32_vfmaddpd ((__v2df) __A, 1499 -(__v2df) __B, 1500 (__v2df) __C), 1501 (__v2df) __A); 1502 } 1503 1504 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1505 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1506 { 1507 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1508 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1509 -(__v4df) __B, 1510 (__v4df) __C), 1511 (__v4df) __A); 1512 } 1513 1514 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1515 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1516 { 1517 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1518 __builtin_ia32_vfmaddps ((__v4sf) __A, 1519 -(__v4sf) __B, 1520 (__v4sf) __C), 1521 (__v4sf) __A); 1522 } 1523 1524 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1525 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1526 { 1527 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1528 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1529 -(__v8sf) __B, 1530 (__v8sf) __C), 1531 (__v8sf) __A); 1532 } 1533 1534 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1535 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1536 { 1537 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1538 __builtin_ia32_vfmaddpd ((__v2df) __A, 1539 -(__v2df) __B, 1540 -(__v2df) 
__C), 1541 (__v2df) __A); 1542 } 1543 1544 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1545 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1546 { 1547 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1548 __builtin_ia32_vfmaddpd ((__v2df) __A, 1549 -(__v2df) __B, 1550 -(__v2df) __C), 1551 (__v2df) __C); 1552 } 1553 1554 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1555 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1556 { 1557 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1558 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1559 -(__v4df) __B, 1560 -(__v4df) __C), 1561 (__v4df) __A); 1562 } 1563 1564 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1565 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1566 { 1567 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1568 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1569 -(__v4df) __B, 1570 -(__v4df) __C), 1571 (__v4df) __C); 1572 } 1573 1574 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1575 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1576 { 1577 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1578 __builtin_ia32_vfmaddps ((__v4sf) __A, 1579 -(__v4sf) __B, 1580 -(__v4sf) __C), 1581 (__v4sf) __A); 1582 } 1583 1584 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1585 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1586 { 1587 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1588 __builtin_ia32_vfmaddps ((__v4sf) __A, 1589 -(__v4sf) __B, 1590 -(__v4sf) __C), 1591 (__v4sf) __C); 1592 } 1593 1594 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1595 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1596 { 1597 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1598 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1599 -(__v8sf) __B, 1600 -(__v8sf) __C), 1601 (__v8sf) __A); 1602 } 1603 1604 static __inline__ __m256 
__DEFAULT_FN_ATTRS256 1605 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1606 { 1607 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1608 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1609 -(__v8sf) __B, 1610 -(__v8sf) __C), 1611 (__v8sf) __C); 1612 } 1613 1614 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1615 _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 1616 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 1617 (__v2df)_mm_add_pd(__A, __B), 1618 (__v2df)__W); 1619 } 1620 1621 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1622 _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) { 1623 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 1624 (__v2df)_mm_add_pd(__A, __B), 1625 (__v2df)_mm_setzero_pd()); 1626 } 1627 1628 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1629 _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 1630 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 1631 (__v4df)_mm256_add_pd(__A, __B), 1632 (__v4df)__W); 1633 } 1634 1635 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1636 _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) { 1637 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 1638 (__v4df)_mm256_add_pd(__A, __B), 1639 (__v4df)_mm256_setzero_pd()); 1640 } 1641 1642 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1643 _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 1644 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1645 (__v4sf)_mm_add_ps(__A, __B), 1646 (__v4sf)__W); 1647 } 1648 1649 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1650 _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) { 1651 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1652 (__v4sf)_mm_add_ps(__A, __B), 1653 (__v4sf)_mm_setzero_ps()); 1654 } 1655 1656 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1657 _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 1658 return 
(__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1659 (__v8sf)_mm256_add_ps(__A, __B), 1660 (__v8sf)__W); 1661 } 1662 1663 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1664 _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) { 1665 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1666 (__v8sf)_mm256_add_ps(__A, __B), 1667 (__v8sf)_mm256_setzero_ps()); 1668 } 1669 1670 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1671 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) { 1672 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 1673 (__v4si) __W, 1674 (__v4si) __A); 1675 } 1676 1677 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1678 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) { 1679 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 1680 (__v8si) __W, 1681 (__v8si) __A); 1682 } 1683 1684 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1685 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) { 1686 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 1687 (__v2df) __W, 1688 (__v2df) __A); 1689 } 1690 1691 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1692 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) { 1693 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 1694 (__v4df) __W, 1695 (__v4df) __A); 1696 } 1697 1698 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1699 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) { 1700 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 1701 (__v4sf) __W, 1702 (__v4sf) __A); 1703 } 1704 1705 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1706 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) { 1707 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 1708 (__v8sf) __W, 1709 (__v8sf) __A); 1710 } 1711 1712 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1713 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) { 1714 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 
1715 (__v2di) __W, 1716 (__v2di) __A); 1717 } 1718 1719 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1720 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) { 1721 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 1722 (__v4di) __W, 1723 (__v4di) __A); 1724 } 1725 1726 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1727 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) { 1728 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, 1729 (__v2df) __W, 1730 (__mmask8) __U); 1731 } 1732 1733 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1734 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A) { 1735 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, 1736 (__v2df) 1737 _mm_setzero_pd (), 1738 (__mmask8) __U); 1739 } 1740 1741 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1742 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) { 1743 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, 1744 (__v4df) __W, 1745 (__mmask8) __U); 1746 } 1747 1748 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1749 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) { 1750 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, 1751 (__v4df) 1752 _mm256_setzero_pd (), 1753 (__mmask8) __U); 1754 } 1755 1756 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1757 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 1758 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, 1759 (__v2di) __W, 1760 (__mmask8) __U); 1761 } 1762 1763 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1764 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) { 1765 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, 1766 (__v2di) 1767 _mm_setzero_si128 (), 1768 (__mmask8) __U); 1769 } 1770 1771 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1772 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 1773 return (__m256i) __builtin_ia32_compressdi256_mask 
((__v4di) __A, 1774 (__v4di) __W, 1775 (__mmask8) __U); 1776 } 1777 1778 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1779 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) { 1780 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, 1781 (__v4di) 1782 _mm256_setzero_si256 (), 1783 (__mmask8) __U); 1784 } 1785 1786 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1787 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) { 1788 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, 1789 (__v4sf) __W, 1790 (__mmask8) __U); 1791 } 1792 1793 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1794 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A) { 1795 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, 1796 (__v4sf) 1797 _mm_setzero_ps (), 1798 (__mmask8) __U); 1799 } 1800 1801 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1802 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) { 1803 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, 1804 (__v8sf) __W, 1805 (__mmask8) __U); 1806 } 1807 1808 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1809 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) { 1810 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, 1811 (__v8sf) 1812 _mm256_setzero_ps (), 1813 (__mmask8) __U); 1814 } 1815 1816 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1817 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { 1818 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, 1819 (__v4si) __W, 1820 (__mmask8) __U); 1821 } 1822 1823 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1824 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) { 1825 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, 1826 (__v4si) 1827 _mm_setzero_si128 (), 1828 (__mmask8) __U); 1829 } 1830 1831 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1832 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { 1833 return (__m256i) 
__builtin_ia32_compresssi256_mask ((__v8si) __A, 1834 (__v8si) __W, 1835 (__mmask8) __U); 1836 } 1837 1838 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1839 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) { 1840 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, 1841 (__v8si) 1842 _mm256_setzero_si256 (), 1843 (__mmask8) __U); 1844 } 1845 1846 static __inline__ void __DEFAULT_FN_ATTRS128 1847 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) { 1848 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P, 1849 (__v2df) __A, 1850 (__mmask8) __U); 1851 } 1852 1853 static __inline__ void __DEFAULT_FN_ATTRS256 1854 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) { 1855 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P, 1856 (__v4df) __A, 1857 (__mmask8) __U); 1858 } 1859 1860 static __inline__ void __DEFAULT_FN_ATTRS128 1861 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) { 1862 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P, 1863 (__v2di) __A, 1864 (__mmask8) __U); 1865 } 1866 1867 static __inline__ void __DEFAULT_FN_ATTRS256 1868 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) { 1869 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P, 1870 (__v4di) __A, 1871 (__mmask8) __U); 1872 } 1873 1874 static __inline__ void __DEFAULT_FN_ATTRS128 1875 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) { 1876 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P, 1877 (__v4sf) __A, 1878 (__mmask8) __U); 1879 } 1880 1881 static __inline__ void __DEFAULT_FN_ATTRS256 1882 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) { 1883 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P, 1884 (__v8sf) __A, 1885 (__mmask8) __U); 1886 } 1887 1888 static __inline__ void __DEFAULT_FN_ATTRS128 1889 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) { 1890 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P, 1891 
(__v4si) __A, 1892 (__mmask8) __U); 1893 } 1894 1895 static __inline__ void __DEFAULT_FN_ATTRS256 1896 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) { 1897 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P, 1898 (__v8si) __A, 1899 (__mmask8) __U); 1900 } 1901 1902 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1903 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) { 1904 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 1905 (__v2df)_mm_cvtepi32_pd(__A), 1906 (__v2df)__W); 1907 } 1908 1909 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1910 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { 1911 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 1912 (__v2df)_mm_cvtepi32_pd(__A), 1913 (__v2df)_mm_setzero_pd()); 1914 } 1915 1916 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1917 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) { 1918 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 1919 (__v4df)_mm256_cvtepi32_pd(__A), 1920 (__v4df)__W); 1921 } 1922 1923 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1924 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { 1925 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 1926 (__v4df)_mm256_cvtepi32_pd(__A), 1927 (__v4df)_mm256_setzero_pd()); 1928 } 1929 1930 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1931 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) { 1932 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1933 (__v4sf)_mm_cvtepi32_ps(__A), 1934 (__v4sf)__W); 1935 } 1936 1937 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1938 _mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) { 1939 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1940 (__v4sf)_mm_cvtepi32_ps(__A), 1941 (__v4sf)_mm_setzero_ps()); 1942 } 1943 1944 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1945 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) { 1946 return 
(__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1947 (__v8sf)_mm256_cvtepi32_ps(__A), 1948 (__v8sf)__W); 1949 } 1950 1951 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1952 _mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) { 1953 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1954 (__v8sf)_mm256_cvtepi32_ps(__A), 1955 (__v8sf)_mm256_setzero_ps()); 1956 } 1957 1958 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1959 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { 1960 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, 1961 (__v4si) __W, 1962 (__mmask8) __U); 1963 } 1964 1965 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1966 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) { 1967 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, 1968 (__v4si) 1969 _mm_setzero_si128 (), 1970 (__mmask8) __U); 1971 } 1972 1973 static __inline__ __m128i __DEFAULT_FN_ATTRS256 1974 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { 1975 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 1976 (__v4si)_mm256_cvtpd_epi32(__A), 1977 (__v4si)__W); 1978 } 1979 1980 static __inline__ __m128i __DEFAULT_FN_ATTRS256 1981 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) { 1982 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 1983 (__v4si)_mm256_cvtpd_epi32(__A), 1984 (__v4si)_mm_setzero_si128()); 1985 } 1986 1987 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1988 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) { 1989 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, 1990 (__v4sf) __W, 1991 (__mmask8) __U); 1992 } 1993 1994 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1995 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) { 1996 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, 1997 (__v4sf) 1998 _mm_setzero_ps (), 1999 (__mmask8) __U); 2000 } 2001 2002 static __inline__ __m128 __DEFAULT_FN_ATTRS256 2003 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) { 2004 
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2005 (__v4sf)_mm256_cvtpd_ps(__A), 2006 (__v4sf)__W); 2007 } 2008 2009 static __inline__ __m128 __DEFAULT_FN_ATTRS256 2010 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) { 2011 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2012 (__v4sf)_mm256_cvtpd_ps(__A), 2013 (__v4sf)_mm_setzero_ps()); 2014 } 2015 2016 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2017 _mm_cvtpd_epu32 (__m128d __A) { 2018 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2019 (__v4si) 2020 _mm_setzero_si128 (), 2021 (__mmask8) -1); 2022 } 2023 2024 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2025 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { 2026 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2027 (__v4si) __W, 2028 (__mmask8) __U); 2029 } 2030 2031 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2032 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) { 2033 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2034 (__v4si) 2035 _mm_setzero_si128 (), 2036 (__mmask8) __U); 2037 } 2038 2039 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2040 _mm256_cvtpd_epu32 (__m256d __A) { 2041 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2042 (__v4si) 2043 _mm_setzero_si128 (), 2044 (__mmask8) -1); 2045 } 2046 2047 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2048 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { 2049 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2050 (__v4si) __W, 2051 (__mmask8) __U); 2052 } 2053 2054 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2055 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) { 2056 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2057 (__v4si) 2058 _mm_setzero_si128 (), 2059 (__mmask8) __U); 2060 } 2061 2062 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2063 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) { 2064 return 
(__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2065 (__v4si)_mm_cvtps_epi32(__A), 2066 (__v4si)__W); 2067 } 2068 2069 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2070 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) { 2071 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2072 (__v4si)_mm_cvtps_epi32(__A), 2073 (__v4si)_mm_setzero_si128()); 2074 } 2075 2076 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2077 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { 2078 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2079 (__v8si)_mm256_cvtps_epi32(__A), 2080 (__v8si)__W); 2081 } 2082 2083 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2084 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) { 2085 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2086 (__v8si)_mm256_cvtps_epi32(__A), 2087 (__v8si)_mm256_setzero_si256()); 2088 } 2089 2090 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2091 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) { 2092 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2093 (__v2df)_mm_cvtps_pd(__A), 2094 (__v2df)__W); 2095 } 2096 2097 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2098 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { 2099 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2100 (__v2df)_mm_cvtps_pd(__A), 2101 (__v2df)_mm_setzero_pd()); 2102 } 2103 2104 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2105 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) { 2106 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2107 (__v4df)_mm256_cvtps_pd(__A), 2108 (__v4df)__W); 2109 } 2110 2111 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2112 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { 2113 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2114 (__v4df)_mm256_cvtps_pd(__A), 2115 (__v4df)_mm256_setzero_pd()); 2116 } 2117 2118 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2119 _mm_cvtps_epu32 (__m128 __A) { 2120 return (__m128i) 
__builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2121 (__v4si) 2122 _mm_setzero_si128 (), 2123 (__mmask8) -1); 2124 } 2125 2126 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2127 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { 2128 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2129 (__v4si) __W, 2130 (__mmask8) __U); 2131 } 2132 2133 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2134 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) { 2135 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2136 (__v4si) 2137 _mm_setzero_si128 (), 2138 (__mmask8) __U); 2139 } 2140 2141 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2142 _mm256_cvtps_epu32 (__m256 __A) { 2143 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2144 (__v8si) 2145 _mm256_setzero_si256 (), 2146 (__mmask8) -1); 2147 } 2148 2149 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2150 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) { 2151 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2152 (__v8si) __W, 2153 (__mmask8) __U); 2154 } 2155 2156 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2157 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) { 2158 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2159 (__v8si) 2160 _mm256_setzero_si256 (), 2161 (__mmask8) __U); 2162 } 2163 2164 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2165 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { 2166 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, 2167 (__v4si) __W, 2168 (__mmask8) __U); 2169 } 2170 2171 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2172 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) { 2173 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, 2174 (__v4si) 2175 _mm_setzero_si128 (), 2176 (__mmask8) __U); 2177 } 2178 2179 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2180 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { 2181 
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2182 (__v4si)_mm256_cvttpd_epi32(__A), 2183 (__v4si)__W); 2184 } 2185 2186 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2187 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) { 2188 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2189 (__v4si)_mm256_cvttpd_epi32(__A), 2190 (__v4si)_mm_setzero_si128()); 2191 } 2192 2193 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2194 _mm_cvttpd_epu32 (__m128d __A) { 2195 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2196 (__v4si) 2197 _mm_setzero_si128 (), 2198 (__mmask8) -1); 2199 } 2200 2201 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2202 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { 2203 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2204 (__v4si) __W, 2205 (__mmask8) __U); 2206 } 2207 2208 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2209 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) { 2210 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2211 (__v4si) 2212 _mm_setzero_si128 (), 2213 (__mmask8) __U); 2214 } 2215 2216 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2217 _mm256_cvttpd_epu32 (__m256d __A) { 2218 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2219 (__v4si) 2220 _mm_setzero_si128 (), 2221 (__mmask8) -1); 2222 } 2223 2224 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2225 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { 2226 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2227 (__v4si) __W, 2228 (__mmask8) __U); 2229 } 2230 2231 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2232 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) { 2233 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2234 (__v4si) 2235 _mm_setzero_si128 (), 2236 (__mmask8) __U); 2237 } 2238 2239 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2240 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) 
{ 2241 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2242 (__v4si)_mm_cvttps_epi32(__A), 2243 (__v4si)__W); 2244 } 2245 2246 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2247 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) { 2248 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2249 (__v4si)_mm_cvttps_epi32(__A), 2250 (__v4si)_mm_setzero_si128()); 2251 } 2252 2253 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2254 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { 2255 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2256 (__v8si)_mm256_cvttps_epi32(__A), 2257 (__v8si)__W); 2258 } 2259 2260 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2261 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) { 2262 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2263 (__v8si)_mm256_cvttps_epi32(__A), 2264 (__v8si)_mm256_setzero_si256()); 2265 } 2266 2267 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2268 _mm_cvttps_epu32 (__m128 __A) { 2269 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2270 (__v4si) 2271 _mm_setzero_si128 (), 2272 (__mmask8) -1); 2273 } 2274 2275 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2276 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { 2277 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2278 (__v4si) __W, 2279 (__mmask8) __U); 2280 } 2281 2282 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2283 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) { 2284 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2285 (__v4si) 2286 _mm_setzero_si128 (), 2287 (__mmask8) __U); 2288 } 2289 2290 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2291 _mm256_cvttps_epu32 (__m256 __A) { 2292 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2293 (__v8si) 2294 _mm256_setzero_si256 (), 2295 (__mmask8) -1); 2296 } 2297 2298 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2299 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, 
__m256 __A) { 2300 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2301 (__v8si) __W, 2302 (__mmask8) __U); 2303 } 2304 2305 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2306 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) { 2307 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2308 (__v8si) 2309 _mm256_setzero_si256 (), 2310 (__mmask8) __U); 2311 } 2312 2313 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2314 _mm_cvtepu32_pd (__m128i __A) { 2315 return (__m128d) __builtin_convertvector( 2316 __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df); 2317 } 2318 2319 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2320 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) { 2321 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 2322 (__v2df)_mm_cvtepu32_pd(__A), 2323 (__v2df)__W); 2324 } 2325 2326 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2327 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { 2328 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 2329 (__v2df)_mm_cvtepu32_pd(__A), 2330 (__v2df)_mm_setzero_pd()); 2331 } 2332 2333 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2334 _mm256_cvtepu32_pd (__m128i __A) { 2335 return (__m256d)__builtin_convertvector((__v4su)__A, __v4df); 2336 } 2337 2338 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2339 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) { 2340 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 2341 (__v4df)_mm256_cvtepu32_pd(__A), 2342 (__v4df)__W); 2343 } 2344 2345 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2346 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { 2347 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 2348 (__v4df)_mm256_cvtepu32_pd(__A), 2349 (__v4df)_mm256_setzero_pd()); 2350 } 2351 2352 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2353 _mm_cvtepu32_ps (__m128i __A) { 2354 return (__m128)__builtin_convertvector((__v4su)__A, __v4sf); 2355 } 2356 2357 static 
__inline__ __m128 __DEFAULT_FN_ATTRS128 2358 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) { 2359 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2360 (__v4sf)_mm_cvtepu32_ps(__A), 2361 (__v4sf)__W); 2362 } 2363 2364 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2365 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) { 2366 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2367 (__v4sf)_mm_cvtepu32_ps(__A), 2368 (__v4sf)_mm_setzero_ps()); 2369 } 2370 2371 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2372 _mm256_cvtepu32_ps (__m256i __A) { 2373 return (__m256)__builtin_convertvector((__v8su)__A, __v8sf); 2374 } 2375 2376 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2377 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) { 2378 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2379 (__v8sf)_mm256_cvtepu32_ps(__A), 2380 (__v8sf)__W); 2381 } 2382 2383 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2384 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) { 2385 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2386 (__v8sf)_mm256_cvtepu32_ps(__A), 2387 (__v8sf)_mm256_setzero_ps()); 2388 } 2389 2390 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2391 _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2392 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2393 (__v2df)_mm_div_pd(__A, __B), 2394 (__v2df)__W); 2395 } 2396 2397 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2398 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2399 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2400 (__v2df)_mm_div_pd(__A, __B), 2401 (__v2df)_mm_setzero_pd()); 2402 } 2403 2404 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2405 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2406 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2407 (__v4df)_mm256_div_pd(__A, __B), 2408 (__v4df)__W); 2409 } 2410 2411 static __inline__ __m256d 
__DEFAULT_FN_ATTRS256 2412 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2413 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2414 (__v4df)_mm256_div_pd(__A, __B), 2415 (__v4df)_mm256_setzero_pd()); 2416 } 2417 2418 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2419 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2420 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2421 (__v4sf)_mm_div_ps(__A, __B), 2422 (__v4sf)__W); 2423 } 2424 2425 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2426 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2427 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2428 (__v4sf)_mm_div_ps(__A, __B), 2429 (__v4sf)_mm_setzero_ps()); 2430 } 2431 2432 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2433 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2434 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2435 (__v8sf)_mm256_div_ps(__A, __B), 2436 (__v8sf)__W); 2437 } 2438 2439 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2440 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2441 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2442 (__v8sf)_mm256_div_ps(__A, __B), 2443 (__v8sf)_mm256_setzero_ps()); 2444 } 2445 2446 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2447 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) { 2448 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, 2449 (__v2df) __W, 2450 (__mmask8) __U); 2451 } 2452 2453 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2454 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A) { 2455 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, 2456 (__v2df) 2457 _mm_setzero_pd (), 2458 (__mmask8) __U); 2459 } 2460 2461 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2462 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) { 2463 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, 2464 (__v4df) __W, 2465 (__mmask8) __U); 
2466 } 2467 2468 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2469 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) { 2470 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, 2471 (__v4df) 2472 _mm256_setzero_pd (), 2473 (__mmask8) __U); 2474 } 2475 2476 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2477 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 2478 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, 2479 (__v2di) __W, 2480 (__mmask8) __U); 2481 } 2482 2483 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2484 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) { 2485 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, 2486 (__v2di) 2487 _mm_setzero_si128 (), 2488 (__mmask8) __U); 2489 } 2490 2491 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2492 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 2493 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, 2494 (__v4di) __W, 2495 (__mmask8) __U); 2496 } 2497 2498 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2499 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) { 2500 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, 2501 (__v4di) 2502 _mm256_setzero_si256 (), 2503 (__mmask8) __U); 2504 } 2505 2506 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2507 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) { 2508 return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P, 2509 (__v2df) __W, 2510 (__mmask8) 2511 __U); 2512 } 2513 2514 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2515 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { 2516 return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P, 2517 (__v2df) 2518 _mm_setzero_pd (), 2519 (__mmask8) 2520 __U); 2521 } 2522 2523 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2524 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) { 2525 return (__m256d) 
__builtin_ia32_expandloaddf256_mask ((const __v4df *) __P, 2526 (__v4df) __W, 2527 (__mmask8) 2528 __U); 2529 } 2530 2531 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2532 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { 2533 return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P, 2534 (__v4df) 2535 _mm256_setzero_pd (), 2536 (__mmask8) 2537 __U); 2538 } 2539 2540 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2541 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) { 2542 return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P, 2543 (__v2di) __W, 2544 (__mmask8) 2545 __U); 2546 } 2547 2548 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2549 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { 2550 return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P, 2551 (__v2di) 2552 _mm_setzero_si128 (), 2553 (__mmask8) 2554 __U); 2555 } 2556 2557 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2558 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U, 2559 void const *__P) { 2560 return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P, 2561 (__v4di) __W, 2562 (__mmask8) 2563 __U); 2564 } 2565 2566 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2567 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { 2568 return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P, 2569 (__v4di) 2570 _mm256_setzero_si256 (), 2571 (__mmask8) 2572 __U); 2573 } 2574 2575 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2576 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) { 2577 return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P, 2578 (__v4sf) __W, 2579 (__mmask8) __U); 2580 } 2581 2582 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2583 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { 2584 return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P, 2585 (__v4sf) 2586 
_mm_setzero_ps (), 2587 (__mmask8) 2588 __U); 2589 } 2590 2591 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2592 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) { 2593 return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P, 2594 (__v8sf) __W, 2595 (__mmask8) __U); 2596 } 2597 2598 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2599 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { 2600 return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P, 2601 (__v8sf) 2602 _mm256_setzero_ps (), 2603 (__mmask8) 2604 __U); 2605 } 2606 2607 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2608 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) { 2609 return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P, 2610 (__v4si) __W, 2611 (__mmask8) 2612 __U); 2613 } 2614 2615 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2616 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { 2617 return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P, 2618 (__v4si) 2619 _mm_setzero_si128 (), 2620 (__mmask8) __U); 2621 } 2622 2623 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2624 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U, 2625 void const *__P) { 2626 return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P, 2627 (__v8si) __W, 2628 (__mmask8) 2629 __U); 2630 } 2631 2632 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2633 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { 2634 return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P, 2635 (__v8si) 2636 _mm256_setzero_si256 (), 2637 (__mmask8) 2638 __U); 2639 } 2640 2641 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2642 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) { 2643 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, 2644 (__v4sf) __W, 2645 (__mmask8) __U); 2646 } 2647 2648 static __inline__ __m128 
__DEFAULT_FN_ATTRS128 2649 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A) { 2650 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, 2651 (__v4sf) 2652 _mm_setzero_ps (), 2653 (__mmask8) __U); 2654 } 2655 2656 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2657 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) { 2658 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, 2659 (__v8sf) __W, 2660 (__mmask8) __U); 2661 } 2662 2663 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2664 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) { 2665 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, 2666 (__v8sf) 2667 _mm256_setzero_ps (), 2668 (__mmask8) __U); 2669 } 2670 2671 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2672 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { 2673 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, 2674 (__v4si) __W, 2675 (__mmask8) __U); 2676 } 2677 2678 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2679 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) { 2680 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, 2681 (__v4si) 2682 _mm_setzero_si128 (), 2683 (__mmask8) __U); 2684 } 2685 2686 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2687 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { 2688 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, 2689 (__v8si) __W, 2690 (__mmask8) __U); 2691 } 2692 2693 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2694 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) { 2695 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, 2696 (__v8si) 2697 _mm256_setzero_si256 (), 2698 (__mmask8) __U); 2699 } 2700 2701 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2702 _mm_getexp_pd (__m128d __A) { 2703 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2704 (__v2df) 2705 _mm_setzero_pd (), 2706 (__mmask8) -1); 2707 } 2708 2709 static __inline__ __m128d __DEFAULT_FN_ATTRS128 
2710 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) { 2711 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2712 (__v2df) __W, 2713 (__mmask8) __U); 2714 } 2715 2716 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2717 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) { 2718 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2719 (__v2df) 2720 _mm_setzero_pd (), 2721 (__mmask8) __U); 2722 } 2723 2724 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2725 _mm256_getexp_pd (__m256d __A) { 2726 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 2727 (__v4df) 2728 _mm256_setzero_pd (), 2729 (__mmask8) -1); 2730 } 2731 2732 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2733 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) { 2734 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 2735 (__v4df) __W, 2736 (__mmask8) __U); 2737 } 2738 2739 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2740 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) { 2741 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 2742 (__v4df) 2743 _mm256_setzero_pd (), 2744 (__mmask8) __U); 2745 } 2746 2747 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2748 _mm_getexp_ps (__m128 __A) { 2749 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 2750 (__v4sf) 2751 _mm_setzero_ps (), 2752 (__mmask8) -1); 2753 } 2754 2755 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2756 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) { 2757 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 2758 (__v4sf) __W, 2759 (__mmask8) __U); 2760 } 2761 2762 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2763 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) { 2764 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 2765 (__v4sf) 2766 _mm_setzero_ps (), 2767 (__mmask8) __U); 2768 } 2769 2770 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2771 _mm256_getexp_ps (__m256 __A) { 2772 return (__m256) 
__builtin_ia32_getexpps256_mask ((__v8sf) __A, 2773 (__v8sf) 2774 _mm256_setzero_ps (), 2775 (__mmask8) -1); 2776 } 2777 2778
/* _mm256_mask_getexp_ps: forwards __A, __W and mask __U to __builtin_ia32_getexpps256_mask. NOTE(review): "mask" forms presumably keep __W where a mask bit is clear and "maskz" forms zero those lanes - confirm against the Intel Intrinsics Guide. */
static __inline__ __m256 __DEFAULT_FN_ATTRS256 2779 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) { 2780 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 2781 (__v8sf) __W, 2782 (__mmask8) __U); 2783 } 2784 2785
/* _mm256_maskz_getexp_ps: calls __builtin_ia32_getexpps256_mask with __A, a zero vector from _mm256_setzero_ps() and mask __U. */
static __inline__ __m256 __DEFAULT_FN_ATTRS256 2786 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) { 2787 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 2788 (__v8sf) 2789 _mm256_setzero_ps (), 2790 (__mmask8) __U); 2791 } 2792 2793
/* _mm_mask_max_pd: computes _mm_max_pd(__A, __B) and hands it, together with __W, to __builtin_ia32_selectpd_128 under mask __U. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128 2794 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2795 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2796 (__v2df)_mm_max_pd(__A, __B), 2797 (__v2df)__W); 2798 } 2799 2800
/* _mm_maskz_max_pd: selects under mask __U between _mm_max_pd(__A, __B) and a zero vector from _mm_setzero_pd(). */
static __inline__ __m128d __DEFAULT_FN_ATTRS128 2801 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2802 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2803 (__v2df)_mm_max_pd(__A, __B), 2804 (__v2df)_mm_setzero_pd()); 2805 } 2806 2807
/* _mm256_mask_max_pd: selects under mask __U between _mm256_max_pd(__A, __B) and __W via __builtin_ia32_selectpd_256. */
static __inline__ __m256d __DEFAULT_FN_ATTRS256 2808 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2809 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2810 (__v4df)_mm256_max_pd(__A, __B), 2811 (__v4df)__W); 2812 } 2813 2814
/* _mm256_maskz_max_pd: selects under mask __U between _mm256_max_pd(__A, __B) and a zero vector from _mm256_setzero_pd(). */
static __inline__ __m256d __DEFAULT_FN_ATTRS256 2815 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2816 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2817 (__v4df)_mm256_max_pd(__A, __B), 2818 (__v4df)_mm256_setzero_pd()); 2819 } 2820 2821
/* _mm_mask_max_ps: selects under mask __U between _mm_max_ps(__A, __B) and __W via __builtin_ia32_selectps_128. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128 2822 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2823 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2824 (__v4sf)_mm_max_ps(__A, __B), 2825 (__v4sf)__W); 2826 } 2827 2828
/* _mm_maskz_max_ps: selects under mask __U between _mm_max_ps(__A, __B) and a zero vector from _mm_setzero_ps(). */
static __inline__ __m128 __DEFAULT_FN_ATTRS128 2829 
_mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2830 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2831 (__v4sf)_mm_max_ps(__A, __B), 2832 (__v4sf)_mm_setzero_ps()); 2833 } 2834 2835
/* _mm256_mask_max_ps: selects under mask __U between _mm256_max_ps(__A, __B) and __W via __builtin_ia32_selectps_256. NOTE(review): presumably lanes with a clear mask bit take the last select operand (__W here, zero in "maskz" forms) - confirm against the Intel Intrinsics Guide. */
static __inline__ __m256 __DEFAULT_FN_ATTRS256 2836 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2837 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2838 (__v8sf)_mm256_max_ps(__A, __B), 2839 (__v8sf)__W); 2840 } 2841 2842
/* _mm256_maskz_max_ps: selects under mask __U between _mm256_max_ps(__A, __B) and a zero vector from _mm256_setzero_ps(). */
static __inline__ __m256 __DEFAULT_FN_ATTRS256 2843 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2844 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2845 (__v8sf)_mm256_max_ps(__A, __B), 2846 (__v8sf)_mm256_setzero_ps()); 2847 } 2848 2849
/* _mm_mask_min_pd: selects under mask __U between _mm_min_pd(__A, __B) and __W via __builtin_ia32_selectpd_128. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128 2850 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2851 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2852 (__v2df)_mm_min_pd(__A, __B), 2853 (__v2df)__W); 2854 } 2855 2856
/* _mm_maskz_min_pd: selects under mask __U between _mm_min_pd(__A, __B) and a zero vector from _mm_setzero_pd(). */
static __inline__ __m128d __DEFAULT_FN_ATTRS128 2857 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2858 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2859 (__v2df)_mm_min_pd(__A, __B), 2860 (__v2df)_mm_setzero_pd()); 2861 } 2862 2863
/* _mm256_mask_min_pd: selects under mask __U between _mm256_min_pd(__A, __B) and __W via __builtin_ia32_selectpd_256. */
static __inline__ __m256d __DEFAULT_FN_ATTRS256 2864 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2865 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2866 (__v4df)_mm256_min_pd(__A, __B), 2867 (__v4df)__W); 2868 } 2869 2870
/* _mm256_maskz_min_pd: selects under mask __U between _mm256_min_pd(__A, __B) and a zero vector from _mm256_setzero_pd(). */
static __inline__ __m256d __DEFAULT_FN_ATTRS256 2871 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2872 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2873 (__v4df)_mm256_min_pd(__A, __B), 2874 (__v4df)_mm256_setzero_pd()); 2875 } 2876 2877
/* _mm_mask_min_ps: selects under mask __U between _mm_min_ps(__A, __B) and __W via __builtin_ia32_selectps_128. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128 2878 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2879 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2880 (__v4sf)_mm_min_ps(__A, __B), 
2881 (__v4sf)__W); 2882 } 2883 2884
/* _mm_maskz_min_ps: selects under mask __U between _mm_min_ps(__A, __B) and a zero vector from _mm_setzero_ps(), via __builtin_ia32_selectps_128. NOTE(review): presumably lanes with a clear mask bit take the last select operand - confirm against the Intel Intrinsics Guide. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128 2885 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2886 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2887 (__v4sf)_mm_min_ps(__A, __B), 2888 (__v4sf)_mm_setzero_ps()); 2889 } 2890 2891
/* _mm256_mask_min_ps: selects under mask __U between _mm256_min_ps(__A, __B) and __W via __builtin_ia32_selectps_256. */
static __inline__ __m256 __DEFAULT_FN_ATTRS256 2892 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2893 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2894 (__v8sf)_mm256_min_ps(__A, __B), 2895 (__v8sf)__W); 2896 } 2897 2898
/* _mm256_maskz_min_ps: selects under mask __U between _mm256_min_ps(__A, __B) and a zero vector from _mm256_setzero_ps(). */
static __inline__ __m256 __DEFAULT_FN_ATTRS256 2899 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2900 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2901 (__v8sf)_mm256_min_ps(__A, __B), 2902 (__v8sf)_mm256_setzero_ps()); 2903 } 2904 2905
/* _mm_mask_mul_pd: selects under mask __U between _mm_mul_pd(__A, __B) and __W via __builtin_ia32_selectpd_128. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128 2906 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2907 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2908 (__v2df)_mm_mul_pd(__A, __B), 2909 (__v2df)__W); 2910 } 2911 2912
/* _mm_maskz_mul_pd: selects under mask __U between _mm_mul_pd(__A, __B) and a zero vector from _mm_setzero_pd(). */
static __inline__ __m128d __DEFAULT_FN_ATTRS128 2913 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2914 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2915 (__v2df)_mm_mul_pd(__A, __B), 2916 (__v2df)_mm_setzero_pd()); 2917 } 2918 2919
/* _mm256_mask_mul_pd: selects under mask __U between _mm256_mul_pd(__A, __B) and __W via __builtin_ia32_selectpd_256. */
static __inline__ __m256d __DEFAULT_FN_ATTRS256 2920 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2921 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2922 (__v4df)_mm256_mul_pd(__A, __B), 2923 (__v4df)__W); 2924 } 2925 2926
/* _mm256_maskz_mul_pd: selects under mask __U between _mm256_mul_pd(__A, __B) and a zero vector from _mm256_setzero_pd(). */
static __inline__ __m256d __DEFAULT_FN_ATTRS256 2927 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2928 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2929 (__v4df)_mm256_mul_pd(__A, __B), 2930 (__v4df)_mm256_setzero_pd()); 2931 } 2932 2933
/* _mm_mask_mul_ps: selects under mask __U between _mm_mul_ps(__A, __B) and __W via __builtin_ia32_selectps_128. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128 2934 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2935 
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2936 (__v4sf)_mm_mul_ps(__A, __B), 2937 (__v4sf)__W); 2938 } 2939 2940
/* _mm_maskz_mul_ps: selects under mask __U between _mm_mul_ps(__A, __B) and a zero vector from _mm_setzero_ps(), via __builtin_ia32_selectps_128. NOTE(review): presumably lanes with a clear mask bit take the last select operand - confirm against the Intel Intrinsics Guide. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128 2941 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2942 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2943 (__v4sf)_mm_mul_ps(__A, __B), 2944 (__v4sf)_mm_setzero_ps()); 2945 } 2946 2947
/* _mm256_mask_mul_ps: selects under mask __U between _mm256_mul_ps(__A, __B) and __W via __builtin_ia32_selectps_256. */
static __inline__ __m256 __DEFAULT_FN_ATTRS256 2948 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2949 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2950 (__v8sf)_mm256_mul_ps(__A, __B), 2951 (__v8sf)__W); 2952 } 2953 2954
/* _mm256_maskz_mul_ps: selects under mask __U between _mm256_mul_ps(__A, __B) and a zero vector from _mm256_setzero_ps(). */
static __inline__ __m256 __DEFAULT_FN_ATTRS256 2955 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2956 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2957 (__v8sf)_mm256_mul_ps(__A, __B), 2958 (__v8sf)_mm256_setzero_ps()); 2959 } 2960 2961
/* _mm_mask_abs_epi32: selects under mask __U between _mm_abs_epi32(__A) and __W via __builtin_ia32_selectd_128. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 2962 _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) { 2963 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2964 (__v4si)_mm_abs_epi32(__A), 2965 (__v4si)__W); 2966 } 2967 2968
/* _mm_maskz_abs_epi32: selects under mask __U between _mm_abs_epi32(__A) and a zero vector from _mm_setzero_si128(). */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 2969 _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) { 2970 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2971 (__v4si)_mm_abs_epi32(__A), 2972 (__v4si)_mm_setzero_si128()); 2973 } 2974 2975
/* _mm256_mask_abs_epi32: selects under mask __U between _mm256_abs_epi32(__A) and __W via __builtin_ia32_selectd_256. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 2976 _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) { 2977 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2978 (__v8si)_mm256_abs_epi32(__A), 2979 (__v8si)__W); 2980 } 2981 2982
/* _mm256_maskz_abs_epi32: selects under mask __U between _mm256_abs_epi32(__A) and a zero vector from _mm256_setzero_si256(). */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 2983 _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) { 2984 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2985 (__v8si)_mm256_abs_epi32(__A), 2986 (__v8si)_mm256_setzero_si256()); 2987 } 2988 2989
/* _mm_abs_epi64: returns __builtin_elementwise_abs applied to __A viewed as __v2di (two signed 64-bit lanes). */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 2990 _mm_abs_epi64 (__m128i __A) { 
2991 return (__m128i)__builtin_elementwise_abs((__v2di)__A); 2992 } 2993 2994
/* _mm_mask_abs_epi64: selects under mask __U between _mm_abs_epi64(__A) and __W via __builtin_ia32_selectq_128. NOTE(review): presumably lanes with a clear mask bit take the last select operand (__W here, zero in "maskz" forms) - confirm against the Intel Intrinsics Guide. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 2995 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 2996 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 2997 (__v2di)_mm_abs_epi64(__A), 2998 (__v2di)__W); 2999 } 3000 3001
/* _mm_maskz_abs_epi64: selects under mask __U between _mm_abs_epi64(__A) and a zero vector from _mm_setzero_si128(). */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 3002 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) { 3003 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 3004 (__v2di)_mm_abs_epi64(__A), 3005 (__v2di)_mm_setzero_si128()); 3006 } 3007 3008
/* _mm256_abs_epi64: returns __builtin_elementwise_abs applied to __A viewed as __v4di. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 3009 _mm256_abs_epi64 (__m256i __A) { 3010 return (__m256i)__builtin_elementwise_abs((__v4di)__A); 3011 } 3012 3013
/* _mm256_mask_abs_epi64: selects under mask __U between _mm256_abs_epi64(__A) and __W via __builtin_ia32_selectq_256. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 3014 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 3015 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 3016 (__v4di)_mm256_abs_epi64(__A), 3017 (__v4di)__W); 3018 } 3019 3020
/* _mm256_maskz_abs_epi64: selects under mask __U between _mm256_abs_epi64(__A) and a zero vector from _mm256_setzero_si256(). */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 3021 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) { 3022 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 3023 (__v4di)_mm256_abs_epi64(__A), 3024 (__v4di)_mm256_setzero_si256()); 3025 } 3026 3027
/* _mm_maskz_max_epi32: selects under mask __M between _mm_max_epi32(__A, __B) and a zero vector via __builtin_ia32_selectd_128. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 3028 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) { 3029 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3030 (__v4si)_mm_max_epi32(__A, __B), 3031 (__v4si)_mm_setzero_si128()); 3032 } 3033 3034
/* _mm_mask_max_epi32: selects under mask __M between _mm_max_epi32(__A, __B) and __W via __builtin_ia32_selectd_128. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 3035 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3036 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3037 (__v4si)_mm_max_epi32(__A, __B), 3038 (__v4si)__W); 3039 } 3040 3041
/* _mm256_maskz_max_epi32: selects under mask __M between _mm256_max_epi32(__A, __B) and a zero vector via __builtin_ia32_selectd_256. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 3042 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) { 3043 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3044 
(__v8si)_mm256_max_epi32(__A, __B), 3045 (__v8si)_mm256_setzero_si256()); 3046 } 3047 3048
/* _mm256_mask_max_epi32: selects under mask __M between _mm256_max_epi32(__A, __B) and __W via __builtin_ia32_selectd_256. NOTE(review): presumably lanes with a clear mask bit take the last select operand - confirm against the Intel Intrinsics Guide. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 3049 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3050 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3051 (__v8si)_mm256_max_epi32(__A, __B), 3052 (__v8si)__W); 3053 } 3054 3055
/* _mm_max_epi64: returns __builtin_elementwise_max of __A and __B, both viewed as __v2di (signed 64-bit lanes). */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 3056 _mm_max_epi64 (__m128i __A, __m128i __B) { 3057 return (__m128i)__builtin_elementwise_max((__v2di)__A, (__v2di)__B); 3058 } 3059 3060
/* _mm_maskz_max_epi64: selects under mask __M between _mm_max_epi64(__A, __B) and a zero vector via __builtin_ia32_selectq_128. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 3061 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { 3062 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3063 (__v2di)_mm_max_epi64(__A, __B), 3064 (__v2di)_mm_setzero_si128()); 3065 } 3066 3067
/* _mm_mask_max_epi64: selects under mask __M between _mm_max_epi64(__A, __B) and __W via __builtin_ia32_selectq_128. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 3068 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3069 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3070 (__v2di)_mm_max_epi64(__A, __B), 3071 (__v2di)__W); 3072 } 3073 3074
/* _mm256_max_epi64: returns __builtin_elementwise_max of __A and __B, both viewed as __v4di. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 3075 _mm256_max_epi64 (__m256i __A, __m256i __B) { 3076 return (__m256i)__builtin_elementwise_max((__v4di)__A, (__v4di)__B); 3077 } 3078 3079
/* _mm256_maskz_max_epi64: selects under mask __M between _mm256_max_epi64(__A, __B) and a zero vector via __builtin_ia32_selectq_256. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 3080 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { 3081 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3082 (__v4di)_mm256_max_epi64(__A, __B), 3083 (__v4di)_mm256_setzero_si256()); 3084 } 3085 3086
/* _mm256_mask_max_epi64: selects under mask __M between _mm256_max_epi64(__A, __B) and __W via __builtin_ia32_selectq_256. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 3087 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3088 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3089 (__v4di)_mm256_max_epi64(__A, __B), 3090 (__v4di)__W); 3091 } 3092 3093
/* _mm_maskz_max_epu32: selects under mask __M between _mm_max_epu32(__A, __B) and a zero vector via __builtin_ia32_selectd_128. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 3094 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) { 3095 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 
3096 (__v4si)_mm_max_epu32(__A, __B), 3097 (__v4si)_mm_setzero_si128()); 3098 } 3099 3100
/* _mm_mask_max_epu32: selects under mask __M between _mm_max_epu32(__A, __B) and __W via __builtin_ia32_selectd_128. NOTE(review): presumably lanes with a clear mask bit take the last select operand - confirm against the Intel Intrinsics Guide. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 3101 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3102 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3103 (__v4si)_mm_max_epu32(__A, __B), 3104 (__v4si)__W); 3105 } 3106 3107
/* _mm256_maskz_max_epu32: selects under mask __M between _mm256_max_epu32(__A, __B) and a zero vector via __builtin_ia32_selectd_256. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 3108 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) { 3109 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3110 (__v8si)_mm256_max_epu32(__A, __B), 3111 (__v8si)_mm256_setzero_si256()); 3112 } 3113 3114
/* _mm256_mask_max_epu32: selects under mask __M between _mm256_max_epu32(__A, __B) and __W via __builtin_ia32_selectd_256. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 3115 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3116 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3117 (__v8si)_mm256_max_epu32(__A, __B), 3118 (__v8si)__W); 3119 } 3120 3121
/* _mm_max_epu64: returns __builtin_elementwise_max of __A and __B viewed as __v2du, so the comparison uses the unsigned element type. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 3122 _mm_max_epu64 (__m128i __A, __m128i __B) { 3123 return (__m128i)__builtin_elementwise_max((__v2du)__A, (__v2du)__B); 3124 } 3125 3126
/* _mm_maskz_max_epu64: selects under mask __M between _mm_max_epu64(__A, __B) and a zero vector via __builtin_ia32_selectq_128. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 3127 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { 3128 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3129 (__v2di)_mm_max_epu64(__A, __B), 3130 (__v2di)_mm_setzero_si128()); 3131 } 3132 3133
/* _mm_mask_max_epu64: selects under mask __M between _mm_max_epu64(__A, __B) and __W via __builtin_ia32_selectq_128. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 3134 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3135 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3136 (__v2di)_mm_max_epu64(__A, __B), 3137 (__v2di)__W); 3138 } 3139 3140
/* _mm256_max_epu64: returns __builtin_elementwise_max of __A and __B viewed as __v4du (unsigned element type). */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 3141 _mm256_max_epu64 (__m256i __A, __m256i __B) { 3142 return (__m256i)__builtin_elementwise_max((__v4du)__A, (__v4du)__B); 3143 } 3144 3145
/* _mm256_maskz_max_epu64: selects under mask __M between _mm256_max_epu64(__A, __B) and a zero vector via __builtin_ia32_selectq_256. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 3146 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { 3147 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3148 
(__v4di)_mm256_max_epu64(__A, __B), 3149 (__v4di)_mm256_setzero_si256()); 3150 } 3151 3152
/* _mm256_mask_max_epu64: selects under mask __M between _mm256_max_epu64(__A, __B) and __W via __builtin_ia32_selectq_256. NOTE(review): presumably lanes with a clear mask bit take the last select operand - confirm against the Intel Intrinsics Guide. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 3153 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3154 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3155 (__v4di)_mm256_max_epu64(__A, __B), 3156 (__v4di)__W); 3157 } 3158 3159
/* _mm_maskz_min_epi32: selects under mask __M between _mm_min_epi32(__A, __B) and a zero vector via __builtin_ia32_selectd_128. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 3160 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) { 3161 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3162 (__v4si)_mm_min_epi32(__A, __B), 3163 (__v4si)_mm_setzero_si128()); 3164 } 3165 3166
/* _mm_mask_min_epi32: selects under mask __M between _mm_min_epi32(__A, __B) and __W via __builtin_ia32_selectd_128. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 3167 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3168 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3169 (__v4si)_mm_min_epi32(__A, __B), 3170 (__v4si)__W); 3171 } 3172 3173
/* _mm256_maskz_min_epi32: selects under mask __M between _mm256_min_epi32(__A, __B) and a zero vector via __builtin_ia32_selectd_256. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 3174 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) { 3175 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3176 (__v8si)_mm256_min_epi32(__A, __B), 3177 (__v8si)_mm256_setzero_si256()); 3178 } 3179 3180
/* _mm256_mask_min_epi32: selects under mask __M between _mm256_min_epi32(__A, __B) and __W via __builtin_ia32_selectd_256. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 3181 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3182 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3183 (__v8si)_mm256_min_epi32(__A, __B), 3184 (__v8si)__W); 3185 } 3186 3187
/* _mm_min_epi64: returns __builtin_elementwise_min of __A and __B, both viewed as __v2di (signed 64-bit lanes). */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 3188 _mm_min_epi64 (__m128i __A, __m128i __B) { 3189 return (__m128i)__builtin_elementwise_min((__v2di)__A, (__v2di)__B); 3190 } 3191 3192
/* _mm_mask_min_epi64: selects under mask __M between _mm_min_epi64(__A, __B) and __W via __builtin_ia32_selectq_128. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 3193 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3194 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3195 (__v2di)_mm_min_epi64(__A, __B), 3196 (__v2di)__W); 3197 } 3198 3199
/* _mm_maskz_min_epi64: selects under mask __M between _mm_min_epi64(__A, __B) and a zero vector via __builtin_ia32_selectq_128. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 3200 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, 
__m128i __B) { 3201 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3202 (__v2di)_mm_min_epi64(__A, __B), 3203 (__v2di)_mm_setzero_si128()); 3204 } 3205 3206
/* _mm256_min_epi64: returns __builtin_elementwise_min of __A and __B, both viewed as __v4di (signed 64-bit lanes). */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 3207 _mm256_min_epi64 (__m256i __A, __m256i __B) { 3208 return (__m256i)__builtin_elementwise_min((__v4di)__A, (__v4di)__B); 3209 } 3210 3211
/* _mm256_mask_min_epi64: selects under mask __M between _mm256_min_epi64(__A, __B) and __W via __builtin_ia32_selectq_256. NOTE(review): presumably lanes with a clear mask bit take the last select operand - confirm against the Intel Intrinsics Guide. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 3212 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3213 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3214 (__v4di)_mm256_min_epi64(__A, __B), 3215 (__v4di)__W); 3216 } 3217 3218
/* _mm256_maskz_min_epi64: selects under mask __M between _mm256_min_epi64(__A, __B) and a zero vector via __builtin_ia32_selectq_256. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 3219 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { 3220 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3221 (__v4di)_mm256_min_epi64(__A, __B), 3222 (__v4di)_mm256_setzero_si256()); 3223 } 3224 3225
/* _mm_maskz_min_epu32: selects under mask __M between _mm_min_epu32(__A, __B) and a zero vector via __builtin_ia32_selectd_128. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 3226 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) { 3227 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3228 (__v4si)_mm_min_epu32(__A, __B), 3229 (__v4si)_mm_setzero_si128()); 3230 } 3231 3232
/* _mm_mask_min_epu32: selects under mask __M between _mm_min_epu32(__A, __B) and __W via __builtin_ia32_selectd_128. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 3233 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3234 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3235 (__v4si)_mm_min_epu32(__A, __B), 3236 (__v4si)__W); 3237 } 3238 3239
/* _mm256_maskz_min_epu32: selects under mask __M between _mm256_min_epu32(__A, __B) and a zero vector via __builtin_ia32_selectd_256. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 3240 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) { 3241 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3242 (__v8si)_mm256_min_epu32(__A, __B), 3243 (__v8si)_mm256_setzero_si256()); 3244 } 3245 3246
/* _mm256_mask_min_epu32: selects under mask __M between _mm256_min_epu32(__A, __B) and __W via __builtin_ia32_selectd_256. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 3247 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3248 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3249 (__v8si)_mm256_min_epu32(__A, __B), 3250 (__v8si)__W); 3251 } 3252 3253
/* _mm_min_epu64: returns __builtin_elementwise_min of __A and __B viewed as __v2du (unsigned element type). */
static 
__inline__ __m128i __DEFAULT_FN_ATTRS128 3254 _mm_min_epu64 (__m128i __A, __m128i __B) { 3255 return (__m128i)__builtin_elementwise_min((__v2du)__A, (__v2du)__B); 3256 } 3257 3258
/* _mm_mask_min_epu64: selects under mask __M between _mm_min_epu64(__A, __B) and __W via __builtin_ia32_selectq_128. NOTE(review): presumably lanes with a clear mask bit take the last select operand - confirm against the Intel Intrinsics Guide. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 3259 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3260 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3261 (__v2di)_mm_min_epu64(__A, __B), 3262 (__v2di)__W); 3263 } 3264 3265
/* _mm_maskz_min_epu64: selects under mask __M between _mm_min_epu64(__A, __B) and a zero vector via __builtin_ia32_selectq_128. */
static __inline__ __m128i __DEFAULT_FN_ATTRS128 3266 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { 3267 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3268 (__v2di)_mm_min_epu64(__A, __B), 3269 (__v2di)_mm_setzero_si128()); 3270 } 3271 3272
/* _mm256_min_epu64: returns __builtin_elementwise_min of __A and __B viewed as __v4du (unsigned element type). */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 3273 _mm256_min_epu64 (__m256i __A, __m256i __B) { 3274 return (__m256i)__builtin_elementwise_min((__v4du)__A, (__v4du)__B); 3275 } 3276 3277
/* _mm256_mask_min_epu64: selects under mask __M between _mm256_min_epu64(__A, __B) and __W via __builtin_ia32_selectq_256. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 3278 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3279 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3280 (__v4di)_mm256_min_epu64(__A, __B), 3281 (__v4di)__W); 3282 } 3283 3284
/* _mm256_maskz_min_epu64: selects under mask __M between _mm256_min_epu64(__A, __B) and a zero vector via __builtin_ia32_selectq_256. */
static __inline__ __m256i __DEFAULT_FN_ATTRS256 3285 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { 3286 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3287 (__v4di)_mm256_min_epu64(__A, __B), 3288 (__v4di)_mm256_setzero_si256()); 3289 } 3290 3291
/* _mm_roundscale_pd(A, imm): macro (not a function), so imm reaches the builtin as written; expands to __builtin_ia32_rndscalepd_128_mask with A, (int)imm, a zero vector and an all-ones mask. */
#define _mm_roundscale_pd(A, imm) \ 3292 ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3293 (int)(imm), \ 3294 (__v2df)_mm_setzero_pd(), \ 3295 (__mmask8)-1)) 3296 3297 3298
/* _mm_mask_roundscale_pd(W, U, A, imm): same builtin with W and mask U in place of the zero vector and all-ones mask. */
#define _mm_mask_roundscale_pd(W, U, A, imm) \ 3299 ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3300 (int)(imm), \ 3301 (__v2df)(__m128d)(W), \ 3302 (__mmask8)(U))) 3303 3304 3305
/* _mm_maskz_roundscale_pd(U, A, imm): same builtin with a zero vector from _mm_setzero_pd() and mask U. */
#define _mm_maskz_roundscale_pd(U, A, imm) \ 3306 ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3307 (int)(imm), \ 3308 
(__v2df)_mm_setzero_pd(), \ 3309 (__mmask8)(U))) 3310 3311 3312
/* _mm256_roundscale_pd(A, imm): macro; expands to __builtin_ia32_rndscalepd_256_mask with A, (int)imm, a zero vector and an all-ones mask ((__mmask8)-1). */
#define _mm256_roundscale_pd(A, imm) \ 3313 ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3314 (int)(imm), \ 3315 (__v4df)_mm256_setzero_pd(), \ 3316 (__mmask8)-1)) 3317 3318 3319
/* _mm256_mask_roundscale_pd(W, U, A, imm): same builtin with W and mask U. NOTE(review): W presumably supplies lanes whose mask bit is clear - confirm against the Intel Intrinsics Guide. */
#define _mm256_mask_roundscale_pd(W, U, A, imm) \ 3320 ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3321 (int)(imm), \ 3322 (__v4df)(__m256d)(W), \ 3323 (__mmask8)(U))) 3324 3325 3326
/* _mm256_maskz_roundscale_pd(U, A, imm): same builtin with a zero vector from _mm256_setzero_pd() and mask U. */
#define _mm256_maskz_roundscale_pd(U, A, imm) \ 3327 ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3328 (int)(imm), \ 3329 (__v4df)_mm256_setzero_pd(), \ 3330 (__mmask8)(U))) 3331 3332
/* _mm_roundscale_ps(A, imm): macro; expands to __builtin_ia32_rndscaleps_128_mask with A, (int)imm, a zero vector and an all-ones mask. */
#define _mm_roundscale_ps(A, imm) \ 3333 ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3334 (__v4sf)_mm_setzero_ps(), \ 3335 (__mmask8)-1)) 3336 3337 3338
/* _mm_mask_roundscale_ps(W, U, A, imm): same builtin with W and mask U. */
#define _mm_mask_roundscale_ps(W, U, A, imm) \ 3339 ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3340 (__v4sf)(__m128)(W), \ 3341 (__mmask8)(U))) 3342 3343 3344
/* _mm_maskz_roundscale_ps(U, A, imm): same builtin with a zero vector from _mm_setzero_ps() and mask U. */
#define _mm_maskz_roundscale_ps(U, A, imm) \ 3345 ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3346 (__v4sf)_mm_setzero_ps(), \ 3347 (__mmask8)(U))) 3348 3349
/* _mm256_roundscale_ps(A, imm): macro; expands to __builtin_ia32_rndscaleps_256_mask with A, (int)imm, a zero vector and an all-ones mask. */
#define _mm256_roundscale_ps(A, imm) \ 3350 ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3351 (__v8sf)_mm256_setzero_ps(), \ 3352 (__mmask8)-1)) 3353 3354
/* _mm256_mask_roundscale_ps(W, U, A, imm): same builtin with W and mask U. */
#define _mm256_mask_roundscale_ps(W, U, A, imm) \ 3355 ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3356 (__v8sf)(__m256)(W), \ 3357 (__mmask8)(U))) 3358 3359 3360
/* _mm256_maskz_roundscale_ps(U, A, imm): same builtin with a zero vector from _mm256_setzero_ps() and mask U. */
#define _mm256_maskz_roundscale_ps(U, A, imm) \ 3361 ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3362 (__v8sf)_mm256_setzero_ps(), \ 3363 (__mmask8)(U))) 3364 3365
/* _mm_scalef_pd: unmasked form; calls __builtin_ia32_scalefpd128_mask with __A, __B, a zero vector and an all-ones mask ((__mmask8) -1). */
static __inline__ __m128d __DEFAULT_FN_ATTRS128 3366 _mm_scalef_pd (__m128d __A, __m128d __B) { 3367 return (__m128d) __builtin_ia32_scalefpd128_mask 
((__v2df) __A, 3368 (__v2df) __B, 3369 (__v2df) 3370 _mm_setzero_pd (), 3371 (__mmask8) -1); 3372 } 3373 3374
/* _mm_mask_scalef_pd: forwards __A, __B, __W and mask __U to __builtin_ia32_scalefpd128_mask. NOTE(review): "mask" forms presumably keep __W where a mask bit is clear and "maskz" forms zero those lanes - confirm against the Intel Intrinsics Guide. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128 3375 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A, 3376 __m128d __B) { 3377 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3378 (__v2df) __B, 3379 (__v2df) __W, 3380 (__mmask8) __U); 3381 } 3382 3383
/* _mm_maskz_scalef_pd: calls __builtin_ia32_scalefpd128_mask with __A, __B, a zero vector from _mm_setzero_pd() and mask __U. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128 3384 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) { 3385 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3386 (__v2df) __B, 3387 (__v2df) 3388 _mm_setzero_pd (), 3389 (__mmask8) __U); 3390 } 3391 3392
/* _mm256_scalef_pd: unmasked form; calls __builtin_ia32_scalefpd256_mask with __A, __B, a zero vector and an all-ones mask ((__mmask8) -1). */
static __inline__ __m256d __DEFAULT_FN_ATTRS256 3393 _mm256_scalef_pd (__m256d __A, __m256d __B) { 3394 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3395 (__v4df) __B, 3396 (__v4df) 3397 _mm256_setzero_pd (), 3398 (__mmask8) -1); 3399 } 3400 3401
/* _mm256_mask_scalef_pd: forwards __A, __B, __W and mask __U to __builtin_ia32_scalefpd256_mask. */
static __inline__ __m256d __DEFAULT_FN_ATTRS256 3402 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A, 3403 __m256d __B) { 3404 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3405 (__v4df) __B, 3406 (__v4df) __W, 3407 (__mmask8) __U); 3408 } 3409 3410
/* _mm256_maskz_scalef_pd: calls __builtin_ia32_scalefpd256_mask with __A, __B, a zero vector from _mm256_setzero_pd() and mask __U. */
static __inline__ __m256d __DEFAULT_FN_ATTRS256 3411 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) { 3412 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3413 (__v4df) __B, 3414 (__v4df) 3415 _mm256_setzero_pd (), 3416 (__mmask8) __U); 3417 } 3418 3419
/* _mm_scalef_ps: unmasked form; calls __builtin_ia32_scalefps128_mask with __A, __B, a zero vector and an all-ones mask ((__mmask8) -1). */
static __inline__ __m128 __DEFAULT_FN_ATTRS128 3420 _mm_scalef_ps (__m128 __A, __m128 __B) { 3421 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3422 (__v4sf) __B, 3423 (__v4sf) 3424 _mm_setzero_ps (), 3425 (__mmask8) -1); 3426 } 3427 3428
/* _mm_mask_scalef_ps: forwards __A, __B, __W and mask __U to __builtin_ia32_scalefps128_mask. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128 3429 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3430 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3431 (__v4sf) __B, 3432 (__v4sf) __W, 3433 
(__mmask8) __U); 3434 } 3435 3436
/* _mm_maskz_scalef_ps: calls __builtin_ia32_scalefps128_mask with __A, __B, a zero vector from _mm_setzero_ps() and mask __U. NOTE(review): "maskz" presumably means lanes with a clear mask bit come back as zero - confirm against the Intel Intrinsics Guide. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128 3437 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) { 3438 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3439 (__v4sf) __B, 3440 (__v4sf) 3441 _mm_setzero_ps (), 3442 (__mmask8) __U); 3443 } 3444 3445
/* _mm256_scalef_ps: unmasked form; calls __builtin_ia32_scalefps256_mask with __A, __B, a zero vector and an all-ones mask ((__mmask8) -1). */
static __inline__ __m256 __DEFAULT_FN_ATTRS256 3446 _mm256_scalef_ps (__m256 __A, __m256 __B) { 3447 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3448 (__v8sf) __B, 3449 (__v8sf) 3450 _mm256_setzero_ps (), 3451 (__mmask8) -1); 3452 } 3453 3454
/* _mm256_mask_scalef_ps: forwards __A, __B, __W and mask __U to __builtin_ia32_scalefps256_mask. */
static __inline__ __m256 __DEFAULT_FN_ATTRS256 3455 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A, 3456 __m256 __B) { 3457 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3458 (__v8sf) __B, 3459 (__v8sf) __W, 3460 (__mmask8) __U); 3461 } 3462 3463
/* _mm256_maskz_scalef_ps: calls __builtin_ia32_scalefps256_mask with __A, __B, a zero vector from _mm256_setzero_ps() and mask __U. */
static __inline__ __m256 __DEFAULT_FN_ATTRS256 3464 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { 3465 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3466 (__v8sf) __B, 3467 (__v8sf) 3468 _mm256_setzero_ps (), 3469 (__mmask8) __U); 3470 } 3471 3472
/* _mm_i64scatter_pd(addr, index, v1, scale): macro; expands to __builtin_ia32_scatterdiv2df with addr as void *, an all-ones mask, index as __v2di, v1 as __v2df and (int)scale. The expansion is a bare call with no outer parentheses. */
#define _mm_i64scatter_pd(addr, index, v1, scale) \ 3473 __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)-1, \ 3474 (__v2di)(__m128i)(index), \ 3475 (__v2df)(__m128d)(v1), (int)(scale)) 3476 3477
/* _mm_mask_i64scatter_pd: same builtin as the unmasked form, with (__mmask8)(mask) instead of the all-ones mask. */
#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \ 3478 __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)(mask), \ 3479 (__v2di)(__m128i)(index), \ 3480 (__v2df)(__m128d)(v1), (int)(scale)) 3481 3482
/* _mm_i64scatter_epi64: expands to __builtin_ia32_scatterdiv2di with an all-ones mask, index as __v2di and v1 as __v2di. */
#define _mm_i64scatter_epi64(addr, index, v1, scale) \ 3483 __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)-1, \ 3484 (__v2di)(__m128i)(index), \ 3485 (__v2di)(__m128i)(v1), (int)(scale)) 3486 3487
/* _mm_mask_i64scatter_epi64: expands to __builtin_ia32_scatterdiv2di with (__mmask8)(mask), index as __v2di and v1 as __v2di. */
#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ 3488 __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)(mask), \ 3489 (__v2di)(__m128i)(index), \ 3490 (__v2di)(__m128i)(v1), (int)(scale)) 3491 3492
/* _mm256_i64scatter_pd: expands to __builtin_ia32_scatterdiv4df with an all-ones mask, index as __v4di and v1 as __v4df. */
#define 
_mm256_i64scatter_pd(addr, index, v1, scale) \ 3493 __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)-1, \ 3494 (__v4di)(__m256i)(index), \ 3495 (__v4df)(__m256d)(v1), (int)(scale)) 3496 3497
/* _mm256_mask_i64scatter_pd: macro; expands to __builtin_ia32_scatterdiv4df with addr as void *, (__mmask8)(mask), index as __v4di, v1 as __v4df and (int)scale. NOTE(review): the mask presumably selects which elements are written - confirm against the Intel Intrinsics Guide. */
#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \ 3498 __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)(mask), \ 3499 (__v4di)(__m256i)(index), \ 3500 (__v4df)(__m256d)(v1), (int)(scale)) 3501 3502
/* _mm256_i64scatter_epi64: expands to __builtin_ia32_scatterdiv4di with an all-ones mask, index as __v4di and v1 as __v4di. */
#define _mm256_i64scatter_epi64(addr, index, v1, scale) \ 3503 __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)-1, \ 3504 (__v4di)(__m256i)(index), \ 3505 (__v4di)(__m256i)(v1), (int)(scale)) 3506 3507
/* _mm256_mask_i64scatter_epi64: expands to __builtin_ia32_scatterdiv4di with (__mmask8)(mask), index as __v4di and v1 as __v4di. */
#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ 3508 __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)(mask), \ 3509 (__v4di)(__m256i)(index), \ 3510 (__v4di)(__m256i)(v1), (int)(scale)) 3511 3512
/* _mm_i64scatter_ps: expands to __builtin_ia32_scatterdiv4sf with an all-ones mask, index as __v2di and v1 as __v4sf. */
#define _mm_i64scatter_ps(addr, index, v1, scale) \ 3513 __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)-1, \ 3514 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3515 (int)(scale)) 3516 3517
/* _mm_mask_i64scatter_ps: expands to __builtin_ia32_scatterdiv4sf with (__mmask8)(mask), index as __v2di and v1 as __v4sf. */
#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \ 3518 __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)(mask), \ 3519 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3520 (int)(scale)) 3521 3522
/* _mm_i64scatter_epi32: expands to __builtin_ia32_scatterdiv4si with an all-ones mask, index as __v2di and v1 as __v4si. */
#define _mm_i64scatter_epi32(addr, index, v1, scale) \ 3523 __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)-1, \ 3524 (__v2di)(__m128i)(index), \ 3525 (__v4si)(__m128i)(v1), (int)(scale)) 3526 3527
/* _mm_mask_i64scatter_epi32: expands to __builtin_ia32_scatterdiv4si with (__mmask8)(mask), index as __v2di and v1 as __v4si. */
#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ 3528 __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)(mask), \ 3529 (__v2di)(__m128i)(index), \ 3530 (__v4si)(__m128i)(v1), (int)(scale)) 3531 3532
/* _mm256_i64scatter_ps: expands to __builtin_ia32_scatterdiv8sf with an all-ones mask. The index is 256-bit (__v4di) while v1 is cast through __m128 to __v4sf: four 64-bit indices pair with four floats. */
#define _mm256_i64scatter_ps(addr, index, v1, scale) \ 3533 __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)-1, \ 3534 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ 3535 (int)(scale)) 3536 3537
/* _mm256_mask_i64scatter_ps: expands to __builtin_ia32_scatterdiv8sf with (__mmask8)(mask), index as __v4di and v1 cast through __m128 to __v4sf. */
#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \ 3538 
__builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)(mask), \ 3539 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ 3540 (int)(scale)) 3541 3542
/* _mm256_i64scatter_epi32: macro; expands to __builtin_ia32_scatterdiv8si with an all-ones mask. The index is 256-bit (__v4di) while v1 is cast through __m128i to __v4si: four 64-bit indices pair with four 32-bit values. */
#define _mm256_i64scatter_epi32(addr, index, v1, scale) \ 3543 __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)-1, \ 3544 (__v4di)(__m256i)(index), \ 3545 (__v4si)(__m128i)(v1), (int)(scale)) 3546 3547
/* _mm256_mask_i64scatter_epi32: expands to __builtin_ia32_scatterdiv8si with (__mmask8)(mask), index as __v4di and v1 as __v4si. NOTE(review): the mask presumably selects which elements are written - confirm against the Intel Intrinsics Guide. */
#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ 3548 __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)(mask), \ 3549 (__v4di)(__m256i)(index), \ 3550 (__v4si)(__m128i)(v1), (int)(scale)) 3551 3552
/* _mm_i32scatter_pd: expands to __builtin_ia32_scattersiv2df with an all-ones mask, index as __v4si and v1 as __v2df. */
#define _mm_i32scatter_pd(addr, index, v1, scale) \ 3553 __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)-1, \ 3554 (__v4si)(__m128i)(index), \ 3555 (__v2df)(__m128d)(v1), (int)(scale)) 3556 3557
/* _mm_mask_i32scatter_pd: expands to __builtin_ia32_scattersiv2df with (__mmask8)(mask), index as __v4si and v1 as __v2df. */
#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \ 3558 __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)(mask), \ 3559 (__v4si)(__m128i)(index), \ 3560 (__v2df)(__m128d)(v1), (int)(scale)) 3561 3562
/* _mm_i32scatter_epi64: expands to __builtin_ia32_scattersiv2di with an all-ones mask, index as __v4si and v1 as __v2di. */
#define _mm_i32scatter_epi64(addr, index, v1, scale) \ 3563 __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)-1, \ 3564 (__v4si)(__m128i)(index), \ 3565 (__v2di)(__m128i)(v1), (int)(scale)) 3566 3567
/* _mm_mask_i32scatter_epi64: expands to __builtin_ia32_scattersiv2di with (__mmask8)(mask), index as __v4si and v1 as __v2di. */
#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ 3568 __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)(mask), \ 3569 (__v4si)(__m128i)(index), \ 3570 (__v2di)(__m128i)(v1), (int)(scale)) 3571 3572
/* _mm256_i32scatter_pd: expands to __builtin_ia32_scattersiv4df with an all-ones mask. The index stays 128-bit (__v4si via __m128i) while v1 is 256-bit (__v4df). */
#define _mm256_i32scatter_pd(addr, index, v1, scale) \ 3573 __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)-1, \ 3574 (__v4si)(__m128i)(index), \ 3575 (__v4df)(__m256d)(v1), (int)(scale)) 3576 3577
/* _mm256_mask_i32scatter_pd: expands to __builtin_ia32_scattersiv4df with (__mmask8)(mask), index as __v4si and v1 as __v4df. */
#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \ 3578 __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)(mask), \ 3579 (__v4si)(__m128i)(index), \ 3580 (__v4df)(__m256d)(v1), (int)(scale)) 3581 3582
/* _mm256_i32scatter_epi64: expands to __builtin_ia32_scattersiv4di with an all-ones mask, index as __v4si (128-bit) and v1 as __v4di (256-bit). */
#define _mm256_i32scatter_epi64(addr, index, v1, scale) \ 3583 __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)-1, \ 3584 
(__v4si)(__m128i)(index), \ 3585 (__v4di)(__m256i)(v1), (int)(scale)) 3586 3587
/* _mm256_mask_i32scatter_epi64: macro; expands to __builtin_ia32_scattersiv4di with addr as void *, (__mmask8)(mask), index as __v4si (128-bit), v1 as __v4di (256-bit) and (int)scale. NOTE(review): the mask presumably selects which elements are written - confirm against the Intel Intrinsics Guide. */
#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ 3588 __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)(mask), \ 3589 (__v4si)(__m128i)(index), \ 3590 (__v4di)(__m256i)(v1), (int)(scale)) 3591 3592
/* _mm_i32scatter_ps: expands to __builtin_ia32_scattersiv4sf with an all-ones mask, index as __v4si and v1 as __v4sf. */
#define _mm_i32scatter_ps(addr, index, v1, scale) \ 3593 __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)-1, \ 3594 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3595 (int)(scale)) 3596 3597
/* _mm_mask_i32scatter_ps: expands to __builtin_ia32_scattersiv4sf with (__mmask8)(mask), index as __v4si and v1 as __v4sf. */
#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \ 3598 __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)(mask), \ 3599 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3600 (int)(scale)) 3601 3602
/* _mm_i32scatter_epi32: expands to __builtin_ia32_scattersiv4si with an all-ones mask, index as __v4si and v1 as __v4si. */
#define _mm_i32scatter_epi32(addr, index, v1, scale) \ 3603 __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)-1, \ 3604 (__v4si)(__m128i)(index), \ 3605 (__v4si)(__m128i)(v1), (int)(scale)) 3606 3607
/* _mm_mask_i32scatter_epi32: expands to __builtin_ia32_scattersiv4si with (__mmask8)(mask), index as __v4si and v1 as __v4si. */
#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \ 3608 __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)(mask), \ 3609 (__v4si)(__m128i)(index), \ 3610 (__v4si)(__m128i)(v1), (int)(scale)) 3611 3612
/* _mm256_i32scatter_ps: expands to __builtin_ia32_scattersiv8sf with an all-ones mask, index as __v8si and v1 as __v8sf. */
#define _mm256_i32scatter_ps(addr, index, v1, scale) \ 3613 __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)-1, \ 3614 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ 3615 (int)(scale)) 3616 3617
/* _mm256_mask_i32scatter_ps: expands to __builtin_ia32_scattersiv8sf with (__mmask8)(mask), index as __v8si and v1 as __v8sf. */
#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \ 3618 __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)(mask), \ 3619 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ 3620 (int)(scale)) 3621 3622
/* _mm256_i32scatter_epi32: expands to __builtin_ia32_scattersiv8si with an all-ones mask, index as __v8si and v1 as __v8si. */
#define _mm256_i32scatter_epi32(addr, index, v1, scale) \ 3623 __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)-1, \ 3624 (__v8si)(__m256i)(index), \ 3625 (__v8si)(__m256i)(v1), (int)(scale)) 3626 3627
/* _mm256_mask_i32scatter_epi32: expands to __builtin_ia32_scattersiv8si with (__mmask8)(mask), index as __v8si and v1 as __v8si. */
#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \ 3628 __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)(mask), \ 3629 (__v8si)(__m256i)(index), \ 3630 (__v8si)(__m256i)(v1), (int)(scale)) 
/* _mm_mask_sqrt_pd: selects under mask __U between _mm_sqrt_pd(__A) and __W via __builtin_ia32_selectpd_128. NOTE(review): presumably lanes with a clear mask bit take the last select operand (__W here, zero in "maskz" forms) - confirm against the Intel Intrinsics Guide. */
3631 3632 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3633 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) { 3634 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3635 (__v2df)_mm_sqrt_pd(__A), 3636 (__v2df)__W); 3637 } 3638 3639
/* _mm_maskz_sqrt_pd: selects under mask __U between _mm_sqrt_pd(__A) and a zero vector from _mm_setzero_pd(). */
static __inline__ __m128d __DEFAULT_FN_ATTRS128 3640 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) { 3641 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3642 (__v2df)_mm_sqrt_pd(__A), 3643 (__v2df)_mm_setzero_pd()); 3644 } 3645 3646
/* _mm256_mask_sqrt_pd: selects under mask __U between _mm256_sqrt_pd(__A) and __W via __builtin_ia32_selectpd_256. */
static __inline__ __m256d __DEFAULT_FN_ATTRS256 3647 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) { 3648 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3649 (__v4df)_mm256_sqrt_pd(__A), 3650 (__v4df)__W); 3651 } 3652 3653
/* _mm256_maskz_sqrt_pd: selects under mask __U between _mm256_sqrt_pd(__A) and a zero vector from _mm256_setzero_pd(). */
static __inline__ __m256d __DEFAULT_FN_ATTRS256 3654 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) { 3655 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3656 (__v4df)_mm256_sqrt_pd(__A), 3657 (__v4df)_mm256_setzero_pd()); 3658 } 3659 3660
/* _mm_mask_sqrt_ps: selects under mask __U between _mm_sqrt_ps(__A) and __W via __builtin_ia32_selectps_128. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128 3661 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) { 3662 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3663 (__v4sf)_mm_sqrt_ps(__A), 3664 (__v4sf)__W); 3665 } 3666 3667
/* _mm_maskz_sqrt_ps: selects under mask __U between _mm_sqrt_ps(__A) and a zero vector from _mm_setzero_ps(). */
static __inline__ __m128 __DEFAULT_FN_ATTRS128 3668 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) { 3669 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3670 (__v4sf)_mm_sqrt_ps(__A), 3671 (__v4sf)_mm_setzero_ps()); 3672 } 3673 3674
/* _mm256_mask_sqrt_ps: selects under mask __U between _mm256_sqrt_ps(__A) and __W via __builtin_ia32_selectps_256. */
static __inline__ __m256 __DEFAULT_FN_ATTRS256 3675 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) { 3676 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3677 (__v8sf)_mm256_sqrt_ps(__A), 3678 (__v8sf)__W); 3679 } 3680 3681
/* _mm256_maskz_sqrt_ps: selects under mask __U between _mm256_sqrt_ps(__A) and a zero vector from _mm256_setzero_ps(). */
static __inline__ __m256 __DEFAULT_FN_ATTRS256 3682 _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) { 3683 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3684 (__v8sf)_mm256_sqrt_ps(__A), 3685 (__v8sf)_mm256_setzero_ps()); 3686 } 3687 3688
/* _mm_mask_sub_pd: selects under mask __U between _mm_sub_pd(__A, __B) and __W via __builtin_ia32_selectpd_128. */
static __inline__ __m128d 
__DEFAULT_FN_ATTRS128 3689 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 3690 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3691 (__v2df)_mm_sub_pd(__A, __B), 3692 (__v2df)__W); 3693 } 3694 3695 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3696 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) { 3697 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3698 (__v2df)_mm_sub_pd(__A, __B), 3699 (__v2df)_mm_setzero_pd()); 3700 } 3701 3702 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3703 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 3704 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3705 (__v4df)_mm256_sub_pd(__A, __B), 3706 (__v4df)__W); 3707 } 3708 3709 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3710 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) { 3711 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3712 (__v4df)_mm256_sub_pd(__A, __B), 3713 (__v4df)_mm256_setzero_pd()); 3714 } 3715 3716 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3717 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3718 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3719 (__v4sf)_mm_sub_ps(__A, __B), 3720 (__v4sf)__W); 3721 } 3722 3723 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3724 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) { 3725 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3726 (__v4sf)_mm_sub_ps(__A, __B), 3727 (__v4sf)_mm_setzero_ps()); 3728 } 3729 3730 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3731 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 3732 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3733 (__v8sf)_mm256_sub_ps(__A, __B), 3734 (__v8sf)__W); 3735 } 3736 3737 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3738 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) { 3739 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3740 
(__v8sf)_mm256_sub_ps(__A, __B), 3741 (__v8sf)_mm256_setzero_ps()); 3742 } 3743 3744 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3745 _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) { 3746 return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I, 3747 (__v4si)__B); 3748 } 3749 3750 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3751 _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, 3752 __m128i __B) { 3753 return (__m128i)__builtin_ia32_selectd_128(__U, 3754 (__v4si)_mm_permutex2var_epi32(__A, __I, __B), 3755 (__v4si)__A); 3756 } 3757 3758 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3759 _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, 3760 __m128i __B) { 3761 return (__m128i)__builtin_ia32_selectd_128(__U, 3762 (__v4si)_mm_permutex2var_epi32(__A, __I, __B), 3763 (__v4si)__I); 3764 } 3765 3766 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3767 _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, 3768 __m128i __B) { 3769 return (__m128i)__builtin_ia32_selectd_128(__U, 3770 (__v4si)_mm_permutex2var_epi32(__A, __I, __B), 3771 (__v4si)_mm_setzero_si128()); 3772 } 3773 3774 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3775 _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) { 3776 return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I, 3777 (__v8si) __B); 3778 } 3779 3780 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3781 _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, 3782 __m256i __B) { 3783 return (__m256i)__builtin_ia32_selectd_256(__U, 3784 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), 3785 (__v8si)__A); 3786 } 3787 3788 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3789 _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, 3790 __m256i __B) { 3791 return (__m256i)__builtin_ia32_selectd_256(__U, 3792 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), 3793 (__v8si)__I); 3794 } 3795 3796 static 
__inline__ __m256i __DEFAULT_FN_ATTRS256 3797 _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, 3798 __m256i __B) { 3799 return (__m256i)__builtin_ia32_selectd_256(__U, 3800 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), 3801 (__v8si)_mm256_setzero_si256()); 3802 } 3803 3804 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3805 _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) { 3806 return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I, 3807 (__v2df)__B); 3808 } 3809 3810 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3811 _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) { 3812 return (__m128d)__builtin_ia32_selectpd_128(__U, 3813 (__v2df)_mm_permutex2var_pd(__A, __I, __B), 3814 (__v2df)__A); 3815 } 3816 3817 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3818 _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) { 3819 return (__m128d)__builtin_ia32_selectpd_128(__U, 3820 (__v2df)_mm_permutex2var_pd(__A, __I, __B), 3821 (__v2df)(__m128d)__I); 3822 } 3823 3824 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3825 _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) { 3826 return (__m128d)__builtin_ia32_selectpd_128(__U, 3827 (__v2df)_mm_permutex2var_pd(__A, __I, __B), 3828 (__v2df)_mm_setzero_pd()); 3829 } 3830 3831 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3832 _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) { 3833 return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I, 3834 (__v4df)__B); 3835 } 3836 3837 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3838 _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, 3839 __m256d __B) { 3840 return (__m256d)__builtin_ia32_selectpd_256(__U, 3841 (__v4df)_mm256_permutex2var_pd(__A, __I, __B), 3842 (__v4df)__A); 3843 } 3844 3845 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3846 _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 
__U, 3847 __m256d __B) { 3848 return (__m256d)__builtin_ia32_selectpd_256(__U, 3849 (__v4df)_mm256_permutex2var_pd(__A, __I, __B), 3850 (__v4df)(__m256d)__I); 3851 } 3852 3853 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3854 _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, 3855 __m256d __B) { 3856 return (__m256d)__builtin_ia32_selectpd_256(__U, 3857 (__v4df)_mm256_permutex2var_pd(__A, __I, __B), 3858 (__v4df)_mm256_setzero_pd()); 3859 } 3860 3861 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3862 _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) { 3863 return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I, 3864 (__v4sf)__B); 3865 } 3866 3867 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3868 _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) { 3869 return (__m128)__builtin_ia32_selectps_128(__U, 3870 (__v4sf)_mm_permutex2var_ps(__A, __I, __B), 3871 (__v4sf)__A); 3872 } 3873 3874 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3875 _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) { 3876 return (__m128)__builtin_ia32_selectps_128(__U, 3877 (__v4sf)_mm_permutex2var_ps(__A, __I, __B), 3878 (__v4sf)(__m128)__I); 3879 } 3880 3881 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3882 _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) { 3883 return (__m128)__builtin_ia32_selectps_128(__U, 3884 (__v4sf)_mm_permutex2var_ps(__A, __I, __B), 3885 (__v4sf)_mm_setzero_ps()); 3886 } 3887 3888 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3889 _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) { 3890 return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I, 3891 (__v8sf) __B); 3892 } 3893 3894 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3895 _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) { 3896 return (__m256)__builtin_ia32_selectps_256(__U, 3897 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), 3898 
(__v8sf)__A); 3899 } 3900 3901 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3902 _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U, 3903 __m256 __B) { 3904 return (__m256)__builtin_ia32_selectps_256(__U, 3905 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), 3906 (__v8sf)(__m256)__I); 3907 } 3908 3909 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3910 _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, 3911 __m256 __B) { 3912 return (__m256)__builtin_ia32_selectps_256(__U, 3913 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), 3914 (__v8sf)_mm256_setzero_ps()); 3915 } 3916 3917 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3918 _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) { 3919 return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I, 3920 (__v2di)__B); 3921 } 3922 3923 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3924 _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, 3925 __m128i __B) { 3926 return (__m128i)__builtin_ia32_selectq_128(__U, 3927 (__v2di)_mm_permutex2var_epi64(__A, __I, __B), 3928 (__v2di)__A); 3929 } 3930 3931 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3932 _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U, 3933 __m128i __B) { 3934 return (__m128i)__builtin_ia32_selectq_128(__U, 3935 (__v2di)_mm_permutex2var_epi64(__A, __I, __B), 3936 (__v2di)__I); 3937 } 3938 3939 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3940 _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, 3941 __m128i __B) { 3942 return (__m128i)__builtin_ia32_selectq_128(__U, 3943 (__v2di)_mm_permutex2var_epi64(__A, __I, __B), 3944 (__v2di)_mm_setzero_si128()); 3945 } 3946 3947 3948 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3949 _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) { 3950 return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I, 3951 (__v4di) __B); 3952 } 3953 3954 static __inline__ __m256i __DEFAULT_FN_ATTRS256 
3955 _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, 3956 __m256i __B) { 3957 return (__m256i)__builtin_ia32_selectq_256(__U, 3958 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), 3959 (__v4di)__A); 3960 } 3961 3962 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3963 _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, 3964 __m256i __B) { 3965 return (__m256i)__builtin_ia32_selectq_256(__U, 3966 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), 3967 (__v4di)__I); 3968 } 3969 3970 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3971 _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, 3972 __m256i __B) { 3973 return (__m256i)__builtin_ia32_selectq_256(__U, 3974 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), 3975 (__v4di)_mm256_setzero_si256()); 3976 } 3977 3978 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3979 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A) 3980 { 3981 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 3982 (__v4si)_mm_cvtepi8_epi32(__A), 3983 (__v4si)__W); 3984 } 3985 3986 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3987 _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A) 3988 { 3989 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 3990 (__v4si)_mm_cvtepi8_epi32(__A), 3991 (__v4si)_mm_setzero_si128()); 3992 } 3993 3994 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3995 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A) 3996 { 3997 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 3998 (__v8si)_mm256_cvtepi8_epi32(__A), 3999 (__v8si)__W); 4000 } 4001 4002 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4003 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A) 4004 { 4005 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4006 (__v8si)_mm256_cvtepi8_epi32(__A), 4007 (__v8si)_mm256_setzero_si256()); 4008 } 4009 4010 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4011 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 
__U, __m128i __A) 4012 { 4013 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4014 (__v2di)_mm_cvtepi8_epi64(__A), 4015 (__v2di)__W); 4016 } 4017 4018 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4019 _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) 4020 { 4021 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4022 (__v2di)_mm_cvtepi8_epi64(__A), 4023 (__v2di)_mm_setzero_si128()); 4024 } 4025 4026 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4027 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4028 { 4029 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4030 (__v4di)_mm256_cvtepi8_epi64(__A), 4031 (__v4di)__W); 4032 } 4033 4034 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4035 _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) 4036 { 4037 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4038 (__v4di)_mm256_cvtepi8_epi64(__A), 4039 (__v4di)_mm256_setzero_si256()); 4040 } 4041 4042 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4043 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X) 4044 { 4045 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4046 (__v2di)_mm_cvtepi32_epi64(__X), 4047 (__v2di)__W); 4048 } 4049 4050 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4051 _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) 4052 { 4053 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4054 (__v2di)_mm_cvtepi32_epi64(__X), 4055 (__v2di)_mm_setzero_si128()); 4056 } 4057 4058 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4059 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X) 4060 { 4061 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4062 (__v4di)_mm256_cvtepi32_epi64(__X), 4063 (__v4di)__W); 4064 } 4065 4066 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4067 _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) 4068 { 4069 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4070 (__v4di)_mm256_cvtepi32_epi64(__X), 4071 
(__v4di)_mm256_setzero_si256()); 4072 } 4073 4074 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4075 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4076 { 4077 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4078 (__v4si)_mm_cvtepi16_epi32(__A), 4079 (__v4si)__W); 4080 } 4081 4082 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4083 _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A) 4084 { 4085 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4086 (__v4si)_mm_cvtepi16_epi32(__A), 4087 (__v4si)_mm_setzero_si128()); 4088 } 4089 4090 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4091 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4092 { 4093 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4094 (__v8si)_mm256_cvtepi16_epi32(__A), 4095 (__v8si)__W); 4096 } 4097 4098 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4099 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A) 4100 { 4101 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4102 (__v8si)_mm256_cvtepi16_epi32(__A), 4103 (__v8si)_mm256_setzero_si256()); 4104 } 4105 4106 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4107 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4108 { 4109 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4110 (__v2di)_mm_cvtepi16_epi64(__A), 4111 (__v2di)__W); 4112 } 4113 4114 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4115 _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) 4116 { 4117 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4118 (__v2di)_mm_cvtepi16_epi64(__A), 4119 (__v2di)_mm_setzero_si128()); 4120 } 4121 4122 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4123 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4124 { 4125 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4126 (__v4di)_mm256_cvtepi16_epi64(__A), 4127 (__v4di)__W); 4128 } 4129 4130 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4131 
_mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) 4132 { 4133 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4134 (__v4di)_mm256_cvtepi16_epi64(__A), 4135 (__v4di)_mm256_setzero_si256()); 4136 } 4137 4138 4139 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4140 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4141 { 4142 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4143 (__v4si)_mm_cvtepu8_epi32(__A), 4144 (__v4si)__W); 4145 } 4146 4147 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4148 _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) 4149 { 4150 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4151 (__v4si)_mm_cvtepu8_epi32(__A), 4152 (__v4si)_mm_setzero_si128()); 4153 } 4154 4155 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4156 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4157 { 4158 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4159 (__v8si)_mm256_cvtepu8_epi32(__A), 4160 (__v8si)__W); 4161 } 4162 4163 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4164 _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) 4165 { 4166 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4167 (__v8si)_mm256_cvtepu8_epi32(__A), 4168 (__v8si)_mm256_setzero_si256()); 4169 } 4170 4171 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4172 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4173 { 4174 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4175 (__v2di)_mm_cvtepu8_epi64(__A), 4176 (__v2di)__W); 4177 } 4178 4179 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4180 _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) 4181 { 4182 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4183 (__v2di)_mm_cvtepu8_epi64(__A), 4184 (__v2di)_mm_setzero_si128()); 4185 } 4186 4187 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4188 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4189 { 4190 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 
4191 (__v4di)_mm256_cvtepu8_epi64(__A), 4192 (__v4di)__W); 4193 } 4194 4195 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4196 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) 4197 { 4198 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4199 (__v4di)_mm256_cvtepu8_epi64(__A), 4200 (__v4di)_mm256_setzero_si256()); 4201 } 4202 4203 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4204 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X) 4205 { 4206 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4207 (__v2di)_mm_cvtepu32_epi64(__X), 4208 (__v2di)__W); 4209 } 4210 4211 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4212 _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) 4213 { 4214 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4215 (__v2di)_mm_cvtepu32_epi64(__X), 4216 (__v2di)_mm_setzero_si128()); 4217 } 4218 4219 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4220 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X) 4221 { 4222 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4223 (__v4di)_mm256_cvtepu32_epi64(__X), 4224 (__v4di)__W); 4225 } 4226 4227 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4228 _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) 4229 { 4230 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4231 (__v4di)_mm256_cvtepu32_epi64(__X), 4232 (__v4di)_mm256_setzero_si256()); 4233 } 4234 4235 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4236 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4237 { 4238 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4239 (__v4si)_mm_cvtepu16_epi32(__A), 4240 (__v4si)__W); 4241 } 4242 4243 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4244 _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) 4245 { 4246 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4247 (__v4si)_mm_cvtepu16_epi32(__A), 4248 (__v4si)_mm_setzero_si128()); 4249 } 4250 4251 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4252 
_mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4253 { 4254 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4255 (__v8si)_mm256_cvtepu16_epi32(__A), 4256 (__v8si)__W); 4257 } 4258 4259 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4260 _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) 4261 { 4262 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4263 (__v8si)_mm256_cvtepu16_epi32(__A), 4264 (__v8si)_mm256_setzero_si256()); 4265 } 4266 4267 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4268 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4269 { 4270 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4271 (__v2di)_mm_cvtepu16_epi64(__A), 4272 (__v2di)__W); 4273 } 4274 4275 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4276 _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) 4277 { 4278 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4279 (__v2di)_mm_cvtepu16_epi64(__A), 4280 (__v2di)_mm_setzero_si128()); 4281 } 4282 4283 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4284 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4285 { 4286 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4287 (__v4di)_mm256_cvtepu16_epi64(__A), 4288 (__v4di)__W); 4289 } 4290 4291 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4292 _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) 4293 { 4294 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4295 (__v4di)_mm256_cvtepu16_epi64(__A), 4296 (__v4di)_mm256_setzero_si256()); 4297 } 4298 4299 4300 #define _mm_rol_epi32(a, b) \ 4301 ((__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b))) 4302 4303 #define _mm_mask_rol_epi32(w, u, a, b) \ 4304 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 4305 (__v4si)_mm_rol_epi32((a), (b)), \ 4306 (__v4si)(__m128i)(w))) 4307 4308 #define _mm_maskz_rol_epi32(u, a, b) \ 4309 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 4310 (__v4si)_mm_rol_epi32((a), (b)), \ 4311 
(__v4si)_mm_setzero_si128())) 4312 4313 #define _mm256_rol_epi32(a, b) \ 4314 ((__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b))) 4315 4316 #define _mm256_mask_rol_epi32(w, u, a, b) \ 4317 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 4318 (__v8si)_mm256_rol_epi32((a), (b)), \ 4319 (__v8si)(__m256i)(w))) 4320 4321 #define _mm256_maskz_rol_epi32(u, a, b) \ 4322 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 4323 (__v8si)_mm256_rol_epi32((a), (b)), \ 4324 (__v8si)_mm256_setzero_si256())) 4325 4326 #define _mm_rol_epi64(a, b) \ 4327 ((__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b))) 4328 4329 #define _mm_mask_rol_epi64(w, u, a, b) \ 4330 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 4331 (__v2di)_mm_rol_epi64((a), (b)), \ 4332 (__v2di)(__m128i)(w))) 4333 4334 #define _mm_maskz_rol_epi64(u, a, b) \ 4335 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 4336 (__v2di)_mm_rol_epi64((a), (b)), \ 4337 (__v2di)_mm_setzero_si128())) 4338 4339 #define _mm256_rol_epi64(a, b) \ 4340 ((__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b))) 4341 4342 #define _mm256_mask_rol_epi64(w, u, a, b) \ 4343 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 4344 (__v4di)_mm256_rol_epi64((a), (b)), \ 4345 (__v4di)(__m256i)(w))) 4346 4347 #define _mm256_maskz_rol_epi64(u, a, b) \ 4348 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 4349 (__v4di)_mm256_rol_epi64((a), (b)), \ 4350 (__v4di)_mm256_setzero_si256())) 4351 4352 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4353 _mm_rolv_epi32 (__m128i __A, __m128i __B) 4354 { 4355 return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B); 4356 } 4357 4358 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4359 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4360 { 4361 return (__m128i)__builtin_ia32_selectd_128(__U, 4362 (__v4si)_mm_rolv_epi32(__A, __B), 4363 (__v4si)__W); 4364 } 4365 4366 static __inline__ __m128i __DEFAULT_FN_ATTRS128 
4367 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 4368 { 4369 return (__m128i)__builtin_ia32_selectd_128(__U, 4370 (__v4si)_mm_rolv_epi32(__A, __B), 4371 (__v4si)_mm_setzero_si128()); 4372 } 4373 4374 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4375 _mm256_rolv_epi32 (__m256i __A, __m256i __B) 4376 { 4377 return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B); 4378 } 4379 4380 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4381 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 4382 { 4383 return (__m256i)__builtin_ia32_selectd_256(__U, 4384 (__v8si)_mm256_rolv_epi32(__A, __B), 4385 (__v8si)__W); 4386 } 4387 4388 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4389 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 4390 { 4391 return (__m256i)__builtin_ia32_selectd_256(__U, 4392 (__v8si)_mm256_rolv_epi32(__A, __B), 4393 (__v8si)_mm256_setzero_si256()); 4394 } 4395 4396 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4397 _mm_rolv_epi64 (__m128i __A, __m128i __B) 4398 { 4399 return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B); 4400 } 4401 4402 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4403 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4404 { 4405 return (__m128i)__builtin_ia32_selectq_128(__U, 4406 (__v2di)_mm_rolv_epi64(__A, __B), 4407 (__v2di)__W); 4408 } 4409 4410 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4411 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 4412 { 4413 return (__m128i)__builtin_ia32_selectq_128(__U, 4414 (__v2di)_mm_rolv_epi64(__A, __B), 4415 (__v2di)_mm_setzero_si128()); 4416 } 4417 4418 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4419 _mm256_rolv_epi64 (__m256i __A, __m256i __B) 4420 { 4421 return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B); 4422 } 4423 4424 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4425 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i 
__B) 4426 { 4427 return (__m256i)__builtin_ia32_selectq_256(__U, 4428 (__v4di)_mm256_rolv_epi64(__A, __B), 4429 (__v4di)__W); 4430 } 4431 4432 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4433 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 4434 { 4435 return (__m256i)__builtin_ia32_selectq_256(__U, 4436 (__v4di)_mm256_rolv_epi64(__A, __B), 4437 (__v4di)_mm256_setzero_si256()); 4438 } 4439 4440 #define _mm_ror_epi32(a, b) \ 4441 ((__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b))) 4442 4443 #define _mm_mask_ror_epi32(w, u, a, b) \ 4444 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 4445 (__v4si)_mm_ror_epi32((a), (b)), \ 4446 (__v4si)(__m128i)(w))) 4447 4448 #define _mm_maskz_ror_epi32(u, a, b) \ 4449 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 4450 (__v4si)_mm_ror_epi32((a), (b)), \ 4451 (__v4si)_mm_setzero_si128())) 4452 4453 #define _mm256_ror_epi32(a, b) \ 4454 ((__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b))) 4455 4456 #define _mm256_mask_ror_epi32(w, u, a, b) \ 4457 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 4458 (__v8si)_mm256_ror_epi32((a), (b)), \ 4459 (__v8si)(__m256i)(w))) 4460 4461 #define _mm256_maskz_ror_epi32(u, a, b) \ 4462 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 4463 (__v8si)_mm256_ror_epi32((a), (b)), \ 4464 (__v8si)_mm256_setzero_si256())) 4465 4466 #define _mm_ror_epi64(a, b) \ 4467 ((__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b))) 4468 4469 #define _mm_mask_ror_epi64(w, u, a, b) \ 4470 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 4471 (__v2di)_mm_ror_epi64((a), (b)), \ 4472 (__v2di)(__m128i)(w))) 4473 4474 #define _mm_maskz_ror_epi64(u, a, b) \ 4475 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 4476 (__v2di)_mm_ror_epi64((a), (b)), \ 4477 (__v2di)_mm_setzero_si128())) 4478 4479 #define _mm256_ror_epi64(a, b) \ 4480 ((__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b))) 4481 4482 #define 
_mm256_mask_ror_epi64(w, u, a, b) \ 4483 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 4484 (__v4di)_mm256_ror_epi64((a), (b)), \ 4485 (__v4di)(__m256i)(w))) 4486 4487 #define _mm256_maskz_ror_epi64(u, a, b) \ 4488 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 4489 (__v4di)_mm256_ror_epi64((a), (b)), \ 4490 (__v4di)_mm256_setzero_si256())) 4491 4492 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4493 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4494 { 4495 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4496 (__v4si)_mm_sll_epi32(__A, __B), 4497 (__v4si)__W); 4498 } 4499 4500 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4501 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B) 4502 { 4503 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4504 (__v4si)_mm_sll_epi32(__A, __B), 4505 (__v4si)_mm_setzero_si128()); 4506 } 4507 4508 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4509 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 4510 { 4511 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4512 (__v8si)_mm256_sll_epi32(__A, __B), 4513 (__v8si)__W); 4514 } 4515 4516 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4517 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B) 4518 { 4519 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4520 (__v8si)_mm256_sll_epi32(__A, __B), 4521 (__v8si)_mm256_setzero_si256()); 4522 } 4523 4524 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4525 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 4526 { 4527 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4528 (__v4si)_mm_slli_epi32(__A, (int)__B), 4529 (__v4si)__W); 4530 } 4531 4532 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4533 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B) 4534 { 4535 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4536 (__v4si)_mm_slli_epi32(__A, (int)__B), 4537 
(__v4si)_mm_setzero_si128()); 4538 } 4539 4540 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4541 _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 4542 { 4543 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4544 (__v8si)_mm256_slli_epi32(__A, (int)__B), 4545 (__v8si)__W); 4546 } 4547 4548 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4549 _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) 4550 { 4551 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4552 (__v8si)_mm256_slli_epi32(__A, (int)__B), 4553 (__v8si)_mm256_setzero_si256()); 4554 } 4555 4556 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4557 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4558 { 4559 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4560 (__v2di)_mm_sll_epi64(__A, __B), 4561 (__v2di)__W); 4562 } 4563 4564 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4565 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B) 4566 { 4567 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4568 (__v2di)_mm_sll_epi64(__A, __B), 4569 (__v2di)_mm_setzero_si128()); 4570 } 4571 4572 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4573 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 4574 { 4575 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4576 (__v4di)_mm256_sll_epi64(__A, __B), 4577 (__v4di)__W); 4578 } 4579 4580 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4581 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B) 4582 { 4583 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4584 (__v4di)_mm256_sll_epi64(__A, __B), 4585 (__v4di)_mm256_setzero_si256()); 4586 } 4587 4588 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4589 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 4590 { 4591 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4592 (__v2di)_mm_slli_epi64(__A, (int)__B), 4593 (__v2di)__W); 4594 } 
4595 4596 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4597 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B) 4598 { 4599 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4600 (__v2di)_mm_slli_epi64(__A, (int)__B), 4601 (__v2di)_mm_setzero_si128()); 4602 } 4603 4604 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4605 _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 4606 { 4607 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4608 (__v4di)_mm256_slli_epi64(__A, (int)__B), 4609 (__v4di)__W); 4610 } 4611 4612 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4613 _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) 4614 { 4615 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4616 (__v4di)_mm256_slli_epi64(__A, (int)__B), 4617 (__v4di)_mm256_setzero_si256()); 4618 } 4619 4620 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4621 _mm_rorv_epi32 (__m128i __A, __m128i __B) 4622 { 4623 return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B); 4624 } 4625 4626 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4627 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4628 { 4629 return (__m128i)__builtin_ia32_selectd_128(__U, 4630 (__v4si)_mm_rorv_epi32(__A, __B), 4631 (__v4si)__W); 4632 } 4633 4634 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4635 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 4636 { 4637 return (__m128i)__builtin_ia32_selectd_128(__U, 4638 (__v4si)_mm_rorv_epi32(__A, __B), 4639 (__v4si)_mm_setzero_si128()); 4640 } 4641 4642 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4643 _mm256_rorv_epi32 (__m256i __A, __m256i __B) 4644 { 4645 return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B); 4646 } 4647 4648 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4649 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 4650 { 4651 return (__m256i)__builtin_ia32_selectd_256(__U, 4652 
(__v8si)_mm256_rorv_epi32(__A, __B), 4653 (__v8si)__W); 4654 } 4655 4656 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4657 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 4658 { 4659 return (__m256i)__builtin_ia32_selectd_256(__U, 4660 (__v8si)_mm256_rorv_epi32(__A, __B), 4661 (__v8si)_mm256_setzero_si256()); 4662 } 4663 4664 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4665 _mm_rorv_epi64 (__m128i __A, __m128i __B) 4666 { 4667 return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B); 4668 } 4669 4670 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4671 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4672 { 4673 return (__m128i)__builtin_ia32_selectq_128(__U, 4674 (__v2di)_mm_rorv_epi64(__A, __B), 4675 (__v2di)__W); 4676 } 4677 4678 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4679 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 4680 { 4681 return (__m128i)__builtin_ia32_selectq_128(__U, 4682 (__v2di)_mm_rorv_epi64(__A, __B), 4683 (__v2di)_mm_setzero_si128()); 4684 } 4685 4686 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4687 _mm256_rorv_epi64 (__m256i __A, __m256i __B) 4688 { 4689 return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B); 4690 } 4691 4692 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4693 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 4694 { 4695 return (__m256i)__builtin_ia32_selectq_256(__U, 4696 (__v4di)_mm256_rorv_epi64(__A, __B), 4697 (__v4di)__W); 4698 } 4699 4700 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4701 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 4702 { 4703 return (__m256i)__builtin_ia32_selectq_256(__U, 4704 (__v4di)_mm256_rorv_epi64(__A, __B), 4705 (__v4di)_mm256_setzero_si256()); 4706 } 4707 4708 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4709 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4710 { 4711 return 
(__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4712 (__v2di)_mm_sllv_epi64(__X, __Y), 4713 (__v2di)__W); 4714 } 4715 4716 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4717 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 4718 { 4719 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4720 (__v2di)_mm_sllv_epi64(__X, __Y), 4721 (__v2di)_mm_setzero_si128()); 4722 } 4723 4724 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4725 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4726 { 4727 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4728 (__v4di)_mm256_sllv_epi64(__X, __Y), 4729 (__v4di)__W); 4730 } 4731 4732 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4733 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) 4734 { 4735 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4736 (__v4di)_mm256_sllv_epi64(__X, __Y), 4737 (__v4di)_mm256_setzero_si256()); 4738 } 4739 4740 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4741 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4742 { 4743 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4744 (__v4si)_mm_sllv_epi32(__X, __Y), 4745 (__v4si)__W); 4746 } 4747 4748 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4749 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 4750 { 4751 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4752 (__v4si)_mm_sllv_epi32(__X, __Y), 4753 (__v4si)_mm_setzero_si128()); 4754 } 4755 4756 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4757 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4758 { 4759 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4760 (__v8si)_mm256_sllv_epi32(__X, __Y), 4761 (__v8si)__W); 4762 } 4763 4764 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4765 _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 4766 { 4767 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4768 
(__v8si)_mm256_sllv_epi32(__X, __Y), 4769 (__v8si)_mm256_setzero_si256()); 4770 } 4771 4772 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4773 _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4774 { 4775 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4776 (__v2di)_mm_srlv_epi64(__X, __Y), 4777 (__v2di)__W); 4778 } 4779 4780 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4781 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 4782 { 4783 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4784 (__v2di)_mm_srlv_epi64(__X, __Y), 4785 (__v2di)_mm_setzero_si128()); 4786 } 4787 4788 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4789 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4790 { 4791 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4792 (__v4di)_mm256_srlv_epi64(__X, __Y), 4793 (__v4di)__W); 4794 } 4795 4796 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4797 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) 4798 { 4799 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4800 (__v4di)_mm256_srlv_epi64(__X, __Y), 4801 (__v4di)_mm256_setzero_si256()); 4802 } 4803 4804 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4805 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4806 { 4807 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4808 (__v4si)_mm_srlv_epi32(__X, __Y), 4809 (__v4si)__W); 4810 } 4811 4812 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4813 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 4814 { 4815 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4816 (__v4si)_mm_srlv_epi32(__X, __Y), 4817 (__v4si)_mm_setzero_si128()); 4818 } 4819 4820 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4821 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4822 { 4823 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4824 (__v8si)_mm256_srlv_epi32(__X, __Y), 4825 
(__v8si)__W); 4826 } 4827 4828 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4829 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 4830 { 4831 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4832 (__v8si)_mm256_srlv_epi32(__X, __Y), 4833 (__v8si)_mm256_setzero_si256()); 4834 } 4835 4836 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4837 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4838 { 4839 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4840 (__v4si)_mm_srl_epi32(__A, __B), 4841 (__v4si)__W); 4842 } 4843 4844 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4845 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B) 4846 { 4847 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4848 (__v4si)_mm_srl_epi32(__A, __B), 4849 (__v4si)_mm_setzero_si128()); 4850 } 4851 4852 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4853 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 4854 { 4855 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4856 (__v8si)_mm256_srl_epi32(__A, __B), 4857 (__v8si)__W); 4858 } 4859 4860 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4861 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B) 4862 { 4863 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4864 (__v8si)_mm256_srl_epi32(__A, __B), 4865 (__v8si)_mm256_setzero_si256()); 4866 } 4867 4868 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4869 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 4870 { 4871 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4872 (__v4si)_mm_srli_epi32(__A, (int)__B), 4873 (__v4si)__W); 4874 } 4875 4876 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4877 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B) 4878 { 4879 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4880 (__v4si)_mm_srli_epi32(__A, (int)__B), 4881 (__v4si)_mm_setzero_si128()); 4882 } 4883 4884 static __inline__ 
__m256i __DEFAULT_FN_ATTRS256 4885 _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 4886 { 4887 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4888 (__v8si)_mm256_srli_epi32(__A, (int)__B), 4889 (__v8si)__W); 4890 } 4891 4892 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4893 _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) 4894 { 4895 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4896 (__v8si)_mm256_srli_epi32(__A, (int)__B), 4897 (__v8si)_mm256_setzero_si256()); 4898 } 4899 4900 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4901 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4902 { 4903 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4904 (__v2di)_mm_srl_epi64(__A, __B), 4905 (__v2di)__W); 4906 } 4907 4908 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4909 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B) 4910 { 4911 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4912 (__v2di)_mm_srl_epi64(__A, __B), 4913 (__v2di)_mm_setzero_si128()); 4914 } 4915 4916 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4917 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 4918 { 4919 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4920 (__v4di)_mm256_srl_epi64(__A, __B), 4921 (__v4di)__W); 4922 } 4923 4924 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4925 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B) 4926 { 4927 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4928 (__v4di)_mm256_srl_epi64(__A, __B), 4929 (__v4di)_mm256_setzero_si256()); 4930 } 4931 4932 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4933 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 4934 { 4935 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4936 (__v2di)_mm_srli_epi64(__A, (int)__B), 4937 (__v2di)__W); 4938 } 4939 4940 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4941 
_mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B) 4942 { 4943 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4944 (__v2di)_mm_srli_epi64(__A, (int)__B), 4945 (__v2di)_mm_setzero_si128()); 4946 } 4947 4948 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4949 _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 4950 { 4951 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4952 (__v4di)_mm256_srli_epi64(__A, (int)__B), 4953 (__v4di)__W); 4954 } 4955 4956 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4957 _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) 4958 { 4959 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4960 (__v4di)_mm256_srli_epi64(__A, (int)__B), 4961 (__v4di)_mm256_setzero_si256()); 4962 } 4963 4964 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4965 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4966 { 4967 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4968 (__v4si)_mm_srav_epi32(__X, __Y), 4969 (__v4si)__W); 4970 } 4971 4972 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4973 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 4974 { 4975 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4976 (__v4si)_mm_srav_epi32(__X, __Y), 4977 (__v4si)_mm_setzero_si128()); 4978 } 4979 4980 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4981 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4982 { 4983 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4984 (__v8si)_mm256_srav_epi32(__X, __Y), 4985 (__v8si)__W); 4986 } 4987 4988 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4989 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 4990 { 4991 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4992 (__v8si)_mm256_srav_epi32(__X, __Y), 4993 (__v8si)_mm256_setzero_si256()); 4994 } 4995 4996 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4997 _mm_srav_epi64(__m128i 
__X, __m128i __Y) 4998 { 4999 return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y); 5000 } 5001 5002 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5003 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 5004 { 5005 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5006 (__v2di)_mm_srav_epi64(__X, __Y), 5007 (__v2di)__W); 5008 } 5009 5010 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5011 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 5012 { 5013 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5014 (__v2di)_mm_srav_epi64(__X, __Y), 5015 (__v2di)_mm_setzero_si128()); 5016 } 5017 5018 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5019 _mm256_srav_epi64(__m256i __X, __m256i __Y) 5020 { 5021 return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y); 5022 } 5023 5024 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5025 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 5026 { 5027 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5028 (__v4di)_mm256_srav_epi64(__X, __Y), 5029 (__v4di)__W); 5030 } 5031 5032 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5033 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y) 5034 { 5035 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5036 (__v4di)_mm256_srav_epi64(__X, __Y), 5037 (__v4di)_mm256_setzero_si256()); 5038 } 5039 5040 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5041 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 5042 { 5043 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 5044 (__v4si) __A, 5045 (__v4si) __W); 5046 } 5047 5048 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5049 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A) 5050 { 5051 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 5052 (__v4si) __A, 5053 (__v4si) _mm_setzero_si128 ()); 5054 } 5055 5056 5057 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5058 _mm256_mask_mov_epi32 (__m256i 
__W, __mmask8 __U, __m256i __A) 5059 { 5060 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 5061 (__v8si) __A, 5062 (__v8si) __W); 5063 } 5064 5065 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5066 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A) 5067 { 5068 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 5069 (__v8si) __A, 5070 (__v8si) _mm256_setzero_si256 ()); 5071 } 5072 5073 static __inline __m128i __DEFAULT_FN_ATTRS128 5074 _mm_load_epi32 (void const *__P) 5075 { 5076 return *(const __m128i *) __P; 5077 } 5078 5079 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5080 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P) 5081 { 5082 return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P, 5083 (__v4si) __W, 5084 (__mmask8) 5085 __U); 5086 } 5087 5088 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5089 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P) 5090 { 5091 return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P, 5092 (__v4si) 5093 _mm_setzero_si128 (), 5094 (__mmask8) 5095 __U); 5096 } 5097 5098 static __inline __m256i __DEFAULT_FN_ATTRS256 5099 _mm256_load_epi32 (void const *__P) 5100 { 5101 return *(const __m256i *) __P; 5102 } 5103 5104 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5105 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P) 5106 { 5107 return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P, 5108 (__v8si) __W, 5109 (__mmask8) 5110 __U); 5111 } 5112 5113 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5114 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P) 5115 { 5116 return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P, 5117 (__v8si) 5118 _mm256_setzero_si256 (), 5119 (__mmask8) 5120 __U); 5121 } 5122 5123 static __inline void __DEFAULT_FN_ATTRS128 5124 _mm_store_epi32 (void *__P, __m128i __A) 5125 { 5126 *(__m128i *) __P = __A; 5127 } 5128 5129 static __inline__ void __DEFAULT_FN_ATTRS128 
5130 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A) 5131 { 5132 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P, 5133 (__v4si) __A, 5134 (__mmask8) __U); 5135 } 5136 5137 static __inline void __DEFAULT_FN_ATTRS256 5138 _mm256_store_epi32 (void *__P, __m256i __A) 5139 { 5140 *(__m256i *) __P = __A; 5141 } 5142 5143 static __inline__ void __DEFAULT_FN_ATTRS256 5144 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A) 5145 { 5146 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P, 5147 (__v8si) __A, 5148 (__mmask8) __U); 5149 } 5150 5151 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5152 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 5153 { 5154 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 5155 (__v2di) __A, 5156 (__v2di) __W); 5157 } 5158 5159 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5160 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A) 5161 { 5162 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 5163 (__v2di) __A, 5164 (__v2di) _mm_setzero_si128 ()); 5165 } 5166 5167 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5168 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 5169 { 5170 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 5171 (__v4di) __A, 5172 (__v4di) __W); 5173 } 5174 5175 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5176 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A) 5177 { 5178 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 5179 (__v4di) __A, 5180 (__v4di) _mm256_setzero_si256 ()); 5181 } 5182 5183 static __inline __m128i __DEFAULT_FN_ATTRS128 5184 _mm_load_epi64 (void const *__P) 5185 { 5186 return *(const __m128i *) __P; 5187 } 5188 5189 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5190 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P) 5191 { 5192 return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P, 5193 (__v2di) __W, 5194 (__mmask8) 5195 __U); 5196 } 5197 5198 static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 5199 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P) 5200 { 5201 return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P, 5202 (__v2di) 5203 _mm_setzero_si128 (), 5204 (__mmask8) 5205 __U); 5206 } 5207 5208 static __inline __m256i __DEFAULT_FN_ATTRS256 5209 _mm256_load_epi64 (void const *__P) 5210 { 5211 return *(const __m256i *) __P; 5212 } 5213 5214 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5215 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P) 5216 { 5217 return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P, 5218 (__v4di) __W, 5219 (__mmask8) 5220 __U); 5221 } 5222 5223 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5224 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P) 5225 { 5226 return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P, 5227 (__v4di) 5228 _mm256_setzero_si256 (), 5229 (__mmask8) 5230 __U); 5231 } 5232 5233 static __inline void __DEFAULT_FN_ATTRS128 5234 _mm_store_epi64 (void *__P, __m128i __A) 5235 { 5236 *(__m128i *) __P = __A; 5237 } 5238 5239 static __inline__ void __DEFAULT_FN_ATTRS128 5240 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A) 5241 { 5242 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P, 5243 (__v2di) __A, 5244 (__mmask8) __U); 5245 } 5246 5247 static __inline void __DEFAULT_FN_ATTRS256 5248 _mm256_store_epi64 (void *__P, __m256i __A) 5249 { 5250 *(__m256i *) __P = __A; 5251 } 5252 5253 static __inline__ void __DEFAULT_FN_ATTRS256 5254 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A) 5255 { 5256 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P, 5257 (__v4di) __A, 5258 (__mmask8) __U); 5259 } 5260 5261 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5262 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A) 5263 { 5264 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5265 (__v2df)_mm_movedup_pd(__A), 5266 (__v2df)__W); 5267 } 5268 5269 static __inline__ __m128d 
__DEFAULT_FN_ATTRS128 5270 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A) 5271 { 5272 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5273 (__v2df)_mm_movedup_pd(__A), 5274 (__v2df)_mm_setzero_pd()); 5275 } 5276 5277 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5278 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A) 5279 { 5280 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5281 (__v4df)_mm256_movedup_pd(__A), 5282 (__v4df)__W); 5283 } 5284 5285 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5286 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A) 5287 { 5288 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5289 (__v4df)_mm256_movedup_pd(__A), 5290 (__v4df)_mm256_setzero_pd()); 5291 } 5292 5293 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5294 _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) 5295 { 5296 return (__m128i)__builtin_ia32_selectd_128(__M, 5297 (__v4si) _mm_set1_epi32(__A), 5298 (__v4si)__O); 5299 } 5300 5301 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5302 _mm_maskz_set1_epi32( __mmask8 __M, int __A) 5303 { 5304 return (__m128i)__builtin_ia32_selectd_128(__M, 5305 (__v4si) _mm_set1_epi32(__A), 5306 (__v4si)_mm_setzero_si128()); 5307 } 5308 5309 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5310 _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) 5311 { 5312 return (__m256i)__builtin_ia32_selectd_256(__M, 5313 (__v8si) _mm256_set1_epi32(__A), 5314 (__v8si)__O); 5315 } 5316 5317 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5318 _mm256_maskz_set1_epi32( __mmask8 __M, int __A) 5319 { 5320 return (__m256i)__builtin_ia32_selectd_256(__M, 5321 (__v8si) _mm256_set1_epi32(__A), 5322 (__v8si)_mm256_setzero_si256()); 5323 } 5324 5325 5326 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5327 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) 5328 { 5329 return (__m128i) __builtin_ia32_selectq_128(__M, 5330 (__v2di) _mm_set1_epi64x(__A), 5331 (__v2di) __O); 5332 } 5333 5334 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 5335 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A) 5336 { 5337 return (__m128i) __builtin_ia32_selectq_128(__M, 5338 (__v2di) _mm_set1_epi64x(__A), 5339 (__v2di) _mm_setzero_si128()); 5340 } 5341 5342 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5343 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A) 5344 { 5345 return (__m256i) __builtin_ia32_selectq_256(__M, 5346 (__v4di) _mm256_set1_epi64x(__A), 5347 (__v4di) __O) ; 5348 } 5349 5350 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5351 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) 5352 { 5353 return (__m256i) __builtin_ia32_selectq_256(__M, 5354 (__v4di) _mm256_set1_epi64x(__A), 5355 (__v4di) _mm256_setzero_si256()); 5356 } 5357 5358 #define _mm_fixupimm_pd(A, B, C, imm) \ 5359 ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ 5360 (__v2df)(__m128d)(B), \ 5361 (__v2di)(__m128i)(C), (int)(imm), \ 5362 (__mmask8)-1)) 5363 5364 #define _mm_mask_fixupimm_pd(A, U, B, C, imm) \ 5365 ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ 5366 (__v2df)(__m128d)(B), \ 5367 (__v2di)(__m128i)(C), (int)(imm), \ 5368 (__mmask8)(U))) 5369 5370 #define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \ 5371 ((__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \ 5372 (__v2df)(__m128d)(B), \ 5373 (__v2di)(__m128i)(C), \ 5374 (int)(imm), (__mmask8)(U))) 5375 5376 #define _mm256_fixupimm_pd(A, B, C, imm) \ 5377 ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ 5378 (__v4df)(__m256d)(B), \ 5379 (__v4di)(__m256i)(C), (int)(imm), \ 5380 (__mmask8)-1)) 5381 5382 #define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \ 5383 ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ 5384 (__v4df)(__m256d)(B), \ 5385 (__v4di)(__m256i)(C), (int)(imm), \ 5386 (__mmask8)(U))) 5387 5388 #define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \ 5389 ((__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \ 
5390 (__v4df)(__m256d)(B), \ 5391 (__v4di)(__m256i)(C), \ 5392 (int)(imm), (__mmask8)(U))) 5393 5394 #define _mm_fixupimm_ps(A, B, C, imm) \ 5395 ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ 5396 (__v4sf)(__m128)(B), \ 5397 (__v4si)(__m128i)(C), (int)(imm), \ 5398 (__mmask8)-1)) 5399 5400 #define _mm_mask_fixupimm_ps(A, U, B, C, imm) \ 5401 ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ 5402 (__v4sf)(__m128)(B), \ 5403 (__v4si)(__m128i)(C), (int)(imm), \ 5404 (__mmask8)(U))) 5405 5406 #define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \ 5407 ((__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \ 5408 (__v4sf)(__m128)(B), \ 5409 (__v4si)(__m128i)(C), (int)(imm), \ 5410 (__mmask8)(U))) 5411 5412 #define _mm256_fixupimm_ps(A, B, C, imm) \ 5413 ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ 5414 (__v8sf)(__m256)(B), \ 5415 (__v8si)(__m256i)(C), (int)(imm), \ 5416 (__mmask8)-1)) 5417 5418 #define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \ 5419 ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ 5420 (__v8sf)(__m256)(B), \ 5421 (__v8si)(__m256i)(C), (int)(imm), \ 5422 (__mmask8)(U))) 5423 5424 #define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \ 5425 ((__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \ 5426 (__v8sf)(__m256)(B), \ 5427 (__v8si)(__m256i)(C), (int)(imm), \ 5428 (__mmask8)(U))) 5429 5430 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5431 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P) 5432 { 5433 return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P, 5434 (__v2df) __W, 5435 (__mmask8) __U); 5436 } 5437 5438 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5439 _mm_maskz_load_pd (__mmask8 __U, void const *__P) 5440 { 5441 return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P, 5442 (__v2df) 5443 _mm_setzero_pd (), 5444 (__mmask8) __U); 5445 } 5446 5447 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5448 
_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P) 5449 { 5450 return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P, 5451 (__v4df) __W, 5452 (__mmask8) __U); 5453 } 5454 5455 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5456 _mm256_maskz_load_pd (__mmask8 __U, void const *__P) 5457 { 5458 return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P, 5459 (__v4df) 5460 _mm256_setzero_pd (), 5461 (__mmask8) __U); 5462 } 5463 5464 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5465 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P) 5466 { 5467 return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P, 5468 (__v4sf) __W, 5469 (__mmask8) __U); 5470 } 5471 5472 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5473 _mm_maskz_load_ps (__mmask8 __U, void const *__P) 5474 { 5475 return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P, 5476 (__v4sf) 5477 _mm_setzero_ps (), 5478 (__mmask8) __U); 5479 } 5480 5481 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5482 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P) 5483 { 5484 return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P, 5485 (__v8sf) __W, 5486 (__mmask8) __U); 5487 } 5488 5489 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5490 _mm256_maskz_load_ps (__mmask8 __U, void const *__P) 5491 { 5492 return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P, 5493 (__v8sf) 5494 _mm256_setzero_ps (), 5495 (__mmask8) __U); 5496 } 5497 5498 static __inline __m128i __DEFAULT_FN_ATTRS128 5499 _mm_loadu_epi64 (void const *__P) 5500 { 5501 struct __loadu_epi64 { 5502 __m128i_u __v; 5503 } __attribute__((__packed__, __may_alias__)); 5504 return ((const struct __loadu_epi64*)__P)->__v; 5505 } 5506 5507 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5508 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) 5509 { 5510 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P, 5511 (__v2di) __W, 
5512 (__mmask8) __U); 5513 } 5514 5515 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5516 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 5517 { 5518 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P, 5519 (__v2di) 5520 _mm_setzero_si128 (), 5521 (__mmask8) __U); 5522 } 5523 5524 static __inline __m256i __DEFAULT_FN_ATTRS256 5525 _mm256_loadu_epi64 (void const *__P) 5526 { 5527 struct __loadu_epi64 { 5528 __m256i_u __v; 5529 } __attribute__((__packed__, __may_alias__)); 5530 return ((const struct __loadu_epi64*)__P)->__v; 5531 } 5532 5533 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5534 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) 5535 { 5536 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P, 5537 (__v4di) __W, 5538 (__mmask8) __U); 5539 } 5540 5541 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5542 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 5543 { 5544 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P, 5545 (__v4di) 5546 _mm256_setzero_si256 (), 5547 (__mmask8) __U); 5548 } 5549 5550 static __inline __m128i __DEFAULT_FN_ATTRS128 5551 _mm_loadu_epi32 (void const *__P) 5552 { 5553 struct __loadu_epi32 { 5554 __m128i_u __v; 5555 } __attribute__((__packed__, __may_alias__)); 5556 return ((const struct __loadu_epi32*)__P)->__v; 5557 } 5558 5559 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5560 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) 5561 { 5562 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P, 5563 (__v4si) __W, 5564 (__mmask8) __U); 5565 } 5566 5567 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5568 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P) 5569 { 5570 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P, 5571 (__v4si) 5572 _mm_setzero_si128 (), 5573 (__mmask8) __U); 5574 } 5575 5576 static __inline __m256i __DEFAULT_FN_ATTRS256 5577 _mm256_loadu_epi32 (void const 
*__P) 5578 { 5579 struct __loadu_epi32 { 5580 __m256i_u __v; 5581 } __attribute__((__packed__, __may_alias__)); 5582 return ((const struct __loadu_epi32*)__P)->__v; 5583 } 5584 5585 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5586 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P) 5587 { 5588 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P, 5589 (__v8si) __W, 5590 (__mmask8) __U); 5591 } 5592 5593 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5594 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P) 5595 { 5596 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P, 5597 (__v8si) 5598 _mm256_setzero_si256 (), 5599 (__mmask8) __U); 5600 } 5601 5602 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5603 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P) 5604 { 5605 return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P, 5606 (__v2df) __W, 5607 (__mmask8) __U); 5608 } 5609 5610 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5611 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P) 5612 { 5613 return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P, 5614 (__v2df) 5615 _mm_setzero_pd (), 5616 (__mmask8) __U); 5617 } 5618 5619 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5620 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P) 5621 { 5622 return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P, 5623 (__v4df) __W, 5624 (__mmask8) __U); 5625 } 5626 5627 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5628 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P) 5629 { 5630 return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P, 5631 (__v4df) 5632 _mm256_setzero_pd (), 5633 (__mmask8) __U); 5634 } 5635 5636 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5637 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P) 5638 { 5639 return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P, 5640 (__v4sf) __W, 5641 
(__mmask8) __U); 5642 } 5643 5644 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5645 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P) 5646 { 5647 return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P, 5648 (__v4sf) 5649 _mm_setzero_ps (), 5650 (__mmask8) __U); 5651 } 5652 5653 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5654 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P) 5655 { 5656 return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P, 5657 (__v8sf) __W, 5658 (__mmask8) __U); 5659 } 5660 5661 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5662 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P) 5663 { 5664 return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P, 5665 (__v8sf) 5666 _mm256_setzero_ps (), 5667 (__mmask8) __U); 5668 } 5669 5670 static __inline__ void __DEFAULT_FN_ATTRS128 5671 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A) 5672 { 5673 __builtin_ia32_storeapd128_mask ((__v2df *) __P, 5674 (__v2df) __A, 5675 (__mmask8) __U); 5676 } 5677 5678 static __inline__ void __DEFAULT_FN_ATTRS256 5679 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A) 5680 { 5681 __builtin_ia32_storeapd256_mask ((__v4df *) __P, 5682 (__v4df) __A, 5683 (__mmask8) __U); 5684 } 5685 5686 static __inline__ void __DEFAULT_FN_ATTRS128 5687 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A) 5688 { 5689 __builtin_ia32_storeaps128_mask ((__v4sf *) __P, 5690 (__v4sf) __A, 5691 (__mmask8) __U); 5692 } 5693 5694 static __inline__ void __DEFAULT_FN_ATTRS256 5695 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A) 5696 { 5697 __builtin_ia32_storeaps256_mask ((__v8sf *) __P, 5698 (__v8sf) __A, 5699 (__mmask8) __U); 5700 } 5701 5702 static __inline void __DEFAULT_FN_ATTRS128 5703 _mm_storeu_epi64 (void *__P, __m128i __A) 5704 { 5705 struct __storeu_epi64 { 5706 __m128i_u __v; 5707 } __attribute__((__packed__, __may_alias__)); 5708 ((struct __storeu_epi64*)__P)->__v = __A; 5709 } 5710 5711 static 
__inline__ void __DEFAULT_FN_ATTRS128 5712 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A) 5713 { 5714 __builtin_ia32_storedqudi128_mask ((__v2di *) __P, 5715 (__v2di) __A, 5716 (__mmask8) __U); 5717 } 5718 5719 static __inline void __DEFAULT_FN_ATTRS256 5720 _mm256_storeu_epi64 (void *__P, __m256i __A) 5721 { 5722 struct __storeu_epi64 { 5723 __m256i_u __v; 5724 } __attribute__((__packed__, __may_alias__)); 5725 ((struct __storeu_epi64*)__P)->__v = __A; 5726 } 5727 5728 static __inline__ void __DEFAULT_FN_ATTRS256 5729 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A) 5730 { 5731 __builtin_ia32_storedqudi256_mask ((__v4di *) __P, 5732 (__v4di) __A, 5733 (__mmask8) __U); 5734 } 5735 5736 static __inline void __DEFAULT_FN_ATTRS128 5737 _mm_storeu_epi32 (void *__P, __m128i __A) 5738 { 5739 struct __storeu_epi32 { 5740 __m128i_u __v; 5741 } __attribute__((__packed__, __may_alias__)); 5742 ((struct __storeu_epi32*)__P)->__v = __A; 5743 } 5744 5745 static __inline__ void __DEFAULT_FN_ATTRS128 5746 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A) 5747 { 5748 __builtin_ia32_storedqusi128_mask ((__v4si *) __P, 5749 (__v4si) __A, 5750 (__mmask8) __U); 5751 } 5752 5753 static __inline void __DEFAULT_FN_ATTRS256 5754 _mm256_storeu_epi32 (void *__P, __m256i __A) 5755 { 5756 struct __storeu_epi32 { 5757 __m256i_u __v; 5758 } __attribute__((__packed__, __may_alias__)); 5759 ((struct __storeu_epi32*)__P)->__v = __A; 5760 } 5761 5762 static __inline__ void __DEFAULT_FN_ATTRS256 5763 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A) 5764 { 5765 __builtin_ia32_storedqusi256_mask ((__v8si *) __P, 5766 (__v8si) __A, 5767 (__mmask8) __U); 5768 } 5769 5770 static __inline__ void __DEFAULT_FN_ATTRS128 5771 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A) 5772 { 5773 __builtin_ia32_storeupd128_mask ((__v2df *) __P, 5774 (__v2df) __A, 5775 (__mmask8) __U); 5776 } 5777 5778 static __inline__ void __DEFAULT_FN_ATTRS256 5779 
_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A) 5780 { 5781 __builtin_ia32_storeupd256_mask ((__v4df *) __P, 5782 (__v4df) __A, 5783 (__mmask8) __U); 5784 } 5785 5786 static __inline__ void __DEFAULT_FN_ATTRS128 5787 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A) 5788 { 5789 __builtin_ia32_storeups128_mask ((__v4sf *) __P, 5790 (__v4sf) __A, 5791 (__mmask8) __U); 5792 } 5793 5794 static __inline__ void __DEFAULT_FN_ATTRS256 5795 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A) 5796 { 5797 __builtin_ia32_storeups256_mask ((__v8sf *) __P, 5798 (__v8sf) __A, 5799 (__mmask8) __U); 5800 } 5801 5802 5803 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5804 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 5805 { 5806 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5807 (__v2df)_mm_unpackhi_pd(__A, __B), 5808 (__v2df)__W); 5809 } 5810 5811 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5812 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) 5813 { 5814 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5815 (__v2df)_mm_unpackhi_pd(__A, __B), 5816 (__v2df)_mm_setzero_pd()); 5817 } 5818 5819 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5820 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) 5821 { 5822 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5823 (__v4df)_mm256_unpackhi_pd(__A, __B), 5824 (__v4df)__W); 5825 } 5826 5827 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5828 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) 5829 { 5830 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5831 (__v4df)_mm256_unpackhi_pd(__A, __B), 5832 (__v4df)_mm256_setzero_pd()); 5833 } 5834 5835 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5836 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 5837 { 5838 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 5839 (__v4sf)_mm_unpackhi_ps(__A, __B), 5840 
(__v4sf)__W); 5841 } 5842 5843 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5844 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) 5845 { 5846 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 5847 (__v4sf)_mm_unpackhi_ps(__A, __B), 5848 (__v4sf)_mm_setzero_ps()); 5849 } 5850 5851 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5852 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 5853 { 5854 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 5855 (__v8sf)_mm256_unpackhi_ps(__A, __B), 5856 (__v8sf)__W); 5857 } 5858 5859 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5860 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) 5861 { 5862 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 5863 (__v8sf)_mm256_unpackhi_ps(__A, __B), 5864 (__v8sf)_mm256_setzero_ps()); 5865 } 5866 5867 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5868 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 5869 { 5870 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5871 (__v2df)_mm_unpacklo_pd(__A, __B), 5872 (__v2df)__W); 5873 } 5874 5875 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5876 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) 5877 { 5878 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5879 (__v2df)_mm_unpacklo_pd(__A, __B), 5880 (__v2df)_mm_setzero_pd()); 5881 } 5882 5883 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5884 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) 5885 { 5886 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5887 (__v4df)_mm256_unpacklo_pd(__A, __B), 5888 (__v4df)__W); 5889 } 5890 5891 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5892 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) 5893 { 5894 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5895 (__v4df)_mm256_unpacklo_pd(__A, __B), 5896 (__v4df)_mm256_setzero_pd()); 5897 } 5898 5899 static __inline__ __m128 
__DEFAULT_FN_ATTRS128 5900 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 5901 { 5902 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 5903 (__v4sf)_mm_unpacklo_ps(__A, __B), 5904 (__v4sf)__W); 5905 } 5906 5907 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5908 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) 5909 { 5910 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 5911 (__v4sf)_mm_unpacklo_ps(__A, __B), 5912 (__v4sf)_mm_setzero_ps()); 5913 } 5914 5915 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5916 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 5917 { 5918 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 5919 (__v8sf)_mm256_unpacklo_ps(__A, __B), 5920 (__v8sf)__W); 5921 } 5922 5923 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5924 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) 5925 { 5926 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 5927 (__v8sf)_mm256_unpacklo_ps(__A, __B), 5928 (__v8sf)_mm256_setzero_ps()); 5929 } 5930 5931 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5932 _mm_rcp14_pd (__m128d __A) 5933 { 5934 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 5935 (__v2df) 5936 _mm_setzero_pd (), 5937 (__mmask8) -1); 5938 } 5939 5940 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5941 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A) 5942 { 5943 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 5944 (__v2df) __W, 5945 (__mmask8) __U); 5946 } 5947 5948 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5949 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A) 5950 { 5951 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 5952 (__v2df) 5953 _mm_setzero_pd (), 5954 (__mmask8) __U); 5955 } 5956 5957 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5958 _mm256_rcp14_pd (__m256d __A) 5959 { 5960 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 5961 (__v4df) 5962 _mm256_setzero_pd (), 
5963 (__mmask8) -1); 5964 } 5965 5966 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5967 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A) 5968 { 5969 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 5970 (__v4df) __W, 5971 (__mmask8) __U); 5972 } 5973 5974 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5975 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A) 5976 { 5977 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 5978 (__v4df) 5979 _mm256_setzero_pd (), 5980 (__mmask8) __U); 5981 } 5982 5983 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5984 _mm_rcp14_ps (__m128 __A) 5985 { 5986 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 5987 (__v4sf) 5988 _mm_setzero_ps (), 5989 (__mmask8) -1); 5990 } 5991 5992 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5993 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A) 5994 { 5995 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 5996 (__v4sf) __W, 5997 (__mmask8) __U); 5998 } 5999 6000 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6001 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A) 6002 { 6003 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 6004 (__v4sf) 6005 _mm_setzero_ps (), 6006 (__mmask8) __U); 6007 } 6008 6009 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6010 _mm256_rcp14_ps (__m256 __A) 6011 { 6012 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6013 (__v8sf) 6014 _mm256_setzero_ps (), 6015 (__mmask8) -1); 6016 } 6017 6018 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6019 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A) 6020 { 6021 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6022 (__v8sf) __W, 6023 (__mmask8) __U); 6024 } 6025 6026 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6027 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A) 6028 { 6029 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6030 (__v8sf) 6031 _mm256_setzero_ps (), 6032 (__mmask8) __U); 6033 } 6034 6035 
#define _mm_mask_permute_pd(W, U, X, C) \ 6036 ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6037 (__v2df)_mm_permute_pd((X), (C)), \ 6038 (__v2df)(__m128d)(W))) 6039 6040 #define _mm_maskz_permute_pd(U, X, C) \ 6041 ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6042 (__v2df)_mm_permute_pd((X), (C)), \ 6043 (__v2df)_mm_setzero_pd())) 6044 6045 #define _mm256_mask_permute_pd(W, U, X, C) \ 6046 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6047 (__v4df)_mm256_permute_pd((X), (C)), \ 6048 (__v4df)(__m256d)(W))) 6049 6050 #define _mm256_maskz_permute_pd(U, X, C) \ 6051 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6052 (__v4df)_mm256_permute_pd((X), (C)), \ 6053 (__v4df)_mm256_setzero_pd())) 6054 6055 #define _mm_mask_permute_ps(W, U, X, C) \ 6056 ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6057 (__v4sf)_mm_permute_ps((X), (C)), \ 6058 (__v4sf)(__m128)(W))) 6059 6060 #define _mm_maskz_permute_ps(U, X, C) \ 6061 ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6062 (__v4sf)_mm_permute_ps((X), (C)), \ 6063 (__v4sf)_mm_setzero_ps())) 6064 6065 #define _mm256_mask_permute_ps(W, U, X, C) \ 6066 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6067 (__v8sf)_mm256_permute_ps((X), (C)), \ 6068 (__v8sf)(__m256)(W))) 6069 6070 #define _mm256_maskz_permute_ps(U, X, C) \ 6071 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6072 (__v8sf)_mm256_permute_ps((X), (C)), \ 6073 (__v8sf)_mm256_setzero_ps())) 6074 6075 static __inline__ __m128d __DEFAULT_FN_ATTRS128 6076 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) 6077 { 6078 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6079 (__v2df)_mm_permutevar_pd(__A, __C), 6080 (__v2df)__W); 6081 } 6082 6083 static __inline__ __m128d __DEFAULT_FN_ATTRS128 6084 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) 6085 { 6086 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6087 (__v2df)_mm_permutevar_pd(__A, __C), 6088 
(__v2df)_mm_setzero_pd()); 6089 } 6090 6091 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6092 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) 6093 { 6094 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6095 (__v4df)_mm256_permutevar_pd(__A, __C), 6096 (__v4df)__W); 6097 } 6098 6099 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6100 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) 6101 { 6102 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6103 (__v4df)_mm256_permutevar_pd(__A, __C), 6104 (__v4df)_mm256_setzero_pd()); 6105 } 6106 6107 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6108 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) 6109 { 6110 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6111 (__v4sf)_mm_permutevar_ps(__A, __C), 6112 (__v4sf)__W); 6113 } 6114 6115 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6116 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) 6117 { 6118 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6119 (__v4sf)_mm_permutevar_ps(__A, __C), 6120 (__v4sf)_mm_setzero_ps()); 6121 } 6122 6123 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6124 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) 6125 { 6126 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6127 (__v8sf)_mm256_permutevar_ps(__A, __C), 6128 (__v8sf)__W); 6129 } 6130 6131 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6132 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) 6133 { 6134 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6135 (__v8sf)_mm256_permutevar_ps(__A, __C), 6136 (__v8sf)_mm256_setzero_ps()); 6137 } 6138 6139 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6140 _mm_test_epi32_mask (__m128i __A, __m128i __B) 6141 { 6142 return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 6143 } 6144 6145 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6146 
_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) 6147 { 6148 return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B), 6149 _mm_setzero_si128()); 6150 } 6151 6152 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6153 _mm256_test_epi32_mask (__m256i __A, __m256i __B) 6154 { 6155 return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B), 6156 _mm256_setzero_si256()); 6157 } 6158 6159 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6160 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 6161 { 6162 return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B), 6163 _mm256_setzero_si256()); 6164 } 6165 6166 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6167 _mm_test_epi64_mask (__m128i __A, __m128i __B) 6168 { 6169 return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 6170 } 6171 6172 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6173 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) 6174 { 6175 return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B), 6176 _mm_setzero_si128()); 6177 } 6178 6179 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6180 _mm256_test_epi64_mask (__m256i __A, __m256i __B) 6181 { 6182 return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B), 6183 _mm256_setzero_si256()); 6184 } 6185 6186 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6187 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) 6188 { 6189 return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B), 6190 _mm256_setzero_si256()); 6191 } 6192 6193 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6194 _mm_testn_epi32_mask (__m128i __A, __m128i __B) 6195 { 6196 return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 6197 } 6198 6199 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6200 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) 6201 { 6202 return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, 
__B), 6203 _mm_setzero_si128()); 6204 } 6205 6206 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6207 _mm256_testn_epi32_mask (__m256i __A, __m256i __B) 6208 { 6209 return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B), 6210 _mm256_setzero_si256()); 6211 } 6212 6213 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6214 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 6215 { 6216 return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B), 6217 _mm256_setzero_si256()); 6218 } 6219 6220 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6221 _mm_testn_epi64_mask (__m128i __A, __m128i __B) 6222 { 6223 return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 6224 } 6225 6226 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6227 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) 6228 { 6229 return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B), 6230 _mm_setzero_si128()); 6231 } 6232 6233 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6234 _mm256_testn_epi64_mask (__m256i __A, __m256i __B) 6235 { 6236 return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B), 6237 _mm256_setzero_si256()); 6238 } 6239 6240 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6241 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) 6242 { 6243 return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B), 6244 _mm256_setzero_si256()); 6245 } 6246 6247 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6248 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6249 { 6250 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6251 (__v4si)_mm_unpackhi_epi32(__A, __B), 6252 (__v4si)__W); 6253 } 6254 6255 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6256 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6257 { 6258 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6259 (__v4si)_mm_unpackhi_epi32(__A, __B), 6260 
(__v4si)_mm_setzero_si128()); 6261 } 6262 6263 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6264 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6265 { 6266 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6267 (__v8si)_mm256_unpackhi_epi32(__A, __B), 6268 (__v8si)__W); 6269 } 6270 6271 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6272 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) 6273 { 6274 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6275 (__v8si)_mm256_unpackhi_epi32(__A, __B), 6276 (__v8si)_mm256_setzero_si256()); 6277 } 6278 6279 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6280 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6281 { 6282 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6283 (__v2di)_mm_unpackhi_epi64(__A, __B), 6284 (__v2di)__W); 6285 } 6286 6287 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6288 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6289 { 6290 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6291 (__v2di)_mm_unpackhi_epi64(__A, __B), 6292 (__v2di)_mm_setzero_si128()); 6293 } 6294 6295 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6296 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6297 { 6298 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6299 (__v4di)_mm256_unpackhi_epi64(__A, __B), 6300 (__v4di)__W); 6301 } 6302 6303 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6304 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) 6305 { 6306 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6307 (__v4di)_mm256_unpackhi_epi64(__A, __B), 6308 (__v4di)_mm256_setzero_si256()); 6309 } 6310 6311 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6312 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6313 { 6314 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6315 (__v4si)_mm_unpacklo_epi32(__A, 
__B), 6316 (__v4si)__W); 6317 } 6318 6319 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6320 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6321 { 6322 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6323 (__v4si)_mm_unpacklo_epi32(__A, __B), 6324 (__v4si)_mm_setzero_si128()); 6325 } 6326 6327 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6328 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6329 { 6330 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6331 (__v8si)_mm256_unpacklo_epi32(__A, __B), 6332 (__v8si)__W); 6333 } 6334 6335 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6336 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) 6337 { 6338 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6339 (__v8si)_mm256_unpacklo_epi32(__A, __B), 6340 (__v8si)_mm256_setzero_si256()); 6341 } 6342 6343 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6344 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6345 { 6346 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6347 (__v2di)_mm_unpacklo_epi64(__A, __B), 6348 (__v2di)__W); 6349 } 6350 6351 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6352 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6353 { 6354 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6355 (__v2di)_mm_unpacklo_epi64(__A, __B), 6356 (__v2di)_mm_setzero_si128()); 6357 } 6358 6359 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6360 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6361 { 6362 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6363 (__v4di)_mm256_unpacklo_epi64(__A, __B), 6364 (__v4di)__W); 6365 } 6366 6367 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6368 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) 6369 { 6370 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6371 (__v4di)_mm256_unpacklo_epi64(__A, __B), 6372 
(__v4di)_mm256_setzero_si256()); 6373 } 6374 6375 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6376 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6377 { 6378 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6379 (__v4si)_mm_sra_epi32(__A, __B), 6380 (__v4si)__W); 6381 } 6382 6383 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6384 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6385 { 6386 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6387 (__v4si)_mm_sra_epi32(__A, __B), 6388 (__v4si)_mm_setzero_si128()); 6389 } 6390 6391 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6392 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 6393 { 6394 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6395 (__v8si)_mm256_sra_epi32(__A, __B), 6396 (__v8si)__W); 6397 } 6398 6399 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6400 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) 6401 { 6402 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6403 (__v8si)_mm256_sra_epi32(__A, __B), 6404 (__v8si)_mm256_setzero_si256()); 6405 } 6406 6407 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6408 _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 6409 { 6410 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6411 (__v4si)_mm_srai_epi32(__A, (int)__B), 6412 (__v4si)__W); 6413 } 6414 6415 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6416 _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B) 6417 { 6418 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6419 (__v4si)_mm_srai_epi32(__A, (int)__B), 6420 (__v4si)_mm_setzero_si128()); 6421 } 6422 6423 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6424 _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 6425 { 6426 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6427 (__v8si)_mm256_srai_epi32(__A, (int)__B), 6428 (__v8si)__W); 6429 } 6430 
6431 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6432 _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B) 6433 { 6434 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6435 (__v8si)_mm256_srai_epi32(__A, (int)__B), 6436 (__v8si)_mm256_setzero_si256()); 6437 } 6438 6439 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6440 _mm_sra_epi64(__m128i __A, __m128i __B) 6441 { 6442 return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B); 6443 } 6444 6445 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6446 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6447 { 6448 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6449 (__v2di)_mm_sra_epi64(__A, __B), \ 6450 (__v2di)__W); 6451 } 6452 6453 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6454 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6455 { 6456 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6457 (__v2di)_mm_sra_epi64(__A, __B), \ 6458 (__v2di)_mm_setzero_si128()); 6459 } 6460 6461 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6462 _mm256_sra_epi64(__m256i __A, __m128i __B) 6463 { 6464 return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B); 6465 } 6466 6467 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6468 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 6469 { 6470 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6471 (__v4di)_mm256_sra_epi64(__A, __B), \ 6472 (__v4di)__W); 6473 } 6474 6475 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6476 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) 6477 { 6478 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6479 (__v4di)_mm256_sra_epi64(__A, __B), \ 6480 (__v4di)_mm256_setzero_si256()); 6481 } 6482 6483 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6484 _mm_srai_epi64(__m128i __A, unsigned int __imm) 6485 { 6486 return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, (int)__imm); 6487 } 6488 6489 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 6490 _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm) 6491 { 6492 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6493 (__v2di)_mm_srai_epi64(__A, __imm), \ 6494 (__v2di)__W); 6495 } 6496 6497 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6498 _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm) 6499 { 6500 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6501 (__v2di)_mm_srai_epi64(__A, __imm), \ 6502 (__v2di)_mm_setzero_si128()); 6503 } 6504 6505 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6506 _mm256_srai_epi64(__m256i __A, unsigned int __imm) 6507 { 6508 return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, (int)__imm); 6509 } 6510 6511 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6512 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, 6513 unsigned int __imm) 6514 { 6515 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6516 (__v4di)_mm256_srai_epi64(__A, __imm), \ 6517 (__v4di)__W); 6518 } 6519 6520 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6521 _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm) 6522 { 6523 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6524 (__v4di)_mm256_srai_epi64(__A, __imm), \ 6525 (__v4di)_mm256_setzero_si256()); 6526 } 6527 6528 #define _mm_ternarylogic_epi32(A, B, C, imm) \ 6529 ((__m128i)__builtin_ia32_pternlogd128_mask( \ 6530 (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \ 6531 (unsigned char)(imm), (__mmask8)-1)) 6532 6533 #define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \ 6534 ((__m128i)__builtin_ia32_pternlogd128_mask( \ 6535 (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \ 6536 (unsigned char)(imm), (__mmask8)(U))) 6537 6538 #define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \ 6539 ((__m128i)__builtin_ia32_pternlogd128_maskz( \ 6540 (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \ 
6541 (unsigned char)(imm), (__mmask8)(U))) 6542 6543 #define _mm256_ternarylogic_epi32(A, B, C, imm) \ 6544 ((__m256i)__builtin_ia32_pternlogd256_mask( \ 6545 (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \ 6546 (unsigned char)(imm), (__mmask8)-1)) 6547 6548 #define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \ 6549 ((__m256i)__builtin_ia32_pternlogd256_mask( \ 6550 (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \ 6551 (unsigned char)(imm), (__mmask8)(U))) 6552 6553 #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \ 6554 ((__m256i)__builtin_ia32_pternlogd256_maskz( \ 6555 (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \ 6556 (unsigned char)(imm), (__mmask8)(U))) 6557 6558 #define _mm_ternarylogic_epi64(A, B, C, imm) \ 6559 ((__m128i)__builtin_ia32_pternlogq128_mask( \ 6560 (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \ 6561 (unsigned char)(imm), (__mmask8)-1)) 6562 6563 #define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \ 6564 ((__m128i)__builtin_ia32_pternlogq128_mask( \ 6565 (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \ 6566 (unsigned char)(imm), (__mmask8)(U))) 6567 6568 #define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \ 6569 ((__m128i)__builtin_ia32_pternlogq128_maskz( \ 6570 (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \ 6571 (unsigned char)(imm), (__mmask8)(U))) 6572 6573 #define _mm256_ternarylogic_epi64(A, B, C, imm) \ 6574 ((__m256i)__builtin_ia32_pternlogq256_mask( \ 6575 (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), \ 6576 (unsigned char)(imm), (__mmask8)-1)) 6577 6578 #define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \ 6579 ((__m256i)__builtin_ia32_pternlogq256_mask( \ 6580 (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), \ 6581 (unsigned char)(imm), (__mmask8)(U))) 6582 6583 #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \ 6584 
((__m256i)__builtin_ia32_pternlogq256_maskz( \ 6585 (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), \ 6586 (unsigned char)(imm), (__mmask8)(U))) 6587 6588 #define _mm256_shuffle_f32x4(A, B, imm) \ 6589 ((__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \ 6590 (__v8sf)(__m256)(B), (int)(imm))) 6591 6592 #define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \ 6593 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6594 (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \ 6595 (__v8sf)(__m256)(W))) 6596 6597 #define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \ 6598 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6599 (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \ 6600 (__v8sf)_mm256_setzero_ps())) 6601 6602 #define _mm256_shuffle_f64x2(A, B, imm) \ 6603 ((__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \ 6604 (__v4df)(__m256d)(B), (int)(imm))) 6605 6606 #define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \ 6607 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6608 (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \ 6609 (__v4df)(__m256d)(W))) 6610 6611 #define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \ 6612 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6613 (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \ 6614 (__v4df)_mm256_setzero_pd())) 6615 6616 #define _mm256_shuffle_i32x4(A, B, imm) \ 6617 ((__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \ 6618 (__v8si)(__m256i)(B), (int)(imm))) 6619 6620 #define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \ 6621 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 6622 (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \ 6623 (__v8si)(__m256i)(W))) 6624 6625 #define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \ 6626 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 6627 (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \ 6628 (__v8si)_mm256_setzero_si256())) 6629 6630 #define _mm256_shuffle_i64x2(A, B, imm) \ 6631 
((__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \ 6632 (__v4di)(__m256i)(B), (int)(imm))) 6633 6634 #define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \ 6635 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 6636 (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \ 6637 (__v4di)(__m256i)(W))) 6638 6639 6640 #define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \ 6641 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 6642 (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \ 6643 (__v4di)_mm256_setzero_si256())) 6644 6645 #define _mm_mask_shuffle_pd(W, U, A, B, M) \ 6646 ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6647 (__v2df)_mm_shuffle_pd((A), (B), (M)), \ 6648 (__v2df)(__m128d)(W))) 6649 6650 #define _mm_maskz_shuffle_pd(U, A, B, M) \ 6651 ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6652 (__v2df)_mm_shuffle_pd((A), (B), (M)), \ 6653 (__v2df)_mm_setzero_pd())) 6654 6655 #define _mm256_mask_shuffle_pd(W, U, A, B, M) \ 6656 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6657 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ 6658 (__v4df)(__m256d)(W))) 6659 6660 #define _mm256_maskz_shuffle_pd(U, A, B, M) \ 6661 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6662 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ 6663 (__v4df)_mm256_setzero_pd())) 6664 6665 #define _mm_mask_shuffle_ps(W, U, A, B, M) \ 6666 ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6667 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ 6668 (__v4sf)(__m128)(W))) 6669 6670 #define _mm_maskz_shuffle_ps(U, A, B, M) \ 6671 ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6672 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ 6673 (__v4sf)_mm_setzero_ps())) 6674 6675 #define _mm256_mask_shuffle_ps(W, U, A, B, M) \ 6676 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6677 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ 6678 (__v8sf)(__m256)(W))) 6679 6680 #define _mm256_maskz_shuffle_ps(U, A, B, M) \ 6681 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6682 
(__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ 6683 (__v8sf)_mm256_setzero_ps())) 6684 6685 static __inline__ __m128d __DEFAULT_FN_ATTRS128 6686 _mm_rsqrt14_pd (__m128d __A) 6687 { 6688 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 6689 (__v2df) 6690 _mm_setzero_pd (), 6691 (__mmask8) -1); 6692 } 6693 6694 static __inline__ __m128d __DEFAULT_FN_ATTRS128 6695 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A) 6696 { 6697 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 6698 (__v2df) __W, 6699 (__mmask8) __U); 6700 } 6701 6702 static __inline__ __m128d __DEFAULT_FN_ATTRS128 6703 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A) 6704 { 6705 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 6706 (__v2df) 6707 _mm_setzero_pd (), 6708 (__mmask8) __U); 6709 } 6710 6711 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6712 _mm256_rsqrt14_pd (__m256d __A) 6713 { 6714 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 6715 (__v4df) 6716 _mm256_setzero_pd (), 6717 (__mmask8) -1); 6718 } 6719 6720 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6721 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A) 6722 { 6723 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 6724 (__v4df) __W, 6725 (__mmask8) __U); 6726 } 6727 6728 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6729 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A) 6730 { 6731 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 6732 (__v4df) 6733 _mm256_setzero_pd (), 6734 (__mmask8) __U); 6735 } 6736 6737 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6738 _mm_rsqrt14_ps (__m128 __A) 6739 { 6740 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 6741 (__v4sf) 6742 _mm_setzero_ps (), 6743 (__mmask8) -1); 6744 } 6745 6746 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6747 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A) 6748 { 6749 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) 
__A, 6750 (__v4sf) __W, 6751 (__mmask8) __U); 6752 } 6753 6754 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6755 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A) 6756 { 6757 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 6758 (__v4sf) 6759 _mm_setzero_ps (), 6760 (__mmask8) __U); 6761 } 6762 6763 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6764 _mm256_rsqrt14_ps (__m256 __A) 6765 { 6766 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 6767 (__v8sf) 6768 _mm256_setzero_ps (), 6769 (__mmask8) -1); 6770 } 6771 6772 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6773 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A) 6774 { 6775 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 6776 (__v8sf) __W, 6777 (__mmask8) __U); 6778 } 6779 6780 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6781 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A) 6782 { 6783 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 6784 (__v8sf) 6785 _mm256_setzero_ps (), 6786 (__mmask8) __U); 6787 } 6788 6789 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6790 _mm256_broadcast_f32x4(__m128 __A) 6791 { 6792 return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 6793 0, 1, 2, 3, 0, 1, 2, 3); 6794 } 6795 6796 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6797 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) 6798 { 6799 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, 6800 (__v8sf)_mm256_broadcast_f32x4(__A), 6801 (__v8sf)__O); 6802 } 6803 6804 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6805 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A) 6806 { 6807 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, 6808 (__v8sf)_mm256_broadcast_f32x4(__A), 6809 (__v8sf)_mm256_setzero_ps()); 6810 } 6811 6812 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6813 _mm256_broadcast_i32x4(__m128i __A) 6814 { 6815 return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 6816 0, 1, 2, 3, 0, 1, 
2, 3); 6817 } 6818 6819 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6820 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) 6821 { 6822 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 6823 (__v8si)_mm256_broadcast_i32x4(__A), 6824 (__v8si)__O); 6825 } 6826 6827 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6828 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) 6829 { 6830 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 6831 (__v8si)_mm256_broadcast_i32x4(__A), 6832 (__v8si)_mm256_setzero_si256()); 6833 } 6834 6835 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6836 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A) 6837 { 6838 return (__m256d)__builtin_ia32_selectpd_256(__M, 6839 (__v4df) _mm256_broadcastsd_pd(__A), 6840 (__v4df) __O); 6841 } 6842 6843 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6844 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) 6845 { 6846 return (__m256d)__builtin_ia32_selectpd_256(__M, 6847 (__v4df) _mm256_broadcastsd_pd(__A), 6848 (__v4df) _mm256_setzero_pd()); 6849 } 6850 6851 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6852 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A) 6853 { 6854 return (__m128)__builtin_ia32_selectps_128(__M, 6855 (__v4sf) _mm_broadcastss_ps(__A), 6856 (__v4sf) __O); 6857 } 6858 6859 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6860 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) 6861 { 6862 return (__m128)__builtin_ia32_selectps_128(__M, 6863 (__v4sf) _mm_broadcastss_ps(__A), 6864 (__v4sf) _mm_setzero_ps()); 6865 } 6866 6867 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6868 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A) 6869 { 6870 return (__m256)__builtin_ia32_selectps_256(__M, 6871 (__v8sf) _mm256_broadcastss_ps(__A), 6872 (__v8sf) __O); 6873 } 6874 6875 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6876 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) 6877 { 6878 return 
(__m256)__builtin_ia32_selectps_256(__M, 6879 (__v8sf) _mm256_broadcastss_ps(__A), 6880 (__v8sf) _mm256_setzero_ps()); 6881 } 6882 6883 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6884 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 6885 { 6886 return (__m128i)__builtin_ia32_selectd_128(__M, 6887 (__v4si) _mm_broadcastd_epi32(__A), 6888 (__v4si) __O); 6889 } 6890 6891 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6892 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) 6893 { 6894 return (__m128i)__builtin_ia32_selectd_128(__M, 6895 (__v4si) _mm_broadcastd_epi32(__A), 6896 (__v4si) _mm_setzero_si128()); 6897 } 6898 6899 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6900 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A) 6901 { 6902 return (__m256i)__builtin_ia32_selectd_256(__M, 6903 (__v8si) _mm256_broadcastd_epi32(__A), 6904 (__v8si) __O); 6905 } 6906 6907 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6908 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) 6909 { 6910 return (__m256i)__builtin_ia32_selectd_256(__M, 6911 (__v8si) _mm256_broadcastd_epi32(__A), 6912 (__v8si) _mm256_setzero_si256()); 6913 } 6914 6915 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6916 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A) 6917 { 6918 return (__m128i)__builtin_ia32_selectq_128(__M, 6919 (__v2di) _mm_broadcastq_epi64(__A), 6920 (__v2di) __O); 6921 } 6922 6923 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6924 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 6925 { 6926 return (__m128i)__builtin_ia32_selectq_128(__M, 6927 (__v2di) _mm_broadcastq_epi64(__A), 6928 (__v2di) _mm_setzero_si128()); 6929 } 6930 6931 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6932 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A) 6933 { 6934 return (__m256i)__builtin_ia32_selectq_256(__M, 6935 (__v4di) _mm256_broadcastq_epi64(__A), 6936 (__v4di) __O); 6937 } 6938 6939 static __inline__ 
__m256i __DEFAULT_FN_ATTRS256 6940 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 6941 { 6942 return (__m256i)__builtin_ia32_selectq_256(__M, 6943 (__v4di) _mm256_broadcastq_epi64(__A), 6944 (__v4di) _mm256_setzero_si256()); 6945 } 6946 6947 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6948 _mm_cvtsepi32_epi8 (__m128i __A) 6949 { 6950 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 6951 (__v16qi)_mm_undefined_si128(), 6952 (__mmask8) -1); 6953 } 6954 6955 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6956 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 6957 { 6958 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 6959 (__v16qi) __O, __M); 6960 } 6961 6962 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6963 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A) 6964 { 6965 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 6966 (__v16qi) _mm_setzero_si128 (), 6967 __M); 6968 } 6969 6970 static __inline__ void __DEFAULT_FN_ATTRS128 6971 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 6972 { 6973 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 6974 } 6975 6976 static __inline__ __m128i __DEFAULT_FN_ATTRS256 6977 _mm256_cvtsepi32_epi8 (__m256i __A) 6978 { 6979 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 6980 (__v16qi)_mm_undefined_si128(), 6981 (__mmask8) -1); 6982 } 6983 6984 static __inline__ __m128i __DEFAULT_FN_ATTRS256 6985 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 6986 { 6987 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 6988 (__v16qi) __O, __M); 6989 } 6990 6991 static __inline__ __m128i __DEFAULT_FN_ATTRS256 6992 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A) 6993 { 6994 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 6995 (__v16qi) _mm_setzero_si128 (), 6996 __M); 6997 } 6998 6999 static __inline__ void __DEFAULT_FN_ATTRS256 7000 
_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7001 { 7002 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); 7003 } 7004 7005 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7006 _mm_cvtsepi32_epi16 (__m128i __A) 7007 { 7008 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7009 (__v8hi)_mm_setzero_si128 (), 7010 (__mmask8) -1); 7011 } 7012 7013 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7014 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7015 { 7016 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7017 (__v8hi)__O, 7018 __M); 7019 } 7020 7021 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7022 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A) 7023 { 7024 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7025 (__v8hi) _mm_setzero_si128 (), 7026 __M); 7027 } 7028 7029 static __inline__ void __DEFAULT_FN_ATTRS128 7030 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7031 { 7032 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 7033 } 7034 7035 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7036 _mm256_cvtsepi32_epi16 (__m256i __A) 7037 { 7038 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7039 (__v8hi)_mm_undefined_si128(), 7040 (__mmask8) -1); 7041 } 7042 7043 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7044 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7045 { 7046 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7047 (__v8hi) __O, __M); 7048 } 7049 7050 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7051 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A) 7052 { 7053 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7054 (__v8hi) _mm_setzero_si128 (), 7055 __M); 7056 } 7057 7058 static __inline__ void __DEFAULT_FN_ATTRS256 7059 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7060 { 7061 
__builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 7062 } 7063 7064 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7065 _mm_cvtsepi64_epi8 (__m128i __A) 7066 { 7067 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7068 (__v16qi)_mm_undefined_si128(), 7069 (__mmask8) -1); 7070 } 7071 7072 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7073 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7074 { 7075 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7076 (__v16qi) __O, __M); 7077 } 7078 7079 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7080 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A) 7081 { 7082 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7083 (__v16qi) _mm_setzero_si128 (), 7084 __M); 7085 } 7086 7087 static __inline__ void __DEFAULT_FN_ATTRS128 7088 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7089 { 7090 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 7091 } 7092 7093 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7094 _mm256_cvtsepi64_epi8 (__m256i __A) 7095 { 7096 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7097 (__v16qi)_mm_undefined_si128(), 7098 (__mmask8) -1); 7099 } 7100 7101 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7102 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7103 { 7104 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7105 (__v16qi) __O, __M); 7106 } 7107 7108 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7109 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A) 7110 { 7111 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7112 (__v16qi) _mm_setzero_si128 (), 7113 __M); 7114 } 7115 7116 static __inline__ void __DEFAULT_FN_ATTRS256 7117 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7118 { 7119 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 7120 } 7121 7122 static __inline__ 
__m128i __DEFAULT_FN_ATTRS128 7123 _mm_cvtsepi64_epi32 (__m128i __A) 7124 { 7125 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7126 (__v4si)_mm_undefined_si128(), 7127 (__mmask8) -1); 7128 } 7129 7130 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7131 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7132 { 7133 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7134 (__v4si) __O, __M); 7135 } 7136 7137 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7138 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A) 7139 { 7140 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7141 (__v4si) _mm_setzero_si128 (), 7142 __M); 7143 } 7144 7145 static __inline__ void __DEFAULT_FN_ATTRS128 7146 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 7147 { 7148 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 7149 } 7150 7151 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7152 _mm256_cvtsepi64_epi32 (__m256i __A) 7153 { 7154 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7155 (__v4si)_mm_undefined_si128(), 7156 (__mmask8) -1); 7157 } 7158 7159 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7160 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 7161 { 7162 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7163 (__v4si)__O, 7164 __M); 7165 } 7166 7167 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7168 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A) 7169 { 7170 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7171 (__v4si) _mm_setzero_si128 (), 7172 __M); 7173 } 7174 7175 static __inline__ void __DEFAULT_FN_ATTRS256 7176 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 7177 { 7178 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 7179 } 7180 7181 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7182 _mm_cvtsepi64_epi16 (__m128i __A) 7183 { 7184 return (__m128i) 
__builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7185 (__v8hi)_mm_undefined_si128(), 7186 (__mmask8) -1); 7187 } 7188 7189 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7190 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7191 { 7192 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7193 (__v8hi) __O, __M); 7194 } 7195 7196 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7197 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A) 7198 { 7199 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7200 (__v8hi) _mm_setzero_si128 (), 7201 __M); 7202 } 7203 7204 static __inline__ void __DEFAULT_FN_ATTRS128 7205 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7206 { 7207 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 7208 } 7209 7210 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7211 _mm256_cvtsepi64_epi16 (__m256i __A) 7212 { 7213 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7214 (__v8hi)_mm_undefined_si128(), 7215 (__mmask8) -1); 7216 } 7217 7218 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7219 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7220 { 7221 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7222 (__v8hi) __O, __M); 7223 } 7224 7225 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7226 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A) 7227 { 7228 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7229 (__v8hi) _mm_setzero_si128 (), 7230 __M); 7231 } 7232 7233 static __inline__ void __DEFAULT_FN_ATTRS256 7234 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7235 { 7236 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 7237 } 7238 7239 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7240 _mm_cvtusepi32_epi8 (__m128i __A) 7241 { 7242 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7243 (__v16qi)_mm_undefined_si128(), 7244 (__mmask8) -1); 
7245 } 7246 7247 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7248 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7249 { 7250 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7251 (__v16qi) __O, 7252 __M); 7253 } 7254 7255 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7256 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A) 7257 { 7258 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7259 (__v16qi) _mm_setzero_si128 (), 7260 __M); 7261 } 7262 7263 static __inline__ void __DEFAULT_FN_ATTRS128 7264 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7265 { 7266 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 7267 } 7268 7269 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7270 _mm256_cvtusepi32_epi8 (__m256i __A) 7271 { 7272 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7273 (__v16qi)_mm_undefined_si128(), 7274 (__mmask8) -1); 7275 } 7276 7277 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7278 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7279 { 7280 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7281 (__v16qi) __O, 7282 __M); 7283 } 7284 7285 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7286 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A) 7287 { 7288 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7289 (__v16qi) _mm_setzero_si128 (), 7290 __M); 7291 } 7292 7293 static __inline__ void __DEFAULT_FN_ATTRS256 7294 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7295 { 7296 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M); 7297 } 7298 7299 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7300 _mm_cvtusepi32_epi16 (__m128i __A) 7301 { 7302 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7303 (__v8hi)_mm_undefined_si128(), 7304 (__mmask8) -1); 7305 } 7306 7307 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7308 
_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7309 { 7310 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7311 (__v8hi) __O, __M); 7312 } 7313 7314 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7315 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A) 7316 { 7317 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7318 (__v8hi) _mm_setzero_si128 (), 7319 __M); 7320 } 7321 7322 static __inline__ void __DEFAULT_FN_ATTRS128 7323 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7324 { 7325 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 7326 } 7327 7328 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7329 _mm256_cvtusepi32_epi16 (__m256i __A) 7330 { 7331 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7332 (__v8hi) _mm_undefined_si128(), 7333 (__mmask8) -1); 7334 } 7335 7336 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7337 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7338 { 7339 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7340 (__v8hi) __O, __M); 7341 } 7342 7343 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7344 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A) 7345 { 7346 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7347 (__v8hi) _mm_setzero_si128 (), 7348 __M); 7349 } 7350 7351 static __inline__ void __DEFAULT_FN_ATTRS256 7352 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7353 { 7354 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 7355 } 7356 7357 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7358 _mm_cvtusepi64_epi8 (__m128i __A) 7359 { 7360 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 7361 (__v16qi)_mm_undefined_si128(), 7362 (__mmask8) -1); 7363 } 7364 7365 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7366 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7367 { 7368 return 
(__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 7369 (__v16qi) __O, 7370 __M); 7371 } 7372 7373 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7374 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A) 7375 { 7376 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 7377 (__v16qi) _mm_setzero_si128 (), 7378 __M); 7379 } 7380 7381 static __inline__ void __DEFAULT_FN_ATTRS128 7382 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7383 { 7384 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 7385 } 7386 7387 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7388 _mm256_cvtusepi64_epi8 (__m256i __A) 7389 { 7390 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7391 (__v16qi)_mm_undefined_si128(), 7392 (__mmask8) -1); 7393 } 7394 7395 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7396 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7397 { 7398 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7399 (__v16qi) __O, 7400 __M); 7401 } 7402 7403 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7404 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A) 7405 { 7406 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7407 (__v16qi) _mm_setzero_si128 (), 7408 __M); 7409 } 7410 7411 static __inline__ void __DEFAULT_FN_ATTRS256 7412 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7413 { 7414 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 7415 } 7416 7417 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7418 _mm_cvtusepi64_epi32 (__m128i __A) 7419 { 7420 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7421 (__v4si)_mm_undefined_si128(), 7422 (__mmask8) -1); 7423 } 7424 7425 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7426 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7427 { 7428 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7429 (__v4si) __O, 
__M); 7430 } 7431 7432 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7433 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A) 7434 { 7435 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7436 (__v4si) _mm_setzero_si128 (), 7437 __M); 7438 } 7439 7440 static __inline__ void __DEFAULT_FN_ATTRS128 7441 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 7442 { 7443 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 7444 } 7445 7446 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7447 _mm256_cvtusepi64_epi32 (__m256i __A) 7448 { 7449 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7450 (__v4si)_mm_undefined_si128(), 7451 (__mmask8) -1); 7452 } 7453 7454 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7455 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 7456 { 7457 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7458 (__v4si) __O, __M); 7459 } 7460 7461 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7462 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A) 7463 { 7464 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7465 (__v4si) _mm_setzero_si128 (), 7466 __M); 7467 } 7468 7469 static __inline__ void __DEFAULT_FN_ATTRS256 7470 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 7471 { 7472 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 7473 } 7474 7475 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7476 _mm_cvtusepi64_epi16 (__m128i __A) 7477 { 7478 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7479 (__v8hi)_mm_undefined_si128(), 7480 (__mmask8) -1); 7481 } 7482 7483 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7484 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7485 { 7486 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7487 (__v8hi) __O, __M); 7488 } 7489 7490 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7491 
_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A) 7492 { 7493 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7494 (__v8hi) _mm_setzero_si128 (), 7495 __M); 7496 } 7497 7498 static __inline__ void __DEFAULT_FN_ATTRS128 7499 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7500 { 7501 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 7502 } 7503 7504 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7505 _mm256_cvtusepi64_epi16 (__m256i __A) 7506 { 7507 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7508 (__v8hi)_mm_undefined_si128(), 7509 (__mmask8) -1); 7510 } 7511 7512 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7513 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7514 { 7515 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7516 (__v8hi) __O, __M); 7517 } 7518 7519 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7520 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A) 7521 { 7522 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7523 (__v8hi) _mm_setzero_si128 (), 7524 __M); 7525 } 7526 7527 static __inline__ void __DEFAULT_FN_ATTRS256 7528 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7529 { 7530 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 7531 } 7532 7533 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7534 _mm_cvtepi32_epi8 (__m128i __A) 7535 { 7536 return (__m128i)__builtin_shufflevector( 7537 __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, 7538 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); 7539 } 7540 7541 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7542 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7543 { 7544 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 7545 (__v16qi) __O, __M); 7546 } 7547 7548 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7549 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A) 
7550 { 7551 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 7552 (__v16qi) 7553 _mm_setzero_si128 (), 7554 __M); 7555 } 7556 7557 static __inline__ void __DEFAULT_FN_ATTRS128 7558 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7559 { 7560 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 7561 } 7562 7563 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7564 _mm256_cvtepi32_epi8 (__m256i __A) 7565 { 7566 return (__m128i)__builtin_shufflevector( 7567 __builtin_convertvector((__v8si)__A, __v8qi), 7568 (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 7569 12, 13, 14, 15); 7570 } 7571 7572 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7573 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7574 { 7575 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 7576 (__v16qi) __O, __M); 7577 } 7578 7579 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7580 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A) 7581 { 7582 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 7583 (__v16qi) _mm_setzero_si128 (), 7584 __M); 7585 } 7586 7587 static __inline__ void __DEFAULT_FN_ATTRS256 7588 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7589 { 7590 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); 7591 } 7592 7593 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7594 _mm_cvtepi32_epi16 (__m128i __A) 7595 { 7596 return (__m128i)__builtin_shufflevector( 7597 __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, 7598 2, 3, 4, 5, 6, 7); 7599 } 7600 7601 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7602 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7603 { 7604 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 7605 (__v8hi) __O, __M); 7606 } 7607 7608 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7609 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A) 7610 { 7611 return 
(__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 7612 (__v8hi) _mm_setzero_si128 (), 7613 __M); 7614 } 7615 7616 static __inline__ void __DEFAULT_FN_ATTRS128 7617 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7618 { 7619 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 7620 } 7621 7622 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7623 _mm256_cvtepi32_epi16 (__m256i __A) 7624 { 7625 return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi); 7626 } 7627 7628 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7629 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7630 { 7631 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 7632 (__v8hi) __O, __M); 7633 } 7634 7635 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7636 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A) 7637 { 7638 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 7639 (__v8hi) _mm_setzero_si128 (), 7640 __M); 7641 } 7642 7643 static __inline__ void __DEFAULT_FN_ATTRS256 7644 _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7645 { 7646 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 7647 } 7648 7649 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7650 _mm_cvtepi64_epi8 (__m128i __A) 7651 { 7652 return (__m128i)__builtin_shufflevector( 7653 __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3, 7654 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); 7655 } 7656 7657 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7658 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7659 { 7660 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 7661 (__v16qi) __O, __M); 7662 } 7663 7664 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7665 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A) 7666 { 7667 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 7668 (__v16qi) _mm_setzero_si128 (), 7669 __M); 7670 } 7671 7672 static __inline__ 
void __DEFAULT_FN_ATTRS128 7673 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7674 { 7675 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 7676 } 7677 7678 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7679 _mm256_cvtepi64_epi8 (__m256i __A) 7680 { 7681 return (__m128i)__builtin_shufflevector( 7682 __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, 7683 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); 7684 } 7685 7686 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7687 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7688 { 7689 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 7690 (__v16qi) __O, __M); 7691 } 7692 7693 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7694 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A) 7695 { 7696 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 7697 (__v16qi) _mm_setzero_si128 (), 7698 __M); 7699 } 7700 7701 static __inline__ void __DEFAULT_FN_ATTRS256 7702 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7703 { 7704 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 7705 } 7706 7707 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7708 _mm_cvtepi64_epi32 (__m128i __A) 7709 { 7710 return (__m128i)__builtin_shufflevector( 7711 __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3); 7712 } 7713 7714 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7715 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7716 { 7717 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 7718 (__v4si) __O, __M); 7719 } 7720 7721 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7722 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A) 7723 { 7724 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 7725 (__v4si) _mm_setzero_si128 (), 7726 __M); 7727 } 7728 7729 static __inline__ void __DEFAULT_FN_ATTRS128 7730 _mm_mask_cvtepi64_storeu_epi32 (void * 
__P, __mmask8 __M, __m128i __A) 7731 { 7732 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 7733 } 7734 7735 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7736 _mm256_cvtepi64_epi32 (__m256i __A) 7737 { 7738 return (__m128i)__builtin_convertvector((__v4di)__A, __v4si); 7739 } 7740 7741 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7742 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 7743 { 7744 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 7745 (__v4si)_mm256_cvtepi64_epi32(__A), 7746 (__v4si)__O); 7747 } 7748 7749 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7750 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A) 7751 { 7752 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 7753 (__v4si)_mm256_cvtepi64_epi32(__A), 7754 (__v4si)_mm_setzero_si128()); 7755 } 7756 7757 static __inline__ void __DEFAULT_FN_ATTRS256 7758 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 7759 { 7760 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 7761 } 7762 7763 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7764 _mm_cvtepi64_epi16 (__m128i __A) 7765 { 7766 return (__m128i)__builtin_shufflevector( 7767 __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3, 7768 3, 3, 3, 3); 7769 } 7770 7771 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7772 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7773 { 7774 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 7775 (__v8hi)__O, 7776 __M); 7777 } 7778 7779 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7780 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A) 7781 { 7782 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 7783 (__v8hi) _mm_setzero_si128 (), 7784 __M); 7785 } 7786 7787 static __inline__ void __DEFAULT_FN_ATTRS128 7788 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7789 { 7790 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, 
(__v2di) __A, __M); 7791 } 7792 7793 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7794 _mm256_cvtepi64_epi16 (__m256i __A) 7795 { 7796 return (__m128i)__builtin_shufflevector( 7797 __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, 7798 2, 3, 4, 5, 6, 7); 7799 } 7800 7801 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7802 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7803 { 7804 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 7805 (__v8hi) __O, __M); 7806 } 7807 7808 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7809 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A) 7810 { 7811 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 7812 (__v8hi) _mm_setzero_si128 (), 7813 __M); 7814 } 7815 7816 static __inline__ void __DEFAULT_FN_ATTRS256 7817 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7818 { 7819 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 7820 } 7821 7822 #define _mm256_extractf32x4_ps(A, imm) \ 7823 ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ 7824 (int)(imm), \ 7825 (__v4sf)_mm_undefined_ps(), \ 7826 (__mmask8)-1)) 7827 7828 #define _mm256_mask_extractf32x4_ps(W, U, A, imm) \ 7829 ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ 7830 (int)(imm), \ 7831 (__v4sf)(__m128)(W), \ 7832 (__mmask8)(U))) 7833 7834 #define _mm256_maskz_extractf32x4_ps(U, A, imm) \ 7835 ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ 7836 (int)(imm), \ 7837 (__v4sf)_mm_setzero_ps(), \ 7838 (__mmask8)(U))) 7839 7840 #define _mm256_extracti32x4_epi32(A, imm) \ 7841 ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ 7842 (int)(imm), \ 7843 (__v4si)_mm_undefined_si128(), \ 7844 (__mmask8)-1)) 7845 7846 #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \ 7847 ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ 7848 (int)(imm), \ 7849 (__v4si)(__m128i)(W), 
\ 7850 (__mmask8)(U))) 7851 7852 #define _mm256_maskz_extracti32x4_epi32(U, A, imm) \ 7853 ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ 7854 (int)(imm), \ 7855 (__v4si)_mm_setzero_si128(), \ 7856 (__mmask8)(U))) 7857 7858 #define _mm256_insertf32x4(A, B, imm) \ 7859 ((__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \ 7860 (__v4sf)(__m128)(B), (int)(imm))) 7861 7862 #define _mm256_mask_insertf32x4(W, U, A, B, imm) \ 7863 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 7864 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ 7865 (__v8sf)(__m256)(W))) 7866 7867 #define _mm256_maskz_insertf32x4(U, A, B, imm) \ 7868 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 7869 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ 7870 (__v8sf)_mm256_setzero_ps())) 7871 7872 #define _mm256_inserti32x4(A, B, imm) \ 7873 ((__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \ 7874 (__v4si)(__m128i)(B), (int)(imm))) 7875 7876 #define _mm256_mask_inserti32x4(W, U, A, B, imm) \ 7877 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 7878 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ 7879 (__v8si)(__m256i)(W))) 7880 7881 #define _mm256_maskz_inserti32x4(U, A, B, imm) \ 7882 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 7883 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ 7884 (__v8si)_mm256_setzero_si256())) 7885 7886 #define _mm_getmant_pd(A, B, C) \ 7887 ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 7888 (int)(((C)<<2) | (B)), \ 7889 (__v2df)_mm_setzero_pd(), \ 7890 (__mmask8)-1)) 7891 7892 #define _mm_mask_getmant_pd(W, U, A, B, C) \ 7893 ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 7894 (int)(((C)<<2) | (B)), \ 7895 (__v2df)(__m128d)(W), \ 7896 (__mmask8)(U))) 7897 7898 #define _mm_maskz_getmant_pd(U, A, B, C) \ 7899 ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 7900 (int)(((C)<<2) | (B)), \ 7901 (__v2df)_mm_setzero_pd(), \ 7902 (__mmask8)(U))) 7903 7904 #define 
_mm256_getmant_pd(A, B, C) \ 7905 ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 7906 (int)(((C)<<2) | (B)), \ 7907 (__v4df)_mm256_setzero_pd(), \ 7908 (__mmask8)-1)) 7909 7910 #define _mm256_mask_getmant_pd(W, U, A, B, C) \ 7911 ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 7912 (int)(((C)<<2) | (B)), \ 7913 (__v4df)(__m256d)(W), \ 7914 (__mmask8)(U))) 7915 7916 #define _mm256_maskz_getmant_pd(U, A, B, C) \ 7917 ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 7918 (int)(((C)<<2) | (B)), \ 7919 (__v4df)_mm256_setzero_pd(), \ 7920 (__mmask8)(U))) 7921 7922 #define _mm_getmant_ps(A, B, C) \ 7923 ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 7924 (int)(((C)<<2) | (B)), \ 7925 (__v4sf)_mm_setzero_ps(), \ 7926 (__mmask8)-1)) 7927 7928 #define _mm_mask_getmant_ps(W, U, A, B, C) \ 7929 ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 7930 (int)(((C)<<2) | (B)), \ 7931 (__v4sf)(__m128)(W), \ 7932 (__mmask8)(U))) 7933 7934 #define _mm_maskz_getmant_ps(U, A, B, C) \ 7935 ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 7936 (int)(((C)<<2) | (B)), \ 7937 (__v4sf)_mm_setzero_ps(), \ 7938 (__mmask8)(U))) 7939 7940 #define _mm256_getmant_ps(A, B, C) \ 7941 ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 7942 (int)(((C)<<2) | (B)), \ 7943 (__v8sf)_mm256_setzero_ps(), \ 7944 (__mmask8)-1)) 7945 7946 #define _mm256_mask_getmant_ps(W, U, A, B, C) \ 7947 ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 7948 (int)(((C)<<2) | (B)), \ 7949 (__v8sf)(__m256)(W), \ 7950 (__mmask8)(U))) 7951 7952 #define _mm256_maskz_getmant_ps(U, A, B, C) \ 7953 ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 7954 (int)(((C)<<2) | (B)), \ 7955 (__v8sf)_mm256_setzero_ps(), \ 7956 (__mmask8)(U))) 7957 7958 #define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \ 7959 ((__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \ 7960 
(void const *)(addr), \ 7961 (__v2di)(__m128i)(index), \ 7962 (__mmask8)(mask), (int)(scale))) 7963 7964 #define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \ 7965 ((__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \ 7966 (void const *)(addr), \ 7967 (__v2di)(__m128i)(index), \ 7968 (__mmask8)(mask), (int)(scale))) 7969 7970 #define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \ 7971 ((__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \ 7972 (void const *)(addr), \ 7973 (__v4di)(__m256i)(index), \ 7974 (__mmask8)(mask), (int)(scale))) 7975 7976 #define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \ 7977 ((__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \ 7978 (void const *)(addr), \ 7979 (__v4di)(__m256i)(index), \ 7980 (__mmask8)(mask), (int)(scale))) 7981 7982 #define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \ 7983 ((__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \ 7984 (void const *)(addr), \ 7985 (__v2di)(__m128i)(index), \ 7986 (__mmask8)(mask), (int)(scale))) 7987 7988 #define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \ 7989 ((__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \ 7990 (void const *)(addr), \ 7991 (__v2di)(__m128i)(index), \ 7992 (__mmask8)(mask), (int)(scale))) 7993 7994 #define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \ 7995 ((__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \ 7996 (void const *)(addr), \ 7997 (__v4di)(__m256i)(index), \ 7998 (__mmask8)(mask), (int)(scale))) 7999 8000 #define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \ 8001 ((__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \ 8002 (void const *)(addr), \ 8003 (__v4di)(__m256i)(index), \ 8004 (__mmask8)(mask), (int)(scale))) 8005 8006 #define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \ 8007 
((__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \ 8008 (void const *)(addr), \ 8009 (__v4si)(__m128i)(index), \ 8010 (__mmask8)(mask), (int)(scale))) 8011 8012 #define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \ 8013 ((__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \ 8014 (void const *)(addr), \ 8015 (__v4si)(__m128i)(index), \ 8016 (__mmask8)(mask), (int)(scale))) 8017 8018 #define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \ 8019 ((__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \ 8020 (void const *)(addr), \ 8021 (__v4si)(__m128i)(index), \ 8022 (__mmask8)(mask), (int)(scale))) 8023 8024 #define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \ 8025 ((__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \ 8026 (void const *)(addr), \ 8027 (__v4si)(__m128i)(index), \ 8028 (__mmask8)(mask), (int)(scale))) 8029 8030 #define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \ 8031 ((__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \ 8032 (void const *)(addr), \ 8033 (__v4si)(__m128i)(index), \ 8034 (__mmask8)(mask), (int)(scale))) 8035 8036 #define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \ 8037 ((__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \ 8038 (void const *)(addr), \ 8039 (__v4si)(__m128i)(index), \ 8040 (__mmask8)(mask), (int)(scale))) 8041 8042 #define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \ 8043 ((__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \ 8044 (void const *)(addr), \ 8045 (__v8si)(__m256i)(index), \ 8046 (__mmask8)(mask), (int)(scale))) 8047 8048 #define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \ 8049 ((__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \ 8050 (void const *)(addr), \ 8051 (__v8si)(__m256i)(index), \ 8052 (__mmask8)(mask), (int)(scale))) 8053 8054 #define _mm256_permutex_pd(X, C) \ 8055 
((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C))) 8056 8057 #define _mm256_mask_permutex_pd(W, U, X, C) \ 8058 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 8059 (__v4df)_mm256_permutex_pd((X), (C)), \ 8060 (__v4df)(__m256d)(W))) 8061 8062 #define _mm256_maskz_permutex_pd(U, X, C) \ 8063 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 8064 (__v4df)_mm256_permutex_pd((X), (C)), \ 8065 (__v4df)_mm256_setzero_pd())) 8066 8067 #define _mm256_permutex_epi64(X, C) \ 8068 ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C))) 8069 8070 #define _mm256_mask_permutex_epi64(W, U, X, C) \ 8071 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8072 (__v4di)_mm256_permutex_epi64((X), (C)), \ 8073 (__v4di)(__m256i)(W))) 8074 8075 #define _mm256_maskz_permutex_epi64(U, X, C) \ 8076 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8077 (__v4di)_mm256_permutex_epi64((X), (C)), \ 8078 (__v4di)_mm256_setzero_si256())) 8079 8080 static __inline__ __m256d __DEFAULT_FN_ATTRS256 8081 _mm256_permutexvar_pd (__m256i __X, __m256d __Y) 8082 { 8083 return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X); 8084 } 8085 8086 static __inline__ __m256d __DEFAULT_FN_ATTRS256 8087 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X, 8088 __m256d __Y) 8089 { 8090 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 8091 (__v4df)_mm256_permutexvar_pd(__X, __Y), 8092 (__v4df)__W); 8093 } 8094 8095 static __inline__ __m256d __DEFAULT_FN_ATTRS256 8096 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y) 8097 { 8098 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 8099 (__v4df)_mm256_permutexvar_pd(__X, __Y), 8100 (__v4df)_mm256_setzero_pd()); 8101 } 8102 8103 static __inline__ __m256i __DEFAULT_FN_ATTRS256 8104 _mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y) 8105 { 8106 return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X); 8107 } 8108 8109 static __inline__ __m256i 
__DEFAULT_FN_ATTRS256 8110 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y) 8111 { 8112 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 8113 (__v4di)_mm256_permutexvar_epi64(__X, __Y), 8114 (__v4di)_mm256_setzero_si256()); 8115 } 8116 8117 static __inline__ __m256i __DEFAULT_FN_ATTRS256 8118 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X, 8119 __m256i __Y) 8120 { 8121 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 8122 (__v4di)_mm256_permutexvar_epi64(__X, __Y), 8123 (__v4di)__W); 8124 } 8125 8126 #define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A)) 8127 8128 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8129 _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) 8130 { 8131 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8132 (__v8sf)_mm256_permutexvar_ps(__X, __Y), 8133 (__v8sf)__W); 8134 } 8135 8136 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8137 _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) 8138 { 8139 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8140 (__v8sf)_mm256_permutexvar_ps(__X, __Y), 8141 (__v8sf)_mm256_setzero_ps()); 8142 } 8143 8144 #define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A)) 8145 8146 static __inline__ __m256i __DEFAULT_FN_ATTRS256 8147 _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, 8148 __m256i __Y) 8149 { 8150 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 8151 (__v8si)_mm256_permutexvar_epi32(__X, __Y), 8152 (__v8si)__W); 8153 } 8154 8155 static __inline__ __m256i __DEFAULT_FN_ATTRS256 8156 _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) 8157 { 8158 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 8159 (__v8si)_mm256_permutexvar_epi32(__X, __Y), 8160 (__v8si)_mm256_setzero_si256()); 8161 } 8162 8163 #define _mm_alignr_epi32(A, B, imm) \ 8164 
((__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \ 8165 (__v4si)(__m128i)(B), (int)(imm))) 8166 8167 #define _mm_mask_alignr_epi32(W, U, A, B, imm) \ 8168 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8169 (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ 8170 (__v4si)(__m128i)(W))) 8171 8172 #define _mm_maskz_alignr_epi32(U, A, B, imm) \ 8173 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8174 (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ 8175 (__v4si)_mm_setzero_si128())) 8176 8177 #define _mm256_alignr_epi32(A, B, imm) \ 8178 ((__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \ 8179 (__v8si)(__m256i)(B), (int)(imm))) 8180 8181 #define _mm256_mask_alignr_epi32(W, U, A, B, imm) \ 8182 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8183 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ 8184 (__v8si)(__m256i)(W))) 8185 8186 #define _mm256_maskz_alignr_epi32(U, A, B, imm) \ 8187 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8188 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ 8189 (__v8si)_mm256_setzero_si256())) 8190 8191 #define _mm_alignr_epi64(A, B, imm) \ 8192 ((__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \ 8193 (__v2di)(__m128i)(B), (int)(imm))) 8194 8195 #define _mm_mask_alignr_epi64(W, U, A, B, imm) \ 8196 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 8197 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ 8198 (__v2di)(__m128i)(W))) 8199 8200 #define _mm_maskz_alignr_epi64(U, A, B, imm) \ 8201 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 8202 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ 8203 (__v2di)_mm_setzero_si128())) 8204 8205 #define _mm256_alignr_epi64(A, B, imm) \ 8206 ((__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \ 8207 (__v4di)(__m256i)(B), (int)(imm))) 8208 8209 #define _mm256_mask_alignr_epi64(W, U, A, B, imm) \ 8210 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8211 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ 8212 (__v4di)(__m256i)(W))) 8213 8214 #define 
_mm256_maskz_alignr_epi64(U, A, B, imm) \ 8215 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8216 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ 8217 (__v4di)_mm256_setzero_si256())) 8218 8219 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8220 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A) 8221 { 8222 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8223 (__v4sf)_mm_movehdup_ps(__A), 8224 (__v4sf)__W); 8225 } 8226 8227 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8228 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A) 8229 { 8230 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8231 (__v4sf)_mm_movehdup_ps(__A), 8232 (__v4sf)_mm_setzero_ps()); 8233 } 8234 8235 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8236 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A) 8237 { 8238 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8239 (__v8sf)_mm256_movehdup_ps(__A), 8240 (__v8sf)__W); 8241 } 8242 8243 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8244 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A) 8245 { 8246 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8247 (__v8sf)_mm256_movehdup_ps(__A), 8248 (__v8sf)_mm256_setzero_ps()); 8249 } 8250 8251 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8252 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A) 8253 { 8254 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8255 (__v4sf)_mm_moveldup_ps(__A), 8256 (__v4sf)__W); 8257 } 8258 8259 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8260 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A) 8261 { 8262 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8263 (__v4sf)_mm_moveldup_ps(__A), 8264 (__v4sf)_mm_setzero_ps()); 8265 } 8266 8267 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8268 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A) 8269 { 8270 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8271 (__v8sf)_mm256_moveldup_ps(__A), 8272 (__v8sf)__W); 8273 } 8274 8275 
static __inline__ __m256 __DEFAULT_FN_ATTRS256 8276 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) 8277 { 8278 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8279 (__v8sf)_mm256_moveldup_ps(__A), 8280 (__v8sf)_mm256_setzero_ps()); 8281 } 8282 8283 #define _mm256_mask_shuffle_epi32(W, U, A, I) \ 8284 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8285 (__v8si)_mm256_shuffle_epi32((A), (I)), \ 8286 (__v8si)(__m256i)(W))) 8287 8288 #define _mm256_maskz_shuffle_epi32(U, A, I) \ 8289 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8290 (__v8si)_mm256_shuffle_epi32((A), (I)), \ 8291 (__v8si)_mm256_setzero_si256())) 8292 8293 #define _mm_mask_shuffle_epi32(W, U, A, I) \ 8294 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8295 (__v4si)_mm_shuffle_epi32((A), (I)), \ 8296 (__v4si)(__m128i)(W))) 8297 8298 #define _mm_maskz_shuffle_epi32(U, A, I) \ 8299 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8300 (__v4si)_mm_shuffle_epi32((A), (I)), \ 8301 (__v4si)_mm_setzero_si128())) 8302 8303 static __inline__ __m128d __DEFAULT_FN_ATTRS128 8304 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) 8305 { 8306 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 8307 (__v2df) __A, 8308 (__v2df) __W); 8309 } 8310 8311 static __inline__ __m128d __DEFAULT_FN_ATTRS128 8312 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A) 8313 { 8314 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 8315 (__v2df) __A, 8316 (__v2df) _mm_setzero_pd ()); 8317 } 8318 8319 static __inline__ __m256d __DEFAULT_FN_ATTRS256 8320 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A) 8321 { 8322 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 8323 (__v4df) __A, 8324 (__v4df) __W); 8325 } 8326 8327 static __inline__ __m256d __DEFAULT_FN_ATTRS256 8328 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A) 8329 { 8330 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 8331 (__v4df) __A, 8332 (__v4df) _mm256_setzero_pd ()); 8333 } 
8334 8335 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8336 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A) 8337 { 8338 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 8339 (__v4sf) __A, 8340 (__v4sf) __W); 8341 } 8342 8343 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8344 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A) 8345 { 8346 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 8347 (__v4sf) __A, 8348 (__v4sf) _mm_setzero_ps ()); 8349 } 8350 8351 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8352 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A) 8353 { 8354 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 8355 (__v8sf) __A, 8356 (__v8sf) __W); 8357 } 8358 8359 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8360 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A) 8361 { 8362 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 8363 (__v8sf) __A, 8364 (__v8sf) _mm256_setzero_ps ()); 8365 } 8366 8367 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8368 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A) 8369 { 8370 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, 8371 (__v4sf) __W, 8372 (__mmask8) __U); 8373 } 8374 8375 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8376 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A) 8377 { 8378 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, 8379 (__v4sf) 8380 _mm_setzero_ps (), 8381 (__mmask8) __U); 8382 } 8383 8384 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8385 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A) 8386 { 8387 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, 8388 (__v8sf) __W, 8389 (__mmask8) __U); 8390 } 8391 8392 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8393 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A) 8394 { 8395 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, 8396 (__v8sf) 8397 _mm256_setzero_ps (), 8398 (__mmask8) __U); 8399 } 8400 8401 #define _mm_mask_cvt_roundps_ph(W, 
U, A, I) \ 8402 ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ 8403 (__v8hi)(__m128i)(W), \ 8404 (__mmask8)(U))) 8405 8406 #define _mm_maskz_cvt_roundps_ph(U, A, I) \ 8407 ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ 8408 (__v8hi)_mm_setzero_si128(), \ 8409 (__mmask8)(U))) 8410 8411 #define _mm_mask_cvtps_ph _mm_mask_cvt_roundps_ph 8412 #define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph 8413 8414 #define _mm256_mask_cvt_roundps_ph(W, U, A, I) \ 8415 ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ 8416 (__v8hi)(__m128i)(W), \ 8417 (__mmask8)(U))) 8418 8419 #define _mm256_maskz_cvt_roundps_ph(U, A, I) \ 8420 ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ 8421 (__v8hi)_mm_setzero_si128(), \ 8422 (__mmask8)(U))) 8423 8424 #define _mm256_mask_cvtps_ph _mm256_mask_cvt_roundps_ph 8425 #define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph 8426 8427 8428 #undef __DEFAULT_FN_ATTRS128 8429 #undef __DEFAULT_FN_ATTRS256 8430 8431 #endif /* __AVX512VLINTRIN_H */ 8432