1 /*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10 #ifndef __IMMINTRIN_H 11 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead." 12 #endif 13 14 #ifndef __AVX512VLINTRIN_H 15 #define __AVX512VLINTRIN_H 16 17 #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128))) 18 #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256))) 19 20 typedef short __v2hi __attribute__((__vector_size__(4))); 21 typedef char __v4qi __attribute__((__vector_size__(4))); 22 typedef char __v2qi __attribute__((__vector_size__(2))); 23 24 /* Integer compare */ 25 26 #define _mm_cmpeq_epi32_mask(A, B) \ 27 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ) 28 #define _mm_mask_cmpeq_epi32_mask(k, A, B) \ 29 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ) 30 #define _mm_cmpge_epi32_mask(A, B) \ 31 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE) 32 #define _mm_mask_cmpge_epi32_mask(k, A, B) \ 33 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE) 34 #define _mm_cmpgt_epi32_mask(A, B) \ 35 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT) 36 #define _mm_mask_cmpgt_epi32_mask(k, A, B) \ 37 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT) 38 #define _mm_cmple_epi32_mask(A, B) \ 39 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE) 40 #define _mm_mask_cmple_epi32_mask(k, A, B) \ 41 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE) 42 #define _mm_cmplt_epi32_mask(A, B) \ 43 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT) 44 #define _mm_mask_cmplt_epi32_mask(k, A, B) \ 45 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT) 46 #define _mm_cmpneq_epi32_mask(A, B) \ 47 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE) 48 #define _mm_mask_cmpneq_epi32_mask(k, A, B) \ 49 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE) 50 51 #define _mm256_cmpeq_epi32_mask(A, B) \ 52 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ) 53 #define _mm256_mask_cmpeq_epi32_mask(k, A, B) \ 54 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ) 55 #define _mm256_cmpge_epi32_mask(A, B) \ 56 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE) 57 #define _mm256_mask_cmpge_epi32_mask(k, A, B) \ 58 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE) 59 #define _mm256_cmpgt_epi32_mask(A, B) \ 60 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT) 61 #define _mm256_mask_cmpgt_epi32_mask(k, A, B) \ 62 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT) 63 #define _mm256_cmple_epi32_mask(A, B) \ 64 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE) 65 #define _mm256_mask_cmple_epi32_mask(k, A, B) \ 66 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE) 67 #define _mm256_cmplt_epi32_mask(A, B) \ 68 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT) 69 #define _mm256_mask_cmplt_epi32_mask(k, A, B) \ 70 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT) 71 #define _mm256_cmpneq_epi32_mask(A, B) \ 72 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE) 73 #define _mm256_mask_cmpneq_epi32_mask(k, A, B) \ 74 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE) 75 76 #define _mm_cmpeq_epu32_mask(A, B) \ 77 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ) 78 #define _mm_mask_cmpeq_epu32_mask(k, A, B) \ 79 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ) 80 #define _mm_cmpge_epu32_mask(A, B) \ 81 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE) 82 #define _mm_mask_cmpge_epu32_mask(k, A, B) \ 83 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE) 84 #define _mm_cmpgt_epu32_mask(A, B) \ 85 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT) 86 #define _mm_mask_cmpgt_epu32_mask(k, A, B) \ 87 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT) 88 #define _mm_cmple_epu32_mask(A, B) \ 89 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE) 90 #define _mm_mask_cmple_epu32_mask(k, A, B) \ 91 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE) 92 #define _mm_cmplt_epu32_mask(A, B) \ 93 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT) 94 #define _mm_mask_cmplt_epu32_mask(k, A, B) \ 95 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT) 96 #define _mm_cmpneq_epu32_mask(A, B) \ 97 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE) 98 #define _mm_mask_cmpneq_epu32_mask(k, A, B) \ 99 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE) 100 101 #define _mm256_cmpeq_epu32_mask(A, B) \ 102 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ) 103 #define _mm256_mask_cmpeq_epu32_mask(k, A, B) \ 104 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ) 105 #define _mm256_cmpge_epu32_mask(A, B) \ 106 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE) 107 #define _mm256_mask_cmpge_epu32_mask(k, A, B) \ 108 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE) 109 #define _mm256_cmpgt_epu32_mask(A, B) \ 110 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT) 111 #define _mm256_mask_cmpgt_epu32_mask(k, A, B) \ 112 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT) 113 #define _mm256_cmple_epu32_mask(A, B) \ 114 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE) 115 #define _mm256_mask_cmple_epu32_mask(k, A, B) \ 116 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE) 117 #define _mm256_cmplt_epu32_mask(A, B) \ 118 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT) 119 #define _mm256_mask_cmplt_epu32_mask(k, A, B) \ 120 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT) 121 #define _mm256_cmpneq_epu32_mask(A, B) \ 122 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE) 123 #define _mm256_mask_cmpneq_epu32_mask(k, A, B) \ 124 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE) 125 126 #define _mm_cmpeq_epi64_mask(A, B) \ 127 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ) 128 #define _mm_mask_cmpeq_epi64_mask(k, A, B) \ 129 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ) 130 #define _mm_cmpge_epi64_mask(A, B) \ 131 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE) 132 #define _mm_mask_cmpge_epi64_mask(k, A, B) \ 133 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE) 134 #define _mm_cmpgt_epi64_mask(A, B) \ 135 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT) 136 #define _mm_mask_cmpgt_epi64_mask(k, A, B) \ 137 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT) 138 #define _mm_cmple_epi64_mask(A, B) \ 139 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE) 140 #define _mm_mask_cmple_epi64_mask(k, A, B) \ 141 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE) 142 #define _mm_cmplt_epi64_mask(A, B) \ 143 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT) 144 #define _mm_mask_cmplt_epi64_mask(k, A, B) \ 145 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT) 146 #define _mm_cmpneq_epi64_mask(A, B) \ 147 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE) 148 #define _mm_mask_cmpneq_epi64_mask(k, A, B) \ 149 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE) 150 151 #define _mm256_cmpeq_epi64_mask(A, B) \ 152 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ) 153 #define _mm256_mask_cmpeq_epi64_mask(k, A, B) \ 154 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ) 155 #define _mm256_cmpge_epi64_mask(A, B) \ 156 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE) 157 #define _mm256_mask_cmpge_epi64_mask(k, A, B) \ 158 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE) 159 #define _mm256_cmpgt_epi64_mask(A, B) \ 160 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT) 161 #define _mm256_mask_cmpgt_epi64_mask(k, A, B) \ 162 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT) 163 #define _mm256_cmple_epi64_mask(A, B) \ 164 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE) 165 #define _mm256_mask_cmple_epi64_mask(k, A, B) \ 166 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE) 167 #define _mm256_cmplt_epi64_mask(A, B) \ 168 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT) 169 #define _mm256_mask_cmplt_epi64_mask(k, A, B) \ 170 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT) 171 #define _mm256_cmpneq_epi64_mask(A, B) \ 172 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE) 173 #define _mm256_mask_cmpneq_epi64_mask(k, A, B) \ 174 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE) 175 176 #define _mm_cmpeq_epu64_mask(A, B) \ 177 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ) 178 #define _mm_mask_cmpeq_epu64_mask(k, A, B) \ 179 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ) 180 #define _mm_cmpge_epu64_mask(A, B) \ 181 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE) 182 #define _mm_mask_cmpge_epu64_mask(k, A, B) \ 183 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE) 184 #define _mm_cmpgt_epu64_mask(A, B) \ 185 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT) 186 #define _mm_mask_cmpgt_epu64_mask(k, A, B) \ 187 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT) 188 #define _mm_cmple_epu64_mask(A, B) \ 189 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE) 190 #define _mm_mask_cmple_epu64_mask(k, A, B) \ 191 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE) 192 #define _mm_cmplt_epu64_mask(A, B) \ 193 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT) 194 #define _mm_mask_cmplt_epu64_mask(k, A, B) \ 195 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT) 196 #define _mm_cmpneq_epu64_mask(A, B) \ 197 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE) 198 #define _mm_mask_cmpneq_epu64_mask(k, A, B) \ 199 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) 200 201 #define _mm256_cmpeq_epu64_mask(A, B) \ 202 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ) 203 #define _mm256_mask_cmpeq_epu64_mask(k, A, B) \ 204 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ) 205 #define _mm256_cmpge_epu64_mask(A, B) \ 206 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE) 207 #define _mm256_mask_cmpge_epu64_mask(k, A, B) \ 208 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE) 209 #define _mm256_cmpgt_epu64_mask(A, B) \ 210 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GT) 211 #define _mm256_mask_cmpgt_epu64_mask(k, A, B) \ 212 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT) 213 #define _mm256_cmple_epu64_mask(A, B) \ 214 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE) 215 #define _mm256_mask_cmple_epu64_mask(k, A, B) \ 216 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE) 217 #define _mm256_cmplt_epu64_mask(A, B) \ 218 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT) 219 #define _mm256_mask_cmplt_epu64_mask(k, A, B) \ 220 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT) 221 #define _mm256_cmpneq_epu64_mask(A, B) \ 222 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE) 223 #define _mm256_mask_cmpneq_epu64_mask(k, A, B) \ 224 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) 225 226 static __inline__ __m256i __DEFAULT_FN_ATTRS256 227 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 228 { 229 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 230 (__v8si)_mm256_add_epi32(__A, __B), 231 (__v8si)__W); 232 } 233 234 static __inline__ __m256i __DEFAULT_FN_ATTRS256 235 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B) 236 { 237 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 238 (__v8si)_mm256_add_epi32(__A, __B), 239 (__v8si)_mm256_setzero_si256()); 240 } 241 242 static __inline__ __m256i __DEFAULT_FN_ATTRS256 243 _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 244 { 245 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 246 (__v4di)_mm256_add_epi64(__A, __B), 247 (__v4di)__W); 248 } 249 250 static __inline__ __m256i __DEFAULT_FN_ATTRS256 251 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B) 252 { 253 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 254 (__v4di)_mm256_add_epi64(__A, __B), 255 (__v4di)_mm256_setzero_si256()); 256 } 257 258 static __inline__ __m256i __DEFAULT_FN_ATTRS256 259 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 260 { 261 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 262 (__v8si)_mm256_sub_epi32(__A, __B), 263 (__v8si)__W); 264 } 265 266 static __inline__ __m256i __DEFAULT_FN_ATTRS256 267 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B) 268 { 269 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 270 (__v8si)_mm256_sub_epi32(__A, __B), 271 (__v8si)_mm256_setzero_si256()); 272 } 273 274 static __inline__ __m256i __DEFAULT_FN_ATTRS256 275 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 276 { 277 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 278 (__v4di)_mm256_sub_epi64(__A, __B), 279 (__v4di)__W); 280 } 281 282 static __inline__ __m256i __DEFAULT_FN_ATTRS256 283 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B) 284 { 285 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 286 (__v4di)_mm256_sub_epi64(__A, __B), 287 (__v4di)_mm256_setzero_si256()); 288 } 289 290 static __inline__ __m128i __DEFAULT_FN_ATTRS128 291 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 292 { 293 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 294 (__v4si)_mm_add_epi32(__A, __B), 295 (__v4si)__W); 296 } 297 298 static __inline__ __m128i __DEFAULT_FN_ATTRS128 299 _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B) 300 { 301 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 302 (__v4si)_mm_add_epi32(__A, __B), 303 (__v4si)_mm_setzero_si128()); 304 } 305 306 static __inline__ __m128i __DEFAULT_FN_ATTRS128 307 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 308 { 309 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 310 (__v2di)_mm_add_epi64(__A, __B), 311 (__v2di)__W); 312 } 313 314 static __inline__ __m128i __DEFAULT_FN_ATTRS128 315 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) 316 { 317 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 318 (__v2di)_mm_add_epi64(__A, __B), 319 (__v2di)_mm_setzero_si128()); 320 } 321 322 static __inline__ __m128i __DEFAULT_FN_ATTRS128 323 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 324 { 325 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 326 (__v4si)_mm_sub_epi32(__A, __B), 327 (__v4si)__W); 328 } 329 330 static __inline__ __m128i __DEFAULT_FN_ATTRS128 331 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B) 332 { 333 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 334 (__v4si)_mm_sub_epi32(__A, __B), 335 (__v4si)_mm_setzero_si128()); 336 } 337 338 static __inline__ __m128i __DEFAULT_FN_ATTRS128 339 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 340 { 341 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 342 (__v2di)_mm_sub_epi64(__A, __B), 343 (__v2di)__W); 344 } 345 346 static __inline__ __m128i __DEFAULT_FN_ATTRS128 347 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) 348 { 349 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 350 (__v2di)_mm_sub_epi64(__A, __B), 351 (__v2di)_mm_setzero_si128()); 352 } 353 354 static __inline__ __m256i __DEFAULT_FN_ATTRS256 355 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 356 { 357 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 358 (__v4di)_mm256_mul_epi32(__X, __Y), 359 (__v4di)__W); 360 } 361 362 static __inline__ __m256i __DEFAULT_FN_ATTRS256 363 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y) 364 { 365 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 366 (__v4di)_mm256_mul_epi32(__X, __Y), 367 (__v4di)_mm256_setzero_si256()); 368 } 369 370 static __inline__ __m128i __DEFAULT_FN_ATTRS128 371 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 372 { 373 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 374 (__v2di)_mm_mul_epi32(__X, __Y), 375 (__v2di)__W); 376 } 377 378 static __inline__ __m128i __DEFAULT_FN_ATTRS128 379 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y) 380 { 381 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 382 (__v2di)_mm_mul_epi32(__X, __Y), 383 (__v2di)_mm_setzero_si128()); 384 } 385 386 static __inline__ __m256i __DEFAULT_FN_ATTRS256 387 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 388 { 389 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 390 (__v4di)_mm256_mul_epu32(__X, __Y), 391 (__v4di)__W); 392 } 393 394 static __inline__ __m256i __DEFAULT_FN_ATTRS256 395 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y) 396 { 397 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 398 (__v4di)_mm256_mul_epu32(__X, __Y), 399 (__v4di)_mm256_setzero_si256()); 400 } 401 402 static __inline__ __m128i __DEFAULT_FN_ATTRS128 403 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 404 { 405 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 406 (__v2di)_mm_mul_epu32(__X, __Y), 407 (__v2di)__W); 408 } 409 410 static __inline__ __m128i __DEFAULT_FN_ATTRS128 411 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y) 412 { 413 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 414 (__v2di)_mm_mul_epu32(__X, __Y), 415 (__v2di)_mm_setzero_si128()); 416 } 417 418 static __inline__ __m256i __DEFAULT_FN_ATTRS256 419 _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) 420 { 421 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 422 (__v8si)_mm256_mullo_epi32(__A, __B), 423 (__v8si)_mm256_setzero_si256()); 424 } 425 426 static __inline__ __m256i __DEFAULT_FN_ATTRS256 427 _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) 428 { 429 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 430 (__v8si)_mm256_mullo_epi32(__A, __B), 431 (__v8si)__W); 432 } 433 434 static __inline__ __m128i __DEFAULT_FN_ATTRS128 435 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B) 436 { 437 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 438 (__v4si)_mm_mullo_epi32(__A, __B), 439 (__v4si)_mm_setzero_si128()); 440 } 441 442 static __inline__ __m128i __DEFAULT_FN_ATTRS128 443 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) 444 { 445 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 446 (__v4si)_mm_mullo_epi32(__A, __B), 447 (__v4si)__W); 448 } 449 450 static __inline__ __m256i __DEFAULT_FN_ATTRS256 451 _mm256_and_epi32(__m256i __a, __m256i __b) 452 { 453 return (__m256i)((__v8su)__a & (__v8su)__b); 454 } 455 456 static __inline__ __m256i __DEFAULT_FN_ATTRS256 457 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 458 { 459 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 460 (__v8si)_mm256_and_epi32(__A, __B), 461 (__v8si)__W); 462 } 463 464 static __inline__ __m256i __DEFAULT_FN_ATTRS256 465 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B) 466 { 467 return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B); 468 } 469 470 static __inline__ __m128i __DEFAULT_FN_ATTRS128 471 _mm_and_epi32(__m128i __a, __m128i __b) 472 { 473 return (__m128i)((__v4su)__a & (__v4su)__b); 474 } 475 476 static __inline__ __m128i __DEFAULT_FN_ATTRS128 477 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 478 { 479 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 480 (__v4si)_mm_and_epi32(__A, __B), 481 (__v4si)__W); 482 } 483 484 static __inline__ __m128i __DEFAULT_FN_ATTRS128 485 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B) 486 { 487 return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B); 488 } 489 490 static __inline__ __m256i __DEFAULT_FN_ATTRS256 491 _mm256_andnot_epi32(__m256i __A, __m256i __B) 492 { 493 return (__m256i)(~(__v8su)__A & (__v8su)__B); 494 } 495 496 static __inline__ __m256i __DEFAULT_FN_ATTRS256 497 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 498 { 499 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 500 (__v8si)_mm256_andnot_epi32(__A, __B), 501 (__v8si)__W); 502 } 503 504 static __inline__ __m256i __DEFAULT_FN_ATTRS256 505 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B) 506 { 507 return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(), 508 __U, __A, __B); 509 } 510 511 static __inline__ __m128i __DEFAULT_FN_ATTRS128 512 _mm_andnot_epi32(__m128i __A, __m128i __B) 513 { 514 return (__m128i)(~(__v4su)__A & (__v4su)__B); 515 } 516 517 static __inline__ __m128i __DEFAULT_FN_ATTRS128 518 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 519 { 520 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 521 (__v4si)_mm_andnot_epi32(__A, __B), 522 (__v4si)__W); 523 } 524 525 static __inline__ __m128i __DEFAULT_FN_ATTRS128 526 _mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B) 527 { 528 return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B); 529 } 530 531 static __inline__ __m256i __DEFAULT_FN_ATTRS256 532 _mm256_or_epi32(__m256i __a, __m256i __b) 533 { 534 return (__m256i)((__v8su)__a | (__v8su)__b); 535 } 536 537 static __inline__ __m256i __DEFAULT_FN_ATTRS256 538 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 539 { 540 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 541 (__v8si)_mm256_or_epi32(__A, __B), 542 (__v8si)__W); 543 } 544 545 static __inline__ __m256i __DEFAULT_FN_ATTRS256 546 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B) 547 { 548 return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B); 549 } 550 551 static __inline__ __m128i __DEFAULT_FN_ATTRS128 552 _mm_or_epi32(__m128i __a, __m128i __b) 553 { 554 return (__m128i)((__v4su)__a | (__v4su)__b); 555 } 556 557 static __inline__ __m128i __DEFAULT_FN_ATTRS128 558 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 559 { 560 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 561 (__v4si)_mm_or_epi32(__A, __B), 562 (__v4si)__W); 563 } 564 565 static __inline__ __m128i __DEFAULT_FN_ATTRS128 566 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B) 567 { 568 return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B); 569 } 570 571 static __inline__ __m256i __DEFAULT_FN_ATTRS256 572 _mm256_xor_epi32(__m256i __a, __m256i __b) 573 { 574 return (__m256i)((__v8su)__a ^ (__v8su)__b); 575 } 576 577 static __inline__ __m256i __DEFAULT_FN_ATTRS256 578 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 579 { 580 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 581 (__v8si)_mm256_xor_epi32(__A, __B), 582 (__v8si)__W); 583 } 584 585 static __inline__ __m256i __DEFAULT_FN_ATTRS256 586 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B) 587 { 588 return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B); 589 } 590 591 static __inline__ __m128i __DEFAULT_FN_ATTRS128 592 _mm_xor_epi32(__m128i __a, __m128i __b) 593 { 594 return (__m128i)((__v4su)__a ^ (__v4su)__b); 595 } 596 597 static __inline__ __m128i __DEFAULT_FN_ATTRS128 598 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 599 { 600 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 601 (__v4si)_mm_xor_epi32(__A, __B), 602 (__v4si)__W); 603 } 604 605 static __inline__ __m128i __DEFAULT_FN_ATTRS128 606 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B) 607 { 608 return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B); 609 } 610 611 static __inline__ __m256i __DEFAULT_FN_ATTRS256 612 _mm256_and_epi64(__m256i __a, __m256i __b) 613 { 614 return (__m256i)((__v4du)__a & (__v4du)__b); 615 } 616 617 static __inline__ __m256i __DEFAULT_FN_ATTRS256 618 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 619 { 620 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 621 (__v4di)_mm256_and_epi64(__A, __B), 622 (__v4di)__W); 623 } 624 625 static __inline__ __m256i __DEFAULT_FN_ATTRS256 626 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B) 627 { 628 return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B); 629 } 630 631 static __inline__ __m128i __DEFAULT_FN_ATTRS128 632 _mm_and_epi64(__m128i __a, __m128i __b) 633 { 634 return (__m128i)((__v2du)__a & (__v2du)__b); 635 } 636 637 static __inline__ __m128i __DEFAULT_FN_ATTRS128 638 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 639 { 640 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 641 (__v2di)_mm_and_epi64(__A, __B), 642 (__v2di)__W); 643 } 644 645 static __inline__ __m128i __DEFAULT_FN_ATTRS128 646 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B) 647 { 648 return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B); 649 } 650 651 static __inline__ __m256i __DEFAULT_FN_ATTRS256 652 _mm256_andnot_epi64(__m256i __A, __m256i __B) 653 { 654 return (__m256i)(~(__v4du)__A & (__v4du)__B); 655 } 656 657 static __inline__ __m256i __DEFAULT_FN_ATTRS256 658 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 659 { 660 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 661 (__v4di)_mm256_andnot_epi64(__A, __B), 662 (__v4di)__W); 663 } 664 665 static __inline__ __m256i __DEFAULT_FN_ATTRS256 666 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B) 667 { 668 return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(), 669 __U, __A, __B); 670 } 671 672 static __inline__ __m128i __DEFAULT_FN_ATTRS128 673 _mm_andnot_epi64(__m128i __A, __m128i __B) 674 { 675 return (__m128i)(~(__v2du)__A & (__v2du)__B); 676 } 677 678 static __inline__ __m128i __DEFAULT_FN_ATTRS128 679 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 680 { 681 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 682 (__v2di)_mm_andnot_epi64(__A, __B), 683 (__v2di)__W); 684 } 685 686 static __inline__ __m128i __DEFAULT_FN_ATTRS128 687 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B) 688 { 689 return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B); 690 } 691 692 static __inline__ __m256i __DEFAULT_FN_ATTRS256 693 _mm256_or_epi64(__m256i __a, __m256i __b) 694 { 695 return (__m256i)((__v4du)__a | (__v4du)__b); 696 } 697 698 static __inline__ __m256i __DEFAULT_FN_ATTRS256 699 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 700 { 701 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 702 (__v4di)_mm256_or_epi64(__A, __B), 703 (__v4di)__W); 704 } 705 706 static __inline__ __m256i __DEFAULT_FN_ATTRS256 707 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B) 708 { 709 return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B); 710 } 711 712 static __inline__ __m128i __DEFAULT_FN_ATTRS128 713 _mm_or_epi64(__m128i __a, __m128i __b) 714 { 715 return (__m128i)((__v2du)__a | (__v2du)__b); 716 } 717 718 static __inline__ __m128i __DEFAULT_FN_ATTRS128 719 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 720 { 721 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 722 (__v2di)_mm_or_epi64(__A, __B), 723 (__v2di)__W); 724 } 725 726 static __inline__ __m128i __DEFAULT_FN_ATTRS128 727 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B) 728 { 729 return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B); 730 } 731 732 static __inline__ __m256i __DEFAULT_FN_ATTRS256 733 _mm256_xor_epi64(__m256i __a, __m256i __b) 734 { 735 return (__m256i)((__v4du)__a ^ (__v4du)__b); 736 } 737 738 static __inline__ __m256i __DEFAULT_FN_ATTRS256 739 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 740 { 741 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 742 (__v4di)_mm256_xor_epi64(__A, __B), 743 (__v4di)__W); 744 } 745 746 static __inline__ __m256i __DEFAULT_FN_ATTRS256 747 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B) 748 { 749 return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B); 750 } 751 752 static __inline__ __m128i __DEFAULT_FN_ATTRS128 753 _mm_xor_epi64(__m128i __a, __m128i __b) 754 { 755 return (__m128i)((__v2du)__a ^ (__v2du)__b); 756 } 757 758 static __inline__ __m128i __DEFAULT_FN_ATTRS128 759 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, 760 __m128i __B) 761 { 762 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 763 (__v2di)_mm_xor_epi64(__A, __B), 764 (__v2di)__W); 765 } 766 767 static __inline__ __m128i __DEFAULT_FN_ATTRS128 768 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B) 769 { 770 return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B); 771 } 772 773 #define _mm_cmp_epi32_mask(a, b, p) \ 774 (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ 775 (__v4si)(__m128i)(b), (int)(p), \ 776 (__mmask8)-1) 777 778 #define _mm_mask_cmp_epi32_mask(m, a, b, p) \ 779 (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ 780 (__v4si)(__m128i)(b), (int)(p), \ 781 (__mmask8)(m)) 782 783 #define _mm_cmp_epu32_mask(a, b, p) \ 784 (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ 785 (__v4si)(__m128i)(b), (int)(p), \ 786 (__mmask8)-1) 787 788 #define _mm_mask_cmp_epu32_mask(m, a, b, p) \ 789 (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ 790 (__v4si)(__m128i)(b), (int)(p), \ 791 (__mmask8)(m)) 792 793 #define _mm256_cmp_epi32_mask(a, b, p) \ 794 (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ 795 (__v8si)(__m256i)(b), (int)(p), \ 796 (__mmask8)-1) 797 798 #define _mm256_mask_cmp_epi32_mask(m, a, b, p) \ 799 (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ 800 (__v8si)(__m256i)(b), (int)(p), \ 801 (__mmask8)(m)) 802 803 #define _mm256_cmp_epu32_mask(a, b, p) \ 804 (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ 805 (__v8si)(__m256i)(b), (int)(p), \ 806 (__mmask8)-1) 807 808 #define _mm256_mask_cmp_epu32_mask(m, a, b, p) \ 809 (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ 810 (__v8si)(__m256i)(b), (int)(p), \ 811 (__mmask8)(m)) 812 813 #define _mm_cmp_epi64_mask(a, b, p) \ 814 (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ 815 (__v2di)(__m128i)(b), (int)(p), \ 816 (__mmask8)-1) 817 818 #define _mm_mask_cmp_epi64_mask(m, a, b, p) \ 819 (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ 820 (__v2di)(__m128i)(b), (int)(p), \ 821 (__mmask8)(m)) 822 823 #define _mm_cmp_epu64_mask(a, b, p) \ 824 (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ 825 (__v2di)(__m128i)(b), (int)(p), \ 826 (__mmask8)-1) 827 828 #define _mm_mask_cmp_epu64_mask(m, a, b, p) \ 829 (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ 830 (__v2di)(__m128i)(b), (int)(p), \ 831 (__mmask8)(m)) 832 833 #define _mm256_cmp_epi64_mask(a, b, p) \ 834 (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ 835 (__v4di)(__m256i)(b), (int)(p), \ 836 (__mmask8)-1) 837 838 #define _mm256_mask_cmp_epi64_mask(m, a, b, p) \ 839 (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ 840 (__v4di)(__m256i)(b), (int)(p), \ 841 (__mmask8)(m)) 842 843 #define _mm256_cmp_epu64_mask(a, b, p) \ 844 (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ 845 (__v4di)(__m256i)(b), (int)(p), \ 846 (__mmask8)-1) 847 848 #define _mm256_mask_cmp_epu64_mask(m, a, b, p) \ 849 (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ 850 (__v4di)(__m256i)(b), (int)(p), \ 851 (__mmask8)(m)) 852 853 #define _mm256_cmp_ps_mask(a, b, p) \ 854 (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ 855 (__v8sf)(__m256)(b), (int)(p), \ 856 (__mmask8)-1) 857 858 #define _mm256_mask_cmp_ps_mask(m, a, b, p) \ 859 (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ 860 (__v8sf)(__m256)(b), (int)(p), \ 861 (__mmask8)(m)) 862 863 #define _mm256_cmp_pd_mask(a, b, p) \ 864 (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ 865 (__v4df)(__m256d)(b), (int)(p), \ 866 (__mmask8)-1) 867 868 #define _mm256_mask_cmp_pd_mask(m, a, b, p) \ 869 (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ 870 (__v4df)(__m256d)(b), (int)(p), \ 871 (__mmask8)(m)) 872 873 #define _mm_cmp_ps_mask(a, b, p) \ 874 (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ 875 (__v4sf)(__m128)(b), (int)(p), \ 876 (__mmask8)-1) 877 878 #define _mm_mask_cmp_ps_mask(m, a, b, p) \ 879 (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ 880 (__v4sf)(__m128)(b), (int)(p), \ 881 (__mmask8)(m)) 882 883 #define _mm_cmp_pd_mask(a, b, p) \ 884 (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ 885 (__v2df)(__m128d)(b), (int)(p), \ 886 (__mmask8)-1) 887 888 #define _mm_mask_cmp_pd_mask(m, a, b, p) \ 889 (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ 890 (__v2df)(__m128d)(b), (int)(p), \ 891 (__mmask8)(m)) 892 893 static __inline__ __m128d __DEFAULT_FN_ATTRS128 894 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 895 { 896 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 897 __builtin_ia32_vfmaddpd ((__v2df) __A, 898 (__v2df) __B, 899 (__v2df) __C), 900 (__v2df) __A); 901 } 902 903 static __inline__ __m128d __DEFAULT_FN_ATTRS128 904 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 905 { 906 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 907 __builtin_ia32_vfmaddpd ((__v2df) __A, 908 (__v2df) __B, 909 (__v2df) __C), 910 (__v2df) __C); 911 } 912 913 static __inline__ __m128d __DEFAULT_FN_ATTRS128 914 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 915 { 916 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 917 __builtin_ia32_vfmaddpd ((__v2df) __A, 918 (__v2df) __B, 919 (__v2df) __C), 920 (__v2df)_mm_setzero_pd()); 921 } 922 923 static __inline__ __m128d __DEFAULT_FN_ATTRS128 924 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 925 { 926 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 927 __builtin_ia32_vfmaddpd ((__v2df) __A, 928 (__v2df) __B, 929 -(__v2df) __C), 930 (__v2df) __A); 931 } 932 933 static __inline__ __m128d __DEFAULT_FN_ATTRS128 934 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 935 { 936 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 937 __builtin_ia32_vfmaddpd ((__v2df) __A, 938 (__v2df) __B, 939 -(__v2df) __C), 940 (__v2df)_mm_setzero_pd()); 941 } 942 943 static __inline__ __m128d __DEFAULT_FN_ATTRS128 944 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 945 { 946 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 947 __builtin_ia32_vfmaddpd (-(__v2df) __A, 948 (__v2df) __B, 949 (__v2df) __C), 950 (__v2df) __C); 951 } 952 953 static __inline__ __m128d __DEFAULT_FN_ATTRS128 954 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 955 { 956 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 957 __builtin_ia32_vfmaddpd (-(__v2df) __A, 958 (__v2df) __B, 959 (__v2df) __C), 960 (__v2df)_mm_setzero_pd()); 961 } 962 963 static __inline__ __m128d __DEFAULT_FN_ATTRS128 964 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 965 { 966 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 967 __builtin_ia32_vfmaddpd (-(__v2df) __A, 968 (__v2df) __B, 969 -(__v2df) __C), 970 (__v2df)_mm_setzero_pd()); 971 } 972 973 static __inline__ __m256d __DEFAULT_FN_ATTRS256 974 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 975 { 976 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 977 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 978 (__v4df) __B, 979 (__v4df) __C), 980 (__v4df) __A); 981 } 982 983 static __inline__ __m256d __DEFAULT_FN_ATTRS256 984 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 985 { 986 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 987 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 988 (__v4df) __B, 989 (__v4df) __C), 990 (__v4df) __C); 991 } 992 993 static __inline__ __m256d __DEFAULT_FN_ATTRS256 994 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 995 { 996 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 997 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 998 (__v4df) __B, 999 (__v4df) __C), 1000 (__v4df)_mm256_setzero_pd()); 1001 } 1002 1003 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1004 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1005 { 1006 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1007 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1008 (__v4df) __B, 1009 -(__v4df) __C), 1010 (__v4df) __A); 1011 } 1012 1013 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1014 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1015 { 1016 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1017 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1018 (__v4df) __B, 1019 -(__v4df) __C), 1020 (__v4df)_mm256_setzero_pd()); 1021 } 1022 1023 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1024 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1025 { 1026 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1027 __builtin_ia32_vfmaddpd256 (-(__v4df) __A, 1028 (__v4df) __B, 1029 (__v4df) __C), 1030 (__v4df) __C); 1031 } 1032 1033 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1034 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1035 { 1036 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1037 __builtin_ia32_vfmaddpd256 (-(__v4df) __A, 1038 (__v4df) __B, 1039 (__v4df) __C), 1040 (__v4df)_mm256_setzero_pd()); 1041 } 1042 1043 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1044 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1045 { 1046 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1047 __builtin_ia32_vfmaddpd256 (-(__v4df) __A, 1048 (__v4df) __B, 1049 -(__v4df) __C), 1050 (__v4df)_mm256_setzero_pd()); 1051 } 1052 1053 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1054 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1055 { 1056 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1057 __builtin_ia32_vfmaddps ((__v4sf) __A, 1058 (__v4sf) __B, 1059 (__v4sf) __C), 1060 (__v4sf) __A); 1061 } 1062 1063 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1064 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1065 { 1066 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1067 __builtin_ia32_vfmaddps ((__v4sf) __A, 1068 (__v4sf) __B, 1069 (__v4sf) __C), 1070 (__v4sf) __C); 1071 } 1072 1073 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1074 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1075 { 1076 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1077 __builtin_ia32_vfmaddps ((__v4sf) __A, 1078 (__v4sf) __B, 1079 (__v4sf) __C), 1080 (__v4sf)_mm_setzero_ps()); 1081 } 1082 1083 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1084 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1085 { 1086 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1087 __builtin_ia32_vfmaddps ((__v4sf) __A, 1088 (__v4sf) __B, 1089 -(__v4sf) __C), 1090 (__v4sf) __A); 1091 } 1092 1093 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1094 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1095 { 1096 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1097 __builtin_ia32_vfmaddps ((__v4sf) __A, 1098 (__v4sf) __B, 1099 -(__v4sf) __C), 1100 (__v4sf)_mm_setzero_ps()); 1101 } 1102 1103 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1104 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1105 { 1106 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1107 __builtin_ia32_vfmaddps (-(__v4sf) __A, 1108 (__v4sf) __B, 1109 (__v4sf) __C), 1110 (__v4sf) __C); 1111 } 1112 1113 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1114 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1115 { 1116 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1117 __builtin_ia32_vfmaddps (-(__v4sf) __A, 1118 (__v4sf) __B, 1119 (__v4sf) __C), 1120 (__v4sf)_mm_setzero_ps()); 1121 } 1122 1123 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1124 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1125 { 1126 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1127 __builtin_ia32_vfmaddps (-(__v4sf) __A, 1128 (__v4sf) __B, 1129 -(__v4sf) __C), 1130 (__v4sf)_mm_setzero_ps()); 1131 } 1132 1133 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1134 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1135 { 1136 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1137 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1138 (__v8sf) __B, 1139 (__v8sf) __C), 1140 (__v8sf) __A); 1141 } 1142 1143 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1144 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1145 { 1146 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1147 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1148 (__v8sf) __B, 1149 (__v8sf) __C), 1150 (__v8sf) __C); 1151 } 1152 1153 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1154 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1155 { 1156 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1157 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1158 (__v8sf) __B, 1159 (__v8sf) __C), 1160 (__v8sf)_mm256_setzero_ps()); 1161 } 1162 1163 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1164 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1165 { 1166 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1167 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1168 (__v8sf) __B, 1169 -(__v8sf) __C), 1170 (__v8sf) __A); 1171 } 1172 1173 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1174 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1175 { 1176 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1177 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1178 (__v8sf) __B, 1179 -(__v8sf) __C), 1180 (__v8sf)_mm256_setzero_ps()); 1181 } 1182 1183 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1184 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1185 { 1186 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1187 __builtin_ia32_vfmaddps256 (-(__v8sf) __A, 1188 (__v8sf) __B, 1189 (__v8sf) __C), 1190 (__v8sf) __C); 1191 } 1192 1193 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1194 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1195 { 1196 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1197 __builtin_ia32_vfmaddps256 (-(__v8sf) __A, 1198 (__v8sf) __B, 1199 (__v8sf) __C), 1200 (__v8sf)_mm256_setzero_ps()); 1201 } 1202 1203 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1204 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1205 { 1206 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1207 __builtin_ia32_vfmaddps256 (-(__v8sf) __A, 1208 (__v8sf) __B, 1209 -(__v8sf) __C), 1210 (__v8sf)_mm256_setzero_ps()); 1211 } 1212 1213 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1214 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1215 { 1216 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1217 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1218 (__v2df) __B, 1219 (__v2df) __C), 1220 (__v2df) __A); 1221 } 1222 1223 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1224 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1225 { 1226 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1227 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1228 (__v2df) __B, 1229 (__v2df) __C), 1230 (__v2df) __C); 1231 } 1232 1233 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1234 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1235 { 1236 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1237 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1238 (__v2df) __B, 1239 (__v2df) __C), 1240 (__v2df)_mm_setzero_pd()); 1241 } 1242 1243 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1244 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1245 { 1246 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1247 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1248 (__v2df) __B, 1249 -(__v2df) __C), 1250 (__v2df) __A); 1251 } 1252 1253 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1254 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1255 { 1256 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1257 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1258 (__v2df) __B, 1259 -(__v2df) __C), 1260 (__v2df)_mm_setzero_pd()); 1261 } 1262 1263 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1264 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1265 { 1266 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1267 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1268 (__v4df) __B, 1269 (__v4df) __C), 1270 (__v4df) __A); 1271 } 1272 1273 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1274 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1275 { 1276 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1277 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1278 (__v4df) __B, 1279 (__v4df) __C), 1280 (__v4df) __C); 1281 } 1282 1283 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1284 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1285 { 1286 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1287 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1288 (__v4df) __B, 1289 (__v4df) __C), 1290 (__v4df)_mm256_setzero_pd()); 1291 } 1292 1293 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1294 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1295 { 1296 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1297 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1298 (__v4df) __B, 1299 -(__v4df) __C), 1300 (__v4df) __A); 1301 } 1302 1303 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1304 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1305 { 1306 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1307 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1308 (__v4df) __B, 1309 -(__v4df) __C), 1310 (__v4df)_mm256_setzero_pd()); 1311 } 1312 1313 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1314 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1315 { 1316 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1317 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1318 (__v4sf) __B, 1319 (__v4sf) __C), 1320 (__v4sf) __A); 1321 } 1322 1323 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1324 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1325 { 1326 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1327 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1328 (__v4sf) __B, 1329 (__v4sf) __C), 1330 (__v4sf) __C); 1331 } 1332 1333 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1334 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1335 { 1336 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1337 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1338 (__v4sf) __B, 1339 (__v4sf) __C), 1340 (__v4sf)_mm_setzero_ps()); 1341 } 1342 1343 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1344 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1345 { 1346 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1347 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1348 (__v4sf) __B, 1349 -(__v4sf) __C), 1350 (__v4sf) __A); 1351 } 1352 1353 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1354 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1355 { 1356 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1357 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1358 (__v4sf) __B, 1359 -(__v4sf) __C), 1360 (__v4sf)_mm_setzero_ps()); 1361 } 1362 1363 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1364 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, 1365 __m256 __C) 1366 { 1367 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1368 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1369 (__v8sf) __B, 1370 (__v8sf) __C), 1371 (__v8sf) __A); 1372 } 1373 1374 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1375 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1376 { 1377 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1378 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1379 (__v8sf) __B, 1380 (__v8sf) __C), 1381 (__v8sf) __C); 1382 } 1383 1384 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1385 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1386 { 1387 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1388 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1389 (__v8sf) __B, 1390 (__v8sf) __C), 1391 (__v8sf)_mm256_setzero_ps()); 1392 } 1393 1394 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1395 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1396 { 1397 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1398 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1399 (__v8sf) __B, 1400 -(__v8sf) __C), 1401 (__v8sf) __A); 1402 } 1403 1404 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1405 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1406 { 1407 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1408 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1409 (__v8sf) __B, 1410 -(__v8sf) __C), 1411 (__v8sf)_mm256_setzero_ps()); 1412 } 1413 1414 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1415 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1416 { 1417 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1418 __builtin_ia32_vfmaddpd ((__v2df) __A, 1419 (__v2df) __B, 1420 -(__v2df) __C), 1421 (__v2df) __C); 1422 } 1423 1424 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1425 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1426 { 1427 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1428 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1429 (__v4df) __B, 1430 -(__v4df) __C), 1431 (__v4df) __C); 1432 } 1433 1434 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1435 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1436 { 1437 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1438 __builtin_ia32_vfmaddps ((__v4sf) __A, 1439 (__v4sf) __B, 1440 -(__v4sf) __C), 1441 (__v4sf) __C); 1442 } 1443 1444 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1445 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1446 { 1447 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1448 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1449 (__v8sf) __B, 1450 -(__v8sf) __C), 1451 (__v8sf) __C); 1452 } 1453 1454 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1455 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1456 { 1457 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1458 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1459 (__v2df) __B, 1460 -(__v2df) __C), 1461 (__v2df) __C); 1462 } 1463 1464 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1465 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1466 { 1467 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1468 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1469 (__v4df) __B, 1470 -(__v4df) __C), 1471 (__v4df) __C); 1472 } 1473 1474 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1475 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1476 { 1477 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1478 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1479 (__v4sf) __B, 1480 -(__v4sf) __C), 1481 (__v4sf) __C); 1482 } 1483 1484 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1485 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1486 { 1487 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1488 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1489 (__v8sf) __B, 1490 -(__v8sf) __C), 1491 (__v8sf) __C); 1492 } 1493 1494 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1495 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1496 { 1497 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1498 __builtin_ia32_vfmaddpd ((__v2df) __A, 1499 -(__v2df) __B, 1500 (__v2df) __C), 1501 (__v2df) __A); 1502 } 1503 1504 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1505 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1506 { 1507 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1508 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1509 -(__v4df) __B, 1510 (__v4df) __C), 1511 (__v4df) __A); 1512 } 1513 1514 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1515 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1516 { 1517 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1518 __builtin_ia32_vfmaddps ((__v4sf) __A, 1519 -(__v4sf) __B, 1520 (__v4sf) __C), 1521 (__v4sf) __A); 1522 } 1523 1524 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1525 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1526 { 1527 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1528 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1529 -(__v8sf) __B, 1530 (__v8sf) __C), 1531 (__v8sf) __A); 1532 } 1533 1534 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1535 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1536 { 1537 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1538 __builtin_ia32_vfmaddpd ((__v2df) __A, 1539 -(__v2df) __B, 1540 -(__v2df) __C), 1541 (__v2df) __A); 1542 } 1543 1544 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1545 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1546 { 1547 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1548 __builtin_ia32_vfmaddpd ((__v2df) __A, 1549 -(__v2df) __B, 1550 -(__v2df) __C), 1551 (__v2df) __C); 1552 } 1553 1554 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1555 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1556 { 1557 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1558 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1559 -(__v4df) __B, 1560 -(__v4df) __C), 1561 (__v4df) __A); 1562 } 1563 1564 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1565 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1566 { 1567 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1568 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1569 -(__v4df) __B, 1570 -(__v4df) __C), 1571 (__v4df) __C); 1572 } 1573 1574 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1575 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1576 { 1577 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1578 __builtin_ia32_vfmaddps ((__v4sf) __A, 1579 -(__v4sf) __B, 1580 -(__v4sf) __C), 1581 (__v4sf) __A); 1582 } 1583 1584 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1585 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1586 { 1587 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1588 __builtin_ia32_vfmaddps ((__v4sf) __A, 1589 -(__v4sf) __B, 1590 -(__v4sf) __C), 1591 (__v4sf) __C); 1592 } 1593 1594 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1595 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1596 { 1597 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1598 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1599 -(__v8sf) __B, 1600 -(__v8sf) __C), 1601 (__v8sf) __A); 1602 } 1603 1604 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1605 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1606 { 1607 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1608 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1609 -(__v8sf) __B, 1610 -(__v8sf) __C), 1611 (__v8sf) __C); 1612 } 1613 1614 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1615 _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 1616 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 1617 (__v2df)_mm_add_pd(__A, __B), 1618 (__v2df)__W); 1619 } 1620 1621 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1622 _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) { 1623 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 1624 (__v2df)_mm_add_pd(__A, __B), 1625 (__v2df)_mm_setzero_pd()); 1626 } 1627 1628 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1629 _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 1630 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 1631 (__v4df)_mm256_add_pd(__A, __B), 1632 (__v4df)__W); 1633 } 1634 1635 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1636 _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) { 1637 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 1638 (__v4df)_mm256_add_pd(__A, __B), 1639 (__v4df)_mm256_setzero_pd()); 1640 } 1641 1642 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1643 _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 1644 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1645 (__v4sf)_mm_add_ps(__A, __B), 1646 (__v4sf)__W); 1647 } 1648 1649 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1650 _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) { 1651 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1652 (__v4sf)_mm_add_ps(__A, __B), 1653 (__v4sf)_mm_setzero_ps()); 1654 } 1655 1656 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1657 _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 1658 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1659 (__v8sf)_mm256_add_ps(__A, __B), 1660 (__v8sf)__W); 1661 } 1662 1663 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1664 _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) { 1665 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1666 (__v8sf)_mm256_add_ps(__A, __B), 1667 (__v8sf)_mm256_setzero_ps()); 1668 } 1669 1670 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1671 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) { 1672 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 1673 (__v4si) __W, 1674 (__v4si) __A); 1675 } 1676 1677 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1678 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) { 1679 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 1680 (__v8si) __W, 1681 (__v8si) __A); 1682 } 1683 1684 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1685 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) { 1686 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 1687 (__v2df) __W, 1688 (__v2df) __A); 1689 } 1690 1691 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1692 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) { 1693 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 1694 (__v4df) __W, 1695 (__v4df) __A); 1696 } 1697 1698 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1699 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) { 1700 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 1701 (__v4sf) __W, 1702 (__v4sf) __A); 1703 } 1704 1705 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1706 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) { 1707 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 1708 (__v8sf) __W, 1709 (__v8sf) __A); 1710 } 1711 1712 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1713 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) { 1714 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 1715 (__v2di) __W, 1716 (__v2di) __A); 1717 } 1718 1719 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1720 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) { 1721 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 1722 (__v4di) __W, 1723 (__v4di) __A); 1724 } 1725 1726 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1727 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) { 1728 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, 1729 (__v2df) __W, 1730 (__mmask8) __U); 1731 } 1732 1733 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1734 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A) { 1735 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, 1736 (__v2df) 1737 _mm_setzero_pd (), 1738 (__mmask8) __U); 1739 } 1740 1741 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1742 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) { 1743 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, 1744 (__v4df) __W, 1745 (__mmask8) __U); 1746 } 1747 1748 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1749 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) { 1750 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, 1751 (__v4df) 1752 _mm256_setzero_pd (), 1753 (__mmask8) __U); 1754 } 1755 1756 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1757 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 1758 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, 1759 (__v2di) __W, 1760 (__mmask8) __U); 1761 } 1762 1763 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1764 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) { 1765 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, 1766 (__v2di) 1767 _mm_setzero_si128 (), 1768 (__mmask8) __U); 1769 } 1770 1771 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1772 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 1773 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, 1774 (__v4di) __W, 1775 (__mmask8) __U); 1776 } 1777 1778 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1779 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) { 1780 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, 1781 (__v4di) 1782 _mm256_setzero_si256 (), 1783 (__mmask8) __U); 1784 } 1785 1786 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1787 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) { 1788 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, 1789 (__v4sf) __W, 1790 (__mmask8) __U); 1791 } 1792 1793 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1794 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A) { 1795 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, 1796 (__v4sf) 1797 _mm_setzero_ps (), 1798 (__mmask8) __U); 1799 } 1800 1801 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1802 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) { 1803 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, 1804 (__v8sf) __W, 1805 (__mmask8) __U); 1806 } 1807 1808 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1809 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) { 1810 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, 1811 (__v8sf) 1812 _mm256_setzero_ps (), 1813 (__mmask8) __U); 1814 } 1815 1816 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1817 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { 1818 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, 1819 (__v4si) __W, 1820 (__mmask8) __U); 1821 } 1822 1823 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1824 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) { 1825 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, 1826 (__v4si) 1827 _mm_setzero_si128 (), 1828 (__mmask8) __U); 1829 } 1830 1831 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1832 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { 1833 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, 1834 (__v8si) __W, 1835 (__mmask8) __U); 1836 } 1837 1838 static __inline__ __m256i __DEFAULT_FN_ATTRS256 1839 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) { 1840 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, 1841 (__v8si) 1842 _mm256_setzero_si256 (), 1843 (__mmask8) __U); 1844 } 1845 1846 static __inline__ void __DEFAULT_FN_ATTRS128 1847 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) { 1848 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P, 1849 (__v2df) __A, 1850 (__mmask8) __U); 1851 } 1852 1853 static __inline__ void __DEFAULT_FN_ATTRS256 1854 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) { 1855 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P, 1856 (__v4df) __A, 1857 (__mmask8) __U); 1858 } 1859 1860 static __inline__ void __DEFAULT_FN_ATTRS128 1861 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) { 1862 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P, 1863 (__v2di) __A, 1864 (__mmask8) __U); 1865 } 1866 1867 static __inline__ void __DEFAULT_FN_ATTRS256 1868 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) { 1869 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P, 1870 (__v4di) __A, 1871 (__mmask8) __U); 1872 } 1873 1874 static __inline__ void __DEFAULT_FN_ATTRS128 1875 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) { 1876 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P, 1877 (__v4sf) __A, 1878 (__mmask8) __U); 1879 } 1880 1881 static __inline__ void __DEFAULT_FN_ATTRS256 1882 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) { 1883 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P, 1884 (__v8sf) __A, 1885 (__mmask8) __U); 1886 } 1887 1888 static __inline__ void __DEFAULT_FN_ATTRS128 1889 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) { 1890 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P, 1891 (__v4si) __A, 1892 (__mmask8) __U); 1893 } 1894 1895 static __inline__ void __DEFAULT_FN_ATTRS256 1896 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) { 1897 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P, 1898 (__v8si) __A, 1899 (__mmask8) __U); 1900 } 1901 1902 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1903 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) { 1904 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 1905 (__v2df)_mm_cvtepi32_pd(__A), 1906 (__v2df)__W); 1907 } 1908 1909 static __inline__ __m128d __DEFAULT_FN_ATTRS128 1910 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { 1911 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 1912 (__v2df)_mm_cvtepi32_pd(__A), 1913 (__v2df)_mm_setzero_pd()); 1914 } 1915 1916 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1917 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) { 1918 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 1919 (__v4df)_mm256_cvtepi32_pd(__A), 1920 (__v4df)__W); 1921 } 1922 1923 static __inline__ __m256d __DEFAULT_FN_ATTRS256 1924 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { 1925 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 1926 (__v4df)_mm256_cvtepi32_pd(__A), 1927 (__v4df)_mm256_setzero_pd()); 1928 } 1929 1930 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1931 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) { 1932 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1933 (__v4sf)_mm_cvtepi32_ps(__A), 1934 (__v4sf)__W); 1935 } 1936 1937 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1938 _mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) { 1939 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1940 (__v4sf)_mm_cvtepi32_ps(__A), 1941 (__v4sf)_mm_setzero_ps()); 1942 } 1943 1944 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1945 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) { 1946 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1947 (__v8sf)_mm256_cvtepi32_ps(__A), 1948 (__v8sf)__W); 1949 } 1950 1951 static __inline__ __m256 __DEFAULT_FN_ATTRS256 1952 _mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) { 1953 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1954 (__v8sf)_mm256_cvtepi32_ps(__A), 1955 (__v8sf)_mm256_setzero_ps()); 1956 } 1957 1958 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1959 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { 1960 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, 1961 (__v4si) __W, 1962 (__mmask8) __U); 1963 } 1964 1965 static __inline__ __m128i __DEFAULT_FN_ATTRS128 1966 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) { 1967 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, 1968 (__v4si) 1969 _mm_setzero_si128 (), 1970 (__mmask8) __U); 1971 } 1972 1973 static __inline__ __m128i __DEFAULT_FN_ATTRS256 1974 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { 1975 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 1976 (__v4si)_mm256_cvtpd_epi32(__A), 1977 (__v4si)__W); 1978 } 1979 1980 static __inline__ __m128i __DEFAULT_FN_ATTRS256 1981 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) { 1982 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 1983 (__v4si)_mm256_cvtpd_epi32(__A), 1984 (__v4si)_mm_setzero_si128()); 1985 } 1986 1987 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1988 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) { 1989 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, 1990 (__v4sf) __W, 1991 (__mmask8) __U); 1992 } 1993 1994 static __inline__ __m128 __DEFAULT_FN_ATTRS128 1995 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) { 1996 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, 1997 (__v4sf) 1998 _mm_setzero_ps (), 1999 (__mmask8) __U); 2000 } 2001 2002 static __inline__ __m128 __DEFAULT_FN_ATTRS256 2003 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) { 2004 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2005 (__v4sf)_mm256_cvtpd_ps(__A), 2006 (__v4sf)__W); 2007 } 2008 2009 static __inline__ __m128 __DEFAULT_FN_ATTRS256 2010 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) { 2011 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2012 (__v4sf)_mm256_cvtpd_ps(__A), 2013 (__v4sf)_mm_setzero_ps()); 2014 } 2015 2016 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2017 _mm_cvtpd_epu32 (__m128d __A) { 2018 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2019 (__v4si) 2020 _mm_setzero_si128 (), 2021 (__mmask8) -1); 2022 } 2023 2024 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2025 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { 2026 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2027 (__v4si) __W, 2028 (__mmask8) __U); 2029 } 2030 2031 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2032 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) { 2033 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2034 (__v4si) 2035 _mm_setzero_si128 (), 2036 (__mmask8) __U); 2037 } 2038 2039 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2040 _mm256_cvtpd_epu32 (__m256d __A) { 2041 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2042 (__v4si) 2043 _mm_setzero_si128 (), 2044 (__mmask8) -1); 2045 } 2046 2047 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2048 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { 2049 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2050 (__v4si) __W, 2051 (__mmask8) __U); 2052 } 2053 2054 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2055 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) { 2056 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2057 (__v4si) 2058 _mm_setzero_si128 (), 2059 (__mmask8) __U); 2060 } 2061 2062 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2063 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) { 2064 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2065 (__v4si)_mm_cvtps_epi32(__A), 2066 (__v4si)__W); 2067 } 2068 2069 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2070 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) { 2071 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2072 (__v4si)_mm_cvtps_epi32(__A), 2073 (__v4si)_mm_setzero_si128()); 2074 } 2075 2076 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2077 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { 2078 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2079 (__v8si)_mm256_cvtps_epi32(__A), 2080 (__v8si)__W); 2081 } 2082 2083 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2084 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) { 2085 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2086 (__v8si)_mm256_cvtps_epi32(__A), 2087 (__v8si)_mm256_setzero_si256()); 2088 } 2089 2090 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2091 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) { 2092 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2093 (__v2df)_mm_cvtps_pd(__A), 2094 (__v2df)__W); 2095 } 2096 2097 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2098 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { 2099 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2100 (__v2df)_mm_cvtps_pd(__A), 2101 (__v2df)_mm_setzero_pd()); 2102 } 2103 2104 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2105 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) { 2106 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2107 (__v4df)_mm256_cvtps_pd(__A), 2108 (__v4df)__W); 2109 } 2110 2111 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2112 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { 2113 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2114 (__v4df)_mm256_cvtps_pd(__A), 2115 (__v4df)_mm256_setzero_pd()); 2116 } 2117 2118 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2119 _mm_cvtps_epu32 (__m128 __A) { 2120 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2121 (__v4si) 2122 _mm_setzero_si128 (), 2123 (__mmask8) -1); 2124 } 2125 2126 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2127 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { 2128 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2129 (__v4si) __W, 2130 (__mmask8) __U); 2131 } 2132 2133 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2134 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) { 2135 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2136 (__v4si) 2137 _mm_setzero_si128 (), 2138 (__mmask8) __U); 2139 } 2140 2141 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2142 _mm256_cvtps_epu32 (__m256 __A) { 2143 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2144 (__v8si) 2145 _mm256_setzero_si256 (), 2146 (__mmask8) -1); 2147 } 2148 2149 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2150 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) { 2151 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2152 (__v8si) __W, 2153 (__mmask8) __U); 2154 } 2155 2156 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2157 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) { 2158 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2159 (__v8si) 2160 _mm256_setzero_si256 (), 2161 (__mmask8) __U); 2162 } 2163 2164 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2165 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { 2166 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, 2167 (__v4si) __W, 2168 (__mmask8) __U); 2169 } 2170 2171 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2172 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) { 2173 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, 2174 (__v4si) 2175 _mm_setzero_si128 (), 2176 (__mmask8) __U); 2177 } 2178 2179 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2180 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { 2181 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2182 (__v4si)_mm256_cvttpd_epi32(__A), 2183 (__v4si)__W); 2184 } 2185 2186 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2187 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) { 2188 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2189 (__v4si)_mm256_cvttpd_epi32(__A), 2190 (__v4si)_mm_setzero_si128()); 2191 } 2192 2193 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2194 _mm_cvttpd_epu32 (__m128d __A) { 2195 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2196 (__v4si) 2197 _mm_setzero_si128 (), 2198 (__mmask8) -1); 2199 } 2200 2201 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2202 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { 2203 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2204 (__v4si) __W, 2205 (__mmask8) __U); 2206 } 2207 2208 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2209 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) { 2210 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2211 (__v4si) 2212 _mm_setzero_si128 (), 2213 (__mmask8) __U); 2214 } 2215 2216 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2217 _mm256_cvttpd_epu32 (__m256d __A) { 2218 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2219 (__v4si) 2220 _mm_setzero_si128 (), 2221 (__mmask8) -1); 2222 } 2223 2224 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2225 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { 2226 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2227 (__v4si) __W, 2228 (__mmask8) __U); 2229 } 2230 2231 static __inline__ __m128i __DEFAULT_FN_ATTRS256 2232 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) { 2233 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2234 (__v4si) 2235 _mm_setzero_si128 (), 2236 (__mmask8) __U); 2237 } 2238 2239 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2240 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) { 2241 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2242 (__v4si)_mm_cvttps_epi32(__A), 2243 (__v4si)__W); 2244 } 2245 2246 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2247 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) { 2248 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2249 (__v4si)_mm_cvttps_epi32(__A), 2250 (__v4si)_mm_setzero_si128()); 2251 } 2252 2253 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2254 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { 2255 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2256 (__v8si)_mm256_cvttps_epi32(__A), 2257 (__v8si)__W); 2258 } 2259 2260 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2261 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) { 2262 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2263 (__v8si)_mm256_cvttps_epi32(__A), 2264 (__v8si)_mm256_setzero_si256()); 2265 } 2266 2267 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2268 _mm_cvttps_epu32 (__m128 __A) { 2269 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2270 (__v4si) 2271 _mm_setzero_si128 (), 2272 (__mmask8) -1); 2273 } 2274 2275 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2276 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { 2277 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2278 (__v4si) __W, 2279 (__mmask8) __U); 2280 } 2281 2282 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2283 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) { 2284 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2285 (__v4si) 2286 _mm_setzero_si128 (), 2287 (__mmask8) __U); 2288 } 2289 2290 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2291 _mm256_cvttps_epu32 (__m256 __A) { 2292 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2293 (__v8si) 2294 _mm256_setzero_si256 (), 2295 (__mmask8) -1); 2296 } 2297 2298 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2299 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) { 2300 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2301 (__v8si) __W, 2302 (__mmask8) __U); 2303 } 2304 2305 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2306 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) { 2307 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2308 (__v8si) 2309 _mm256_setzero_si256 (), 2310 (__mmask8) __U); 2311 } 2312 2313 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2314 _mm_cvtepu32_pd (__m128i __A) { 2315 return (__m128d) __builtin_convertvector( 2316 __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df); 2317 } 2318 2319 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2320 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) { 2321 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 2322 (__v2df)_mm_cvtepu32_pd(__A), 2323 (__v2df)__W); 2324 } 2325 2326 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2327 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { 2328 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 2329 (__v2df)_mm_cvtepu32_pd(__A), 2330 (__v2df)_mm_setzero_pd()); 2331 } 2332 2333 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2334 _mm256_cvtepu32_pd (__m128i __A) { 2335 return (__m256d)__builtin_convertvector((__v4su)__A, __v4df); 2336 } 2337 2338 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2339 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) { 2340 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 2341 (__v4df)_mm256_cvtepu32_pd(__A), 2342 (__v4df)__W); 2343 } 2344 2345 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2346 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { 2347 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 2348 (__v4df)_mm256_cvtepu32_pd(__A), 2349 (__v4df)_mm256_setzero_pd()); 2350 } 2351 2352 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2353 _mm_cvtepu32_ps (__m128i __A) { 2354 return (__m128)__builtin_convertvector((__v4su)__A, __v4sf); 2355 } 2356 2357 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2358 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) { 2359 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2360 (__v4sf)_mm_cvtepu32_ps(__A), 2361 (__v4sf)__W); 2362 } 2363 2364 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2365 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) { 2366 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2367 (__v4sf)_mm_cvtepu32_ps(__A), 2368 (__v4sf)_mm_setzero_ps()); 2369 } 2370 2371 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2372 _mm256_cvtepu32_ps (__m256i __A) { 2373 return (__m256)__builtin_convertvector((__v8su)__A, __v8sf); 2374 } 2375 2376 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2377 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) { 2378 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2379 (__v8sf)_mm256_cvtepu32_ps(__A), 2380 (__v8sf)__W); 2381 } 2382 2383 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2384 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) { 2385 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2386 (__v8sf)_mm256_cvtepu32_ps(__A), 2387 (__v8sf)_mm256_setzero_ps()); 2388 } 2389 2390 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2391 _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2392 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2393 (__v2df)_mm_div_pd(__A, __B), 2394 (__v2df)__W); 2395 } 2396 2397 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2398 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2399 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2400 (__v2df)_mm_div_pd(__A, __B), 2401 (__v2df)_mm_setzero_pd()); 2402 } 2403 2404 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2405 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2406 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2407 (__v4df)_mm256_div_pd(__A, __B), 2408 (__v4df)__W); 2409 } 2410 2411 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2412 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2413 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2414 (__v4df)_mm256_div_pd(__A, __B), 2415 (__v4df)_mm256_setzero_pd()); 2416 } 2417 2418 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2419 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2420 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2421 (__v4sf)_mm_div_ps(__A, __B), 2422 (__v4sf)__W); 2423 } 2424 2425 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2426 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2427 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2428 (__v4sf)_mm_div_ps(__A, __B), 2429 (__v4sf)_mm_setzero_ps()); 2430 } 2431 2432 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2433 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2434 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2435 (__v8sf)_mm256_div_ps(__A, __B), 2436 (__v8sf)__W); 2437 } 2438 2439 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2440 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2441 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2442 (__v8sf)_mm256_div_ps(__A, __B), 2443 (__v8sf)_mm256_setzero_ps()); 2444 } 2445 2446 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2447 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) { 2448 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, 2449 (__v2df) __W, 2450 (__mmask8) __U); 2451 } 2452 2453 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2454 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A) { 2455 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, 2456 (__v2df) 2457 _mm_setzero_pd (), 2458 (__mmask8) __U); 2459 } 2460 2461 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2462 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) { 2463 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, 2464 (__v4df) __W, 2465 (__mmask8) __U); 2466 } 2467 2468 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2469 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) { 2470 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, 2471 (__v4df) 2472 _mm256_setzero_pd (), 2473 (__mmask8) __U); 2474 } 2475 2476 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2477 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 2478 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, 2479 (__v2di) __W, 2480 (__mmask8) __U); 2481 } 2482 2483 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2484 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) { 2485 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, 2486 (__v2di) 2487 _mm_setzero_si128 (), 2488 (__mmask8) __U); 2489 } 2490 2491 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2492 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 2493 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, 2494 (__v4di) __W, 2495 (__mmask8) __U); 2496 } 2497 2498 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2499 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) { 2500 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, 2501 (__v4di) 2502 _mm256_setzero_si256 (), 2503 (__mmask8) __U); 2504 } 2505 2506 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2507 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) { 2508 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P, 2509 (__v2df) __W, 2510 (__mmask8) 2511 __U); 2512 } 2513 2514 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2515 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { 2516 return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P, 2517 (__v2df) 2518 _mm_setzero_pd (), 2519 (__mmask8) 2520 __U); 2521 } 2522 2523 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2524 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) { 2525 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P, 2526 (__v4df) __W, 2527 (__mmask8) 2528 __U); 2529 } 2530 2531 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2532 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { 2533 return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P, 2534 (__v4df) 2535 _mm256_setzero_pd (), 2536 (__mmask8) 2537 __U); 2538 } 2539 2540 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2541 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) { 2542 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P, 2543 (__v2di) __W, 2544 (__mmask8) 2545 __U); 2546 } 2547 2548 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2549 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { 2550 return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P, 2551 (__v2di) 2552 _mm_setzero_si128 (), 2553 (__mmask8) 2554 __U); 2555 } 2556 2557 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2558 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U, 2559 void const *__P) { 2560 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P, 2561 (__v4di) __W, 2562 (__mmask8) 2563 __U); 2564 } 2565 2566 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2567 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { 2568 return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P, 2569 (__v4di) 2570 _mm256_setzero_si256 (), 2571 (__mmask8) 2572 __U); 2573 } 2574 2575 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2576 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) { 2577 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P, 2578 (__v4sf) __W, 2579 (__mmask8) __U); 2580 } 2581 2582 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2583 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { 2584 return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P, 2585 (__v4sf) 2586 _mm_setzero_ps (), 2587 (__mmask8) 2588 __U); 2589 } 2590 2591 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2592 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) { 2593 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P, 2594 (__v8sf) __W, 2595 (__mmask8) __U); 2596 } 2597 2598 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2599 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { 2600 return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P, 2601 (__v8sf) 2602 _mm256_setzero_ps (), 2603 (__mmask8) 2604 __U); 2605 } 2606 2607 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2608 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) { 2609 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P, 2610 (__v4si) __W, 2611 (__mmask8) 2612 __U); 2613 } 2614 2615 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2616 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { 2617 return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P, 2618 (__v4si) 2619 _mm_setzero_si128 (), 2620 (__mmask8) __U); 2621 } 2622 2623 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2624 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U, 2625 void const *__P) { 2626 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P, 2627 (__v8si) __W, 2628 (__mmask8) 2629 __U); 2630 } 2631 2632 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2633 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { 2634 return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P, 2635 (__v8si) 2636 _mm256_setzero_si256 (), 2637 (__mmask8) 2638 __U); 2639 } 2640 2641 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2642 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) { 2643 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, 2644 (__v4sf) __W, 2645 (__mmask8) __U); 2646 } 2647 2648 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2649 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A) { 2650 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, 2651 (__v4sf) 2652 _mm_setzero_ps (), 2653 (__mmask8) __U); 2654 } 2655 2656 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2657 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) { 2658 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, 2659 (__v8sf) __W, 2660 (__mmask8) __U); 2661 } 2662 2663 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2664 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) { 2665 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, 2666 (__v8sf) 2667 _mm256_setzero_ps (), 2668 (__mmask8) __U); 2669 } 2670 2671 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2672 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { 2673 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, 2674 (__v4si) __W, 2675 (__mmask8) __U); 2676 } 2677 2678 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2679 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) { 2680 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, 2681 (__v4si) 2682 _mm_setzero_si128 (), 2683 (__mmask8) __U); 2684 } 2685 2686 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2687 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { 2688 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, 2689 (__v8si) __W, 2690 (__mmask8) __U); 2691 } 2692 2693 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2694 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) { 2695 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, 2696 (__v8si) 2697 _mm256_setzero_si256 (), 2698 (__mmask8) __U); 2699 } 2700 2701 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2702 _mm_getexp_pd (__m128d __A) { 2703 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2704 (__v2df) 2705 _mm_setzero_pd (), 2706 (__mmask8) -1); 2707 } 2708 2709 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2710 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) { 2711 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2712 (__v2df) __W, 2713 (__mmask8) __U); 2714 } 2715 2716 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2717 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) { 2718 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2719 (__v2df) 2720 _mm_setzero_pd (), 2721 (__mmask8) __U); 2722 } 2723 2724 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2725 _mm256_getexp_pd (__m256d __A) { 2726 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 2727 (__v4df) 2728 _mm256_setzero_pd (), 2729 (__mmask8) -1); 2730 } 2731 2732 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2733 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) { 2734 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 2735 (__v4df) __W, 2736 (__mmask8) __U); 2737 } 2738 2739 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2740 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) { 2741 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 2742 (__v4df) 2743 _mm256_setzero_pd (), 2744 (__mmask8) __U); 2745 } 2746 2747 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2748 _mm_getexp_ps (__m128 __A) { 2749 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 2750 (__v4sf) 2751 _mm_setzero_ps (), 2752 (__mmask8) -1); 2753 } 2754 2755 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2756 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) { 2757 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 2758 (__v4sf) __W, 2759 (__mmask8) __U); 2760 } 2761 2762 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2763 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) { 2764 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 2765 (__v4sf) 2766 _mm_setzero_ps (), 2767 (__mmask8) __U); 2768 } 2769 2770 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2771 _mm256_getexp_ps (__m256 __A) { 2772 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 2773 (__v8sf) 2774 _mm256_setzero_ps (), 2775 (__mmask8) -1); 2776 } 2777 2778 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2779 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) { 2780 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 2781 (__v8sf) __W, 2782 (__mmask8) __U); 2783 } 2784 2785 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2786 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) { 2787 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 2788 (__v8sf) 2789 _mm256_setzero_ps (), 2790 (__mmask8) __U); 2791 } 2792 2793 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2794 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2795 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2796 (__v2df)_mm_max_pd(__A, __B), 2797 (__v2df)__W); 2798 } 2799 2800 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2801 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2802 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2803 (__v2df)_mm_max_pd(__A, __B), 2804 (__v2df)_mm_setzero_pd()); 2805 } 2806 2807 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2808 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2809 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2810 (__v4df)_mm256_max_pd(__A, __B), 2811 (__v4df)__W); 2812 } 2813 2814 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2815 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2816 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2817 (__v4df)_mm256_max_pd(__A, __B), 2818 (__v4df)_mm256_setzero_pd()); 2819 } 2820 2821 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2822 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2823 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2824 (__v4sf)_mm_max_ps(__A, __B), 2825 (__v4sf)__W); 2826 } 2827 2828 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2829 _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2830 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2831 (__v4sf)_mm_max_ps(__A, __B), 2832 (__v4sf)_mm_setzero_ps()); 2833 } 2834 2835 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2836 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2837 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2838 (__v8sf)_mm256_max_ps(__A, __B), 2839 (__v8sf)__W); 2840 } 2841 2842 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2843 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2844 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2845 (__v8sf)_mm256_max_ps(__A, __B), 2846 (__v8sf)_mm256_setzero_ps()); 2847 } 2848 2849 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2850 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2851 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2852 (__v2df)_mm_min_pd(__A, __B), 2853 (__v2df)__W); 2854 } 2855 2856 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2857 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2858 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2859 (__v2df)_mm_min_pd(__A, __B), 2860 (__v2df)_mm_setzero_pd()); 2861 } 2862 2863 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2864 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2865 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2866 (__v4df)_mm256_min_pd(__A, __B), 2867 (__v4df)__W); 2868 } 2869 2870 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2871 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2872 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2873 (__v4df)_mm256_min_pd(__A, __B), 2874 (__v4df)_mm256_setzero_pd()); 2875 } 2876 2877 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2878 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2879 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2880 (__v4sf)_mm_min_ps(__A, __B), 2881 (__v4sf)__W); 2882 } 2883 2884 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2885 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2886 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2887 (__v4sf)_mm_min_ps(__A, __B), 2888 (__v4sf)_mm_setzero_ps()); 2889 } 2890 2891 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2892 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2893 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2894 (__v8sf)_mm256_min_ps(__A, __B), 2895 (__v8sf)__W); 2896 } 2897 2898 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2899 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2900 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2901 (__v8sf)_mm256_min_ps(__A, __B), 2902 (__v8sf)_mm256_setzero_ps()); 2903 } 2904 2905 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2906 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2907 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2908 (__v2df)_mm_mul_pd(__A, __B), 2909 (__v2df)__W); 2910 } 2911 2912 static __inline__ __m128d __DEFAULT_FN_ATTRS128 2913 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2914 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2915 (__v2df)_mm_mul_pd(__A, __B), 2916 (__v2df)_mm_setzero_pd()); 2917 } 2918 2919 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2920 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2921 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2922 (__v4df)_mm256_mul_pd(__A, __B), 2923 (__v4df)__W); 2924 } 2925 2926 static __inline__ __m256d __DEFAULT_FN_ATTRS256 2927 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2928 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2929 (__v4df)_mm256_mul_pd(__A, __B), 2930 (__v4df)_mm256_setzero_pd()); 2931 } 2932 2933 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2934 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2935 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2936 (__v4sf)_mm_mul_ps(__A, __B), 2937 (__v4sf)__W); 2938 } 2939 2940 static __inline__ __m128 __DEFAULT_FN_ATTRS128 2941 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2942 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2943 (__v4sf)_mm_mul_ps(__A, __B), 2944 (__v4sf)_mm_setzero_ps()); 2945 } 2946 2947 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2948 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2949 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2950 (__v8sf)_mm256_mul_ps(__A, __B), 2951 (__v8sf)__W); 2952 } 2953 2954 static __inline__ __m256 __DEFAULT_FN_ATTRS256 2955 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2956 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2957 (__v8sf)_mm256_mul_ps(__A, __B), 2958 (__v8sf)_mm256_setzero_ps()); 2959 } 2960 2961 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2962 _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) { 2963 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2964 (__v4si)_mm_abs_epi32(__A), 2965 (__v4si)__W); 2966 } 2967 2968 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2969 _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) { 2970 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2971 (__v4si)_mm_abs_epi32(__A), 2972 (__v4si)_mm_setzero_si128()); 2973 } 2974 2975 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2976 _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) { 2977 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2978 (__v8si)_mm256_abs_epi32(__A), 2979 (__v8si)__W); 2980 } 2981 2982 static __inline__ __m256i __DEFAULT_FN_ATTRS256 2983 _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) { 2984 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2985 (__v8si)_mm256_abs_epi32(__A), 2986 (__v8si)_mm256_setzero_si256()); 2987 } 2988 2989 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2990 _mm_abs_epi64 (__m128i __A) { 2991 return (__m128i)__builtin_ia32_pabsq128((__v2di)__A); 2992 } 2993 2994 static __inline__ __m128i __DEFAULT_FN_ATTRS128 2995 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 2996 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 2997 (__v2di)_mm_abs_epi64(__A), 2998 (__v2di)__W); 2999 } 3000 3001 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3002 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) { 3003 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 3004 (__v2di)_mm_abs_epi64(__A), 3005 (__v2di)_mm_setzero_si128()); 3006 } 3007 3008 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3009 _mm256_abs_epi64 (__m256i __A) { 3010 return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A); 3011 } 3012 3013 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3014 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 3015 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 3016 (__v4di)_mm256_abs_epi64(__A), 3017 (__v4di)__W); 3018 } 3019 3020 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3021 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) { 3022 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 3023 (__v4di)_mm256_abs_epi64(__A), 3024 (__v4di)_mm256_setzero_si256()); 3025 } 3026 3027 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3028 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) { 3029 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3030 (__v4si)_mm_max_epi32(__A, __B), 3031 (__v4si)_mm_setzero_si128()); 3032 } 3033 3034 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3035 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3036 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3037 (__v4si)_mm_max_epi32(__A, __B), 3038 (__v4si)__W); 3039 } 3040 3041 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3042 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) { 3043 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3044 (__v8si)_mm256_max_epi32(__A, __B), 3045 (__v8si)_mm256_setzero_si256()); 3046 } 3047 3048 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3049 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3050 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3051 (__v8si)_mm256_max_epi32(__A, __B), 3052 (__v8si)__W); 3053 } 3054 3055 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3056 _mm_max_epi64 (__m128i __A, __m128i __B) { 3057 return (__m128i)__builtin_ia32_pmaxsq128((__v2di)__A, (__v2di)__B); 3058 } 3059 3060 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3061 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { 3062 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3063 (__v2di)_mm_max_epi64(__A, __B), 3064 (__v2di)_mm_setzero_si128()); 3065 } 3066 3067 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3068 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3069 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3070 (__v2di)_mm_max_epi64(__A, __B), 3071 (__v2di)__W); 3072 } 3073 3074 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3075 _mm256_max_epi64 (__m256i __A, __m256i __B) { 3076 return (__m256i)__builtin_ia32_pmaxsq256((__v4di)__A, (__v4di)__B); 3077 } 3078 3079 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3080 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { 3081 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3082 (__v4di)_mm256_max_epi64(__A, __B), 3083 (__v4di)_mm256_setzero_si256()); 3084 } 3085 3086 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3087 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3088 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3089 (__v4di)_mm256_max_epi64(__A, __B), 3090 (__v4di)__W); 3091 } 3092 3093 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3094 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) { 3095 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3096 (__v4si)_mm_max_epu32(__A, __B), 3097 (__v4si)_mm_setzero_si128()); 3098 } 3099 3100 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3101 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3102 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3103 (__v4si)_mm_max_epu32(__A, __B), 3104 (__v4si)__W); 3105 } 3106 3107 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3108 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) { 3109 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3110 (__v8si)_mm256_max_epu32(__A, __B), 3111 (__v8si)_mm256_setzero_si256()); 3112 } 3113 3114 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3115 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3116 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3117 (__v8si)_mm256_max_epu32(__A, __B), 3118 (__v8si)__W); 3119 } 3120 3121 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3122 _mm_max_epu64 (__m128i __A, __m128i __B) { 3123 return (__m128i)__builtin_ia32_pmaxuq128((__v2di)__A, (__v2di)__B); 3124 } 3125 3126 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3127 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { 3128 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3129 (__v2di)_mm_max_epu64(__A, __B), 3130 (__v2di)_mm_setzero_si128()); 3131 } 3132 3133 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3134 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3135 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3136 (__v2di)_mm_max_epu64(__A, __B), 3137 (__v2di)__W); 3138 } 3139 3140 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3141 _mm256_max_epu64 (__m256i __A, __m256i __B) { 3142 return (__m256i)__builtin_ia32_pmaxuq256((__v4di)__A, (__v4di)__B); 3143 } 3144 3145 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3146 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { 3147 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3148 (__v4di)_mm256_max_epu64(__A, __B), 3149 (__v4di)_mm256_setzero_si256()); 3150 } 3151 3152 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3153 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3154 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3155 (__v4di)_mm256_max_epu64(__A, __B), 3156 (__v4di)__W); 3157 } 3158 3159 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3160 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) { 3161 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3162 (__v4si)_mm_min_epi32(__A, __B), 3163 (__v4si)_mm_setzero_si128()); 3164 } 3165 3166 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3167 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3168 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3169 (__v4si)_mm_min_epi32(__A, __B), 3170 (__v4si)__W); 3171 } 3172 3173 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3174 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) { 3175 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3176 (__v8si)_mm256_min_epi32(__A, __B), 3177 (__v8si)_mm256_setzero_si256()); 3178 } 3179 3180 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3181 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3182 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3183 (__v8si)_mm256_min_epi32(__A, __B), 3184 (__v8si)__W); 3185 } 3186 3187 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3188 _mm_min_epi64 (__m128i __A, __m128i __B) { 3189 return (__m128i)__builtin_ia32_pminsq128((__v2di)__A, (__v2di)__B); 3190 } 3191 3192 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3193 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3194 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3195 (__v2di)_mm_min_epi64(__A, __B), 3196 (__v2di)__W); 3197 } 3198 3199 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3200 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { 3201 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3202 (__v2di)_mm_min_epi64(__A, __B), 3203 (__v2di)_mm_setzero_si128()); 3204 } 3205 3206 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3207 _mm256_min_epi64 (__m256i __A, __m256i __B) { 3208 return (__m256i)__builtin_ia32_pminsq256((__v4di)__A, (__v4di)__B); 3209 } 3210 3211 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3212 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3213 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3214 (__v4di)_mm256_min_epi64(__A, __B), 3215 (__v4di)__W); 3216 } 3217 3218 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3219 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { 3220 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3221 (__v4di)_mm256_min_epi64(__A, __B), 3222 (__v4di)_mm256_setzero_si256()); 3223 } 3224 3225 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3226 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) { 3227 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3228 (__v4si)_mm_min_epu32(__A, __B), 3229 (__v4si)_mm_setzero_si128()); 3230 } 3231 3232 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3233 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3234 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3235 (__v4si)_mm_min_epu32(__A, __B), 3236 (__v4si)__W); 3237 } 3238 3239 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3240 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) { 3241 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3242 (__v8si)_mm256_min_epu32(__A, __B), 3243 (__v8si)_mm256_setzero_si256()); 3244 } 3245 3246 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3247 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3248 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3249 (__v8si)_mm256_min_epu32(__A, __B), 3250 (__v8si)__W); 3251 } 3252 3253 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3254 _mm_min_epu64 (__m128i __A, __m128i __B) { 3255 return (__m128i)__builtin_ia32_pminuq128((__v2di)__A, (__v2di)__B); 3256 } 3257 3258 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3259 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3260 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3261 (__v2di)_mm_min_epu64(__A, __B), 3262 (__v2di)__W); 3263 } 3264 3265 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3266 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { 3267 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3268 (__v2di)_mm_min_epu64(__A, __B), 3269 (__v2di)_mm_setzero_si128()); 3270 } 3271 3272 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3273 _mm256_min_epu64 (__m256i __A, __m256i __B) { 3274 return (__m256i)__builtin_ia32_pminuq256((__v4di)__A, (__v4di)__B); 3275 } 3276 3277 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3278 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3279 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3280 (__v4di)_mm256_min_epu64(__A, __B), 3281 (__v4di)__W); 3282 } 3283 3284 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3285 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { 3286 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3287 (__v4di)_mm256_min_epu64(__A, __B), 3288 (__v4di)_mm256_setzero_si256()); 3289 } 3290 3291 #define _mm_roundscale_pd(A, imm) \ 3292 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3293 (int)(imm), \ 3294 (__v2df)_mm_setzero_pd(), \ 3295 (__mmask8)-1) 3296 3297 3298 #define _mm_mask_roundscale_pd(W, U, A, imm) \ 3299 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3300 (int)(imm), \ 3301 (__v2df)(__m128d)(W), \ 3302 (__mmask8)(U)) 3303 3304 3305 #define _mm_maskz_roundscale_pd(U, A, imm) \ 3306 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3307 (int)(imm), \ 3308 (__v2df)_mm_setzero_pd(), \ 3309 (__mmask8)(U)) 3310 3311 3312 #define _mm256_roundscale_pd(A, imm) \ 3313 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3314 (int)(imm), \ 3315 (__v4df)_mm256_setzero_pd(), \ 3316 (__mmask8)-1) 3317 3318 3319 #define _mm256_mask_roundscale_pd(W, U, A, imm) \ 3320 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3321 (int)(imm), \ 3322 (__v4df)(__m256d)(W), \ 3323 (__mmask8)(U)) 3324 3325 3326 #define _mm256_maskz_roundscale_pd(U, A, imm) \ 3327 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3328 (int)(imm), \ 3329 (__v4df)_mm256_setzero_pd(), \ 3330 (__mmask8)(U)) 3331 3332 #define _mm_roundscale_ps(A, imm) \ 3333 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3334 (__v4sf)_mm_setzero_ps(), \ 3335 (__mmask8)-1) 3336 3337 3338 #define _mm_mask_roundscale_ps(W, U, A, imm) \ 3339 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3340 (__v4sf)(__m128)(W), \ 3341 (__mmask8)(U)) 3342 3343 3344 #define _mm_maskz_roundscale_ps(U, A, imm) \ 3345 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3346 (__v4sf)_mm_setzero_ps(), \ 3347 (__mmask8)(U)) 3348 3349 #define _mm256_roundscale_ps(A, imm) \ 3350 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3351 (__v8sf)_mm256_setzero_ps(), \ 3352 (__mmask8)-1) 3353 3354 #define _mm256_mask_roundscale_ps(W, U, A, imm) \ 3355 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3356 (__v8sf)(__m256)(W), \ 3357 (__mmask8)(U)) 3358 3359 3360 #define _mm256_maskz_roundscale_ps(U, A, imm) \ 3361 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3362 (__v8sf)_mm256_setzero_ps(), \ 3363 (__mmask8)(U)) 3364 3365 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3366 _mm_scalef_pd (__m128d __A, __m128d __B) { 3367 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3368 (__v2df) __B, 3369 (__v2df) 3370 _mm_setzero_pd (), 3371 (__mmask8) -1); 3372 } 3373 3374 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3375 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A, 3376 __m128d __B) { 3377 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3378 (__v2df) __B, 3379 (__v2df) __W, 3380 (__mmask8) __U); 3381 } 3382 3383 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3384 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) { 3385 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3386 (__v2df) __B, 3387 (__v2df) 3388 _mm_setzero_pd (), 3389 (__mmask8) __U); 3390 } 3391 3392 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3393 _mm256_scalef_pd (__m256d __A, __m256d __B) { 3394 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3395 (__v4df) __B, 3396 (__v4df) 3397 _mm256_setzero_pd (), 3398 (__mmask8) -1); 3399 } 3400 3401 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3402 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A, 3403 __m256d __B) { 3404 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3405 (__v4df) __B, 3406 (__v4df) __W, 3407 (__mmask8) __U); 3408 } 3409 3410 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3411 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) { 3412 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3413 (__v4df) __B, 3414 (__v4df) 3415 _mm256_setzero_pd (), 3416 (__mmask8) __U); 3417 } 3418 3419 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3420 _mm_scalef_ps (__m128 __A, __m128 __B) { 3421 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3422 (__v4sf) __B, 3423 (__v4sf) 3424 _mm_setzero_ps (), 3425 (__mmask8) -1); 3426 } 3427 3428 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3429 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3430 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3431 (__v4sf) __B, 3432 (__v4sf) __W, 3433 (__mmask8) __U); 3434 } 3435 3436 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3437 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) { 3438 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3439 (__v4sf) __B, 3440 (__v4sf) 3441 _mm_setzero_ps (), 3442 (__mmask8) __U); 3443 } 3444 3445 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3446 _mm256_scalef_ps (__m256 __A, __m256 __B) { 3447 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3448 (__v8sf) __B, 3449 (__v8sf) 3450 _mm256_setzero_ps (), 3451 (__mmask8) -1); 3452 } 3453 3454 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3455 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A, 3456 __m256 __B) { 3457 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3458 (__v8sf) __B, 3459 (__v8sf) __W, 3460 (__mmask8) __U); 3461 } 3462 3463 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3464 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { 3465 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3466 (__v8sf) __B, 3467 (__v8sf) 3468 _mm256_setzero_ps (), 3469 (__mmask8) __U); 3470 } 3471 3472 #define _mm_i64scatter_pd(addr, index, v1, scale) \ 3473 __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)-1, \ 3474 (__v2di)(__m128i)(index), \ 3475 (__v2df)(__m128d)(v1), (int)(scale)) 3476 3477 #define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \ 3478 __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)(mask), \ 3479 (__v2di)(__m128i)(index), \ 3480 (__v2df)(__m128d)(v1), (int)(scale)) 3481 3482 #define _mm_i64scatter_epi64(addr, index, v1, scale) \ 3483 __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)-1, \ 3484 (__v2di)(__m128i)(index), \ 3485 (__v2di)(__m128i)(v1), (int)(scale)) 3486 3487 #define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ 3488 __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)(mask), \ 3489 (__v2di)(__m128i)(index), \ 3490 (__v2di)(__m128i)(v1), (int)(scale)) 3491 3492 #define _mm256_i64scatter_pd(addr, index, v1, scale) \ 3493 __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)-1, \ 3494 (__v4di)(__m256i)(index), \ 3495 (__v4df)(__m256d)(v1), (int)(scale)) 3496 3497 #define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \ 3498 __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)(mask), \ 3499 (__v4di)(__m256i)(index), \ 3500 (__v4df)(__m256d)(v1), (int)(scale)) 3501 3502 #define _mm256_i64scatter_epi64(addr, index, v1, scale) \ 3503 __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)-1, \ 3504 (__v4di)(__m256i)(index), \ 3505 (__v4di)(__m256i)(v1), (int)(scale)) 3506 3507 #define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ 3508 __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)(mask), \ 3509 (__v4di)(__m256i)(index), \ 3510 (__v4di)(__m256i)(v1), (int)(scale)) 3511 3512 #define _mm_i64scatter_ps(addr, index, v1, scale) \ 3513 __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)-1, \ 3514 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3515 (int)(scale)) 3516 3517 #define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \ 3518 __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)(mask), \ 3519 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3520 (int)(scale)) 3521 3522 #define _mm_i64scatter_epi32(addr, index, v1, scale) \ 3523 __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)-1, \ 3524 (__v2di)(__m128i)(index), \ 3525 (__v4si)(__m128i)(v1), (int)(scale)) 3526 3527 #define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ 3528 __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)(mask), \ 3529 (__v2di)(__m128i)(index), \ 3530 (__v4si)(__m128i)(v1), (int)(scale)) 3531 3532 #define _mm256_i64scatter_ps(addr, index, v1, scale) \ 3533 __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)-1, \ 3534 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ 3535 (int)(scale)) 3536 3537 #define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \ 3538 __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)(mask), \ 3539 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ 3540 (int)(scale)) 3541 3542 #define _mm256_i64scatter_epi32(addr, index, v1, scale) \ 3543 __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)-1, \ 3544 (__v4di)(__m256i)(index), \ 3545 (__v4si)(__m128i)(v1), (int)(scale)) 3546 3547 #define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ 3548 __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)(mask), \ 3549 (__v4di)(__m256i)(index), \ 3550 (__v4si)(__m128i)(v1), (int)(scale)) 3551 3552 #define _mm_i32scatter_pd(addr, index, v1, scale) \ 3553 __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)-1, \ 3554 (__v4si)(__m128i)(index), \ 3555 (__v2df)(__m128d)(v1), (int)(scale)) 3556 3557 #define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \ 3558 __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)(mask), \ 3559 (__v4si)(__m128i)(index), \ 3560 (__v2df)(__m128d)(v1), (int)(scale)) 3561 3562 #define _mm_i32scatter_epi64(addr, index, v1, scale) \ 3563 __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)-1, \ 3564 (__v4si)(__m128i)(index), \ 3565 (__v2di)(__m128i)(v1), (int)(scale)) 3566 3567 #define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ 3568 __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)(mask), \ 3569 (__v4si)(__m128i)(index), \ 3570 (__v2di)(__m128i)(v1), (int)(scale)) 3571 3572 #define _mm256_i32scatter_pd(addr, index, v1, scale) \ 3573 __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)-1, \ 3574 (__v4si)(__m128i)(index), \ 3575 (__v4df)(__m256d)(v1), (int)(scale)) 3576 3577 #define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \ 3578 __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)(mask), \ 3579 (__v4si)(__m128i)(index), \ 3580 (__v4df)(__m256d)(v1), (int)(scale)) 3581 3582 #define _mm256_i32scatter_epi64(addr, index, v1, scale) \ 3583 __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)-1, \ 3584 (__v4si)(__m128i)(index), \ 3585 (__v4di)(__m256i)(v1), (int)(scale)) 3586 3587 #define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ 3588 __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)(mask), \ 3589 (__v4si)(__m128i)(index), \ 3590 (__v4di)(__m256i)(v1), (int)(scale)) 3591 3592 #define _mm_i32scatter_ps(addr, index, v1, scale) \ 3593 __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)-1, \ 3594 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3595 (int)(scale)) 3596 3597 #define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \ 3598 __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)(mask), \ 3599 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3600 (int)(scale)) 3601 3602 #define _mm_i32scatter_epi32(addr, index, v1, scale) \ 3603 __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)-1, \ 3604 (__v4si)(__m128i)(index), \ 3605 (__v4si)(__m128i)(v1), (int)(scale)) 3606 3607 #define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \ 3608 __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)(mask), \ 3609 (__v4si)(__m128i)(index), \ 3610 (__v4si)(__m128i)(v1), (int)(scale)) 3611 3612 #define _mm256_i32scatter_ps(addr, index, v1, scale) \ 3613 __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)-1, \ 3614 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ 3615 (int)(scale)) 3616 3617 #define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \ 3618 __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)(mask), \ 3619 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ 3620 (int)(scale)) 3621 3622 #define _mm256_i32scatter_epi32(addr, index, v1, scale) \ 3623 __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)-1, \ 3624 (__v8si)(__m256i)(index), \ 3625 (__v8si)(__m256i)(v1), (int)(scale)) 3626 3627 #define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \ 3628 __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)(mask), \ 3629 (__v8si)(__m256i)(index), \ 3630 (__v8si)(__m256i)(v1), (int)(scale)) 3631 3632 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3633 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) { 3634 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3635 (__v2df)_mm_sqrt_pd(__A), 3636 (__v2df)__W); 3637 } 3638 3639 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3640 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) { 3641 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3642 (__v2df)_mm_sqrt_pd(__A), 3643 (__v2df)_mm_setzero_pd()); 3644 } 3645 3646 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3647 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) { 3648 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3649 (__v4df)_mm256_sqrt_pd(__A), 3650 (__v4df)__W); 3651 } 3652 3653 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3654 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) { 3655 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3656 (__v4df)_mm256_sqrt_pd(__A), 3657 (__v4df)_mm256_setzero_pd()); 3658 } 3659 3660 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3661 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) { 3662 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3663 (__v4sf)_mm_sqrt_ps(__A), 3664 (__v4sf)__W); 3665 } 3666 3667 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3668 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) { 3669 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3670 (__v4sf)_mm_sqrt_ps(__A), 3671 (__v4sf)_mm_setzero_ps()); 3672 } 3673 3674 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3675 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) { 3676 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3677 (__v8sf)_mm256_sqrt_ps(__A), 3678 (__v8sf)__W); 3679 } 3680 3681 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3682 _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) { 3683 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3684 (__v8sf)_mm256_sqrt_ps(__A), 3685 (__v8sf)_mm256_setzero_ps()); 3686 } 3687 3688 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3689 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 3690 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3691 (__v2df)_mm_sub_pd(__A, __B), 3692 (__v2df)__W); 3693 } 3694 3695 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3696 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) { 3697 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3698 (__v2df)_mm_sub_pd(__A, __B), 3699 (__v2df)_mm_setzero_pd()); 3700 } 3701 3702 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3703 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 3704 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3705 (__v4df)_mm256_sub_pd(__A, __B), 3706 (__v4df)__W); 3707 } 3708 3709 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3710 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) { 3711 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3712 (__v4df)_mm256_sub_pd(__A, __B), 3713 (__v4df)_mm256_setzero_pd()); 3714 } 3715 3716 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3717 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3718 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3719 (__v4sf)_mm_sub_ps(__A, __B), 3720 (__v4sf)__W); 3721 } 3722 3723 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3724 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) { 3725 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3726 (__v4sf)_mm_sub_ps(__A, __B), 3727 (__v4sf)_mm_setzero_ps()); 3728 } 3729 3730 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3731 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 3732 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3733 (__v8sf)_mm256_sub_ps(__A, __B), 3734 (__v8sf)__W); 3735 } 3736 3737 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3738 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) { 3739 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3740 (__v8sf)_mm256_sub_ps(__A, __B), 3741 (__v8sf)_mm256_setzero_ps()); 3742 } 3743 3744 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3745 _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) { 3746 return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I, 3747 (__v4si)__B); 3748 } 3749 3750 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3751 _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, 3752 __m128i __B) { 3753 return (__m128i)__builtin_ia32_selectd_128(__U, 3754 (__v4si)_mm_permutex2var_epi32(__A, __I, __B), 3755 (__v4si)__A); 3756 } 3757 3758 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3759 _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, 3760 __m128i __B) { 3761 return (__m128i)__builtin_ia32_selectd_128(__U, 3762 (__v4si)_mm_permutex2var_epi32(__A, __I, __B), 3763 (__v4si)__I); 3764 } 3765 3766 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3767 _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, 3768 __m128i __B) { 3769 return (__m128i)__builtin_ia32_selectd_128(__U, 3770 (__v4si)_mm_permutex2var_epi32(__A, __I, __B), 3771 (__v4si)_mm_setzero_si128()); 3772 } 3773 3774 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3775 _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) { 3776 return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I, 3777 (__v8si) __B); 3778 } 3779 3780 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3781 _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, 3782 __m256i __B) { 3783 return (__m256i)__builtin_ia32_selectd_256(__U, 3784 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), 3785 (__v8si)__A); 3786 } 3787 3788 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3789 _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, 3790 __m256i __B) { 3791 return (__m256i)__builtin_ia32_selectd_256(__U, 3792 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), 3793 (__v8si)__I); 3794 } 3795 3796 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3797 _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, 3798 __m256i __B) { 3799 return (__m256i)__builtin_ia32_selectd_256(__U, 3800 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), 3801 (__v8si)_mm256_setzero_si256()); 3802 } 3803 3804 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3805 _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) { 3806 return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I, 3807 (__v2df)__B); 3808 } 3809 3810 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3811 _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) { 3812 return (__m128d)__builtin_ia32_selectpd_128(__U, 3813 (__v2df)_mm_permutex2var_pd(__A, __I, __B), 3814 (__v2df)__A); 3815 } 3816 3817 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3818 _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) { 3819 return (__m128d)__builtin_ia32_selectpd_128(__U, 3820 (__v2df)_mm_permutex2var_pd(__A, __I, __B), 3821 (__v2df)(__m128d)__I); 3822 } 3823 3824 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3825 _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) { 3826 return (__m128d)__builtin_ia32_selectpd_128(__U, 3827 (__v2df)_mm_permutex2var_pd(__A, __I, __B), 3828 (__v2df)_mm_setzero_pd()); 3829 } 3830 3831 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3832 _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) { 3833 return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I, 3834 (__v4df)__B); 3835 } 3836 3837 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3838 _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, 3839 __m256d __B) { 3840 return (__m256d)__builtin_ia32_selectpd_256(__U, 3841 (__v4df)_mm256_permutex2var_pd(__A, __I, __B), 3842 (__v4df)__A); 3843 } 3844 3845 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3846 _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U, 3847 __m256d __B) { 3848 return (__m256d)__builtin_ia32_selectpd_256(__U, 3849 (__v4df)_mm256_permutex2var_pd(__A, __I, __B), 3850 (__v4df)(__m256d)__I); 3851 } 3852 3853 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3854 _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, 3855 __m256d __B) { 3856 return (__m256d)__builtin_ia32_selectpd_256(__U, 3857 (__v4df)_mm256_permutex2var_pd(__A, __I, __B), 3858 (__v4df)_mm256_setzero_pd()); 3859 } 3860 3861 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3862 _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) { 3863 return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I, 3864 (__v4sf)__B); 3865 } 3866 3867 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3868 _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) { 3869 return (__m128)__builtin_ia32_selectps_128(__U, 3870 (__v4sf)_mm_permutex2var_ps(__A, __I, __B), 3871 (__v4sf)__A); 3872 } 3873 3874 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3875 _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) { 3876 return (__m128)__builtin_ia32_selectps_128(__U, 3877 (__v4sf)_mm_permutex2var_ps(__A, __I, __B), 3878 (__v4sf)(__m128)__I); 3879 } 3880 3881 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3882 _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) { 3883 return (__m128)__builtin_ia32_selectps_128(__U, 3884 (__v4sf)_mm_permutex2var_ps(__A, __I, __B), 3885 (__v4sf)_mm_setzero_ps()); 3886 } 3887 3888 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3889 _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) { 3890 return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I, 3891 (__v8sf) __B); 3892 } 3893 3894 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3895 _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) { 3896 return (__m256)__builtin_ia32_selectps_256(__U, 3897 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), 3898 (__v8sf)__A); 3899 } 3900 3901 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3902 _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U, 3903 __m256 __B) { 3904 return (__m256)__builtin_ia32_selectps_256(__U, 3905 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), 3906 (__v8sf)(__m256)__I); 3907 } 3908 3909 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3910 _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, 3911 __m256 __B) { 3912 return (__m256)__builtin_ia32_selectps_256(__U, 3913 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), 3914 (__v8sf)_mm256_setzero_ps()); 3915 } 3916 3917 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3918 _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) { 3919 return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I, 3920 (__v2di)__B); 3921 } 3922 3923 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3924 _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, 3925 __m128i __B) { 3926 return (__m128i)__builtin_ia32_selectq_128(__U, 3927 (__v2di)_mm_permutex2var_epi64(__A, __I, __B), 3928 (__v2di)__A); 3929 } 3930 3931 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3932 _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U, 3933 __m128i __B) { 3934 return (__m128i)__builtin_ia32_selectq_128(__U, 3935 (__v2di)_mm_permutex2var_epi64(__A, __I, __B), 3936 (__v2di)__I); 3937 } 3938 3939 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3940 _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, 3941 __m128i __B) { 3942 return (__m128i)__builtin_ia32_selectq_128(__U, 3943 (__v2di)_mm_permutex2var_epi64(__A, __I, __B), 3944 (__v2di)_mm_setzero_si128()); 3945 } 3946 3947 3948 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3949 _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) { 3950 return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I, 3951 (__v4di) __B); 3952 } 3953 3954 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3955 _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, 3956 __m256i __B) { 3957 return (__m256i)__builtin_ia32_selectq_256(__U, 3958 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), 3959 (__v4di)__A); 3960 } 3961 3962 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3963 _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, 3964 __m256i __B) { 3965 return (__m256i)__builtin_ia32_selectq_256(__U, 3966 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), 3967 (__v4di)__I); 3968 } 3969 3970 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3971 _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, 3972 __m256i __B) { 3973 return (__m256i)__builtin_ia32_selectq_256(__U, 3974 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), 3975 (__v4di)_mm256_setzero_si256()); 3976 } 3977 3978 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3979 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A) 3980 { 3981 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 3982 (__v4si)_mm_cvtepi8_epi32(__A), 3983 (__v4si)__W); 3984 } 3985 3986 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3987 _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A) 3988 { 3989 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 3990 (__v4si)_mm_cvtepi8_epi32(__A), 3991 (__v4si)_mm_setzero_si128()); 3992 } 3993 3994 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3995 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A) 3996 { 3997 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 3998 (__v8si)_mm256_cvtepi8_epi32(__A), 3999 (__v8si)__W); 4000 } 4001 4002 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4003 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A) 4004 { 4005 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4006 (__v8si)_mm256_cvtepi8_epi32(__A), 4007 (__v8si)_mm256_setzero_si256()); 4008 } 4009 4010 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4011 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4012 { 4013 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4014 (__v2di)_mm_cvtepi8_epi64(__A), 4015 (__v2di)__W); 4016 } 4017 4018 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4019 _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) 4020 { 4021 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4022 (__v2di)_mm_cvtepi8_epi64(__A), 4023 (__v2di)_mm_setzero_si128()); 4024 } 4025 4026 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4027 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4028 { 4029 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4030 (__v4di)_mm256_cvtepi8_epi64(__A), 4031 (__v4di)__W); 4032 } 4033 4034 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4035 _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) 4036 { 4037 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4038 (__v4di)_mm256_cvtepi8_epi64(__A), 4039 (__v4di)_mm256_setzero_si256()); 4040 } 4041 4042 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4043 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X) 4044 { 4045 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4046 (__v2di)_mm_cvtepi32_epi64(__X), 4047 (__v2di)__W); 4048 } 4049 4050 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4051 _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) 4052 { 4053 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4054 (__v2di)_mm_cvtepi32_epi64(__X), 4055 (__v2di)_mm_setzero_si128()); 4056 } 4057 4058 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4059 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X) 4060 { 4061 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4062 (__v4di)_mm256_cvtepi32_epi64(__X), 4063 (__v4di)__W); 4064 } 4065 4066 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4067 _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) 4068 { 4069 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4070 (__v4di)_mm256_cvtepi32_epi64(__X), 4071 (__v4di)_mm256_setzero_si256()); 4072 } 4073 4074 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4075 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4076 { 4077 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4078 (__v4si)_mm_cvtepi16_epi32(__A), 4079 (__v4si)__W); 4080 } 4081 4082 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4083 _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A) 4084 { 4085 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4086 (__v4si)_mm_cvtepi16_epi32(__A), 4087 (__v4si)_mm_setzero_si128()); 4088 } 4089 4090 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4091 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4092 { 4093 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4094 (__v8si)_mm256_cvtepi16_epi32(__A), 4095 (__v8si)__W); 4096 } 4097 4098 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4099 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A) 4100 { 4101 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4102 (__v8si)_mm256_cvtepi16_epi32(__A), 4103 (__v8si)_mm256_setzero_si256()); 4104 } 4105 4106 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4107 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4108 { 4109 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4110 (__v2di)_mm_cvtepi16_epi64(__A), 4111 (__v2di)__W); 4112 } 4113 4114 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4115 _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) 4116 { 4117 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4118 (__v2di)_mm_cvtepi16_epi64(__A), 4119 (__v2di)_mm_setzero_si128()); 4120 } 4121 4122 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4123 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4124 { 4125 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4126 (__v4di)_mm256_cvtepi16_epi64(__A), 4127 (__v4di)__W); 4128 } 4129 4130 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4131 _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) 4132 { 4133 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4134 (__v4di)_mm256_cvtepi16_epi64(__A), 4135 (__v4di)_mm256_setzero_si256()); 4136 } 4137 4138 4139 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4140 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4141 { 4142 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4143 (__v4si)_mm_cvtepu8_epi32(__A), 4144 (__v4si)__W); 4145 } 4146 4147 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4148 _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) 4149 { 4150 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4151 (__v4si)_mm_cvtepu8_epi32(__A), 4152 (__v4si)_mm_setzero_si128()); 4153 } 4154 4155 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4156 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4157 { 4158 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4159 (__v8si)_mm256_cvtepu8_epi32(__A), 4160 (__v8si)__W); 4161 } 4162 4163 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4164 _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) 4165 { 4166 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4167 (__v8si)_mm256_cvtepu8_epi32(__A), 4168 (__v8si)_mm256_setzero_si256()); 4169 } 4170 4171 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4172 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4173 { 4174 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4175 (__v2di)_mm_cvtepu8_epi64(__A), 4176 (__v2di)__W); 4177 } 4178 4179 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4180 _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) 4181 { 4182 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4183 (__v2di)_mm_cvtepu8_epi64(__A), 4184 (__v2di)_mm_setzero_si128()); 4185 } 4186 4187 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4188 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4189 { 4190 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4191 (__v4di)_mm256_cvtepu8_epi64(__A), 4192 (__v4di)__W); 4193 } 4194 4195 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4196 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) 4197 { 4198 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4199 (__v4di)_mm256_cvtepu8_epi64(__A), 4200 (__v4di)_mm256_setzero_si256()); 4201 } 4202 4203 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4204 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X) 4205 { 4206 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4207 (__v2di)_mm_cvtepu32_epi64(__X), 4208 (__v2di)__W); 4209 } 4210 4211 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4212 _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) 4213 { 4214 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4215 (__v2di)_mm_cvtepu32_epi64(__X), 4216 (__v2di)_mm_setzero_si128()); 4217 } 4218 4219 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4220 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X) 4221 { 4222 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4223 (__v4di)_mm256_cvtepu32_epi64(__X), 4224 (__v4di)__W); 4225 } 4226 4227 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4228 _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) 4229 { 4230 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4231 (__v4di)_mm256_cvtepu32_epi64(__X), 4232 (__v4di)_mm256_setzero_si256()); 4233 } 4234 4235 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4236 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4237 { 4238 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4239 (__v4si)_mm_cvtepu16_epi32(__A), 4240 (__v4si)__W); 4241 } 4242 4243 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4244 _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) 4245 { 4246 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4247 (__v4si)_mm_cvtepu16_epi32(__A), 4248 (__v4si)_mm_setzero_si128()); 4249 } 4250 4251 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4252 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4253 { 4254 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4255 (__v8si)_mm256_cvtepu16_epi32(__A), 4256 (__v8si)__W); 4257 } 4258 4259 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4260 _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) 4261 { 4262 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4263 (__v8si)_mm256_cvtepu16_epi32(__A), 4264 (__v8si)_mm256_setzero_si256()); 4265 } 4266 4267 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4268 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4269 { 4270 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4271 (__v2di)_mm_cvtepu16_epi64(__A), 4272 (__v2di)__W); 4273 } 4274 4275 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4276 _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) 4277 { 4278 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4279 (__v2di)_mm_cvtepu16_epi64(__A), 4280 (__v2di)_mm_setzero_si128()); 4281 } 4282 4283 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4284 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4285 { 4286 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4287 (__v4di)_mm256_cvtepu16_epi64(__A), 4288 (__v4di)__W); 4289 } 4290 4291 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4292 _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) 4293 { 4294 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4295 (__v4di)_mm256_cvtepu16_epi64(__A), 4296 (__v4di)_mm256_setzero_si256()); 4297 } 4298 4299 4300 #define _mm_rol_epi32(a, b) \ 4301 (__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b)) 4302 4303 #define _mm_mask_rol_epi32(w, u, a, b) \ 4304 (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 4305 (__v4si)_mm_rol_epi32((a), (b)), \ 4306 (__v4si)(__m128i)(w)) 4307 4308 #define _mm_maskz_rol_epi32(u, a, b) \ 4309 (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 4310 (__v4si)_mm_rol_epi32((a), (b)), \ 4311 (__v4si)_mm_setzero_si128()) 4312 4313 #define _mm256_rol_epi32(a, b) \ 4314 (__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b)) 4315 4316 #define _mm256_mask_rol_epi32(w, u, a, b) \ 4317 (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 4318 (__v8si)_mm256_rol_epi32((a), (b)), \ 4319 (__v8si)(__m256i)(w)) 4320 4321 #define _mm256_maskz_rol_epi32(u, a, b) \ 4322 (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 4323 (__v8si)_mm256_rol_epi32((a), (b)), \ 4324 (__v8si)_mm256_setzero_si256()) 4325 4326 #define _mm_rol_epi64(a, b) \ 4327 (__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b)) 4328 4329 #define _mm_mask_rol_epi64(w, u, a, b) \ 4330 (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 4331 (__v2di)_mm_rol_epi64((a), (b)), \ 4332 (__v2di)(__m128i)(w)) 4333 4334 #define _mm_maskz_rol_epi64(u, a, b) \ 4335 (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 4336 (__v2di)_mm_rol_epi64((a), (b)), \ 4337 (__v2di)_mm_setzero_si128()) 4338 4339 #define _mm256_rol_epi64(a, b) \ 4340 (__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b)) 4341 4342 #define _mm256_mask_rol_epi64(w, u, a, b) \ 4343 (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 4344 (__v4di)_mm256_rol_epi64((a), (b)), \ 4345 (__v4di)(__m256i)(w)) 4346 4347 #define _mm256_maskz_rol_epi64(u, a, b) \ 4348 (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 4349 (__v4di)_mm256_rol_epi64((a), (b)), \ 4350 (__v4di)_mm256_setzero_si256()) 4351 4352 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4353 _mm_rolv_epi32 (__m128i __A, __m128i __B) 4354 { 4355 return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B); 4356 } 4357 4358 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4359 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4360 { 4361 return (__m128i)__builtin_ia32_selectd_128(__U, 4362 (__v4si)_mm_rolv_epi32(__A, __B), 4363 (__v4si)__W); 4364 } 4365 4366 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4367 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 4368 { 4369 return (__m128i)__builtin_ia32_selectd_128(__U, 4370 (__v4si)_mm_rolv_epi32(__A, __B), 4371 (__v4si)_mm_setzero_si128()); 4372 } 4373 4374 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4375 _mm256_rolv_epi32 (__m256i __A, __m256i __B) 4376 { 4377 return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B); 4378 } 4379 4380 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4381 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 4382 { 4383 return (__m256i)__builtin_ia32_selectd_256(__U, 4384 (__v8si)_mm256_rolv_epi32(__A, __B), 4385 (__v8si)__W); 4386 } 4387 4388 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4389 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 4390 { 4391 return (__m256i)__builtin_ia32_selectd_256(__U, 4392 (__v8si)_mm256_rolv_epi32(__A, __B), 4393 (__v8si)_mm256_setzero_si256()); 4394 } 4395 4396 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4397 _mm_rolv_epi64 (__m128i __A, __m128i __B) 4398 { 4399 return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B); 4400 } 4401 4402 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4403 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4404 { 4405 return (__m128i)__builtin_ia32_selectq_128(__U, 4406 (__v2di)_mm_rolv_epi64(__A, __B), 4407 (__v2di)__W); 4408 } 4409 4410 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4411 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 4412 { 4413 return (__m128i)__builtin_ia32_selectq_128(__U, 4414 (__v2di)_mm_rolv_epi64(__A, __B), 4415 (__v2di)_mm_setzero_si128()); 4416 } 4417 4418 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4419 _mm256_rolv_epi64 (__m256i __A, __m256i __B) 4420 { 4421 return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B); 4422 } 4423 4424 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4425 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 4426 { 4427 return (__m256i)__builtin_ia32_selectq_256(__U, 4428 (__v4di)_mm256_rolv_epi64(__A, __B), 4429 (__v4di)__W); 4430 } 4431 4432 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4433 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 4434 { 4435 return (__m256i)__builtin_ia32_selectq_256(__U, 4436 (__v4di)_mm256_rolv_epi64(__A, __B), 4437 (__v4di)_mm256_setzero_si256()); 4438 } 4439 4440 #define _mm_ror_epi32(a, b) \ 4441 (__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b)) 4442 4443 #define _mm_mask_ror_epi32(w, u, a, b) \ 4444 (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 4445 (__v4si)_mm_ror_epi32((a), (b)), \ 4446 (__v4si)(__m128i)(w)) 4447 4448 #define _mm_maskz_ror_epi32(u, a, b) \ 4449 (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 4450 (__v4si)_mm_ror_epi32((a), (b)), \ 4451 (__v4si)_mm_setzero_si128()) 4452 4453 #define _mm256_ror_epi32(a, b) \ 4454 (__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b)) 4455 4456 #define _mm256_mask_ror_epi32(w, u, a, b) \ 4457 (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 4458 (__v8si)_mm256_ror_epi32((a), (b)), \ 4459 (__v8si)(__m256i)(w)) 4460 4461 #define _mm256_maskz_ror_epi32(u, a, b) \ 4462 (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 4463 (__v8si)_mm256_ror_epi32((a), (b)), \ 4464 (__v8si)_mm256_setzero_si256()) 4465 4466 #define _mm_ror_epi64(a, b) \ 4467 (__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b)) 4468 4469 #define _mm_mask_ror_epi64(w, u, a, b) \ 4470 (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 4471 (__v2di)_mm_ror_epi64((a), (b)), \ 4472 (__v2di)(__m128i)(w)) 4473 4474 #define _mm_maskz_ror_epi64(u, a, b) \ 4475 (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 4476 (__v2di)_mm_ror_epi64((a), (b)), \ 4477 (__v2di)_mm_setzero_si128()) 4478 4479 #define _mm256_ror_epi64(a, b) \ 4480 (__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b)) 4481 4482 #define _mm256_mask_ror_epi64(w, u, a, b) \ 4483 (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 4484 (__v4di)_mm256_ror_epi64((a), (b)), \ 4485 (__v4di)(__m256i)(w)) 4486 4487 #define _mm256_maskz_ror_epi64(u, a, b) \ 4488 (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 4489 (__v4di)_mm256_ror_epi64((a), (b)), \ 4490 (__v4di)_mm256_setzero_si256()) 4491 4492 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4493 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4494 { 4495 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4496 (__v4si)_mm_sll_epi32(__A, __B), 4497 (__v4si)__W); 4498 } 4499 4500 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4501 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B) 4502 { 4503 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4504 (__v4si)_mm_sll_epi32(__A, __B), 4505 (__v4si)_mm_setzero_si128()); 4506 } 4507 4508 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4509 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 4510 { 4511 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4512 (__v8si)_mm256_sll_epi32(__A, __B), 4513 (__v8si)__W); 4514 } 4515 4516 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4517 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B) 4518 { 4519 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4520 (__v8si)_mm256_sll_epi32(__A, __B), 4521 (__v8si)_mm256_setzero_si256()); 4522 } 4523 4524 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4525 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B) 4526 { 4527 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4528 (__v4si)_mm_slli_epi32(__A, __B), 4529 (__v4si)__W); 4530 } 4531 4532 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4533 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, int __B) 4534 { 4535 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4536 (__v4si)_mm_slli_epi32(__A, __B), 4537 (__v4si)_mm_setzero_si128()); 4538 } 4539 4540 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4541 _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B) 4542 { 4543 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4544 (__v8si)_mm256_slli_epi32(__A, __B), 4545 (__v8si)__W); 4546 } 4547 4548 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4549 _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, int __B) 4550 { 4551 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4552 (__v8si)_mm256_slli_epi32(__A, __B), 4553 (__v8si)_mm256_setzero_si256()); 4554 } 4555 4556 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4557 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4558 { 4559 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4560 (__v2di)_mm_sll_epi64(__A, __B), 4561 (__v2di)__W); 4562 } 4563 4564 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4565 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B) 4566 { 4567 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4568 (__v2di)_mm_sll_epi64(__A, __B), 4569 (__v2di)_mm_setzero_si128()); 4570 } 4571 4572 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4573 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 4574 { 4575 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4576 (__v4di)_mm256_sll_epi64(__A, __B), 4577 (__v4di)__W); 4578 } 4579 4580 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4581 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B) 4582 { 4583 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4584 (__v4di)_mm256_sll_epi64(__A, __B), 4585 (__v4di)_mm256_setzero_si256()); 4586 } 4587 4588 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4589 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B) 4590 { 4591 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4592 (__v2di)_mm_slli_epi64(__A, __B), 4593 (__v2di)__W); 4594 } 4595 4596 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4597 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, int __B) 4598 { 4599 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4600 (__v2di)_mm_slli_epi64(__A, __B), 4601 (__v2di)_mm_setzero_si128()); 4602 } 4603 4604 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4605 _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B) 4606 { 4607 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4608 (__v4di)_mm256_slli_epi64(__A, __B), 4609 (__v4di)__W); 4610 } 4611 4612 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4613 _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, int __B) 4614 { 4615 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4616 (__v4di)_mm256_slli_epi64(__A, __B), 4617 (__v4di)_mm256_setzero_si256()); 4618 } 4619 4620 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4621 _mm_rorv_epi32 (__m128i __A, __m128i __B) 4622 { 4623 return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B); 4624 } 4625 4626 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4627 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4628 { 4629 return (__m128i)__builtin_ia32_selectd_128(__U, 4630 (__v4si)_mm_rorv_epi32(__A, __B), 4631 (__v4si)__W); 4632 } 4633 4634 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4635 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 4636 { 4637 return (__m128i)__builtin_ia32_selectd_128(__U, 4638 (__v4si)_mm_rorv_epi32(__A, __B), 4639 (__v4si)_mm_setzero_si128()); 4640 } 4641 4642 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4643 _mm256_rorv_epi32 (__m256i __A, __m256i __B) 4644 { 4645 return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B); 4646 } 4647 4648 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4649 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 4650 { 4651 return (__m256i)__builtin_ia32_selectd_256(__U, 4652 (__v8si)_mm256_rorv_epi32(__A, __B), 4653 (__v8si)__W); 4654 } 4655 4656 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4657 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 4658 { 4659 return (__m256i)__builtin_ia32_selectd_256(__U, 4660 (__v8si)_mm256_rorv_epi32(__A, __B), 4661 (__v8si)_mm256_setzero_si256()); 4662 } 4663 4664 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4665 _mm_rorv_epi64 (__m128i __A, __m128i __B) 4666 { 4667 return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B); 4668 } 4669 4670 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4671 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4672 { 4673 return (__m128i)__builtin_ia32_selectq_128(__U, 4674 (__v2di)_mm_rorv_epi64(__A, __B), 4675 (__v2di)__W); 4676 } 4677 4678 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4679 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 4680 { 4681 return (__m128i)__builtin_ia32_selectq_128(__U, 4682 (__v2di)_mm_rorv_epi64(__A, __B), 4683 (__v2di)_mm_setzero_si128()); 4684 } 4685 4686 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4687 _mm256_rorv_epi64 (__m256i __A, __m256i __B) 4688 { 4689 return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B); 4690 } 4691 4692 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4693 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 4694 { 4695 return (__m256i)__builtin_ia32_selectq_256(__U, 4696 (__v4di)_mm256_rorv_epi64(__A, __B), 4697 (__v4di)__W); 4698 } 4699 4700 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4701 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 4702 { 4703 return (__m256i)__builtin_ia32_selectq_256(__U, 4704 (__v4di)_mm256_rorv_epi64(__A, __B), 4705 (__v4di)_mm256_setzero_si256()); 4706 } 4707 4708 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4709 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4710 { 4711 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4712 (__v2di)_mm_sllv_epi64(__X, __Y), 4713 (__v2di)__W); 4714 } 4715 4716 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4717 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 4718 { 4719 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4720 (__v2di)_mm_sllv_epi64(__X, __Y), 4721 (__v2di)_mm_setzero_si128()); 4722 } 4723 4724 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4725 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4726 { 4727 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4728 (__v4di)_mm256_sllv_epi64(__X, __Y), 4729 (__v4di)__W); 4730 } 4731 4732 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4733 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) 4734 { 4735 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4736 (__v4di)_mm256_sllv_epi64(__X, __Y), 4737 (__v4di)_mm256_setzero_si256()); 4738 } 4739 4740 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4741 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4742 { 4743 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4744 (__v4si)_mm_sllv_epi32(__X, __Y), 4745 (__v4si)__W); 4746 } 4747 4748 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4749 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 4750 { 4751 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4752 (__v4si)_mm_sllv_epi32(__X, __Y), 4753 (__v4si)_mm_setzero_si128()); 4754 } 4755 4756 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4757 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4758 { 4759 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4760 (__v8si)_mm256_sllv_epi32(__X, __Y), 4761 (__v8si)__W); 4762 } 4763 4764 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4765 _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 4766 { 4767 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4768 (__v8si)_mm256_sllv_epi32(__X, __Y), 4769 (__v8si)_mm256_setzero_si256()); 4770 } 4771 4772 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4773 _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4774 { 4775 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4776 (__v2di)_mm_srlv_epi64(__X, __Y), 4777 (__v2di)__W); 4778 } 4779 4780 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4781 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 4782 { 4783 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4784 (__v2di)_mm_srlv_epi64(__X, __Y), 4785 (__v2di)_mm_setzero_si128()); 4786 } 4787 4788 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4789 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4790 { 4791 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4792 (__v4di)_mm256_srlv_epi64(__X, __Y), 4793 (__v4di)__W); 4794 } 4795 4796 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4797 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) 4798 { 4799 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4800 (__v4di)_mm256_srlv_epi64(__X, __Y), 4801 (__v4di)_mm256_setzero_si256()); 4802 } 4803 4804 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4805 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4806 { 4807 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4808 (__v4si)_mm_srlv_epi32(__X, __Y), 4809 (__v4si)__W); 4810 } 4811 4812 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4813 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 4814 { 4815 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4816 (__v4si)_mm_srlv_epi32(__X, __Y), 4817 (__v4si)_mm_setzero_si128()); 4818 } 4819 4820 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4821 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4822 { 4823 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4824 (__v8si)_mm256_srlv_epi32(__X, __Y), 4825 (__v8si)__W); 4826 } 4827 4828 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4829 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 4830 { 4831 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4832 (__v8si)_mm256_srlv_epi32(__X, __Y), 4833 (__v8si)_mm256_setzero_si256()); 4834 } 4835 4836 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4837 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4838 { 4839 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4840 (__v4si)_mm_srl_epi32(__A, __B), 4841 (__v4si)__W); 4842 } 4843 4844 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4845 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B) 4846 { 4847 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4848 (__v4si)_mm_srl_epi32(__A, __B), 4849 (__v4si)_mm_setzero_si128()); 4850 } 4851 4852 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4853 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 4854 { 4855 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4856 (__v8si)_mm256_srl_epi32(__A, __B), 4857 (__v8si)__W); 4858 } 4859 4860 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4861 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B) 4862 { 4863 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4864 (__v8si)_mm256_srl_epi32(__A, __B), 4865 (__v8si)_mm256_setzero_si256()); 4866 } 4867 4868 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4869 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B) 4870 { 4871 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4872 (__v4si)_mm_srli_epi32(__A, __B), 4873 (__v4si)__W); 4874 } 4875 4876 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4877 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, int __B) 4878 { 4879 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4880 (__v4si)_mm_srli_epi32(__A, __B), 4881 (__v4si)_mm_setzero_si128()); 4882 } 4883 4884 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4885 _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B) 4886 { 4887 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4888 (__v8si)_mm256_srli_epi32(__A, __B), 4889 (__v8si)__W); 4890 } 4891 4892 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4893 _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, int __B) 4894 { 4895 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4896 (__v8si)_mm256_srli_epi32(__A, __B), 4897 (__v8si)_mm256_setzero_si256()); 4898 } 4899 4900 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4901 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4902 { 4903 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4904 (__v2di)_mm_srl_epi64(__A, __B), 4905 (__v2di)__W); 4906 } 4907 4908 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4909 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B) 4910 { 4911 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4912 (__v2di)_mm_srl_epi64(__A, __B), 4913 (__v2di)_mm_setzero_si128()); 4914 } 4915 4916 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4917 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 4918 { 4919 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4920 (__v4di)_mm256_srl_epi64(__A, __B), 4921 (__v4di)__W); 4922 } 4923 4924 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4925 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B) 4926 { 4927 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4928 (__v4di)_mm256_srl_epi64(__A, __B), 4929 (__v4di)_mm256_setzero_si256()); 4930 } 4931 4932 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4933 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B) 4934 { 4935 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4936 (__v2di)_mm_srli_epi64(__A, __B), 4937 (__v2di)__W); 4938 } 4939 4940 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4941 _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, int __B) 4942 { 4943 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4944 (__v2di)_mm_srli_epi64(__A, __B), 4945 (__v2di)_mm_setzero_si128()); 4946 } 4947 4948 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4949 _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B) 4950 { 4951 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4952 (__v4di)_mm256_srli_epi64(__A, __B), 4953 (__v4di)__W); 4954 } 4955 4956 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4957 _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, int __B) 4958 { 4959 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4960 (__v4di)_mm256_srli_epi64(__A, __B), 4961 (__v4di)_mm256_setzero_si256()); 4962 } 4963 4964 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4965 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4966 { 4967 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4968 (__v4si)_mm_srav_epi32(__X, __Y), 4969 (__v4si)__W); 4970 } 4971 4972 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4973 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 4974 { 4975 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4976 (__v4si)_mm_srav_epi32(__X, __Y), 4977 (__v4si)_mm_setzero_si128()); 4978 } 4979 4980 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4981 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4982 { 4983 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4984 (__v8si)_mm256_srav_epi32(__X, __Y), 4985 (__v8si)__W); 4986 } 4987 4988 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4989 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 4990 { 4991 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4992 (__v8si)_mm256_srav_epi32(__X, __Y), 4993 (__v8si)_mm256_setzero_si256()); 4994 } 4995 4996 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4997 _mm_srav_epi64(__m128i __X, __m128i __Y) 4998 { 4999 return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y); 5000 } 5001 5002 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5003 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 5004 { 5005 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5006 (__v2di)_mm_srav_epi64(__X, __Y), 5007 (__v2di)__W); 5008 } 5009 5010 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5011 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 5012 { 5013 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5014 (__v2di)_mm_srav_epi64(__X, __Y), 5015 (__v2di)_mm_setzero_si128()); 5016 } 5017 5018 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5019 _mm256_srav_epi64(__m256i __X, __m256i __Y) 5020 { 5021 return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y); 5022 } 5023 5024 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5025 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 5026 { 5027 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5028 (__v4di)_mm256_srav_epi64(__X, __Y), 5029 (__v4di)__W); 5030 } 5031 5032 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5033 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y) 5034 { 5035 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5036 (__v4di)_mm256_srav_epi64(__X, __Y), 5037 (__v4di)_mm256_setzero_si256()); 5038 } 5039 5040 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5041 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 5042 { 5043 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 5044 (__v4si) __A, 5045 (__v4si) __W); 5046 } 5047 5048 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5049 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A) 5050 { 5051 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 5052 (__v4si) __A, 5053 (__v4si) _mm_setzero_si128 ()); 5054 } 5055 5056 5057 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5058 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A) 5059 { 5060 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 5061 (__v8si) __A, 5062 (__v8si) __W); 5063 } 5064 5065 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5066 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A) 5067 { 5068 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 5069 (__v8si) __A, 5070 (__v8si) _mm256_setzero_si256 ()); 5071 } 5072 5073 static __inline __m128i __DEFAULT_FN_ATTRS128 5074 _mm_load_epi32 (void const *__P) 5075 { 5076 return *(__m128i *) __P; 5077 } 5078 5079 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5080 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P) 5081 { 5082 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P, 5083 (__v4si) __W, 5084 (__mmask8) 5085 __U); 5086 } 5087 5088 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5089 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P) 5090 { 5091 return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P, 5092 (__v4si) 5093 _mm_setzero_si128 (), 5094 (__mmask8) 5095 __U); 5096 } 5097 5098 static __inline __m256i __DEFAULT_FN_ATTRS256 5099 _mm256_load_epi32 (void const *__P) 5100 { 5101 return *(__m256i *) __P; 5102 } 5103 5104 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5105 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P) 5106 { 5107 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P, 5108 (__v8si) __W, 5109 (__mmask8) 5110 __U); 5111 } 5112 5113 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5114 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P) 5115 { 5116 return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P, 5117 (__v8si) 5118 _mm256_setzero_si256 (), 5119 (__mmask8) 5120 __U); 5121 } 5122 5123 static __inline void __DEFAULT_FN_ATTRS128 5124 _mm_store_epi32 (void *__P, __m128i __A) 5125 { 5126 *(__m128i *) __P = __A; 5127 } 5128 5129 static __inline__ void __DEFAULT_FN_ATTRS128 5130 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A) 5131 { 5132 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P, 5133 (__v4si) __A, 5134 (__mmask8) __U); 5135 } 5136 5137 static __inline void __DEFAULT_FN_ATTRS256 5138 _mm256_store_epi32 (void *__P, __m256i __A) 5139 { 5140 *(__m256i *) __P = __A; 5141 } 5142 5143 static __inline__ void __DEFAULT_FN_ATTRS256 5144 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A) 5145 { 5146 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P, 5147 (__v8si) __A, 5148 (__mmask8) __U); 5149 } 5150 5151 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5152 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 5153 { 5154 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 5155 (__v2di) __A, 5156 (__v2di) __W); 5157 } 5158 5159 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5160 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A) 5161 { 5162 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 5163 (__v2di) __A, 5164 (__v2di) _mm_setzero_si128 ()); 5165 } 5166 5167 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5168 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 5169 { 5170 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 5171 (__v4di) __A, 5172 (__v4di) __W); 5173 } 5174 5175 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5176 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A) 5177 { 5178 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 5179 (__v4di) __A, 5180 (__v4di) _mm256_setzero_si256 ()); 5181 } 5182 5183 static __inline __m128i __DEFAULT_FN_ATTRS128 5184 _mm_load_epi64 (void const *__P) 5185 { 5186 return *(__m128i *) __P; 5187 } 5188 5189 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5190 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P) 5191 { 5192 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P, 5193 (__v2di) __W, 5194 (__mmask8) 5195 __U); 5196 } 5197 5198 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5199 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P) 5200 { 5201 return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P, 5202 (__v2di) 5203 _mm_setzero_si128 (), 5204 (__mmask8) 5205 __U); 5206 } 5207 5208 static __inline __m256i __DEFAULT_FN_ATTRS256 5209 _mm256_load_epi64 (void const *__P) 5210 { 5211 return *(__m256i *) __P; 5212 } 5213 5214 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5215 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P) 5216 { 5217 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P, 5218 (__v4di) __W, 5219 (__mmask8) 5220 __U); 5221 } 5222 5223 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5224 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P) 5225 { 5226 return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P, 5227 (__v4di) 5228 _mm256_setzero_si256 (), 5229 (__mmask8) 5230 __U); 5231 } 5232 5233 static __inline void __DEFAULT_FN_ATTRS128 5234 _mm_store_epi64 (void *__P, __m128i __A) 5235 { 5236 *(__m128i *) __P = __A; 5237 } 5238 5239 static __inline__ void __DEFAULT_FN_ATTRS128 5240 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A) 5241 { 5242 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P, 5243 (__v2di) __A, 5244 (__mmask8) __U); 5245 } 5246 5247 static __inline void __DEFAULT_FN_ATTRS256 5248 _mm256_store_epi64 (void *__P, __m256i __A) 5249 { 5250 *(__m256i *) __P = __A; 5251 } 5252 5253 static __inline__ void __DEFAULT_FN_ATTRS256 5254 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A) 5255 { 5256 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P, 5257 (__v4di) __A, 5258 (__mmask8) __U); 5259 } 5260 5261 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5262 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A) 5263 { 5264 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5265 (__v2df)_mm_movedup_pd(__A), 5266 (__v2df)__W); 5267 } 5268 5269 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5270 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A) 5271 { 5272 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5273 (__v2df)_mm_movedup_pd(__A), 5274 (__v2df)_mm_setzero_pd()); 5275 } 5276 5277 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5278 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A) 5279 { 5280 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5281 (__v4df)_mm256_movedup_pd(__A), 5282 (__v4df)__W); 5283 } 5284 5285 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5286 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A) 5287 { 5288 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5289 (__v4df)_mm256_movedup_pd(__A), 5290 (__v4df)_mm256_setzero_pd()); 5291 } 5292 5293 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5294 _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) 5295 { 5296 return (__m128i)__builtin_ia32_selectd_128(__M, 5297 (__v4si) _mm_set1_epi32(__A), 5298 (__v4si)__O); 5299 } 5300 5301 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5302 _mm_maskz_set1_epi32( __mmask8 __M, int __A) 5303 { 5304 return (__m128i)__builtin_ia32_selectd_128(__M, 5305 (__v4si) _mm_set1_epi32(__A), 5306 (__v4si)_mm_setzero_si128()); 5307 } 5308 5309 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5310 _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) 5311 { 5312 return (__m256i)__builtin_ia32_selectd_256(__M, 5313 (__v8si) _mm256_set1_epi32(__A), 5314 (__v8si)__O); 5315 } 5316 5317 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5318 _mm256_maskz_set1_epi32( __mmask8 __M, int __A) 5319 { 5320 return (__m256i)__builtin_ia32_selectd_256(__M, 5321 (__v8si) _mm256_set1_epi32(__A), 5322 (__v8si)_mm256_setzero_si256()); 5323 } 5324 5325 5326 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5327 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) 5328 { 5329 return (__m128i) __builtin_ia32_selectq_128(__M, 5330 (__v2di) _mm_set1_epi64x(__A), 5331 (__v2di) __O); 5332 } 5333 5334 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5335 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A) 5336 { 5337 return (__m128i) __builtin_ia32_selectq_128(__M, 5338 (__v2di) _mm_set1_epi64x(__A), 5339 (__v2di) _mm_setzero_si128()); 5340 } 5341 5342 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5343 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A) 5344 { 5345 return (__m256i) __builtin_ia32_selectq_256(__M, 5346 (__v4di) _mm256_set1_epi64x(__A), 5347 (__v4di) __O) ; 5348 } 5349 5350 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5351 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) 5352 { 5353 return (__m256i) __builtin_ia32_selectq_256(__M, 5354 (__v4di) _mm256_set1_epi64x(__A), 5355 (__v4di) _mm256_setzero_si256()); 5356 } 5357 5358 #define _mm_fixupimm_pd(A, B, C, imm) \ 5359 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ 5360 (__v2df)(__m128d)(B), \ 5361 (__v2di)(__m128i)(C), (int)(imm), \ 5362 (__mmask8)-1) 5363 5364 #define _mm_mask_fixupimm_pd(A, U, B, C, imm) \ 5365 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ 5366 (__v2df)(__m128d)(B), \ 5367 (__v2di)(__m128i)(C), (int)(imm), \ 5368 (__mmask8)(U)) 5369 5370 #define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \ 5371 (__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \ 5372 (__v2df)(__m128d)(B), \ 5373 (__v2di)(__m128i)(C), \ 5374 (int)(imm), (__mmask8)(U)) 5375 5376 #define _mm256_fixupimm_pd(A, B, C, imm) \ 5377 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ 5378 (__v4df)(__m256d)(B), \ 5379 (__v4di)(__m256i)(C), (int)(imm), \ 5380 (__mmask8)-1) 5381 5382 #define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \ 5383 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ 5384 (__v4df)(__m256d)(B), \ 5385 (__v4di)(__m256i)(C), (int)(imm), \ 5386 (__mmask8)(U)) 5387 5388 #define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \ 5389 (__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \ 5390 (__v4df)(__m256d)(B), \ 5391 (__v4di)(__m256i)(C), \ 5392 (int)(imm), (__mmask8)(U)) 5393 5394 #define _mm_fixupimm_ps(A, B, C, imm) \ 5395 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ 5396 (__v4sf)(__m128)(B), \ 5397 (__v4si)(__m128i)(C), (int)(imm), \ 5398 (__mmask8)-1) 5399 5400 #define _mm_mask_fixupimm_ps(A, U, B, C, imm) \ 5401 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ 5402 (__v4sf)(__m128)(B), \ 5403 (__v4si)(__m128i)(C), (int)(imm), \ 5404 (__mmask8)(U)) 5405 5406 #define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \ 5407 (__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \ 5408 (__v4sf)(__m128)(B), \ 5409 (__v4si)(__m128i)(C), (int)(imm), \ 5410 (__mmask8)(U)) 5411 5412 #define _mm256_fixupimm_ps(A, B, C, imm) \ 5413 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ 5414 (__v8sf)(__m256)(B), \ 5415 (__v8si)(__m256i)(C), (int)(imm), \ 5416 (__mmask8)-1) 5417 5418 #define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \ 5419 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ 5420 (__v8sf)(__m256)(B), \ 5421 (__v8si)(__m256i)(C), (int)(imm), \ 5422 (__mmask8)(U)) 5423 5424 #define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \ 5425 (__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \ 5426 (__v8sf)(__m256)(B), \ 5427 (__v8si)(__m256i)(C), (int)(imm), \ 5428 (__mmask8)(U)) 5429 5430 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5431 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P) 5432 { 5433 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P, 5434 (__v2df) __W, 5435 (__mmask8) __U); 5436 } 5437 5438 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5439 _mm_maskz_load_pd (__mmask8 __U, void const *__P) 5440 { 5441 return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P, 5442 (__v2df) 5443 _mm_setzero_pd (), 5444 (__mmask8) __U); 5445 } 5446 5447 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5448 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P) 5449 { 5450 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P, 5451 (__v4df) __W, 5452 (__mmask8) __U); 5453 } 5454 5455 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5456 _mm256_maskz_load_pd (__mmask8 __U, void const *__P) 5457 { 5458 return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P, 5459 (__v4df) 5460 _mm256_setzero_pd (), 5461 (__mmask8) __U); 5462 } 5463 5464 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5465 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P) 5466 { 5467 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P, 5468 (__v4sf) __W, 5469 (__mmask8) __U); 5470 } 5471 5472 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5473 _mm_maskz_load_ps (__mmask8 __U, void const *__P) 5474 { 5475 return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P, 5476 (__v4sf) 5477 _mm_setzero_ps (), 5478 (__mmask8) __U); 5479 } 5480 5481 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5482 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P) 5483 { 5484 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P, 5485 (__v8sf) __W, 5486 (__mmask8) __U); 5487 } 5488 5489 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5490 _mm256_maskz_load_ps (__mmask8 __U, void const *__P) 5491 { 5492 return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P, 5493 (__v8sf) 5494 _mm256_setzero_ps (), 5495 (__mmask8) __U); 5496 } 5497 5498 static __inline __m128i __DEFAULT_FN_ATTRS128 5499 _mm_loadu_epi64 (void const *__P) 5500 { 5501 struct __loadu_epi64 { 5502 __m128i_u __v; 5503 } __attribute__((__packed__, __may_alias__)); 5504 return ((struct __loadu_epi64*)__P)->__v; 5505 } 5506 5507 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5508 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) 5509 { 5510 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P, 5511 (__v2di) __W, 5512 (__mmask8) __U); 5513 } 5514 5515 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5516 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 5517 { 5518 return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P, 5519 (__v2di) 5520 _mm_setzero_si128 (), 5521 (__mmask8) __U); 5522 } 5523 5524 static __inline __m256i __DEFAULT_FN_ATTRS256 5525 _mm256_loadu_epi64 (void const *__P) 5526 { 5527 struct __loadu_epi64 { 5528 __m256i_u __v; 5529 } __attribute__((__packed__, __may_alias__)); 5530 return ((struct __loadu_epi64*)__P)->__v; 5531 } 5532 5533 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5534 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) 5535 { 5536 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P, 5537 (__v4di) __W, 5538 (__mmask8) __U); 5539 } 5540 5541 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5542 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 5543 { 5544 return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P, 5545 (__v4di) 5546 _mm256_setzero_si256 (), 5547 (__mmask8) __U); 5548 } 5549 5550 static __inline __m128i __DEFAULT_FN_ATTRS128 5551 _mm_loadu_epi32 (void const *__P) 5552 { 5553 struct __loadu_epi32 { 5554 __m128i_u __v; 5555 } __attribute__((__packed__, __may_alias__)); 5556 return ((struct __loadu_epi32*)__P)->__v; 5557 } 5558 5559 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5560 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) 5561 { 5562 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P, 5563 (__v4si) __W, 5564 (__mmask8) __U); 5565 } 5566 5567 static __inline__ __m128i __DEFAULT_FN_ATTRS128 5568 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P) 5569 { 5570 return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P, 5571 (__v4si) 5572 _mm_setzero_si128 (), 5573 (__mmask8) __U); 5574 } 5575 5576 static __inline __m256i __DEFAULT_FN_ATTRS256 5577 _mm256_loadu_epi32 (void const *__P) 5578 { 5579 struct __loadu_epi32 { 5580 __m256i_u __v; 5581 } __attribute__((__packed__, __may_alias__)); 5582 return ((struct __loadu_epi32*)__P)->__v; 5583 } 5584 5585 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5586 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P) 5587 { 5588 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P, 5589 (__v8si) __W, 5590 (__mmask8) __U); 5591 } 5592 5593 static __inline__ __m256i __DEFAULT_FN_ATTRS256 5594 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P) 5595 { 5596 return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P, 5597 (__v8si) 5598 _mm256_setzero_si256 (), 5599 (__mmask8) __U); 5600 } 5601 5602 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5603 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P) 5604 { 5605 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P, 5606 (__v2df) __W, 5607 (__mmask8) __U); 5608 } 5609 5610 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5611 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P) 5612 { 5613 return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P, 5614 (__v2df) 5615 _mm_setzero_pd (), 5616 (__mmask8) __U); 5617 } 5618 5619 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5620 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P) 5621 { 5622 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P, 5623 (__v4df) __W, 5624 (__mmask8) __U); 5625 } 5626 5627 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5628 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P) 5629 { 5630 return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P, 5631 (__v4df) 5632 _mm256_setzero_pd (), 5633 (__mmask8) __U); 5634 } 5635 5636 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5637 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P) 5638 { 5639 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P, 5640 (__v4sf) __W, 5641 (__mmask8) __U); 5642 } 5643 5644 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5645 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P) 5646 { 5647 return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P, 5648 (__v4sf) 5649 _mm_setzero_ps (), 5650 (__mmask8) __U); 5651 } 5652 5653 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5654 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P) 5655 { 5656 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P, 5657 (__v8sf) __W, 5658 (__mmask8) __U); 5659 } 5660 5661 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5662 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P) 5663 { 5664 return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P, 5665 (__v8sf) 5666 _mm256_setzero_ps (), 5667 (__mmask8) __U); 5668 } 5669 5670 static __inline__ void __DEFAULT_FN_ATTRS128 5671 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A) 5672 { 5673 __builtin_ia32_storeapd128_mask ((__v2df *) __P, 5674 (__v2df) __A, 5675 (__mmask8) __U); 5676 } 5677 5678 static __inline__ void __DEFAULT_FN_ATTRS256 5679 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A) 5680 { 5681 __builtin_ia32_storeapd256_mask ((__v4df *) __P, 5682 (__v4df) __A, 5683 (__mmask8) __U); 5684 } 5685 5686 static __inline__ void __DEFAULT_FN_ATTRS128 5687 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A) 5688 { 5689 __builtin_ia32_storeaps128_mask ((__v4sf *) __P, 5690 (__v4sf) __A, 5691 (__mmask8) __U); 5692 } 5693 5694 static __inline__ void __DEFAULT_FN_ATTRS256 5695 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A) 5696 { 5697 __builtin_ia32_storeaps256_mask ((__v8sf *) __P, 5698 (__v8sf) __A, 5699 (__mmask8) __U); 5700 } 5701 5702 static __inline void __DEFAULT_FN_ATTRS128 5703 _mm_storeu_epi64 (void *__P, __m128i __A) 5704 { 5705 struct __storeu_epi64 { 5706 __m128i_u __v; 5707 } __attribute__((__packed__, __may_alias__)); 5708 ((struct __storeu_epi64*)__P)->__v = __A; 5709 } 5710 5711 static __inline__ void __DEFAULT_FN_ATTRS128 5712 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A) 5713 { 5714 __builtin_ia32_storedqudi128_mask ((__v2di *) __P, 5715 (__v2di) __A, 5716 (__mmask8) __U); 5717 } 5718 5719 static __inline void __DEFAULT_FN_ATTRS256 5720 _mm256_storeu_epi64 (void *__P, __m256i __A) 5721 { 5722 struct __storeu_epi64 { 5723 __m256i_u __v; 5724 } __attribute__((__packed__, __may_alias__)); 5725 ((struct __storeu_epi64*)__P)->__v = __A; 5726 } 5727 5728 static __inline__ void __DEFAULT_FN_ATTRS256 5729 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A) 5730 { 5731 __builtin_ia32_storedqudi256_mask ((__v4di *) __P, 5732 (__v4di) __A, 5733 (__mmask8) __U); 5734 } 5735 5736 static __inline void __DEFAULT_FN_ATTRS128 5737 _mm_storeu_epi32 (void *__P, __m128i __A) 5738 { 5739 struct __storeu_epi32 { 5740 __m128i_u __v; 5741 } __attribute__((__packed__, __may_alias__)); 5742 ((struct __storeu_epi32*)__P)->__v = __A; 5743 } 5744 5745 static __inline__ void __DEFAULT_FN_ATTRS128 5746 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A) 5747 { 5748 __builtin_ia32_storedqusi128_mask ((__v4si *) __P, 5749 (__v4si) __A, 5750 (__mmask8) __U); 5751 } 5752 5753 static __inline void __DEFAULT_FN_ATTRS256 5754 _mm256_storeu_epi32 (void *__P, __m256i __A) 5755 { 5756 struct __storeu_epi32 { 5757 __m256i_u __v; 5758 } __attribute__((__packed__, __may_alias__)); 5759 ((struct __storeu_epi32*)__P)->__v = __A; 5760 } 5761 5762 static __inline__ void __DEFAULT_FN_ATTRS256 5763 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A) 5764 { 5765 __builtin_ia32_storedqusi256_mask ((__v8si *) __P, 5766 (__v8si) __A, 5767 (__mmask8) __U); 5768 } 5769 5770 static __inline__ void __DEFAULT_FN_ATTRS128 5771 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A) 5772 { 5773 __builtin_ia32_storeupd128_mask ((__v2df *) __P, 5774 (__v2df) __A, 5775 (__mmask8) __U); 5776 } 5777 5778 static __inline__ void __DEFAULT_FN_ATTRS256 5779 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A) 5780 { 5781 __builtin_ia32_storeupd256_mask ((__v4df *) __P, 5782 (__v4df) __A, 5783 (__mmask8) __U); 5784 } 5785 5786 static __inline__ void __DEFAULT_FN_ATTRS128 5787 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A) 5788 { 5789 __builtin_ia32_storeups128_mask ((__v4sf *) __P, 5790 (__v4sf) __A, 5791 (__mmask8) __U); 5792 } 5793 5794 static __inline__ void __DEFAULT_FN_ATTRS256 5795 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A) 5796 { 5797 __builtin_ia32_storeups256_mask ((__v8sf *) __P, 5798 (__v8sf) __A, 5799 (__mmask8) __U); 5800 } 5801 5802 5803 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5804 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 5805 { 5806 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5807 (__v2df)_mm_unpackhi_pd(__A, __B), 5808 (__v2df)__W); 5809 } 5810 5811 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5812 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) 5813 { 5814 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5815 (__v2df)_mm_unpackhi_pd(__A, __B), 5816 (__v2df)_mm_setzero_pd()); 5817 } 5818 5819 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5820 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) 5821 { 5822 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5823 (__v4df)_mm256_unpackhi_pd(__A, __B), 5824 (__v4df)__W); 5825 } 5826 5827 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5828 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) 5829 { 5830 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5831 (__v4df)_mm256_unpackhi_pd(__A, __B), 5832 (__v4df)_mm256_setzero_pd()); 5833 } 5834 5835 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5836 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 5837 { 5838 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 5839 (__v4sf)_mm_unpackhi_ps(__A, __B), 5840 (__v4sf)__W); 5841 } 5842 5843 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5844 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) 5845 { 5846 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 5847 (__v4sf)_mm_unpackhi_ps(__A, __B), 5848 (__v4sf)_mm_setzero_ps()); 5849 } 5850 5851 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5852 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 5853 { 5854 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 5855 (__v8sf)_mm256_unpackhi_ps(__A, __B), 5856 (__v8sf)__W); 5857 } 5858 5859 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5860 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) 5861 { 5862 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 5863 (__v8sf)_mm256_unpackhi_ps(__A, __B), 5864 (__v8sf)_mm256_setzero_ps()); 5865 } 5866 5867 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5868 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 5869 { 5870 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5871 (__v2df)_mm_unpacklo_pd(__A, __B), 5872 (__v2df)__W); 5873 } 5874 5875 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5876 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) 5877 { 5878 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5879 (__v2df)_mm_unpacklo_pd(__A, __B), 5880 (__v2df)_mm_setzero_pd()); 5881 } 5882 5883 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5884 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) 5885 { 5886 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5887 (__v4df)_mm256_unpacklo_pd(__A, __B), 5888 (__v4df)__W); 5889 } 5890 5891 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5892 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) 5893 { 5894 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5895 (__v4df)_mm256_unpacklo_pd(__A, __B), 5896 (__v4df)_mm256_setzero_pd()); 5897 } 5898 5899 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5900 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 5901 { 5902 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 5903 (__v4sf)_mm_unpacklo_ps(__A, __B), 5904 (__v4sf)__W); 5905 } 5906 5907 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5908 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) 5909 { 5910 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 5911 (__v4sf)_mm_unpacklo_ps(__A, __B), 5912 (__v4sf)_mm_setzero_ps()); 5913 } 5914 5915 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5916 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 5917 { 5918 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 5919 (__v8sf)_mm256_unpacklo_ps(__A, __B), 5920 (__v8sf)__W); 5921 } 5922 5923 static __inline__ __m256 __DEFAULT_FN_ATTRS256 5924 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) 5925 { 5926 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 5927 (__v8sf)_mm256_unpacklo_ps(__A, __B), 5928 (__v8sf)_mm256_setzero_ps()); 5929 } 5930 5931 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5932 _mm_rcp14_pd (__m128d __A) 5933 { 5934 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 5935 (__v2df) 5936 _mm_setzero_pd (), 5937 (__mmask8) -1); 5938 } 5939 5940 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5941 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A) 5942 { 5943 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 5944 (__v2df) __W, 5945 (__mmask8) __U); 5946 } 5947 5948 static __inline__ __m128d __DEFAULT_FN_ATTRS128 5949 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A) 5950 { 5951 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 5952 (__v2df) 5953 _mm_setzero_pd (), 5954 (__mmask8) __U); 5955 } 5956 5957 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5958 _mm256_rcp14_pd (__m256d __A) 5959 { 5960 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 5961 (__v4df) 5962 _mm256_setzero_pd (), 5963 (__mmask8) -1); 5964 } 5965 5966 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5967 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A) 5968 { 5969 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 5970 (__v4df) __W, 5971 (__mmask8) __U); 5972 } 5973 5974 static __inline__ __m256d __DEFAULT_FN_ATTRS256 5975 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A) 5976 { 5977 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 5978 (__v4df) 5979 _mm256_setzero_pd (), 5980 (__mmask8) __U); 5981 } 5982 5983 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5984 _mm_rcp14_ps (__m128 __A) 5985 { 5986 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 5987 (__v4sf) 5988 _mm_setzero_ps (), 5989 (__mmask8) -1); 5990 } 5991 5992 static __inline__ __m128 __DEFAULT_FN_ATTRS128 5993 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A) 5994 { 5995 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 5996 (__v4sf) __W, 5997 (__mmask8) __U); 5998 } 5999 6000 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6001 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A) 6002 { 6003 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 6004 (__v4sf) 6005 _mm_setzero_ps (), 6006 (__mmask8) __U); 6007 } 6008 6009 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6010 _mm256_rcp14_ps (__m256 __A) 6011 { 6012 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6013 (__v8sf) 6014 _mm256_setzero_ps (), 6015 (__mmask8) -1); 6016 } 6017 6018 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6019 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A) 6020 { 6021 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6022 (__v8sf) __W, 6023 (__mmask8) __U); 6024 } 6025 6026 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6027 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A) 6028 { 6029 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6030 (__v8sf) 6031 _mm256_setzero_ps (), 6032 (__mmask8) __U); 6033 } 6034 6035 #define _mm_mask_permute_pd(W, U, X, C) \ 6036 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6037 (__v2df)_mm_permute_pd((X), (C)), \ 6038 (__v2df)(__m128d)(W)) 6039 6040 #define _mm_maskz_permute_pd(U, X, C) \ 6041 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6042 (__v2df)_mm_permute_pd((X), (C)), \ 6043 (__v2df)_mm_setzero_pd()) 6044 6045 #define _mm256_mask_permute_pd(W, U, X, C) \ 6046 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6047 (__v4df)_mm256_permute_pd((X), (C)), \ 6048 (__v4df)(__m256d)(W)) 6049 6050 #define _mm256_maskz_permute_pd(U, X, C) \ 6051 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6052 (__v4df)_mm256_permute_pd((X), (C)), \ 6053 (__v4df)_mm256_setzero_pd()) 6054 6055 #define _mm_mask_permute_ps(W, U, X, C) \ 6056 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6057 (__v4sf)_mm_permute_ps((X), (C)), \ 6058 (__v4sf)(__m128)(W)) 6059 6060 #define _mm_maskz_permute_ps(U, X, C) \ 6061 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6062 (__v4sf)_mm_permute_ps((X), (C)), \ 6063 (__v4sf)_mm_setzero_ps()) 6064 6065 #define _mm256_mask_permute_ps(W, U, X, C) \ 6066 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6067 (__v8sf)_mm256_permute_ps((X), (C)), \ 6068 (__v8sf)(__m256)(W)) 6069 6070 #define _mm256_maskz_permute_ps(U, X, C) \ 6071 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6072 (__v8sf)_mm256_permute_ps((X), (C)), \ 6073 (__v8sf)_mm256_setzero_ps()) 6074 6075 static __inline__ __m128d __DEFAULT_FN_ATTRS128 6076 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) 6077 { 6078 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6079 (__v2df)_mm_permutevar_pd(__A, __C), 6080 (__v2df)__W); 6081 } 6082 6083 static __inline__ __m128d __DEFAULT_FN_ATTRS128 6084 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) 6085 { 6086 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6087 (__v2df)_mm_permutevar_pd(__A, __C), 6088 (__v2df)_mm_setzero_pd()); 6089 } 6090 6091 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6092 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) 6093 { 6094 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6095 (__v4df)_mm256_permutevar_pd(__A, __C), 6096 (__v4df)__W); 6097 } 6098 6099 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6100 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) 6101 { 6102 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6103 (__v4df)_mm256_permutevar_pd(__A, __C), 6104 (__v4df)_mm256_setzero_pd()); 6105 } 6106 6107 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6108 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) 6109 { 6110 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6111 (__v4sf)_mm_permutevar_ps(__A, __C), 6112 (__v4sf)__W); 6113 } 6114 6115 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6116 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) 6117 { 6118 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6119 (__v4sf)_mm_permutevar_ps(__A, __C), 6120 (__v4sf)_mm_setzero_ps()); 6121 } 6122 6123 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6124 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) 6125 { 6126 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6127 (__v8sf)_mm256_permutevar_ps(__A, __C), 6128 (__v8sf)__W); 6129 } 6130 6131 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6132 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) 6133 { 6134 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6135 (__v8sf)_mm256_permutevar_ps(__A, __C), 6136 (__v8sf)_mm256_setzero_ps()); 6137 } 6138 6139 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6140 _mm_test_epi32_mask (__m128i __A, __m128i __B) 6141 { 6142 return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 6143 } 6144 6145 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6146 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) 6147 { 6148 return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B), 6149 _mm_setzero_si128()); 6150 } 6151 6152 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6153 _mm256_test_epi32_mask (__m256i __A, __m256i __B) 6154 { 6155 return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B), 6156 _mm256_setzero_si256()); 6157 } 6158 6159 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6160 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 6161 { 6162 return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B), 6163 _mm256_setzero_si256()); 6164 } 6165 6166 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6167 _mm_test_epi64_mask (__m128i __A, __m128i __B) 6168 { 6169 return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 6170 } 6171 6172 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6173 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) 6174 { 6175 return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B), 6176 _mm_setzero_si128()); 6177 } 6178 6179 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6180 _mm256_test_epi64_mask (__m256i __A, __m256i __B) 6181 { 6182 return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B), 6183 _mm256_setzero_si256()); 6184 } 6185 6186 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6187 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) 6188 { 6189 return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B), 6190 _mm256_setzero_si256()); 6191 } 6192 6193 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6194 _mm_testn_epi32_mask (__m128i __A, __m128i __B) 6195 { 6196 return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 6197 } 6198 6199 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6200 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) 6201 { 6202 return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B), 6203 _mm_setzero_si128()); 6204 } 6205 6206 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6207 _mm256_testn_epi32_mask (__m256i __A, __m256i __B) 6208 { 6209 return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B), 6210 _mm256_setzero_si256()); 6211 } 6212 6213 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6214 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 6215 { 6216 return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B), 6217 _mm256_setzero_si256()); 6218 } 6219 6220 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6221 _mm_testn_epi64_mask (__m128i __A, __m128i __B) 6222 { 6223 return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 6224 } 6225 6226 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6227 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) 6228 { 6229 return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B), 6230 _mm_setzero_si128()); 6231 } 6232 6233 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6234 _mm256_testn_epi64_mask (__m256i __A, __m256i __B) 6235 { 6236 return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B), 6237 _mm256_setzero_si256()); 6238 } 6239 6240 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6241 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) 6242 { 6243 return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B), 6244 _mm256_setzero_si256()); 6245 } 6246 6247 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6248 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6249 { 6250 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6251 (__v4si)_mm_unpackhi_epi32(__A, __B), 6252 (__v4si)__W); 6253 } 6254 6255 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6256 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6257 { 6258 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6259 (__v4si)_mm_unpackhi_epi32(__A, __B), 6260 (__v4si)_mm_setzero_si128()); 6261 } 6262 6263 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6264 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6265 { 6266 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6267 (__v8si)_mm256_unpackhi_epi32(__A, __B), 6268 (__v8si)__W); 6269 } 6270 6271 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6272 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) 6273 { 6274 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6275 (__v8si)_mm256_unpackhi_epi32(__A, __B), 6276 (__v8si)_mm256_setzero_si256()); 6277 } 6278 6279 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6280 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6281 { 6282 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6283 (__v2di)_mm_unpackhi_epi64(__A, __B), 6284 (__v2di)__W); 6285 } 6286 6287 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6288 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6289 { 6290 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6291 (__v2di)_mm_unpackhi_epi64(__A, __B), 6292 (__v2di)_mm_setzero_si128()); 6293 } 6294 6295 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6296 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6297 { 6298 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6299 (__v4di)_mm256_unpackhi_epi64(__A, __B), 6300 (__v4di)__W); 6301 } 6302 6303 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6304 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) 6305 { 6306 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6307 (__v4di)_mm256_unpackhi_epi64(__A, __B), 6308 (__v4di)_mm256_setzero_si256()); 6309 } 6310 6311 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6312 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6313 { 6314 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6315 (__v4si)_mm_unpacklo_epi32(__A, __B), 6316 (__v4si)__W); 6317 } 6318 6319 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6320 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6321 { 6322 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6323 (__v4si)_mm_unpacklo_epi32(__A, __B), 6324 (__v4si)_mm_setzero_si128()); 6325 } 6326 6327 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6328 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6329 { 6330 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6331 (__v8si)_mm256_unpacklo_epi32(__A, __B), 6332 (__v8si)__W); 6333 } 6334 6335 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6336 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) 6337 { 6338 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6339 (__v8si)_mm256_unpacklo_epi32(__A, __B), 6340 (__v8si)_mm256_setzero_si256()); 6341 } 6342 6343 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6344 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6345 { 6346 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6347 (__v2di)_mm_unpacklo_epi64(__A, __B), 6348 (__v2di)__W); 6349 } 6350 6351 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6352 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6353 { 6354 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6355 (__v2di)_mm_unpacklo_epi64(__A, __B), 6356 (__v2di)_mm_setzero_si128()); 6357 } 6358 6359 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6360 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6361 { 6362 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6363 (__v4di)_mm256_unpacklo_epi64(__A, __B), 6364 (__v4di)__W); 6365 } 6366 6367 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6368 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) 6369 { 6370 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6371 (__v4di)_mm256_unpacklo_epi64(__A, __B), 6372 (__v4di)_mm256_setzero_si256()); 6373 } 6374 6375 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6376 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6377 { 6378 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6379 (__v4si)_mm_sra_epi32(__A, __B), 6380 (__v4si)__W); 6381 } 6382 6383 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6384 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6385 { 6386 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6387 (__v4si)_mm_sra_epi32(__A, __B), 6388 (__v4si)_mm_setzero_si128()); 6389 } 6390 6391 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6392 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 6393 { 6394 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6395 (__v8si)_mm256_sra_epi32(__A, __B), 6396 (__v8si)__W); 6397 } 6398 6399 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6400 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) 6401 { 6402 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6403 (__v8si)_mm256_sra_epi32(__A, __B), 6404 (__v8si)_mm256_setzero_si256()); 6405 } 6406 6407 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6408 _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B) 6409 { 6410 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6411 (__v4si)_mm_srai_epi32(__A, __B), 6412 (__v4si)__W); 6413 } 6414 6415 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6416 _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, int __B) 6417 { 6418 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6419 (__v4si)_mm_srai_epi32(__A, __B), 6420 (__v4si)_mm_setzero_si128()); 6421 } 6422 6423 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6424 _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B) 6425 { 6426 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6427 (__v8si)_mm256_srai_epi32(__A, __B), 6428 (__v8si)__W); 6429 } 6430 6431 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6432 _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, int __B) 6433 { 6434 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6435 (__v8si)_mm256_srai_epi32(__A, __B), 6436 (__v8si)_mm256_setzero_si256()); 6437 } 6438 6439 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6440 _mm_sra_epi64(__m128i __A, __m128i __B) 6441 { 6442 return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B); 6443 } 6444 6445 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6446 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6447 { 6448 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6449 (__v2di)_mm_sra_epi64(__A, __B), \ 6450 (__v2di)__W); 6451 } 6452 6453 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6454 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6455 { 6456 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6457 (__v2di)_mm_sra_epi64(__A, __B), \ 6458 (__v2di)_mm_setzero_si128()); 6459 } 6460 6461 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6462 _mm256_sra_epi64(__m256i __A, __m128i __B) 6463 { 6464 return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B); 6465 } 6466 6467 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6468 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 6469 { 6470 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6471 (__v4di)_mm256_sra_epi64(__A, __B), \ 6472 (__v4di)__W); 6473 } 6474 6475 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6476 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) 6477 { 6478 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6479 (__v4di)_mm256_sra_epi64(__A, __B), \ 6480 (__v4di)_mm256_setzero_si256()); 6481 } 6482 6483 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6484 _mm_srai_epi64(__m128i __A, int __imm) 6485 { 6486 return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, __imm); 6487 } 6488 6489 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6490 _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __imm) 6491 { 6492 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6493 (__v2di)_mm_srai_epi64(__A, __imm), \ 6494 (__v2di)__W); 6495 } 6496 6497 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6498 _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, int __imm) 6499 { 6500 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6501 (__v2di)_mm_srai_epi64(__A, __imm), \ 6502 (__v2di)_mm_setzero_si128()); 6503 } 6504 6505 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6506 _mm256_srai_epi64(__m256i __A, int __imm) 6507 { 6508 return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, __imm); 6509 } 6510 6511 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6512 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __imm) 6513 { 6514 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6515 (__v4di)_mm256_srai_epi64(__A, __imm), \ 6516 (__v4di)__W); 6517 } 6518 6519 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6520 _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm) 6521 { 6522 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6523 (__v4di)_mm256_srai_epi64(__A, __imm), \ 6524 (__v4di)_mm256_setzero_si256()); 6525 } 6526 6527 #define _mm_ternarylogic_epi32(A, B, C, imm) \ 6528 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \ 6529 (__v4si)(__m128i)(B), \ 6530 (__v4si)(__m128i)(C), (int)(imm), \ 6531 (__mmask8)-1) 6532 6533 #define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \ 6534 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \ 6535 (__v4si)(__m128i)(B), \ 6536 (__v4si)(__m128i)(C), (int)(imm), \ 6537 (__mmask8)(U)) 6538 6539 #define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \ 6540 (__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \ 6541 (__v4si)(__m128i)(B), \ 6542 (__v4si)(__m128i)(C), (int)(imm), \ 6543 (__mmask8)(U)) 6544 6545 #define _mm256_ternarylogic_epi32(A, B, C, imm) \ 6546 (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \ 6547 (__v8si)(__m256i)(B), \ 6548 (__v8si)(__m256i)(C), (int)(imm), \ 6549 (__mmask8)-1) 6550 6551 #define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \ 6552 (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \ 6553 (__v8si)(__m256i)(B), \ 6554 (__v8si)(__m256i)(C), (int)(imm), \ 6555 (__mmask8)(U)) 6556 6557 #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \ 6558 (__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \ 6559 (__v8si)(__m256i)(B), \ 6560 (__v8si)(__m256i)(C), (int)(imm), \ 6561 (__mmask8)(U)) 6562 6563 #define _mm_ternarylogic_epi64(A, B, C, imm) \ 6564 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \ 6565 (__v2di)(__m128i)(B), \ 6566 (__v2di)(__m128i)(C), (int)(imm), \ 6567 (__mmask8)-1) 6568 6569 #define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \ 6570 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \ 6571 (__v2di)(__m128i)(B), \ 6572 (__v2di)(__m128i)(C), (int)(imm), \ 6573 (__mmask8)(U)) 6574 6575 #define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \ 6576 (__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \ 6577 (__v2di)(__m128i)(B), \ 6578 (__v2di)(__m128i)(C), (int)(imm), \ 6579 (__mmask8)(U)) 6580 6581 #define _mm256_ternarylogic_epi64(A, B, C, imm) \ 6582 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \ 6583 (__v4di)(__m256i)(B), \ 6584 (__v4di)(__m256i)(C), (int)(imm), \ 6585 (__mmask8)-1) 6586 6587 #define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \ 6588 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \ 6589 (__v4di)(__m256i)(B), \ 6590 (__v4di)(__m256i)(C), (int)(imm), \ 6591 (__mmask8)(U)) 6592 6593 #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \ 6594 (__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \ 6595 (__v4di)(__m256i)(B), \ 6596 (__v4di)(__m256i)(C), (int)(imm), \ 6597 (__mmask8)(U)) 6598 6599 6600 6601 #define _mm256_shuffle_f32x4(A, B, imm) \ 6602 (__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \ 6603 (__v8sf)(__m256)(B), (int)(imm)) 6604 6605 #define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \ 6606 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6607 (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \ 6608 (__v8sf)(__m256)(W)) 6609 6610 #define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \ 6611 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6612 (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \ 6613 (__v8sf)_mm256_setzero_ps()) 6614 6615 #define _mm256_shuffle_f64x2(A, B, imm) \ 6616 (__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \ 6617 (__v4df)(__m256d)(B), (int)(imm)) 6618 6619 #define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \ 6620 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6621 (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \ 6622 (__v4df)(__m256d)(W)) 6623 6624 #define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \ 6625 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6626 (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \ 6627 (__v4df)_mm256_setzero_pd()) 6628 6629 #define _mm256_shuffle_i32x4(A, B, imm) \ 6630 (__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \ 6631 (__v8si)(__m256i)(B), (int)(imm)) 6632 6633 #define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \ 6634 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 6635 (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \ 6636 (__v8si)(__m256i)(W)) 6637 6638 #define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \ 6639 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 6640 (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \ 6641 (__v8si)_mm256_setzero_si256()) 6642 6643 #define _mm256_shuffle_i64x2(A, B, imm) \ 6644 (__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \ 6645 (__v4di)(__m256i)(B), (int)(imm)) 6646 6647 #define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \ 6648 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 6649 (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \ 6650 (__v4di)(__m256i)(W)) 6651 6652 6653 #define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \ 6654 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 6655 (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \ 6656 (__v4di)_mm256_setzero_si256()) 6657 6658 #define _mm_mask_shuffle_pd(W, U, A, B, M) \ 6659 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6660 (__v2df)_mm_shuffle_pd((A), (B), (M)), \ 6661 (__v2df)(__m128d)(W)) 6662 6663 #define _mm_maskz_shuffle_pd(U, A, B, M) \ 6664 (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6665 (__v2df)_mm_shuffle_pd((A), (B), (M)), \ 6666 (__v2df)_mm_setzero_pd()) 6667 6668 #define _mm256_mask_shuffle_pd(W, U, A, B, M) \ 6669 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6670 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ 6671 (__v4df)(__m256d)(W)) 6672 6673 #define _mm256_maskz_shuffle_pd(U, A, B, M) \ 6674 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6675 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ 6676 (__v4df)_mm256_setzero_pd()) 6677 6678 #define _mm_mask_shuffle_ps(W, U, A, B, M) \ 6679 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6680 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ 6681 (__v4sf)(__m128)(W)) 6682 6683 #define _mm_maskz_shuffle_ps(U, A, B, M) \ 6684 (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6685 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ 6686 (__v4sf)_mm_setzero_ps()) 6687 6688 #define _mm256_mask_shuffle_ps(W, U, A, B, M) \ 6689 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6690 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ 6691 (__v8sf)(__m256)(W)) 6692 6693 #define _mm256_maskz_shuffle_ps(U, A, B, M) \ 6694 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6695 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ 6696 (__v8sf)_mm256_setzero_ps()) 6697 6698 static __inline__ __m128d __DEFAULT_FN_ATTRS128 6699 _mm_rsqrt14_pd (__m128d __A) 6700 { 6701 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 6702 (__v2df) 6703 _mm_setzero_pd (), 6704 (__mmask8) -1); 6705 } 6706 6707 static __inline__ __m128d __DEFAULT_FN_ATTRS128 6708 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A) 6709 { 6710 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 6711 (__v2df) __W, 6712 (__mmask8) __U); 6713 } 6714 6715 static __inline__ __m128d __DEFAULT_FN_ATTRS128 6716 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A) 6717 { 6718 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 6719 (__v2df) 6720 _mm_setzero_pd (), 6721 (__mmask8) __U); 6722 } 6723 6724 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6725 _mm256_rsqrt14_pd (__m256d __A) 6726 { 6727 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 6728 (__v4df) 6729 _mm256_setzero_pd (), 6730 (__mmask8) -1); 6731 } 6732 6733 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6734 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A) 6735 { 6736 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 6737 (__v4df) __W, 6738 (__mmask8) __U); 6739 } 6740 6741 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6742 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A) 6743 { 6744 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 6745 (__v4df) 6746 _mm256_setzero_pd (), 6747 (__mmask8) __U); 6748 } 6749 6750 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6751 _mm_rsqrt14_ps (__m128 __A) 6752 { 6753 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 6754 (__v4sf) 6755 _mm_setzero_ps (), 6756 (__mmask8) -1); 6757 } 6758 6759 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6760 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A) 6761 { 6762 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 6763 (__v4sf) __W, 6764 (__mmask8) __U); 6765 } 6766 6767 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6768 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A) 6769 { 6770 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 6771 (__v4sf) 6772 _mm_setzero_ps (), 6773 (__mmask8) __U); 6774 } 6775 6776 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6777 _mm256_rsqrt14_ps (__m256 __A) 6778 { 6779 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 6780 (__v8sf) 6781 _mm256_setzero_ps (), 6782 (__mmask8) -1); 6783 } 6784 6785 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6786 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A) 6787 { 6788 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 6789 (__v8sf) __W, 6790 (__mmask8) __U); 6791 } 6792 6793 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6794 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A) 6795 { 6796 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 6797 (__v8sf) 6798 _mm256_setzero_ps (), 6799 (__mmask8) __U); 6800 } 6801 6802 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6803 _mm256_broadcast_f32x4(__m128 __A) 6804 { 6805 return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 6806 0, 1, 2, 3, 0, 1, 2, 3); 6807 } 6808 6809 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6810 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) 6811 { 6812 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, 6813 (__v8sf)_mm256_broadcast_f32x4(__A), 6814 (__v8sf)__O); 6815 } 6816 6817 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6818 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A) 6819 { 6820 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, 6821 (__v8sf)_mm256_broadcast_f32x4(__A), 6822 (__v8sf)_mm256_setzero_ps()); 6823 } 6824 6825 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6826 _mm256_broadcast_i32x4(__m128i __A) 6827 { 6828 return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 6829 0, 1, 2, 3, 0, 1, 2, 3); 6830 } 6831 6832 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6833 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) 6834 { 6835 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 6836 (__v8si)_mm256_broadcast_i32x4(__A), 6837 (__v8si)__O); 6838 } 6839 6840 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6841 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) 6842 { 6843 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 6844 (__v8si)_mm256_broadcast_i32x4(__A), 6845 (__v8si)_mm256_setzero_si256()); 6846 } 6847 6848 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6849 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A) 6850 { 6851 return (__m256d)__builtin_ia32_selectpd_256(__M, 6852 (__v4df) _mm256_broadcastsd_pd(__A), 6853 (__v4df) __O); 6854 } 6855 6856 static __inline__ __m256d __DEFAULT_FN_ATTRS256 6857 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) 6858 { 6859 return (__m256d)__builtin_ia32_selectpd_256(__M, 6860 (__v4df) _mm256_broadcastsd_pd(__A), 6861 (__v4df) _mm256_setzero_pd()); 6862 } 6863 6864 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6865 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A) 6866 { 6867 return (__m128)__builtin_ia32_selectps_128(__M, 6868 (__v4sf) _mm_broadcastss_ps(__A), 6869 (__v4sf) __O); 6870 } 6871 6872 static __inline__ __m128 __DEFAULT_FN_ATTRS128 6873 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) 6874 { 6875 return (__m128)__builtin_ia32_selectps_128(__M, 6876 (__v4sf) _mm_broadcastss_ps(__A), 6877 (__v4sf) _mm_setzero_ps()); 6878 } 6879 6880 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6881 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A) 6882 { 6883 return (__m256)__builtin_ia32_selectps_256(__M, 6884 (__v8sf) _mm256_broadcastss_ps(__A), 6885 (__v8sf) __O); 6886 } 6887 6888 static __inline__ __m256 __DEFAULT_FN_ATTRS256 6889 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) 6890 { 6891 return (__m256)__builtin_ia32_selectps_256(__M, 6892 (__v8sf) _mm256_broadcastss_ps(__A), 6893 (__v8sf) _mm256_setzero_ps()); 6894 } 6895 6896 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6897 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 6898 { 6899 return (__m128i)__builtin_ia32_selectd_128(__M, 6900 (__v4si) _mm_broadcastd_epi32(__A), 6901 (__v4si) __O); 6902 } 6903 6904 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6905 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) 6906 { 6907 return (__m128i)__builtin_ia32_selectd_128(__M, 6908 (__v4si) _mm_broadcastd_epi32(__A), 6909 (__v4si) _mm_setzero_si128()); 6910 } 6911 6912 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6913 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A) 6914 { 6915 return (__m256i)__builtin_ia32_selectd_256(__M, 6916 (__v8si) _mm256_broadcastd_epi32(__A), 6917 (__v8si) __O); 6918 } 6919 6920 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6921 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) 6922 { 6923 return (__m256i)__builtin_ia32_selectd_256(__M, 6924 (__v8si) _mm256_broadcastd_epi32(__A), 6925 (__v8si) _mm256_setzero_si256()); 6926 } 6927 6928 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6929 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A) 6930 { 6931 return (__m128i)__builtin_ia32_selectq_128(__M, 6932 (__v2di) _mm_broadcastq_epi64(__A), 6933 (__v2di) __O); 6934 } 6935 6936 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6937 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 6938 { 6939 return (__m128i)__builtin_ia32_selectq_128(__M, 6940 (__v2di) _mm_broadcastq_epi64(__A), 6941 (__v2di) _mm_setzero_si128()); 6942 } 6943 6944 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6945 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A) 6946 { 6947 return (__m256i)__builtin_ia32_selectq_256(__M, 6948 (__v4di) _mm256_broadcastq_epi64(__A), 6949 (__v4di) __O); 6950 } 6951 6952 static __inline__ __m256i __DEFAULT_FN_ATTRS256 6953 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 6954 { 6955 return (__m256i)__builtin_ia32_selectq_256(__M, 6956 (__v4di) _mm256_broadcastq_epi64(__A), 6957 (__v4di) _mm256_setzero_si256()); 6958 } 6959 6960 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6961 _mm_cvtsepi32_epi8 (__m128i __A) 6962 { 6963 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 6964 (__v16qi)_mm_undefined_si128(), 6965 (__mmask8) -1); 6966 } 6967 6968 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6969 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 6970 { 6971 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 6972 (__v16qi) __O, __M); 6973 } 6974 6975 static __inline__ __m128i __DEFAULT_FN_ATTRS128 6976 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A) 6977 { 6978 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 6979 (__v16qi) _mm_setzero_si128 (), 6980 __M); 6981 } 6982 6983 static __inline__ void __DEFAULT_FN_ATTRS128 6984 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 6985 { 6986 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 6987 } 6988 6989 static __inline__ __m128i __DEFAULT_FN_ATTRS256 6990 _mm256_cvtsepi32_epi8 (__m256i __A) 6991 { 6992 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 6993 (__v16qi)_mm_undefined_si128(), 6994 (__mmask8) -1); 6995 } 6996 6997 static __inline__ __m128i __DEFAULT_FN_ATTRS256 6998 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 6999 { 7000 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 7001 (__v16qi) __O, __M); 7002 } 7003 7004 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7005 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A) 7006 { 7007 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 7008 (__v16qi) _mm_setzero_si128 (), 7009 __M); 7010 } 7011 7012 static __inline__ void __DEFAULT_FN_ATTRS256 7013 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7014 { 7015 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); 7016 } 7017 7018 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7019 _mm_cvtsepi32_epi16 (__m128i __A) 7020 { 7021 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7022 (__v8hi)_mm_setzero_si128 (), 7023 (__mmask8) -1); 7024 } 7025 7026 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7027 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7028 { 7029 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7030 (__v8hi)__O, 7031 __M); 7032 } 7033 7034 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7035 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A) 7036 { 7037 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7038 (__v8hi) _mm_setzero_si128 (), 7039 __M); 7040 } 7041 7042 static __inline__ void __DEFAULT_FN_ATTRS128 7043 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7044 { 7045 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 7046 } 7047 7048 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7049 _mm256_cvtsepi32_epi16 (__m256i __A) 7050 { 7051 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7052 (__v8hi)_mm_undefined_si128(), 7053 (__mmask8) -1); 7054 } 7055 7056 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7057 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7058 { 7059 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7060 (__v8hi) __O, __M); 7061 } 7062 7063 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7064 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A) 7065 { 7066 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7067 (__v8hi) _mm_setzero_si128 (), 7068 __M); 7069 } 7070 7071 static __inline__ void __DEFAULT_FN_ATTRS256 7072 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7073 { 7074 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 7075 } 7076 7077 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7078 _mm_cvtsepi64_epi8 (__m128i __A) 7079 { 7080 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7081 (__v16qi)_mm_undefined_si128(), 7082 (__mmask8) -1); 7083 } 7084 7085 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7086 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7087 { 7088 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7089 (__v16qi) __O, __M); 7090 } 7091 7092 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7093 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A) 7094 { 7095 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7096 (__v16qi) _mm_setzero_si128 (), 7097 __M); 7098 } 7099 7100 static __inline__ void __DEFAULT_FN_ATTRS128 7101 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7102 { 7103 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 7104 } 7105 7106 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7107 _mm256_cvtsepi64_epi8 (__m256i __A) 7108 { 7109 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7110 (__v16qi)_mm_undefined_si128(), 7111 (__mmask8) -1); 7112 } 7113 7114 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7115 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7116 { 7117 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7118 (__v16qi) __O, __M); 7119 } 7120 7121 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7122 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A) 7123 { 7124 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7125 (__v16qi) _mm_setzero_si128 (), 7126 __M); 7127 } 7128 7129 static __inline__ void __DEFAULT_FN_ATTRS256 7130 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7131 { 7132 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 7133 } 7134 7135 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7136 _mm_cvtsepi64_epi32 (__m128i __A) 7137 { 7138 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7139 (__v4si)_mm_undefined_si128(), 7140 (__mmask8) -1); 7141 } 7142 7143 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7144 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7145 { 7146 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7147 (__v4si) __O, __M); 7148 } 7149 7150 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7151 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A) 7152 { 7153 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7154 (__v4si) _mm_setzero_si128 (), 7155 __M); 7156 } 7157 7158 static __inline__ void __DEFAULT_FN_ATTRS128 7159 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 7160 { 7161 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 7162 } 7163 7164 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7165 _mm256_cvtsepi64_epi32 (__m256i __A) 7166 { 7167 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7168 (__v4si)_mm_undefined_si128(), 7169 (__mmask8) -1); 7170 } 7171 7172 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7173 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 7174 { 7175 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7176 (__v4si)__O, 7177 __M); 7178 } 7179 7180 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7181 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A) 7182 { 7183 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7184 (__v4si) _mm_setzero_si128 (), 7185 __M); 7186 } 7187 7188 static __inline__ void __DEFAULT_FN_ATTRS256 7189 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 7190 { 7191 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 7192 } 7193 7194 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7195 _mm_cvtsepi64_epi16 (__m128i __A) 7196 { 7197 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7198 (__v8hi)_mm_undefined_si128(), 7199 (__mmask8) -1); 7200 } 7201 7202 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7203 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7204 { 7205 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7206 (__v8hi) __O, __M); 7207 } 7208 7209 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7210 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A) 7211 { 7212 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7213 (__v8hi) _mm_setzero_si128 (), 7214 __M); 7215 } 7216 7217 static __inline__ void __DEFAULT_FN_ATTRS128 7218 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7219 { 7220 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 7221 } 7222 7223 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7224 _mm256_cvtsepi64_epi16 (__m256i __A) 7225 { 7226 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7227 (__v8hi)_mm_undefined_si128(), 7228 (__mmask8) -1); 7229 } 7230 7231 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7232 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7233 { 7234 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7235 (__v8hi) __O, __M); 7236 } 7237 7238 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7239 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A) 7240 { 7241 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7242 (__v8hi) _mm_setzero_si128 (), 7243 __M); 7244 } 7245 7246 static __inline__ void __DEFAULT_FN_ATTRS256 7247 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7248 { 7249 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 7250 } 7251 7252 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7253 _mm_cvtusepi32_epi8 (__m128i __A) 7254 { 7255 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7256 (__v16qi)_mm_undefined_si128(), 7257 (__mmask8) -1); 7258 } 7259 7260 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7261 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7262 { 7263 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7264 (__v16qi) __O, 7265 __M); 7266 } 7267 7268 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7269 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A) 7270 { 7271 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7272 (__v16qi) _mm_setzero_si128 (), 7273 __M); 7274 } 7275 7276 static __inline__ void __DEFAULT_FN_ATTRS128 7277 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7278 { 7279 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 7280 } 7281 7282 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7283 _mm256_cvtusepi32_epi8 (__m256i __A) 7284 { 7285 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7286 (__v16qi)_mm_undefined_si128(), 7287 (__mmask8) -1); 7288 } 7289 7290 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7291 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7292 { 7293 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7294 (__v16qi) __O, 7295 __M); 7296 } 7297 7298 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7299 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A) 7300 { 7301 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7302 (__v16qi) _mm_setzero_si128 (), 7303 __M); 7304 } 7305 7306 static __inline__ void __DEFAULT_FN_ATTRS256 7307 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7308 { 7309 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M); 7310 } 7311 7312 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7313 _mm_cvtusepi32_epi16 (__m128i __A) 7314 { 7315 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7316 (__v8hi)_mm_undefined_si128(), 7317 (__mmask8) -1); 7318 } 7319 7320 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7321 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7322 { 7323 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7324 (__v8hi) __O, __M); 7325 } 7326 7327 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7328 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A) 7329 { 7330 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7331 (__v8hi) _mm_setzero_si128 (), 7332 __M); 7333 } 7334 7335 static __inline__ void __DEFAULT_FN_ATTRS128 7336 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7337 { 7338 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 7339 } 7340 7341 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7342 _mm256_cvtusepi32_epi16 (__m256i __A) 7343 { 7344 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7345 (__v8hi) _mm_undefined_si128(), 7346 (__mmask8) -1); 7347 } 7348 7349 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7350 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7351 { 7352 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7353 (__v8hi) __O, __M); 7354 } 7355 7356 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7357 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A) 7358 { 7359 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7360 (__v8hi) _mm_setzero_si128 (), 7361 __M); 7362 } 7363 7364 static __inline__ void __DEFAULT_FN_ATTRS256 7365 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7366 { 7367 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 7368 } 7369 7370 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7371 _mm_cvtusepi64_epi8 (__m128i __A) 7372 { 7373 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 7374 (__v16qi)_mm_undefined_si128(), 7375 (__mmask8) -1); 7376 } 7377 7378 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7379 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7380 { 7381 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 7382 (__v16qi) __O, 7383 __M); 7384 } 7385 7386 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7387 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A) 7388 { 7389 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 7390 (__v16qi) _mm_setzero_si128 (), 7391 __M); 7392 } 7393 7394 static __inline__ void __DEFAULT_FN_ATTRS128 7395 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7396 { 7397 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 7398 } 7399 7400 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7401 _mm256_cvtusepi64_epi8 (__m256i __A) 7402 { 7403 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7404 (__v16qi)_mm_undefined_si128(), 7405 (__mmask8) -1); 7406 } 7407 7408 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7409 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7410 { 7411 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7412 (__v16qi) __O, 7413 __M); 7414 } 7415 7416 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7417 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A) 7418 { 7419 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7420 (__v16qi) _mm_setzero_si128 (), 7421 __M); 7422 } 7423 7424 static __inline__ void __DEFAULT_FN_ATTRS256 7425 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7426 { 7427 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 7428 } 7429 7430 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7431 _mm_cvtusepi64_epi32 (__m128i __A) 7432 { 7433 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7434 (__v4si)_mm_undefined_si128(), 7435 (__mmask8) -1); 7436 } 7437 7438 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7439 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7440 { 7441 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7442 (__v4si) __O, __M); 7443 } 7444 7445 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7446 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A) 7447 { 7448 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7449 (__v4si) _mm_setzero_si128 (), 7450 __M); 7451 } 7452 7453 static __inline__ void __DEFAULT_FN_ATTRS128 7454 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 7455 { 7456 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 7457 } 7458 7459 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7460 _mm256_cvtusepi64_epi32 (__m256i __A) 7461 { 7462 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7463 (__v4si)_mm_undefined_si128(), 7464 (__mmask8) -1); 7465 } 7466 7467 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7468 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 7469 { 7470 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7471 (__v4si) __O, __M); 7472 } 7473 7474 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7475 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A) 7476 { 7477 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7478 (__v4si) _mm_setzero_si128 (), 7479 __M); 7480 } 7481 7482 static __inline__ void __DEFAULT_FN_ATTRS256 7483 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 7484 { 7485 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 7486 } 7487 7488 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7489 _mm_cvtusepi64_epi16 (__m128i __A) 7490 { 7491 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7492 (__v8hi)_mm_undefined_si128(), 7493 (__mmask8) -1); 7494 } 7495 7496 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7497 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7498 { 7499 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7500 (__v8hi) __O, __M); 7501 } 7502 7503 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7504 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A) 7505 { 7506 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7507 (__v8hi) _mm_setzero_si128 (), 7508 __M); 7509 } 7510 7511 static __inline__ void __DEFAULT_FN_ATTRS128 7512 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7513 { 7514 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 7515 } 7516 7517 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7518 _mm256_cvtusepi64_epi16 (__m256i __A) 7519 { 7520 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7521 (__v8hi)_mm_undefined_si128(), 7522 (__mmask8) -1); 7523 } 7524 7525 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7526 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7527 { 7528 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7529 (__v8hi) __O, __M); 7530 } 7531 7532 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7533 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A) 7534 { 7535 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7536 (__v8hi) _mm_setzero_si128 (), 7537 __M); 7538 } 7539 7540 static __inline__ void __DEFAULT_FN_ATTRS256 7541 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7542 { 7543 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 7544 } 7545 7546 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7547 _mm_cvtepi32_epi8 (__m128i __A) 7548 { 7549 return (__m128i)__builtin_shufflevector( 7550 __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, 7551 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); 7552 } 7553 7554 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7555 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7556 { 7557 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 7558 (__v16qi) __O, __M); 7559 } 7560 7561 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7562 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A) 7563 { 7564 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 7565 (__v16qi) 7566 _mm_setzero_si128 (), 7567 __M); 7568 } 7569 7570 static __inline__ void __DEFAULT_FN_ATTRS128 7571 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7572 { 7573 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 7574 } 7575 7576 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7577 _mm256_cvtepi32_epi8 (__m256i __A) 7578 { 7579 return (__m128i)__builtin_shufflevector( 7580 __builtin_convertvector((__v8si)__A, __v8qi), 7581 (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 7582 12, 13, 14, 15); 7583 } 7584 7585 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7586 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7587 { 7588 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 7589 (__v16qi) __O, __M); 7590 } 7591 7592 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7593 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A) 7594 { 7595 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 7596 (__v16qi) _mm_setzero_si128 (), 7597 __M); 7598 } 7599 7600 static __inline__ void __DEFAULT_FN_ATTRS256 7601 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7602 { 7603 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); 7604 } 7605 7606 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7607 _mm_cvtepi32_epi16 (__m128i __A) 7608 { 7609 return (__m128i)__builtin_shufflevector( 7610 __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, 7611 2, 3, 4, 5, 6, 7); 7612 } 7613 7614 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7615 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7616 { 7617 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 7618 (__v8hi) __O, __M); 7619 } 7620 7621 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7622 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A) 7623 { 7624 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 7625 (__v8hi) _mm_setzero_si128 (), 7626 __M); 7627 } 7628 7629 static __inline__ void __DEFAULT_FN_ATTRS128 7630 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7631 { 7632 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 7633 } 7634 7635 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7636 _mm256_cvtepi32_epi16 (__m256i __A) 7637 { 7638 return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi); 7639 } 7640 7641 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7642 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7643 { 7644 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 7645 (__v8hi) __O, __M); 7646 } 7647 7648 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7649 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A) 7650 { 7651 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 7652 (__v8hi) _mm_setzero_si128 (), 7653 __M); 7654 } 7655 7656 static __inline__ void __DEFAULT_FN_ATTRS256 7657 _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7658 { 7659 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 7660 } 7661 7662 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7663 _mm_cvtepi64_epi8 (__m128i __A) 7664 { 7665 return (__m128i)__builtin_shufflevector( 7666 __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3, 7667 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); 7668 } 7669 7670 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7671 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7672 { 7673 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 7674 (__v16qi) __O, __M); 7675 } 7676 7677 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7678 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A) 7679 { 7680 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 7681 (__v16qi) _mm_setzero_si128 (), 7682 __M); 7683 } 7684 7685 static __inline__ void __DEFAULT_FN_ATTRS128 7686 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7687 { 7688 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 7689 } 7690 7691 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7692 _mm256_cvtepi64_epi8 (__m256i __A) 7693 { 7694 return (__m128i)__builtin_shufflevector( 7695 __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, 7696 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); 7697 } 7698 7699 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7700 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7701 { 7702 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 7703 (__v16qi) __O, __M); 7704 } 7705 7706 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7707 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A) 7708 { 7709 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 7710 (__v16qi) _mm_setzero_si128 (), 7711 __M); 7712 } 7713 7714 static __inline__ void __DEFAULT_FN_ATTRS256 7715 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7716 { 7717 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 7718 } 7719 7720 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7721 _mm_cvtepi64_epi32 (__m128i __A) 7722 { 7723 return (__m128i)__builtin_shufflevector( 7724 __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3); 7725 } 7726 7727 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7728 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7729 { 7730 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 7731 (__v4si) __O, __M); 7732 } 7733 7734 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7735 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A) 7736 { 7737 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 7738 (__v4si) _mm_setzero_si128 (), 7739 __M); 7740 } 7741 7742 static __inline__ void __DEFAULT_FN_ATTRS128 7743 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 7744 { 7745 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 7746 } 7747 7748 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7749 _mm256_cvtepi64_epi32 (__m256i __A) 7750 { 7751 return (__m128i)__builtin_convertvector((__v4di)__A, __v4si); 7752 } 7753 7754 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7755 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 7756 { 7757 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 7758 (__v4si)_mm256_cvtepi64_epi32(__A), 7759 (__v4si)__O); 7760 } 7761 7762 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7763 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A) 7764 { 7765 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 7766 (__v4si)_mm256_cvtepi64_epi32(__A), 7767 (__v4si)_mm_setzero_si128()); 7768 } 7769 7770 static __inline__ void __DEFAULT_FN_ATTRS256 7771 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 7772 { 7773 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 7774 } 7775 7776 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7777 _mm_cvtepi64_epi16 (__m128i __A) 7778 { 7779 return (__m128i)__builtin_shufflevector( 7780 __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3, 7781 3, 3, 3, 3); 7782 } 7783 7784 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7785 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7786 { 7787 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 7788 (__v8hi)__O, 7789 __M); 7790 } 7791 7792 static __inline__ __m128i __DEFAULT_FN_ATTRS128 7793 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A) 7794 { 7795 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 7796 (__v8hi) _mm_setzero_si128 (), 7797 __M); 7798 } 7799 7800 static __inline__ void __DEFAULT_FN_ATTRS128 7801 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7802 { 7803 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 7804 } 7805 7806 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7807 _mm256_cvtepi64_epi16 (__m256i __A) 7808 { 7809 return (__m128i)__builtin_shufflevector( 7810 __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, 7811 2, 3, 4, 5, 6, 7); 7812 } 7813 7814 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7815 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7816 { 7817 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 7818 (__v8hi) __O, __M); 7819 } 7820 7821 static __inline__ __m128i __DEFAULT_FN_ATTRS256 7822 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A) 7823 { 7824 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 7825 (__v8hi) _mm_setzero_si128 (), 7826 __M); 7827 } 7828 7829 static __inline__ void __DEFAULT_FN_ATTRS256 7830 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7831 { 7832 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 7833 } 7834 7835 #define _mm256_extractf32x4_ps(A, imm) \ 7836 (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ 7837 (int)(imm), \ 7838 (__v4sf)_mm_undefined_ps(), \ 7839 (__mmask8)-1) 7840 7841 #define _mm256_mask_extractf32x4_ps(W, U, A, imm) \ 7842 (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ 7843 (int)(imm), \ 7844 (__v4sf)(__m128)(W), \ 7845 (__mmask8)(U)) 7846 7847 #define _mm256_maskz_extractf32x4_ps(U, A, imm) \ 7848 (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ 7849 (int)(imm), \ 7850 (__v4sf)_mm_setzero_ps(), \ 7851 (__mmask8)(U)) 7852 7853 #define _mm256_extracti32x4_epi32(A, imm) \ 7854 (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ 7855 (int)(imm), \ 7856 (__v4si)_mm_undefined_si128(), \ 7857 (__mmask8)-1) 7858 7859 #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \ 7860 (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ 7861 (int)(imm), \ 7862 (__v4si)(__m128i)(W), \ 7863 (__mmask8)(U)) 7864 7865 #define _mm256_maskz_extracti32x4_epi32(U, A, imm) \ 7866 (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ 7867 (int)(imm), \ 7868 (__v4si)_mm_setzero_si128(), \ 7869 (__mmask8)(U)) 7870 7871 #define _mm256_insertf32x4(A, B, imm) \ 7872 (__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \ 7873 (__v4sf)(__m128)(B), (int)(imm)) 7874 7875 #define _mm256_mask_insertf32x4(W, U, A, B, imm) \ 7876 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 7877 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ 7878 (__v8sf)(__m256)(W)) 7879 7880 #define _mm256_maskz_insertf32x4(U, A, B, imm) \ 7881 (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 7882 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ 7883 (__v8sf)_mm256_setzero_ps()) 7884 7885 #define _mm256_inserti32x4(A, B, imm) \ 7886 (__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \ 7887 (__v4si)(__m128i)(B), (int)(imm)) 7888 7889 #define _mm256_mask_inserti32x4(W, U, A, B, imm) \ 7890 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 7891 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ 7892 (__v8si)(__m256i)(W)) 7893 7894 #define _mm256_maskz_inserti32x4(U, A, B, imm) \ 7895 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 7896 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \ 7897 (__v8si)_mm256_setzero_si256()) 7898 7899 #define _mm_getmant_pd(A, B, C) \ 7900 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 7901 (int)(((C)<<2) | (B)), \ 7902 (__v2df)_mm_setzero_pd(), \ 7903 (__mmask8)-1) 7904 7905 #define _mm_mask_getmant_pd(W, U, A, B, C) \ 7906 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 7907 (int)(((C)<<2) | (B)), \ 7908 (__v2df)(__m128d)(W), \ 7909 (__mmask8)(U)) 7910 7911 #define _mm_maskz_getmant_pd(U, A, B, C) \ 7912 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \ 7913 (int)(((C)<<2) | (B)), \ 7914 (__v2df)_mm_setzero_pd(), \ 7915 (__mmask8)(U)) 7916 7917 #define _mm256_getmant_pd(A, B, C) \ 7918 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 7919 (int)(((C)<<2) | (B)), \ 7920 (__v4df)_mm256_setzero_pd(), \ 7921 (__mmask8)-1) 7922 7923 #define _mm256_mask_getmant_pd(W, U, A, B, C) \ 7924 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 7925 (int)(((C)<<2) | (B)), \ 7926 (__v4df)(__m256d)(W), \ 7927 (__mmask8)(U)) 7928 7929 #define _mm256_maskz_getmant_pd(U, A, B, C) \ 7930 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \ 7931 (int)(((C)<<2) | (B)), \ 7932 (__v4df)_mm256_setzero_pd(), \ 7933 (__mmask8)(U)) 7934 7935 #define _mm_getmant_ps(A, B, C) \ 7936 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 7937 (int)(((C)<<2) | (B)), \ 7938 (__v4sf)_mm_setzero_ps(), \ 7939 (__mmask8)-1) 7940 7941 #define _mm_mask_getmant_ps(W, U, A, B, C) \ 7942 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 7943 (int)(((C)<<2) | (B)), \ 7944 (__v4sf)(__m128)(W), \ 7945 (__mmask8)(U)) 7946 7947 #define _mm_maskz_getmant_ps(U, A, B, C) \ 7948 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 7949 (int)(((C)<<2) | (B)), \ 7950 (__v4sf)_mm_setzero_ps(), \ 7951 (__mmask8)(U)) 7952 7953 #define _mm256_getmant_ps(A, B, C) \ 7954 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 7955 (int)(((C)<<2) | (B)), \ 7956 (__v8sf)_mm256_setzero_ps(), \ 7957 (__mmask8)-1) 7958 7959 #define _mm256_mask_getmant_ps(W, U, A, B, C) \ 7960 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 7961 (int)(((C)<<2) | (B)), \ 7962 (__v8sf)(__m256)(W), \ 7963 (__mmask8)(U)) 7964 7965 #define _mm256_maskz_getmant_ps(U, A, B, C) \ 7966 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 7967 (int)(((C)<<2) | (B)), \ 7968 (__v8sf)_mm256_setzero_ps(), \ 7969 (__mmask8)(U)) 7970 7971 #define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \ 7972 (__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \ 7973 (void const *)(addr), \ 7974 (__v2di)(__m128i)(index), \ 7975 (__mmask8)(mask), (int)(scale)) 7976 7977 #define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \ 7978 (__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \ 7979 (void const *)(addr), \ 7980 (__v2di)(__m128i)(index), \ 7981 (__mmask8)(mask), (int)(scale)) 7982 7983 #define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \ 7984 (__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \ 7985 (void const *)(addr), \ 7986 (__v4di)(__m256i)(index), \ 7987 (__mmask8)(mask), (int)(scale)) 7988 7989 #define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \ 7990 (__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \ 7991 (void const *)(addr), \ 7992 (__v4di)(__m256i)(index), \ 7993 (__mmask8)(mask), (int)(scale)) 7994 7995 #define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \ 7996 (__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \ 7997 (void const *)(addr), \ 7998 (__v2di)(__m128i)(index), \ 7999 (__mmask8)(mask), (int)(scale)) 8000 8001 #define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \ 8002 (__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \ 8003 (void const *)(addr), \ 8004 (__v2di)(__m128i)(index), \ 8005 (__mmask8)(mask), (int)(scale)) 8006 8007 #define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \ 8008 (__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \ 8009 (void const *)(addr), \ 8010 (__v4di)(__m256i)(index), \ 8011 (__mmask8)(mask), (int)(scale)) 8012 8013 #define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \ 8014 (__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \ 8015 (void const *)(addr), \ 8016 (__v4di)(__m256i)(index), \ 8017 (__mmask8)(mask), (int)(scale)) 8018 8019 #define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \ 8020 (__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \ 8021 (void const *)(addr), \ 8022 (__v4si)(__m128i)(index), \ 8023 (__mmask8)(mask), (int)(scale)) 8024 8025 #define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \ 8026 (__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \ 8027 (void const *)(addr), \ 8028 (__v4si)(__m128i)(index), \ 8029 (__mmask8)(mask), (int)(scale)) 8030 8031 #define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \ 8032 (__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \ 8033 (void const *)(addr), \ 8034 (__v4si)(__m128i)(index), \ 8035 (__mmask8)(mask), (int)(scale)) 8036 8037 #define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \ 8038 (__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \ 8039 (void const *)(addr), \ 8040 (__v4si)(__m128i)(index), \ 8041 (__mmask8)(mask), (int)(scale)) 8042 8043 #define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \ 8044 (__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \ 8045 (void const *)(addr), \ 8046 (__v4si)(__m128i)(index), \ 8047 (__mmask8)(mask), (int)(scale)) 8048 8049 #define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \ 8050 (__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \ 8051 (void const *)(addr), \ 8052 (__v4si)(__m128i)(index), \ 8053 (__mmask8)(mask), (int)(scale)) 8054 8055 #define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \ 8056 (__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \ 8057 (void const *)(addr), \ 8058 (__v8si)(__m256i)(index), \ 8059 (__mmask8)(mask), (int)(scale)) 8060 8061 #define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \ 8062 (__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \ 8063 (void const *)(addr), \ 8064 (__v8si)(__m256i)(index), \ 8065 (__mmask8)(mask), (int)(scale)) 8066 8067 #define _mm256_permutex_pd(X, C) \ 8068 (__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C)) 8069 8070 #define _mm256_mask_permutex_pd(W, U, X, C) \ 8071 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 8072 (__v4df)_mm256_permutex_pd((X), (C)), \ 8073 (__v4df)(__m256d)(W)) 8074 8075 #define _mm256_maskz_permutex_pd(U, X, C) \ 8076 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 8077 (__v4df)_mm256_permutex_pd((X), (C)), \ 8078 (__v4df)_mm256_setzero_pd()) 8079 8080 #define _mm256_permutex_epi64(X, C) \ 8081 (__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C)) 8082 8083 #define _mm256_mask_permutex_epi64(W, U, X, C) \ 8084 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8085 (__v4di)_mm256_permutex_epi64((X), (C)), \ 8086 (__v4di)(__m256i)(W)) 8087 8088 #define _mm256_maskz_permutex_epi64(U, X, C) \ 8089 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8090 (__v4di)_mm256_permutex_epi64((X), (C)), \ 8091 (__v4di)_mm256_setzero_si256()) 8092 8093 static __inline__ __m256d __DEFAULT_FN_ATTRS256 8094 _mm256_permutexvar_pd (__m256i __X, __m256d __Y) 8095 { 8096 return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X); 8097 } 8098 8099 static __inline__ __m256d __DEFAULT_FN_ATTRS256 8100 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X, 8101 __m256d __Y) 8102 { 8103 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 8104 (__v4df)_mm256_permutexvar_pd(__X, __Y), 8105 (__v4df)__W); 8106 } 8107 8108 static __inline__ __m256d __DEFAULT_FN_ATTRS256 8109 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y) 8110 { 8111 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 8112 (__v4df)_mm256_permutexvar_pd(__X, __Y), 8113 (__v4df)_mm256_setzero_pd()); 8114 } 8115 8116 static __inline__ __m256i __DEFAULT_FN_ATTRS256 8117 _mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y) 8118 { 8119 return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X); 8120 } 8121 8122 static __inline__ __m256i __DEFAULT_FN_ATTRS256 8123 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y) 8124 { 8125 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 8126 (__v4di)_mm256_permutexvar_epi64(__X, __Y), 8127 (__v4di)_mm256_setzero_si256()); 8128 } 8129 8130 static __inline__ __m256i __DEFAULT_FN_ATTRS256 8131 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X, 8132 __m256i __Y) 8133 { 8134 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 8135 (__v4di)_mm256_permutexvar_epi64(__X, __Y), 8136 (__v4di)__W); 8137 } 8138 8139 #define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A)) 8140 8141 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8142 _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) 8143 { 8144 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8145 (__v8sf)_mm256_permutexvar_ps(__X, __Y), 8146 (__v8sf)__W); 8147 } 8148 8149 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8150 _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) 8151 { 8152 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8153 (__v8sf)_mm256_permutexvar_ps(__X, __Y), 8154 (__v8sf)_mm256_setzero_ps()); 8155 } 8156 8157 #define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A)) 8158 8159 static __inline__ __m256i __DEFAULT_FN_ATTRS256 8160 _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, 8161 __m256i __Y) 8162 { 8163 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 8164 (__v8si)_mm256_permutexvar_epi32(__X, __Y), 8165 (__v8si)__W); 8166 } 8167 8168 static __inline__ __m256i __DEFAULT_FN_ATTRS256 8169 _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) 8170 { 8171 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 8172 (__v8si)_mm256_permutexvar_epi32(__X, __Y), 8173 (__v8si)_mm256_setzero_si256()); 8174 } 8175 8176 #define _mm_alignr_epi32(A, B, imm) \ 8177 (__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \ 8178 (__v4si)(__m128i)(B), (int)(imm)) 8179 8180 #define _mm_mask_alignr_epi32(W, U, A, B, imm) \ 8181 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8182 (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ 8183 (__v4si)(__m128i)(W)) 8184 8185 #define _mm_maskz_alignr_epi32(U, A, B, imm) \ 8186 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8187 (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ 8188 (__v4si)_mm_setzero_si128()) 8189 8190 #define _mm256_alignr_epi32(A, B, imm) \ 8191 (__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \ 8192 (__v8si)(__m256i)(B), (int)(imm)) 8193 8194 #define _mm256_mask_alignr_epi32(W, U, A, B, imm) \ 8195 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8196 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ 8197 (__v8si)(__m256i)(W)) 8198 8199 #define _mm256_maskz_alignr_epi32(U, A, B, imm) \ 8200 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8201 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ 8202 (__v8si)_mm256_setzero_si256()) 8203 8204 #define _mm_alignr_epi64(A, B, imm) \ 8205 (__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \ 8206 (__v2di)(__m128i)(B), (int)(imm)) 8207 8208 #define _mm_mask_alignr_epi64(W, U, A, B, imm) \ 8209 (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 8210 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ 8211 (__v2di)(__m128i)(W)) 8212 8213 #define _mm_maskz_alignr_epi64(U, A, B, imm) \ 8214 (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 8215 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ 8216 (__v2di)_mm_setzero_si128()) 8217 8218 #define _mm256_alignr_epi64(A, B, imm) \ 8219 (__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \ 8220 (__v4di)(__m256i)(B), (int)(imm)) 8221 8222 #define _mm256_mask_alignr_epi64(W, U, A, B, imm) \ 8223 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8224 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ 8225 (__v4di)(__m256i)(W)) 8226 8227 #define _mm256_maskz_alignr_epi64(U, A, B, imm) \ 8228 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8229 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ 8230 (__v4di)_mm256_setzero_si256()) 8231 8232 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8233 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A) 8234 { 8235 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8236 (__v4sf)_mm_movehdup_ps(__A), 8237 (__v4sf)__W); 8238 } 8239 8240 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8241 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A) 8242 { 8243 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8244 (__v4sf)_mm_movehdup_ps(__A), 8245 (__v4sf)_mm_setzero_ps()); 8246 } 8247 8248 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8249 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A) 8250 { 8251 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8252 (__v8sf)_mm256_movehdup_ps(__A), 8253 (__v8sf)__W); 8254 } 8255 8256 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8257 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A) 8258 { 8259 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8260 (__v8sf)_mm256_movehdup_ps(__A), 8261 (__v8sf)_mm256_setzero_ps()); 8262 } 8263 8264 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8265 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A) 8266 { 8267 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8268 (__v4sf)_mm_moveldup_ps(__A), 8269 (__v4sf)__W); 8270 } 8271 8272 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8273 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A) 8274 { 8275 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8276 (__v4sf)_mm_moveldup_ps(__A), 8277 (__v4sf)_mm_setzero_ps()); 8278 } 8279 8280 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8281 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A) 8282 { 8283 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8284 (__v8sf)_mm256_moveldup_ps(__A), 8285 (__v8sf)__W); 8286 } 8287 8288 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8289 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) 8290 { 8291 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8292 (__v8sf)_mm256_moveldup_ps(__A), 8293 (__v8sf)_mm256_setzero_ps()); 8294 } 8295 8296 #define _mm256_mask_shuffle_epi32(W, U, A, I) \ 8297 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8298 (__v8si)_mm256_shuffle_epi32((A), (I)), \ 8299 (__v8si)(__m256i)(W)) 8300 8301 #define _mm256_maskz_shuffle_epi32(U, A, I) \ 8302 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8303 (__v8si)_mm256_shuffle_epi32((A), (I)), \ 8304 (__v8si)_mm256_setzero_si256()) 8305 8306 #define _mm_mask_shuffle_epi32(W, U, A, I) \ 8307 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8308 (__v4si)_mm_shuffle_epi32((A), (I)), \ 8309 (__v4si)(__m128i)(W)) 8310 8311 #define _mm_maskz_shuffle_epi32(U, A, I) \ 8312 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8313 (__v4si)_mm_shuffle_epi32((A), (I)), \ 8314 (__v4si)_mm_setzero_si128()) 8315 8316 static __inline__ __m128d __DEFAULT_FN_ATTRS128 8317 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) 8318 { 8319 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 8320 (__v2df) __A, 8321 (__v2df) __W); 8322 } 8323 8324 static __inline__ __m128d __DEFAULT_FN_ATTRS128 8325 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A) 8326 { 8327 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 8328 (__v2df) __A, 8329 (__v2df) _mm_setzero_pd ()); 8330 } 8331 8332 static __inline__ __m256d __DEFAULT_FN_ATTRS256 8333 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A) 8334 { 8335 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 8336 (__v4df) __A, 8337 (__v4df) __W); 8338 } 8339 8340 static __inline__ __m256d __DEFAULT_FN_ATTRS256 8341 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A) 8342 { 8343 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 8344 (__v4df) __A, 8345 (__v4df) _mm256_setzero_pd ()); 8346 } 8347 8348 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8349 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A) 8350 { 8351 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 8352 (__v4sf) __A, 8353 (__v4sf) __W); 8354 } 8355 8356 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8357 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A) 8358 { 8359 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 8360 (__v4sf) __A, 8361 (__v4sf) _mm_setzero_ps ()); 8362 } 8363 8364 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8365 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A) 8366 { 8367 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 8368 (__v8sf) __A, 8369 (__v8sf) __W); 8370 } 8371 8372 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8373 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A) 8374 { 8375 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 8376 (__v8sf) __A, 8377 (__v8sf) _mm256_setzero_ps ()); 8378 } 8379 8380 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8381 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A) 8382 { 8383 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, 8384 (__v4sf) __W, 8385 (__mmask8) __U); 8386 } 8387 8388 static __inline__ __m128 __DEFAULT_FN_ATTRS128 8389 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A) 8390 { 8391 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, 8392 (__v4sf) 8393 _mm_setzero_ps (), 8394 (__mmask8) __U); 8395 } 8396 8397 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8398 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A) 8399 { 8400 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, 8401 (__v8sf) __W, 8402 (__mmask8) __U); 8403 } 8404 8405 static __inline__ __m256 __DEFAULT_FN_ATTRS256 8406 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A) 8407 { 8408 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, 8409 (__v8sf) 8410 _mm256_setzero_ps (), 8411 (__mmask8) __U); 8412 } 8413 8414 #define _mm_mask_cvt_roundps_ph(W, U, A, I) \ 8415 (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ 8416 (__v8hi)(__m128i)(W), \ 8417 (__mmask8)(U)) 8418 8419 #define _mm_maskz_cvt_roundps_ph(U, A, I) \ 8420 (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ 8421 (__v8hi)_mm_setzero_si128(), \ 8422 (__mmask8)(U)) 8423 8424 #define _mm_mask_cvtps_ph _mm_mask_cvt_roundps_ph 8425 #define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph 8426 8427 #define _mm256_mask_cvt_roundps_ph(W, U, A, I) \ 8428 (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ 8429 (__v8hi)(__m128i)(W), \ 8430 (__mmask8)(U)) 8431 8432 #define _mm256_maskz_cvt_roundps_ph(U, A, I) \ 8433 (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ 8434 (__v8hi)_mm_setzero_si128(), \ 8435 (__mmask8)(U)) 8436 8437 #define _mm256_mask_cvtps_ph _mm256_mask_cvt_roundps_ph 8438 #define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph 8439 8440 8441 #undef __DEFAULT_FN_ATTRS128 8442 #undef __DEFAULT_FN_ATTRS256 8443 8444 #endif /* __AVX512VLINTRIN_H */ 8445