/*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512DQINTRIN_H
#define __AVX512DQINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq,evex512"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512dq,no-evex512")))

/* 8-bit mask-register operations: logic, OR/AND tests, add, shifts, and
   moves between mask registers, GPRs, and memory. */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_knot_mask8(__mmask8 __M)
{
  return __builtin_ia32_knotqi(__M);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kand_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kandn_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kor_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kxnor_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_kxnorqi((__mmask8)__A, (__mmask8)__B);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kxor_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestc_mask8_u8(__mmask8 __A, __mmask8 __B)
{
  return (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestz_mask8_u8(__mmask8 __A, __mmask8 __B)
{
  return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
  *__C = (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
  return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestc_mask8_u8(__mmask8 __A, __mmask8 __B)
{
  return (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestz_mask8_u8(__mmask8 __A, __mmask8 __B)
{
  return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
  *__C = (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
  return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestc_mask16_u8(__mmask16 __A, __mmask16 __B)
{
  return (unsigned char)__builtin_ia32_ktestchi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestz_mask16_u8(__mmask16 __A, __mmask16 __B)
{
  return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
  *__C = (unsigned char)__builtin_ia32_ktestchi(__A, __B);
  return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kadd_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_kaddqi((__mmask8)__A, (__mmask8)__B);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_kadd_mask16(__mmask16 __A, __mmask16 __B)
{
  return (__mmask16)__builtin_ia32_kaddhi((__mmask16)__A, (__mmask16)__B);
}

#define _kshiftli_mask8(A, I) \
  ((__mmask8)__builtin_ia32_kshiftliqi((__mmask8)(A), (unsigned int)(I)))

#define _kshiftri_mask8(A, I) \
  ((__mmask8)__builtin_ia32_kshiftriqi((__mmask8)(A), (unsigned int)(I)))

static __inline__ unsigned int __DEFAULT_FN_ATTRS
_cvtmask8_u32(__mmask8 __A) {
  return (unsigned int)__builtin_ia32_kmovb((__mmask8)__A);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_cvtu32_mask8(unsigned int __A) {
  return (__mmask8)__builtin_ia32_kmovb((__mmask8)__A);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_load_mask8(__mmask8 *__A) {
  return (__mmask8)__builtin_ia32_kmovb(*(__mmask8 *)__A);
}

static __inline__ void __DEFAULT_FN_ATTRS
_store_mask8(__mmask8 *__A, __mmask8 __B) {
  *(__mmask8 *)__A = __builtin_ia32_kmovb((__mmask8)__B);
}
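
/* Usage sketch (illustrative, not part of the original header): combine two
 * 8-bit predicate masks without leaving the mask registers. The helper name
 * is hypothetical; the masks would typically come from AVX-512 compares. */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
__avx512dq_example_masks_disjoint(__mmask8 __A, __mmask8 __B)
{
  __mmask8 __Both = _kand_mask8(__A, __B);   /* bits set in both masks */
  return _kortestz_mask8_u8(__Both, __Both); /* 1 if the intersection is empty */
}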

/* Multiply packed 64-bit integers, keeping the low 64 bits of each product. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
  return (__m512i) ((__v8du) __A * (__v8du) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_mullo_epi64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_mullo_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
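
/* Usage sketch (illustrative, not part of the original header): scale only
 * the lanes selected by __U, leaving the other lanes unchanged. The helper
 * name is hypothetical. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
__avx512dq_example_scale_selected(__m512i __V, __mmask8 __U, long long __S)
{
  /* Unselected lanes pass through from the first (merge) operand. */
  return _mm512_mask_mullo_epi64(__V, __U, __V, _mm512_set1_epi64(__S));
}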

/* Bitwise logic on the raw bit patterns of packed floats and doubles. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_xor_pd(__m512d __A, __m512d __B) {
  return (__m512d)((__v8du)__A ^ (__v8du)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_xor_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_xor_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_xor_ps (__m512 __A, __m512 __B) {
  return (__m512)((__v16su)__A ^ (__v16su)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_xor_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_xor_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_or_pd(__m512d __A, __m512d __B) {
  return (__m512d)((__v8du)__A | (__v8du)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_or_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_or_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_or_ps(__m512 __A, __m512 __B) {
  return (__m512)((__v16su)__A | (__v16su)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_or_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_or_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_and_pd(__m512d __A, __m512d __B) {
  return (__m512d)((__v8du)__A & (__v8du)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_and_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_and_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_and_ps(__m512 __A, __m512 __B) {
  return (__m512)((__v16su)__A & (__v16su)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_and_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_and_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_andnot_pd(__m512d __A, __m512d __B) {
  return (__m512d)(~(__v8du)__A & (__v8du)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_andnot_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_andnot_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_andnot_ps(__m512 __A, __m512 __B) {
  return (__m512)(~(__v16su)__A & (__v16su)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_andnot_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_andnot_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
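
/* Usage sketch (illustrative, not part of the original header): clear the
 * sign bit of every double lane, computing |x|. The helper name and the
 * -0.0 sign-mask constant are ours. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
__avx512dq_example_fabs_pd(__m512d __V)
{
  /* andnot computes ~a & b, so passing the sign-bit pattern as the first
     operand keeps every bit except the sign bits. */
  return _mm512_andnot_pd(_mm512_set1_pd(-0.0), __V);
}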

/* Conversions from packed doubles/floats to signed and unsigned 64-bit
   integers. The _cvt_round forms take an explicit rounding mode; the others
   round according to MXCSR (_MM_FROUND_CUR_DIRECTION). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtpd_epi64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                                                    (__v8di) _mm512_setzero_si512(),
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                                                    (__v8di) __W,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                                                    (__v8di) _mm512_setzero_si512(),
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundpd_epi64(A, R) \
  ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtpd_epu64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) -1,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                                                     (__v8di) __W,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundpd_epu64(A, R) \
  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtps_epi64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                                                    (__v8di) _mm512_setzero_si512(),
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                                                    (__v8di) __W,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                                                    (__v8di) _mm512_setzero_si512(),
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundps_epi64(A, R) \
  ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtps_epu64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) -1,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                                                     (__v8di) __W,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundps_epu64(A, R) \
  ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)))
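
/* Usage sketch (illustrative, not part of the original header): convert
 * doubles to signed 64-bit integers with round-to-nearest-even and
 * exceptions suppressed, independent of the current MXCSR rounding mode. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
__avx512dq_example_round_to_i64(__m512d __V)
{
  return _mm512_cvt_roundpd_epi64(__V, _MM_FROUND_TO_NEAREST_INT |
                                           _MM_FROUND_NO_EXC);
}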

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_cvtepi64_pd (__m512i __A) {
  return (__m512d)__builtin_convertvector((__v8di)__A, __v8df);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtepi64_pd(__A),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtepi64_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}

#define _mm512_cvt_roundepi64_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_cvtepi64_ps (__m512i __A) {
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
                                                   (__v8sf) _mm256_setzero_ps(),
                                                   (__mmask8) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
                                                   (__v8sf) __W,
                                                   (__mmask8) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
                                                   (__v8sf) _mm256_setzero_ps(),
                                                   (__mmask8) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundepi64_ps(A, R) \
  ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
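
/* Usage sketch (illustrative, not part of the original header): convert
 * signed 64-bit tick counts to seconds. The conversion is exact for
 * magnitudes below 2^53; the multiply adds one more rounding. The helper
 * name and parameters are hypothetical. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
__avx512dq_example_ticks_to_seconds(__m512i __Ticks, double __SecPerTick)
{
  return _mm512_mul_pd(_mm512_cvtepi64_pd(__Ticks),
                       _mm512_set1_pd(__SecPerTick));
}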

/* Truncating (round-toward-zero) conversions to 64-bit integers, matching
   C cast semantics regardless of the MXCSR rounding mode. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttpd_epi64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) -1,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                                                     (__v8di) __W,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundpd_epi64(A, R) \
  ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttpd_epu64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                                                      (__v8di) _mm512_setzero_si512(),
                                                      (__mmask8) -1,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                                                      (__v8di) __W,
                                                      (__mmask8) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                                                      (__v8di) _mm512_setzero_si512(),
                                                      (__mmask8) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundpd_epu64(A, R) \
  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                              (__v8di)_mm512_setzero_si512(), \
                                              (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                              (__v8di)(__m512i)(W), \
                                              (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                              (__v8di)_mm512_setzero_si512(), \
                                              (__mmask8)(U), (int)(R)))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttps_epi64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) -1,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                                                     (__v8di) __W,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundps_epi64(A, R) \
  ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttps_epu64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                                                      (__v8di) _mm512_setzero_si512(),
                                                      (__mmask8) -1,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                                                      (__v8di) __W,
                                                      (__mmask8) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                                                      (__v8di) _mm512_setzero_si512(),
                                                      (__mmask8) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundps_epu64(A, R) \
  ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                              (__v8di)_mm512_setzero_si512(), \
                                              (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                              (__v8di)(__m512i)(W), \
                                              (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                              (__v8di)_mm512_setzero_si512(), \
                                              (__mmask8)(U), (int)(R)))
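
/* Usage sketch (illustrative, not part of the original header): C-style
 * truncation of eight floats to 64-bit integers. Unlike _mm512_cvtps_epi64,
 * the result does not depend on the current rounding mode. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
__avx512dq_example_truncate_ps(__m256 __V)
{
  return _mm512_cvttps_epi64(__V);
}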

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_cvtepu64_pd (__m512i __A) {
  return (__m512d)__builtin_convertvector((__v8du)__A, __v8df);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtepu64_pd(__A),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtepu64_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}

#define _mm512_cvt_roundepu64_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_cvtepu64_ps (__m512i __A) {
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                                                    (__v8sf) _mm256_setzero_ps(),
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                                                    (__v8sf) __W,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                                                    (__v8sf) _mm256_setzero_ps(),
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundepu64_ps(A, R) \
  ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                            (__v8sf)(__m256)(W), (__mmask8)(U), \
                                            (int)(R)))

#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)(U), (int)(R)))
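
/* Usage sketch (illustrative, not part of the original header): convert
 * unsigned 64-bit counters to single precision. The _epu64 form treats the
 * full 0..2^64-1 range as non-negative, where a signed conversion would
 * misread values of 2^63 and above. */
static __inline__ __m256 __DEFAULT_FN_ATTRS512
__avx512dq_example_u64_to_ps(__m512i __Counters)
{
  return _mm512_cvtepu64_ps(__Counters);
}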

#define _mm512_range_pd(A, B, C) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_range_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)(__m512d)(W), (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_range_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_range_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm512_mask_range_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)(__m512d)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm512_maskz_range_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)))

#define _mm512_range_ps(A, B, C) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_range_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)(__m512)(W), (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_range_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_range_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R)))

#define _mm512_mask_range_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)(__m512)(W), (__mmask16)(U), \
                                          (int)(R)))

#define _mm512_maskz_range_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)))

#define _mm_range_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(C), \
                                                (int)(R)))

#define _mm_range_ss(A, B, C) _mm_range_round_ss(A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_range_round_ss(W, U, A, B, C, R) \
  ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(C), \
                                                (int)(R)))

#define _mm_mask_range_ss(W, U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_range_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(C), \
                                                (int)(R)))

#define _mm_maskz_range_ss(U, A, B, C) _mm_maskz_range_round_ss(U, A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_range_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(C), \
                                                 (int)(R)))

#define _mm_range_sd(A, B, C) _mm_range_round_sd(A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_range_round_sd(W, U, A, B, C, R) \
  ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(C), \
                                                 (int)(R)))

#define _mm_mask_range_sd(W, U, A, B, C) _mm_mask_range_round_sd(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_range_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(C), \
                                                 (int)(R)))

#define _mm_maskz_range_sd(U, A, B, C) _mm_maskz_range_round_sd(U, A, B, C, _MM_FROUND_CUR_DIRECTION)
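
/* Usage sketch (illustrative, not part of the original header): clamp the
 * magnitude of each double lane to a bound while keeping its sign. This
 * assumes the VRANGEPD immediate encoding from Intel's documentation:
 * imm8[1:0] = 10 selects min-absolute, imm8[3:2] = 00 takes the sign from
 * the first source. The helper name is hypothetical. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
__avx512dq_example_clamp_magnitude(__m512d __V, __m512d __Bound)
{
  return _mm512_range_pd(__V, __Bound, 0x02);
}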

#define _mm512_reduce_pd(A, B) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_reduce_pd(W, U, A, B) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_reduce_pd(U, A, B) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_reduce_ps(A, B) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_reduce_ps(W, U, A, B) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_reduce_ps(U, A, B) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_reduce_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_reduce_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_reduce_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_reduce_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_reduce_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_reduce_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))

#define _mm_reduce_ss(A, B, C) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                        (int)(C), _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_reduce_ss(W, U, A, B, C) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)(__m128)(W), (__mmask8)(U), \
                                        (int)(C), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_reduce_ss(U, A, B, C) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)_mm_setzero_ps(), \
                                        (__mmask8)(U), (int)(C), \
                                        _MM_FROUND_CUR_DIRECTION))

#define _mm_reduce_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                        (int)(C), (int)(R)))

#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)(__m128)(W), (__mmask8)(U), \
                                        (int)(C), (int)(R)))

#define _mm_maskz_reduce_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)_mm_setzero_ps(), \
                                        (__mmask8)(U), (int)(C), (int)(R)))

#define _mm_reduce_sd(A, B, C) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)_mm_setzero_pd(), \
                                         (__mmask8)-1, (int)(C), \
                                         _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_reduce_sd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)(__m128d)(W), (__mmask8)(U), \
                                         (int)(C), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_reduce_sd(U, A, B, C) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)_mm_setzero_pd(), \
                                         (__mmask8)(U), (int)(C), \
                                         _MM_FROUND_CUR_DIRECTION))

#define _mm_reduce_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)_mm_setzero_pd(), \
                                         (__mmask8)-1, (int)(C), (int)(R)))

#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)(__m128d)(W), (__mmask8)(U), \
                                         (int)(C), (int)(R)))

#define _mm_maskz_reduce_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)_mm_setzero_pd(), \
                                         (__mmask8)(U), (int)(C), (int)(R)))
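
/* Usage sketch (illustrative, not part of the original header): extract the
 * fractional part of each lane, x - floor(x). This assumes the VREDUCEPD
 * immediate follows the VRNDSCALE layout: low two bits 01 = round down,
 * upper four bits 0 = keep zero fraction bits in the subtracted value. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
__avx512dq_example_fraction(__m512d __V)
{
  return _mm512_reduce_pd(__V, 0x01); /* result lanes lie in [0, 1) */
}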

static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
_mm512_movepi32_mask (__m512i __A)
{
  return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_movm_epi32 (__mmask16 __A)
{
  return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_movm_epi64 (__mmask8 __A)
{
  return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
_mm512_movepi64_mask (__m512i __A)
{
  return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
}
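
/* Usage sketch (illustrative, not part of the original header): expand a
 * mask to whole-lane 0/-1 values and read it back from the lane sign bits;
 * the round trip is the identity. The helper name is hypothetical. */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
__avx512dq_example_mask_roundtrip(__mmask8 __M)
{
  return _mm512_movepi64_mask(_mm512_movm_epi64(__M));
}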

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_broadcast_f32x2 (__m128 __A)
{
  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
                                         0, 1, 0, 1, 0, 1, 0, 1,
                                         0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                             (__v16sf)_mm512_broadcast_f32x2(__A),
                                             (__v16sf)__O);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                             (__v16sf)_mm512_broadcast_f32x2(__A),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_broadcast_f32x8(__m256 __A)
{
  return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A,
                                         0, 1, 2, 3, 4, 5, 6, 7,
                                         0, 1, 2, 3, 4, 5, 6, 7);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                             (__v16sf)_mm512_broadcast_f32x8(__A),
                                             (__v16sf)__O);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                             (__v16sf)_mm512_broadcast_f32x8(__A),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_broadcast_f64x2(__m128d __A)
{
  return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
                                          0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                              (__v8df)_mm512_broadcast_f64x2(__A),
                                              (__v8df)__O);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                              (__v8df)_mm512_broadcast_f64x2(__A),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcast_i32x2 (__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                          0, 1, 0, 1, 0, 1, 0, 1,
                                          0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_broadcast_i32x2(__A),
                                             (__v16si)__O);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_broadcast_i32x2(__A),
                                             (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcast_i32x8(__m256i __A)
{
  return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A,
                                          0, 1, 2, 3, 4, 5, 6, 7,
                                          0, 1, 2, 3, 4, 5, 6, 7);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_broadcast_i32x8(__A),
                                             (__v16si)__O);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_broadcast_i32x8(__A),
                                             (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcast_i64x2(__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
                                          0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_broadcast_i64x2(__A),
                                             (__v8di)__O);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_broadcast_i64x2(__A),
                                             (__v8di)_mm512_setzero_si512());
}
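
/* Usage sketch (illustrative, not part of the original header): tile one
 * 128-bit pair of doubles across all four 128-bit lanes, zeroing the lanes
 * not selected by __U. The helper name is hypothetical. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
__avx512dq_example_tile_pair(__mmask8 __U, __m128d __Pair)
{
  return _mm512_maskz_broadcast_f64x2(__U, __Pair);
}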

#define _mm512_extractf32x8_ps(A, imm) \
  ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v8sf)_mm256_undefined_ps(), \
                                            (__mmask8)-1))

#define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
  ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v8sf)(__m256)(W), \
                                            (__mmask8)(U)))

#define _mm512_maskz_extractf32x8_ps(U, A, imm) \
  ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)(U)))

#define _mm512_extractf64x2_pd(A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)_mm_undefined_pd(), \
                                                 (__mmask8)-1))

#define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U)))

#define _mm512_maskz_extractf64x2_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U)))

#define _mm512_extracti32x8_epi32(A, imm) \
  ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v8si)_mm256_undefined_si256(), \
                                             (__mmask8)-1))

#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extracti32x8_epi32(U, A, imm) \
  ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U)))

#define _mm512_extracti64x2_epi64(A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)_mm_undefined_si128(), \
                                                 (__mmask8)-1))

#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)(__m128i)(W), \
                                                 (__mmask8)(U)))

#define _mm512_maskz_extracti64x2_epi64(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)_mm_setzero_si128(), \
                                                 (__mmask8)(U)))

#define _mm512_insertf32x8(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x8((__v16sf)(__m512)(A), \
                                      (__v8sf)(__m256)(B), (int)(imm)))

#define _mm512_mask_insertf32x8(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_insertf32x8(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
                                       (__v16sf)_mm512_setzero_ps()))

#define _mm512_insertf64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x2_512((__v8df)(__m512d)(A), \
                                           (__v2df)(__m128d)(B), (int)(imm)))

#define _mm512_mask_insertf64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_insertf64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_inserti32x8(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x8((__v16si)(__m512i)(A), \
                                       (__v8si)(__m256i)(B), (int)(imm)))

#define _mm512_mask_inserti32x8(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_inserti32x8(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_inserti64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x2_512((__v8di)(__m512i)(A), \
                                           (__v2di)(__m128i)(B), (int)(imm)))

#define _mm512_mask_inserti64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_inserti64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
                                       (__v8di)_mm512_setzero_si512()))
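
/* Usage sketch (illustrative, not part of the original header): swap the
 * 256-bit halves of a float vector with the 32x8 extract/insert pair; the
 * immediate selects which half is read or written. The helper name is
 * hypothetical. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
__avx512dq_example_swap_halves(__m512 __V)
{
  __m256 __Lo = _mm512_extractf32x8_ps(__V, 0);
  __m256 __Hi = _mm512_extractf32x8_ps(__V, 1);
  return _mm512_insertf32x8(_mm512_insertf32x8(__V, __Hi, 0), __Lo, 1);
}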

#define _mm512_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                               (int)(imm), (__mmask16)(U)))

#define _mm512_fpclass_ps_mask(A, imm) \
  ((__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                               (int)(imm), (__mmask16)-1))

#define _mm512_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                              (__mmask8)(U)))

#define _mm512_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                              (__mmask8)-1))

#define _mm_fpclass_sd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                           (__mmask8)-1))

#define _mm_mask_fpclass_sd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                           (__mmask8)(U)))

#define _mm_fpclass_ss_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                           (__mmask8)-1))

#define _mm_mask_fpclass_ss_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                           (__mmask8)(U)))
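
/* Usage sketch (illustrative, not part of the original header): mask of
 * non-finite double lanes, assuming the documented VFPCLASS category bits
 * (0x01 QNaN, 0x80 SNaN, 0x08 positive infinity, 0x10 negative infinity). */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
__avx512dq_example_non_finite(__m512d __V)
{
  return _mm512_fpclass_pd_mask(__V, 0x01 | 0x80 | 0x08 | 0x10);
}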

#undef __DEFAULT_FN_ATTRS512
#undef __DEFAULT_FN_ATTRS

#endif