/*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512DQINTRIN_H
#define __AVX512DQINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_knot_mask8(__mmask8 __M)
{
  return __builtin_ia32_knotqi(__M);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kand_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kandn_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kor_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kxnor_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_kxnorqi((__mmask8)__A, (__mmask8)__B);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kxor_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestc_mask8_u8(__mmask8 __A, __mmask8 __B)
{
  return (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestz_mask8_u8(__mmask8 __A, __mmask8 __B)
{
  return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
  *__C = (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
  return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestc_mask8_u8(__mmask8 __A, __mmask8 __B)
{
  return (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestz_mask8_u8(__mmask8 __A, __mmask8 __B)
{
  return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
  *__C = (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
  return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestc_mask16_u8(__mmask16 __A, __mmask16 __B)
{
  return (unsigned char)__builtin_ia32_ktestchi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestz_mask16_u8(__mmask16 __A, __mmask16 __B)
{
  return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
}
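
/* Illustrative usage sketch, not part of the original header: a hypothetical
   helper that keeps the mask bits set in __A but not in __B, falling back to
   the complement of __B when that difference is empty. */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
__example_kmask8_difference(__mmask8 __A, __mmask8 __B)
{
  /* _kandn_mask8(__B, __A) computes ~__B & __A. */
  __mmask8 __Diff = _kandn_mask8(__B, __A);
  /* _kortestz_mask8_u8 returns 1 when the OR of its operands is all zero. */
  if (_kortestz_mask8_u8(__Diff, __Diff))
    return _knot_mask8(__B);
  return __Diff;
}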
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
  *__C = (unsigned char)__builtin_ia32_ktestchi(__A, __B);
  return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kadd_mask8(__mmask8 __A, __mmask8 __B)
{
  return (__mmask8)__builtin_ia32_kaddqi((__mmask8)__A, (__mmask8)__B);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_kadd_mask16(__mmask16 __A, __mmask16 __B)
{
  return (__mmask16)__builtin_ia32_kaddhi((__mmask16)__A, (__mmask16)__B);
}

#define _kshiftli_mask8(A, I) \
  (__mmask8)__builtin_ia32_kshiftliqi((__mmask8)(A), (unsigned int)(I))

#define _kshiftri_mask8(A, I) \
  (__mmask8)__builtin_ia32_kshiftriqi((__mmask8)(A), (unsigned int)(I))

static __inline__ unsigned int __DEFAULT_FN_ATTRS
_cvtmask8_u32(__mmask8 __A) {
  return (unsigned int)__builtin_ia32_kmovb((__mmask8)__A);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_cvtu32_mask8(unsigned int __A) {
  return (__mmask8)__builtin_ia32_kmovb((__mmask8)__A);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_load_mask8(__mmask8 *__A) {
  return (__mmask8)__builtin_ia32_kmovb(*(__mmask8 *)__A);
}

static __inline__ void __DEFAULT_FN_ATTRS
_store_mask8(__mmask8 *__A, __mmask8 __B) {
  *(__mmask8 *)__A = __builtin_ia32_kmovb((__mmask8)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
  return (__m512i) ((__v8du) __A * (__v8du) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_mullo_epi64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_mullo_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
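
/* Illustrative usage sketch, not part of the original header: squares the
   lanes selected by __M using the zeroing-masked 64-bit low multiply; lanes
   with a clear mask bit become zero. The helper name is hypothetical. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
__example_masked_square_epi64(__mmask8 __M, __m512i __A)
{
  return _mm512_maskz_mullo_epi64(__M, __A, __A);
}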
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_xor_pd(__m512d __A, __m512d __B) {
  return (__m512d)((__v8du)__A ^ (__v8du)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_xor_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_xor_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_xor_ps (__m512 __A, __m512 __B) {
  return (__m512)((__v16su)__A ^ (__v16su)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_xor_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_xor_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_or_pd(__m512d __A, __m512d __B) {
  return (__m512d)((__v8du)__A | (__v8du)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_or_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_or_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_or_ps(__m512 __A, __m512 __B) {
  return (__m512)((__v16su)__A | (__v16su)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_or_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_or_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_and_pd(__m512d __A, __m512d __B) {
  return (__m512d)((__v8du)__A & (__v8du)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_and_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_and_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_and_ps(__m512 __A, __m512 __B) {
  return (__m512)((__v16su)__A & (__v16su)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_and_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_and_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_andnot_pd(__m512d __A, __m512d __B) {
  return (__m512d)(~(__v8du)__A & (__v8du)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_andnot_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_andnot_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_andnot_ps(__m512 __A, __m512 __B) {
  return (__m512)(~(__v16su)__A & (__v16su)__B);
}
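
/* Illustrative usage sketch, not part of the original header: because
   _mm512_andnot_pd computes ~__A & __B, AND-NOT against a sign-bit mask
   clears every sign bit, i.e. computes |x| lane-wise. _mm512_set1_pd comes
   from <avx512fintrin.h>, which <immintrin.h> includes first. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
__example_abs_pd(__m512d __X)
{
  return _mm512_andnot_pd(_mm512_set1_pd(-0.0), __X);
}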
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_andnot_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_andnot_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtpd_epi64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                                                    (__v8di) _mm512_setzero_si512(),
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                                                    (__v8di) __W,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                                                    (__v8di) _mm512_setzero_si512(),
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundpd_epi64(A, R) \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)(__m512i)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)(U), (int)(R))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtpd_epu64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) -1,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                                                     (__v8di) __W,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundpd_epu64(A, R) \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R))
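
/* Illustrative usage sketch, not part of the original header: the *_round_*
   forms take an explicit rounding mode instead of the current MXCSR setting;
   round-to-nearest with exceptions suppressed is a common combination. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
__example_pd_to_epi64_nearest(__m512d __A)
{
  return _mm512_cvt_roundpd_epi64(__A, _MM_FROUND_TO_NEAREST_INT |
                                           _MM_FROUND_NO_EXC);
}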
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtps_epi64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                                                    (__v8di) _mm512_setzero_si512(),
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                                                    (__v8di) __W,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                                                    (__v8di) _mm512_setzero_si512(),
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundps_epi64(A, R) \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)(__m512i)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundps_epi64(U, A, R) \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)(U), (int)(R))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtps_epu64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) -1,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                                                     (__v8di) __W,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundps_epu64(A, R) \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundps_epu64(U, A, R) \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R))

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_cvtepi64_pd (__m512i __A) {
  return (__m512d)__builtin_convertvector((__v8di)__A, __v8df);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtepi64_pd(__A),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtepi64_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}

#define _mm512_cvt_roundepi64_pd(A, R) \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R))
#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_cvtepi64_ps (__m512i __A) {
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
                                                   (__v8sf) _mm256_setzero_ps(),
                                                   (__mmask8) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
                                                   (__v8sf) __W,
                                                   (__mmask8) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
                                                   (__v8sf) _mm256_setzero_ps(),
                                                   (__mmask8) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundepi64_ps(A, R) \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttpd_epi64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) -1,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                                                     (__v8di) __W,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundpd_epi64(A, R) \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R))
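
/* Illustrative usage sketch, not part of the original header: the cvtt
   ("convert with truncation") forms always round toward zero, whereas the
   plain cvt forms honor the current (or explicit) rounding direction. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
__example_pd_to_epi64_truncate(__m512d __A)
{
  return _mm512_cvttpd_epi64(__A); /* e.g. 2.9 -> 2 and -2.9 -> -2 */
}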
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttpd_epu64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                                                      (__v8di) _mm512_setzero_si512(),
                                                      (__mmask8) -1,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                                                      (__v8di) __W,
                                                      (__mmask8) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                                                      (__v8di) _mm512_setzero_si512(),
                                                      (__mmask8) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundpd_epu64(A, R) \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R))

#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttps_epi64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) -1,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                                                     (__v8di) __W,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                                                     (__v8di) _mm512_setzero_si512(),
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundps_epi64(A, R) \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttps_epu64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                                                      (__v8di) _mm512_setzero_si512(),
                                                      (__mmask8) -1,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                                                      (__v8di) __W,
                                                      (__mmask8) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                                                      (__v8di) _mm512_setzero_si512(),
                                                      (__mmask8) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundps_epu64(A, R) \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R))

#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R))

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_cvtepu64_pd (__m512i __A) {
  return (__m512d)__builtin_convertvector((__v8du)__A, __v8df);
}
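
/* Illustrative usage sketch, not part of the original header: the epu64
   variants treat each lane as unsigned, so values with the top bit set
   convert to large positive doubles instead of negative ones. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
__example_epu64_to_pd(__m512i __A)
{
  return _mm512_cvtepu64_pd(__A);
}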
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtepu64_pd(__A),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtepu64_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}

#define _mm512_cvt_roundepu64_pd(A, R) \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R))

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_cvtepu64_ps (__m512i __A) {
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                                                    (__v8sf) _mm256_setzero_ps(),
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                                                    (__v8sf) __W,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                                                    (__v8sf) _mm256_setzero_ps(),
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundepu64_ps(A, R) \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
                                           (int)(R))

#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_range_pd(A, B, C) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_range_pd(W, U, A, B, C) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)(__m512d)(W), (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_range_pd(U, A, B, C) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_range_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, (int)(R))
#define _mm512_mask_range_round_pd(W, U, A, B, C, R) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)(__m512d)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm512_maskz_range_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U), (int)(R))

#define _mm512_range_ps(A, B, C) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_range_ps(W, U, A, B, C) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)(__m512)(W), (__mmask16)(U), \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_range_ps(U, A, B, C) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U), \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_range_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, (int)(R))

#define _mm512_mask_range_round_ps(W, U, A, B, C, R) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)(__m512)(W), (__mmask16)(U), \
                                         (int)(R))

#define _mm512_maskz_range_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U), (int)(R))

#define _mm_range_round_ss(A, B, C, R) \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(C), \
                                               (int)(R))

#define _mm_range_ss(A, B, C) _mm_range_round_ss(A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_range_round_ss(W, U, A, B, C, R) \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(C), \
                                               (int)(R))

#define _mm_mask_range_ss(W, U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_range_round_ss(U, A, B, C, R) \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(C), \
                                               (int)(R))

#define _mm_maskz_range_ss(U, A, B, C) _mm_maskz_range_round_ss(U, A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_range_round_sd(A, B, C, R) \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(C), \
                                                (int)(R))

#define _mm_range_sd(A, B, C) _mm_range_round_sd(A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_range_round_sd(W, U, A, B, C, R) \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(C), \
                                                (int)(R))

#define _mm_mask_range_sd(W, U, A, B, C) _mm_mask_range_round_sd(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_range_round_sd(U, A, B, C, R) \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(C), \
                                                (int)(R))
#define _mm_maskz_range_sd(U, A, B, C) _mm_maskz_range_round_sd(U, A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm512_reduce_pd(A, B) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_reduce_pd(W, U, A, B) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_reduce_pd(U, A, B) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION)

#define _mm512_reduce_ps(A, B) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_reduce_ps(W, U, A, B) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_reduce_ps(U, A, B) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_reduce_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_reduce_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_reduce_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_reduce_round_ps(A, B, R) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask_reduce_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_maskz_reduce_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R))

#define _mm_reduce_ss(A, B, C) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                       (int)(C), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_reduce_ss(W, U, A, B, C) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)(__m128)(W), (__mmask8)(U), \
                                       (int)(C), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_reduce_ss(U, A, B, C) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)(U), (int)(C), \
                                       _MM_FROUND_CUR_DIRECTION)

#define _mm_reduce_round_ss(A, B, C, R) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                       (int)(C), (int)(R))
#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)(__m128)(W), (__mmask8)(U), \
                                       (int)(C), (int)(R))

#define _mm_maskz_reduce_round_ss(U, A, B, C, R) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)(U), (int)(C), (int)(R))

#define _mm_reduce_sd(A, B, C) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)-1, (int)(C), \
                                        _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_reduce_sd(W, U, A, B, C) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)(__m128d)(W), (__mmask8)(U), \
                                        (int)(C), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_reduce_sd(U, A, B, C) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)(U), (int)(C), \
                                        _MM_FROUND_CUR_DIRECTION)

#define _mm_reduce_round_sd(A, B, C, R) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)-1, (int)(C), (int)(R))

#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)(__m128d)(W), (__mmask8)(U), \
                                        (int)(C), (int)(R))

#define _mm_maskz_reduce_round_sd(U, A, B, C, R) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)(U), (int)(C), (int)(R))

static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
_mm512_movepi32_mask (__m512i __A)
{
  return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_movm_epi32 (__mmask16 __A)
{
  return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_movm_epi64 (__mmask8 __A)
{
  return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
_mm512_movepi64_mask (__m512i __A)
{
  return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_broadcast_f32x2 (__m128 __A)
{
  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
                                         0, 1, 0, 1, 0, 1, 0, 1,
                                         0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                             (__v16sf)_mm512_broadcast_f32x2(__A),
                                             (__v16sf)__O);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                             (__v16sf)_mm512_broadcast_f32x2(__A),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_broadcast_f32x8(__m256 __A)
{
  return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A,
                                         0, 1, 2, 3, 4, 5, 6, 7,
                                         0, 1, 2, 3, 4, 5, 6, 7);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                             (__v16sf)_mm512_broadcast_f32x8(__A),
                                             (__v16sf)__O);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                             (__v16sf)_mm512_broadcast_f32x8(__A),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_broadcast_f64x2(__m128d __A)
{
  return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
                                          0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                              (__v8df)_mm512_broadcast_f64x2(__A),
                                              (__v8df)__O);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                              (__v8df)_mm512_broadcast_f64x2(__A),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcast_i32x2 (__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                          0, 1, 0, 1, 0, 1, 0, 1,
                                          0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_broadcast_i32x2(__A),
                                             (__v16si)__O);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_broadcast_i32x2(__A),
                                             (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcast_i32x8(__m256i __A)
{
  return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A,
                                          0, 1, 2, 3, 4, 5, 6, 7,
                                          0, 1, 2, 3, 4, 5, 6, 7);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_broadcast_i32x8(__A),
                                             (__v16si)__O);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_broadcast_i32x8(__A),
                                             (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcast_i64x2(__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
                                          0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_broadcast_i64x2(__A),
                                             (__v8di)__O);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_broadcast_i64x2(__A),
                                             (__v8di)_mm512_setzero_si512());
}
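
/* Illustrative usage sketch, not part of the original header: the x2/x8
   broadcasts replicate an entire 128- or 256-bit block, so the two 64-bit
   elements of __A repeat four times across the 512-bit result. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
__example_broadcast_pair_epi64(__m128i __A)
{
  return _mm512_broadcast_i64x2(__A); /* {a0,a1,a0,a1,a0,a1,a0,a1} */
}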
#define _mm512_extractf32x8_ps(A, imm) \
  (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v8sf)_mm256_undefined_ps(), \
                                           (__mmask8)-1)

#define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
  (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v8sf)(__m256)(W), \
                                           (__mmask8)(U))

#define _mm512_maskz_extractf32x8_ps(U, A, imm) \
  (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U))

#define _mm512_extractf64x2_pd(A, imm) \
  (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                (int)(imm), \
                                                (__v2df)_mm_undefined_pd(), \
                                                (__mmask8)-1)

#define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
  (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                (int)(imm), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U))

#define _mm512_maskz_extractf64x2_pd(U, A, imm) \
  (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                (int)(imm), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U))

#define _mm512_extracti32x8_epi32(A, imm) \
  (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v8si)_mm256_undefined_si256(), \
                                            (__mmask8)-1)

#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
  (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U))

#define _mm512_maskz_extracti32x8_epi32(U, A, imm) \
  (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U))

#define _mm512_extracti64x2_epi64(A, imm) \
  (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_undefined_si128(), \
                                                (__mmask8)-1)

#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
  (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                (int)(imm), \
                                                (__v2di)(__m128i)(W), \
                                                (__mmask8)(U))

#define _mm512_maskz_extracti64x2_epi64(U, A, imm) \
  (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_setzero_si128(), \
                                                (__mmask8)(U))

#define _mm512_insertf32x8(A, B, imm) \
  (__m512)__builtin_ia32_insertf32x8((__v16sf)(__m512)(A), \
                                     (__v8sf)(__m256)(B), (int)(imm))

#define _mm512_mask_insertf32x8(W, U, A, B, imm) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_insertf32x8(U, A, B, imm) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
                                      (__v16sf)_mm512_setzero_ps())
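
/* Illustrative usage sketch, not part of the original header: swap the two
   256-bit halves of a vector with one extract and one insert;
   _mm512_castps256_ps512 comes from <avx512fintrin.h>. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
__example_swap_halves_ps(__m512 __A)
{
  __m256 __Lo = _mm512_extractf32x8_ps(__A, 0);
  __m256 __Hi = _mm512_extractf32x8_ps(__A, 1);
  return _mm512_insertf32x8(_mm512_castps256_ps512(__Hi), __Lo, 1);
}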
#define _mm512_insertf64x2(A, B, imm) \
  (__m512d)__builtin_ia32_insertf64x2_512((__v8df)(__m512d)(A), \
                                          (__v2df)(__m128d)(B), (int)(imm))

#define _mm512_mask_insertf64x2(W, U, A, B, imm) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_insertf64x2(U, A, B, imm) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_inserti32x8(A, B, imm) \
  (__m512i)__builtin_ia32_inserti32x8((__v16si)(__m512i)(A), \
                                      (__v8si)(__m256i)(B), (int)(imm))

#define _mm512_mask_inserti32x8(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
                                      (__v16si)(__m512i)(W))

#define _mm512_maskz_inserti32x8(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
                                      (__v16si)_mm512_setzero_si512())

#define _mm512_inserti64x2(A, B, imm) \
  (__m512i)__builtin_ia32_inserti64x2_512((__v8di)(__m512i)(A), \
                                          (__v2di)(__m128i)(B), (int)(imm))

#define _mm512_mask_inserti64x2(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
                                      (__v8di)(__m512i)(W))

#define _mm512_maskz_inserti64x2(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
                                      (__v8di)_mm512_setzero_si512())

#define _mm512_mask_fpclass_ps_mask(U, A, imm) \
  (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                              (int)(imm), (__mmask16)(U))

#define _mm512_fpclass_ps_mask(A, imm) \
  (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                              (int)(imm), (__mmask16)-1)

#define _mm512_mask_fpclass_pd_mask(U, A, imm) \
  (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__mmask8)(U))

#define _mm512_fpclass_pd_mask(A, imm) \
  (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__mmask8)-1)

#define _mm_fpclass_sd_mask(A, imm) \
  (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                          (__mmask8)-1)

#define _mm_mask_fpclass_sd_mask(U, A, imm) \
  (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                          (__mmask8)(U))

#define _mm_fpclass_ss_mask(A, imm) \
  (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                          (__mmask8)-1)

#define _mm_mask_fpclass_ss_mask(U, A, imm) \
  (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                          (__mmask8)(U))

#undef __DEFAULT_FN_ATTRS512
#undef __DEFAULT_FN_ATTRS

#endif