/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512VLDQINTRIN_H
#define __AVX512VLDQINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(256)))

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
  return (__m256i) ((__v4du) __A * (__v4du) __B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                             (__v4di)_mm256_mullo_epi64(__A, __B),
                                             (__v4di)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                             (__v4di)_mm256_mullo_epi64(__A, __B),
                                             (__v4di)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mullo_epi64 (__m128i __A, __m128i __B) {
  return (__m128i) ((__v2du) __A * (__v2du) __B);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                             (__v2di)_mm_mullo_epi64(__A, __B),
                                             (__v2di)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                             (__v2di)_mm_mullo_epi64(__A, __B),
                                             (__v2di)_mm_setzero_si128());
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                              (__v4df)_mm256_andnot_pd(__A, __B),
                                              (__v4df)__W);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                              (__v4df)_mm256_andnot_pd(__A, __B),
                                              (__v4df)_mm256_setzero_pd());
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                              (__v2df)_mm_andnot_pd(__A, __B),
                                              (__v2df)__W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                              (__v2df)_mm_andnot_pd(__A, __B),
                                              (__v2df)_mm_setzero_pd());
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                             (__v8sf)_mm256_andnot_ps(__A, __B),
                                             (__v8sf)__W);
}
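
/* Illustrative usage sketch, not part of the original header: VPMULLQ keeps
 * the low 64 bits of each lane product, and the masked variants select
 * between the product and the pass-through/zero value per lane. Assuming a
 * translation unit compiled with -mavx512vl -mavx512dq (lane 0 listed first):
 *
 *   __m256i a = _mm256_set_epi64x(4, 3, 2, 1);         // lanes {1, 2, 3, 4}
 *   __m256i b = _mm256_set_epi64x(8, 7, 6, 5);         // lanes {5, 6, 7, 8}
 *   __m256i w = _mm256_setzero_si256();
 *   // Mask 0x5 keeps lanes 0 and 2; lanes 1 and 3 take w (zero here).
 *   __m256i r = _mm256_mask_mullo_epi64(w, 0x5, a, b); // {5, 0, 21, 0}
 */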

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                             (__v8sf)_mm256_andnot_ps(__A, __B),
                                             (__v8sf)_mm256_setzero_ps());
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm_andnot_ps(__A, __B),
                                             (__v4sf)__W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm_andnot_ps(__A, __B),
                                             (__v4sf)_mm_setzero_ps());
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                              (__v4df)_mm256_and_pd(__A, __B),
                                              (__v4df)__W);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                              (__v4df)_mm256_and_pd(__A, __B),
                                              (__v4df)_mm256_setzero_pd());
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                              (__v2df)_mm_and_pd(__A, __B),
                                              (__v2df)__W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                              (__v2df)_mm_and_pd(__A, __B),
                                              (__v2df)_mm_setzero_pd());
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                             (__v8sf)_mm256_and_ps(__A, __B),
                                             (__v8sf)__W);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                             (__v8sf)_mm256_and_ps(__A, __B),
                                             (__v8sf)_mm256_setzero_ps());
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm_and_ps(__A, __B),
                                             (__v4sf)__W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm_and_ps(__A, __B),
                                             (__v4sf)_mm_setzero_ps());
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                              (__v4df)_mm256_xor_pd(__A, __B),
                                              (__v4df)__W);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                              (__v4df)_mm256_xor_pd(__A, __B),
                                              (__v4df)_mm256_setzero_pd());
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                              (__v2df)_mm_xor_pd(__A, __B),
                                              (__v2df)__W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                              (__v2df)_mm_xor_pd(__A, __B),
                                              (__v2df)_mm_setzero_pd());
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                             (__v8sf)_mm256_xor_ps(__A, __B),
                                             (__v8sf)__W);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                             (__v8sf)_mm256_xor_ps(__A, __B),
                                             (__v8sf)_mm256_setzero_ps());
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm_xor_ps(__A, __B),
                                             (__v4sf)__W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm_xor_ps(__A, __B),
                                             (__v4sf)_mm_setzero_ps());
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                              (__v4df)_mm256_or_pd(__A, __B),
                                              (__v4df)__W);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                              (__v4df)_mm256_or_pd(__A, __B),
                                              (__v4df)_mm256_setzero_pd());
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                              (__v2df)_mm_or_pd(__A, __B),
                                              (__v2df)__W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                              (__v2df)_mm_or_pd(__A, __B),
                                              (__v2df)_mm_setzero_pd());
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                             (__v8sf)_mm256_or_ps(__A, __B),
                                             (__v8sf)__W);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                             (__v8sf)_mm256_or_ps(__A, __B),
                                             (__v8sf)_mm256_setzero_ps());
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm_or_ps(__A, __B),
                                             (__v4sf)__W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm_or_ps(__A, __B),
                                             (__v4sf)_mm_setzero_ps());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtpd_epi64 (__m128d __A) {
  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
                                                    (__v2di) _mm_setzero_si128(),
                                                    (__mmask8) -1);
}

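/* Illustrative note, not part of the original header: the non-truncating
 * conversions above and below follow the current MXCSR rounding mode (round
 * to nearest even by default), and the masked forms merge with __W or zero
 * the inactive lanes. For example (lane 0 listed first):
 *
 *   __m128d x  = _mm_set_pd(2.5, -1.5);          // lanes {-1.5, 2.5}
 *   __m128i r0 = _mm_cvtpd_epi64(x);             // {-2, 2} (ties to even)
 *   __m128i r1 = _mm_maskz_cvtpd_epi64(0x1, x);  // {-2, 0}
 */
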
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
                                                    (__v2di) __W,
                                                    (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) {
  return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
                                                    (__v2di) _mm_setzero_si128(),
                                                    (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtpd_epi64 (__m256d __A) {
  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
                                                    (__v4di) _mm256_setzero_si256(),
                                                    (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
                                                    (__v4di) __W,
                                                    (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) {
  return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
                                                    (__v4di) _mm256_setzero_si256(),
                                                    (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtpd_epu64 (__m128d __A) {
  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
                                                     (__v2di) _mm_setzero_si128(),
                                                     (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
                                                     (__v2di) __W,
                                                     (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) {
  return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
                                                     (__v2di) _mm_setzero_si128(),
                                                     (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtpd_epu64 (__m256d __A) {
  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
                                                     (__v4di) _mm256_setzero_si256(),
                                                     (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
                                                     (__v4di) __W,
                                                     (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) {
  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
                                                     (__v4di) _mm256_setzero_si256(),
                                                     (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtps_epi64 (__m128 __A) {
  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
                                                    (__v2di) _mm_setzero_si128(),
                                                    (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
                                                    (__v2di) __W,
                                                    (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
  return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
                                                    (__v2di) _mm_setzero_si128(),
                                                    (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtps_epi64 (__m128 __A) {
  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
                                                    (__v4di) _mm256_setzero_si256(),
                                                    (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
                                                    (__v4di) __W,
                                                    (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
  return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
                                                    (__v4di) _mm256_setzero_si256(),
                                                    (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtps_epu64 (__m128 __A) {
  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
                                                     (__v2di) _mm_setzero_si128(),
                                                     (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
                                                     (__v2di) __W,
                                                     (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
  return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
                                                     (__v2di) _mm_setzero_si128(),
                                                     (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvtps_epu64 (__m128 __A) {
  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
                                                     (__v4di) _mm256_setzero_si256(),
                                                     (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
                                                     (__v4di) __W,
                                                     (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
  return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
                                                     (__v4di) _mm256_setzero_si256(),
                                                     (__mmask8) __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_cvtepi64_pd (__m128i __A) {
  return (__m128d)__builtin_convertvector((__v2di)__A, __v2df);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                              (__v2df)_mm_cvtepi64_pd(__A),
                                              (__v2df)__W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                              (__v2df)_mm_cvtepi64_pd(__A),
                                              (__v2df)_mm_setzero_pd());
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_cvtepi64_pd (__m256i __A) {
  return (__m256d)__builtin_convertvector((__v4di)__A, __v4df);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                              (__v4df)_mm256_cvtepi64_pd(__A),
                                              (__v4df)__W);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                              (__v4df)_mm256_cvtepi64_pd(__A),
                                              (__v4df)_mm256_setzero_pd());
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_cvtepi64_ps (__m128i __A) {
  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
                                                   (__v4sf) _mm_setzero_ps(),
                                                   (__mmask8) -1);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
                                                   (__v4sf) __W,
                                                   (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) {
  return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
                                                   (__v4sf) _mm_setzero_ps(),
                                                   (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS256
_mm256_cvtepi64_ps (__m256i __A) {
  return (__m128)__builtin_convertvector((__v4di)__A, __v4sf);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS256
_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm256_cvtepi64_ps(__A),
                                             (__v4sf)__W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm256_cvtepi64_ps(__A),
                                             (__v4sf)_mm_setzero_ps());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvttpd_epi64 (__m128d __A) {
  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
                                                     (__v2di) _mm_setzero_si128(),
                                                     (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
                                                     (__v2di) __W,
                                                     (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) {
  return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
                                                     (__v2di) _mm_setzero_si128(),
                                                     (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvttpd_epi64 (__m256d __A) {
  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
                                                     (__v4di) _mm256_setzero_si256(),
                                                     (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
                                                     (__v4di) __W,
                                                     (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) {
  return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
                                                     (__v4di) _mm256_setzero_si256(),
                                                     (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvttpd_epu64 (__m128d __A) {
  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
                                                      (__v2di) _mm_setzero_si128(),
                                                      (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
                                                      (__v2di) __W,
                                                      (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) {
  return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
                                                      (__v2di) _mm_setzero_si128(),
                                                      (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvttpd_epu64 (__m256d __A) {
  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
                                                      (__v4di) _mm256_setzero_si256(),
                                                      (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
                                                      (__v4di) __W,
                                                      (__mmask8) __U);
}
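
/* Illustrative note, not part of the original header: the "cvtt" intrinsics
 * truncate toward zero, while the "cvt" forms above follow the current
 * rounding mode (round to nearest even by default). For example (lane 0
 * listed first):
 *
 *   __m128d x = _mm_set_pd(2.7, -1.7);  // lanes {-1.7, 2.7}
 *   __m128i t = _mm_cvttpd_epi64(x);    // {-1, 2}  (truncated)
 *   __m128i r = _mm_cvtpd_epi64(x);     // {-2, 3}  (rounded to nearest)
 */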

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) {
  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
                                                      (__v4di) _mm256_setzero_si256(),
                                                      (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvttps_epi64 (__m128 __A) {
  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
                                                     (__v2di) _mm_setzero_si128(),
                                                     (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
                                                     (__v2di) __W,
                                                     (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
  return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
                                                     (__v2di) _mm_setzero_si128(),
                                                     (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvttps_epi64 (__m128 __A) {
  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
                                                     (__v4di) _mm256_setzero_si256(),
                                                     (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
                                                     (__v4di) __W,
                                                     (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
  return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
                                                     (__v4di) _mm256_setzero_si256(),
                                                     (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvttps_epu64 (__m128 __A) {
  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
                                                      (__v2di) _mm_setzero_si128(),
                                                      (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
                                                      (__v2di) __W,
                                                      (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
  return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
                                                      (__v2di) _mm_setzero_si128(),
                                                      (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvttps_epu64 (__m128 __A) {
  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
                                                      (__v4di) _mm256_setzero_si256(),
                                                      (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
                                                      (__v4di) __W,
                                                      (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
  return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
                                                      (__v4di) _mm256_setzero_si256(),
                                                      (__mmask8) __U);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_cvtepu64_pd (__m128i __A) {
  return (__m128d)__builtin_convertvector((__v2du)__A, __v2df);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                              (__v2df)_mm_cvtepu64_pd(__A),
                                              (__v2df)__W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) {
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                              (__v2df)_mm_cvtepu64_pd(__A),
                                              (__v2df)_mm_setzero_pd());
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_cvtepu64_pd (__m256i __A) {
  return (__m256d)__builtin_convertvector((__v4du)__A, __v4df);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                              (__v4df)_mm256_cvtepu64_pd(__A),
                                              (__v4df)__W);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) {
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                              (__v4df)_mm256_cvtepu64_pd(__A),
                                              (__v4df)_mm256_setzero_pd());
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_cvtepu64_ps (__m128i __A) {
  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
                                                    (__v4sf) _mm_setzero_ps(),
                                                    (__mmask8) -1);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
                                                    (__v4sf) __W,
                                                    (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) {
  return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
                                                    (__v4sf) _mm_setzero_ps(),
                                                    (__mmask8) __U);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS256
_mm256_cvtepu64_ps (__m256i __A) {
  return (__m128)__builtin_convertvector((__v4du)__A, __v4sf);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS256
_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm256_cvtepu64_ps(__A),
                                             (__v4sf)__W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) {
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                             (__v4sf)_mm256_cvtepu64_ps(__A),
                                             (__v4sf)_mm_setzero_ps());
}

#define _mm_range_pd(A, B, C) \
  (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (int)(C), \
                                          (__v2df)_mm_setzero_pd(), \
                                          (__mmask8)-1)

#define _mm_mask_range_pd(W, U, A, B, C) \
  (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (int)(C), \
                                          (__v2df)(__m128d)(W), \
                                          (__mmask8)(U))

#define _mm_maskz_range_pd(U, A, B, C) \
  (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (int)(C), \
                                          (__v2df)_mm_setzero_pd(), \
                                          (__mmask8)(U))

#define _mm256_range_pd(A, B, C) \
  (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                          (__v4df)(__m256d)(B), (int)(C), \
                                          (__v4df)_mm256_setzero_pd(), \
                                          (__mmask8)-1)

#define _mm256_mask_range_pd(W, U, A, B, C) \
  (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                          (__v4df)(__m256d)(B), (int)(C), \
                                          (__v4df)(__m256d)(W), \
                                          (__mmask8)(U))

#define _mm256_maskz_range_pd(U, A, B, C) \
  (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                          (__v4df)(__m256d)(B), (int)(C), \
                                          (__v4df)_mm256_setzero_pd(), \
                                          (__mmask8)(U))

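/* Illustrative usage sketch, not part of the original header. For the range
 * macros, imm8 bits [1:0] select the operation (min, max, absolute min,
 * absolute max) and bits [3:2] select the sign control; this encoding is an
 * assumption based on the VRANGEPD/VRANGEPS documentation. A masked call
 * might look like:
 *
 *   // Lanes whose bit in k is set get range(a, b, imm); the rest keep src.
 *   __m256d r = _mm256_mask_range_pd(src, k, a, b, 0x5);
 */
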
#define _mm_range_ps(A, B, C) \
  (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (int)(C), \
                                         (__v4sf)_mm_setzero_ps(), \
                                         (__mmask8)-1)

#define _mm_mask_range_ps(W, U, A, B, C) \
  (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (int)(C), \
                                         (__v4sf)(__m128)(W), (__mmask8)(U))

#define _mm_maskz_range_ps(U, A, B, C) \
  (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (int)(C), \
                                         (__v4sf)_mm_setzero_ps(), \
                                         (__mmask8)(U))

#define _mm256_range_ps(A, B, C) \
  (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                         (__v8sf)(__m256)(B), (int)(C), \
                                         (__v8sf)_mm256_setzero_ps(), \
                                         (__mmask8)-1)

#define _mm256_mask_range_ps(W, U, A, B, C) \
  (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                         (__v8sf)(__m256)(B), (int)(C), \
                                         (__v8sf)(__m256)(W), (__mmask8)(U))

#define _mm256_maskz_range_ps(U, A, B, C) \
  (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                         (__v8sf)(__m256)(B), (int)(C), \
                                         (__v8sf)_mm256_setzero_ps(), \
                                         (__mmask8)(U))

#define _mm_reduce_pd(A, B) \
  (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1)

#define _mm_mask_reduce_pd(W, U, A, B) \
  (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U))

#define _mm_maskz_reduce_pd(U, A, B) \
  (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U))

#define _mm256_reduce_pd(A, B) \
  (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                           (__v4df)_mm256_setzero_pd(), \
                                           (__mmask8)-1)

#define _mm256_mask_reduce_pd(W, U, A, B) \
  (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                           (__v4df)(__m256d)(W), \
                                           (__mmask8)(U))

#define _mm256_maskz_reduce_pd(U, A, B) \
  (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                           (__v4df)_mm256_setzero_pd(), \
                                           (__mmask8)(U))

#define _mm_reduce_ps(A, B) \
  (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1)

#define _mm_mask_reduce_ps(W, U, A, B) \
  (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                          (__v4sf)(__m128)(W), \
                                          (__mmask8)(U))

#define _mm_maskz_reduce_ps(U, A, B) \
  (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U))

#define _mm256_reduce_ps(A, B) \
  (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1)

#define _mm256_mask_reduce_ps(W, U, A, B) \
  (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                          (__v8sf)(__m256)(W), \
                                          (__mmask8)(U))

#define _mm256_maskz_reduce_ps(U, A, B) \
  (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U))

static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
_mm_movepi32_mask (__m128i __A)
{
  return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
_mm256_movepi32_mask (__m256i __A)
{
  return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_movm_epi32 (__mmask8 __A)
{
  return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_movm_epi32 (__mmask8 __A)
{
  return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_movm_epi64 (__mmask8 __A)
{
  return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_movm_epi64 (__mmask8 __A)
{
  return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
_mm_movepi64_mask (__m128i __A)
{
  return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
_mm256_movepi64_mask (__m256i __A)
{
  return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_broadcast_f32x2 (__m128 __A)
{
  return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
                                         0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
{
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
                                             (__v8sf)_mm256_broadcast_f32x2(__A),
                                             (__v8sf)__O);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
{
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
                                             (__v8sf)_mm256_broadcast_f32x2(__A),
                                             (__v8sf)_mm256_setzero_ps());
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_broadcast_f64x2(__m128d __A)
{
  return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
                                          0, 1, 0, 1);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A)
{
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
                                              (__v4df)_mm256_broadcast_f64x2(__A),
                                              (__v4df)__O);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS256
_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
{
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
                                              (__v4df)_mm256_broadcast_f64x2(__A),
                                              (__v4df)_mm256_setzero_pd());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_broadcast_i32x2 (__m128i __A)
{
  return (__m128i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                          0, 1, 0, 1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
{
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                             (__v4si)_mm_broadcast_i32x2(__A),
                                             (__v4si)__O);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
{
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                             (__v4si)_mm_broadcast_i32x2(__A),
                                             (__v4si)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_broadcast_i32x2 (__m128i __A)
{
  return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                          0, 1, 0, 1, 0, 1, 0, 1);
}

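/* Illustrative usage sketch, not part of the original header: the x2
 * broadcasts repeat the two lowest elements of the source across the
 * destination. For example (lane 0 listed first):
 *
 *   __m128 v = _mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);  // lanes {0, 1, 2, 3}
 *   __m256 r = _mm256_broadcast_f32x2(v);           // {0, 1, 0, 1, 0, 1, 0, 1}
 */
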
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
{
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                             (__v8si)_mm256_broadcast_i32x2(__A),
                                             (__v8si)__O);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
{
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                             (__v8si)_mm256_broadcast_i32x2(__A),
                                             (__v8si)_mm256_setzero_si256());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_broadcast_i64x2(__m128i __A)
{
  return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
                                          0, 1, 0, 1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A)
{
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                             (__v4di)_mm256_broadcast_i64x2(__A),
                                             (__v4di)__O);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
{
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                             (__v4di)_mm256_broadcast_i64x2(__A),
                                             (__v4di)_mm256_setzero_si256());
}

#define _mm256_extractf64x2_pd(A, imm) \
  (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                (int)(imm), \
                                                (__v2df)_mm_undefined_pd(), \
                                                (__mmask8)-1)

#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
  (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                (int)(imm), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U))

#define _mm256_maskz_extractf64x2_pd(U, A, imm) \
  (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                (int)(imm), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U))

#define _mm256_extracti64x2_epi64(A, imm) \
  (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_undefined_si128(), \
                                                (__mmask8)-1)

#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
  (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                (int)(imm), \
                                                (__v2di)(__m128i)(W), \
                                                (__mmask8)(U))

#define _mm256_maskz_extracti64x2_epi64(U, A, imm) \
  (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_setzero_si128(), \
                                                (__mmask8)(U))

#define _mm256_insertf64x2(A, B, imm) \
  (__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \
                                          (__v2df)(__m128d)(B), (int)(imm))

#define _mm256_mask_insertf64x2(W, U, A, B, imm) \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
                                       (__v4df)(__m256d)(W))

#define _mm256_maskz_insertf64x2(U, A, B, imm) \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
                                       (__v4df)_mm256_setzero_pd())

#define _mm256_inserti64x2(A, B, imm) \
  (__m256i)__builtin_ia32_inserti64x2_256((__v4di)(__m256i)(A), \
                                          (__v2di)(__m128i)(B), (int)(imm))

#define _mm256_mask_inserti64x2(W, U, A, B, imm) \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
                                      (__v4di)(__m256i)(W))

#define _mm256_maskz_inserti64x2(U, A, B, imm) \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
                                      (__v4di)_mm256_setzero_si256())

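/* Illustrative usage sketch, not part of the original header: for the 64x2
 * extract/insert forms, imm selects which 128-bit half is read or written
 * (0 = low half, 1 = high half). For example (lane 0 listed first):
 *
 *   __m256d v  = _mm256_set_pd(4.0, 3.0, 2.0, 1.0);  // lanes {1, 2, 3, 4}
 *   __m128d hi = _mm256_extractf64x2_pd(v, 1);       // {3, 4}
 *   __m256d r  = _mm256_insertf64x2(v, hi, 0);       // {3, 4, 3, 4}
 */
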
#define _mm_mask_fpclass_pd_mask(U, A, imm) \
  (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
                                             (__mmask8)(U))

#define _mm_fpclass_pd_mask(A, imm) \
  (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
                                             (__mmask8)-1)

#define _mm256_mask_fpclass_pd_mask(U, A, imm) \
  (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
                                             (__mmask8)(U))

#define _mm256_fpclass_pd_mask(A, imm) \
  (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
                                             (__mmask8)-1)

#define _mm_mask_fpclass_ps_mask(U, A, imm) \
  (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                             (__mmask8)(U))

#define _mm_fpclass_ps_mask(A, imm) \
  (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                             (__mmask8)-1)

#define _mm256_mask_fpclass_ps_mask(U, A, imm) \
  (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                             (__mmask8)(U))

#define _mm256_fpclass_ps_mask(A, imm) \
  (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                             (__mmask8)-1)

#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256

#endif