1 /*===------------- avx512vlvbmi2intrin.h - VBMI2 intrinsics -----------------=== 2 * 3 * 4 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 * See https://llvm.org/LICENSE.txt for license information. 6 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 * 8 *===-----------------------------------------------------------------------=== 9 */ 10 #ifndef __IMMINTRIN_H 11 #error "Never use <avx512vlvbmi2intrin.h> directly; include <immintrin.h> instead." 12 #endif 13 14 #ifndef __AVX512VLVBMI2INTRIN_H 15 #define __AVX512VLVBMI2INTRIN_H 16 17 /* Define the default attributes for the functions in this file. */ 18 #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(128))) 19 #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(256))) 20 21 static __inline__ __m128i __DEFAULT_FN_ATTRS128 22 _mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) 23 { 24 return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, 25 (__v8hi) __S, 26 __U); 27 } 28 29 static __inline__ __m128i __DEFAULT_FN_ATTRS128 30 _mm_maskz_compress_epi16(__mmask8 __U, __m128i __D) 31 { 32 return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, 33 (__v8hi) _mm_setzero_si128(), 34 __U); 35 } 36 37 static __inline__ __m128i __DEFAULT_FN_ATTRS128 38 _mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) 39 { 40 return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, 41 (__v16qi) __S, 42 __U); 43 } 44 45 static __inline__ __m128i __DEFAULT_FN_ATTRS128 46 _mm_maskz_compress_epi8(__mmask16 __U, __m128i __D) 47 { 48 return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, 49 (__v16qi) _mm_setzero_si128(), 50 __U); 51 } 52 53 static __inline__ void __DEFAULT_FN_ATTRS128 54 _mm_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D) 55 { 56 __builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi) __D, 57 __U); 58 } 59 60 static __inline__ void __DEFAULT_FN_ATTRS128 61 _mm_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D) 62 { 63 __builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi) __D, 64 __U); 65 } 66 67 static __inline__ __m128i __DEFAULT_FN_ATTRS128 68 _mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) 69 { 70 return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, 71 (__v8hi) __S, 72 __U); 73 } 74 75 static __inline__ __m128i __DEFAULT_FN_ATTRS128 76 _mm_maskz_expand_epi16(__mmask8 __U, __m128i __D) 77 { 78 return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, 79 (__v8hi) _mm_setzero_si128(), 80 __U); 81 } 82 83 static __inline__ __m128i __DEFAULT_FN_ATTRS128 84 _mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) 85 { 86 return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, 87 (__v16qi) __S, 88 __U); 89 } 90 91 static __inline__ __m128i __DEFAULT_FN_ATTRS128 92 _mm_maskz_expand_epi8(__mmask16 __U, __m128i __D) 93 { 94 return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, 95 (__v16qi) _mm_setzero_si128(), 96 __U); 97 } 98 99 static __inline__ __m128i __DEFAULT_FN_ATTRS128 100 _mm_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P) 101 { 102 return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, 103 (__v8hi) __S, 104 __U); 105 } 106 107 static __inline__ __m128i __DEFAULT_FN_ATTRS128 108 _mm_maskz_expandloadu_epi16(__mmask8 __U, void const *__P) 109 { 110 return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, 111 (__v8hi) _mm_setzero_si128(), 112 __U); 113 } 114 115 static __inline__ __m128i __DEFAULT_FN_ATTRS128 116 _mm_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P) 117 { 118 return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, 119 (__v16qi) __S, 120 __U); 121 } 122 123 static __inline__ __m128i __DEFAULT_FN_ATTRS128 124 _mm_maskz_expandloadu_epi8(__mmask16 __U, void const *__P) 125 { 126 return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, 127 (__v16qi) _mm_setzero_si128(), 128 __U); 129 } 130 131 static __inline__ __m256i __DEFAULT_FN_ATTRS256 132 _mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D) 133 { 134 return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, 135 (__v16hi) __S, 136 __U); 137 } 138 139 static __inline__ __m256i __DEFAULT_FN_ATTRS256 140 _mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D) 141 { 142 return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, 143 (__v16hi) _mm256_setzero_si256(), 144 __U); 145 } 146 147 static __inline__ __m256i __DEFAULT_FN_ATTRS256 148 _mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D) 149 { 150 return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, 151 (__v32qi) __S, 152 __U); 153 } 154 155 static __inline__ __m256i __DEFAULT_FN_ATTRS256 156 _mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D) 157 { 158 return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, 159 (__v32qi) _mm256_setzero_si256(), 160 __U); 161 } 162 163 static __inline__ void __DEFAULT_FN_ATTRS256 164 _mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D) 165 { 166 __builtin_ia32_compressstorehi256_mask ((__v16hi *) __P, (__v16hi) __D, 167 __U); 168 } 169 170 static __inline__ void __DEFAULT_FN_ATTRS256 171 _mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D) 172 { 173 __builtin_ia32_compressstoreqi256_mask ((__v32qi *) __P, (__v32qi) __D, 174 __U); 175 } 176 177 static __inline__ __m256i __DEFAULT_FN_ATTRS256 178 _mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D) 179 { 180 return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, 181 (__v16hi) __S, 182 __U); 183 } 184 185 static __inline__ __m256i __DEFAULT_FN_ATTRS256 186 _mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D) 187 { 188 return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, 189 (__v16hi) _mm256_setzero_si256(), 190 __U); 191 } 192 193 static __inline__ __m256i __DEFAULT_FN_ATTRS256 194 _mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D) 195 { 196 return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, 197 (__v32qi) __S, 198 __U); 199 } 200 201 static __inline__ __m256i __DEFAULT_FN_ATTRS256 202 _mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D) 203 { 204 return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, 205 (__v32qi) _mm256_setzero_si256(), 206 __U); 207 } 208 209 static __inline__ __m256i __DEFAULT_FN_ATTRS256 210 _mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const *__P) 211 { 212 return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, 213 (__v16hi) __S, 214 __U); 215 } 216 217 static __inline__ __m256i __DEFAULT_FN_ATTRS256 218 _mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P) 219 { 220 return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, 221 (__v16hi) _mm256_setzero_si256(), 222 __U); 223 } 224 225 static __inline__ __m256i __DEFAULT_FN_ATTRS256 226 _mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const *__P) 227 { 228 return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, 229 (__v32qi) __S, 230 __U); 231 } 232 233 static __inline__ __m256i __DEFAULT_FN_ATTRS256 234 _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) 235 { 236 return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, 237 (__v32qi) _mm256_setzero_si256(), 238 __U); 239 } 240 241 #define _mm256_shldi_epi64(A, B, I) \ 242 ((__m256i)__builtin_ia32_vpshldq256((__v4di)(__m256i)(A), \ 243 (__v4di)(__m256i)(B), (int)(I))) 244 245 #define _mm256_mask_shldi_epi64(S, U, A, B, I) \ 246 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 247 (__v4di)_mm256_shldi_epi64((A), (B), (I)), \ 248 (__v4di)(__m256i)(S))) 249 250 #define _mm256_maskz_shldi_epi64(U, A, B, I) \ 251 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 252 (__v4di)_mm256_shldi_epi64((A), (B), (I)), \ 253 (__v4di)_mm256_setzero_si256())) 254 255 #define _mm_shldi_epi64(A, B, I) \ 256 ((__m128i)__builtin_ia32_vpshldq128((__v2di)(__m128i)(A), \ 257 (__v2di)(__m128i)(B), (int)(I))) 258 259 #define _mm_mask_shldi_epi64(S, U, A, B, I) \ 260 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 261 (__v2di)_mm_shldi_epi64((A), (B), (I)), \ 262 (__v2di)(__m128i)(S))) 263 264 #define _mm_maskz_shldi_epi64(U, A, B, I) \ 265 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 266 (__v2di)_mm_shldi_epi64((A), (B), (I)), \ 267 (__v2di)_mm_setzero_si128())) 268 269 #define _mm256_shldi_epi32(A, B, I) \ 270 ((__m256i)__builtin_ia32_vpshldd256((__v8si)(__m256i)(A), \ 271 (__v8si)(__m256i)(B), (int)(I))) 272 273 #define _mm256_mask_shldi_epi32(S, U, A, B, I) \ 274 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 275 (__v8si)_mm256_shldi_epi32((A), (B), (I)), \ 276 (__v8si)(__m256i)(S))) 277 278 #define _mm256_maskz_shldi_epi32(U, A, B, I) \ 279 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 280 (__v8si)_mm256_shldi_epi32((A), (B), (I)), \ 281 (__v8si)_mm256_setzero_si256())) 282 283 #define _mm_shldi_epi32(A, B, I) \ 284 ((__m128i)__builtin_ia32_vpshldd128((__v4si)(__m128i)(A), \ 285 (__v4si)(__m128i)(B), (int)(I))) 286 287 #define _mm_mask_shldi_epi32(S, U, A, B, I) \ 288 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 289 (__v4si)_mm_shldi_epi32((A), (B), (I)), \ 290 (__v4si)(__m128i)(S))) 291 292 #define _mm_maskz_shldi_epi32(U, A, B, I) \ 293 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 294 (__v4si)_mm_shldi_epi32((A), (B), (I)), \ 295 (__v4si)_mm_setzero_si128())) 296 297 #define _mm256_shldi_epi16(A, B, I) \ 298 ((__m256i)__builtin_ia32_vpshldw256((__v16hi)(__m256i)(A), \ 299 (__v16hi)(__m256i)(B), (int)(I))) 300 301 #define _mm256_mask_shldi_epi16(S, U, A, B, I) \ 302 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 303 (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \ 304 (__v16hi)(__m256i)(S))) 305 306 #define _mm256_maskz_shldi_epi16(U, A, B, I) \ 307 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 308 (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \ 309 (__v16hi)_mm256_setzero_si256())) 310 311 #define _mm_shldi_epi16(A, B, I) \ 312 ((__m128i)__builtin_ia32_vpshldw128((__v8hi)(__m128i)(A), \ 313 (__v8hi)(__m128i)(B), (int)(I))) 314 315 #define _mm_mask_shldi_epi16(S, U, A, B, I) \ 316 ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 317 (__v8hi)_mm_shldi_epi16((A), (B), (I)), \ 318 (__v8hi)(__m128i)(S))) 319 320 #define _mm_maskz_shldi_epi16(U, A, B, I) \ 321 ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 322 (__v8hi)_mm_shldi_epi16((A), (B), (I)), \ 323 (__v8hi)_mm_setzero_si128())) 324 325 #define _mm256_shrdi_epi64(A, B, I) \ 326 ((__m256i)__builtin_ia32_vpshrdq256((__v4di)(__m256i)(A), \ 327 (__v4di)(__m256i)(B), (int)(I))) 328 329 #define _mm256_mask_shrdi_epi64(S, U, A, B, I) \ 330 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 331 (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \ 332 (__v4di)(__m256i)(S))) 333 334 #define _mm256_maskz_shrdi_epi64(U, A, B, I) \ 335 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 336 (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \ 337 (__v4di)_mm256_setzero_si256())) 338 339 #define _mm_shrdi_epi64(A, B, I) \ 340 ((__m128i)__builtin_ia32_vpshrdq128((__v2di)(__m128i)(A), \ 341 (__v2di)(__m128i)(B), (int)(I))) 342 343 #define _mm_mask_shrdi_epi64(S, U, A, B, I) \ 344 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 345 (__v2di)_mm_shrdi_epi64((A), (B), (I)), \ 346 (__v2di)(__m128i)(S))) 347 348 #define _mm_maskz_shrdi_epi64(U, A, B, I) \ 349 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 350 (__v2di)_mm_shrdi_epi64((A), (B), (I)), \ 351 (__v2di)_mm_setzero_si128())) 352 353 #define _mm256_shrdi_epi32(A, B, I) \ 354 ((__m256i)__builtin_ia32_vpshrdd256((__v8si)(__m256i)(A), \ 355 (__v8si)(__m256i)(B), (int)(I))) 356 357 #define _mm256_mask_shrdi_epi32(S, U, A, B, I) \ 358 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 359 (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \ 360 (__v8si)(__m256i)(S))) 361 362 #define _mm256_maskz_shrdi_epi32(U, A, B, I) \ 363 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 364 (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \ 365 (__v8si)_mm256_setzero_si256())) 366 367 #define _mm_shrdi_epi32(A, B, I) \ 368 ((__m128i)__builtin_ia32_vpshrdd128((__v4si)(__m128i)(A), \ 369 (__v4si)(__m128i)(B), (int)(I))) 370 371 #define _mm_mask_shrdi_epi32(S, U, A, B, I) \ 372 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 373 (__v4si)_mm_shrdi_epi32((A), (B), (I)), \ 374 (__v4si)(__m128i)(S))) 375 376 #define _mm_maskz_shrdi_epi32(U, A, B, I) \ 377 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 378 (__v4si)_mm_shrdi_epi32((A), (B), (I)), \ 379 (__v4si)_mm_setzero_si128())) 380 381 #define _mm256_shrdi_epi16(A, B, I) \ 382 ((__m256i)__builtin_ia32_vpshrdw256((__v16hi)(__m256i)(A), \ 383 (__v16hi)(__m256i)(B), (int)(I))) 384 385 #define _mm256_mask_shrdi_epi16(S, U, A, B, I) \ 386 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 387 (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \ 388 (__v16hi)(__m256i)(S))) 389 390 #define _mm256_maskz_shrdi_epi16(U, A, B, I) \ 391 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 392 (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \ 393 (__v16hi)_mm256_setzero_si256())) 394 395 #define _mm_shrdi_epi16(A, B, I) \ 396 ((__m128i)__builtin_ia32_vpshrdw128((__v8hi)(__m128i)(A), \ 397 (__v8hi)(__m128i)(B), (int)(I))) 398 399 #define _mm_mask_shrdi_epi16(S, U, A, B, I) \ 400 ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 401 (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \ 402 (__v8hi)(__m128i)(S))) 403 404 #define _mm_maskz_shrdi_epi16(U, A, B, I) \ 405 ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 406 (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \ 407 (__v8hi)_mm_setzero_si128())) 408 409 static __inline__ __m256i __DEFAULT_FN_ATTRS256 410 _mm256_shldv_epi64(__m256i __A, __m256i __B, __m256i __C) 411 { 412 return (__m256i)__builtin_ia32_vpshldvq256((__v4di)__A, (__v4di)__B, 413 (__v4di)__C); 414 } 415 416 static __inline__ __m256i __DEFAULT_FN_ATTRS256 417 _mm256_mask_shldv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) 418 { 419 return (__m256i)__builtin_ia32_selectq_256(__U, 420 (__v4di)_mm256_shldv_epi64(__A, __B, __C), 421 (__v4di)__A); 422 } 423 424 static __inline__ __m256i __DEFAULT_FN_ATTRS256 425 _mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) 426 { 427 return (__m256i)__builtin_ia32_selectq_256(__U, 428 (__v4di)_mm256_shldv_epi64(__A, __B, __C), 429 (__v4di)_mm256_setzero_si256()); 430 } 431 432 static __inline__ __m128i __DEFAULT_FN_ATTRS128 433 _mm_shldv_epi64(__m128i __A, __m128i __B, __m128i __C) 434 { 435 return (__m128i)__builtin_ia32_vpshldvq128((__v2di)__A, (__v2di)__B, 436 (__v2di)__C); 437 } 438 439 static __inline__ __m128i __DEFAULT_FN_ATTRS128 440 _mm_mask_shldv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) 441 { 442 return (__m128i)__builtin_ia32_selectq_128(__U, 443 (__v2di)_mm_shldv_epi64(__A, __B, __C), 444 (__v2di)__A); 445 } 446 447 static __inline__ __m128i __DEFAULT_FN_ATTRS128 448 _mm_maskz_shldv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) 449 { 450 return (__m128i)__builtin_ia32_selectq_128(__U, 451 (__v2di)_mm_shldv_epi64(__A, __B, __C), 452 (__v2di)_mm_setzero_si128()); 453 } 454 455 static __inline__ __m256i __DEFAULT_FN_ATTRS256 456 _mm256_shldv_epi32(__m256i __A, __m256i __B, __m256i __C) 457 { 458 return (__m256i)__builtin_ia32_vpshldvd256((__v8si)__A, (__v8si)__B, 459 (__v8si)__C); 460 } 461 462 static __inline__ __m256i __DEFAULT_FN_ATTRS256 463 _mm256_mask_shldv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) 464 { 465 return (__m256i)__builtin_ia32_selectd_256(__U, 466 (__v8si)_mm256_shldv_epi32(__A, __B, __C), 467 (__v8si)__A); 468 } 469 470 static __inline__ __m256i __DEFAULT_FN_ATTRS256 471 _mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) 472 { 473 return (__m256i)__builtin_ia32_selectd_256(__U, 474 (__v8si)_mm256_shldv_epi32(__A, __B, __C), 475 (__v8si)_mm256_setzero_si256()); 476 } 477 478 static __inline__ __m128i __DEFAULT_FN_ATTRS128 479 _mm_shldv_epi32(__m128i __A, __m128i __B, __m128i __C) 480 { 481 return (__m128i)__builtin_ia32_vpshldvd128((__v4si)__A, (__v4si)__B, 482 (__v4si)__C); 483 } 484 485 static __inline__ __m128i __DEFAULT_FN_ATTRS128 486 _mm_mask_shldv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) 487 { 488 return (__m128i)__builtin_ia32_selectd_128(__U, 489 (__v4si)_mm_shldv_epi32(__A, __B, __C), 490 (__v4si)__A); 491 } 492 493 static __inline__ __m128i __DEFAULT_FN_ATTRS128 494 _mm_maskz_shldv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) 495 { 496 return (__m128i)__builtin_ia32_selectd_128(__U, 497 (__v4si)_mm_shldv_epi32(__A, __B, __C), 498 (__v4si)_mm_setzero_si128()); 499 } 500 501 static __inline__ __m256i __DEFAULT_FN_ATTRS256 502 _mm256_shldv_epi16(__m256i __A, __m256i __B, __m256i __C) 503 { 504 return (__m256i)__builtin_ia32_vpshldvw256((__v16hi)__A, (__v16hi)__B, 505 (__v16hi)__C); 506 } 507 508 static __inline__ __m256i __DEFAULT_FN_ATTRS256 509 _mm256_mask_shldv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C) 510 { 511 return (__m256i)__builtin_ia32_selectw_256(__U, 512 (__v16hi)_mm256_shldv_epi16(__A, __B, __C), 513 (__v16hi)__A); 514 } 515 516 static __inline__ __m256i __DEFAULT_FN_ATTRS256 517 _mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C) 518 { 519 return (__m256i)__builtin_ia32_selectw_256(__U, 520 (__v16hi)_mm256_shldv_epi16(__A, __B, __C), 521 (__v16hi)_mm256_setzero_si256()); 522 } 523 524 static __inline__ __m128i __DEFAULT_FN_ATTRS128 525 _mm_shldv_epi16(__m128i __A, __m128i __B, __m128i __C) 526 { 527 return (__m128i)__builtin_ia32_vpshldvw128((__v8hi)__A, (__v8hi)__B, 528 (__v8hi)__C); 529 } 530 531 static __inline__ __m128i __DEFAULT_FN_ATTRS128 532 _mm_mask_shldv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) 533 { 534 return (__m128i)__builtin_ia32_selectw_128(__U, 535 (__v8hi)_mm_shldv_epi16(__A, __B, __C), 536 (__v8hi)__A); 537 } 538 539 static __inline__ __m128i __DEFAULT_FN_ATTRS128 540 _mm_maskz_shldv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) 541 { 542 return (__m128i)__builtin_ia32_selectw_128(__U, 543 (__v8hi)_mm_shldv_epi16(__A, __B, __C), 544 (__v8hi)_mm_setzero_si128()); 545 } 546 547 static __inline__ __m256i __DEFAULT_FN_ATTRS256 548 _mm256_shrdv_epi64(__m256i __A, __m256i __B, __m256i __C) 549 { 550 return (__m256i)__builtin_ia32_vpshrdvq256((__v4di)__A, (__v4di)__B, 551 (__v4di)__C); 552 } 553 554 static __inline__ __m256i __DEFAULT_FN_ATTRS256 555 _mm256_mask_shrdv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) 556 { 557 return (__m256i)__builtin_ia32_selectq_256(__U, 558 (__v4di)_mm256_shrdv_epi64(__A, __B, __C), 559 (__v4di)__A); 560 } 561 562 static __inline__ __m256i __DEFAULT_FN_ATTRS256 563 _mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) 564 { 565 return (__m256i)__builtin_ia32_selectq_256(__U, 566 (__v4di)_mm256_shrdv_epi64(__A, __B, __C), 567 (__v4di)_mm256_setzero_si256()); 568 } 569 570 static __inline__ __m128i __DEFAULT_FN_ATTRS128 571 _mm_shrdv_epi64(__m128i __A, __m128i __B, __m128i __C) 572 { 573 return (__m128i)__builtin_ia32_vpshrdvq128((__v2di)__A, (__v2di)__B, 574 (__v2di)__C); 575 } 576 577 static __inline__ __m128i __DEFAULT_FN_ATTRS128 578 _mm_mask_shrdv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) 579 { 580 return (__m128i)__builtin_ia32_selectq_128(__U, 581 (__v2di)_mm_shrdv_epi64(__A, __B, __C), 582 (__v2di)__A); 583 } 584 585 static __inline__ __m128i __DEFAULT_FN_ATTRS128 586 _mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) 587 { 588 return (__m128i)__builtin_ia32_selectq_128(__U, 589 (__v2di)_mm_shrdv_epi64(__A, __B, __C), 590 (__v2di)_mm_setzero_si128()); 591 } 592 593 static __inline__ __m256i __DEFAULT_FN_ATTRS256 594 _mm256_shrdv_epi32(__m256i __A, __m256i __B, __m256i __C) 595 { 596 return (__m256i)__builtin_ia32_vpshrdvd256((__v8si)__A, (__v8si)__B, 597 (__v8si)__C); 598 } 599 600 static __inline__ __m256i __DEFAULT_FN_ATTRS256 601 _mm256_mask_shrdv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) 602 { 603 return (__m256i)__builtin_ia32_selectd_256(__U, 604 (__v8si)_mm256_shrdv_epi32(__A, __B, __C), 605 (__v8si)__A); 606 } 607 608 static __inline__ __m256i __DEFAULT_FN_ATTRS256 609 _mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) 610 { 611 return (__m256i)__builtin_ia32_selectd_256(__U, 612 (__v8si)_mm256_shrdv_epi32(__A, __B, __C), 613 (__v8si)_mm256_setzero_si256()); 614 } 615 616 static __inline__ __m128i __DEFAULT_FN_ATTRS128 617 _mm_shrdv_epi32(__m128i __A, __m128i __B, __m128i __C) 618 { 619 return (__m128i)__builtin_ia32_vpshrdvd128((__v4si)__A, (__v4si)__B, 620 (__v4si)__C); 621 } 622 623 static __inline__ __m128i __DEFAULT_FN_ATTRS128 624 _mm_mask_shrdv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) 625 { 626 return (__m128i)__builtin_ia32_selectd_128(__U, 627 (__v4si)_mm_shrdv_epi32(__A, __B, __C), 628 (__v4si)__A); 629 } 630 631 static __inline__ __m128i __DEFAULT_FN_ATTRS128 632 _mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) 633 { 634 return (__m128i)__builtin_ia32_selectd_128(__U, 635 (__v4si)_mm_shrdv_epi32(__A, __B, __C), 636 (__v4si)_mm_setzero_si128()); 637 } 638 639 static __inline__ __m256i __DEFAULT_FN_ATTRS256 640 _mm256_shrdv_epi16(__m256i __A, __m256i __B, __m256i __C) 641 { 642 return (__m256i)__builtin_ia32_vpshrdvw256((__v16hi)__A, (__v16hi)__B, 643 (__v16hi)__C); 644 } 645 646 static __inline__ __m256i __DEFAULT_FN_ATTRS256 647 _mm256_mask_shrdv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C) 648 { 649 return (__m256i)__builtin_ia32_selectw_256(__U, 650 (__v16hi)_mm256_shrdv_epi16(__A, __B, __C), 651 (__v16hi)__A); 652 } 653 654 static __inline__ __m256i __DEFAULT_FN_ATTRS256 655 _mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C) 656 { 657 return (__m256i)__builtin_ia32_selectw_256(__U, 658 (__v16hi)_mm256_shrdv_epi16(__A, __B, __C), 659 (__v16hi)_mm256_setzero_si256()); 660 } 661 662 static __inline__ __m128i __DEFAULT_FN_ATTRS128 663 _mm_shrdv_epi16(__m128i __A, __m128i __B, __m128i __C) 664 { 665 return (__m128i)__builtin_ia32_vpshrdvw128((__v8hi)__A, (__v8hi)__B, 666 (__v8hi)__C); 667 } 668 669 static __inline__ __m128i __DEFAULT_FN_ATTRS128 670 _mm_mask_shrdv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) 671 { 672 return (__m128i)__builtin_ia32_selectw_128(__U, 673 (__v8hi)_mm_shrdv_epi16(__A, __B, __C), 674 (__v8hi)__A); 675 } 676 677 static __inline__ __m128i __DEFAULT_FN_ATTRS128 678 _mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) 679 { 680 return (__m128i)__builtin_ia32_selectw_128(__U, 681 (__v8hi)_mm_shrdv_epi16(__A, __B, __C), 682 (__v8hi)_mm_setzero_si128()); 683 } 684 685 686 #undef __DEFAULT_FN_ATTRS128 687 #undef __DEFAULT_FN_ATTRS256 688 689 #endif 690