1 /*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------=== 2 * 3 * 4 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 * See https://llvm.org/LICENSE.txt for license information. 6 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 * 8 *===-----------------------------------------------------------------------=== 9 */ 10 #ifndef __IMMINTRIN_H 11 #error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead." 12 #endif 13 14 #ifndef __AVX512VBMI2INTRIN_H 15 #define __AVX512VBMI2INTRIN_H 16 17 /* Define the default attributes for the functions in this file. */ 18 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2,evex512"), __min_vector_width__(512))) 19 20 21 static __inline__ __m512i __DEFAULT_FN_ATTRS 22 _mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D) 23 { 24 return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, 25 (__v32hi) __S, 26 __U); 27 } 28 29 static __inline__ __m512i __DEFAULT_FN_ATTRS 30 _mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D) 31 { 32 return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, 33 (__v32hi) _mm512_setzero_si512(), 34 __U); 35 } 36 37 static __inline__ __m512i __DEFAULT_FN_ATTRS 38 _mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D) 39 { 40 return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, 41 (__v64qi) __S, 42 __U); 43 } 44 45 static __inline__ __m512i __DEFAULT_FN_ATTRS 46 _mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D) 47 { 48 return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, 49 (__v64qi) _mm512_setzero_si512(), 50 __U); 51 } 52 53 static __inline__ void __DEFAULT_FN_ATTRS 54 _mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D) 55 { 56 __builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D, 57 __U); 58 } 59 60 static __inline__ void __DEFAULT_FN_ATTRS 61 _mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D) 62 { 63 __builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D, 64 __U); 65 } 66 67 static __inline__ __m512i __DEFAULT_FN_ATTRS 68 _mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D) 69 { 70 return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, 71 (__v32hi) __S, 72 __U); 73 } 74 75 static __inline__ __m512i __DEFAULT_FN_ATTRS 76 _mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D) 77 { 78 return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, 79 (__v32hi) _mm512_setzero_si512(), 80 __U); 81 } 82 83 static __inline__ __m512i __DEFAULT_FN_ATTRS 84 _mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D) 85 { 86 return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, 87 (__v64qi) __S, 88 __U); 89 } 90 91 static __inline__ __m512i __DEFAULT_FN_ATTRS 92 _mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D) 93 { 94 return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, 95 (__v64qi) _mm512_setzero_si512(), 96 __U); 97 } 98 99 static __inline__ __m512i __DEFAULT_FN_ATTRS 100 _mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P) 101 { 102 return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, 103 (__v32hi) __S, 104 __U); 105 } 106 107 static __inline__ __m512i __DEFAULT_FN_ATTRS 108 _mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P) 109 { 110 return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, 111 (__v32hi) _mm512_setzero_si512(), 112 __U); 113 } 114 115 static __inline__ __m512i __DEFAULT_FN_ATTRS 116 _mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P) 117 { 118 return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, 119 (__v64qi) __S, 120 __U); 121 } 122 123 static __inline__ __m512i __DEFAULT_FN_ATTRS 124 _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) 125 { 126 return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, 127 (__v64qi) _mm512_setzero_si512(), 128 __U); 129 } 130 131 #define _mm512_shldi_epi64(A, B, I) \ 132 ((__m512i)__builtin_ia32_vpshldq512((__v8di)(__m512i)(A), \ 133 (__v8di)(__m512i)(B), (int)(I))) 134 135 #define _mm512_mask_shldi_epi64(S, U, A, B, I) \ 136 ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 137 (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ 138 (__v8di)(__m512i)(S))) 139 140 #define _mm512_maskz_shldi_epi64(U, A, B, I) \ 141 ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 142 (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ 143 (__v8di)_mm512_setzero_si512())) 144 145 #define _mm512_shldi_epi32(A, B, I) \ 146 ((__m512i)__builtin_ia32_vpshldd512((__v16si)(__m512i)(A), \ 147 (__v16si)(__m512i)(B), (int)(I))) 148 149 #define _mm512_mask_shldi_epi32(S, U, A, B, I) \ 150 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 151 (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ 152 (__v16si)(__m512i)(S))) 153 154 #define _mm512_maskz_shldi_epi32(U, A, B, I) \ 155 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 156 (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ 157 (__v16si)_mm512_setzero_si512())) 158 159 #define _mm512_shldi_epi16(A, B, I) \ 160 ((__m512i)__builtin_ia32_vpshldw512((__v32hi)(__m512i)(A), \ 161 (__v32hi)(__m512i)(B), (int)(I))) 162 163 #define _mm512_mask_shldi_epi16(S, U, A, B, I) \ 164 ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 165 (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ 166 (__v32hi)(__m512i)(S))) 167 168 #define _mm512_maskz_shldi_epi16(U, A, B, I) \ 169 ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 170 (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ 171 (__v32hi)_mm512_setzero_si512())) 172 173 #define _mm512_shrdi_epi64(A, B, I) \ 174 ((__m512i)__builtin_ia32_vpshrdq512((__v8di)(__m512i)(A), \ 175 (__v8di)(__m512i)(B), (int)(I))) 176 177 #define _mm512_mask_shrdi_epi64(S, U, A, B, I) \ 178 ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 179 (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ 180 (__v8di)(__m512i)(S))) 181 182 #define _mm512_maskz_shrdi_epi64(U, A, B, I) \ 183 ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 184 (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ 185 (__v8di)_mm512_setzero_si512())) 186 187 #define _mm512_shrdi_epi32(A, B, I) \ 188 ((__m512i)__builtin_ia32_vpshrdd512((__v16si)(__m512i)(A), \ 189 (__v16si)(__m512i)(B), (int)(I))) 190 191 #define _mm512_mask_shrdi_epi32(S, U, A, B, I) \ 192 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 193 (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ 194 (__v16si)(__m512i)(S))) 195 196 #define _mm512_maskz_shrdi_epi32(U, A, B, I) \ 197 ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 198 (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ 199 (__v16si)_mm512_setzero_si512())) 200 201 #define _mm512_shrdi_epi16(A, B, I) \ 202 ((__m512i)__builtin_ia32_vpshrdw512((__v32hi)(__m512i)(A), \ 203 (__v32hi)(__m512i)(B), (int)(I))) 204 205 #define _mm512_mask_shrdi_epi16(S, U, A, B, I) \ 206 ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 207 (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ 208 (__v32hi)(__m512i)(S))) 209 210 #define _mm512_maskz_shrdi_epi16(U, A, B, I) \ 211 ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 212 (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ 213 (__v32hi)_mm512_setzero_si512())) 214 215 static __inline__ __m512i __DEFAULT_FN_ATTRS 216 _mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C) 217 { 218 return (__m512i)__builtin_ia32_vpshldvq512((__v8di)__A, (__v8di)__B, 219 (__v8di)__C); 220 } 221 222 static __inline__ __m512i __DEFAULT_FN_ATTRS 223 _mm512_mask_shldv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) 224 { 225 return (__m512i)__builtin_ia32_selectq_512(__U, 226 (__v8di)_mm512_shldv_epi64(__A, __B, __C), 227 (__v8di)__A); 228 } 229 230 static __inline__ __m512i __DEFAULT_FN_ATTRS 231 _mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) 232 { 233 return (__m512i)__builtin_ia32_selectq_512(__U, 234 (__v8di)_mm512_shldv_epi64(__A, __B, __C), 235 (__v8di)_mm512_setzero_si512()); 236 } 237 238 static __inline__ __m512i __DEFAULT_FN_ATTRS 239 _mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C) 240 { 241 return (__m512i)__builtin_ia32_vpshldvd512((__v16si)__A, (__v16si)__B, 242 (__v16si)__C); 243 } 244 245 static __inline__ __m512i __DEFAULT_FN_ATTRS 246 _mm512_mask_shldv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) 247 { 248 return (__m512i)__builtin_ia32_selectd_512(__U, 249 (__v16si)_mm512_shldv_epi32(__A, __B, __C), 250 (__v16si)__A); 251 } 252 253 static __inline__ __m512i __DEFAULT_FN_ATTRS 254 _mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) 255 { 256 return (__m512i)__builtin_ia32_selectd_512(__U, 257 (__v16si)_mm512_shldv_epi32(__A, __B, __C), 258 (__v16si)_mm512_setzero_si512()); 259 } 260 261 static __inline__ __m512i __DEFAULT_FN_ATTRS 262 _mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C) 263 { 264 return (__m512i)__builtin_ia32_vpshldvw512((__v32hi)__A, (__v32hi)__B, 265 (__v32hi)__C); 266 } 267 268 static __inline__ __m512i __DEFAULT_FN_ATTRS 269 _mm512_mask_shldv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) 270 { 271 return (__m512i)__builtin_ia32_selectw_512(__U, 272 (__v32hi)_mm512_shldv_epi16(__A, __B, __C), 273 (__v32hi)__A); 274 } 275 276 static __inline__ __m512i __DEFAULT_FN_ATTRS 277 _mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) 278 { 279 return (__m512i)__builtin_ia32_selectw_512(__U, 280 (__v32hi)_mm512_shldv_epi16(__A, __B, __C), 281 (__v32hi)_mm512_setzero_si512()); 282 } 283 284 static __inline__ __m512i __DEFAULT_FN_ATTRS 285 _mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C) 286 { 287 return (__m512i)__builtin_ia32_vpshrdvq512((__v8di)__A, (__v8di)__B, 288 (__v8di)__C); 289 } 290 291 static __inline__ __m512i __DEFAULT_FN_ATTRS 292 _mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) 293 { 294 return (__m512i)__builtin_ia32_selectq_512(__U, 295 (__v8di)_mm512_shrdv_epi64(__A, __B, __C), 296 (__v8di)__A); 297 } 298 299 static __inline__ __m512i __DEFAULT_FN_ATTRS 300 _mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) 301 { 302 return (__m512i)__builtin_ia32_selectq_512(__U, 303 (__v8di)_mm512_shrdv_epi64(__A, __B, __C), 304 (__v8di)_mm512_setzero_si512()); 305 } 306 307 static __inline__ __m512i __DEFAULT_FN_ATTRS 308 _mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C) 309 { 310 return (__m512i)__builtin_ia32_vpshrdvd512((__v16si)__A, (__v16si)__B, 311 (__v16si)__C); 312 } 313 314 static __inline__ __m512i __DEFAULT_FN_ATTRS 315 _mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) 316 { 317 return (__m512i) __builtin_ia32_selectd_512(__U, 318 (__v16si)_mm512_shrdv_epi32(__A, __B, __C), 319 (__v16si)__A); 320 } 321 322 static __inline__ __m512i __DEFAULT_FN_ATTRS 323 _mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) 324 { 325 return (__m512i) __builtin_ia32_selectd_512(__U, 326 (__v16si)_mm512_shrdv_epi32(__A, __B, __C), 327 (__v16si)_mm512_setzero_si512()); 328 } 329 330 static __inline__ __m512i __DEFAULT_FN_ATTRS 331 _mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C) 332 { 333 return (__m512i)__builtin_ia32_vpshrdvw512((__v32hi)__A, (__v32hi)__B, 334 (__v32hi)__C); 335 } 336 337 static __inline__ __m512i __DEFAULT_FN_ATTRS 338 _mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) 339 { 340 return (__m512i)__builtin_ia32_selectw_512(__U, 341 (__v32hi)_mm512_shrdv_epi16(__A, __B, __C), 342 (__v32hi)__A); 343 } 344 345 static __inline__ __m512i __DEFAULT_FN_ATTRS 346 _mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) 347 { 348 return (__m512i)__builtin_ia32_selectw_512(__U, 349 (__v32hi)_mm512_shrdv_epi16(__A, __B, __C), 350 (__v32hi)_mm512_setzero_si512()); 351 } 352 353 354 #undef __DEFAULT_FN_ATTRS 355 356 #endif 357 358