1 /*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------=== 2 * 3 * 4 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 * See https://llvm.org/LICENSE.txt for license information. 6 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 * 8 *===-----------------------------------------------------------------------=== 9 */ 10 #ifndef __IMMINTRIN_H 11 #error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead." 12 #endif 13 14 #ifndef __AVX512BWINTRIN_H 15 #define __AVX512BWINTRIN_H 16 17 typedef unsigned int __mmask32; 18 typedef unsigned long long __mmask64; 19 20 /* Define the default attributes for the functions in this file. */ 21 #define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512))) 22 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"))) 23 24 static __inline __mmask32 __DEFAULT_FN_ATTRS 25 _knot_mask32(__mmask32 __M) 26 { 27 return __builtin_ia32_knotsi(__M); 28 } 29 30 static __inline __mmask64 __DEFAULT_FN_ATTRS 31 _knot_mask64(__mmask64 __M) 32 { 33 return __builtin_ia32_knotdi(__M); 34 } 35 36 static __inline__ __mmask32 __DEFAULT_FN_ATTRS 37 _kand_mask32(__mmask32 __A, __mmask32 __B) 38 { 39 return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B); 40 } 41 42 static __inline__ __mmask64 __DEFAULT_FN_ATTRS 43 _kand_mask64(__mmask64 __A, __mmask64 __B) 44 { 45 return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B); 46 } 47 48 static __inline__ __mmask32 __DEFAULT_FN_ATTRS 49 _kandn_mask32(__mmask32 __A, __mmask32 __B) 50 { 51 return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B); 52 } 53 54 static __inline__ __mmask64 __DEFAULT_FN_ATTRS 55 _kandn_mask64(__mmask64 __A, __mmask64 __B) 56 { 57 return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B); 58 } 59 60 static __inline__ __mmask32 __DEFAULT_FN_ATTRS 61 _kor_mask32(__mmask32 __A, __mmask32 __B) 62 { 63 return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B); 64 } 65 66 static __inline__ __mmask64 __DEFAULT_FN_ATTRS 67 _kor_mask64(__mmask64 __A, __mmask64 __B) 68 { 69 return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B); 70 } 71 72 static __inline__ __mmask32 __DEFAULT_FN_ATTRS 73 _kxnor_mask32(__mmask32 __A, __mmask32 __B) 74 { 75 return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B); 76 } 77 78 static __inline__ __mmask64 __DEFAULT_FN_ATTRS 79 _kxnor_mask64(__mmask64 __A, __mmask64 __B) 80 { 81 return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B); 82 } 83 84 static __inline__ __mmask32 __DEFAULT_FN_ATTRS 85 _kxor_mask32(__mmask32 __A, __mmask32 __B) 86 { 87 return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B); 88 } 89 90 static __inline__ __mmask64 __DEFAULT_FN_ATTRS 91 _kxor_mask64(__mmask64 __A, __mmask64 __B) 92 { 93 return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B); 94 } 95 96 static __inline__ unsigned char __DEFAULT_FN_ATTRS 97 _kortestc_mask32_u8(__mmask32 __A, __mmask32 __B) 98 { 99 return (unsigned char)__builtin_ia32_kortestcsi(__A, __B); 100 } 101 102 static __inline__ unsigned char __DEFAULT_FN_ATTRS 103 _kortestz_mask32_u8(__mmask32 __A, __mmask32 __B) 104 { 105 return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); 106 } 107 108 static __inline__ unsigned char __DEFAULT_FN_ATTRS 109 _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { 110 *__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B); 111 return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); 112 } 113 114 static __inline__ unsigned char __DEFAULT_FN_ATTRS 115 _kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) 116 { 117 return (unsigned char)__builtin_ia32_kortestcdi(__A, __B); 118 } 119 120 static __inline__ unsigned char __DEFAULT_FN_ATTRS 121 _kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) 122 { 123 return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); 124 } 125 126 static __inline__ unsigned char __DEFAULT_FN_ATTRS 127 _kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) { 128 *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B); 129 return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); 130 } 131 132 static __inline__ unsigned char __DEFAULT_FN_ATTRS 133 _ktestc_mask32_u8(__mmask32 __A, __mmask32 __B) 134 { 135 return (unsigned char)__builtin_ia32_ktestcsi(__A, __B); 136 } 137 138 static __inline__ unsigned char __DEFAULT_FN_ATTRS 139 _ktestz_mask32_u8(__mmask32 __A, __mmask32 __B) 140 { 141 return (unsigned char)__builtin_ia32_ktestzsi(__A, __B); 142 } 143 144 static __inline__ unsigned char __DEFAULT_FN_ATTRS 145 _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { 146 *__C = (unsigned char)__builtin_ia32_ktestcsi(__A, __B); 147 return (unsigned char)__builtin_ia32_ktestzsi(__A, __B); 148 } 149 150 static __inline__ unsigned char __DEFAULT_FN_ATTRS 151 _ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) 152 { 153 return (unsigned char)__builtin_ia32_ktestcdi(__A, __B); 154 } 155 156 static __inline__ unsigned char __DEFAULT_FN_ATTRS 157 _ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) 158 { 159 return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); 160 } 161 162 static __inline__ unsigned char __DEFAULT_FN_ATTRS 163 _ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) { 164 *__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B); 165 return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); 166 } 167 168 static __inline__ __mmask32 __DEFAULT_FN_ATTRS 169 _kadd_mask32(__mmask32 __A, __mmask32 __B) 170 { 171 return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B); 172 } 173 174 static __inline__ __mmask64 __DEFAULT_FN_ATTRS 175 _kadd_mask64(__mmask64 __A, __mmask64 __B) 176 { 177 return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B); 178 } 179 180 #define _kshiftli_mask32(A, I) \ 181 (__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I)) 182 183 #define _kshiftri_mask32(A, I) \ 184 (__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I)) 185 186 #define _kshiftli_mask64(A, I) \ 187 (__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I)) 188 189 #define _kshiftri_mask64(A, I) \ 190 (__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I)) 191 192 static __inline__ unsigned int __DEFAULT_FN_ATTRS 193 _cvtmask32_u32(__mmask32 __A) { 194 return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A); 195 } 196 197 static __inline__ unsigned long long __DEFAULT_FN_ATTRS 198 _cvtmask64_u64(__mmask64 __A) { 199 return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A); 200 } 201 202 static __inline__ __mmask32 __DEFAULT_FN_ATTRS 203 _cvtu32_mask32(unsigned int __A) { 204 return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A); 205 } 206 207 static __inline__ __mmask64 __DEFAULT_FN_ATTRS 208 _cvtu64_mask64(unsigned long long __A) { 209 return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A); 210 } 211 212 static __inline__ __mmask32 __DEFAULT_FN_ATTRS 213 _load_mask32(__mmask32 *__A) { 214 return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A); 215 } 216 217 static __inline__ __mmask64 __DEFAULT_FN_ATTRS 218 _load_mask64(__mmask64 *__A) { 219 return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A); 220 } 221 222 static __inline__ void __DEFAULT_FN_ATTRS 223 _store_mask32(__mmask32 *__A, __mmask32 __B) { 224 *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B); 225 } 226 227 static __inline__ void __DEFAULT_FN_ATTRS 228 _store_mask64(__mmask64 *__A, __mmask64 __B) { 229 *(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B); 230 } 231 232 /* Integer compare */ 233 234 #define _mm512_cmp_epi8_mask(a, b, p) \ 235 (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ 236 (__v64qi)(__m512i)(b), (int)(p), \ 237 (__mmask64)-1) 238 239 #define _mm512_mask_cmp_epi8_mask(m, a, b, p) \ 240 (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ 241 (__v64qi)(__m512i)(b), (int)(p), \ 242 (__mmask64)(m)) 243 244 #define _mm512_cmp_epu8_mask(a, b, p) \ 245 (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ 246 (__v64qi)(__m512i)(b), (int)(p), \ 247 (__mmask64)-1) 248 249 #define _mm512_mask_cmp_epu8_mask(m, a, b, p) \ 250 (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ 251 (__v64qi)(__m512i)(b), (int)(p), \ 252 (__mmask64)(m)) 253 254 #define _mm512_cmp_epi16_mask(a, b, p) \ 255 (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ 256 (__v32hi)(__m512i)(b), (int)(p), \ 257 (__mmask32)-1) 258 259 #define _mm512_mask_cmp_epi16_mask(m, a, b, p) \ 260 (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ 261 (__v32hi)(__m512i)(b), (int)(p), \ 262 (__mmask32)(m)) 263 264 #define _mm512_cmp_epu16_mask(a, b, p) \ 265 (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ 266 (__v32hi)(__m512i)(b), (int)(p), \ 267 (__mmask32)-1) 268 269 #define _mm512_mask_cmp_epu16_mask(m, a, b, p) \ 270 (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ 271 (__v32hi)(__m512i)(b), (int)(p), \ 272 (__mmask32)(m)) 273 274 #define _mm512_cmpeq_epi8_mask(A, B) \ 275 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) 276 #define _mm512_mask_cmpeq_epi8_mask(k, A, B) \ 277 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ) 278 #define _mm512_cmpge_epi8_mask(A, B) \ 279 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GE) 280 #define _mm512_mask_cmpge_epi8_mask(k, A, B) \ 281 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE) 282 #define _mm512_cmpgt_epi8_mask(A, B) \ 283 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GT) 284 #define _mm512_mask_cmpgt_epi8_mask(k, A, B) \ 285 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT) 286 #define _mm512_cmple_epi8_mask(A, B) \ 287 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LE) 288 #define _mm512_mask_cmple_epi8_mask(k, A, B) \ 289 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE) 290 #define _mm512_cmplt_epi8_mask(A, B) \ 291 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LT) 292 #define _mm512_mask_cmplt_epi8_mask(k, A, B) \ 293 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT) 294 #define _mm512_cmpneq_epi8_mask(A, B) \ 295 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_NE) 296 #define _mm512_mask_cmpneq_epi8_mask(k, A, B) \ 297 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE) 298 299 #define _mm512_cmpeq_epu8_mask(A, B) \ 300 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ) 301 #define _mm512_mask_cmpeq_epu8_mask(k, A, B) \ 302 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ) 303 #define _mm512_cmpge_epu8_mask(A, B) \ 304 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GE) 305 #define _mm512_mask_cmpge_epu8_mask(k, A, B) \ 306 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE) 307 #define _mm512_cmpgt_epu8_mask(A, B) \ 308 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GT) 309 #define _mm512_mask_cmpgt_epu8_mask(k, A, B) \ 310 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT) 311 #define _mm512_cmple_epu8_mask(A, B) \ 312 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LE) 313 #define _mm512_mask_cmple_epu8_mask(k, A, B) \ 314 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE) 315 #define _mm512_cmplt_epu8_mask(A, B) \ 316 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LT) 317 #define _mm512_mask_cmplt_epu8_mask(k, A, B) \ 318 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT) 319 #define _mm512_cmpneq_epu8_mask(A, B) \ 320 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_NE) 321 #define _mm512_mask_cmpneq_epu8_mask(k, A, B) \ 322 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE) 323 324 #define _mm512_cmpeq_epi16_mask(A, B) \ 325 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ) 326 #define _mm512_mask_cmpeq_epi16_mask(k, A, B) \ 327 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ) 328 #define _mm512_cmpge_epi16_mask(A, B) \ 329 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GE) 330 #define _mm512_mask_cmpge_epi16_mask(k, A, B) \ 331 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE) 332 #define _mm512_cmpgt_epi16_mask(A, B) \ 333 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GT) 334 #define _mm512_mask_cmpgt_epi16_mask(k, A, B) \ 335 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT) 336 #define _mm512_cmple_epi16_mask(A, B) \ 337 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LE) 338 #define _mm512_mask_cmple_epi16_mask(k, A, B) \ 339 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE) 340 #define _mm512_cmplt_epi16_mask(A, B) \ 341 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LT) 342 #define _mm512_mask_cmplt_epi16_mask(k, A, B) \ 343 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT) 344 #define _mm512_cmpneq_epi16_mask(A, B) \ 345 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_NE) 346 #define _mm512_mask_cmpneq_epi16_mask(k, A, B) \ 347 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE) 348 349 #define _mm512_cmpeq_epu16_mask(A, B) \ 350 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ) 351 #define _mm512_mask_cmpeq_epu16_mask(k, A, B) \ 352 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ) 353 #define _mm512_cmpge_epu16_mask(A, B) \ 354 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GE) 355 #define _mm512_mask_cmpge_epu16_mask(k, A, B) \ 356 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE) 357 #define _mm512_cmpgt_epu16_mask(A, B) \ 358 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GT) 359 #define _mm512_mask_cmpgt_epu16_mask(k, A, B) \ 360 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT) 361 #define _mm512_cmple_epu16_mask(A, B) \ 362 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LE) 363 #define _mm512_mask_cmple_epu16_mask(k, A, B) \ 364 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE) 365 #define _mm512_cmplt_epu16_mask(A, B) \ 366 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LT) 367 #define _mm512_mask_cmplt_epu16_mask(k, A, B) \ 368 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT) 369 #define _mm512_cmpneq_epu16_mask(A, B) \ 370 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_NE) 371 #define _mm512_mask_cmpneq_epu16_mask(k, A, B) \ 372 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) 373 374 static __inline__ __m512i __DEFAULT_FN_ATTRS512 375 _mm512_add_epi8 (__m512i __A, __m512i __B) { 376 return (__m512i) ((__v64qu) __A + (__v64qu) __B); 377 } 378 379 static __inline__ __m512i __DEFAULT_FN_ATTRS512 380 _mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 381 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 382 (__v64qi)_mm512_add_epi8(__A, __B), 383 (__v64qi)__W); 384 } 385 386 static __inline__ __m512i __DEFAULT_FN_ATTRS512 387 _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 388 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 389 (__v64qi)_mm512_add_epi8(__A, __B), 390 (__v64qi)_mm512_setzero_si512()); 391 } 392 393 static __inline__ __m512i __DEFAULT_FN_ATTRS512 394 _mm512_sub_epi8 (__m512i __A, __m512i __B) { 395 return (__m512i) ((__v64qu) __A - (__v64qu) __B); 396 } 397 398 static __inline__ __m512i __DEFAULT_FN_ATTRS512 399 _mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 400 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 401 (__v64qi)_mm512_sub_epi8(__A, __B), 402 (__v64qi)__W); 403 } 404 405 static __inline__ __m512i __DEFAULT_FN_ATTRS512 406 _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 407 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 408 (__v64qi)_mm512_sub_epi8(__A, __B), 409 (__v64qi)_mm512_setzero_si512()); 410 } 411 412 static __inline__ __m512i __DEFAULT_FN_ATTRS512 413 _mm512_add_epi16 (__m512i __A, __m512i __B) { 414 return (__m512i) ((__v32hu) __A + (__v32hu) __B); 415 } 416 417 static __inline__ __m512i __DEFAULT_FN_ATTRS512 418 _mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 419 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 420 (__v32hi)_mm512_add_epi16(__A, __B), 421 (__v32hi)__W); 422 } 423 424 static __inline__ __m512i __DEFAULT_FN_ATTRS512 425 _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 426 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 427 (__v32hi)_mm512_add_epi16(__A, __B), 428 (__v32hi)_mm512_setzero_si512()); 429 } 430 431 static __inline__ __m512i __DEFAULT_FN_ATTRS512 432 _mm512_sub_epi16 (__m512i __A, __m512i __B) { 433 return (__m512i) ((__v32hu) __A - (__v32hu) __B); 434 } 435 436 static __inline__ __m512i __DEFAULT_FN_ATTRS512 437 _mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 438 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 439 (__v32hi)_mm512_sub_epi16(__A, __B), 440 (__v32hi)__W); 441 } 442 443 static __inline__ __m512i __DEFAULT_FN_ATTRS512 444 _mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 445 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 446 (__v32hi)_mm512_sub_epi16(__A, __B), 447 (__v32hi)_mm512_setzero_si512()); 448 } 449 450 static __inline__ __m512i __DEFAULT_FN_ATTRS512 451 _mm512_mullo_epi16 (__m512i __A, __m512i __B) { 452 return (__m512i) ((__v32hu) __A * (__v32hu) __B); 453 } 454 455 static __inline__ __m512i __DEFAULT_FN_ATTRS512 456 _mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 457 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 458 (__v32hi)_mm512_mullo_epi16(__A, __B), 459 (__v32hi)__W); 460 } 461 462 static __inline__ __m512i __DEFAULT_FN_ATTRS512 463 _mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 464 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 465 (__v32hi)_mm512_mullo_epi16(__A, __B), 466 (__v32hi)_mm512_setzero_si512()); 467 } 468 469 static __inline__ __m512i __DEFAULT_FN_ATTRS512 470 _mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W) 471 { 472 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 473 (__v64qi) __W, 474 (__v64qi) __A); 475 } 476 477 static __inline__ __m512i __DEFAULT_FN_ATTRS512 478 _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W) 479 { 480 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 481 (__v32hi) __W, 482 (__v32hi) __A); 483 } 484 485 static __inline__ __m512i __DEFAULT_FN_ATTRS512 486 _mm512_abs_epi8 (__m512i __A) 487 { 488 return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A); 489 } 490 491 static __inline__ __m512i __DEFAULT_FN_ATTRS512 492 _mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A) 493 { 494 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 495 (__v64qi)_mm512_abs_epi8(__A), 496 (__v64qi)__W); 497 } 498 499 static __inline__ __m512i __DEFAULT_FN_ATTRS512 500 _mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A) 501 { 502 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 503 (__v64qi)_mm512_abs_epi8(__A), 504 (__v64qi)_mm512_setzero_si512()); 505 } 506 507 static __inline__ __m512i __DEFAULT_FN_ATTRS512 508 _mm512_abs_epi16 (__m512i __A) 509 { 510 return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A); 511 } 512 513 static __inline__ __m512i __DEFAULT_FN_ATTRS512 514 _mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A) 515 { 516 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 517 (__v32hi)_mm512_abs_epi16(__A), 518 (__v32hi)__W); 519 } 520 521 static __inline__ __m512i __DEFAULT_FN_ATTRS512 522 _mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A) 523 { 524 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 525 (__v32hi)_mm512_abs_epi16(__A), 526 (__v32hi)_mm512_setzero_si512()); 527 } 528 529 static __inline__ __m512i __DEFAULT_FN_ATTRS512 530 _mm512_packs_epi32(__m512i __A, __m512i __B) 531 { 532 return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B); 533 } 534 535 static __inline__ __m512i __DEFAULT_FN_ATTRS512 536 _mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B) 537 { 538 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 539 (__v32hi)_mm512_packs_epi32(__A, __B), 540 (__v32hi)_mm512_setzero_si512()); 541 } 542 543 static __inline__ __m512i __DEFAULT_FN_ATTRS512 544 _mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 545 { 546 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 547 (__v32hi)_mm512_packs_epi32(__A, __B), 548 (__v32hi)__W); 549 } 550 551 static __inline__ __m512i __DEFAULT_FN_ATTRS512 552 _mm512_packs_epi16(__m512i __A, __m512i __B) 553 { 554 return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B); 555 } 556 557 static __inline__ __m512i __DEFAULT_FN_ATTRS512 558 _mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 559 { 560 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 561 (__v64qi)_mm512_packs_epi16(__A, __B), 562 (__v64qi)__W); 563 } 564 565 static __inline__ __m512i __DEFAULT_FN_ATTRS512 566 _mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B) 567 { 568 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 569 (__v64qi)_mm512_packs_epi16(__A, __B), 570 (__v64qi)_mm512_setzero_si512()); 571 } 572 573 static __inline__ __m512i __DEFAULT_FN_ATTRS512 574 _mm512_packus_epi32(__m512i __A, __m512i __B) 575 { 576 return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B); 577 } 578 579 static __inline__ __m512i __DEFAULT_FN_ATTRS512 580 _mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B) 581 { 582 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 583 (__v32hi)_mm512_packus_epi32(__A, __B), 584 (__v32hi)_mm512_setzero_si512()); 585 } 586 587 static __inline__ __m512i __DEFAULT_FN_ATTRS512 588 _mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 589 { 590 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 591 (__v32hi)_mm512_packus_epi32(__A, __B), 592 (__v32hi)__W); 593 } 594 595 static __inline__ __m512i __DEFAULT_FN_ATTRS512 596 _mm512_packus_epi16(__m512i __A, __m512i __B) 597 { 598 return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B); 599 } 600 601 static __inline__ __m512i __DEFAULT_FN_ATTRS512 602 _mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 603 { 604 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 605 (__v64qi)_mm512_packus_epi16(__A, __B), 606 (__v64qi)__W); 607 } 608 609 static __inline__ __m512i __DEFAULT_FN_ATTRS512 610 _mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) 611 { 612 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 613 (__v64qi)_mm512_packus_epi16(__A, __B), 614 (__v64qi)_mm512_setzero_si512()); 615 } 616 617 static __inline__ __m512i __DEFAULT_FN_ATTRS512 618 _mm512_adds_epi8 (__m512i __A, __m512i __B) 619 { 620 return (__m512i)__builtin_ia32_paddsb512((__v64qi)__A, (__v64qi)__B); 621 } 622 623 static __inline__ __m512i __DEFAULT_FN_ATTRS512 624 _mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 625 { 626 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 627 (__v64qi)_mm512_adds_epi8(__A, __B), 628 (__v64qi)__W); 629 } 630 631 static __inline__ __m512i __DEFAULT_FN_ATTRS512 632 _mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B) 633 { 634 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 635 (__v64qi)_mm512_adds_epi8(__A, __B), 636 (__v64qi)_mm512_setzero_si512()); 637 } 638 639 static __inline__ __m512i __DEFAULT_FN_ATTRS512 640 _mm512_adds_epi16 (__m512i __A, __m512i __B) 641 { 642 return (__m512i)__builtin_ia32_paddsw512((__v32hi)__A, (__v32hi)__B); 643 } 644 645 static __inline__ __m512i __DEFAULT_FN_ATTRS512 646 _mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 647 { 648 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 649 (__v32hi)_mm512_adds_epi16(__A, __B), 650 (__v32hi)__W); 651 } 652 653 static __inline__ __m512i __DEFAULT_FN_ATTRS512 654 _mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B) 655 { 656 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 657 (__v32hi)_mm512_adds_epi16(__A, __B), 658 (__v32hi)_mm512_setzero_si512()); 659 } 660 661 static __inline__ __m512i __DEFAULT_FN_ATTRS512 662 _mm512_adds_epu8 (__m512i __A, __m512i __B) 663 { 664 return (__m512i)__builtin_ia32_paddusb512((__v64qi) __A, (__v64qi) __B); 665 } 666 667 static __inline__ __m512i __DEFAULT_FN_ATTRS512 668 _mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 669 { 670 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 671 (__v64qi)_mm512_adds_epu8(__A, __B), 672 (__v64qi)__W); 673 } 674 675 static __inline__ __m512i __DEFAULT_FN_ATTRS512 676 _mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 677 { 678 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 679 (__v64qi)_mm512_adds_epu8(__A, __B), 680 (__v64qi)_mm512_setzero_si512()); 681 } 682 683 static __inline__ __m512i __DEFAULT_FN_ATTRS512 684 _mm512_adds_epu16 (__m512i __A, __m512i __B) 685 { 686 return (__m512i)__builtin_ia32_paddusw512((__v32hi) __A, (__v32hi) __B); 687 } 688 689 static __inline__ __m512i __DEFAULT_FN_ATTRS512 690 _mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 691 { 692 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 693 (__v32hi)_mm512_adds_epu16(__A, __B), 694 (__v32hi)__W); 695 } 696 697 static __inline__ __m512i __DEFAULT_FN_ATTRS512 698 _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 699 { 700 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 701 (__v32hi)_mm512_adds_epu16(__A, __B), 702 (__v32hi)_mm512_setzero_si512()); 703 } 704 705 static __inline__ __m512i __DEFAULT_FN_ATTRS512 706 _mm512_avg_epu8 (__m512i __A, __m512i __B) 707 { 708 return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B); 709 } 710 711 static __inline__ __m512i __DEFAULT_FN_ATTRS512 712 _mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A, 713 __m512i __B) 714 { 715 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 716 (__v64qi)_mm512_avg_epu8(__A, __B), 717 (__v64qi)__W); 718 } 719 720 static __inline__ __m512i __DEFAULT_FN_ATTRS512 721 _mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 722 { 723 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 724 (__v64qi)_mm512_avg_epu8(__A, __B), 725 (__v64qi)_mm512_setzero_si512()); 726 } 727 728 static __inline__ __m512i __DEFAULT_FN_ATTRS512 729 _mm512_avg_epu16 (__m512i __A, __m512i __B) 730 { 731 return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B); 732 } 733 734 static __inline__ __m512i __DEFAULT_FN_ATTRS512 735 _mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A, 736 __m512i __B) 737 { 738 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 739 (__v32hi)_mm512_avg_epu16(__A, __B), 740 (__v32hi)__W); 741 } 742 743 static __inline__ __m512i __DEFAULT_FN_ATTRS512 744 _mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 745 { 746 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 747 (__v32hi)_mm512_avg_epu16(__A, __B), 748 (__v32hi) _mm512_setzero_si512()); 749 } 750 751 static __inline__ __m512i __DEFAULT_FN_ATTRS512 752 _mm512_max_epi8 (__m512i __A, __m512i __B) 753 { 754 return (__m512i)__builtin_ia32_pmaxsb512((__v64qi) __A, (__v64qi) __B); 755 } 756 757 static __inline__ __m512i __DEFAULT_FN_ATTRS512 758 _mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B) 759 { 760 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 761 (__v64qi)_mm512_max_epi8(__A, __B), 762 (__v64qi)_mm512_setzero_si512()); 763 } 764 765 static __inline__ __m512i __DEFAULT_FN_ATTRS512 766 _mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 767 { 768 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 769 (__v64qi)_mm512_max_epi8(__A, __B), 770 (__v64qi)__W); 771 } 772 773 static __inline__ __m512i __DEFAULT_FN_ATTRS512 774 _mm512_max_epi16 (__m512i __A, __m512i __B) 775 { 776 return (__m512i)__builtin_ia32_pmaxsw512((__v32hi) __A, (__v32hi) __B); 777 } 778 779 static __inline__ __m512i __DEFAULT_FN_ATTRS512 780 _mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B) 781 { 782 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 783 (__v32hi)_mm512_max_epi16(__A, __B), 784 (__v32hi)_mm512_setzero_si512()); 785 } 786 787 static __inline__ __m512i __DEFAULT_FN_ATTRS512 788 _mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A, 789 __m512i __B) 790 { 791 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 792 (__v32hi)_mm512_max_epi16(__A, __B), 793 (__v32hi)__W); 794 } 795 796 static __inline__ __m512i __DEFAULT_FN_ATTRS512 797 _mm512_max_epu8 (__m512i __A, __m512i __B) 798 { 799 return (__m512i)__builtin_ia32_pmaxub512((__v64qi)__A, (__v64qi)__B); 800 } 801 802 static __inline__ __m512i __DEFAULT_FN_ATTRS512 803 _mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B) 804 { 805 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 806 (__v64qi)_mm512_max_epu8(__A, __B), 807 (__v64qi)_mm512_setzero_si512()); 808 } 809 810 static __inline__ __m512i __DEFAULT_FN_ATTRS512 811 _mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 812 { 813 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 814 (__v64qi)_mm512_max_epu8(__A, __B), 815 (__v64qi)__W); 816 } 817 818 static __inline__ __m512i __DEFAULT_FN_ATTRS512 819 _mm512_max_epu16 (__m512i __A, __m512i __B) 820 { 821 return (__m512i)__builtin_ia32_pmaxuw512((__v32hi)__A, (__v32hi)__B); 822 } 823 824 static __inline__ __m512i __DEFAULT_FN_ATTRS512 825 _mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B) 826 { 827 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 828 (__v32hi)_mm512_max_epu16(__A, __B), 829 (__v32hi)_mm512_setzero_si512()); 830 } 831 832 static __inline__ __m512i __DEFAULT_FN_ATTRS512 833 _mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 834 { 835 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 836 (__v32hi)_mm512_max_epu16(__A, __B), 837 (__v32hi)__W); 838 } 839 840 static __inline__ __m512i __DEFAULT_FN_ATTRS512 841 _mm512_min_epi8 (__m512i __A, __m512i __B) 842 { 843 return (__m512i)__builtin_ia32_pminsb512((__v64qi) __A, (__v64qi) __B); 844 } 845 846 static __inline__ __m512i __DEFAULT_FN_ATTRS512 847 _mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B) 848 { 849 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 850 (__v64qi)_mm512_min_epi8(__A, __B), 851 (__v64qi)_mm512_setzero_si512()); 852 } 853 854 static __inline__ __m512i __DEFAULT_FN_ATTRS512 855 _mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 856 { 857 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 858 (__v64qi)_mm512_min_epi8(__A, __B), 859 (__v64qi)__W); 860 } 861 862 static __inline__ __m512i __DEFAULT_FN_ATTRS512 863 _mm512_min_epi16 (__m512i __A, __m512i __B) 864 { 865 return (__m512i)__builtin_ia32_pminsw512((__v32hi) __A, (__v32hi) __B); 866 } 867 868 static __inline__ __m512i __DEFAULT_FN_ATTRS512 869 _mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B) 870 { 871 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 872 (__v32hi)_mm512_min_epi16(__A, __B), 873 (__v32hi)_mm512_setzero_si512()); 874 } 875 876 static __inline__ __m512i __DEFAULT_FN_ATTRS512 877 _mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 878 { 879 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 880 (__v32hi)_mm512_min_epi16(__A, __B), 881 (__v32hi)__W); 882 } 883 884 static __inline__ __m512i __DEFAULT_FN_ATTRS512 885 _mm512_min_epu8 (__m512i __A, __m512i __B) 886 { 887 return (__m512i)__builtin_ia32_pminub512((__v64qi)__A, (__v64qi)__B); 888 } 889 890 static __inline__ __m512i __DEFAULT_FN_ATTRS512 891 _mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B) 892 { 893 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 894 (__v64qi)_mm512_min_epu8(__A, __B), 895 (__v64qi)_mm512_setzero_si512()); 896 } 897 898 static __inline__ __m512i __DEFAULT_FN_ATTRS512 899 _mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 900 { 901 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 902 (__v64qi)_mm512_min_epu8(__A, __B), 903 (__v64qi)__W); 904 } 905 906 static __inline__ __m512i __DEFAULT_FN_ATTRS512 907 _mm512_min_epu16 (__m512i __A, __m512i __B) 908 { 909 return (__m512i)__builtin_ia32_pminuw512((__v32hi)__A, (__v32hi)__B); 910 } 911 912 static __inline__ __m512i __DEFAULT_FN_ATTRS512 913 _mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B) 914 { 915 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 916 (__v32hi)_mm512_min_epu16(__A, __B), 917 (__v32hi)_mm512_setzero_si512()); 918 } 919 920 static __inline__ __m512i __DEFAULT_FN_ATTRS512 921 _mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 922 { 923 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 924 (__v32hi)_mm512_min_epu16(__A, __B), 925 (__v32hi)__W); 926 } 927 928 static __inline__ __m512i __DEFAULT_FN_ATTRS512 929 _mm512_shuffle_epi8(__m512i __A, __m512i __B) 930 { 931 return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B); 932 } 933 934 static __inline__ __m512i __DEFAULT_FN_ATTRS512 935 _mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 936 { 937 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 938 (__v64qi)_mm512_shuffle_epi8(__A, __B), 939 (__v64qi)__W); 940 } 941 942 static __inline__ __m512i __DEFAULT_FN_ATTRS512 943 _mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) 944 { 945 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 946 (__v64qi)_mm512_shuffle_epi8(__A, __B), 947 (__v64qi)_mm512_setzero_si512()); 948 } 949 950 static __inline__ __m512i __DEFAULT_FN_ATTRS512 951 _mm512_subs_epi8 (__m512i __A, __m512i __B) 952 { 953 return (__m512i)__builtin_ia32_psubsb512((__v64qi)__A, (__v64qi)__B); 954 } 955 956 static __inline__ __m512i __DEFAULT_FN_ATTRS512 957 _mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 958 { 959 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 960 (__v64qi)_mm512_subs_epi8(__A, __B), 961 (__v64qi)__W); 962 } 963 964 static __inline__ __m512i __DEFAULT_FN_ATTRS512 965 _mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B) 966 { 967 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 968 (__v64qi)_mm512_subs_epi8(__A, __B), 969 (__v64qi)_mm512_setzero_si512()); 970 } 971 972 static __inline__ __m512i __DEFAULT_FN_ATTRS512 973 _mm512_subs_epi16 (__m512i __A, __m512i __B) 974 { 975 return (__m512i)__builtin_ia32_psubsw512((__v32hi)__A, (__v32hi)__B); 976 } 977 978 static __inline__ __m512i __DEFAULT_FN_ATTRS512 979 _mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 980 { 981 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 982 (__v32hi)_mm512_subs_epi16(__A, __B), 983 (__v32hi)__W); 984 } 985 986 static __inline__ __m512i __DEFAULT_FN_ATTRS512 987 _mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B) 988 { 989 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 990 (__v32hi)_mm512_subs_epi16(__A, __B), 991 (__v32hi)_mm512_setzero_si512()); 992 } 993 994 static __inline__ __m512i __DEFAULT_FN_ATTRS512 995 _mm512_subs_epu8 (__m512i __A, __m512i __B) 996 { 997 return (__m512i)__builtin_ia32_psubusb512((__v64qi) __A, (__v64qi) __B); 998 } 999 1000 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1001 _mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 1002 { 1003 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1004 (__v64qi)_mm512_subs_epu8(__A, __B), 1005 (__v64qi)__W); 1006 } 1007 1008 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1009 _mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 1010 { 1011 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1012 (__v64qi)_mm512_subs_epu8(__A, __B), 1013 (__v64qi)_mm512_setzero_si512()); 1014 } 1015 1016 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1017 _mm512_subs_epu16 (__m512i __A, __m512i __B) 1018 { 1019 return (__m512i)__builtin_ia32_psubusw512((__v32hi) __A, (__v32hi) __B); 1020 } 1021 1022 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1023 _mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1024 { 1025 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1026 (__v32hi)_mm512_subs_epu16(__A, __B), 1027 (__v32hi)__W); 1028 } 1029 1030 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1031 _mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 1032 { 1033 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1034 (__v32hi)_mm512_subs_epu16(__A, __B), 1035 (__v32hi)_mm512_setzero_si512()); 1036 } 1037 1038 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1039 _mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B) 1040 { 1041 return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I, 1042 (__v32hi)__B); 1043 } 1044 1045 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1046 _mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I, 1047 __m512i __B) 1048 { 1049 return (__m512i)__builtin_ia32_selectw_512(__U, 1050 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 1051 (__v32hi)__A); 1052 } 1053 1054 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1055 _mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U, 1056 __m512i __B) 1057 { 1058 return (__m512i)__builtin_ia32_selectw_512(__U, 1059 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 1060 (__v32hi)__I); 1061 } 1062 1063 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1064 _mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I, 1065 __m512i __B) 1066 { 1067 return (__m512i)__builtin_ia32_selectw_512(__U, 1068 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 1069 (__v32hi)_mm512_setzero_si512()); 1070 } 1071 1072 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1073 _mm512_mulhrs_epi16(__m512i __A, __m512i __B) 1074 { 1075 return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B); 1076 } 1077 1078 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1079 _mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1080 { 1081 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1082 (__v32hi)_mm512_mulhrs_epi16(__A, __B), 1083 (__v32hi)__W); 1084 } 1085 1086 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1087 _mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1088 { 1089 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1090 (__v32hi)_mm512_mulhrs_epi16(__A, __B), 1091 (__v32hi)_mm512_setzero_si512()); 1092 } 1093 1094 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1095 _mm512_mulhi_epi16(__m512i __A, __m512i __B) 1096 { 1097 return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B); 1098 } 1099 1100 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1101 _mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, 1102 __m512i __B) 1103 { 1104 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1105 (__v32hi)_mm512_mulhi_epi16(__A, __B), 1106 (__v32hi)__W); 1107 } 1108 1109 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1110 _mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1111 { 1112 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1113 (__v32hi)_mm512_mulhi_epi16(__A, __B), 1114 (__v32hi)_mm512_setzero_si512()); 1115 } 1116 1117 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1118 _mm512_mulhi_epu16(__m512i __A, __m512i __B) 1119 { 1120 return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B); 1121 } 1122 1123 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1124 _mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1125 { 1126 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1127 (__v32hi)_mm512_mulhi_epu16(__A, __B), 1128 (__v32hi)__W); 1129 } 1130 1131 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1132 _mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 1133 { 1134 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1135 (__v32hi)_mm512_mulhi_epu16(__A, __B), 1136 (__v32hi)_mm512_setzero_si512()); 1137 } 1138 1139 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1140 _mm512_maddubs_epi16(__m512i __X, __m512i __Y) { 1141 return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y); 1142 } 1143 1144 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1145 _mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X, 1146 __m512i __Y) { 1147 return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, 1148 (__v32hi)_mm512_maddubs_epi16(__X, __Y), 1149 (__v32hi)__W); 1150 } 1151 1152 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1153 _mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) { 1154 return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, 1155 (__v32hi)_mm512_maddubs_epi16(__X, __Y), 1156 (__v32hi)_mm512_setzero_si512()); 1157 } 1158 1159 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1160 _mm512_madd_epi16(__m512i __A, __m512i __B) { 1161 return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B); 1162 } 1163 1164 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1165 _mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { 1166 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 1167 (__v16si)_mm512_madd_epi16(__A, __B), 1168 (__v16si)__W); 1169 } 1170 1171 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1172 _mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) { 1173 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 1174 (__v16si)_mm512_madd_epi16(__A, __B), 1175 (__v16si)_mm512_setzero_si512()); 1176 } 1177 1178 static __inline__ __m256i __DEFAULT_FN_ATTRS512 1179 _mm512_cvtsepi16_epi8 (__m512i __A) { 1180 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 1181 (__v32qi)_mm256_setzero_si256(), 1182 (__mmask32) -1); 1183 } 1184 1185 static __inline__ __m256i __DEFAULT_FN_ATTRS512 1186 _mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 1187 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 1188 (__v32qi)__O, 1189 __M); 1190 } 1191 1192 static __inline__ __m256i __DEFAULT_FN_ATTRS512 1193 _mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A) { 1194 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 1195 (__v32qi) _mm256_setzero_si256(), 1196 __M); 1197 } 1198 1199 static __inline__ __m256i __DEFAULT_FN_ATTRS512 1200 _mm512_cvtusepi16_epi8 (__m512i __A) { 1201 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 1202 (__v32qi) _mm256_setzero_si256(), 1203 (__mmask32) -1); 1204 } 1205 1206 static __inline__ __m256i __DEFAULT_FN_ATTRS512 1207 _mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 1208 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 1209 (__v32qi) __O, 1210 __M); 1211 } 1212 1213 static __inline__ __m256i __DEFAULT_FN_ATTRS512 1214 _mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) { 1215 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 1216 (__v32qi) _mm256_setzero_si256(), 1217 __M); 1218 } 1219 1220 static __inline__ __m256i __DEFAULT_FN_ATTRS512 1221 _mm512_cvtepi16_epi8 (__m512i __A) { 1222 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 1223 (__v32qi) _mm256_undefined_si256(), 1224 (__mmask32) -1); 1225 } 1226 1227 static __inline__ __m256i __DEFAULT_FN_ATTRS512 1228 _mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 1229 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 1230 (__v32qi) __O, 1231 __M); 1232 } 1233 1234 static __inline__ __m256i __DEFAULT_FN_ATTRS512 1235 _mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) { 1236 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 1237 (__v32qi) _mm256_setzero_si256(), 1238 __M); 1239 } 1240 1241 static __inline__ void __DEFAULT_FN_ATTRS512 1242 _mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) 1243 { 1244 __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 1245 } 1246 1247 static __inline__ void __DEFAULT_FN_ATTRS512 1248 _mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) 1249 { 1250 __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 1251 } 1252 1253 static __inline__ void __DEFAULT_FN_ATTRS512 1254 _mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) 1255 { 1256 __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 1257 } 1258 1259 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1260 _mm512_unpackhi_epi8(__m512i __A, __m512i __B) { 1261 return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, 1262 8, 64+8, 9, 64+9, 1263 10, 64+10, 11, 64+11, 1264 12, 64+12, 13, 64+13, 1265 14, 64+14, 15, 64+15, 1266 24, 64+24, 25, 64+25, 1267 26, 64+26, 27, 64+27, 1268 28, 64+28, 29, 64+29, 1269 30, 64+30, 31, 64+31, 1270 40, 64+40, 41, 64+41, 1271 42, 64+42, 43, 64+43, 1272 44, 64+44, 45, 64+45, 1273 46, 64+46, 47, 64+47, 1274 56, 64+56, 57, 64+57, 1275 58, 64+58, 59, 64+59, 1276 60, 64+60, 61, 64+61, 1277 62, 64+62, 63, 64+63); 1278 } 1279 1280 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1281 _mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 1282 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1283 (__v64qi)_mm512_unpackhi_epi8(__A, __B), 1284 (__v64qi)__W); 1285 } 1286 1287 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1288 _mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 1289 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1290 (__v64qi)_mm512_unpackhi_epi8(__A, __B), 1291 (__v64qi)_mm512_setzero_si512()); 1292 } 1293 1294 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1295 _mm512_unpackhi_epi16(__m512i __A, __m512i __B) { 1296 return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, 1297 4, 32+4, 5, 32+5, 1298 6, 32+6, 7, 32+7, 1299 12, 32+12, 13, 32+13, 1300 14, 32+14, 15, 32+15, 1301 20, 32+20, 21, 32+21, 1302 22, 32+22, 23, 32+23, 1303 28, 32+28, 29, 32+29, 1304 30, 32+30, 31, 32+31); 1305 } 1306 1307 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1308 _mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 1309 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1310 (__v32hi)_mm512_unpackhi_epi16(__A, __B), 1311 (__v32hi)__W); 1312 } 1313 1314 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1315 _mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 1316 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1317 (__v32hi)_mm512_unpackhi_epi16(__A, __B), 1318 (__v32hi)_mm512_setzero_si512()); 1319 } 1320 1321 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1322 _mm512_unpacklo_epi8(__m512i __A, __m512i __B) { 1323 return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, 1324 0, 64+0, 1, 64+1, 1325 2, 64+2, 3, 64+3, 1326 4, 64+4, 5, 64+5, 1327 6, 64+6, 7, 64+7, 1328 16, 64+16, 17, 64+17, 1329 18, 64+18, 19, 64+19, 1330 20, 64+20, 21, 64+21, 1331 22, 64+22, 23, 64+23, 1332 32, 64+32, 33, 64+33, 1333 34, 64+34, 35, 64+35, 1334 36, 64+36, 37, 64+37, 1335 38, 64+38, 39, 64+39, 1336 48, 64+48, 49, 64+49, 1337 50, 64+50, 51, 64+51, 1338 52, 64+52, 53, 64+53, 1339 54, 64+54, 55, 64+55); 1340 } 1341 1342 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1343 _mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 1344 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1345 (__v64qi)_mm512_unpacklo_epi8(__A, __B), 1346 (__v64qi)__W); 1347 } 1348 1349 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1350 _mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 1351 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1352 (__v64qi)_mm512_unpacklo_epi8(__A, __B), 1353 (__v64qi)_mm512_setzero_si512()); 1354 } 1355 1356 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1357 _mm512_unpacklo_epi16(__m512i __A, __m512i __B) { 1358 return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, 1359 0, 32+0, 1, 32+1, 1360 2, 32+2, 3, 32+3, 1361 8, 32+8, 9, 32+9, 1362 10, 32+10, 11, 32+11, 1363 16, 32+16, 17, 32+17, 1364 18, 32+18, 19, 32+19, 1365 24, 32+24, 25, 32+25, 1366 26, 32+26, 27, 32+27); 1367 } 1368 1369 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1370 _mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 1371 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1372 (__v32hi)_mm512_unpacklo_epi16(__A, __B), 1373 (__v32hi)__W); 1374 } 1375 1376 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1377 _mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 1378 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1379 (__v32hi)_mm512_unpacklo_epi16(__A, __B), 1380 (__v32hi)_mm512_setzero_si512()); 1381 } 1382 1383 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1384 _mm512_cvtepi8_epi16(__m256i __A) 1385 { 1386 /* This function always performs a signed extension, but __v32qi is a char 1387 which may be signed or unsigned, so use __v32qs. */ 1388 return (__m512i)__builtin_convertvector((__v32qs)__A, __v32hi); 1389 } 1390 1391 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1392 _mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A) 1393 { 1394 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1395 (__v32hi)_mm512_cvtepi8_epi16(__A), 1396 (__v32hi)__W); 1397 } 1398 1399 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1400 _mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A) 1401 { 1402 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1403 (__v32hi)_mm512_cvtepi8_epi16(__A), 1404 (__v32hi)_mm512_setzero_si512()); 1405 } 1406 1407 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1408 _mm512_cvtepu8_epi16(__m256i __A) 1409 { 1410 return (__m512i)__builtin_convertvector((__v32qu)__A, __v32hi); 1411 } 1412 1413 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1414 _mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A) 1415 { 1416 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1417 (__v32hi)_mm512_cvtepu8_epi16(__A), 1418 (__v32hi)__W); 1419 } 1420 1421 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1422 _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) 1423 { 1424 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1425 (__v32hi)_mm512_cvtepu8_epi16(__A), 1426 (__v32hi)_mm512_setzero_si512()); 1427 } 1428 1429 1430 #define _mm512_shufflehi_epi16(A, imm) \ 1431 (__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm)) 1432 1433 #define _mm512_mask_shufflehi_epi16(W, U, A, imm) \ 1434 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1435 (__v32hi)_mm512_shufflehi_epi16((A), \ 1436 (imm)), \ 1437 (__v32hi)(__m512i)(W)) 1438 1439 #define _mm512_maskz_shufflehi_epi16(U, A, imm) \ 1440 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1441 (__v32hi)_mm512_shufflehi_epi16((A), \ 1442 (imm)), \ 1443 (__v32hi)_mm512_setzero_si512()) 1444 1445 #define _mm512_shufflelo_epi16(A, imm) \ 1446 (__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm)) 1447 1448 1449 #define _mm512_mask_shufflelo_epi16(W, U, A, imm) \ 1450 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1451 (__v32hi)_mm512_shufflelo_epi16((A), \ 1452 (imm)), \ 1453 (__v32hi)(__m512i)(W)) 1454 1455 1456 #define _mm512_maskz_shufflelo_epi16(U, A, imm) \ 1457 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1458 (__v32hi)_mm512_shufflelo_epi16((A), \ 1459 (imm)), \ 1460 (__v32hi)_mm512_setzero_si512()) 1461 1462 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1463 _mm512_sllv_epi16(__m512i __A, __m512i __B) 1464 { 1465 return (__m512i)__builtin_ia32_psllv32hi((__v32hi) __A, (__v32hi) __B); 1466 } 1467 1468 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1469 _mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1470 { 1471 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1472 (__v32hi)_mm512_sllv_epi16(__A, __B), 1473 (__v32hi)__W); 1474 } 1475 1476 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1477 _mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1478 { 1479 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1480 (__v32hi)_mm512_sllv_epi16(__A, __B), 1481 (__v32hi)_mm512_setzero_si512()); 1482 } 1483 1484 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1485 _mm512_sll_epi16(__m512i __A, __m128i __B) 1486 { 1487 return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B); 1488 } 1489 1490 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1491 _mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 1492 { 1493 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1494 (__v32hi)_mm512_sll_epi16(__A, __B), 1495 (__v32hi)__W); 1496 } 1497 1498 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1499 _mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B) 1500 { 1501 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1502 (__v32hi)_mm512_sll_epi16(__A, __B), 1503 (__v32hi)_mm512_setzero_si512()); 1504 } 1505 1506 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1507 _mm512_slli_epi16(__m512i __A, unsigned int __B) 1508 { 1509 return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, __B); 1510 } 1511 1512 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1513 _mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, 1514 unsigned int __B) 1515 { 1516 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1517 (__v32hi)_mm512_slli_epi16(__A, __B), 1518 (__v32hi)__W); 1519 } 1520 1521 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1522 _mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, unsigned int __B) 1523 { 1524 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1525 (__v32hi)_mm512_slli_epi16(__A, __B), 1526 (__v32hi)_mm512_setzero_si512()); 1527 } 1528 1529 #define _mm512_bslli_epi128(a, imm) \ 1530 (__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)) 1531 1532 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1533 _mm512_srlv_epi16(__m512i __A, __m512i __B) 1534 { 1535 return (__m512i)__builtin_ia32_psrlv32hi((__v32hi)__A, (__v32hi)__B); 1536 } 1537 1538 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1539 _mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1540 { 1541 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1542 (__v32hi)_mm512_srlv_epi16(__A, __B), 1543 (__v32hi)__W); 1544 } 1545 1546 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1547 _mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1548 { 1549 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1550 (__v32hi)_mm512_srlv_epi16(__A, __B), 1551 (__v32hi)_mm512_setzero_si512()); 1552 } 1553 1554 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1555 _mm512_srav_epi16(__m512i __A, __m512i __B) 1556 { 1557 return (__m512i)__builtin_ia32_psrav32hi((__v32hi)__A, (__v32hi)__B); 1558 } 1559 1560 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1561 _mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1562 { 1563 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1564 (__v32hi)_mm512_srav_epi16(__A, __B), 1565 (__v32hi)__W); 1566 } 1567 1568 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1569 _mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1570 { 1571 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1572 (__v32hi)_mm512_srav_epi16(__A, __B), 1573 (__v32hi)_mm512_setzero_si512()); 1574 } 1575 1576 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1577 _mm512_sra_epi16(__m512i __A, __m128i __B) 1578 { 1579 return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B); 1580 } 1581 1582 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1583 _mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 1584 { 1585 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1586 (__v32hi)_mm512_sra_epi16(__A, __B), 1587 (__v32hi)__W); 1588 } 1589 1590 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1591 _mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B) 1592 { 1593 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1594 (__v32hi)_mm512_sra_epi16(__A, __B), 1595 (__v32hi)_mm512_setzero_si512()); 1596 } 1597 1598 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1599 _mm512_srai_epi16(__m512i __A, unsigned int __B) 1600 { 1601 return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, __B); 1602 } 1603 1604 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1605 _mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, 1606 unsigned int __B) 1607 { 1608 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1609 (__v32hi)_mm512_srai_epi16(__A, __B), 1610 (__v32hi)__W); 1611 } 1612 1613 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1614 _mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, unsigned int __B) 1615 { 1616 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1617 (__v32hi)_mm512_srai_epi16(__A, __B), 1618 (__v32hi)_mm512_setzero_si512()); 1619 } 1620 1621 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1622 _mm512_srl_epi16(__m512i __A, __m128i __B) 1623 { 1624 return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B); 1625 } 1626 1627 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1628 _mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 1629 { 1630 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1631 (__v32hi)_mm512_srl_epi16(__A, __B), 1632 (__v32hi)__W); 1633 } 1634 1635 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1636 _mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B) 1637 { 1638 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1639 (__v32hi)_mm512_srl_epi16(__A, __B), 1640 (__v32hi)_mm512_setzero_si512()); 1641 } 1642 1643 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1644 _mm512_srli_epi16(__m512i __A, unsigned int __B) 1645 { 1646 return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, __B); 1647 } 1648 1649 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1650 _mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, 1651 unsigned int __B) 1652 { 1653 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1654 (__v32hi)_mm512_srli_epi16(__A, __B), 1655 (__v32hi)__W); 1656 } 1657 1658 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1659 _mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) 1660 { 1661 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1662 (__v32hi)_mm512_srli_epi16(__A, __B), 1663 (__v32hi)_mm512_setzero_si512()); 1664 } 1665 1666 #define _mm512_bsrli_epi128(a, imm) \ 1667 (__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)) 1668 1669 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1670 _mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A) 1671 { 1672 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 1673 (__v32hi) __A, 1674 (__v32hi) __W); 1675 } 1676 1677 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1678 _mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A) 1679 { 1680 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 1681 (__v32hi) __A, 1682 (__v32hi) _mm512_setzero_si512 ()); 1683 } 1684 1685 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1686 _mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A) 1687 { 1688 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 1689 (__v64qi) __A, 1690 (__v64qi) __W); 1691 } 1692 1693 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1694 _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A) 1695 { 1696 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 1697 (__v64qi) __A, 1698 (__v64qi) _mm512_setzero_si512 ()); 1699 } 1700 1701 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1702 _mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A) 1703 { 1704 return (__m512i) __builtin_ia32_selectb_512(__M, 1705 (__v64qi)_mm512_set1_epi8(__A), 1706 (__v64qi) __O); 1707 } 1708 1709 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1710 _mm512_maskz_set1_epi8 (__mmask64 __M, char __A) 1711 { 1712 return (__m512i) __builtin_ia32_selectb_512(__M, 1713 (__v64qi) _mm512_set1_epi8(__A), 1714 (__v64qi) _mm512_setzero_si512()); 1715 } 1716 1717 static __inline__ __mmask64 __DEFAULT_FN_ATTRS 1718 _mm512_kunpackd (__mmask64 __A, __mmask64 __B) 1719 { 1720 return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A, 1721 (__mmask64) __B); 1722 } 1723 1724 static __inline__ __mmask32 __DEFAULT_FN_ATTRS 1725 _mm512_kunpackw (__mmask32 __A, __mmask32 __B) 1726 { 1727 return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, 1728 (__mmask32) __B); 1729 } 1730 1731 static __inline __m512i __DEFAULT_FN_ATTRS512 1732 _mm512_loadu_epi16 (void const *__P) 1733 { 1734 struct __loadu_epi16 { 1735 __m512i_u __v; 1736 } __attribute__((__packed__, __may_alias__)); 1737 return ((const struct __loadu_epi16*)__P)->__v; 1738 } 1739 1740 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1741 _mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P) 1742 { 1743 return (__m512i) __builtin_ia32_loaddquhi512_mask ((const __v32hi *) __P, 1744 (__v32hi) __W, 1745 (__mmask32) __U); 1746 } 1747 1748 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1749 _mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P) 1750 { 1751 return (__m512i) __builtin_ia32_loaddquhi512_mask ((const __v32hi *) __P, 1752 (__v32hi) 1753 _mm512_setzero_si512 (), 1754 (__mmask32) __U); 1755 } 1756 1757 static __inline __m512i __DEFAULT_FN_ATTRS512 1758 _mm512_loadu_epi8 (void const *__P) 1759 { 1760 struct __loadu_epi8 { 1761 __m512i_u __v; 1762 } __attribute__((__packed__, __may_alias__)); 1763 return ((const struct __loadu_epi8*)__P)->__v; 1764 } 1765 1766 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1767 _mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P) 1768 { 1769 return (__m512i) __builtin_ia32_loaddquqi512_mask ((const __v64qi *) __P, 1770 (__v64qi) __W, 1771 (__mmask64) __U); 1772 } 1773 1774 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1775 _mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P) 1776 { 1777 return (__m512i) __builtin_ia32_loaddquqi512_mask ((const __v64qi *) __P, 1778 (__v64qi) 1779 _mm512_setzero_si512 (), 1780 (__mmask64) __U); 1781 } 1782 1783 static __inline void __DEFAULT_FN_ATTRS512 1784 _mm512_storeu_epi16 (void *__P, __m512i __A) 1785 { 1786 struct __storeu_epi16 { 1787 __m512i_u __v; 1788 } __attribute__((__packed__, __may_alias__)); 1789 ((struct __storeu_epi16*)__P)->__v = __A; 1790 } 1791 1792 static __inline__ void __DEFAULT_FN_ATTRS512 1793 _mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A) 1794 { 1795 __builtin_ia32_storedquhi512_mask ((__v32hi *) __P, 1796 (__v32hi) __A, 1797 (__mmask32) __U); 1798 } 1799 1800 static __inline void __DEFAULT_FN_ATTRS512 1801 _mm512_storeu_epi8 (void *__P, __m512i __A) 1802 { 1803 struct __storeu_epi8 { 1804 __m512i_u __v; 1805 } __attribute__((__packed__, __may_alias__)); 1806 ((struct __storeu_epi8*)__P)->__v = __A; 1807 } 1808 1809 static __inline__ void __DEFAULT_FN_ATTRS512 1810 _mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A) 1811 { 1812 __builtin_ia32_storedquqi512_mask ((__v64qi *) __P, 1813 (__v64qi) __A, 1814 (__mmask64) __U); 1815 } 1816 1817 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1818 _mm512_test_epi8_mask (__m512i __A, __m512i __B) 1819 { 1820 return _mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B), 1821 _mm512_setzero_si512()); 1822 } 1823 1824 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1825 _mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) 1826 { 1827 return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), 1828 _mm512_setzero_si512()); 1829 } 1830 1831 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1832 _mm512_test_epi16_mask (__m512i __A, __m512i __B) 1833 { 1834 return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B), 1835 _mm512_setzero_si512()); 1836 } 1837 1838 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1839 _mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) 1840 { 1841 return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), 1842 _mm512_setzero_si512()); 1843 } 1844 1845 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1846 _mm512_testn_epi8_mask (__m512i __A, __m512i __B) 1847 { 1848 return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); 1849 } 1850 1851 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1852 _mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) 1853 { 1854 return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), 1855 _mm512_setzero_si512()); 1856 } 1857 1858 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1859 _mm512_testn_epi16_mask (__m512i __A, __m512i __B) 1860 { 1861 return _mm512_cmpeq_epi16_mask (_mm512_and_epi32 (__A, __B), 1862 _mm512_setzero_si512()); 1863 } 1864 1865 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1866 _mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) 1867 { 1868 return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), 1869 _mm512_setzero_si512()); 1870 } 1871 1872 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1873 _mm512_movepi8_mask (__m512i __A) 1874 { 1875 return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A); 1876 } 1877 1878 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1879 _mm512_movepi16_mask (__m512i __A) 1880 { 1881 return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A); 1882 } 1883 1884 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1885 _mm512_movm_epi8 (__mmask64 __A) 1886 { 1887 return (__m512i) __builtin_ia32_cvtmask2b512 (__A); 1888 } 1889 1890 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1891 _mm512_movm_epi16 (__mmask32 __A) 1892 { 1893 return (__m512i) __builtin_ia32_cvtmask2w512 (__A); 1894 } 1895 1896 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1897 _mm512_broadcastb_epi8 (__m128i __A) 1898 { 1899 return (__m512i)__builtin_shufflevector((__v16qi) __A, (__v16qi) __A, 1900 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1901 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1902 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1903 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 1904 } 1905 1906 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1907 _mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A) 1908 { 1909 return (__m512i)__builtin_ia32_selectb_512(__M, 1910 (__v64qi) _mm512_broadcastb_epi8(__A), 1911 (__v64qi) __O); 1912 } 1913 1914 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1915 _mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A) 1916 { 1917 return (__m512i)__builtin_ia32_selectb_512(__M, 1918 (__v64qi) _mm512_broadcastb_epi8(__A), 1919 (__v64qi) _mm512_setzero_si512()); 1920 } 1921 1922 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1923 _mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A) 1924 { 1925 return (__m512i) __builtin_ia32_selectw_512(__M, 1926 (__v32hi) _mm512_set1_epi16(__A), 1927 (__v32hi) __O); 1928 } 1929 1930 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1931 _mm512_maskz_set1_epi16 (__mmask32 __M, short __A) 1932 { 1933 return (__m512i) __builtin_ia32_selectw_512(__M, 1934 (__v32hi) _mm512_set1_epi16(__A), 1935 (__v32hi) _mm512_setzero_si512()); 1936 } 1937 1938 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1939 _mm512_broadcastw_epi16 (__m128i __A) 1940 { 1941 return (__m512i)__builtin_shufflevector((__v8hi) __A, (__v8hi) __A, 1942 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1943 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 1944 } 1945 1946 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1947 _mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A) 1948 { 1949 return (__m512i)__builtin_ia32_selectw_512(__M, 1950 (__v32hi) _mm512_broadcastw_epi16(__A), 1951 (__v32hi) __O); 1952 } 1953 1954 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1955 _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A) 1956 { 1957 return (__m512i)__builtin_ia32_selectw_512(__M, 1958 (__v32hi) _mm512_broadcastw_epi16(__A), 1959 (__v32hi) _mm512_setzero_si512()); 1960 } 1961 1962 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1963 _mm512_permutexvar_epi16 (__m512i __A, __m512i __B) 1964 { 1965 return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); 1966 } 1967 1968 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1969 _mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A, 1970 __m512i __B) 1971 { 1972 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 1973 (__v32hi)_mm512_permutexvar_epi16(__A, __B), 1974 (__v32hi)_mm512_setzero_si512()); 1975 } 1976 1977 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1978 _mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A, 1979 __m512i __B) 1980 { 1981 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 1982 (__v32hi)_mm512_permutexvar_epi16(__A, __B), 1983 (__v32hi)__W); 1984 } 1985 1986 #define _mm512_alignr_epi8(A, B, N) \ 1987 (__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \ 1988 (__v64qi)(__m512i)(B), (int)(N)) 1989 1990 #define _mm512_mask_alignr_epi8(W, U, A, B, N) \ 1991 (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ 1992 (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ 1993 (__v64qi)(__m512i)(W)) 1994 1995 #define _mm512_maskz_alignr_epi8(U, A, B, N) \ 1996 (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ 1997 (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ 1998 (__v64qi)(__m512i)_mm512_setzero_si512()) 1999 2000 #define _mm512_dbsad_epu8(A, B, imm) \ 2001 (__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \ 2002 (__v64qi)(__m512i)(B), (int)(imm)) 2003 2004 #define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \ 2005 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 2006 (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ 2007 (__v32hi)(__m512i)(W)) 2008 2009 #define _mm512_maskz_dbsad_epu8(U, A, B, imm) \ 2010 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 2011 (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ 2012 (__v32hi)_mm512_setzero_si512()) 2013 2014 static __inline__ __m512i __DEFAULT_FN_ATTRS512 2015 _mm512_sad_epu8 (__m512i __A, __m512i __B) 2016 { 2017 return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A, 2018 (__v64qi) __B); 2019 } 2020 2021 #undef __DEFAULT_FN_ATTRS512 2022 #undef __DEFAULT_FN_ATTRS 2023 2024 #endif 2025