1 /*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------=== 2 * 3 * 4 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 * See https://llvm.org/LICENSE.txt for license information. 6 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 * 8 *===-----------------------------------------------------------------------=== 9 */ 10 #ifndef __IMMINTRIN_H 11 #error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead." 12 #endif 13 14 #ifndef __AVX512BWINTRIN_H 15 #define __AVX512BWINTRIN_H 16 17 typedef unsigned int __mmask32; 18 typedef unsigned long long __mmask64; 19 20 /* Define the default attributes for the functions in this file. */ 21 #define __DEFAULT_FN_ATTRS512 \ 22 __attribute__((__always_inline__, __nodebug__, \ 23 __target__("avx512bw,evex512"), __min_vector_width__(512))) 24 #define __DEFAULT_FN_ATTRS \ 25 __attribute__((__always_inline__, __nodebug__, \ 26 __target__("avx512bw,no-evex512"))) 27 28 static __inline __mmask32 __DEFAULT_FN_ATTRS 29 _knot_mask32(__mmask32 __M) 30 { 31 return __builtin_ia32_knotsi(__M); 32 } 33 34 static __inline __mmask64 __DEFAULT_FN_ATTRS _knot_mask64(__mmask64 __M) { 35 return __builtin_ia32_knotdi(__M); 36 } 37 38 static __inline__ __mmask32 __DEFAULT_FN_ATTRS 39 _kand_mask32(__mmask32 __A, __mmask32 __B) 40 { 41 return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B); 42 } 43 44 static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kand_mask64(__mmask64 __A, 45 __mmask64 __B) { 46 return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B); 47 } 48 49 static __inline__ __mmask32 __DEFAULT_FN_ATTRS 50 _kandn_mask32(__mmask32 __A, __mmask32 __B) 51 { 52 return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B); 53 } 54 55 static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kandn_mask64(__mmask64 __A, 56 __mmask64 __B) { 57 return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B); 58 } 59 60 static __inline__ __mmask32 __DEFAULT_FN_ATTRS 61 _kor_mask32(__mmask32 __A, __mmask32 __B) 62 { 63 return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B); 64 } 65 66 static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kor_mask64(__mmask64 __A, 67 __mmask64 __B) { 68 return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B); 69 } 70 71 static __inline__ __mmask32 __DEFAULT_FN_ATTRS 72 _kxnor_mask32(__mmask32 __A, __mmask32 __B) 73 { 74 return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B); 75 } 76 77 static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxnor_mask64(__mmask64 __A, 78 __mmask64 __B) { 79 return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B); 80 } 81 82 static __inline__ __mmask32 __DEFAULT_FN_ATTRS 83 _kxor_mask32(__mmask32 __A, __mmask32 __B) 84 { 85 return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B); 86 } 87 88 static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxor_mask64(__mmask64 __A, 89 __mmask64 __B) { 90 return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B); 91 } 92 93 static __inline__ unsigned char __DEFAULT_FN_ATTRS 94 _kortestc_mask32_u8(__mmask32 __A, __mmask32 __B) 95 { 96 return (unsigned char)__builtin_ia32_kortestcsi(__A, __B); 97 } 98 99 static __inline__ unsigned char __DEFAULT_FN_ATTRS 100 _kortestz_mask32_u8(__mmask32 __A, __mmask32 __B) 101 { 102 return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); 103 } 104 105 static __inline__ unsigned char __DEFAULT_FN_ATTRS 106 _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { 107 *__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B); 108 return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); 109 } 110 111 static __inline__ unsigned char __DEFAULT_FN_ATTRS 112 _kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) { 113 return (unsigned char)__builtin_ia32_kortestcdi(__A, __B); 114 } 115 116 static __inline__ unsigned char __DEFAULT_FN_ATTRS 117 _kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) { 118 return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); 119 } 120 121 static __inline__ unsigned char __DEFAULT_FN_ATTRS 122 _kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) { 123 *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B); 124 return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); 125 } 126 127 static __inline__ unsigned char __DEFAULT_FN_ATTRS 128 _ktestc_mask32_u8(__mmask32 __A, __mmask32 __B) 129 { 130 return (unsigned char)__builtin_ia32_ktestcsi(__A, __B); 131 } 132 133 static __inline__ unsigned char __DEFAULT_FN_ATTRS 134 _ktestz_mask32_u8(__mmask32 __A, __mmask32 __B) 135 { 136 return (unsigned char)__builtin_ia32_ktestzsi(__A, __B); 137 } 138 139 static __inline__ unsigned char __DEFAULT_FN_ATTRS 140 _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { 141 *__C = (unsigned char)__builtin_ia32_ktestcsi(__A, __B); 142 return (unsigned char)__builtin_ia32_ktestzsi(__A, __B); 143 } 144 145 static __inline__ unsigned char __DEFAULT_FN_ATTRS 146 _ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) { 147 return (unsigned char)__builtin_ia32_ktestcdi(__A, __B); 148 } 149 150 static __inline__ unsigned char __DEFAULT_FN_ATTRS 151 _ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) { 152 return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); 153 } 154 155 static __inline__ unsigned char __DEFAULT_FN_ATTRS 156 _ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) { 157 *__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B); 158 return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); 159 } 160 161 static __inline__ __mmask32 __DEFAULT_FN_ATTRS 162 _kadd_mask32(__mmask32 __A, __mmask32 __B) 163 { 164 return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B); 165 } 166 167 static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kadd_mask64(__mmask64 __A, 168 __mmask64 __B) { 169 return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B); 170 } 171 172 #define _kshiftli_mask32(A, I) \ 173 ((__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I))) 174 175 #define _kshiftri_mask32(A, I) \ 176 ((__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I))) 177 178 #define _kshiftli_mask64(A, I) \ 179 ((__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I))) 180 181 #define _kshiftri_mask64(A, I) \ 182 ((__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I))) 183 184 static __inline__ unsigned int __DEFAULT_FN_ATTRS 185 _cvtmask32_u32(__mmask32 __A) { 186 return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A); 187 } 188 189 static __inline__ unsigned long long __DEFAULT_FN_ATTRS 190 _cvtmask64_u64(__mmask64 __A) { 191 return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A); 192 } 193 194 static __inline__ __mmask32 __DEFAULT_FN_ATTRS 195 _cvtu32_mask32(unsigned int __A) { 196 return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A); 197 } 198 199 static __inline__ __mmask64 __DEFAULT_FN_ATTRS 200 _cvtu64_mask64(unsigned long long __A) { 201 return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A); 202 } 203 204 static __inline__ __mmask32 __DEFAULT_FN_ATTRS 205 _load_mask32(__mmask32 *__A) { 206 return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A); 207 } 208 209 static __inline__ __mmask64 __DEFAULT_FN_ATTRS _load_mask64(__mmask64 *__A) { 210 return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A); 211 } 212 213 static __inline__ void __DEFAULT_FN_ATTRS 214 _store_mask32(__mmask32 *__A, __mmask32 __B) { 215 *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B); 216 } 217 218 static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A, 219 __mmask64 __B) { 220 *(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B); 221 } 222 223 /* Integer compare */ 224 225 #define _mm512_cmp_epi8_mask(a, b, p) \ 226 ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ 227 (__v64qi)(__m512i)(b), (int)(p), \ 228 (__mmask64)-1)) 229 230 #define _mm512_mask_cmp_epi8_mask(m, a, b, p) \ 231 ((__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ 232 (__v64qi)(__m512i)(b), (int)(p), \ 233 (__mmask64)(m))) 234 235 #define _mm512_cmp_epu8_mask(a, b, p) \ 236 ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ 237 (__v64qi)(__m512i)(b), (int)(p), \ 238 (__mmask64)-1)) 239 240 #define _mm512_mask_cmp_epu8_mask(m, a, b, p) \ 241 ((__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ 242 (__v64qi)(__m512i)(b), (int)(p), \ 243 (__mmask64)(m))) 244 245 #define _mm512_cmp_epi16_mask(a, b, p) \ 246 ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ 247 (__v32hi)(__m512i)(b), (int)(p), \ 248 (__mmask32)-1)) 249 250 #define _mm512_mask_cmp_epi16_mask(m, a, b, p) \ 251 ((__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ 252 (__v32hi)(__m512i)(b), (int)(p), \ 253 (__mmask32)(m))) 254 255 #define _mm512_cmp_epu16_mask(a, b, p) \ 256 ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ 257 (__v32hi)(__m512i)(b), (int)(p), \ 258 (__mmask32)-1)) 259 260 #define _mm512_mask_cmp_epu16_mask(m, a, b, p) \ 261 ((__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ 262 (__v32hi)(__m512i)(b), (int)(p), \ 263 (__mmask32)(m))) 264 265 #define _mm512_cmpeq_epi8_mask(A, B) \ 266 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) 267 #define _mm512_mask_cmpeq_epi8_mask(k, A, B) \ 268 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ) 269 #define _mm512_cmpge_epi8_mask(A, B) \ 270 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GE) 271 #define _mm512_mask_cmpge_epi8_mask(k, A, B) \ 272 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE) 273 #define _mm512_cmpgt_epi8_mask(A, B) \ 274 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GT) 275 #define _mm512_mask_cmpgt_epi8_mask(k, A, B) \ 276 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT) 277 #define _mm512_cmple_epi8_mask(A, B) \ 278 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LE) 279 #define _mm512_mask_cmple_epi8_mask(k, A, B) \ 280 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE) 281 #define _mm512_cmplt_epi8_mask(A, B) \ 282 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LT) 283 #define _mm512_mask_cmplt_epi8_mask(k, A, B) \ 284 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT) 285 #define _mm512_cmpneq_epi8_mask(A, B) \ 286 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_NE) 287 #define _mm512_mask_cmpneq_epi8_mask(k, A, B) \ 288 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE) 289 290 #define _mm512_cmpeq_epu8_mask(A, B) \ 291 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ) 292 #define _mm512_mask_cmpeq_epu8_mask(k, A, B) \ 293 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ) 294 #define _mm512_cmpge_epu8_mask(A, B) \ 295 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GE) 296 #define _mm512_mask_cmpge_epu8_mask(k, A, B) \ 297 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE) 298 #define _mm512_cmpgt_epu8_mask(A, B) \ 299 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GT) 300 #define _mm512_mask_cmpgt_epu8_mask(k, A, B) \ 301 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT) 302 #define _mm512_cmple_epu8_mask(A, B) \ 303 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LE) 304 #define _mm512_mask_cmple_epu8_mask(k, A, B) \ 305 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE) 306 #define _mm512_cmplt_epu8_mask(A, B) \ 307 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LT) 308 #define _mm512_mask_cmplt_epu8_mask(k, A, B) \ 309 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT) 310 #define _mm512_cmpneq_epu8_mask(A, B) \ 311 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_NE) 312 #define _mm512_mask_cmpneq_epu8_mask(k, A, B) \ 313 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE) 314 315 #define _mm512_cmpeq_epi16_mask(A, B) \ 316 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ) 317 #define _mm512_mask_cmpeq_epi16_mask(k, A, B) \ 318 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ) 319 #define _mm512_cmpge_epi16_mask(A, B) \ 320 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GE) 321 #define _mm512_mask_cmpge_epi16_mask(k, A, B) \ 322 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE) 323 #define _mm512_cmpgt_epi16_mask(A, B) \ 324 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GT) 325 #define _mm512_mask_cmpgt_epi16_mask(k, A, B) \ 326 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT) 327 #define _mm512_cmple_epi16_mask(A, B) \ 328 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LE) 329 #define _mm512_mask_cmple_epi16_mask(k, A, B) \ 330 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE) 331 #define _mm512_cmplt_epi16_mask(A, B) \ 332 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LT) 333 #define _mm512_mask_cmplt_epi16_mask(k, A, B) \ 334 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT) 335 #define _mm512_cmpneq_epi16_mask(A, B) \ 336 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_NE) 337 #define _mm512_mask_cmpneq_epi16_mask(k, A, B) \ 338 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE) 339 340 #define _mm512_cmpeq_epu16_mask(A, B) \ 341 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ) 342 #define _mm512_mask_cmpeq_epu16_mask(k, A, B) \ 343 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ) 344 #define _mm512_cmpge_epu16_mask(A, B) \ 345 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GE) 346 #define _mm512_mask_cmpge_epu16_mask(k, A, B) \ 347 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE) 348 #define _mm512_cmpgt_epu16_mask(A, B) \ 349 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GT) 350 #define _mm512_mask_cmpgt_epu16_mask(k, A, B) \ 351 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT) 352 #define _mm512_cmple_epu16_mask(A, B) \ 353 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LE) 354 #define _mm512_mask_cmple_epu16_mask(k, A, B) \ 355 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE) 356 #define _mm512_cmplt_epu16_mask(A, B) \ 357 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LT) 358 #define _mm512_mask_cmplt_epu16_mask(k, A, B) \ 359 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT) 360 #define _mm512_cmpneq_epu16_mask(A, B) \ 361 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_NE) 362 #define _mm512_mask_cmpneq_epu16_mask(k, A, B) \ 363 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) 364 365 static __inline__ __m512i __DEFAULT_FN_ATTRS512 366 _mm512_add_epi8 (__m512i __A, __m512i __B) { 367 return (__m512i) ((__v64qu) __A + (__v64qu) __B); 368 } 369 370 static __inline__ __m512i __DEFAULT_FN_ATTRS512 371 _mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 372 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 373 (__v64qi)_mm512_add_epi8(__A, __B), 374 (__v64qi)__W); 375 } 376 377 static __inline__ __m512i __DEFAULT_FN_ATTRS512 378 _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 379 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 380 (__v64qi)_mm512_add_epi8(__A, __B), 381 (__v64qi)_mm512_setzero_si512()); 382 } 383 384 static __inline__ __m512i __DEFAULT_FN_ATTRS512 385 _mm512_sub_epi8 (__m512i __A, __m512i __B) { 386 return (__m512i) ((__v64qu) __A - (__v64qu) __B); 387 } 388 389 static __inline__ __m512i __DEFAULT_FN_ATTRS512 390 _mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 391 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 392 (__v64qi)_mm512_sub_epi8(__A, __B), 393 (__v64qi)__W); 394 } 395 396 static __inline__ __m512i __DEFAULT_FN_ATTRS512 397 _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 398 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 399 (__v64qi)_mm512_sub_epi8(__A, __B), 400 (__v64qi)_mm512_setzero_si512()); 401 } 402 403 static __inline__ __m512i __DEFAULT_FN_ATTRS512 404 _mm512_add_epi16 (__m512i __A, __m512i __B) { 405 return (__m512i) ((__v32hu) __A + (__v32hu) __B); 406 } 407 408 static __inline__ __m512i __DEFAULT_FN_ATTRS512 409 _mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 410 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 411 (__v32hi)_mm512_add_epi16(__A, __B), 412 (__v32hi)__W); 413 } 414 415 static __inline__ __m512i __DEFAULT_FN_ATTRS512 416 _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 417 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 418 (__v32hi)_mm512_add_epi16(__A, __B), 419 (__v32hi)_mm512_setzero_si512()); 420 } 421 422 static __inline__ __m512i __DEFAULT_FN_ATTRS512 423 _mm512_sub_epi16 (__m512i __A, __m512i __B) { 424 return (__m512i) ((__v32hu) __A - (__v32hu) __B); 425 } 426 427 static __inline__ __m512i __DEFAULT_FN_ATTRS512 428 _mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 429 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 430 (__v32hi)_mm512_sub_epi16(__A, __B), 431 (__v32hi)__W); 432 } 433 434 static __inline__ __m512i __DEFAULT_FN_ATTRS512 435 _mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 436 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 437 (__v32hi)_mm512_sub_epi16(__A, __B), 438 (__v32hi)_mm512_setzero_si512()); 439 } 440 441 static __inline__ __m512i __DEFAULT_FN_ATTRS512 442 _mm512_mullo_epi16 (__m512i __A, __m512i __B) { 443 return (__m512i) ((__v32hu) __A * (__v32hu) __B); 444 } 445 446 static __inline__ __m512i __DEFAULT_FN_ATTRS512 447 _mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 448 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 449 (__v32hi)_mm512_mullo_epi16(__A, __B), 450 (__v32hi)__W); 451 } 452 453 static __inline__ __m512i __DEFAULT_FN_ATTRS512 454 _mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 455 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 456 (__v32hi)_mm512_mullo_epi16(__A, __B), 457 (__v32hi)_mm512_setzero_si512()); 458 } 459 460 static __inline__ __m512i __DEFAULT_FN_ATTRS512 461 _mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W) 462 { 463 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 464 (__v64qi) __W, 465 (__v64qi) __A); 466 } 467 468 static __inline__ __m512i __DEFAULT_FN_ATTRS512 469 _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W) 470 { 471 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 472 (__v32hi) __W, 473 (__v32hi) __A); 474 } 475 476 static __inline__ __m512i __DEFAULT_FN_ATTRS512 477 _mm512_abs_epi8 (__m512i __A) 478 { 479 return (__m512i)__builtin_elementwise_abs((__v64qs)__A); 480 } 481 482 static __inline__ __m512i __DEFAULT_FN_ATTRS512 483 _mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A) 484 { 485 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 486 (__v64qi)_mm512_abs_epi8(__A), 487 (__v64qi)__W); 488 } 489 490 static __inline__ __m512i __DEFAULT_FN_ATTRS512 491 _mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A) 492 { 493 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 494 (__v64qi)_mm512_abs_epi8(__A), 495 (__v64qi)_mm512_setzero_si512()); 496 } 497 498 static __inline__ __m512i __DEFAULT_FN_ATTRS512 499 _mm512_abs_epi16 (__m512i __A) 500 { 501 return (__m512i)__builtin_elementwise_abs((__v32hi)__A); 502 } 503 504 static __inline__ __m512i __DEFAULT_FN_ATTRS512 505 _mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A) 506 { 507 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 508 (__v32hi)_mm512_abs_epi16(__A), 509 (__v32hi)__W); 510 } 511 512 static __inline__ __m512i __DEFAULT_FN_ATTRS512 513 _mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A) 514 { 515 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 516 (__v32hi)_mm512_abs_epi16(__A), 517 (__v32hi)_mm512_setzero_si512()); 518 } 519 520 static __inline__ __m512i __DEFAULT_FN_ATTRS512 521 _mm512_packs_epi32(__m512i __A, __m512i __B) 522 { 523 return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B); 524 } 525 526 static __inline__ __m512i __DEFAULT_FN_ATTRS512 527 _mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B) 528 { 529 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 530 (__v32hi)_mm512_packs_epi32(__A, __B), 531 (__v32hi)_mm512_setzero_si512()); 532 } 533 534 static __inline__ __m512i __DEFAULT_FN_ATTRS512 535 _mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 536 { 537 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 538 (__v32hi)_mm512_packs_epi32(__A, __B), 539 (__v32hi)__W); 540 } 541 542 static __inline__ __m512i __DEFAULT_FN_ATTRS512 543 _mm512_packs_epi16(__m512i __A, __m512i __B) 544 { 545 return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B); 546 } 547 548 static __inline__ __m512i __DEFAULT_FN_ATTRS512 549 _mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 550 { 551 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 552 (__v64qi)_mm512_packs_epi16(__A, __B), 553 (__v64qi)__W); 554 } 555 556 static __inline__ __m512i __DEFAULT_FN_ATTRS512 557 _mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B) 558 { 559 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 560 (__v64qi)_mm512_packs_epi16(__A, __B), 561 (__v64qi)_mm512_setzero_si512()); 562 } 563 564 static __inline__ __m512i __DEFAULT_FN_ATTRS512 565 _mm512_packus_epi32(__m512i __A, __m512i __B) 566 { 567 return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B); 568 } 569 570 static __inline__ __m512i __DEFAULT_FN_ATTRS512 571 _mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B) 572 { 573 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 574 (__v32hi)_mm512_packus_epi32(__A, __B), 575 (__v32hi)_mm512_setzero_si512()); 576 } 577 578 static __inline__ __m512i __DEFAULT_FN_ATTRS512 579 _mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 580 { 581 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 582 (__v32hi)_mm512_packus_epi32(__A, __B), 583 (__v32hi)__W); 584 } 585 586 static __inline__ __m512i __DEFAULT_FN_ATTRS512 587 _mm512_packus_epi16(__m512i __A, __m512i __B) 588 { 589 return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B); 590 } 591 592 static __inline__ __m512i __DEFAULT_FN_ATTRS512 593 _mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 594 { 595 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 596 (__v64qi)_mm512_packus_epi16(__A, __B), 597 (__v64qi)__W); 598 } 599 600 static __inline__ __m512i __DEFAULT_FN_ATTRS512 601 _mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) 602 { 603 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 604 (__v64qi)_mm512_packus_epi16(__A, __B), 605 (__v64qi)_mm512_setzero_si512()); 606 } 607 608 static __inline__ __m512i __DEFAULT_FN_ATTRS512 609 _mm512_adds_epi8 (__m512i __A, __m512i __B) 610 { 611 return (__m512i)__builtin_elementwise_add_sat((__v64qs)__A, (__v64qs)__B); 612 } 613 614 static __inline__ __m512i __DEFAULT_FN_ATTRS512 615 _mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 616 { 617 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 618 (__v64qi)_mm512_adds_epi8(__A, __B), 619 (__v64qi)__W); 620 } 621 622 static __inline__ __m512i __DEFAULT_FN_ATTRS512 623 _mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B) 624 { 625 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 626 (__v64qi)_mm512_adds_epi8(__A, __B), 627 (__v64qi)_mm512_setzero_si512()); 628 } 629 630 static __inline__ __m512i __DEFAULT_FN_ATTRS512 631 _mm512_adds_epi16 (__m512i __A, __m512i __B) 632 { 633 return (__m512i)__builtin_elementwise_add_sat((__v32hi)__A, (__v32hi)__B); 634 } 635 636 static __inline__ __m512i __DEFAULT_FN_ATTRS512 637 _mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 638 { 639 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 640 (__v32hi)_mm512_adds_epi16(__A, __B), 641 (__v32hi)__W); 642 } 643 644 static __inline__ __m512i __DEFAULT_FN_ATTRS512 645 _mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B) 646 { 647 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 648 (__v32hi)_mm512_adds_epi16(__A, __B), 649 (__v32hi)_mm512_setzero_si512()); 650 } 651 652 static __inline__ __m512i __DEFAULT_FN_ATTRS512 653 _mm512_adds_epu8 (__m512i __A, __m512i __B) 654 { 655 return (__m512i)__builtin_elementwise_add_sat((__v64qu) __A, (__v64qu) __B); 656 } 657 658 static __inline__ __m512i __DEFAULT_FN_ATTRS512 659 _mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 660 { 661 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 662 (__v64qi)_mm512_adds_epu8(__A, __B), 663 (__v64qi)__W); 664 } 665 666 static __inline__ __m512i __DEFAULT_FN_ATTRS512 667 _mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 668 { 669 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 670 (__v64qi)_mm512_adds_epu8(__A, __B), 671 (__v64qi)_mm512_setzero_si512()); 672 } 673 674 static __inline__ __m512i __DEFAULT_FN_ATTRS512 675 _mm512_adds_epu16 (__m512i __A, __m512i __B) 676 { 677 return (__m512i)__builtin_elementwise_add_sat((__v32hu) __A, (__v32hu) __B); 678 } 679 680 static __inline__ __m512i __DEFAULT_FN_ATTRS512 681 _mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 682 { 683 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 684 (__v32hi)_mm512_adds_epu16(__A, __B), 685 (__v32hi)__W); 686 } 687 688 static __inline__ __m512i __DEFAULT_FN_ATTRS512 689 _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 690 { 691 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 692 (__v32hi)_mm512_adds_epu16(__A, __B), 693 (__v32hi)_mm512_setzero_si512()); 694 } 695 696 static __inline__ __m512i __DEFAULT_FN_ATTRS512 697 _mm512_avg_epu8 (__m512i __A, __m512i __B) 698 { 699 return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B); 700 } 701 702 static __inline__ __m512i __DEFAULT_FN_ATTRS512 703 _mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A, 704 __m512i __B) 705 { 706 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 707 (__v64qi)_mm512_avg_epu8(__A, __B), 708 (__v64qi)__W); 709 } 710 711 static __inline__ __m512i __DEFAULT_FN_ATTRS512 712 _mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 713 { 714 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 715 (__v64qi)_mm512_avg_epu8(__A, __B), 716 (__v64qi)_mm512_setzero_si512()); 717 } 718 719 static __inline__ __m512i __DEFAULT_FN_ATTRS512 720 _mm512_avg_epu16 (__m512i __A, __m512i __B) 721 { 722 return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B); 723 } 724 725 static __inline__ __m512i __DEFAULT_FN_ATTRS512 726 _mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A, 727 __m512i __B) 728 { 729 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 730 (__v32hi)_mm512_avg_epu16(__A, __B), 731 (__v32hi)__W); 732 } 733 734 static __inline__ __m512i __DEFAULT_FN_ATTRS512 735 _mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 736 { 737 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 738 (__v32hi)_mm512_avg_epu16(__A, __B), 739 (__v32hi) _mm512_setzero_si512()); 740 } 741 742 static __inline__ __m512i __DEFAULT_FN_ATTRS512 743 _mm512_max_epi8 (__m512i __A, __m512i __B) 744 { 745 return (__m512i)__builtin_elementwise_max((__v64qs) __A, (__v64qs) __B); 746 } 747 748 static __inline__ __m512i __DEFAULT_FN_ATTRS512 749 _mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B) 750 { 751 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 752 (__v64qi)_mm512_max_epi8(__A, __B), 753 (__v64qi)_mm512_setzero_si512()); 754 } 755 756 static __inline__ __m512i __DEFAULT_FN_ATTRS512 757 _mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 758 { 759 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 760 (__v64qi)_mm512_max_epi8(__A, __B), 761 (__v64qi)__W); 762 } 763 764 static __inline__ __m512i __DEFAULT_FN_ATTRS512 765 _mm512_max_epi16 (__m512i __A, __m512i __B) 766 { 767 return (__m512i)__builtin_elementwise_max((__v32hi) __A, (__v32hi) __B); 768 } 769 770 static __inline__ __m512i __DEFAULT_FN_ATTRS512 771 _mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B) 772 { 773 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 774 (__v32hi)_mm512_max_epi16(__A, __B), 775 (__v32hi)_mm512_setzero_si512()); 776 } 777 778 static __inline__ __m512i __DEFAULT_FN_ATTRS512 779 _mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A, 780 __m512i __B) 781 { 782 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 783 (__v32hi)_mm512_max_epi16(__A, __B), 784 (__v32hi)__W); 785 } 786 787 static __inline__ __m512i __DEFAULT_FN_ATTRS512 788 _mm512_max_epu8 (__m512i __A, __m512i __B) 789 { 790 return (__m512i)__builtin_elementwise_max((__v64qu)__A, (__v64qu)__B); 791 } 792 793 static __inline__ __m512i __DEFAULT_FN_ATTRS512 794 _mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B) 795 { 796 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 797 (__v64qi)_mm512_max_epu8(__A, __B), 798 (__v64qi)_mm512_setzero_si512()); 799 } 800 801 static __inline__ __m512i __DEFAULT_FN_ATTRS512 802 _mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 803 { 804 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 805 (__v64qi)_mm512_max_epu8(__A, __B), 806 (__v64qi)__W); 807 } 808 809 static __inline__ __m512i __DEFAULT_FN_ATTRS512 810 _mm512_max_epu16 (__m512i __A, __m512i __B) 811 { 812 return (__m512i)__builtin_elementwise_max((__v32hu)__A, (__v32hu)__B); 813 } 814 815 static __inline__ __m512i __DEFAULT_FN_ATTRS512 816 _mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B) 817 { 818 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 819 (__v32hi)_mm512_max_epu16(__A, __B), 820 (__v32hi)_mm512_setzero_si512()); 821 } 822 823 static __inline__ __m512i __DEFAULT_FN_ATTRS512 824 _mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 825 { 826 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 827 (__v32hi)_mm512_max_epu16(__A, __B), 828 (__v32hi)__W); 829 } 830 831 static __inline__ __m512i __DEFAULT_FN_ATTRS512 832 _mm512_min_epi8 (__m512i __A, __m512i __B) 833 { 834 return (__m512i)__builtin_elementwise_min((__v64qs) __A, (__v64qs) __B); 835 } 836 837 static __inline__ __m512i __DEFAULT_FN_ATTRS512 838 _mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B) 839 { 840 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 841 (__v64qi)_mm512_min_epi8(__A, __B), 842 (__v64qi)_mm512_setzero_si512()); 843 } 844 845 static __inline__ __m512i __DEFAULT_FN_ATTRS512 846 _mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 847 { 848 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 849 (__v64qi)_mm512_min_epi8(__A, __B), 850 (__v64qi)__W); 851 } 852 853 static __inline__ __m512i __DEFAULT_FN_ATTRS512 854 _mm512_min_epi16 (__m512i __A, __m512i __B) 855 { 856 return (__m512i)__builtin_elementwise_min((__v32hi) __A, (__v32hi) __B); 857 } 858 859 static __inline__ __m512i __DEFAULT_FN_ATTRS512 860 _mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B) 861 { 862 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 863 (__v32hi)_mm512_min_epi16(__A, __B), 864 (__v32hi)_mm512_setzero_si512()); 865 } 866 867 static __inline__ __m512i __DEFAULT_FN_ATTRS512 868 _mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 869 { 870 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 871 (__v32hi)_mm512_min_epi16(__A, __B), 872 (__v32hi)__W); 873 } 874 875 static __inline__ __m512i __DEFAULT_FN_ATTRS512 876 _mm512_min_epu8 (__m512i __A, __m512i __B) 877 { 878 return (__m512i)__builtin_elementwise_min((__v64qu)__A, (__v64qu)__B); 879 } 880 881 static __inline__ __m512i __DEFAULT_FN_ATTRS512 882 _mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B) 883 { 884 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 885 (__v64qi)_mm512_min_epu8(__A, __B), 886 (__v64qi)_mm512_setzero_si512()); 887 } 888 889 static __inline__ __m512i __DEFAULT_FN_ATTRS512 890 _mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 891 { 892 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 893 (__v64qi)_mm512_min_epu8(__A, __B), 894 (__v64qi)__W); 895 } 896 897 static __inline__ __m512i __DEFAULT_FN_ATTRS512 898 _mm512_min_epu16 (__m512i __A, __m512i __B) 899 { 900 return (__m512i)__builtin_elementwise_min((__v32hu)__A, (__v32hu)__B); 901 } 902 903 static __inline__ __m512i __DEFAULT_FN_ATTRS512 904 _mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B) 905 { 906 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 907 (__v32hi)_mm512_min_epu16(__A, __B), 908 (__v32hi)_mm512_setzero_si512()); 909 } 910 911 static __inline__ __m512i __DEFAULT_FN_ATTRS512 912 _mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 913 { 914 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 915 (__v32hi)_mm512_min_epu16(__A, __B), 916 (__v32hi)__W); 917 } 918 919 static __inline__ __m512i __DEFAULT_FN_ATTRS512 920 _mm512_shuffle_epi8(__m512i __A, __m512i __B) 921 { 922 return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B); 923 } 924 925 static __inline__ __m512i __DEFAULT_FN_ATTRS512 926 _mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 927 { 928 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 929 (__v64qi)_mm512_shuffle_epi8(__A, __B), 930 (__v64qi)__W); 931 } 932 933 static __inline__ __m512i __DEFAULT_FN_ATTRS512 934 _mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) 935 { 936 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 937 (__v64qi)_mm512_shuffle_epi8(__A, __B), 938 (__v64qi)_mm512_setzero_si512()); 939 } 940 941 static __inline__ __m512i __DEFAULT_FN_ATTRS512 942 _mm512_subs_epi8 (__m512i __A, __m512i __B) 943 { 944 return (__m512i)__builtin_elementwise_sub_sat((__v64qs)__A, (__v64qs)__B); 945 } 946 947 static __inline__ __m512i __DEFAULT_FN_ATTRS512 948 _mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 949 { 950 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 951 (__v64qi)_mm512_subs_epi8(__A, __B), 952 (__v64qi)__W); 953 } 954 955 static __inline__ __m512i __DEFAULT_FN_ATTRS512 956 _mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B) 957 { 958 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 959 (__v64qi)_mm512_subs_epi8(__A, __B), 960 (__v64qi)_mm512_setzero_si512()); 961 } 962 963 static __inline__ __m512i __DEFAULT_FN_ATTRS512 964 _mm512_subs_epi16 (__m512i __A, __m512i __B) 965 { 966 return (__m512i)__builtin_elementwise_sub_sat((__v32hi)__A, (__v32hi)__B); 967 } 968 969 static __inline__ __m512i __DEFAULT_FN_ATTRS512 970 _mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 971 { 972 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 973 (__v32hi)_mm512_subs_epi16(__A, __B), 974 (__v32hi)__W); 975 } 976 977 static __inline__ __m512i __DEFAULT_FN_ATTRS512 978 _mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B) 979 { 980 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 981 (__v32hi)_mm512_subs_epi16(__A, __B), 982 (__v32hi)_mm512_setzero_si512()); 983 } 984 985 static __inline__ __m512i __DEFAULT_FN_ATTRS512 986 _mm512_subs_epu8 (__m512i __A, __m512i __B) 987 { 988 return (__m512i)__builtin_elementwise_sub_sat((__v64qu) __A, (__v64qu) __B); 989 } 990 991 static __inline__ __m512i __DEFAULT_FN_ATTRS512 992 _mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 993 { 994 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 995 (__v64qi)_mm512_subs_epu8(__A, __B), 996 (__v64qi)__W); 997 } 998 999 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1000 _mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 1001 { 1002 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1003 (__v64qi)_mm512_subs_epu8(__A, __B), 1004 (__v64qi)_mm512_setzero_si512()); 1005 } 1006 1007 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1008 _mm512_subs_epu16 (__m512i __A, __m512i __B) 1009 { 1010 return (__m512i)__builtin_elementwise_sub_sat((__v32hu) __A, (__v32hu) __B); 1011 } 1012 1013 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1014 _mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1015 { 1016 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1017 (__v32hi)_mm512_subs_epu16(__A, __B), 1018 (__v32hi)__W); 1019 } 1020 1021 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1022 _mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 1023 { 1024 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1025 (__v32hi)_mm512_subs_epu16(__A, __B), 1026 (__v32hi)_mm512_setzero_si512()); 1027 } 1028 1029 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1030 _mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B) 1031 { 1032 return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I, 1033 (__v32hi)__B); 1034 } 1035 1036 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1037 _mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I, 1038 __m512i __B) 1039 { 1040 return (__m512i)__builtin_ia32_selectw_512(__U, 1041 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 1042 (__v32hi)__A); 1043 } 1044 1045 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1046 _mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U, 1047 __m512i __B) 1048 { 1049 return (__m512i)__builtin_ia32_selectw_512(__U, 1050 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 1051 (__v32hi)__I); 1052 } 1053 1054 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1055 _mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I, 1056 __m512i __B) 1057 { 1058 return (__m512i)__builtin_ia32_selectw_512(__U, 1059 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 1060 (__v32hi)_mm512_setzero_si512()); 1061 } 1062 1063 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1064 _mm512_mulhrs_epi16(__m512i __A, __m512i __B) 1065 { 1066 return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B); 1067 } 1068 1069 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1070 _mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1071 { 1072 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1073 (__v32hi)_mm512_mulhrs_epi16(__A, __B), 1074 (__v32hi)__W); 1075 } 1076 1077 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1078 _mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1079 { 1080 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1081 (__v32hi)_mm512_mulhrs_epi16(__A, __B), 1082 (__v32hi)_mm512_setzero_si512()); 1083 } 1084 1085 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1086 _mm512_mulhi_epi16(__m512i __A, __m512i __B) 1087 { 1088 return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B); 1089 } 1090 1091 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1092 _mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, 1093 __m512i __B) 1094 { 1095 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1096 (__v32hi)_mm512_mulhi_epi16(__A, __B), 1097 (__v32hi)__W); 1098 } 1099 1100 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1101 _mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1102 { 1103 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1104 (__v32hi)_mm512_mulhi_epi16(__A, __B), 1105 (__v32hi)_mm512_setzero_si512()); 1106 } 1107 1108 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1109 _mm512_mulhi_epu16(__m512i __A, __m512i __B) 1110 { 1111 return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B); 1112 } 1113 1114 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1115 _mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1116 { 1117 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1118 (__v32hi)_mm512_mulhi_epu16(__A, __B), 1119 (__v32hi)__W); 1120 } 1121 1122 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1123 _mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 1124 { 1125 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1126 (__v32hi)_mm512_mulhi_epu16(__A, __B), 1127 (__v32hi)_mm512_setzero_si512()); 1128 } 1129 1130 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1131 _mm512_maddubs_epi16(__m512i __X, __m512i __Y) { 1132 return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y); 1133 } 1134 1135 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1136 _mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X, 1137 __m512i __Y) { 1138 return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, 1139 (__v32hi)_mm512_maddubs_epi16(__X, __Y), 1140 (__v32hi)__W); 1141 } 1142 1143 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1144 _mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) { 1145 return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, 1146 (__v32hi)_mm512_maddubs_epi16(__X, __Y), 1147 (__v32hi)_mm512_setzero_si512()); 1148 } 1149 1150 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1151 _mm512_madd_epi16(__m512i __A, __m512i __B) { 1152 return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B); 1153 } 1154 1155 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1156 _mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { 1157 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 1158 (__v16si)_mm512_madd_epi16(__A, __B), 1159 (__v16si)__W); 1160 } 1161 1162 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1163 _mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) { 1164 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 1165 (__v16si)_mm512_madd_epi16(__A, __B), 1166 (__v16si)_mm512_setzero_si512()); 1167 } 1168 1169 static __inline__ __m256i __DEFAULT_FN_ATTRS512 1170 _mm512_cvtsepi16_epi8 (__m512i __A) { 1171 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 1172 (__v32qi)_mm256_setzero_si256(), 1173 (__mmask32) -1); 1174 } 1175 1176 static __inline__ __m256i __DEFAULT_FN_ATTRS512 1177 _mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 1178 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 1179 (__v32qi)__O, 1180 __M); 1181 } 1182 1183 static __inline__ __m256i __DEFAULT_FN_ATTRS512 1184 _mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A) { 1185 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 1186 (__v32qi) _mm256_setzero_si256(), 1187 __M); 1188 } 1189 1190 static __inline__ __m256i __DEFAULT_FN_ATTRS512 1191 _mm512_cvtusepi16_epi8 (__m512i __A) { 1192 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 1193 (__v32qi) _mm256_setzero_si256(), 1194 (__mmask32) -1); 1195 } 1196 1197 static __inline__ __m256i __DEFAULT_FN_ATTRS512 1198 _mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 1199 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 1200 (__v32qi) __O, 1201 __M); 1202 } 1203 1204 static __inline__ __m256i __DEFAULT_FN_ATTRS512 1205 _mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) { 1206 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 1207 (__v32qi) _mm256_setzero_si256(), 1208 __M); 1209 } 1210 1211 static __inline__ __m256i __DEFAULT_FN_ATTRS512 1212 _mm512_cvtepi16_epi8 (__m512i __A) { 1213 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 1214 (__v32qi) _mm256_undefined_si256(), 1215 (__mmask32) -1); 1216 } 1217 1218 static __inline__ __m256i __DEFAULT_FN_ATTRS512 1219 _mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 1220 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 1221 (__v32qi) __O, 1222 __M); 1223 } 1224 1225 static __inline__ __m256i __DEFAULT_FN_ATTRS512 1226 _mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) { 1227 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 1228 (__v32qi) _mm256_setzero_si256(), 1229 __M); 1230 } 1231 1232 static __inline__ void __DEFAULT_FN_ATTRS512 1233 _mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) 1234 { 1235 __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 1236 } 1237 1238 static __inline__ void __DEFAULT_FN_ATTRS512 1239 _mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) 1240 { 1241 __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 1242 } 1243 1244 static __inline__ void __DEFAULT_FN_ATTRS512 1245 _mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) 1246 { 1247 __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 1248 } 1249 1250 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1251 _mm512_unpackhi_epi8(__m512i __A, __m512i __B) { 1252 return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, 1253 8, 64+8, 9, 64+9, 1254 10, 64+10, 11, 64+11, 1255 12, 64+12, 13, 64+13, 1256 14, 64+14, 15, 64+15, 1257 24, 64+24, 25, 64+25, 1258 26, 64+26, 27, 64+27, 1259 28, 64+28, 29, 64+29, 1260 30, 64+30, 31, 64+31, 1261 40, 64+40, 41, 64+41, 1262 42, 64+42, 43, 64+43, 1263 44, 64+44, 45, 64+45, 1264 46, 64+46, 47, 64+47, 1265 56, 64+56, 57, 64+57, 1266 58, 64+58, 59, 64+59, 1267 60, 64+60, 61, 64+61, 1268 62, 64+62, 63, 64+63); 1269 } 1270 1271 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1272 _mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 1273 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1274 (__v64qi)_mm512_unpackhi_epi8(__A, __B), 1275 (__v64qi)__W); 1276 } 1277 1278 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1279 _mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 1280 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1281 (__v64qi)_mm512_unpackhi_epi8(__A, __B), 1282 (__v64qi)_mm512_setzero_si512()); 1283 } 1284 1285 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1286 _mm512_unpackhi_epi16(__m512i __A, __m512i __B) { 1287 return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, 1288 4, 32+4, 5, 32+5, 1289 6, 32+6, 7, 32+7, 1290 12, 32+12, 13, 32+13, 1291 14, 32+14, 15, 32+15, 1292 20, 32+20, 21, 32+21, 1293 22, 32+22, 23, 32+23, 1294 28, 32+28, 29, 32+29, 1295 30, 32+30, 31, 32+31); 1296 } 1297 1298 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1299 _mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 1300 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1301 (__v32hi)_mm512_unpackhi_epi16(__A, __B), 1302 (__v32hi)__W); 1303 } 1304 1305 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1306 _mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 1307 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1308 (__v32hi)_mm512_unpackhi_epi16(__A, __B), 1309 (__v32hi)_mm512_setzero_si512()); 1310 } 1311 1312 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1313 _mm512_unpacklo_epi8(__m512i __A, __m512i __B) { 1314 return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, 1315 0, 64+0, 1, 64+1, 1316 2, 64+2, 3, 64+3, 1317 4, 64+4, 5, 64+5, 1318 6, 64+6, 7, 64+7, 1319 16, 64+16, 17, 64+17, 1320 18, 64+18, 19, 64+19, 1321 20, 64+20, 21, 64+21, 1322 22, 64+22, 23, 64+23, 1323 32, 64+32, 33, 64+33, 1324 34, 64+34, 35, 64+35, 1325 36, 64+36, 37, 64+37, 1326 38, 64+38, 39, 64+39, 1327 48, 64+48, 49, 64+49, 1328 50, 64+50, 51, 64+51, 1329 52, 64+52, 53, 64+53, 1330 54, 64+54, 55, 64+55); 1331 } 1332 1333 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1334 _mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 1335 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1336 (__v64qi)_mm512_unpacklo_epi8(__A, __B), 1337 (__v64qi)__W); 1338 } 1339 1340 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1341 _mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 1342 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1343 (__v64qi)_mm512_unpacklo_epi8(__A, __B), 1344 (__v64qi)_mm512_setzero_si512()); 1345 } 1346 1347 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1348 _mm512_unpacklo_epi16(__m512i __A, __m512i __B) { 1349 return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, 1350 0, 32+0, 1, 32+1, 1351 2, 32+2, 3, 32+3, 1352 8, 32+8, 9, 32+9, 1353 10, 32+10, 11, 32+11, 1354 16, 32+16, 17, 32+17, 1355 18, 32+18, 19, 32+19, 1356 24, 32+24, 25, 32+25, 1357 26, 32+26, 27, 32+27); 1358 } 1359 1360 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1361 _mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 1362 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1363 (__v32hi)_mm512_unpacklo_epi16(__A, __B), 1364 (__v32hi)__W); 1365 } 1366 1367 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1368 _mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 1369 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1370 (__v32hi)_mm512_unpacklo_epi16(__A, __B), 1371 (__v32hi)_mm512_setzero_si512()); 1372 } 1373 1374 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1375 _mm512_cvtepi8_epi16(__m256i __A) 1376 { 1377 /* This function always performs a signed extension, but __v32qi is a char 1378 which may be signed or unsigned, so use __v32qs. */ 1379 return (__m512i)__builtin_convertvector((__v32qs)__A, __v32hi); 1380 } 1381 1382 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1383 _mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A) 1384 { 1385 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1386 (__v32hi)_mm512_cvtepi8_epi16(__A), 1387 (__v32hi)__W); 1388 } 1389 1390 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1391 _mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A) 1392 { 1393 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1394 (__v32hi)_mm512_cvtepi8_epi16(__A), 1395 (__v32hi)_mm512_setzero_si512()); 1396 } 1397 1398 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1399 _mm512_cvtepu8_epi16(__m256i __A) 1400 { 1401 return (__m512i)__builtin_convertvector((__v32qu)__A, __v32hi); 1402 } 1403 1404 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1405 _mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A) 1406 { 1407 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1408 (__v32hi)_mm512_cvtepu8_epi16(__A), 1409 (__v32hi)__W); 1410 } 1411 1412 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1413 _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) 1414 { 1415 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1416 (__v32hi)_mm512_cvtepu8_epi16(__A), 1417 (__v32hi)_mm512_setzero_si512()); 1418 } 1419 1420 1421 #define _mm512_shufflehi_epi16(A, imm) \ 1422 ((__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm))) 1423 1424 #define _mm512_mask_shufflehi_epi16(W, U, A, imm) \ 1425 ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1426 (__v32hi)_mm512_shufflehi_epi16((A), \ 1427 (imm)), \ 1428 (__v32hi)(__m512i)(W))) 1429 1430 #define _mm512_maskz_shufflehi_epi16(U, A, imm) \ 1431 ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1432 (__v32hi)_mm512_shufflehi_epi16((A), \ 1433 (imm)), \ 1434 (__v32hi)_mm512_setzero_si512())) 1435 1436 #define _mm512_shufflelo_epi16(A, imm) \ 1437 ((__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm))) 1438 1439 1440 #define _mm512_mask_shufflelo_epi16(W, U, A, imm) \ 1441 ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1442 (__v32hi)_mm512_shufflelo_epi16((A), \ 1443 (imm)), \ 1444 (__v32hi)(__m512i)(W))) 1445 1446 1447 #define _mm512_maskz_shufflelo_epi16(U, A, imm) \ 1448 ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1449 (__v32hi)_mm512_shufflelo_epi16((A), \ 1450 (imm)), \ 1451 (__v32hi)_mm512_setzero_si512())) 1452 1453 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1454 _mm512_sllv_epi16(__m512i __A, __m512i __B) 1455 { 1456 return (__m512i)__builtin_ia32_psllv32hi((__v32hi) __A, (__v32hi) __B); 1457 } 1458 1459 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1460 _mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1461 { 1462 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1463 (__v32hi)_mm512_sllv_epi16(__A, __B), 1464 (__v32hi)__W); 1465 } 1466 1467 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1468 _mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1469 { 1470 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1471 (__v32hi)_mm512_sllv_epi16(__A, __B), 1472 (__v32hi)_mm512_setzero_si512()); 1473 } 1474 1475 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1476 _mm512_sll_epi16(__m512i __A, __m128i __B) 1477 { 1478 return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B); 1479 } 1480 1481 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1482 _mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 1483 { 1484 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1485 (__v32hi)_mm512_sll_epi16(__A, __B), 1486 (__v32hi)__W); 1487 } 1488 1489 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1490 _mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B) 1491 { 1492 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1493 (__v32hi)_mm512_sll_epi16(__A, __B), 1494 (__v32hi)_mm512_setzero_si512()); 1495 } 1496 1497 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1498 _mm512_slli_epi16(__m512i __A, unsigned int __B) 1499 { 1500 return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, (int)__B); 1501 } 1502 1503 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1504 _mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, 1505 unsigned int __B) 1506 { 1507 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1508 (__v32hi)_mm512_slli_epi16(__A, __B), 1509 (__v32hi)__W); 1510 } 1511 1512 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1513 _mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, unsigned int __B) 1514 { 1515 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1516 (__v32hi)_mm512_slli_epi16(__A, __B), 1517 (__v32hi)_mm512_setzero_si512()); 1518 } 1519 1520 #define _mm512_bslli_epi128(a, imm) \ 1521 ((__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm))) 1522 1523 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1524 _mm512_srlv_epi16(__m512i __A, __m512i __B) 1525 { 1526 return (__m512i)__builtin_ia32_psrlv32hi((__v32hi)__A, (__v32hi)__B); 1527 } 1528 1529 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1530 _mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1531 { 1532 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1533 (__v32hi)_mm512_srlv_epi16(__A, __B), 1534 (__v32hi)__W); 1535 } 1536 1537 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1538 _mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1539 { 1540 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1541 (__v32hi)_mm512_srlv_epi16(__A, __B), 1542 (__v32hi)_mm512_setzero_si512()); 1543 } 1544 1545 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1546 _mm512_srav_epi16(__m512i __A, __m512i __B) 1547 { 1548 return (__m512i)__builtin_ia32_psrav32hi((__v32hi)__A, (__v32hi)__B); 1549 } 1550 1551 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1552 _mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1553 { 1554 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1555 (__v32hi)_mm512_srav_epi16(__A, __B), 1556 (__v32hi)__W); 1557 } 1558 1559 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1560 _mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1561 { 1562 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1563 (__v32hi)_mm512_srav_epi16(__A, __B), 1564 (__v32hi)_mm512_setzero_si512()); 1565 } 1566 1567 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1568 _mm512_sra_epi16(__m512i __A, __m128i __B) 1569 { 1570 return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B); 1571 } 1572 1573 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1574 _mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 1575 { 1576 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1577 (__v32hi)_mm512_sra_epi16(__A, __B), 1578 (__v32hi)__W); 1579 } 1580 1581 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1582 _mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B) 1583 { 1584 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1585 (__v32hi)_mm512_sra_epi16(__A, __B), 1586 (__v32hi)_mm512_setzero_si512()); 1587 } 1588 1589 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1590 _mm512_srai_epi16(__m512i __A, unsigned int __B) 1591 { 1592 return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, (int)__B); 1593 } 1594 1595 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1596 _mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, 1597 unsigned int __B) 1598 { 1599 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1600 (__v32hi)_mm512_srai_epi16(__A, __B), 1601 (__v32hi)__W); 1602 } 1603 1604 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1605 _mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, unsigned int __B) 1606 { 1607 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1608 (__v32hi)_mm512_srai_epi16(__A, __B), 1609 (__v32hi)_mm512_setzero_si512()); 1610 } 1611 1612 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1613 _mm512_srl_epi16(__m512i __A, __m128i __B) 1614 { 1615 return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B); 1616 } 1617 1618 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1619 _mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 1620 { 1621 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1622 (__v32hi)_mm512_srl_epi16(__A, __B), 1623 (__v32hi)__W); 1624 } 1625 1626 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1627 _mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B) 1628 { 1629 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1630 (__v32hi)_mm512_srl_epi16(__A, __B), 1631 (__v32hi)_mm512_setzero_si512()); 1632 } 1633 1634 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1635 _mm512_srli_epi16(__m512i __A, unsigned int __B) 1636 { 1637 return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, (int)__B); 1638 } 1639 1640 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1641 _mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, 1642 unsigned int __B) 1643 { 1644 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1645 (__v32hi)_mm512_srli_epi16(__A, __B), 1646 (__v32hi)__W); 1647 } 1648 1649 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1650 _mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) 1651 { 1652 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1653 (__v32hi)_mm512_srli_epi16(__A, (unsigned int)__B), 1654 (__v32hi)_mm512_setzero_si512()); 1655 } 1656 1657 #define _mm512_bsrli_epi128(a, imm) \ 1658 ((__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm))) 1659 1660 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1661 _mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A) 1662 { 1663 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 1664 (__v32hi) __A, 1665 (__v32hi) __W); 1666 } 1667 1668 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1669 _mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A) 1670 { 1671 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 1672 (__v32hi) __A, 1673 (__v32hi) _mm512_setzero_si512 ()); 1674 } 1675 1676 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1677 _mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A) 1678 { 1679 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 1680 (__v64qi) __A, 1681 (__v64qi) __W); 1682 } 1683 1684 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1685 _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A) 1686 { 1687 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 1688 (__v64qi) __A, 1689 (__v64qi) _mm512_setzero_si512 ()); 1690 } 1691 1692 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1693 _mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A) 1694 { 1695 return (__m512i) __builtin_ia32_selectb_512(__M, 1696 (__v64qi)_mm512_set1_epi8(__A), 1697 (__v64qi) __O); 1698 } 1699 1700 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1701 _mm512_maskz_set1_epi8 (__mmask64 __M, char __A) 1702 { 1703 return (__m512i) __builtin_ia32_selectb_512(__M, 1704 (__v64qi) _mm512_set1_epi8(__A), 1705 (__v64qi) _mm512_setzero_si512()); 1706 } 1707 1708 static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_kunpackd(__mmask64 __A, 1709 __mmask64 __B) { 1710 return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A, 1711 (__mmask64) __B); 1712 } 1713 1714 static __inline__ __mmask32 __DEFAULT_FN_ATTRS 1715 _mm512_kunpackw (__mmask32 __A, __mmask32 __B) 1716 { 1717 return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, 1718 (__mmask32) __B); 1719 } 1720 1721 static __inline __m512i __DEFAULT_FN_ATTRS512 1722 _mm512_loadu_epi16 (void const *__P) 1723 { 1724 struct __loadu_epi16 { 1725 __m512i_u __v; 1726 } __attribute__((__packed__, __may_alias__)); 1727 return ((const struct __loadu_epi16*)__P)->__v; 1728 } 1729 1730 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1731 _mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P) 1732 { 1733 return (__m512i) __builtin_ia32_loaddquhi512_mask ((const __v32hi *) __P, 1734 (__v32hi) __W, 1735 (__mmask32) __U); 1736 } 1737 1738 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1739 _mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P) 1740 { 1741 return (__m512i) __builtin_ia32_loaddquhi512_mask ((const __v32hi *) __P, 1742 (__v32hi) 1743 _mm512_setzero_si512 (), 1744 (__mmask32) __U); 1745 } 1746 1747 static __inline __m512i __DEFAULT_FN_ATTRS512 1748 _mm512_loadu_epi8 (void const *__P) 1749 { 1750 struct __loadu_epi8 { 1751 __m512i_u __v; 1752 } __attribute__((__packed__, __may_alias__)); 1753 return ((const struct __loadu_epi8*)__P)->__v; 1754 } 1755 1756 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1757 _mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P) 1758 { 1759 return (__m512i) __builtin_ia32_loaddquqi512_mask ((const __v64qi *) __P, 1760 (__v64qi) __W, 1761 (__mmask64) __U); 1762 } 1763 1764 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1765 _mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P) 1766 { 1767 return (__m512i) __builtin_ia32_loaddquqi512_mask ((const __v64qi *) __P, 1768 (__v64qi) 1769 _mm512_setzero_si512 (), 1770 (__mmask64) __U); 1771 } 1772 1773 static __inline void __DEFAULT_FN_ATTRS512 1774 _mm512_storeu_epi16 (void *__P, __m512i __A) 1775 { 1776 struct __storeu_epi16 { 1777 __m512i_u __v; 1778 } __attribute__((__packed__, __may_alias__)); 1779 ((struct __storeu_epi16*)__P)->__v = __A; 1780 } 1781 1782 static __inline__ void __DEFAULT_FN_ATTRS512 1783 _mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A) 1784 { 1785 __builtin_ia32_storedquhi512_mask ((__v32hi *) __P, 1786 (__v32hi) __A, 1787 (__mmask32) __U); 1788 } 1789 1790 static __inline void __DEFAULT_FN_ATTRS512 1791 _mm512_storeu_epi8 (void *__P, __m512i __A) 1792 { 1793 struct __storeu_epi8 { 1794 __m512i_u __v; 1795 } __attribute__((__packed__, __may_alias__)); 1796 ((struct __storeu_epi8*)__P)->__v = __A; 1797 } 1798 1799 static __inline__ void __DEFAULT_FN_ATTRS512 1800 _mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A) 1801 { 1802 __builtin_ia32_storedquqi512_mask ((__v64qi *) __P, 1803 (__v64qi) __A, 1804 (__mmask64) __U); 1805 } 1806 1807 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1808 _mm512_test_epi8_mask (__m512i __A, __m512i __B) 1809 { 1810 return _mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B), 1811 _mm512_setzero_si512()); 1812 } 1813 1814 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1815 _mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) 1816 { 1817 return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), 1818 _mm512_setzero_si512()); 1819 } 1820 1821 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1822 _mm512_test_epi16_mask (__m512i __A, __m512i __B) 1823 { 1824 return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B), 1825 _mm512_setzero_si512()); 1826 } 1827 1828 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1829 _mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) 1830 { 1831 return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), 1832 _mm512_setzero_si512()); 1833 } 1834 1835 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1836 _mm512_testn_epi8_mask (__m512i __A, __m512i __B) 1837 { 1838 return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); 1839 } 1840 1841 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1842 _mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) 1843 { 1844 return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), 1845 _mm512_setzero_si512()); 1846 } 1847 1848 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1849 _mm512_testn_epi16_mask (__m512i __A, __m512i __B) 1850 { 1851 return _mm512_cmpeq_epi16_mask (_mm512_and_epi32 (__A, __B), 1852 _mm512_setzero_si512()); 1853 } 1854 1855 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1856 _mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) 1857 { 1858 return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), 1859 _mm512_setzero_si512()); 1860 } 1861 1862 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1863 _mm512_movepi8_mask (__m512i __A) 1864 { 1865 return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A); 1866 } 1867 1868 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1869 _mm512_movepi16_mask (__m512i __A) 1870 { 1871 return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A); 1872 } 1873 1874 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1875 _mm512_movm_epi8 (__mmask64 __A) 1876 { 1877 return (__m512i) __builtin_ia32_cvtmask2b512 (__A); 1878 } 1879 1880 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1881 _mm512_movm_epi16 (__mmask32 __A) 1882 { 1883 return (__m512i) __builtin_ia32_cvtmask2w512 (__A); 1884 } 1885 1886 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1887 _mm512_broadcastb_epi8 (__m128i __A) 1888 { 1889 return (__m512i)__builtin_shufflevector((__v16qi) __A, (__v16qi) __A, 1890 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1891 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1892 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1893 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 1894 } 1895 1896 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1897 _mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A) 1898 { 1899 return (__m512i)__builtin_ia32_selectb_512(__M, 1900 (__v64qi) _mm512_broadcastb_epi8(__A), 1901 (__v64qi) __O); 1902 } 1903 1904 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1905 _mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A) 1906 { 1907 return (__m512i)__builtin_ia32_selectb_512(__M, 1908 (__v64qi) _mm512_broadcastb_epi8(__A), 1909 (__v64qi) _mm512_setzero_si512()); 1910 } 1911 1912 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1913 _mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A) 1914 { 1915 return (__m512i) __builtin_ia32_selectw_512(__M, 1916 (__v32hi) _mm512_set1_epi16(__A), 1917 (__v32hi) __O); 1918 } 1919 1920 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1921 _mm512_maskz_set1_epi16 (__mmask32 __M, short __A) 1922 { 1923 return (__m512i) __builtin_ia32_selectw_512(__M, 1924 (__v32hi) _mm512_set1_epi16(__A), 1925 (__v32hi) _mm512_setzero_si512()); 1926 } 1927 1928 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1929 _mm512_broadcastw_epi16 (__m128i __A) 1930 { 1931 return (__m512i)__builtin_shufflevector((__v8hi) __A, (__v8hi) __A, 1932 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1933 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 1934 } 1935 1936 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1937 _mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A) 1938 { 1939 return (__m512i)__builtin_ia32_selectw_512(__M, 1940 (__v32hi) _mm512_broadcastw_epi16(__A), 1941 (__v32hi) __O); 1942 } 1943 1944 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1945 _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A) 1946 { 1947 return (__m512i)__builtin_ia32_selectw_512(__M, 1948 (__v32hi) _mm512_broadcastw_epi16(__A), 1949 (__v32hi) _mm512_setzero_si512()); 1950 } 1951 1952 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1953 _mm512_permutexvar_epi16 (__m512i __A, __m512i __B) 1954 { 1955 return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); 1956 } 1957 1958 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1959 _mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A, 1960 __m512i __B) 1961 { 1962 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 1963 (__v32hi)_mm512_permutexvar_epi16(__A, __B), 1964 (__v32hi)_mm512_setzero_si512()); 1965 } 1966 1967 static __inline__ __m512i __DEFAULT_FN_ATTRS512 1968 _mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A, 1969 __m512i __B) 1970 { 1971 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 1972 (__v32hi)_mm512_permutexvar_epi16(__A, __B), 1973 (__v32hi)__W); 1974 } 1975 1976 #define _mm512_alignr_epi8(A, B, N) \ 1977 ((__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \ 1978 (__v64qi)(__m512i)(B), (int)(N))) 1979 1980 #define _mm512_mask_alignr_epi8(W, U, A, B, N) \ 1981 ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ 1982 (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ 1983 (__v64qi)(__m512i)(W))) 1984 1985 #define _mm512_maskz_alignr_epi8(U, A, B, N) \ 1986 ((__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ 1987 (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ 1988 (__v64qi)(__m512i)_mm512_setzero_si512())) 1989 1990 #define _mm512_dbsad_epu8(A, B, imm) \ 1991 ((__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \ 1992 (__v64qi)(__m512i)(B), (int)(imm))) 1993 1994 #define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \ 1995 ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1996 (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ 1997 (__v32hi)(__m512i)(W))) 1998 1999 #define _mm512_maskz_dbsad_epu8(U, A, B, imm) \ 2000 ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 2001 (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ 2002 (__v32hi)_mm512_setzero_si512())) 2003 2004 static __inline__ __m512i __DEFAULT_FN_ATTRS512 2005 _mm512_sad_epu8 (__m512i __A, __m512i __B) 2006 { 2007 return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A, 2008 (__v64qi) __B); 2009 } 2010 2011 #undef __DEFAULT_FN_ATTRS512 2012 #undef __DEFAULT_FN_ATTRS 2013 2014 #endif 2015