1 /*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10 #ifndef __TMMINTRIN_H 11 #define __TMMINTRIN_H 12 13 #if !defined(__i386__) && !defined(__x86_64__) 14 #error "This header is only meant to be used on x86 and x64 architecture" 15 #endif 16 17 #include <pmmintrin.h> 18 19 /* Define the default attributes for the functions in this file. */ 20 #define __DEFAULT_FN_ATTRS \ 21 __attribute__((__always_inline__, __nodebug__, \ 22 __target__("ssse3,no-evex512"), __min_vector_width__(64))) 23 #define __DEFAULT_FN_ATTRS_MMX \ 24 __attribute__((__always_inline__, __nodebug__, \ 25 __target__("mmx,ssse3,no-evex512"), \ 26 __min_vector_width__(64))) 27 28 /// Computes the absolute value of each of the packed 8-bit signed 29 /// integers in the source operand and stores the 8-bit unsigned integer 30 /// results in the destination. 31 /// 32 /// \headerfile <x86intrin.h> 33 /// 34 /// This intrinsic corresponds to the \c PABSB instruction. 35 /// 36 /// \param __a 37 /// A 64-bit vector of [8 x i8]. 38 /// \returns A 64-bit integer vector containing the absolute values of the 39 /// elements in the operand. 40 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 41 _mm_abs_pi8(__m64 __a) 42 { 43 return (__m64)__builtin_ia32_pabsb((__v8qi)__a); 44 } 45 46 /// Computes the absolute value of each of the packed 8-bit signed 47 /// integers in the source operand and stores the 8-bit unsigned integer 48 /// results in the destination. 49 /// 50 /// \headerfile <x86intrin.h> 51 /// 52 /// This intrinsic corresponds to the \c VPABSB instruction. 53 /// 54 /// \param __a 55 /// A 128-bit vector of [16 x i8]. 56 /// \returns A 128-bit integer vector containing the absolute values of the 57 /// elements in the operand. 58 static __inline__ __m128i __DEFAULT_FN_ATTRS 59 _mm_abs_epi8(__m128i __a) 60 { 61 return (__m128i)__builtin_elementwise_abs((__v16qs)__a); 62 } 63 64 /// Computes the absolute value of each of the packed 16-bit signed 65 /// integers in the source operand and stores the 16-bit unsigned integer 66 /// results in the destination. 67 /// 68 /// \headerfile <x86intrin.h> 69 /// 70 /// This intrinsic corresponds to the \c PABSW instruction. 71 /// 72 /// \param __a 73 /// A 64-bit vector of [4 x i16]. 74 /// \returns A 64-bit integer vector containing the absolute values of the 75 /// elements in the operand. 76 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 77 _mm_abs_pi16(__m64 __a) 78 { 79 return (__m64)__builtin_ia32_pabsw((__v4hi)__a); 80 } 81 82 /// Computes the absolute value of each of the packed 16-bit signed 83 /// integers in the source operand and stores the 16-bit unsigned integer 84 /// results in the destination. 85 /// 86 /// \headerfile <x86intrin.h> 87 /// 88 /// This intrinsic corresponds to the \c VPABSW instruction. 89 /// 90 /// \param __a 91 /// A 128-bit vector of [8 x i16]. 92 /// \returns A 128-bit integer vector containing the absolute values of the 93 /// elements in the operand. 94 static __inline__ __m128i __DEFAULT_FN_ATTRS 95 _mm_abs_epi16(__m128i __a) 96 { 97 return (__m128i)__builtin_elementwise_abs((__v8hi)__a); 98 } 99 100 /// Computes the absolute value of each of the packed 32-bit signed 101 /// integers in the source operand and stores the 32-bit unsigned integer 102 /// results in the destination. 103 /// 104 /// \headerfile <x86intrin.h> 105 /// 106 /// This intrinsic corresponds to the \c PABSD instruction. 107 /// 108 /// \param __a 109 /// A 64-bit vector of [2 x i32]. 110 /// \returns A 64-bit integer vector containing the absolute values of the 111 /// elements in the operand. 112 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 113 _mm_abs_pi32(__m64 __a) 114 { 115 return (__m64)__builtin_ia32_pabsd((__v2si)__a); 116 } 117 118 /// Computes the absolute value of each of the packed 32-bit signed 119 /// integers in the source operand and stores the 32-bit unsigned integer 120 /// results in the destination. 121 /// 122 /// \headerfile <x86intrin.h> 123 /// 124 /// This intrinsic corresponds to the \c VPABSD instruction. 125 /// 126 /// \param __a 127 /// A 128-bit vector of [4 x i32]. 128 /// \returns A 128-bit integer vector containing the absolute values of the 129 /// elements in the operand. 130 static __inline__ __m128i __DEFAULT_FN_ATTRS 131 _mm_abs_epi32(__m128i __a) 132 { 133 return (__m128i)__builtin_elementwise_abs((__v4si)__a); 134 } 135 136 /// Concatenates the two 128-bit integer vector operands, and 137 /// right-shifts the result by the number of bytes specified in the immediate 138 /// operand. 139 /// 140 /// \headerfile <x86intrin.h> 141 /// 142 /// \code 143 /// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n); 144 /// \endcode 145 /// 146 /// This intrinsic corresponds to the \c PALIGNR instruction. 147 /// 148 /// \param a 149 /// A 128-bit vector of [16 x i8] containing one of the source operands. 150 /// \param b 151 /// A 128-bit vector of [16 x i8] containing one of the source operands. 152 /// \param n 153 /// An immediate operand specifying how many bytes to right-shift the result. 154 /// \returns A 128-bit integer vector containing the concatenated right-shifted 155 /// value. 156 #define _mm_alignr_epi8(a, b, n) \ 157 ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \ 158 (__v16qi)(__m128i)(b), (n))) 159 160 /// Concatenates the two 64-bit integer vector operands, and right-shifts 161 /// the result by the number of bytes specified in the immediate operand. 162 /// 163 /// \headerfile <x86intrin.h> 164 /// 165 /// \code 166 /// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n); 167 /// \endcode 168 /// 169 /// This intrinsic corresponds to the \c PALIGNR instruction. 170 /// 171 /// \param a 172 /// A 64-bit vector of [8 x i8] containing one of the source operands. 173 /// \param b 174 /// A 64-bit vector of [8 x i8] containing one of the source operands. 175 /// \param n 176 /// An immediate operand specifying how many bytes to right-shift the result. 177 /// \returns A 64-bit integer vector containing the concatenated right-shifted 178 /// value. 179 #define _mm_alignr_pi8(a, b, n) \ 180 ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n))) 181 182 /// Horizontally adds the adjacent pairs of values contained in 2 packed 183 /// 128-bit vectors of [8 x i16]. 184 /// 185 /// \headerfile <x86intrin.h> 186 /// 187 /// This intrinsic corresponds to the \c VPHADDW instruction. 188 /// 189 /// \param __a 190 /// A 128-bit vector of [8 x i16] containing one of the source operands. The 191 /// horizontal sums of the values are stored in the lower bits of the 192 /// destination. 193 /// \param __b 194 /// A 128-bit vector of [8 x i16] containing one of the source operands. The 195 /// horizontal sums of the values are stored in the upper bits of the 196 /// destination. 197 /// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of 198 /// both operands. 199 static __inline__ __m128i __DEFAULT_FN_ATTRS 200 _mm_hadd_epi16(__m128i __a, __m128i __b) 201 { 202 return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); 203 } 204 205 /// Horizontally adds the adjacent pairs of values contained in 2 packed 206 /// 128-bit vectors of [4 x i32]. 207 /// 208 /// \headerfile <x86intrin.h> 209 /// 210 /// This intrinsic corresponds to the \c VPHADDD instruction. 211 /// 212 /// \param __a 213 /// A 128-bit vector of [4 x i32] containing one of the source operands. The 214 /// horizontal sums of the values are stored in the lower bits of the 215 /// destination. 216 /// \param __b 217 /// A 128-bit vector of [4 x i32] containing one of the source operands. The 218 /// horizontal sums of the values are stored in the upper bits of the 219 /// destination. 220 /// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of 221 /// both operands. 222 static __inline__ __m128i __DEFAULT_FN_ATTRS 223 _mm_hadd_epi32(__m128i __a, __m128i __b) 224 { 225 return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); 226 } 227 228 /// Horizontally adds the adjacent pairs of values contained in 2 packed 229 /// 64-bit vectors of [4 x i16]. 230 /// 231 /// \headerfile <x86intrin.h> 232 /// 233 /// This intrinsic corresponds to the \c PHADDW instruction. 234 /// 235 /// \param __a 236 /// A 64-bit vector of [4 x i16] containing one of the source operands. The 237 /// horizontal sums of the values are stored in the lower bits of the 238 /// destination. 239 /// \param __b 240 /// A 64-bit vector of [4 x i16] containing one of the source operands. The 241 /// horizontal sums of the values are stored in the upper bits of the 242 /// destination. 243 /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both 244 /// operands. 245 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 246 _mm_hadd_pi16(__m64 __a, __m64 __b) 247 { 248 return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b); 249 } 250 251 /// Horizontally adds the adjacent pairs of values contained in 2 packed 252 /// 64-bit vectors of [2 x i32]. 253 /// 254 /// \headerfile <x86intrin.h> 255 /// 256 /// This intrinsic corresponds to the \c PHADDD instruction. 257 /// 258 /// \param __a 259 /// A 64-bit vector of [2 x i32] containing one of the source operands. The 260 /// horizontal sums of the values are stored in the lower bits of the 261 /// destination. 262 /// \param __b 263 /// A 64-bit vector of [2 x i32] containing one of the source operands. The 264 /// horizontal sums of the values are stored in the upper bits of the 265 /// destination. 266 /// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both 267 /// operands. 268 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 269 _mm_hadd_pi32(__m64 __a, __m64 __b) 270 { 271 return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b); 272 } 273 274 /// Horizontally adds the adjacent pairs of values contained in 2 packed 275 /// 128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are 276 /// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to 277 /// 0x8000. 278 /// 279 /// \headerfile <x86intrin.h> 280 /// 281 /// This intrinsic corresponds to the \c VPHADDSW instruction. 282 /// 283 /// \param __a 284 /// A 128-bit vector of [8 x i16] containing one of the source operands. The 285 /// horizontal sums of the values are stored in the lower bits of the 286 /// destination. 287 /// \param __b 288 /// A 128-bit vector of [8 x i16] containing one of the source operands. The 289 /// horizontal sums of the values are stored in the upper bits of the 290 /// destination. 291 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated 292 /// sums of both operands. 293 static __inline__ __m128i __DEFAULT_FN_ATTRS 294 _mm_hadds_epi16(__m128i __a, __m128i __b) 295 { 296 return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); 297 } 298 299 /// Horizontally adds the adjacent pairs of values contained in 2 packed 300 /// 64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are 301 /// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to 302 /// 0x8000. 303 /// 304 /// \headerfile <x86intrin.h> 305 /// 306 /// This intrinsic corresponds to the \c PHADDSW instruction. 307 /// 308 /// \param __a 309 /// A 64-bit vector of [4 x i16] containing one of the source operands. The 310 /// horizontal sums of the values are stored in the lower bits of the 311 /// destination. 312 /// \param __b 313 /// A 64-bit vector of [4 x i16] containing one of the source operands. The 314 /// horizontal sums of the values are stored in the upper bits of the 315 /// destination. 316 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated 317 /// sums of both operands. 318 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 319 _mm_hadds_pi16(__m64 __a, __m64 __b) 320 { 321 return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b); 322 } 323 324 /// Horizontally subtracts the adjacent pairs of values contained in 2 325 /// packed 128-bit vectors of [8 x i16]. 326 /// 327 /// \headerfile <x86intrin.h> 328 /// 329 /// This intrinsic corresponds to the \c VPHSUBW instruction. 330 /// 331 /// \param __a 332 /// A 128-bit vector of [8 x i16] containing one of the source operands. The 333 /// horizontal differences between the values are stored in the lower bits of 334 /// the destination. 335 /// \param __b 336 /// A 128-bit vector of [8 x i16] containing one of the source operands. The 337 /// horizontal differences between the values are stored in the upper bits of 338 /// the destination. 339 /// \returns A 128-bit vector of [8 x i16] containing the horizontal differences 340 /// of both operands. 341 static __inline__ __m128i __DEFAULT_FN_ATTRS 342 _mm_hsub_epi16(__m128i __a, __m128i __b) 343 { 344 return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); 345 } 346 347 /// Horizontally subtracts the adjacent pairs of values contained in 2 348 /// packed 128-bit vectors of [4 x i32]. 349 /// 350 /// \headerfile <x86intrin.h> 351 /// 352 /// This intrinsic corresponds to the \c VPHSUBD instruction. 353 /// 354 /// \param __a 355 /// A 128-bit vector of [4 x i32] containing one of the source operands. The 356 /// horizontal differences between the values are stored in the lower bits of 357 /// the destination. 358 /// \param __b 359 /// A 128-bit vector of [4 x i32] containing one of the source operands. The 360 /// horizontal differences between the values are stored in the upper bits of 361 /// the destination. 362 /// \returns A 128-bit vector of [4 x i32] containing the horizontal differences 363 /// of both operands. 364 static __inline__ __m128i __DEFAULT_FN_ATTRS 365 _mm_hsub_epi32(__m128i __a, __m128i __b) 366 { 367 return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); 368 } 369 370 /// Horizontally subtracts the adjacent pairs of values contained in 2 371 /// packed 64-bit vectors of [4 x i16]. 372 /// 373 /// \headerfile <x86intrin.h> 374 /// 375 /// This intrinsic corresponds to the \c PHSUBW instruction. 376 /// 377 /// \param __a 378 /// A 64-bit vector of [4 x i16] containing one of the source operands. The 379 /// horizontal differences between the values are stored in the lower bits of 380 /// the destination. 381 /// \param __b 382 /// A 64-bit vector of [4 x i16] containing one of the source operands. The 383 /// horizontal differences between the values are stored in the upper bits of 384 /// the destination. 385 /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences 386 /// of both operands. 387 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 388 _mm_hsub_pi16(__m64 __a, __m64 __b) 389 { 390 return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b); 391 } 392 393 /// Horizontally subtracts the adjacent pairs of values contained in 2 394 /// packed 64-bit vectors of [2 x i32]. 395 /// 396 /// \headerfile <x86intrin.h> 397 /// 398 /// This intrinsic corresponds to the \c PHSUBD instruction. 399 /// 400 /// \param __a 401 /// A 64-bit vector of [2 x i32] containing one of the source operands. The 402 /// horizontal differences between the values are stored in the lower bits of 403 /// the destination. 404 /// \param __b 405 /// A 64-bit vector of [2 x i32] containing one of the source operands. The 406 /// horizontal differences between the values are stored in the upper bits of 407 /// the destination. 408 /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences 409 /// of both operands. 410 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 411 _mm_hsub_pi32(__m64 __a, __m64 __b) 412 { 413 return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b); 414 } 415 416 /// Horizontally subtracts the adjacent pairs of values contained in 2 417 /// packed 128-bit vectors of [8 x i16]. Positive differences greater than 418 /// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are 419 /// saturated to 0x8000. 420 /// 421 /// \headerfile <x86intrin.h> 422 /// 423 /// This intrinsic corresponds to the \c VPHSUBSW instruction. 424 /// 425 /// \param __a 426 /// A 128-bit vector of [8 x i16] containing one of the source operands. The 427 /// horizontal differences between the values are stored in the lower bits of 428 /// the destination. 429 /// \param __b 430 /// A 128-bit vector of [8 x i16] containing one of the source operands. The 431 /// horizontal differences between the values are stored in the upper bits of 432 /// the destination. 433 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated 434 /// differences of both operands. 435 static __inline__ __m128i __DEFAULT_FN_ATTRS 436 _mm_hsubs_epi16(__m128i __a, __m128i __b) 437 { 438 return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); 439 } 440 441 /// Horizontally subtracts the adjacent pairs of values contained in 2 442 /// packed 64-bit vectors of [4 x i16]. Positive differences greater than 443 /// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are 444 /// saturated to 0x8000. 445 /// 446 /// \headerfile <x86intrin.h> 447 /// 448 /// This intrinsic corresponds to the \c PHSUBSW instruction. 449 /// 450 /// \param __a 451 /// A 64-bit vector of [4 x i16] containing one of the source operands. The 452 /// horizontal differences between the values are stored in the lower bits of 453 /// the destination. 454 /// \param __b 455 /// A 64-bit vector of [4 x i16] containing one of the source operands. The 456 /// horizontal differences between the values are stored in the upper bits of 457 /// the destination. 458 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated 459 /// differences of both operands. 460 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 461 _mm_hsubs_pi16(__m64 __a, __m64 __b) 462 { 463 return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b); 464 } 465 466 /// Multiplies corresponding pairs of packed 8-bit unsigned integer 467 /// values contained in the first source operand and packed 8-bit signed 468 /// integer values contained in the second source operand, adds pairs of 469 /// contiguous products with signed saturation, and writes the 16-bit sums to 470 /// the corresponding bits in the destination. 471 /// 472 /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of 473 /// both operands are multiplied, and the sum of both results is written to 474 /// bits [15:0] of the destination. 475 /// 476 /// \headerfile <x86intrin.h> 477 /// 478 /// This intrinsic corresponds to the \c VPMADDUBSW instruction. 479 /// 480 /// \param __a 481 /// A 128-bit integer vector containing the first source operand. 482 /// \param __b 483 /// A 128-bit integer vector containing the second source operand. 484 /// \returns A 128-bit integer vector containing the sums of products of both 485 /// operands: \n 486 /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n 487 /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n 488 /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n 489 /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n 490 /// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n 491 /// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n 492 /// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n 493 /// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15) 494 static __inline__ __m128i __DEFAULT_FN_ATTRS 495 _mm_maddubs_epi16(__m128i __a, __m128i __b) 496 { 497 return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b); 498 } 499 500 /// Multiplies corresponding pairs of packed 8-bit unsigned integer 501 /// values contained in the first source operand and packed 8-bit signed 502 /// integer values contained in the second source operand, adds pairs of 503 /// contiguous products with signed saturation, and writes the 16-bit sums to 504 /// the corresponding bits in the destination. 505 /// 506 /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of 507 /// both operands are multiplied, and the sum of both results is written to 508 /// bits [15:0] of the destination. 509 /// 510 /// \headerfile <x86intrin.h> 511 /// 512 /// This intrinsic corresponds to the \c PMADDUBSW instruction. 513 /// 514 /// \param __a 515 /// A 64-bit integer vector containing the first source operand. 516 /// \param __b 517 /// A 64-bit integer vector containing the second source operand. 518 /// \returns A 64-bit integer vector containing the sums of products of both 519 /// operands: \n 520 /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n 521 /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n 522 /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n 523 /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) 524 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 525 _mm_maddubs_pi16(__m64 __a, __m64 __b) 526 { 527 return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b); 528 } 529 530 /// Multiplies packed 16-bit signed integer values, truncates the 32-bit 531 /// products to the 18 most significant bits by right-shifting, rounds the 532 /// truncated value by adding 1, and writes bits [16:1] to the destination. 533 /// 534 /// \headerfile <x86intrin.h> 535 /// 536 /// This intrinsic corresponds to the \c VPMULHRSW instruction. 537 /// 538 /// \param __a 539 /// A 128-bit vector of [8 x i16] containing one of the source operands. 540 /// \param __b 541 /// A 128-bit vector of [8 x i16] containing one of the source operands. 542 /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled 543 /// products of both operands. 544 static __inline__ __m128i __DEFAULT_FN_ATTRS 545 _mm_mulhrs_epi16(__m128i __a, __m128i __b) 546 { 547 return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); 548 } 549 550 /// Multiplies packed 16-bit signed integer values, truncates the 32-bit 551 /// products to the 18 most significant bits by right-shifting, rounds the 552 /// truncated value by adding 1, and writes bits [16:1] to the destination. 553 /// 554 /// \headerfile <x86intrin.h> 555 /// 556 /// This intrinsic corresponds to the \c PMULHRSW instruction. 557 /// 558 /// \param __a 559 /// A 64-bit vector of [4 x i16] containing one of the source operands. 560 /// \param __b 561 /// A 64-bit vector of [4 x i16] containing one of the source operands. 562 /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled 563 /// products of both operands. 564 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 565 _mm_mulhrs_pi16(__m64 __a, __m64 __b) 566 { 567 return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b); 568 } 569 570 /// Copies the 8-bit integers from a 128-bit integer vector to the 571 /// destination or clears 8-bit values in the destination, as specified by 572 /// the second source operand. 573 /// 574 /// \headerfile <x86intrin.h> 575 /// 576 /// This intrinsic corresponds to the \c VPSHUFB instruction. 577 /// 578 /// \param __a 579 /// A 128-bit integer vector containing the values to be copied. 580 /// \param __b 581 /// A 128-bit integer vector containing control bytes corresponding to 582 /// positions in the destination: 583 /// Bit 7: \n 584 /// 1: Clear the corresponding byte in the destination. \n 585 /// 0: Copy the selected source byte to the corresponding byte in the 586 /// destination. \n 587 /// Bits [6:4] Reserved. \n 588 /// Bits [3:0] select the source byte to be copied. 589 /// \returns A 128-bit integer vector containing the copied or cleared values. 590 static __inline__ __m128i __DEFAULT_FN_ATTRS 591 _mm_shuffle_epi8(__m128i __a, __m128i __b) 592 { 593 return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b); 594 } 595 596 /// Copies the 8-bit integers from a 64-bit integer vector to the 597 /// destination or clears 8-bit values in the destination, as specified by 598 /// the second source operand. 599 /// 600 /// \headerfile <x86intrin.h> 601 /// 602 /// This intrinsic corresponds to the \c PSHUFB instruction. 603 /// 604 /// \param __a 605 /// A 64-bit integer vector containing the values to be copied. 606 /// \param __b 607 /// A 64-bit integer vector containing control bytes corresponding to 608 /// positions in the destination: 609 /// Bit 7: \n 610 /// 1: Clear the corresponding byte in the destination. \n 611 /// 0: Copy the selected source byte to the corresponding byte in the 612 /// destination. \n 613 /// Bits [3:0] select the source byte to be copied. 614 /// \returns A 64-bit integer vector containing the copied or cleared values. 615 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 616 _mm_shuffle_pi8(__m64 __a, __m64 __b) 617 { 618 return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b); 619 } 620 621 /// For each 8-bit integer in the first source operand, perform one of 622 /// the following actions as specified by the second source operand. 623 /// 624 /// If the byte in the second source is negative, calculate the two's 625 /// complement of the corresponding byte in the first source, and write that 626 /// value to the destination. If the byte in the second source is positive, 627 /// copy the corresponding byte from the first source to the destination. If 628 /// the byte in the second source is zero, clear the corresponding byte in 629 /// the destination. 630 /// 631 /// \headerfile <x86intrin.h> 632 /// 633 /// This intrinsic corresponds to the \c VPSIGNB instruction. 634 /// 635 /// \param __a 636 /// A 128-bit integer vector containing the values to be copied. 637 /// \param __b 638 /// A 128-bit integer vector containing control bytes corresponding to 639 /// positions in the destination. 640 /// \returns A 128-bit integer vector containing the resultant values. 641 static __inline__ __m128i __DEFAULT_FN_ATTRS 642 _mm_sign_epi8(__m128i __a, __m128i __b) 643 { 644 return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b); 645 } 646 647 /// For each 16-bit integer in the first source operand, perform one of 648 /// the following actions as specified by the second source operand. 649 /// 650 /// If the word in the second source is negative, calculate the two's 651 /// complement of the corresponding word in the first source, and write that 652 /// value to the destination. If the word in the second source is positive, 653 /// copy the corresponding word from the first source to the destination. If 654 /// the word in the second source is zero, clear the corresponding word in 655 /// the destination. 656 /// 657 /// \headerfile <x86intrin.h> 658 /// 659 /// This intrinsic corresponds to the \c VPSIGNW instruction. 660 /// 661 /// \param __a 662 /// A 128-bit integer vector containing the values to be copied. 663 /// \param __b 664 /// A 128-bit integer vector containing control words corresponding to 665 /// positions in the destination. 666 /// \returns A 128-bit integer vector containing the resultant values. 667 static __inline__ __m128i __DEFAULT_FN_ATTRS 668 _mm_sign_epi16(__m128i __a, __m128i __b) 669 { 670 return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b); 671 } 672 673 /// For each 32-bit integer in the first source operand, perform one of 674 /// the following actions as specified by the second source operand. 675 /// 676 /// If the doubleword in the second source is negative, calculate the two's 677 /// complement of the corresponding word in the first source, and write that 678 /// value to the destination. If the doubleword in the second source is 679 /// positive, copy the corresponding word from the first source to the 680 /// destination. If the doubleword in the second source is zero, clear the 681 /// corresponding word in the destination. 682 /// 683 /// \headerfile <x86intrin.h> 684 /// 685 /// This intrinsic corresponds to the \c VPSIGND instruction. 686 /// 687 /// \param __a 688 /// A 128-bit integer vector containing the values to be copied. 689 /// \param __b 690 /// A 128-bit integer vector containing control doublewords corresponding to 691 /// positions in the destination. 692 /// \returns A 128-bit integer vector containing the resultant values. 693 static __inline__ __m128i __DEFAULT_FN_ATTRS 694 _mm_sign_epi32(__m128i __a, __m128i __b) 695 { 696 return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b); 697 } 698 699 /// For each 8-bit integer in the first source operand, perform one of 700 /// the following actions as specified by the second source operand. 701 /// 702 /// If the byte in the second source is negative, calculate the two's 703 /// complement of the corresponding byte in the first source, and write that 704 /// value to the destination. If the byte in the second source is positive, 705 /// copy the corresponding byte from the first source to the destination. If 706 /// the byte in the second source is zero, clear the corresponding byte in 707 /// the destination. 708 /// 709 /// \headerfile <x86intrin.h> 710 /// 711 /// This intrinsic corresponds to the \c PSIGNB instruction. 712 /// 713 /// \param __a 714 /// A 64-bit integer vector containing the values to be copied. 715 /// \param __b 716 /// A 64-bit integer vector containing control bytes corresponding to 717 /// positions in the destination. 718 /// \returns A 64-bit integer vector containing the resultant values. 719 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 720 _mm_sign_pi8(__m64 __a, __m64 __b) 721 { 722 return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b); 723 } 724 725 /// For each 16-bit integer in the first source operand, perform one of 726 /// the following actions as specified by the second source operand. 727 /// 728 /// If the word in the second source is negative, calculate the two's 729 /// complement of the corresponding word in the first source, and write that 730 /// value to the destination. If the word in the second source is positive, 731 /// copy the corresponding word from the first source to the destination. If 732 /// the word in the second source is zero, clear the corresponding word in 733 /// the destination. 734 /// 735 /// \headerfile <x86intrin.h> 736 /// 737 /// This intrinsic corresponds to the \c PSIGNW instruction. 738 /// 739 /// \param __a 740 /// A 64-bit integer vector containing the values to be copied. 741 /// \param __b 742 /// A 64-bit integer vector containing control words corresponding to 743 /// positions in the destination. 744 /// \returns A 64-bit integer vector containing the resultant values. 745 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 746 _mm_sign_pi16(__m64 __a, __m64 __b) 747 { 748 return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b); 749 } 750 751 /// For each 32-bit integer in the first source operand, perform one of 752 /// the following actions as specified by the second source operand. 753 /// 754 /// If the doubleword in the second source is negative, calculate the two's 755 /// complement of the corresponding doubleword in the first source, and 756 /// write that value to the destination. If the doubleword in the second 757 /// source is positive, copy the corresponding doubleword from the first 758 /// source to the destination. If the doubleword in the second source is 759 /// zero, clear the corresponding doubleword in the destination. 760 /// 761 /// \headerfile <x86intrin.h> 762 /// 763 /// This intrinsic corresponds to the \c PSIGND instruction. 764 /// 765 /// \param __a 766 /// A 64-bit integer vector containing the values to be copied. 767 /// \param __b 768 /// A 64-bit integer vector containing two control doublewords corresponding 769 /// to positions in the destination. 770 /// \returns A 64-bit integer vector containing the resultant values. 771 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 772 _mm_sign_pi32(__m64 __a, __m64 __b) 773 { 774 return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b); 775 } 776 777 #undef __DEFAULT_FN_ATTRS 778 #undef __DEFAULT_FN_ATTRS_MMX 779 780 #endif /* __TMMINTRIN_H */ 781