1 /*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10 #ifndef __TMMINTRIN_H 11 #define __TMMINTRIN_H 12 13 #if !defined(__i386__) && !defined(__x86_64__) 14 #error "This header is only meant to be used on x86 and x64 architecture" 15 #endif 16 17 #include <pmmintrin.h> 18 19 /* Define the default attributes for the functions in this file. */ 20 #define __DEFAULT_FN_ATTRS \ 21 __attribute__((__always_inline__, __nodebug__, \ 22 __target__("ssse3,no-evex512"), __min_vector_width__(64))) 23 #define __DEFAULT_FN_ATTRS_MMX \ 24 __attribute__((__always_inline__, __nodebug__, \ 25 __target__("mmx,ssse3,no-evex512"), \ 26 __min_vector_width__(64))) 27 28 /// Computes the absolute value of each of the packed 8-bit signed 29 /// integers in the source operand and stores the 8-bit unsigned integer 30 /// results in the destination. 31 /// 32 /// \headerfile <x86intrin.h> 33 /// 34 /// This intrinsic corresponds to the \c PABSB instruction. 35 /// 36 /// \param __a 37 /// A 64-bit vector of [8 x i8]. 38 /// \returns A 64-bit integer vector containing the absolute values of the 39 /// elements in the operand. 40 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 41 _mm_abs_pi8(__m64 __a) 42 { 43 return (__m64)__builtin_ia32_pabsb((__v8qi)__a); 44 } 45 46 /// Computes the absolute value of each of the packed 8-bit signed 47 /// integers in the source operand and stores the 8-bit unsigned integer 48 /// results in the destination. 49 /// 50 /// \headerfile <x86intrin.h> 51 /// 52 /// This intrinsic corresponds to the \c VPABSB instruction. 53 /// 54 /// \param __a 55 /// A 128-bit vector of [16 x i8]. 56 /// \returns A 128-bit integer vector containing the absolute values of the 57 /// elements in the operand. 58 static __inline__ __m128i __DEFAULT_FN_ATTRS 59 _mm_abs_epi8(__m128i __a) 60 { 61 return (__m128i)__builtin_elementwise_abs((__v16qs)__a); 62 } 63 64 /// Computes the absolute value of each of the packed 16-bit signed 65 /// integers in the source operand and stores the 16-bit unsigned integer 66 /// results in the destination. 67 /// 68 /// \headerfile <x86intrin.h> 69 /// 70 /// This intrinsic corresponds to the \c PABSW instruction. 71 /// 72 /// \param __a 73 /// A 64-bit vector of [4 x i16]. 74 /// \returns A 64-bit integer vector containing the absolute values of the 75 /// elements in the operand. 76 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 77 _mm_abs_pi16(__m64 __a) 78 { 79 return (__m64)__builtin_ia32_pabsw((__v4hi)__a); 80 } 81 82 /// Computes the absolute value of each of the packed 16-bit signed 83 /// integers in the source operand and stores the 16-bit unsigned integer 84 /// results in the destination. 85 /// 86 /// \headerfile <x86intrin.h> 87 /// 88 /// This intrinsic corresponds to the \c VPABSW instruction. 89 /// 90 /// \param __a 91 /// A 128-bit vector of [8 x i16]. 92 /// \returns A 128-bit integer vector containing the absolute values of the 93 /// elements in the operand. 94 static __inline__ __m128i __DEFAULT_FN_ATTRS 95 _mm_abs_epi16(__m128i __a) 96 { 97 return (__m128i)__builtin_elementwise_abs((__v8hi)__a); 98 } 99 100 /// Computes the absolute value of each of the packed 32-bit signed 101 /// integers in the source operand and stores the 32-bit unsigned integer 102 /// results in the destination. 103 /// 104 /// \headerfile <x86intrin.h> 105 /// 106 /// This intrinsic corresponds to the \c PABSD instruction. 107 /// 108 /// \param __a 109 /// A 64-bit vector of [2 x i32]. 110 /// \returns A 64-bit integer vector containing the absolute values of the 111 /// elements in the operand. 112 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 113 _mm_abs_pi32(__m64 __a) 114 { 115 return (__m64)__builtin_ia32_pabsd((__v2si)__a); 116 } 117 118 /// Computes the absolute value of each of the packed 32-bit signed 119 /// integers in the source operand and stores the 32-bit unsigned integer 120 /// results in the destination. 121 /// 122 /// \headerfile <x86intrin.h> 123 /// 124 /// This intrinsic corresponds to the \c VPABSD instruction. 125 /// 126 /// \param __a 127 /// A 128-bit vector of [4 x i32]. 128 /// \returns A 128-bit integer vector containing the absolute values of the 129 /// elements in the operand. 130 static __inline__ __m128i __DEFAULT_FN_ATTRS 131 _mm_abs_epi32(__m128i __a) 132 { 133 return (__m128i)__builtin_elementwise_abs((__v4si)__a); 134 } 135 136 /// Concatenates the two 128-bit integer vector operands, and 137 /// right-shifts the result by the number of bytes specified in the immediate 138 /// operand. 139 /// 140 /// \headerfile <x86intrin.h> 141 /// 142 /// \code 143 /// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n); 144 /// \endcode 145 /// 146 /// This intrinsic corresponds to the \c PALIGNR instruction. 147 /// 148 /// \param a 149 /// A 128-bit vector of [16 x i8] containing one of the source operands. 150 /// \param b 151 /// A 128-bit vector of [16 x i8] containing one of the source operands. 152 /// \param n 153 /// An immediate operand specifying how many bytes to right-shift the result. 154 /// \returns A 128-bit integer vector containing the concatenated right-shifted 155 /// value. 156 #define _mm_alignr_epi8(a, b, n) \ 157 ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \ 158 (__v16qi)(__m128i)(b), (n))) 159 160 /// Concatenates the two 64-bit integer vector operands, and right-shifts 161 /// the result by the number of bytes specified in the immediate operand. 162 /// 163 /// \headerfile <x86intrin.h> 164 /// 165 /// \code 166 /// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n); 167 /// \endcode 168 /// 169 /// This intrinsic corresponds to the \c PALIGNR instruction. 170 /// 171 /// \param a 172 /// A 64-bit vector of [8 x i8] containing one of the source operands. 173 /// \param b 174 /// A 64-bit vector of [8 x i8] containing one of the source operands. 175 /// \param n 176 /// An immediate operand specifying how many bytes to right-shift the result. 177 /// \returns A 64-bit integer vector containing the concatenated right-shifted 178 /// value. 179 #define _mm_alignr_pi8(a, b, n) \ 180 ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n))) 181 182 /// Horizontally adds the adjacent pairs of values contained in 2 packed 183 /// 128-bit vectors of [8 x i16]. 184 /// 185 /// \headerfile <x86intrin.h> 186 /// 187 /// This intrinsic corresponds to the \c VPHADDW instruction. 188 /// 189 /// \param __a 190 /// A 128-bit vector of [8 x i16] containing one of the source operands. The 191 /// horizontal sums of the values are stored in the lower bits of the 192 /// destination. 193 /// \param __b 194 /// A 128-bit vector of [8 x i16] containing one of the source operands. The 195 /// horizontal sums of the values are stored in the upper bits of the 196 /// destination. 197 /// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of 198 /// both operands. 199 static __inline__ __m128i __DEFAULT_FN_ATTRS 200 _mm_hadd_epi16(__m128i __a, __m128i __b) 201 { 202 return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); 203 } 204 205 /// Horizontally adds the adjacent pairs of values contained in 2 packed 206 /// 128-bit vectors of [4 x i32]. 207 /// 208 /// \headerfile <x86intrin.h> 209 /// 210 /// This intrinsic corresponds to the \c VPHADDD instruction. 211 /// 212 /// \param __a 213 /// A 128-bit vector of [4 x i32] containing one of the source operands. The 214 /// horizontal sums of the values are stored in the lower bits of the 215 /// destination. 216 /// \param __b 217 /// A 128-bit vector of [4 x i32] containing one of the source operands. The 218 /// horizontal sums of the values are stored in the upper bits of the 219 /// destination. 220 /// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of 221 /// both operands. 222 static __inline__ __m128i __DEFAULT_FN_ATTRS 223 _mm_hadd_epi32(__m128i __a, __m128i __b) 224 { 225 return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); 226 } 227 228 /// Horizontally adds the adjacent pairs of values contained in 2 packed 229 /// 64-bit vectors of [4 x i16]. 230 /// 231 /// \headerfile <x86intrin.h> 232 /// 233 /// This intrinsic corresponds to the \c PHADDW instruction. 234 /// 235 /// \param __a 236 /// A 64-bit vector of [4 x i16] containing one of the source operands. The 237 /// horizontal sums of the values are stored in the lower bits of the 238 /// destination. 239 /// \param __b 240 /// A 64-bit vector of [4 x i16] containing one of the source operands. The 241 /// horizontal sums of the values are stored in the upper bits of the 242 /// destination. 243 /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both 244 /// operands. 245 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 246 _mm_hadd_pi16(__m64 __a, __m64 __b) 247 { 248 return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b); 249 } 250 251 /// Horizontally adds the adjacent pairs of values contained in 2 packed 252 /// 64-bit vectors of [2 x i32]. 253 /// 254 /// \headerfile <x86intrin.h> 255 /// 256 /// This intrinsic corresponds to the \c PHADDD instruction. 257 /// 258 /// \param __a 259 /// A 64-bit vector of [2 x i32] containing one of the source operands. The 260 /// horizontal sums of the values are stored in the lower bits of the 261 /// destination. 262 /// \param __b 263 /// A 64-bit vector of [2 x i32] containing one of the source operands. The 264 /// horizontal sums of the values are stored in the upper bits of the 265 /// destination. 266 /// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both 267 /// operands. 268 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 269 _mm_hadd_pi32(__m64 __a, __m64 __b) 270 { 271 return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b); 272 } 273 274 /// Horizontally adds, with saturation, the adjacent pairs of values contained 275 /// in two packed 128-bit vectors of [8 x i16]. 276 /// 277 /// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums 278 /// less than 0x8000 are saturated to 0x8000. 279 /// 280 /// \headerfile <x86intrin.h> 281 /// 282 /// This intrinsic corresponds to the \c VPHADDSW instruction. 283 /// 284 /// \param __a 285 /// A 128-bit vector of [8 x i16] containing one of the source operands. The 286 /// horizontal sums of the values are stored in the lower bits of the 287 /// destination. 288 /// \param __b 289 /// A 128-bit vector of [8 x i16] containing one of the source operands. The 290 /// horizontal sums of the values are stored in the upper bits of the 291 /// destination. 292 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated 293 /// sums of both operands. 294 static __inline__ __m128i __DEFAULT_FN_ATTRS 295 _mm_hadds_epi16(__m128i __a, __m128i __b) 296 { 297 return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); 298 } 299 300 /// Horizontally adds, with saturation, the adjacent pairs of values contained 301 /// in two packed 64-bit vectors of [4 x i16]. 302 /// 303 /// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums 304 /// less than 0x8000 are saturated to 0x8000. 305 /// 306 /// \headerfile <x86intrin.h> 307 /// 308 /// This intrinsic corresponds to the \c PHADDSW instruction. 309 /// 310 /// \param __a 311 /// A 64-bit vector of [4 x i16] containing one of the source operands. The 312 /// horizontal sums of the values are stored in the lower bits of the 313 /// destination. 314 /// \param __b 315 /// A 64-bit vector of [4 x i16] containing one of the source operands. The 316 /// horizontal sums of the values are stored in the upper bits of the 317 /// destination. 318 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated 319 /// sums of both operands. 320 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 321 _mm_hadds_pi16(__m64 __a, __m64 __b) 322 { 323 return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b); 324 } 325 326 /// Horizontally subtracts the adjacent pairs of values contained in 2 327 /// packed 128-bit vectors of [8 x i16]. 328 /// 329 /// \headerfile <x86intrin.h> 330 /// 331 /// This intrinsic corresponds to the \c VPHSUBW instruction. 332 /// 333 /// \param __a 334 /// A 128-bit vector of [8 x i16] containing one of the source operands. The 335 /// horizontal differences between the values are stored in the lower bits of 336 /// the destination. 337 /// \param __b 338 /// A 128-bit vector of [8 x i16] containing one of the source operands. The 339 /// horizontal differences between the values are stored in the upper bits of 340 /// the destination. 341 /// \returns A 128-bit vector of [8 x i16] containing the horizontal differences 342 /// of both operands. 343 static __inline__ __m128i __DEFAULT_FN_ATTRS 344 _mm_hsub_epi16(__m128i __a, __m128i __b) 345 { 346 return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); 347 } 348 349 /// Horizontally subtracts the adjacent pairs of values contained in 2 350 /// packed 128-bit vectors of [4 x i32]. 351 /// 352 /// \headerfile <x86intrin.h> 353 /// 354 /// This intrinsic corresponds to the \c VPHSUBD instruction. 355 /// 356 /// \param __a 357 /// A 128-bit vector of [4 x i32] containing one of the source operands. The 358 /// horizontal differences between the values are stored in the lower bits of 359 /// the destination. 360 /// \param __b 361 /// A 128-bit vector of [4 x i32] containing one of the source operands. The 362 /// horizontal differences between the values are stored in the upper bits of 363 /// the destination. 364 /// \returns A 128-bit vector of [4 x i32] containing the horizontal differences 365 /// of both operands. 366 static __inline__ __m128i __DEFAULT_FN_ATTRS 367 _mm_hsub_epi32(__m128i __a, __m128i __b) 368 { 369 return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); 370 } 371 372 /// Horizontally subtracts the adjacent pairs of values contained in 2 373 /// packed 64-bit vectors of [4 x i16]. 374 /// 375 /// \headerfile <x86intrin.h> 376 /// 377 /// This intrinsic corresponds to the \c PHSUBW instruction. 378 /// 379 /// \param __a 380 /// A 64-bit vector of [4 x i16] containing one of the source operands. The 381 /// horizontal differences between the values are stored in the lower bits of 382 /// the destination. 383 /// \param __b 384 /// A 64-bit vector of [4 x i16] containing one of the source operands. The 385 /// horizontal differences between the values are stored in the upper bits of 386 /// the destination. 387 /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences 388 /// of both operands. 389 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 390 _mm_hsub_pi16(__m64 __a, __m64 __b) 391 { 392 return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b); 393 } 394 395 /// Horizontally subtracts the adjacent pairs of values contained in 2 396 /// packed 64-bit vectors of [2 x i32]. 397 /// 398 /// \headerfile <x86intrin.h> 399 /// 400 /// This intrinsic corresponds to the \c PHSUBD instruction. 401 /// 402 /// \param __a 403 /// A 64-bit vector of [2 x i32] containing one of the source operands. The 404 /// horizontal differences between the values are stored in the lower bits of 405 /// the destination. 406 /// \param __b 407 /// A 64-bit vector of [2 x i32] containing one of the source operands. The 408 /// horizontal differences between the values are stored in the upper bits of 409 /// the destination. 410 /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences 411 /// of both operands. 412 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 413 _mm_hsub_pi32(__m64 __a, __m64 __b) 414 { 415 return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b); 416 } 417 418 /// Horizontally subtracts, with saturation, the adjacent pairs of values 419 /// contained in two packed 128-bit vectors of [8 x i16]. 420 /// 421 /// Positive differences greater than 0x7FFF are saturated to 0x7FFF. 422 /// Negative differences less than 0x8000 are saturated to 0x8000. 423 /// 424 /// \headerfile <x86intrin.h> 425 /// 426 /// This intrinsic corresponds to the \c VPHSUBSW instruction. 427 /// 428 /// \param __a 429 /// A 128-bit vector of [8 x i16] containing one of the source operands. The 430 /// horizontal differences between the values are stored in the lower bits of 431 /// the destination. 432 /// \param __b 433 /// A 128-bit vector of [8 x i16] containing one of the source operands. The 434 /// horizontal differences between the values are stored in the upper bits of 435 /// the destination. 436 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated 437 /// differences of both operands. 438 static __inline__ __m128i __DEFAULT_FN_ATTRS 439 _mm_hsubs_epi16(__m128i __a, __m128i __b) 440 { 441 return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); 442 } 443 444 /// Horizontally subtracts, with saturation, the adjacent pairs of values 445 /// contained in two packed 64-bit vectors of [4 x i16]. 446 /// 447 /// Positive differences greater than 0x7FFF are saturated to 0x7FFF. 448 /// Negative differences less than 0x8000 are saturated to 0x8000. 449 /// 450 /// \headerfile <x86intrin.h> 451 /// 452 /// This intrinsic corresponds to the \c PHSUBSW instruction. 453 /// 454 /// \param __a 455 /// A 64-bit vector of [4 x i16] containing one of the source operands. The 456 /// horizontal differences between the values are stored in the lower bits of 457 /// the destination. 458 /// \param __b 459 /// A 64-bit vector of [4 x i16] containing one of the source operands. The 460 /// horizontal differences between the values are stored in the upper bits of 461 /// the destination. 462 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated 463 /// differences of both operands. 464 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 465 _mm_hsubs_pi16(__m64 __a, __m64 __b) 466 { 467 return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b); 468 } 469 470 /// Multiplies corresponding pairs of packed 8-bit unsigned integer 471 /// values contained in the first source operand and packed 8-bit signed 472 /// integer values contained in the second source operand, adds pairs of 473 /// contiguous products with signed saturation, and writes the 16-bit sums to 474 /// the corresponding bits in the destination. 475 /// 476 /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of 477 /// both operands are multiplied, and the sum of both results is written to 478 /// bits [15:0] of the destination. 479 /// 480 /// \headerfile <x86intrin.h> 481 /// 482 /// This intrinsic corresponds to the \c VPMADDUBSW instruction. 483 /// 484 /// \param __a 485 /// A 128-bit integer vector containing the first source operand. 486 /// \param __b 487 /// A 128-bit integer vector containing the second source operand. 488 /// \returns A 128-bit integer vector containing the sums of products of both 489 /// operands: \n 490 /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n 491 /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n 492 /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n 493 /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n 494 /// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n 495 /// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n 496 /// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n 497 /// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15) 498 static __inline__ __m128i __DEFAULT_FN_ATTRS 499 _mm_maddubs_epi16(__m128i __a, __m128i __b) 500 { 501 return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b); 502 } 503 504 /// Multiplies corresponding pairs of packed 8-bit unsigned integer 505 /// values contained in the first source operand and packed 8-bit signed 506 /// integer values contained in the second source operand, adds pairs of 507 /// contiguous products with signed saturation, and writes the 16-bit sums to 508 /// the corresponding bits in the destination. 509 /// 510 /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of 511 /// both operands are multiplied, and the sum of both results is written to 512 /// bits [15:0] of the destination. 513 /// 514 /// \headerfile <x86intrin.h> 515 /// 516 /// This intrinsic corresponds to the \c PMADDUBSW instruction. 517 /// 518 /// \param __a 519 /// A 64-bit integer vector containing the first source operand. 520 /// \param __b 521 /// A 64-bit integer vector containing the second source operand. 522 /// \returns A 64-bit integer vector containing the sums of products of both 523 /// operands: \n 524 /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n 525 /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n 526 /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n 527 /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) 528 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 529 _mm_maddubs_pi16(__m64 __a, __m64 __b) 530 { 531 return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b); 532 } 533 534 /// Multiplies packed 16-bit signed integer values, truncates the 32-bit 535 /// products to the 18 most significant bits by right-shifting, rounds the 536 /// truncated value by adding 1, and writes bits [16:1] to the destination. 537 /// 538 /// \headerfile <x86intrin.h> 539 /// 540 /// This intrinsic corresponds to the \c VPMULHRSW instruction. 541 /// 542 /// \param __a 543 /// A 128-bit vector of [8 x i16] containing one of the source operands. 544 /// \param __b 545 /// A 128-bit vector of [8 x i16] containing one of the source operands. 546 /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled 547 /// products of both operands. 548 static __inline__ __m128i __DEFAULT_FN_ATTRS 549 _mm_mulhrs_epi16(__m128i __a, __m128i __b) 550 { 551 return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); 552 } 553 554 /// Multiplies packed 16-bit signed integer values, truncates the 32-bit 555 /// products to the 18 most significant bits by right-shifting, rounds the 556 /// truncated value by adding 1, and writes bits [16:1] to the destination. 557 /// 558 /// \headerfile <x86intrin.h> 559 /// 560 /// This intrinsic corresponds to the \c PMULHRSW instruction. 561 /// 562 /// \param __a 563 /// A 64-bit vector of [4 x i16] containing one of the source operands. 564 /// \param __b 565 /// A 64-bit vector of [4 x i16] containing one of the source operands. 566 /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled 567 /// products of both operands. 568 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 569 _mm_mulhrs_pi16(__m64 __a, __m64 __b) 570 { 571 return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b); 572 } 573 574 /// Copies the 8-bit integers from a 128-bit integer vector to the 575 /// destination or clears 8-bit values in the destination, as specified by 576 /// the second source operand. 577 /// 578 /// \headerfile <x86intrin.h> 579 /// 580 /// This intrinsic corresponds to the \c VPSHUFB instruction. 581 /// 582 /// \param __a 583 /// A 128-bit integer vector containing the values to be copied. 584 /// \param __b 585 /// A 128-bit integer vector containing control bytes corresponding to 586 /// positions in the destination: 587 /// Bit 7: \n 588 /// 1: Clear the corresponding byte in the destination. \n 589 /// 0: Copy the selected source byte to the corresponding byte in the 590 /// destination. \n 591 /// Bits [6:4] Reserved. \n 592 /// Bits [3:0] select the source byte to be copied. 593 /// \returns A 128-bit integer vector containing the copied or cleared values. 594 static __inline__ __m128i __DEFAULT_FN_ATTRS 595 _mm_shuffle_epi8(__m128i __a, __m128i __b) 596 { 597 return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b); 598 } 599 600 /// Copies the 8-bit integers from a 64-bit integer vector to the 601 /// destination or clears 8-bit values in the destination, as specified by 602 /// the second source operand. 603 /// 604 /// \headerfile <x86intrin.h> 605 /// 606 /// This intrinsic corresponds to the \c PSHUFB instruction. 607 /// 608 /// \param __a 609 /// A 64-bit integer vector containing the values to be copied. 610 /// \param __b 611 /// A 64-bit integer vector containing control bytes corresponding to 612 /// positions in the destination: 613 /// Bit 7: \n 614 /// 1: Clear the corresponding byte in the destination. \n 615 /// 0: Copy the selected source byte to the corresponding byte in the 616 /// destination. \n 617 /// Bits [3:0] select the source byte to be copied. 618 /// \returns A 64-bit integer vector containing the copied or cleared values. 619 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 620 _mm_shuffle_pi8(__m64 __a, __m64 __b) 621 { 622 return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b); 623 } 624 625 /// For each 8-bit integer in the first source operand, perform one of 626 /// the following actions as specified by the second source operand. 627 /// 628 /// If the byte in the second source is negative, calculate the two's 629 /// complement of the corresponding byte in the first source, and write that 630 /// value to the destination. If the byte in the second source is positive, 631 /// copy the corresponding byte from the first source to the destination. If 632 /// the byte in the second source is zero, clear the corresponding byte in 633 /// the destination. 634 /// 635 /// \headerfile <x86intrin.h> 636 /// 637 /// This intrinsic corresponds to the \c VPSIGNB instruction. 638 /// 639 /// \param __a 640 /// A 128-bit integer vector containing the values to be copied. 641 /// \param __b 642 /// A 128-bit integer vector containing control bytes corresponding to 643 /// positions in the destination. 644 /// \returns A 128-bit integer vector containing the resultant values. 645 static __inline__ __m128i __DEFAULT_FN_ATTRS 646 _mm_sign_epi8(__m128i __a, __m128i __b) 647 { 648 return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b); 649 } 650 651 /// For each 16-bit integer in the first source operand, perform one of 652 /// the following actions as specified by the second source operand. 653 /// 654 /// If the word in the second source is negative, calculate the two's 655 /// complement of the corresponding word in the first source, and write that 656 /// value to the destination. If the word in the second source is positive, 657 /// copy the corresponding word from the first source to the destination. If 658 /// the word in the second source is zero, clear the corresponding word in 659 /// the destination. 660 /// 661 /// \headerfile <x86intrin.h> 662 /// 663 /// This intrinsic corresponds to the \c VPSIGNW instruction. 664 /// 665 /// \param __a 666 /// A 128-bit integer vector containing the values to be copied. 667 /// \param __b 668 /// A 128-bit integer vector containing control words corresponding to 669 /// positions in the destination. 670 /// \returns A 128-bit integer vector containing the resultant values. 671 static __inline__ __m128i __DEFAULT_FN_ATTRS 672 _mm_sign_epi16(__m128i __a, __m128i __b) 673 { 674 return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b); 675 } 676 677 /// For each 32-bit integer in the first source operand, perform one of 678 /// the following actions as specified by the second source operand. 679 /// 680 /// If the doubleword in the second source is negative, calculate the two's 681 /// complement of the corresponding word in the first source, and write that 682 /// value to the destination. If the doubleword in the second source is 683 /// positive, copy the corresponding word from the first source to the 684 /// destination. If the doubleword in the second source is zero, clear the 685 /// corresponding word in the destination. 686 /// 687 /// \headerfile <x86intrin.h> 688 /// 689 /// This intrinsic corresponds to the \c VPSIGND instruction. 690 /// 691 /// \param __a 692 /// A 128-bit integer vector containing the values to be copied. 693 /// \param __b 694 /// A 128-bit integer vector containing control doublewords corresponding to 695 /// positions in the destination. 696 /// \returns A 128-bit integer vector containing the resultant values. 697 static __inline__ __m128i __DEFAULT_FN_ATTRS 698 _mm_sign_epi32(__m128i __a, __m128i __b) 699 { 700 return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b); 701 } 702 703 /// For each 8-bit integer in the first source operand, perform one of 704 /// the following actions as specified by the second source operand. 705 /// 706 /// If the byte in the second source is negative, calculate the two's 707 /// complement of the corresponding byte in the first source, and write that 708 /// value to the destination. If the byte in the second source is positive, 709 /// copy the corresponding byte from the first source to the destination. If 710 /// the byte in the second source is zero, clear the corresponding byte in 711 /// the destination. 712 /// 713 /// \headerfile <x86intrin.h> 714 /// 715 /// This intrinsic corresponds to the \c PSIGNB instruction. 716 /// 717 /// \param __a 718 /// A 64-bit integer vector containing the values to be copied. 719 /// \param __b 720 /// A 64-bit integer vector containing control bytes corresponding to 721 /// positions in the destination. 722 /// \returns A 64-bit integer vector containing the resultant values. 723 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 724 _mm_sign_pi8(__m64 __a, __m64 __b) 725 { 726 return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b); 727 } 728 729 /// For each 16-bit integer in the first source operand, perform one of 730 /// the following actions as specified by the second source operand. 731 /// 732 /// If the word in the second source is negative, calculate the two's 733 /// complement of the corresponding word in the first source, and write that 734 /// value to the destination. If the word in the second source is positive, 735 /// copy the corresponding word from the first source to the destination. If 736 /// the word in the second source is zero, clear the corresponding word in 737 /// the destination. 738 /// 739 /// \headerfile <x86intrin.h> 740 /// 741 /// This intrinsic corresponds to the \c PSIGNW instruction. 742 /// 743 /// \param __a 744 /// A 64-bit integer vector containing the values to be copied. 745 /// \param __b 746 /// A 64-bit integer vector containing control words corresponding to 747 /// positions in the destination. 748 /// \returns A 64-bit integer vector containing the resultant values. 749 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 750 _mm_sign_pi16(__m64 __a, __m64 __b) 751 { 752 return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b); 753 } 754 755 /// For each 32-bit integer in the first source operand, perform one of 756 /// the following actions as specified by the second source operand. 757 /// 758 /// If the doubleword in the second source is negative, calculate the two's 759 /// complement of the corresponding doubleword in the first source, and 760 /// write that value to the destination. If the doubleword in the second 761 /// source is positive, copy the corresponding doubleword from the first 762 /// source to the destination. If the doubleword in the second source is 763 /// zero, clear the corresponding doubleword in the destination. 764 /// 765 /// \headerfile <x86intrin.h> 766 /// 767 /// This intrinsic corresponds to the \c PSIGND instruction. 768 /// 769 /// \param __a 770 /// A 64-bit integer vector containing the values to be copied. 771 /// \param __b 772 /// A 64-bit integer vector containing two control doublewords corresponding 773 /// to positions in the destination. 774 /// \returns A 64-bit integer vector containing the resultant values. 775 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 776 _mm_sign_pi32(__m64 __a, __m64 __b) 777 { 778 return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b); 779 } 780 781 #undef __DEFAULT_FN_ATTRS 782 #undef __DEFAULT_FN_ATTRS_MMX 783 784 #endif /* __TMMINTRIN_H */ 785