/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __TMMINTRIN_H
#define __TMMINTRIN_H

#include <pmmintrin.h>

/* Define the default attributes for the functions in this file.
 * __DEFAULT_FN_ATTRS is used by the intrinsics operating on 128-bit vectors;
 * __DEFAULT_FN_ATTRS_MMX is used by the intrinsics operating on 64-bit (__m64)
 * vectors and additionally enables the "mmx" target feature. Both macros are
 * undefined again at the end of this header. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("ssse3"),         \
                 __min_vector_width__(64)))
#define __DEFAULT_FN_ATTRS_MMX                                                 \
  __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"),     \
                 __min_vector_width__(64)))

/// Computes the absolute value of each of the packed 8-bit signed
///    integers in the source operand and stores the 8-bit unsigned integer
///    results in the destination.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c PABSB instruction.
///
/// \param __a
///    A 64-bit vector of [8 x i8].
/// \returns A 64-bit integer vector containing the absolute values of the
///    elements in the operand.
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_abs_pi8(__m64 __a)
{
  return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
}

/// Computes the absolute value of each of the packed 8-bit signed
///    integers in the source operand and stores the 8-bit unsigned integer
///    results in the destination.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c VPABSB instruction.
///
/// \param __a
///    A 128-bit vector of [16 x i8].
/// \returns A 128-bit integer vector containing the absolute values of the
///    elements in the operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_abs_epi8(__m128i __a)
{
  return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
}

/// Computes the absolute value of each of the packed 16-bit signed
///    integers in the source operand and stores the 16-bit unsigned integer
///    results in the destination.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c PABSW instruction.
///
/// \param __a
///    A 64-bit vector of [4 x i16].
/// \returns A 64-bit integer vector containing the absolute values of the
///    elements in the operand.
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_abs_pi16(__m64 __a)
{
  return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
}

/// Computes the absolute value of each of the packed 16-bit signed
///    integers in the source operand and stores the 16-bit unsigned integer
///    results in the destination.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c VPABSW instruction.
///
/// \param __a
///    A 128-bit vector of [8 x i16].
/// \returns A 128-bit integer vector containing the absolute values of the
///    elements in the operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_abs_epi16(__m128i __a)
{
  return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
}

/// Computes the absolute value of each of the packed 32-bit signed
///    integers in the source operand and stores the 32-bit unsigned integer
///    results in the destination.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c PABSD instruction.
///
/// \param __a
///    A 64-bit vector of [2 x i32].
/// \returns A 64-bit integer vector containing the absolute values of the
///    elements in the operand.
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_abs_pi32(__m64 __a)
{
  return (__m64)__builtin_ia32_pabsd((__v2si)__a);
}

/// Computes the absolute value of each of the packed 32-bit signed
///    integers in the source operand and stores the 32-bit unsigned integer
///    results in the destination.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c VPABSD instruction.
///
/// \param __a
///    A 128-bit vector of [4 x i32].
/// \returns A 128-bit integer vector containing the absolute values of the
///    elements in the operand.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_abs_epi32(__m128i __a)
{
  return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
}

/* Implementation note for the two _mm_alignr_* macros below: the whole
 * expansion is wrapped in parentheses so that it behaves as a single primary
 * expression. Without the outer parentheses a postfix operator applied to the
 * macro call (for example a vector subscript) would bind to the builtin call
 * before the cast to the result type is applied. */

/// Concatenates the two 128-bit integer vector operands, and
///    right-shifts the result by the number of bytes specified in the immediate
///    operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 128-bit vector of [16 x i8] containing one of the source operands. It
///    forms the upper 16 bytes of the concatenated value.
/// \param b
///    A 128-bit vector of [16 x i8] containing one of the source operands. It
///    forms the lower 16 bytes of the concatenated value.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 128-bit integer vector containing the concatenated right-shifted
///    value.
#define _mm_alignr_epi8(a, b, n) \
  ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
                                      (__v16qi)(__m128i)(b), (n)))

/// Concatenates the two 64-bit integer vector operands, and right-shifts
///    the result by the number of bytes specified in the immediate operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 64-bit vector of [8 x i8] containing one of the source operands. It
///    forms the upper 8 bytes of the concatenated value.
/// \param b
///    A 64-bit vector of [8 x i8] containing one of the source operands. It
///    forms the lower 8 bytes of the concatenated value.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 64-bit integer vector containing the concatenated right-shifted
///    value.
#define _mm_alignr_pi8(a, b, n) \
  ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))

/// Horizontally adds the adjacent pairs of values contained in 2 packed
///    128-bit vectors of [8 x i16].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c VPHADDW instruction.
///
/// \param __a
///    A 128-bit vector of [8 x i16] containing one of the source operands. The
///    horizontal sums of the values are stored in the lower bits of the
///    destination.
/// \param __b
///    A 128-bit vector of [8 x i16] containing one of the source operands. The
///    horizontal sums of the values are stored in the upper bits of the
///    destination.
/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
///    both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hadd_epi16(__m128i __a, __m128i __b)
{
  return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
}

/// Horizontally adds the adjacent pairs of values contained in 2 packed
///    128-bit vectors of [4 x i32].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c VPHADDD instruction.
///
/// \param __a
///    A 128-bit vector of [4 x i32] containing one of the source operands. The
///    horizontal sums of the values are stored in the lower bits of the
///    destination.
/// \param __b
///    A 128-bit vector of [4 x i32] containing one of the source operands. The
///    horizontal sums of the values are stored in the upper bits of the
///    destination.
/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
///    both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hadd_epi32(__m128i __a, __m128i __b)
{
  return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
}

/// Horizontally adds the adjacent pairs of values contained in 2 packed
///    64-bit vectors of [4 x i16].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c PHADDW instruction.
///
/// \param __a
///    A 64-bit vector of [4 x i16] containing one of the source operands. The
///    horizontal sums of the values are stored in the lower bits of the
///    destination.
/// \param __b
///    A 64-bit vector of [4 x i16] containing one of the source operands. The
///    horizontal sums of the values are stored in the upper bits of the
///    destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
///    operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hadd_pi16(__m64 __a, __m64 __b)
{
  return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
}

/// Horizontally adds the adjacent pairs of values contained in 2 packed
///    64-bit vectors of [2 x i32].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c PHADDD instruction.
///
/// \param __a
///    A 64-bit vector of [2 x i32] containing one of the source operands. The
///    horizontal sums of the values are stored in the lower bits of the
///    destination.
/// \param __b
///    A 64-bit vector of [2 x i32] containing one of the source operands. The
///    horizontal sums of the values are stored in the upper bits of the
///    destination.
/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
///    operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hadd_pi32(__m64 __a, __m64 __b)
{
  return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
}

/// Horizontally adds the adjacent pairs of values contained in 2 packed
///    128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are
///    saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
///    0x8000.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c VPHADDSW instruction.
///
/// \param __a
///    A 128-bit vector of [8 x i16] containing one of the source operands. The
///    horizontal sums of the values are stored in the lower bits of the
///    destination.
/// \param __b
///    A 128-bit vector of [8 x i16] containing one of the source operands. The
///    horizontal sums of the values are stored in the upper bits of the
///    destination.
/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
///    sums of both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hadds_epi16(__m128i __a, __m128i __b)
{
  return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
}

/// Horizontally adds the adjacent pairs of values contained in 2 packed
///    64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are
///    saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
///    0x8000.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c PHADDSW instruction.
///
/// \param __a
///    A 64-bit vector of [4 x i16] containing one of the source operands. The
///    horizontal sums of the values are stored in the lower bits of the
///    destination.
/// \param __b
///    A 64-bit vector of [4 x i16] containing one of the source operands. The
///    horizontal sums of the values are stored in the upper bits of the
///    destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
///    sums of both operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hadds_pi16(__m64 __a, __m64 __b)
{
  return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
///    packed 128-bit vectors of [8 x i16].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c VPHSUBW instruction.
///
/// \param __a
///    A 128-bit vector of [8 x i16] containing one of the source operands. The
///    horizontal differences between the values are stored in the lower bits of
///    the destination.
/// \param __b
///    A 128-bit vector of [8 x i16] containing one of the source operands. The
///    horizontal differences between the values are stored in the upper bits of
///    the destination.
/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
///    of both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsub_epi16(__m128i __a, __m128i __b)
{
  return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
///    packed 128-bit vectors of [4 x i32].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c VPHSUBD instruction.
///
/// \param __a
///    A 128-bit vector of [4 x i32] containing one of the source operands. The
///    horizontal differences between the values are stored in the lower bits of
///    the destination.
/// \param __b
///    A 128-bit vector of [4 x i32] containing one of the source operands. The
///    horizontal differences between the values are stored in the upper bits of
///    the destination.
/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
///    of both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsub_epi32(__m128i __a, __m128i __b)
{
  return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
///    packed 64-bit vectors of [4 x i16].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c PHSUBW instruction.
///
/// \param __a
///    A 64-bit vector of [4 x i16] containing one of the source operands. The
///    horizontal differences between the values are stored in the lower bits of
///    the destination.
/// \param __b
///    A 64-bit vector of [4 x i16] containing one of the source operands. The
///    horizontal differences between the values are stored in the upper bits of
///    the destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
///    of both operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hsub_pi16(__m64 __a, __m64 __b)
{
  return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
///    packed 64-bit vectors of [2 x i32].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c PHSUBD instruction.
///
/// \param __a
///    A 64-bit vector of [2 x i32] containing one of the source operands. The
///    horizontal differences between the values are stored in the lower bits of
///    the destination.
/// \param __b
///    A 64-bit vector of [2 x i32] containing one of the source operands. The
///    horizontal differences between the values are stored in the upper bits of
///    the destination.
/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
///    of both operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hsub_pi32(__m64 __a, __m64 __b)
{
  return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
///    packed 128-bit vectors of [8 x i16]. Positive differences greater than
///    0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
///    saturated to 0x8000.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c VPHSUBSW instruction.
///
/// \param __a
///    A 128-bit vector of [8 x i16] containing one of the source operands. The
///    horizontal differences between the values are stored in the lower bits of
///    the destination.
/// \param __b
///    A 128-bit vector of [8 x i16] containing one of the source operands. The
///    horizontal differences between the values are stored in the upper bits of
///    the destination.
/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
///    differences of both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsubs_epi16(__m128i __a, __m128i __b)
{
  return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
///    packed 64-bit vectors of [4 x i16]. Positive differences greater than
///    0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
///    saturated to 0x8000.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c PHSUBSW instruction.
///
/// \param __a
///    A 64-bit vector of [4 x i16] containing one of the source operands. The
///    horizontal differences between the values are stored in the lower bits of
///    the destination.
/// \param __b
///    A 64-bit vector of [4 x i16] containing one of the source operands. The
///    horizontal differences between the values are stored in the upper bits of
///    the destination.
/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
///    differences of both operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hsubs_pi16(__m64 __a, __m64 __b)
{
  return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
}

/// Multiplies corresponding pairs of packed 8-bit unsigned integer
///    values contained in the first source operand and packed 8-bit signed
///    integer values contained in the second source operand, adds pairs of
///    contiguous products with signed saturation, and writes the 16-bit sums to
///    the corresponding bits in the destination.
///
///    For example, bits [7:0] of both operands are multiplied, bits [15:8] of
///    both operands are multiplied, and the sum of both results is written to
///    bits [15:0] of the destination.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c VPMADDUBSW instruction.
///
/// \param __a
///    A 128-bit integer vector containing the first source operand.
/// \param __b
///    A 128-bit integer vector containing the second source operand.
/// \returns A 128-bit integer vector containing the sums of products of both
///    operands: \n
///    \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
///    \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
///    \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
///    \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n
///    \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n
///    \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
///    \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
///    \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maddubs_epi16(__m128i __a, __m128i __b)
{
  return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
}

/// Multiplies corresponding pairs of packed 8-bit unsigned integer
///    values contained in the first source operand and packed 8-bit signed
///    integer values contained in the second source operand, adds pairs of
///    contiguous products with signed saturation, and writes the 16-bit sums to
///    the corresponding bits in the destination.
///
///    For example, bits [7:0] of both operands are multiplied, bits [15:8] of
///    both operands are multiplied, and the sum of both results is written to
///    bits [15:0] of the destination.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c PMADDUBSW instruction.
///
/// \param __a
///    A 64-bit integer vector containing the first source operand.
/// \param __b
///    A 64-bit integer vector containing the second source operand.
/// \returns A 64-bit integer vector containing the sums of products of both
///    operands: \n
///    \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
///    \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
///    \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
///    \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_maddubs_pi16(__m64 __a, __m64 __b)
{
  return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
}

/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
///    products to the 18 most significant bits by right-shifting, rounds the
///    truncated value by adding 1, and writes bits [16:1] to the destination.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c VPMULHRSW instruction.
///
/// \param __a
///    A 128-bit vector of [8 x i16] containing one of the source operands.
/// \param __b
///    A 128-bit vector of [8 x i16] containing one of the source operands.
/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
///    products of both operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mulhrs_epi16(__m128i __a, __m128i __b)
{
  return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
}

/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
///    products to the 18 most significant bits by right-shifting, rounds the
///    truncated value by adding 1, and writes bits [16:1] to the destination.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c PMULHRSW instruction.
///
/// \param __a
///    A 64-bit vector of [4 x i16] containing one of the source operands.
/// \param __b
///    A 64-bit vector of [4 x i16] containing one of the source operands.
/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
///    products of both operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_mulhrs_pi16(__m64 __a, __m64 __b)
{
  return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
}

/// Copies the 8-bit integers from a 128-bit integer vector to the
///    destination or clears 8-bit values in the destination, as specified by
///    the second source operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c VPSHUFB instruction.
///
/// \param __a
///    A 128-bit integer vector containing the values to be copied.
/// \param __b
///    A 128-bit integer vector containing control bytes corresponding to
///    positions in the destination:
///    Bit 7: \n
///    1: Clear the corresponding byte in the destination. \n
///    0: Copy the selected source byte to the corresponding byte in the
///    destination. \n
///    Bits [6:4] Reserved.  \n
///    Bits [3:0] select the source byte to be copied.
/// \returns A 128-bit integer vector containing the copied or cleared values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shuffle_epi8(__m128i __a, __m128i __b)
{
  return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
}

/// Copies the 8-bit integers from a 64-bit integer vector to the
///    destination or clears 8-bit values in the destination, as specified by
///    the second source operand.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c PSHUFB instruction.
///
/// \param __a
///    A 64-bit integer vector containing the values to be copied.
/// \param __b
///    A 64-bit integer vector containing control bytes corresponding to
///    positions in the destination:
///    Bit 7: \n
///    1: Clear the corresponding byte in the destination. \n
///    0: Copy the selected source byte to the corresponding byte in the
///    destination. \n
///    Bits [2:0] select the source byte to be copied (the 64-bit source has
///    only eight bytes, so three index bits are used).
/// \returns A 64-bit integer vector containing the copied or cleared values.
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_shuffle_pi8(__m64 __a, __m64 __b)
{
  return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
}

/// For each 8-bit integer in the first source operand, perform one of
///    the following actions as specified by the second source operand.
///
///    If the byte in the second source is negative, calculate the two's
///    complement of the corresponding byte in the first source, and write that
///    value to the destination. If the byte in the second source is positive,
///    copy the corresponding byte from the first source to the destination. If
///    the byte in the second source is zero, clear the corresponding byte in
///    the destination.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c VPSIGNB instruction.
///
/// \param __a
///    A 128-bit integer vector containing the values to be copied.
/// \param __b
///    A 128-bit integer vector containing control bytes corresponding to
///    positions in the destination.
/// \returns A 128-bit integer vector containing the resultant values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sign_epi8(__m128i __a, __m128i __b)
{
  return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
}

/// For each 16-bit integer in the first source operand, perform one of
///    the following actions as specified by the second source operand.
///
///    If the word in the second source is negative, calculate the two's
///    complement of the corresponding word in the first source, and write that
///    value to the destination. If the word in the second source is positive,
///    copy the corresponding word from the first source to the destination. If
///    the word in the second source is zero, clear the corresponding word in
///    the destination.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c VPSIGNW instruction.
///
/// \param __a
///    A 128-bit integer vector containing the values to be copied.
/// \param __b
///    A 128-bit integer vector containing control words corresponding to
///    positions in the destination.
/// \returns A 128-bit integer vector containing the resultant values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sign_epi16(__m128i __a, __m128i __b)
{
  return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
}

/// For each 32-bit integer in the first source operand, perform one of
///    the following actions as specified by the second source operand.
///
///    If the doubleword in the second source is negative, calculate the two's
///    complement of the corresponding doubleword in the first source, and
///    write that value to the destination. If the doubleword in the second
///    source is positive, copy the corresponding doubleword from the first
///    source to the destination. If the doubleword in the second source is
///    zero, clear the corresponding doubleword in the destination.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c VPSIGND instruction.
///
/// \param __a
///    A 128-bit integer vector containing the values to be copied.
/// \param __b
///    A 128-bit integer vector containing control doublewords corresponding to
///    positions in the destination.
/// \returns A 128-bit integer vector containing the resultant values.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sign_epi32(__m128i __a, __m128i __b)
{
  return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
}

/// For each 8-bit integer in the first source operand, perform one of
///    the following actions as specified by the second source operand.
///
///    If the byte in the second source is negative, calculate the two's
///    complement of the corresponding byte in the first source, and write that
///    value to the destination. If the byte in the second source is positive,
///    copy the corresponding byte from the first source to the destination. If
///    the byte in the second source is zero, clear the corresponding byte in
///    the destination.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c PSIGNB instruction.
///
/// \param __a
///    A 64-bit integer vector containing the values to be copied.
/// \param __b
///    A 64-bit integer vector containing control bytes corresponding to
///    positions in the destination.
/// \returns A 64-bit integer vector containing the resultant values.
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_sign_pi8(__m64 __a, __m64 __b)
{
  return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
}

/// For each 16-bit integer in the first source operand, perform one of
///    the following actions as specified by the second source operand.
///
///    If the word in the second source is negative, calculate the two's
///    complement of the corresponding word in the first source, and write that
///    value to the destination. If the word in the second source is positive,
///    copy the corresponding word from the first source to the destination. If
///    the word in the second source is zero, clear the corresponding word in
///    the destination.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c PSIGNW instruction.
///
/// \param __a
///    A 64-bit integer vector containing the values to be copied.
/// \param __b
///    A 64-bit integer vector containing control words corresponding to
///    positions in the destination.
/// \returns A 64-bit integer vector containing the resultant values.
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_sign_pi16(__m64 __a, __m64 __b)
{
  return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
}

/// For each 32-bit integer in the first source operand, perform one of
///    the following actions as specified by the second source operand.
///
///    If the doubleword in the second source is negative, calculate the two's
///    complement of the corresponding doubleword in the first source, and
///    write that value to the destination. If the doubleword in the second
///    source is positive, copy the corresponding doubleword from the first
///    source to the destination. If the doubleword in the second source is
///    zero, clear the corresponding doubleword in the destination.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c PSIGND instruction.
///
/// \param __a
///    A 64-bit integer vector containing the values to be copied.
/// \param __b
///    A 64-bit integer vector containing two control doublewords corresponding
///    to positions in the destination.
/// \returns A 64-bit integer vector containing the resultant values.
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_sign_pi32(__m64 __a, __m64 __b)
{
  return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
}

#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_MMX

#endif /* __TMMINTRIN_H */