/*===---- mmintrin.h - MMX intrinsics --------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __MMINTRIN_H
#define __MMINTRIN_H

#if !defined(__i386__) && !defined(__x86_64__)
#error "This header is only meant to be used on x86 and x64 architecture"
#endif

/* Public 64-bit vector type used by the intrinsics in this file: 8 bytes
 * wide and 8-byte aligned. */
typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8)));

/* Internal 8-byte vector views. The intrinsics below cast their __m64
 * operands to one of these so that each builtin receives the element type
 * it operates on. */
typedef long long __v1di __attribute__((__vector_size__(8)));
typedef int __v2si __attribute__((__vector_size__(8)));
typedef short __v4hi __attribute__((__vector_size__(8)));
typedef char __v8qi __attribute__((__vector_size__(8)));

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("mmx,no-evex512"), \
                 __min_vector_width__(64)))

/// Clears the MMX state by setting the state of the x87 stack registers
///    to empty.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> EMMS </c> instruction.
///
/* Note: the attribute list below is spelled out rather than using
 * __DEFAULT_FN_ATTRS; it is the same list without __min_vector_width__(64). */
static __inline__ void __attribute__((__always_inline__, __nodebug__,
                                      __target__("mmx,no-evex512")))
_mm_empty(void) {
  __builtin_ia32_emms();
}

/// Constructs a 64-bit integer vector, setting the lower 32 bits to the
///    value of the 32-bit integer parameter and setting the upper 32 bits to 0.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> MOVD </c> instruction.
///
/// \param __i
///    A 32-bit integer value.
/// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
///    parameter. The upper 32 bits are set to 0.
53 static __inline__ __m64 __DEFAULT_FN_ATTRS 54 _mm_cvtsi32_si64(int __i) 55 { 56 return (__m64)__builtin_ia32_vec_init_v2si(__i, 0); 57 } 58 59 /// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit 60 /// signed integer. 61 /// 62 /// \headerfile <x86intrin.h> 63 /// 64 /// This intrinsic corresponds to the <c> MOVD </c> instruction. 65 /// 66 /// \param __m 67 /// A 64-bit integer vector. 68 /// \returns A 32-bit signed integer value containing the lower 32 bits of the 69 /// parameter. 70 static __inline__ int __DEFAULT_FN_ATTRS 71 _mm_cvtsi64_si32(__m64 __m) 72 { 73 return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0); 74 } 75 76 /// Casts a 64-bit signed integer value into a 64-bit integer vector. 77 /// 78 /// \headerfile <x86intrin.h> 79 /// 80 /// This intrinsic corresponds to the <c> MOVQ </c> instruction. 81 /// 82 /// \param __i 83 /// A 64-bit signed integer. 84 /// \returns A 64-bit integer vector containing the same bitwise pattern as the 85 /// parameter. 86 static __inline__ __m64 __DEFAULT_FN_ATTRS 87 _mm_cvtsi64_m64(long long __i) 88 { 89 return (__m64)__i; 90 } 91 92 /// Casts a 64-bit integer vector into a 64-bit signed integer value. 93 /// 94 /// \headerfile <x86intrin.h> 95 /// 96 /// This intrinsic corresponds to the <c> MOVQ </c> instruction. 97 /// 98 /// \param __m 99 /// A 64-bit integer vector. 100 /// \returns A 64-bit signed integer containing the same bitwise pattern as the 101 /// parameter. 102 static __inline__ long long __DEFAULT_FN_ATTRS 103 _mm_cvtm64_si64(__m64 __m) 104 { 105 return (long long)__m; 106 } 107 108 /// Converts, with saturation, 16-bit signed integers from both 64-bit integer 109 /// vector parameters of [4 x i16] into 8-bit signed integer values, and 110 /// constructs a 64-bit integer vector of [8 x i8] as the result. 111 /// 112 /// Positive values greater than 0x7F are saturated to 0x7F. Negative values 113 /// less than 0x80 are saturated to 0x80. 
114 /// 115 /// \headerfile <x86intrin.h> 116 /// 117 /// This intrinsic corresponds to the <c> PACKSSWB </c> instruction. 118 /// 119 /// \param __m1 120 /// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are 121 /// written to the lower 32 bits of the result. 122 /// \param __m2 123 /// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are 124 /// written to the upper 32 bits of the result. 125 /// \returns A 64-bit integer vector of [8 x i8] containing the converted 126 /// values. 127 static __inline__ __m64 __DEFAULT_FN_ATTRS 128 _mm_packs_pi16(__m64 __m1, __m64 __m2) 129 { 130 return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); 131 } 132 133 /// Converts, with saturation, 32-bit signed integers from both 64-bit integer 134 /// vector parameters of [2 x i32] into 16-bit signed integer values, and 135 /// constructs a 64-bit integer vector of [4 x i16] as the result. 136 /// 137 /// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative 138 /// values less than 0x8000 are saturated to 0x8000. 139 /// 140 /// \headerfile <x86intrin.h> 141 /// 142 /// This intrinsic corresponds to the <c> PACKSSDW </c> instruction. 143 /// 144 /// \param __m1 145 /// A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are 146 /// written to the lower 32 bits of the result. 147 /// \param __m2 148 /// A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are 149 /// written to the upper 32 bits of the result. 150 /// \returns A 64-bit integer vector of [4 x i16] containing the converted 151 /// values. 
152 static __inline__ __m64 __DEFAULT_FN_ATTRS 153 _mm_packs_pi32(__m64 __m1, __m64 __m2) 154 { 155 return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); 156 } 157 158 /// Converts, with saturation, 16-bit signed integers from both 64-bit integer 159 /// vector parameters of [4 x i16] into 8-bit unsigned integer values, and 160 /// constructs a 64-bit integer vector of [8 x i8] as the result. 161 /// 162 /// Values greater than 0xFF are saturated to 0xFF. Values less than 0 are 163 /// saturated to 0. 164 /// 165 /// \headerfile <x86intrin.h> 166 /// 167 /// This intrinsic corresponds to the <c> PACKUSWB </c> instruction. 168 /// 169 /// \param __m1 170 /// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are 171 /// written to the lower 32 bits of the result. 172 /// \param __m2 173 /// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are 174 /// written to the upper 32 bits of the result. 175 /// \returns A 64-bit integer vector of [8 x i8] containing the converted 176 /// values. 177 static __inline__ __m64 __DEFAULT_FN_ATTRS 178 _mm_packs_pu16(__m64 __m1, __m64 __m2) 179 { 180 return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2); 181 } 182 183 /// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] 184 /// and interleaves them into a 64-bit integer vector of [8 x i8]. 185 /// 186 /// \headerfile <x86intrin.h> 187 /// 188 /// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction. 189 /// 190 /// \param __m1 191 /// A 64-bit integer vector of [8 x i8]. \n 192 /// Bits [39:32] are written to bits [7:0] of the result. \n 193 /// Bits [47:40] are written to bits [23:16] of the result. \n 194 /// Bits [55:48] are written to bits [39:32] of the result. \n 195 /// Bits [63:56] are written to bits [55:48] of the result. 196 /// \param __m2 197 /// A 64-bit integer vector of [8 x i8]. 198 /// Bits [39:32] are written to bits [15:8] of the result. 
\n 199 /// Bits [47:40] are written to bits [31:24] of the result. \n 200 /// Bits [55:48] are written to bits [47:40] of the result. \n 201 /// Bits [63:56] are written to bits [63:56] of the result. 202 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved 203 /// values. 204 static __inline__ __m64 __DEFAULT_FN_ATTRS 205 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) 206 { 207 return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2); 208 } 209 210 /// Unpacks the upper 32 bits from two 64-bit integer vectors of 211 /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. 212 /// 213 /// \headerfile <x86intrin.h> 214 /// 215 /// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction. 216 /// 217 /// \param __m1 218 /// A 64-bit integer vector of [4 x i16]. 219 /// Bits [47:32] are written to bits [15:0] of the result. \n 220 /// Bits [63:48] are written to bits [47:32] of the result. 221 /// \param __m2 222 /// A 64-bit integer vector of [4 x i16]. 223 /// Bits [47:32] are written to bits [31:16] of the result. \n 224 /// Bits [63:48] are written to bits [63:48] of the result. 225 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved 226 /// values. 227 static __inline__ __m64 __DEFAULT_FN_ATTRS 228 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) 229 { 230 return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2); 231 } 232 233 /// Unpacks the upper 32 bits from two 64-bit integer vectors of 234 /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. 235 /// 236 /// \headerfile <x86intrin.h> 237 /// 238 /// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction. 239 /// 240 /// \param __m1 241 /// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to 242 /// the lower 32 bits of the result. 243 /// \param __m2 244 /// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to 245 /// the upper 32 bits of the result. 
246 /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved 247 /// values. 248 static __inline__ __m64 __DEFAULT_FN_ATTRS 249 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) 250 { 251 return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2); 252 } 253 254 /// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] 255 /// and interleaves them into a 64-bit integer vector of [8 x i8]. 256 /// 257 /// \headerfile <x86intrin.h> 258 /// 259 /// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction. 260 /// 261 /// \param __m1 262 /// A 64-bit integer vector of [8 x i8]. 263 /// Bits [7:0] are written to bits [7:0] of the result. \n 264 /// Bits [15:8] are written to bits [23:16] of the result. \n 265 /// Bits [23:16] are written to bits [39:32] of the result. \n 266 /// Bits [31:24] are written to bits [55:48] of the result. 267 /// \param __m2 268 /// A 64-bit integer vector of [8 x i8]. 269 /// Bits [7:0] are written to bits [15:8] of the result. \n 270 /// Bits [15:8] are written to bits [31:24] of the result. \n 271 /// Bits [23:16] are written to bits [47:40] of the result. \n 272 /// Bits [31:24] are written to bits [63:56] of the result. 273 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved 274 /// values. 275 static __inline__ __m64 __DEFAULT_FN_ATTRS 276 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) 277 { 278 return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2); 279 } 280 281 /// Unpacks the lower 32 bits from two 64-bit integer vectors of 282 /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. 283 /// 284 /// \headerfile <x86intrin.h> 285 /// 286 /// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction. 287 /// 288 /// \param __m1 289 /// A 64-bit integer vector of [4 x i16]. 290 /// Bits [15:0] are written to bits [15:0] of the result. \n 291 /// Bits [31:16] are written to bits [47:32] of the result. 
292 /// \param __m2 293 /// A 64-bit integer vector of [4 x i16]. 294 /// Bits [15:0] are written to bits [31:16] of the result. \n 295 /// Bits [31:16] are written to bits [63:48] of the result. 296 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved 297 /// values. 298 static __inline__ __m64 __DEFAULT_FN_ATTRS 299 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) 300 { 301 return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2); 302 } 303 304 /// Unpacks the lower 32 bits from two 64-bit integer vectors of 305 /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. 306 /// 307 /// \headerfile <x86intrin.h> 308 /// 309 /// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction. 310 /// 311 /// \param __m1 312 /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to 313 /// the lower 32 bits of the result. 314 /// \param __m2 315 /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to 316 /// the upper 32 bits of the result. 317 /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved 318 /// values. 319 static __inline__ __m64 __DEFAULT_FN_ATTRS 320 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) 321 { 322 return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2); 323 } 324 325 /// Adds each 8-bit integer element of the first 64-bit integer vector 326 /// of [8 x i8] to the corresponding 8-bit integer element of the second 327 /// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are 328 /// packed into a 64-bit integer vector of [8 x i8]. 329 /// 330 /// \headerfile <x86intrin.h> 331 /// 332 /// This intrinsic corresponds to the <c> PADDB </c> instruction. 333 /// 334 /// \param __m1 335 /// A 64-bit integer vector of [8 x i8]. 336 /// \param __m2 337 /// A 64-bit integer vector of [8 x i8]. 338 /// \returns A 64-bit integer vector of [8 x i8] containing the sums of both 339 /// parameters. 
340 static __inline__ __m64 __DEFAULT_FN_ATTRS 341 _mm_add_pi8(__m64 __m1, __m64 __m2) 342 { 343 return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2); 344 } 345 346 /// Adds each 16-bit integer element of the first 64-bit integer vector 347 /// of [4 x i16] to the corresponding 16-bit integer element of the second 348 /// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are 349 /// packed into a 64-bit integer vector of [4 x i16]. 350 /// 351 /// \headerfile <x86intrin.h> 352 /// 353 /// This intrinsic corresponds to the <c> PADDW </c> instruction. 354 /// 355 /// \param __m1 356 /// A 64-bit integer vector of [4 x i16]. 357 /// \param __m2 358 /// A 64-bit integer vector of [4 x i16]. 359 /// \returns A 64-bit integer vector of [4 x i16] containing the sums of both 360 /// parameters. 361 static __inline__ __m64 __DEFAULT_FN_ATTRS 362 _mm_add_pi16(__m64 __m1, __m64 __m2) 363 { 364 return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2); 365 } 366 367 /// Adds each 32-bit integer element of the first 64-bit integer vector 368 /// of [2 x i32] to the corresponding 32-bit integer element of the second 369 /// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are 370 /// packed into a 64-bit integer vector of [2 x i32]. 371 /// 372 /// \headerfile <x86intrin.h> 373 /// 374 /// This intrinsic corresponds to the <c> PADDD </c> instruction. 375 /// 376 /// \param __m1 377 /// A 64-bit integer vector of [2 x i32]. 378 /// \param __m2 379 /// A 64-bit integer vector of [2 x i32]. 380 /// \returns A 64-bit integer vector of [2 x i32] containing the sums of both 381 /// parameters. 
382 static __inline__ __m64 __DEFAULT_FN_ATTRS 383 _mm_add_pi32(__m64 __m1, __m64 __m2) 384 { 385 return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2); 386 } 387 388 /// Adds, with saturation, each 8-bit signed integer element of the first 389 /// 64-bit integer vector of [8 x i8] to the corresponding 8-bit signed 390 /// integer element of the second 64-bit integer vector of [8 x i8]. 391 /// 392 /// Positive sums greater than 0x7F are saturated to 0x7F. Negative sums 393 /// less than 0x80 are saturated to 0x80. The results are packed into a 394 /// 64-bit integer vector of [8 x i8]. 395 /// 396 /// \headerfile <x86intrin.h> 397 /// 398 /// This intrinsic corresponds to the <c> PADDSB </c> instruction. 399 /// 400 /// \param __m1 401 /// A 64-bit integer vector of [8 x i8]. 402 /// \param __m2 403 /// A 64-bit integer vector of [8 x i8]. 404 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums 405 /// of both parameters. 406 static __inline__ __m64 __DEFAULT_FN_ATTRS 407 _mm_adds_pi8(__m64 __m1, __m64 __m2) 408 { 409 return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); 410 } 411 412 /// Adds, with saturation, each 16-bit signed integer element of the first 413 /// 64-bit integer vector of [4 x i16] to the corresponding 16-bit signed 414 /// integer element of the second 64-bit integer vector of [4 x i16]. 415 /// 416 /// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums 417 /// less than 0x8000 are saturated to 0x8000. The results are packed into a 418 /// 64-bit integer vector of [4 x i16]. 419 /// 420 /// \headerfile <x86intrin.h> 421 /// 422 /// This intrinsic corresponds to the <c> PADDSW </c> instruction. 423 /// 424 /// \param __m1 425 /// A 64-bit integer vector of [4 x i16]. 426 /// \param __m2 427 /// A 64-bit integer vector of [4 x i16]. 428 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums 429 /// of both parameters. 
430 static __inline__ __m64 __DEFAULT_FN_ATTRS 431 _mm_adds_pi16(__m64 __m1, __m64 __m2) 432 { 433 return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); 434 } 435 436 /// Adds, with saturation, each 8-bit unsigned integer element of the first 437 /// 64-bit integer vector of [8 x i8] to the corresponding 8-bit unsigned 438 /// integer element of the second 64-bit integer vector of [8 x i8]. 439 /// 440 /// Sums greater than 0xFF are saturated to 0xFF. The results are packed 441 /// into a 64-bit integer vector of [8 x i8]. 442 /// 443 /// \headerfile <x86intrin.h> 444 /// 445 /// This intrinsic corresponds to the <c> PADDUSB </c> instruction. 446 /// 447 /// \param __m1 448 /// A 64-bit integer vector of [8 x i8]. 449 /// \param __m2 450 /// A 64-bit integer vector of [8 x i8]. 451 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated 452 /// unsigned sums of both parameters. 453 static __inline__ __m64 __DEFAULT_FN_ATTRS 454 _mm_adds_pu8(__m64 __m1, __m64 __m2) 455 { 456 return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); 457 } 458 459 /// Adds, with saturation, each 16-bit unsigned integer element of the first 460 /// 64-bit integer vector of [4 x i16] to the corresponding 16-bit unsigned 461 /// integer element of the second 64-bit integer vector of [4 x i16]. 462 /// 463 /// Sums greater than 0xFFFF are saturated to 0xFFFF. The results are packed 464 /// into a 64-bit integer vector of [4 x i16]. 465 /// 466 /// \headerfile <x86intrin.h> 467 /// 468 /// This intrinsic corresponds to the <c> PADDUSW </c> instruction. 469 /// 470 /// \param __m1 471 /// A 64-bit integer vector of [4 x i16]. 472 /// \param __m2 473 /// A 64-bit integer vector of [4 x i16]. 474 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated 475 /// unsigned sums of both parameters. 
476 static __inline__ __m64 __DEFAULT_FN_ATTRS 477 _mm_adds_pu16(__m64 __m1, __m64 __m2) 478 { 479 return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); 480 } 481 482 /// Subtracts each 8-bit integer element of the second 64-bit integer 483 /// vector of [8 x i8] from the corresponding 8-bit integer element of the 484 /// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results 485 /// are packed into a 64-bit integer vector of [8 x i8]. 486 /// 487 /// \headerfile <x86intrin.h> 488 /// 489 /// This intrinsic corresponds to the <c> PSUBB </c> instruction. 490 /// 491 /// \param __m1 492 /// A 64-bit integer vector of [8 x i8] containing the minuends. 493 /// \param __m2 494 /// A 64-bit integer vector of [8 x i8] containing the subtrahends. 495 /// \returns A 64-bit integer vector of [8 x i8] containing the differences of 496 /// both parameters. 497 static __inline__ __m64 __DEFAULT_FN_ATTRS 498 _mm_sub_pi8(__m64 __m1, __m64 __m2) 499 { 500 return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2); 501 } 502 503 /// Subtracts each 16-bit integer element of the second 64-bit integer 504 /// vector of [4 x i16] from the corresponding 16-bit integer element of the 505 /// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the 506 /// results are packed into a 64-bit integer vector of [4 x i16]. 507 /// 508 /// \headerfile <x86intrin.h> 509 /// 510 /// This intrinsic corresponds to the <c> PSUBW </c> instruction. 511 /// 512 /// \param __m1 513 /// A 64-bit integer vector of [4 x i16] containing the minuends. 514 /// \param __m2 515 /// A 64-bit integer vector of [4 x i16] containing the subtrahends. 516 /// \returns A 64-bit integer vector of [4 x i16] containing the differences of 517 /// both parameters. 
518 static __inline__ __m64 __DEFAULT_FN_ATTRS 519 _mm_sub_pi16(__m64 __m1, __m64 __m2) 520 { 521 return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2); 522 } 523 524 /// Subtracts each 32-bit integer element of the second 64-bit integer 525 /// vector of [2 x i32] from the corresponding 32-bit integer element of the 526 /// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the 527 /// results are packed into a 64-bit integer vector of [2 x i32]. 528 /// 529 /// \headerfile <x86intrin.h> 530 /// 531 /// This intrinsic corresponds to the <c> PSUBD </c> instruction. 532 /// 533 /// \param __m1 534 /// A 64-bit integer vector of [2 x i32] containing the minuends. 535 /// \param __m2 536 /// A 64-bit integer vector of [2 x i32] containing the subtrahends. 537 /// \returns A 64-bit integer vector of [2 x i32] containing the differences of 538 /// both parameters. 539 static __inline__ __m64 __DEFAULT_FN_ATTRS 540 _mm_sub_pi32(__m64 __m1, __m64 __m2) 541 { 542 return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2); 543 } 544 545 /// Subtracts, with saturation, each 8-bit signed integer element of the second 546 /// 64-bit integer vector of [8 x i8] from the corresponding 8-bit signed 547 /// integer element of the first 64-bit integer vector of [8 x i8]. 548 /// 549 /// Positive results greater than 0x7F are saturated to 0x7F. Negative 550 /// results less than 0x80 are saturated to 0x80. The results are packed 551 /// into a 64-bit integer vector of [8 x i8]. 552 /// 553 /// \headerfile <x86intrin.h> 554 /// 555 /// This intrinsic corresponds to the <c> PSUBSB </c> instruction. 556 /// 557 /// \param __m1 558 /// A 64-bit integer vector of [8 x i8] containing the minuends. 559 /// \param __m2 560 /// A 64-bit integer vector of [8 x i8] containing the subtrahends. 561 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated 562 /// differences of both parameters. 
563 static __inline__ __m64 __DEFAULT_FN_ATTRS 564 _mm_subs_pi8(__m64 __m1, __m64 __m2) 565 { 566 return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); 567 } 568 569 /// Subtracts, with saturation, each 16-bit signed integer element of the 570 /// second 64-bit integer vector of [4 x i16] from the corresponding 16-bit 571 /// signed integer element of the first 64-bit integer vector of [4 x i16]. 572 /// 573 /// Positive results greater than 0x7FFF are saturated to 0x7FFF. Negative 574 /// results less than 0x8000 are saturated to 0x8000. The results are packed 575 /// into a 64-bit integer vector of [4 x i16]. 576 /// 577 /// \headerfile <x86intrin.h> 578 /// 579 /// This intrinsic corresponds to the <c> PSUBSW </c> instruction. 580 /// 581 /// \param __m1 582 /// A 64-bit integer vector of [4 x i16] containing the minuends. 583 /// \param __m2 584 /// A 64-bit integer vector of [4 x i16] containing the subtrahends. 585 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated 586 /// differences of both parameters. 587 static __inline__ __m64 __DEFAULT_FN_ATTRS 588 _mm_subs_pi16(__m64 __m1, __m64 __m2) 589 { 590 return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); 591 } 592 593 /// Subtracts each 8-bit unsigned integer element of the second 64-bit 594 /// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer 595 /// element of the first 64-bit integer vector of [8 x i8]. 596 /// 597 /// If an element of the first vector is less than the corresponding element 598 /// of the second vector, the result is saturated to 0. The results are 599 /// packed into a 64-bit integer vector of [8 x i8]. 600 /// 601 /// \headerfile <x86intrin.h> 602 /// 603 /// This intrinsic corresponds to the <c> PSUBUSB </c> instruction. 604 /// 605 /// \param __m1 606 /// A 64-bit integer vector of [8 x i8] containing the minuends. 607 /// \param __m2 608 /// A 64-bit integer vector of [8 x i8] containing the subtrahends. 
609 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated 610 /// differences of both parameters. 611 static __inline__ __m64 __DEFAULT_FN_ATTRS 612 _mm_subs_pu8(__m64 __m1, __m64 __m2) 613 { 614 return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); 615 } 616 617 /// Subtracts each 16-bit unsigned integer element of the second 64-bit 618 /// integer vector of [4 x i16] from the corresponding 16-bit unsigned 619 /// integer element of the first 64-bit integer vector of [4 x i16]. 620 /// 621 /// If an element of the first vector is less than the corresponding element 622 /// of the second vector, the result is saturated to 0. The results are 623 /// packed into a 64-bit integer vector of [4 x i16]. 624 /// 625 /// \headerfile <x86intrin.h> 626 /// 627 /// This intrinsic corresponds to the <c> PSUBUSW </c> instruction. 628 /// 629 /// \param __m1 630 /// A 64-bit integer vector of [4 x i16] containing the minuends. 631 /// \param __m2 632 /// A 64-bit integer vector of [4 x i16] containing the subtrahends. 633 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated 634 /// differences of both parameters. 635 static __inline__ __m64 __DEFAULT_FN_ATTRS 636 _mm_subs_pu16(__m64 __m1, __m64 __m2) 637 { 638 return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); 639 } 640 641 /// Multiplies each 16-bit signed integer element of the first 64-bit 642 /// integer vector of [4 x i16] by the corresponding 16-bit signed integer 643 /// element of the second 64-bit integer vector of [4 x i16] and get four 644 /// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums. 645 /// The lower 32 bits of these two sums are packed into a 64-bit integer 646 /// vector of [2 x i32]. 647 /// 648 /// For example, bits [15:0] of both parameters are multiplied, bits [31:16] 649 /// of both parameters are multiplied, and the sum of both results is written 650 /// to bits [31:0] of the result. 
651 /// 652 /// \headerfile <x86intrin.h> 653 /// 654 /// This intrinsic corresponds to the <c> PMADDWD </c> instruction. 655 /// 656 /// \param __m1 657 /// A 64-bit integer vector of [4 x i16]. 658 /// \param __m2 659 /// A 64-bit integer vector of [4 x i16]. 660 /// \returns A 64-bit integer vector of [2 x i32] containing the sums of 661 /// products of both parameters. 662 static __inline__ __m64 __DEFAULT_FN_ATTRS 663 _mm_madd_pi16(__m64 __m1, __m64 __m2) 664 { 665 return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); 666 } 667 668 /// Multiplies each 16-bit signed integer element of the first 64-bit 669 /// integer vector of [4 x i16] by the corresponding 16-bit signed integer 670 /// element of the second 64-bit integer vector of [4 x i16]. Packs the upper 671 /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. 672 /// 673 /// \headerfile <x86intrin.h> 674 /// 675 /// This intrinsic corresponds to the <c> PMULHW </c> instruction. 676 /// 677 /// \param __m1 678 /// A 64-bit integer vector of [4 x i16]. 679 /// \param __m2 680 /// A 64-bit integer vector of [4 x i16]. 681 /// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits 682 /// of the products of both parameters. 683 static __inline__ __m64 __DEFAULT_FN_ATTRS 684 _mm_mulhi_pi16(__m64 __m1, __m64 __m2) 685 { 686 return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); 687 } 688 689 /// Multiplies each 16-bit signed integer element of the first 64-bit 690 /// integer vector of [4 x i16] by the corresponding 16-bit signed integer 691 /// element of the second 64-bit integer vector of [4 x i16]. Packs the lower 692 /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. 693 /// 694 /// \headerfile <x86intrin.h> 695 /// 696 /// This intrinsic corresponds to the <c> PMULLW </c> instruction. 697 /// 698 /// \param __m1 699 /// A 64-bit integer vector of [4 x i16]. 
700 /// \param __m2 701 /// A 64-bit integer vector of [4 x i16]. 702 /// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits 703 /// of the products of both parameters. 704 static __inline__ __m64 __DEFAULT_FN_ATTRS 705 _mm_mullo_pi16(__m64 __m1, __m64 __m2) 706 { 707 return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2); 708 } 709 710 /// Left-shifts each 16-bit signed integer element of the first 711 /// parameter, which is a 64-bit integer vector of [4 x i16], by the number 712 /// of bits specified by the second parameter, which is a 64-bit integer. The 713 /// lower 16 bits of the results are packed into a 64-bit integer vector of 714 /// [4 x i16]. 715 /// 716 /// \headerfile <x86intrin.h> 717 /// 718 /// This intrinsic corresponds to the <c> PSLLW </c> instruction. 719 /// 720 /// \param __m 721 /// A 64-bit integer vector of [4 x i16]. 722 /// \param __count 723 /// A 64-bit integer vector interpreted as a single 64-bit integer. 724 /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted 725 /// values. If \a __count is greater or equal to 16, the result is set to all 726 /// 0. 727 static __inline__ __m64 __DEFAULT_FN_ATTRS 728 _mm_sll_pi16(__m64 __m, __m64 __count) 729 { 730 return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); 731 } 732 733 /// Left-shifts each 16-bit signed integer element of a 64-bit integer 734 /// vector of [4 x i16] by the number of bits specified by a 32-bit integer. 735 /// The lower 16 bits of the results are packed into a 64-bit integer vector 736 /// of [4 x i16]. 737 /// 738 /// \headerfile <x86intrin.h> 739 /// 740 /// This intrinsic corresponds to the <c> PSLLW </c> instruction. 741 /// 742 /// \param __m 743 /// A 64-bit integer vector of [4 x i16]. 744 /// \param __count 745 /// A 32-bit integer value. 746 /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted 747 /// values. 
If \a __count is greater or equal to 16, the result is set to all 748 /// 0. 749 static __inline__ __m64 __DEFAULT_FN_ATTRS 750 _mm_slli_pi16(__m64 __m, int __count) 751 { 752 return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); 753 } 754 755 /// Left-shifts each 32-bit signed integer element of the first 756 /// parameter, which is a 64-bit integer vector of [2 x i32], by the number 757 /// of bits specified by the second parameter, which is a 64-bit integer. The 758 /// lower 32 bits of the results are packed into a 64-bit integer vector of 759 /// [2 x i32]. 760 /// 761 /// \headerfile <x86intrin.h> 762 /// 763 /// This intrinsic corresponds to the <c> PSLLD </c> instruction. 764 /// 765 /// \param __m 766 /// A 64-bit integer vector of [2 x i32]. 767 /// \param __count 768 /// A 64-bit integer vector interpreted as a single 64-bit integer. 769 /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted 770 /// values. If \a __count is greater or equal to 32, the result is set to all 771 /// 0. 772 static __inline__ __m64 __DEFAULT_FN_ATTRS 773 _mm_sll_pi32(__m64 __m, __m64 __count) 774 { 775 return (__m64)__builtin_ia32_pslld((__v2si)__m, __count); 776 } 777 778 /// Left-shifts each 32-bit signed integer element of a 64-bit integer 779 /// vector of [2 x i32] by the number of bits specified by a 32-bit integer. 780 /// The lower 32 bits of the results are packed into a 64-bit integer vector 781 /// of [2 x i32]. 782 /// 783 /// \headerfile <x86intrin.h> 784 /// 785 /// This intrinsic corresponds to the <c> PSLLD </c> instruction. 786 /// 787 /// \param __m 788 /// A 64-bit integer vector of [2 x i32]. 789 /// \param __count 790 /// A 32-bit integer value. 791 /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted 792 /// values. If \a __count is greater or equal to 32, the result is set to all 793 /// 0. 
794 static __inline__ __m64 __DEFAULT_FN_ATTRS 795 _mm_slli_pi32(__m64 __m, int __count) 796 { 797 return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count); 798 } 799 800 /// Left-shifts the first 64-bit integer parameter by the number of bits 801 /// specified by the second 64-bit integer parameter. The lower 64 bits of 802 /// result are returned. 803 /// 804 /// \headerfile <x86intrin.h> 805 /// 806 /// This intrinsic corresponds to the <c> PSLLQ </c> instruction. 807 /// 808 /// \param __m 809 /// A 64-bit integer vector interpreted as a single 64-bit integer. 810 /// \param __count 811 /// A 64-bit integer vector interpreted as a single 64-bit integer. 812 /// \returns A 64-bit integer vector containing the left-shifted value. If 813 /// \a __count is greater or equal to 64, the result is set to 0. 814 static __inline__ __m64 __DEFAULT_FN_ATTRS 815 _mm_sll_si64(__m64 __m, __m64 __count) 816 { 817 return (__m64)__builtin_ia32_psllq((__v1di)__m, __count); 818 } 819 820 /// Left-shifts the first parameter, which is a 64-bit integer, by the 821 /// number of bits specified by the second parameter, which is a 32-bit 822 /// integer. The lower 64 bits of result are returned. 823 /// 824 /// \headerfile <x86intrin.h> 825 /// 826 /// This intrinsic corresponds to the <c> PSLLQ </c> instruction. 827 /// 828 /// \param __m 829 /// A 64-bit integer vector interpreted as a single 64-bit integer. 830 /// \param __count 831 /// A 32-bit integer value. 832 /// \returns A 64-bit integer vector containing the left-shifted value. If 833 /// \a __count is greater or equal to 64, the result is set to 0. 
834 static __inline__ __m64 __DEFAULT_FN_ATTRS 835 _mm_slli_si64(__m64 __m, int __count) 836 { 837 return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count); 838 } 839 840 /// Right-shifts each 16-bit integer element of the first parameter, 841 /// which is a 64-bit integer vector of [4 x i16], by the number of bits 842 /// specified by the second parameter, which is a 64-bit integer. 843 /// 844 /// High-order bits are filled with the sign bit of the initial value of each 845 /// 16-bit element. The 16-bit results are packed into a 64-bit integer 846 /// vector of [4 x i16]. 847 /// 848 /// \headerfile <x86intrin.h> 849 /// 850 /// This intrinsic corresponds to the <c> PSRAW </c> instruction. 851 /// 852 /// \param __m 853 /// A 64-bit integer vector of [4 x i16]. 854 /// \param __count 855 /// A 64-bit integer vector interpreted as a single 64-bit integer. 856 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 857 /// values. 858 static __inline__ __m64 __DEFAULT_FN_ATTRS 859 _mm_sra_pi16(__m64 __m, __m64 __count) 860 { 861 return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count); 862 } 863 864 /// Right-shifts each 16-bit integer element of a 64-bit integer vector 865 /// of [4 x i16] by the number of bits specified by a 32-bit integer. 866 /// 867 /// High-order bits are filled with the sign bit of the initial value of each 868 /// 16-bit element. The 16-bit results are packed into a 64-bit integer 869 /// vector of [4 x i16]. 870 /// 871 /// \headerfile <x86intrin.h> 872 /// 873 /// This intrinsic corresponds to the <c> PSRAW </c> instruction. 874 /// 875 /// \param __m 876 /// A 64-bit integer vector of [4 x i16]. 877 /// \param __count 878 /// A 32-bit integer value. 879 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 880 /// values. 
881 static __inline__ __m64 __DEFAULT_FN_ATTRS 882 _mm_srai_pi16(__m64 __m, int __count) 883 { 884 return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count); 885 } 886 887 /// Right-shifts each 32-bit integer element of the first parameter, 888 /// which is a 64-bit integer vector of [2 x i32], by the number of bits 889 /// specified by the second parameter, which is a 64-bit integer. 890 /// 891 /// High-order bits are filled with the sign bit of the initial value of each 892 /// 32-bit element. The 32-bit results are packed into a 64-bit integer 893 /// vector of [2 x i32]. 894 /// 895 /// \headerfile <x86intrin.h> 896 /// 897 /// This intrinsic corresponds to the <c> PSRAD </c> instruction. 898 /// 899 /// \param __m 900 /// A 64-bit integer vector of [2 x i32]. 901 /// \param __count 902 /// A 64-bit integer vector interpreted as a single 64-bit integer. 903 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 904 /// values. 905 static __inline__ __m64 __DEFAULT_FN_ATTRS 906 _mm_sra_pi32(__m64 __m, __m64 __count) 907 { 908 return (__m64)__builtin_ia32_psrad((__v2si)__m, __count); 909 } 910 911 /// Right-shifts each 32-bit integer element of a 64-bit integer vector 912 /// of [2 x i32] by the number of bits specified by a 32-bit integer. 913 /// 914 /// High-order bits are filled with the sign bit of the initial value of each 915 /// 32-bit element. The 32-bit results are packed into a 64-bit integer 916 /// vector of [2 x i32]. 917 /// 918 /// \headerfile <x86intrin.h> 919 /// 920 /// This intrinsic corresponds to the <c> PSRAD </c> instruction. 921 /// 922 /// \param __m 923 /// A 64-bit integer vector of [2 x i32]. 924 /// \param __count 925 /// A 32-bit integer value. 926 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 927 /// values. 
928 static __inline__ __m64 __DEFAULT_FN_ATTRS 929 _mm_srai_pi32(__m64 __m, int __count) 930 { 931 return (__m64)__builtin_ia32_psradi((__v2si)__m, __count); 932 } 933 934 /// Right-shifts each 16-bit integer element of the first parameter, 935 /// which is a 64-bit integer vector of [4 x i16], by the number of bits 936 /// specified by the second parameter, which is a 64-bit integer. 937 /// 938 /// High-order bits are cleared. The 16-bit results are packed into a 64-bit 939 /// integer vector of [4 x i16]. 940 /// 941 /// \headerfile <x86intrin.h> 942 /// 943 /// This intrinsic corresponds to the <c> PSRLW </c> instruction. 944 /// 945 /// \param __m 946 /// A 64-bit integer vector of [4 x i16]. 947 /// \param __count 948 /// A 64-bit integer vector interpreted as a single 64-bit integer. 949 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 950 /// values. 951 static __inline__ __m64 __DEFAULT_FN_ATTRS 952 _mm_srl_pi16(__m64 __m, __m64 __count) 953 { 954 return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count); 955 } 956 957 /// Right-shifts each 16-bit integer element of a 64-bit integer vector 958 /// of [4 x i16] by the number of bits specified by a 32-bit integer. 959 /// 960 /// High-order bits are cleared. The 16-bit results are packed into a 64-bit 961 /// integer vector of [4 x i16]. 962 /// 963 /// \headerfile <x86intrin.h> 964 /// 965 /// This intrinsic corresponds to the <c> PSRLW </c> instruction. 966 /// 967 /// \param __m 968 /// A 64-bit integer vector of [4 x i16]. 969 /// \param __count 970 /// A 32-bit integer value. 971 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 972 /// values. 
973 static __inline__ __m64 __DEFAULT_FN_ATTRS 974 _mm_srli_pi16(__m64 __m, int __count) 975 { 976 return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count); 977 } 978 979 /// Right-shifts each 32-bit integer element of the first parameter, 980 /// which is a 64-bit integer vector of [2 x i32], by the number of bits 981 /// specified by the second parameter, which is a 64-bit integer. 982 /// 983 /// High-order bits are cleared. The 32-bit results are packed into a 64-bit 984 /// integer vector of [2 x i32]. 985 /// 986 /// \headerfile <x86intrin.h> 987 /// 988 /// This intrinsic corresponds to the <c> PSRLD </c> instruction. 989 /// 990 /// \param __m 991 /// A 64-bit integer vector of [2 x i32]. 992 /// \param __count 993 /// A 64-bit integer vector interpreted as a single 64-bit integer. 994 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 995 /// values. 996 static __inline__ __m64 __DEFAULT_FN_ATTRS 997 _mm_srl_pi32(__m64 __m, __m64 __count) 998 { 999 return (__m64)__builtin_ia32_psrld((__v2si)__m, __count); 1000 } 1001 1002 /// Right-shifts each 32-bit integer element of a 64-bit integer vector 1003 /// of [2 x i32] by the number of bits specified by a 32-bit integer. 1004 /// 1005 /// High-order bits are cleared. The 32-bit results are packed into a 64-bit 1006 /// integer vector of [2 x i32]. 1007 /// 1008 /// \headerfile <x86intrin.h> 1009 /// 1010 /// This intrinsic corresponds to the <c> PSRLD </c> instruction. 1011 /// 1012 /// \param __m 1013 /// A 64-bit integer vector of [2 x i32]. 1014 /// \param __count 1015 /// A 32-bit integer value. 1016 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 1017 /// values. 
1018 static __inline__ __m64 __DEFAULT_FN_ATTRS 1019 _mm_srli_pi32(__m64 __m, int __count) 1020 { 1021 return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count); 1022 } 1023 1024 /// Right-shifts the first 64-bit integer parameter by the number of bits 1025 /// specified by the second 64-bit integer parameter. 1026 /// 1027 /// High-order bits are cleared. 1028 /// 1029 /// \headerfile <x86intrin.h> 1030 /// 1031 /// This intrinsic corresponds to the <c> PSRLQ </c> instruction. 1032 /// 1033 /// \param __m 1034 /// A 64-bit integer vector interpreted as a single 64-bit integer. 1035 /// \param __count 1036 /// A 64-bit integer vector interpreted as a single 64-bit integer. 1037 /// \returns A 64-bit integer vector containing the right-shifted value. 1038 static __inline__ __m64 __DEFAULT_FN_ATTRS 1039 _mm_srl_si64(__m64 __m, __m64 __count) 1040 { 1041 return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count); 1042 } 1043 1044 /// Right-shifts the first parameter, which is a 64-bit integer, by the 1045 /// number of bits specified by the second parameter, which is a 32-bit 1046 /// integer. 1047 /// 1048 /// High-order bits are cleared. 1049 /// 1050 /// \headerfile <x86intrin.h> 1051 /// 1052 /// This intrinsic corresponds to the <c> PSRLQ </c> instruction. 1053 /// 1054 /// \param __m 1055 /// A 64-bit integer vector interpreted as a single 64-bit integer. 1056 /// \param __count 1057 /// A 32-bit integer value. 1058 /// \returns A 64-bit integer vector containing the right-shifted value. 1059 static __inline__ __m64 __DEFAULT_FN_ATTRS 1060 _mm_srli_si64(__m64 __m, int __count) 1061 { 1062 return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count); 1063 } 1064 1065 /// Performs a bitwise AND of two 64-bit integer vectors. 1066 /// 1067 /// \headerfile <x86intrin.h> 1068 /// 1069 /// This intrinsic corresponds to the <c> PAND </c> instruction. 1070 /// 1071 /// \param __m1 1072 /// A 64-bit integer vector. 1073 /// \param __m2 1074 /// A 64-bit integer vector. 
1075 /// \returns A 64-bit integer vector containing the bitwise AND of both 1076 /// parameters. 1077 static __inline__ __m64 __DEFAULT_FN_ATTRS 1078 _mm_and_si64(__m64 __m1, __m64 __m2) 1079 { 1080 return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2); 1081 } 1082 1083 /// Performs a bitwise NOT of the first 64-bit integer vector, and then 1084 /// performs a bitwise AND of the intermediate result and the second 64-bit 1085 /// integer vector. 1086 /// 1087 /// \headerfile <x86intrin.h> 1088 /// 1089 /// This intrinsic corresponds to the <c> PANDN </c> instruction. 1090 /// 1091 /// \param __m1 1092 /// A 64-bit integer vector. The one's complement of this parameter is used 1093 /// in the bitwise AND. 1094 /// \param __m2 1095 /// A 64-bit integer vector. 1096 /// \returns A 64-bit integer vector containing the bitwise AND of the second 1097 /// parameter and the one's complement of the first parameter. 1098 static __inline__ __m64 __DEFAULT_FN_ATTRS 1099 _mm_andnot_si64(__m64 __m1, __m64 __m2) 1100 { 1101 return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2); 1102 } 1103 1104 /// Performs a bitwise OR of two 64-bit integer vectors. 1105 /// 1106 /// \headerfile <x86intrin.h> 1107 /// 1108 /// This intrinsic corresponds to the <c> POR </c> instruction. 1109 /// 1110 /// \param __m1 1111 /// A 64-bit integer vector. 1112 /// \param __m2 1113 /// A 64-bit integer vector. 1114 /// \returns A 64-bit integer vector containing the bitwise OR of both 1115 /// parameters. 1116 static __inline__ __m64 __DEFAULT_FN_ATTRS 1117 _mm_or_si64(__m64 __m1, __m64 __m2) 1118 { 1119 return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2); 1120 } 1121 1122 /// Performs a bitwise exclusive OR of two 64-bit integer vectors. 1123 /// 1124 /// \headerfile <x86intrin.h> 1125 /// 1126 /// This intrinsic corresponds to the <c> PXOR </c> instruction. 1127 /// 1128 /// \param __m1 1129 /// A 64-bit integer vector. 1130 /// \param __m2 1131 /// A 64-bit integer vector. 
1132 /// \returns A 64-bit integer vector containing the bitwise exclusive OR of both 1133 /// parameters. 1134 static __inline__ __m64 __DEFAULT_FN_ATTRS 1135 _mm_xor_si64(__m64 __m1, __m64 __m2) 1136 { 1137 return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2); 1138 } 1139 1140 /// Compares the 8-bit integer elements of two 64-bit integer vectors of 1141 /// [8 x i8] to determine if the element of the first vector is equal to the 1142 /// corresponding element of the second vector. 1143 /// 1144 /// Each comparison returns 0 for false, 0xFF for true. 1145 /// 1146 /// \headerfile <x86intrin.h> 1147 /// 1148 /// This intrinsic corresponds to the <c> PCMPEQB </c> instruction. 1149 /// 1150 /// \param __m1 1151 /// A 64-bit integer vector of [8 x i8]. 1152 /// \param __m2 1153 /// A 64-bit integer vector of [8 x i8]. 1154 /// \returns A 64-bit integer vector of [8 x i8] containing the comparison 1155 /// results. 1156 static __inline__ __m64 __DEFAULT_FN_ATTRS 1157 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) 1158 { 1159 return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2); 1160 } 1161 1162 /// Compares the 16-bit integer elements of two 64-bit integer vectors of 1163 /// [4 x i16] to determine if the element of the first vector is equal to the 1164 /// corresponding element of the second vector. 1165 /// 1166 /// Each comparison returns 0 for false, 0xFFFF for true. 1167 /// 1168 /// \headerfile <x86intrin.h> 1169 /// 1170 /// This intrinsic corresponds to the <c> PCMPEQW </c> instruction. 1171 /// 1172 /// \param __m1 1173 /// A 64-bit integer vector of [4 x i16]. 1174 /// \param __m2 1175 /// A 64-bit integer vector of [4 x i16]. 1176 /// \returns A 64-bit integer vector of [4 x i16] containing the comparison 1177 /// results. 
1178 static __inline__ __m64 __DEFAULT_FN_ATTRS 1179 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) 1180 { 1181 return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2); 1182 } 1183 1184 /// Compares the 32-bit integer elements of two 64-bit integer vectors of 1185 /// [2 x i32] to determine if the element of the first vector is equal to the 1186 /// corresponding element of the second vector. 1187 /// 1188 /// Each comparison returns 0 for false, 0xFFFFFFFF for true. 1189 /// 1190 /// \headerfile <x86intrin.h> 1191 /// 1192 /// This intrinsic corresponds to the <c> PCMPEQD </c> instruction. 1193 /// 1194 /// \param __m1 1195 /// A 64-bit integer vector of [2 x i32]. 1196 /// \param __m2 1197 /// A 64-bit integer vector of [2 x i32]. 1198 /// \returns A 64-bit integer vector of [2 x i32] containing the comparison 1199 /// results. 1200 static __inline__ __m64 __DEFAULT_FN_ATTRS 1201 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) 1202 { 1203 return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2); 1204 } 1205 1206 /// Compares the 8-bit integer elements of two 64-bit integer vectors of 1207 /// [8 x i8] to determine if the element of the first vector is greater than 1208 /// the corresponding element of the second vector. 1209 /// 1210 /// Each comparison returns 0 for false, 0xFF for true. 1211 /// 1212 /// \headerfile <x86intrin.h> 1213 /// 1214 /// This intrinsic corresponds to the <c> PCMPGTB </c> instruction. 1215 /// 1216 /// \param __m1 1217 /// A 64-bit integer vector of [8 x i8]. 1218 /// \param __m2 1219 /// A 64-bit integer vector of [8 x i8]. 1220 /// \returns A 64-bit integer vector of [8 x i8] containing the comparison 1221 /// results. 
1222 static __inline__ __m64 __DEFAULT_FN_ATTRS 1223 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) 1224 { 1225 return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2); 1226 } 1227 1228 /// Compares the 16-bit integer elements of two 64-bit integer vectors of 1229 /// [4 x i16] to determine if the element of the first vector is greater than 1230 /// the corresponding element of the second vector. 1231 /// 1232 /// Each comparison returns 0 for false, 0xFFFF for true. 1233 /// 1234 /// \headerfile <x86intrin.h> 1235 /// 1236 /// This intrinsic corresponds to the <c> PCMPGTW </c> instruction. 1237 /// 1238 /// \param __m1 1239 /// A 64-bit integer vector of [4 x i16]. 1240 /// \param __m2 1241 /// A 64-bit integer vector of [4 x i16]. 1242 /// \returns A 64-bit integer vector of [4 x i16] containing the comparison 1243 /// results. 1244 static __inline__ __m64 __DEFAULT_FN_ATTRS 1245 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) 1246 { 1247 return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2); 1248 } 1249 1250 /// Compares the 32-bit integer elements of two 64-bit integer vectors of 1251 /// [2 x i32] to determine if the element of the first vector is greater than 1252 /// the corresponding element of the second vector. 1253 /// 1254 /// Each comparison returns 0 for false, 0xFFFFFFFF for true. 1255 /// 1256 /// \headerfile <x86intrin.h> 1257 /// 1258 /// This intrinsic corresponds to the <c> PCMPGTD </c> instruction. 1259 /// 1260 /// \param __m1 1261 /// A 64-bit integer vector of [2 x i32]. 1262 /// \param __m2 1263 /// A 64-bit integer vector of [2 x i32]. 1264 /// \returns A 64-bit integer vector of [2 x i32] containing the comparison 1265 /// results. 1266 static __inline__ __m64 __DEFAULT_FN_ATTRS 1267 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) 1268 { 1269 return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2); 1270 } 1271 1272 /// Constructs a 64-bit integer vector initialized to zero. 
1273 /// 1274 /// \headerfile <x86intrin.h> 1275 /// 1276 /// This intrinsic corresponds to the <c> PXOR </c> instruction. 1277 /// 1278 /// \returns An initialized 64-bit integer vector with all elements set to zero. 1279 static __inline__ __m64 __DEFAULT_FN_ATTRS 1280 _mm_setzero_si64(void) 1281 { 1282 return __extension__ (__m64){ 0LL }; 1283 } 1284 1285 /// Constructs a 64-bit integer vector initialized with the specified 1286 /// 32-bit integer values. 1287 /// 1288 /// \headerfile <x86intrin.h> 1289 /// 1290 /// This intrinsic is a utility function and does not correspond to a specific 1291 /// instruction. 1292 /// 1293 /// \param __i1 1294 /// A 32-bit integer value used to initialize the upper 32 bits of the 1295 /// result. 1296 /// \param __i0 1297 /// A 32-bit integer value used to initialize the lower 32 bits of the 1298 /// result. 1299 /// \returns An initialized 64-bit integer vector. 1300 static __inline__ __m64 __DEFAULT_FN_ATTRS 1301 _mm_set_pi32(int __i1, int __i0) 1302 { 1303 return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1); 1304 } 1305 1306 /// Constructs a 64-bit integer vector initialized with the specified 1307 /// 16-bit integer values. 1308 /// 1309 /// \headerfile <x86intrin.h> 1310 /// 1311 /// This intrinsic is a utility function and does not correspond to a specific 1312 /// instruction. 1313 /// 1314 /// \param __s3 1315 /// A 16-bit integer value used to initialize bits [63:48] of the result. 1316 /// \param __s2 1317 /// A 16-bit integer value used to initialize bits [47:32] of the result. 1318 /// \param __s1 1319 /// A 16-bit integer value used to initialize bits [31:16] of the result. 1320 /// \param __s0 1321 /// A 16-bit integer value used to initialize bits [15:0] of the result. 1322 /// \returns An initialized 64-bit integer vector. 
1323 static __inline__ __m64 __DEFAULT_FN_ATTRS 1324 _mm_set_pi16(short __s3, short __s2, short __s1, short __s0) 1325 { 1326 return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3); 1327 } 1328 1329 /// Constructs a 64-bit integer vector initialized with the specified 1330 /// 8-bit integer values. 1331 /// 1332 /// \headerfile <x86intrin.h> 1333 /// 1334 /// This intrinsic is a utility function and does not correspond to a specific 1335 /// instruction. 1336 /// 1337 /// \param __b7 1338 /// An 8-bit integer value used to initialize bits [63:56] of the result. 1339 /// \param __b6 1340 /// An 8-bit integer value used to initialize bits [55:48] of the result. 1341 /// \param __b5 1342 /// An 8-bit integer value used to initialize bits [47:40] of the result. 1343 /// \param __b4 1344 /// An 8-bit integer value used to initialize bits [39:32] of the result. 1345 /// \param __b3 1346 /// An 8-bit integer value used to initialize bits [31:24] of the result. 1347 /// \param __b2 1348 /// An 8-bit integer value used to initialize bits [23:16] of the result. 1349 /// \param __b1 1350 /// An 8-bit integer value used to initialize bits [15:8] of the result. 1351 /// \param __b0 1352 /// An 8-bit integer value used to initialize bits [7:0] of the result. 1353 /// \returns An initialized 64-bit integer vector. 1354 static __inline__ __m64 __DEFAULT_FN_ATTRS 1355 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, 1356 char __b1, char __b0) 1357 { 1358 return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, 1359 __b4, __b5, __b6, __b7); 1360 } 1361 1362 /// Constructs a 64-bit integer vector of [2 x i32], with each of the 1363 /// 32-bit integer vector elements set to the specified 32-bit integer 1364 /// value. 1365 /// 1366 /// \headerfile <x86intrin.h> 1367 /// 1368 /// This intrinsic is a utility function and does not correspond to a specific 1369 /// instruction. 
1370 /// 1371 /// \param __i 1372 /// A 32-bit integer value used to initialize each vector element of the 1373 /// result. 1374 /// \returns An initialized 64-bit integer vector of [2 x i32]. 1375 static __inline__ __m64 __DEFAULT_FN_ATTRS 1376 _mm_set1_pi32(int __i) 1377 { 1378 return _mm_set_pi32(__i, __i); 1379 } 1380 1381 /// Constructs a 64-bit integer vector of [4 x i16], with each of the 1382 /// 16-bit integer vector elements set to the specified 16-bit integer 1383 /// value. 1384 /// 1385 /// \headerfile <x86intrin.h> 1386 /// 1387 /// This intrinsic is a utility function and does not correspond to a specific 1388 /// instruction. 1389 /// 1390 /// \param __w 1391 /// A 16-bit integer value used to initialize each vector element of the 1392 /// result. 1393 /// \returns An initialized 64-bit integer vector of [4 x i16]. 1394 static __inline__ __m64 __DEFAULT_FN_ATTRS 1395 _mm_set1_pi16(short __w) 1396 { 1397 return _mm_set_pi16(__w, __w, __w, __w); 1398 } 1399 1400 /// Constructs a 64-bit integer vector of [8 x i8], with each of the 1401 /// 8-bit integer vector elements set to the specified 8-bit integer value. 1402 /// 1403 /// \headerfile <x86intrin.h> 1404 /// 1405 /// This intrinsic is a utility function and does not correspond to a specific 1406 /// instruction. 1407 /// 1408 /// \param __b 1409 /// An 8-bit integer value used to initialize each vector element of the 1410 /// result. 1411 /// \returns An initialized 64-bit integer vector of [8 x i8]. 1412 static __inline__ __m64 __DEFAULT_FN_ATTRS 1413 _mm_set1_pi8(char __b) 1414 { 1415 return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); 1416 } 1417 1418 /// Constructs a 64-bit integer vector, initialized in reverse order with 1419 /// the specified 32-bit integer values. 1420 /// 1421 /// \headerfile <x86intrin.h> 1422 /// 1423 /// This intrinsic is a utility function and does not correspond to a specific 1424 /// instruction. 
1425 /// 1426 /// \param __i0 1427 /// A 32-bit integer value used to initialize the lower 32 bits of the 1428 /// result. 1429 /// \param __i1 1430 /// A 32-bit integer value used to initialize the upper 32 bits of the 1431 /// result. 1432 /// \returns An initialized 64-bit integer vector. 1433 static __inline__ __m64 __DEFAULT_FN_ATTRS 1434 _mm_setr_pi32(int __i0, int __i1) 1435 { 1436 return _mm_set_pi32(__i1, __i0); 1437 } 1438 1439 /// Constructs a 64-bit integer vector, initialized in reverse order with 1440 /// the specified 16-bit integer values. 1441 /// 1442 /// \headerfile <x86intrin.h> 1443 /// 1444 /// This intrinsic is a utility function and does not correspond to a specific 1445 /// instruction. 1446 /// 1447 /// \param __w0 1448 /// A 16-bit integer value used to initialize bits [15:0] of the result. 1449 /// \param __w1 1450 /// A 16-bit integer value used to initialize bits [31:16] of the result. 1451 /// \param __w2 1452 /// A 16-bit integer value used to initialize bits [47:32] of the result. 1453 /// \param __w3 1454 /// A 16-bit integer value used to initialize bits [63:48] of the result. 1455 /// \returns An initialized 64-bit integer vector. 1456 static __inline__ __m64 __DEFAULT_FN_ATTRS 1457 _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) 1458 { 1459 return _mm_set_pi16(__w3, __w2, __w1, __w0); 1460 } 1461 1462 /// Constructs a 64-bit integer vector, initialized in reverse order with 1463 /// the specified 8-bit integer values. 1464 /// 1465 /// \headerfile <x86intrin.h> 1466 /// 1467 /// This intrinsic is a utility function and does not correspond to a specific 1468 /// instruction. 1469 /// 1470 /// \param __b0 1471 /// An 8-bit integer value used to initialize bits [7:0] of the result. 1472 /// \param __b1 1473 /// An 8-bit integer value used to initialize bits [15:8] of the result. 1474 /// \param __b2 1475 /// An 8-bit integer value used to initialize bits [23:16] of the result. 
1476 /// \param __b3 1477 /// An 8-bit integer value used to initialize bits [31:24] of the result. 1478 /// \param __b4 1479 /// An 8-bit integer value used to initialize bits [39:32] of the result. 1480 /// \param __b5 1481 /// An 8-bit integer value used to initialize bits [47:40] of the result. 1482 /// \param __b6 1483 /// An 8-bit integer value used to initialize bits [55:48] of the result. 1484 /// \param __b7 1485 /// An 8-bit integer value used to initialize bits [63:56] of the result. 1486 /// \returns An initialized 64-bit integer vector. 1487 static __inline__ __m64 __DEFAULT_FN_ATTRS 1488 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, 1489 char __b6, char __b7) 1490 { 1491 return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); 1492 } 1493 1494 #undef __DEFAULT_FN_ATTRS 1495 1496 /* Aliases for compatibility. */ 1497 #define _m_empty _mm_empty 1498 #define _m_from_int _mm_cvtsi32_si64 1499 #define _m_from_int64 _mm_cvtsi64_m64 1500 #define _m_to_int _mm_cvtsi64_si32 1501 #define _m_to_int64 _mm_cvtm64_si64 1502 #define _m_packsswb _mm_packs_pi16 1503 #define _m_packssdw _mm_packs_pi32 1504 #define _m_packuswb _mm_packs_pu16 1505 #define _m_punpckhbw _mm_unpackhi_pi8 1506 #define _m_punpckhwd _mm_unpackhi_pi16 1507 #define _m_punpckhdq _mm_unpackhi_pi32 1508 #define _m_punpcklbw _mm_unpacklo_pi8 1509 #define _m_punpcklwd _mm_unpacklo_pi16 1510 #define _m_punpckldq _mm_unpacklo_pi32 1511 #define _m_paddb _mm_add_pi8 1512 #define _m_paddw _mm_add_pi16 1513 #define _m_paddd _mm_add_pi32 1514 #define _m_paddsb _mm_adds_pi8 1515 #define _m_paddsw _mm_adds_pi16 1516 #define _m_paddusb _mm_adds_pu8 1517 #define _m_paddusw _mm_adds_pu16 1518 #define _m_psubb _mm_sub_pi8 1519 #define _m_psubw _mm_sub_pi16 1520 #define _m_psubd _mm_sub_pi32 1521 #define _m_psubsb _mm_subs_pi8 1522 #define _m_psubsw _mm_subs_pi16 1523 #define _m_psubusb _mm_subs_pu8 1524 #define _m_psubusw _mm_subs_pu16 1525 #define _m_pmaddwd 
_mm_madd_pi16 1526 #define _m_pmulhw _mm_mulhi_pi16 1527 #define _m_pmullw _mm_mullo_pi16 1528 #define _m_psllw _mm_sll_pi16 1529 #define _m_psllwi _mm_slli_pi16 1530 #define _m_pslld _mm_sll_pi32 1531 #define _m_pslldi _mm_slli_pi32 1532 #define _m_psllq _mm_sll_si64 1533 #define _m_psllqi _mm_slli_si64 1534 #define _m_psraw _mm_sra_pi16 1535 #define _m_psrawi _mm_srai_pi16 1536 #define _m_psrad _mm_sra_pi32 1537 #define _m_psradi _mm_srai_pi32 1538 #define _m_psrlw _mm_srl_pi16 1539 #define _m_psrlwi _mm_srli_pi16 1540 #define _m_psrld _mm_srl_pi32 1541 #define _m_psrldi _mm_srli_pi32 1542 #define _m_psrlq _mm_srl_si64 1543 #define _m_psrlqi _mm_srli_si64 1544 #define _m_pand _mm_and_si64 1545 #define _m_pandn _mm_andnot_si64 1546 #define _m_por _mm_or_si64 1547 #define _m_pxor _mm_xor_si64 1548 #define _m_pcmpeqb _mm_cmpeq_pi8 1549 #define _m_pcmpeqw _mm_cmpeq_pi16 1550 #define _m_pcmpeqd _mm_cmpeq_pi32 1551 #define _m_pcmpgtb _mm_cmpgt_pi8 1552 #define _m_pcmpgtw _mm_cmpgt_pi16 1553 #define _m_pcmpgtd _mm_cmpgt_pi32 1554 1555 #endif /* __MMINTRIN_H */ 1556 1557