1 /*===---- mmintrin.h - MMX intrinsics --------------------------------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10 #ifndef __MMINTRIN_H 11 #define __MMINTRIN_H 12 13 #if !defined(__i386__) && !defined(__x86_64__) 14 #error "This header is only meant to be used on x86 and x64 architecture" 15 #endif 16 17 typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8))); 18 19 typedef long long __v1di __attribute__((__vector_size__(8))); 20 typedef int __v2si __attribute__((__vector_size__(8))); 21 typedef short __v4hi __attribute__((__vector_size__(8))); 22 typedef char __v8qi __attribute__((__vector_size__(8))); 23 24 /* Define the default attributes for the functions in this file. */ 25 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"), __min_vector_width__(64))) 26 27 /// Clears the MMX state by setting the state of the x87 stack registers 28 /// to empty. 29 /// 30 /// \headerfile <x86intrin.h> 31 /// 32 /// This intrinsic corresponds to the <c> EMMS </c> instruction. 33 /// 34 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("mmx"))) 35 _mm_empty(void) 36 { 37 __builtin_ia32_emms(); 38 } 39 40 /// Constructs a 64-bit integer vector, setting the lower 32 bits to the 41 /// value of the 32-bit integer parameter and setting the upper 32 bits to 0. 42 /// 43 /// \headerfile <x86intrin.h> 44 /// 45 /// This intrinsic corresponds to the <c> MOVD </c> instruction. 46 /// 47 /// \param __i 48 /// A 32-bit integer value. 49 /// \returns A 64-bit integer vector. The lower 32 bits contain the value of the 50 /// parameter. The upper 32 bits are set to 0. 51 static __inline__ __m64 __DEFAULT_FN_ATTRS 52 _mm_cvtsi32_si64(int __i) 53 { 54 return (__m64)__builtin_ia32_vec_init_v2si(__i, 0); 55 } 56 57 /// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit 58 /// signed integer. 59 /// 60 /// \headerfile <x86intrin.h> 61 /// 62 /// This intrinsic corresponds to the <c> MOVD </c> instruction. 63 /// 64 /// \param __m 65 /// A 64-bit integer vector. 66 /// \returns A 32-bit signed integer value containing the lower 32 bits of the 67 /// parameter. 68 static __inline__ int __DEFAULT_FN_ATTRS 69 _mm_cvtsi64_si32(__m64 __m) 70 { 71 return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0); 72 } 73 74 /// Casts a 64-bit signed integer value into a 64-bit integer vector. 75 /// 76 /// \headerfile <x86intrin.h> 77 /// 78 /// This intrinsic corresponds to the <c> MOVQ </c> instruction. 79 /// 80 /// \param __i 81 /// A 64-bit signed integer. 82 /// \returns A 64-bit integer vector containing the same bitwise pattern as the 83 /// parameter. 84 static __inline__ __m64 __DEFAULT_FN_ATTRS 85 _mm_cvtsi64_m64(long long __i) 86 { 87 return (__m64)__i; 88 } 89 90 /// Casts a 64-bit integer vector into a 64-bit signed integer value. 91 /// 92 /// \headerfile <x86intrin.h> 93 /// 94 /// This intrinsic corresponds to the <c> MOVQ </c> instruction. 95 /// 96 /// \param __m 97 /// A 64-bit integer vector. 98 /// \returns A 64-bit signed integer containing the same bitwise pattern as the 99 /// parameter. 100 static __inline__ long long __DEFAULT_FN_ATTRS 101 _mm_cvtm64_si64(__m64 __m) 102 { 103 return (long long)__m; 104 } 105 106 /// Converts 16-bit signed integers from both 64-bit integer vector 107 /// parameters of [4 x i16] into 8-bit signed integer values, and constructs 108 /// a 64-bit integer vector of [8 x i8] as the result. Positive values 109 /// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80 110 /// are saturated to 0x80. 111 /// 112 /// \headerfile <x86intrin.h> 113 /// 114 /// This intrinsic corresponds to the <c> PACKSSWB </c> instruction. 115 /// 116 /// \param __m1 117 /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 118 /// 16-bit signed integer and is converted to an 8-bit signed integer with 119 /// saturation. Positive values greater than 0x7F are saturated to 0x7F. 120 /// Negative values less than 0x80 are saturated to 0x80. The converted 121 /// [4 x i8] values are written to the lower 32 bits of the result. 122 /// \param __m2 123 /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 124 /// 16-bit signed integer and is converted to an 8-bit signed integer with 125 /// saturation. Positive values greater than 0x7F are saturated to 0x7F. 126 /// Negative values less than 0x80 are saturated to 0x80. The converted 127 /// [4 x i8] values are written to the upper 32 bits of the result. 128 /// \returns A 64-bit integer vector of [8 x i8] containing the converted 129 /// values. 130 static __inline__ __m64 __DEFAULT_FN_ATTRS 131 _mm_packs_pi16(__m64 __m1, __m64 __m2) 132 { 133 return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); 134 } 135 136 /// Converts 32-bit signed integers from both 64-bit integer vector 137 /// parameters of [2 x i32] into 16-bit signed integer values, and constructs 138 /// a 64-bit integer vector of [4 x i16] as the result. Positive values 139 /// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than 140 /// 0x8000 are saturated to 0x8000. 141 /// 142 /// \headerfile <x86intrin.h> 143 /// 144 /// This intrinsic corresponds to the <c> PACKSSDW </c> instruction. 145 /// 146 /// \param __m1 147 /// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a 148 /// 32-bit signed integer and is converted to a 16-bit signed integer with 149 /// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF. 150 /// Negative values less than 0x8000 are saturated to 0x8000. The converted 151 /// [2 x i16] values are written to the lower 32 bits of the result. 152 /// \param __m2 153 /// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a 154 /// 32-bit signed integer and is converted to a 16-bit signed integer with 155 /// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF. 156 /// Negative values less than 0x8000 are saturated to 0x8000. The converted 157 /// [2 x i16] values are written to the upper 32 bits of the result. 158 /// \returns A 64-bit integer vector of [4 x i16] containing the converted 159 /// values. 160 static __inline__ __m64 __DEFAULT_FN_ATTRS 161 _mm_packs_pi32(__m64 __m1, __m64 __m2) 162 { 163 return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); 164 } 165 166 /// Converts 16-bit signed integers from both 64-bit integer vector 167 /// parameters of [4 x i16] into 8-bit unsigned integer values, and 168 /// constructs a 64-bit integer vector of [8 x i8] as the result. Values 169 /// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated 170 /// to 0. 171 /// 172 /// \headerfile <x86intrin.h> 173 /// 174 /// This intrinsic corresponds to the <c> PACKUSWB </c> instruction. 175 /// 176 /// \param __m1 177 /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 178 /// 16-bit signed integer and is converted to an 8-bit unsigned integer with 179 /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 180 /// than 0 are saturated to 0. The converted [4 x i8] values are written to 181 /// the lower 32 bits of the result. 182 /// \param __m2 183 /// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 184 /// 16-bit signed integer and is converted to an 8-bit unsigned integer with 185 /// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 186 /// than 0 are saturated to 0. The converted [4 x i8] values are written to 187 /// the upper 32 bits of the result. 188 /// \returns A 64-bit integer vector of [8 x i8] containing the converted 189 /// values. 190 static __inline__ __m64 __DEFAULT_FN_ATTRS 191 _mm_packs_pu16(__m64 __m1, __m64 __m2) 192 { 193 return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2); 194 } 195 196 /// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] 197 /// and interleaves them into a 64-bit integer vector of [8 x i8]. 198 /// 199 /// \headerfile <x86intrin.h> 200 /// 201 /// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction. 202 /// 203 /// \param __m1 204 /// A 64-bit integer vector of [8 x i8]. \n 205 /// Bits [39:32] are written to bits [7:0] of the result. \n 206 /// Bits [47:40] are written to bits [23:16] of the result. \n 207 /// Bits [55:48] are written to bits [39:32] of the result. \n 208 /// Bits [63:56] are written to bits [55:48] of the result. 209 /// \param __m2 210 /// A 64-bit integer vector of [8 x i8]. 211 /// Bits [39:32] are written to bits [15:8] of the result. \n 212 /// Bits [47:40] are written to bits [31:24] of the result. \n 213 /// Bits [55:48] are written to bits [47:40] of the result. \n 214 /// Bits [63:56] are written to bits [63:56] of the result. 215 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved 216 /// values. 217 static __inline__ __m64 __DEFAULT_FN_ATTRS 218 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) 219 { 220 return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2); 221 } 222 223 /// Unpacks the upper 32 bits from two 64-bit integer vectors of 224 /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. 225 /// 226 /// \headerfile <x86intrin.h> 227 /// 228 /// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction. 229 /// 230 /// \param __m1 231 /// A 64-bit integer vector of [4 x i16]. 232 /// Bits [47:32] are written to bits [15:0] of the result. \n 233 /// Bits [63:48] are written to bits [47:32] of the result. 234 /// \param __m2 235 /// A 64-bit integer vector of [4 x i16]. 236 /// Bits [47:32] are written to bits [31:16] of the result. \n 237 /// Bits [63:48] are written to bits [63:48] of the result. 238 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved 239 /// values. 240 static __inline__ __m64 __DEFAULT_FN_ATTRS 241 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) 242 { 243 return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2); 244 } 245 246 /// Unpacks the upper 32 bits from two 64-bit integer vectors of 247 /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. 248 /// 249 /// \headerfile <x86intrin.h> 250 /// 251 /// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction. 252 /// 253 /// \param __m1 254 /// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to 255 /// the lower 32 bits of the result. 256 /// \param __m2 257 /// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to 258 /// the upper 32 bits of the result. 259 /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved 260 /// values. 261 static __inline__ __m64 __DEFAULT_FN_ATTRS 262 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) 263 { 264 return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2); 265 } 266 267 /// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] 268 /// and interleaves them into a 64-bit integer vector of [8 x i8]. 269 /// 270 /// \headerfile <x86intrin.h> 271 /// 272 /// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction. 273 /// 274 /// \param __m1 275 /// A 64-bit integer vector of [8 x i8]. 276 /// Bits [7:0] are written to bits [7:0] of the result. \n 277 /// Bits [15:8] are written to bits [23:16] of the result. \n 278 /// Bits [23:16] are written to bits [39:32] of the result. \n 279 /// Bits [31:24] are written to bits [55:48] of the result. 280 /// \param __m2 281 /// A 64-bit integer vector of [8 x i8]. 282 /// Bits [7:0] are written to bits [15:8] of the result. \n 283 /// Bits [15:8] are written to bits [31:24] of the result. \n 284 /// Bits [23:16] are written to bits [47:40] of the result. \n 285 /// Bits [31:24] are written to bits [63:56] of the result. 286 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved 287 /// values. 288 static __inline__ __m64 __DEFAULT_FN_ATTRS 289 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) 290 { 291 return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2); 292 } 293 294 /// Unpacks the lower 32 bits from two 64-bit integer vectors of 295 /// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. 296 /// 297 /// \headerfile <x86intrin.h> 298 /// 299 /// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction. 300 /// 301 /// \param __m1 302 /// A 64-bit integer vector of [4 x i16]. 303 /// Bits [15:0] are written to bits [15:0] of the result. \n 304 /// Bits [31:16] are written to bits [47:32] of the result. 305 /// \param __m2 306 /// A 64-bit integer vector of [4 x i16]. 307 /// Bits [15:0] are written to bits [31:16] of the result. \n 308 /// Bits [31:16] are written to bits [63:48] of the result. 309 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved 310 /// values. 311 static __inline__ __m64 __DEFAULT_FN_ATTRS 312 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) 313 { 314 return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2); 315 } 316 317 /// Unpacks the lower 32 bits from two 64-bit integer vectors of 318 /// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. 319 /// 320 /// \headerfile <x86intrin.h> 321 /// 322 /// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction. 323 /// 324 /// \param __m1 325 /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to 326 /// the lower 32 bits of the result. 327 /// \param __m2 328 /// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to 329 /// the upper 32 bits of the result. 330 /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved 331 /// values. 332 static __inline__ __m64 __DEFAULT_FN_ATTRS 333 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) 334 { 335 return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2); 336 } 337 338 /// Adds each 8-bit integer element of the first 64-bit integer vector 339 /// of [8 x i8] to the corresponding 8-bit integer element of the second 340 /// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are 341 /// packed into a 64-bit integer vector of [8 x i8]. 342 /// 343 /// \headerfile <x86intrin.h> 344 /// 345 /// This intrinsic corresponds to the <c> PADDB </c> instruction. 346 /// 347 /// \param __m1 348 /// A 64-bit integer vector of [8 x i8]. 349 /// \param __m2 350 /// A 64-bit integer vector of [8 x i8]. 351 /// \returns A 64-bit integer vector of [8 x i8] containing the sums of both 352 /// parameters. 353 static __inline__ __m64 __DEFAULT_FN_ATTRS 354 _mm_add_pi8(__m64 __m1, __m64 __m2) 355 { 356 return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2); 357 } 358 359 /// Adds each 16-bit integer element of the first 64-bit integer vector 360 /// of [4 x i16] to the corresponding 16-bit integer element of the second 361 /// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are 362 /// packed into a 64-bit integer vector of [4 x i16]. 363 /// 364 /// \headerfile <x86intrin.h> 365 /// 366 /// This intrinsic corresponds to the <c> PADDW </c> instruction. 367 /// 368 /// \param __m1 369 /// A 64-bit integer vector of [4 x i16]. 370 /// \param __m2 371 /// A 64-bit integer vector of [4 x i16]. 372 /// \returns A 64-bit integer vector of [4 x i16] containing the sums of both 373 /// parameters. 374 static __inline__ __m64 __DEFAULT_FN_ATTRS 375 _mm_add_pi16(__m64 __m1, __m64 __m2) 376 { 377 return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2); 378 } 379 380 /// Adds each 32-bit integer element of the first 64-bit integer vector 381 /// of [2 x i32] to the corresponding 32-bit integer element of the second 382 /// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are 383 /// packed into a 64-bit integer vector of [2 x i32]. 384 /// 385 /// \headerfile <x86intrin.h> 386 /// 387 /// This intrinsic corresponds to the <c> PADDD </c> instruction. 388 /// 389 /// \param __m1 390 /// A 64-bit integer vector of [2 x i32]. 391 /// \param __m2 392 /// A 64-bit integer vector of [2 x i32]. 393 /// \returns A 64-bit integer vector of [2 x i32] containing the sums of both 394 /// parameters. 395 static __inline__ __m64 __DEFAULT_FN_ATTRS 396 _mm_add_pi32(__m64 __m1, __m64 __m2) 397 { 398 return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2); 399 } 400 401 /// Adds each 8-bit signed integer element of the first 64-bit integer 402 /// vector of [8 x i8] to the corresponding 8-bit signed integer element of 403 /// the second 64-bit integer vector of [8 x i8]. Positive sums greater than 404 /// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to 405 /// 0x80. The results are packed into a 64-bit integer vector of [8 x i8]. 406 /// 407 /// \headerfile <x86intrin.h> 408 /// 409 /// This intrinsic corresponds to the <c> PADDSB </c> instruction. 410 /// 411 /// \param __m1 412 /// A 64-bit integer vector of [8 x i8]. 413 /// \param __m2 414 /// A 64-bit integer vector of [8 x i8]. 415 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums 416 /// of both parameters. 417 static __inline__ __m64 __DEFAULT_FN_ATTRS 418 _mm_adds_pi8(__m64 __m1, __m64 __m2) 419 { 420 return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); 421 } 422 423 /// Adds each 16-bit signed integer element of the first 64-bit integer 424 /// vector of [4 x i16] to the corresponding 16-bit signed integer element of 425 /// the second 64-bit integer vector of [4 x i16]. Positive sums greater than 426 /// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are 427 /// saturated to 0x8000. The results are packed into a 64-bit integer vector 428 /// of [4 x i16]. 429 /// 430 /// \headerfile <x86intrin.h> 431 /// 432 /// This intrinsic corresponds to the <c> PADDSW </c> instruction. 433 /// 434 /// \param __m1 435 /// A 64-bit integer vector of [4 x i16]. 436 /// \param __m2 437 /// A 64-bit integer vector of [4 x i16]. 438 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums 439 /// of both parameters. 440 static __inline__ __m64 __DEFAULT_FN_ATTRS 441 _mm_adds_pi16(__m64 __m1, __m64 __m2) 442 { 443 return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); 444 } 445 446 /// Adds each 8-bit unsigned integer element of the first 64-bit integer 447 /// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of 448 /// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are 449 /// saturated to 0xFF. The results are packed into a 64-bit integer vector of 450 /// [8 x i8]. 451 /// 452 /// \headerfile <x86intrin.h> 453 /// 454 /// This intrinsic corresponds to the <c> PADDUSB </c> instruction. 455 /// 456 /// \param __m1 457 /// A 64-bit integer vector of [8 x i8]. 458 /// \param __m2 459 /// A 64-bit integer vector of [8 x i8]. 460 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated 461 /// unsigned sums of both parameters. 462 static __inline__ __m64 __DEFAULT_FN_ATTRS 463 _mm_adds_pu8(__m64 __m1, __m64 __m2) 464 { 465 return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); 466 } 467 468 /// Adds each 16-bit unsigned integer element of the first 64-bit integer 469 /// vector of [4 x i16] to the corresponding 16-bit unsigned integer element 470 /// of the second 64-bit integer vector of [4 x i16]. Sums greater than 471 /// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit 472 /// integer vector of [4 x i16]. 473 /// 474 /// \headerfile <x86intrin.h> 475 /// 476 /// This intrinsic corresponds to the <c> PADDUSW </c> instruction. 477 /// 478 /// \param __m1 479 /// A 64-bit integer vector of [4 x i16]. 480 /// \param __m2 481 /// A 64-bit integer vector of [4 x i16]. 482 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated 483 /// unsigned sums of both parameters. 484 static __inline__ __m64 __DEFAULT_FN_ATTRS 485 _mm_adds_pu16(__m64 __m1, __m64 __m2) 486 { 487 return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); 488 } 489 490 /// Subtracts each 8-bit integer element of the second 64-bit integer 491 /// vector of [8 x i8] from the corresponding 8-bit integer element of the 492 /// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results 493 /// are packed into a 64-bit integer vector of [8 x i8]. 494 /// 495 /// \headerfile <x86intrin.h> 496 /// 497 /// This intrinsic corresponds to the <c> PSUBB </c> instruction. 498 /// 499 /// \param __m1 500 /// A 64-bit integer vector of [8 x i8] containing the minuends. 501 /// \param __m2 502 /// A 64-bit integer vector of [8 x i8] containing the subtrahends. 503 /// \returns A 64-bit integer vector of [8 x i8] containing the differences of 504 /// both parameters. 505 static __inline__ __m64 __DEFAULT_FN_ATTRS 506 _mm_sub_pi8(__m64 __m1, __m64 __m2) 507 { 508 return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2); 509 } 510 511 /// Subtracts each 16-bit integer element of the second 64-bit integer 512 /// vector of [4 x i16] from the corresponding 16-bit integer element of the 513 /// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the 514 /// results are packed into a 64-bit integer vector of [4 x i16]. 515 /// 516 /// \headerfile <x86intrin.h> 517 /// 518 /// This intrinsic corresponds to the <c> PSUBW </c> instruction. 519 /// 520 /// \param __m1 521 /// A 64-bit integer vector of [4 x i16] containing the minuends. 522 /// \param __m2 523 /// A 64-bit integer vector of [4 x i16] containing the subtrahends. 524 /// \returns A 64-bit integer vector of [4 x i16] containing the differences of 525 /// both parameters. 526 static __inline__ __m64 __DEFAULT_FN_ATTRS 527 _mm_sub_pi16(__m64 __m1, __m64 __m2) 528 { 529 return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2); 530 } 531 532 /// Subtracts each 32-bit integer element of the second 64-bit integer 533 /// vector of [2 x i32] from the corresponding 32-bit integer element of the 534 /// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the 535 /// results are packed into a 64-bit integer vector of [2 x i32]. 536 /// 537 /// \headerfile <x86intrin.h> 538 /// 539 /// This intrinsic corresponds to the <c> PSUBD </c> instruction. 540 /// 541 /// \param __m1 542 /// A 64-bit integer vector of [2 x i32] containing the minuends. 543 /// \param __m2 544 /// A 64-bit integer vector of [2 x i32] containing the subtrahends. 545 /// \returns A 64-bit integer vector of [2 x i32] containing the differences of 546 /// both parameters. 547 static __inline__ __m64 __DEFAULT_FN_ATTRS 548 _mm_sub_pi32(__m64 __m1, __m64 __m2) 549 { 550 return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2); 551 } 552 553 /// Subtracts each 8-bit signed integer element of the second 64-bit 554 /// integer vector of [8 x i8] from the corresponding 8-bit signed integer 555 /// element of the first 64-bit integer vector of [8 x i8]. Positive results 556 /// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80 557 /// are saturated to 0x80. The results are packed into a 64-bit integer 558 /// vector of [8 x i8]. 559 /// 560 /// \headerfile <x86intrin.h> 561 /// 562 /// This intrinsic corresponds to the <c> PSUBSB </c> instruction. 563 /// 564 /// \param __m1 565 /// A 64-bit integer vector of [8 x i8] containing the minuends. 566 /// \param __m2 567 /// A 64-bit integer vector of [8 x i8] containing the subtrahends. 568 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated 569 /// differences of both parameters. 570 static __inline__ __m64 __DEFAULT_FN_ATTRS 571 _mm_subs_pi8(__m64 __m1, __m64 __m2) 572 { 573 return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); 574 } 575 576 /// Subtracts each 16-bit signed integer element of the second 64-bit 577 /// integer vector of [4 x i16] from the corresponding 16-bit signed integer 578 /// element of the first 64-bit integer vector of [4 x i16]. Positive results 579 /// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than 580 /// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit 581 /// integer vector of [4 x i16]. 582 /// 583 /// \headerfile <x86intrin.h> 584 /// 585 /// This intrinsic corresponds to the <c> PSUBSW </c> instruction. 586 /// 587 /// \param __m1 588 /// A 64-bit integer vector of [4 x i16] containing the minuends. 589 /// \param __m2 590 /// A 64-bit integer vector of [4 x i16] containing the subtrahends. 591 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated 592 /// differences of both parameters. 593 static __inline__ __m64 __DEFAULT_FN_ATTRS 594 _mm_subs_pi16(__m64 __m1, __m64 __m2) 595 { 596 return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); 597 } 598 599 /// Subtracts each 8-bit unsigned integer element of the second 64-bit 600 /// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer 601 /// element of the first 64-bit integer vector of [8 x i8]. 602 /// 603 /// If an element of the first vector is less than the corresponding element 604 /// of the second vector, the result is saturated to 0. The results are 605 /// packed into a 64-bit integer vector of [8 x i8]. 606 /// 607 /// \headerfile <x86intrin.h> 608 /// 609 /// This intrinsic corresponds to the <c> PSUBUSB </c> instruction. 610 /// 611 /// \param __m1 612 /// A 64-bit integer vector of [8 x i8] containing the minuends. 613 /// \param __m2 614 /// A 64-bit integer vector of [8 x i8] containing the subtrahends. 615 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated 616 /// differences of both parameters. 617 static __inline__ __m64 __DEFAULT_FN_ATTRS 618 _mm_subs_pu8(__m64 __m1, __m64 __m2) 619 { 620 return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); 621 } 622 623 /// Subtracts each 16-bit unsigned integer element of the second 64-bit 624 /// integer vector of [4 x i16] from the corresponding 16-bit unsigned 625 /// integer element of the first 64-bit integer vector of [4 x i16]. 626 /// 627 /// If an element of the first vector is less than the corresponding element 628 /// of the second vector, the result is saturated to 0. The results are 629 /// packed into a 64-bit integer vector of [4 x i16]. 630 /// 631 /// \headerfile <x86intrin.h> 632 /// 633 /// This intrinsic corresponds to the <c> PSUBUSW </c> instruction. 634 /// 635 /// \param __m1 636 /// A 64-bit integer vector of [4 x i16] containing the minuends. 637 /// \param __m2 638 /// A 64-bit integer vector of [4 x i16] containing the subtrahends. 639 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated 640 /// differences of both parameters. 641 static __inline__ __m64 __DEFAULT_FN_ATTRS 642 _mm_subs_pu16(__m64 __m1, __m64 __m2) 643 { 644 return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); 645 } 646 647 /// Multiplies each 16-bit signed integer element of the first 64-bit 648 /// integer vector of [4 x i16] by the corresponding 16-bit signed integer 649 /// element of the second 64-bit integer vector of [4 x i16] and get four 650 /// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums. 651 /// The lower 32 bits of these two sums are packed into a 64-bit integer 652 /// vector of [2 x i32]. 653 /// 654 /// For example, bits [15:0] of both parameters are multiplied, bits [31:16] 655 /// of both parameters are multiplied, and the sum of both results is written 656 /// to bits [31:0] of the result. 657 /// 658 /// \headerfile <x86intrin.h> 659 /// 660 /// This intrinsic corresponds to the <c> PMADDWD </c> instruction. 661 /// 662 /// \param __m1 663 /// A 64-bit integer vector of [4 x i16]. 664 /// \param __m2 665 /// A 64-bit integer vector of [4 x i16]. 666 /// \returns A 64-bit integer vector of [2 x i32] containing the sums of 667 /// products of both parameters. 668 static __inline__ __m64 __DEFAULT_FN_ATTRS 669 _mm_madd_pi16(__m64 __m1, __m64 __m2) 670 { 671 return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); 672 } 673 674 /// Multiplies each 16-bit signed integer element of the first 64-bit 675 /// integer vector of [4 x i16] by the corresponding 16-bit signed integer 676 /// element of the second 64-bit integer vector of [4 x i16]. Packs the upper 677 /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. 678 /// 679 /// \headerfile <x86intrin.h> 680 /// 681 /// This intrinsic corresponds to the <c> PMULHW </c> instruction. 682 /// 683 /// \param __m1 684 /// A 64-bit integer vector of [4 x i16]. 685 /// \param __m2 686 /// A 64-bit integer vector of [4 x i16]. 687 /// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits 688 /// of the products of both parameters. 689 static __inline__ __m64 __DEFAULT_FN_ATTRS 690 _mm_mulhi_pi16(__m64 __m1, __m64 __m2) 691 { 692 return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); 693 } 694 695 /// Multiplies each 16-bit signed integer element of the first 64-bit 696 /// integer vector of [4 x i16] by the corresponding 16-bit signed integer 697 /// element of the second 64-bit integer vector of [4 x i16]. Packs the lower 698 /// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. 699 /// 700 /// \headerfile <x86intrin.h> 701 /// 702 /// This intrinsic corresponds to the <c> PMULLW </c> instruction. 703 /// 704 /// \param __m1 705 /// A 64-bit integer vector of [4 x i16]. 706 /// \param __m2 707 /// A 64-bit integer vector of [4 x i16]. 708 /// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits 709 /// of the products of both parameters. 710 static __inline__ __m64 __DEFAULT_FN_ATTRS 711 _mm_mullo_pi16(__m64 __m1, __m64 __m2) 712 { 713 return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2); 714 } 715 716 /// Left-shifts each 16-bit signed integer element of the first 717 /// parameter, which is a 64-bit integer vector of [4 x i16], by the number 718 /// of bits specified by the second parameter, which is a 64-bit integer. The 719 /// lower 16 bits of the results are packed into a 64-bit integer vector of 720 /// [4 x i16]. 721 /// 722 /// \headerfile <x86intrin.h> 723 /// 724 /// This intrinsic corresponds to the <c> PSLLW </c> instruction. 725 /// 726 /// \param __m 727 /// A 64-bit integer vector of [4 x i16]. 728 /// \param __count 729 /// A 64-bit integer vector interpreted as a single 64-bit integer. 730 /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted 731 /// values. If \a __count is greater or equal to 16, the result is set to all 732 /// 0. 733 static __inline__ __m64 __DEFAULT_FN_ATTRS 734 _mm_sll_pi16(__m64 __m, __m64 __count) 735 { 736 return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); 737 } 738 739 /// Left-shifts each 16-bit signed integer element of a 64-bit integer 740 /// vector of [4 x i16] by the number of bits specified by a 32-bit integer. 741 /// The lower 16 bits of the results are packed into a 64-bit integer vector 742 /// of [4 x i16]. 743 /// 744 /// \headerfile <x86intrin.h> 745 /// 746 /// This intrinsic corresponds to the <c> PSLLW </c> instruction. 747 /// 748 /// \param __m 749 /// A 64-bit integer vector of [4 x i16]. 750 /// \param __count 751 /// A 32-bit integer value. 752 /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted 753 /// values. If \a __count is greater or equal to 16, the result is set to all 754 /// 0. 755 static __inline__ __m64 __DEFAULT_FN_ATTRS 756 _mm_slli_pi16(__m64 __m, int __count) 757 { 758 return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); 759 } 760 761 /// Left-shifts each 32-bit signed integer element of the first 762 /// parameter, which is a 64-bit integer vector of [2 x i32], by the number 763 /// of bits specified by the second parameter, which is a 64-bit integer. The 764 /// lower 32 bits of the results are packed into a 64-bit integer vector of 765 /// [2 x i32]. 766 /// 767 /// \headerfile <x86intrin.h> 768 /// 769 /// This intrinsic corresponds to the <c> PSLLD </c> instruction. 770 /// 771 /// \param __m 772 /// A 64-bit integer vector of [2 x i32]. 773 /// \param __count 774 /// A 64-bit integer vector interpreted as a single 64-bit integer. 775 /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted 776 /// values. If \a __count is greater or equal to 32, the result is set to all 777 /// 0. 778 static __inline__ __m64 __DEFAULT_FN_ATTRS 779 _mm_sll_pi32(__m64 __m, __m64 __count) 780 { 781 return (__m64)__builtin_ia32_pslld((__v2si)__m, __count); 782 } 783 784 /// Left-shifts each 32-bit signed integer element of a 64-bit integer 785 /// vector of [2 x i32] by the number of bits specified by a 32-bit integer. 786 /// The lower 32 bits of the results are packed into a 64-bit integer vector 787 /// of [2 x i32]. 788 /// 789 /// \headerfile <x86intrin.h> 790 /// 791 /// This intrinsic corresponds to the <c> PSLLD </c> instruction. 792 /// 793 /// \param __m 794 /// A 64-bit integer vector of [2 x i32]. 795 /// \param __count 796 /// A 32-bit integer value. 797 /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted 798 /// values. If \a __count is greater or equal to 32, the result is set to all 799 /// 0. 800 static __inline__ __m64 __DEFAULT_FN_ATTRS 801 _mm_slli_pi32(__m64 __m, int __count) 802 { 803 return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count); 804 } 805 806 /// Left-shifts the first 64-bit integer parameter by the number of bits 807 /// specified by the second 64-bit integer parameter. The lower 64 bits of 808 /// result are returned. 809 /// 810 /// \headerfile <x86intrin.h> 811 /// 812 /// This intrinsic corresponds to the <c> PSLLQ </c> instruction. 813 /// 814 /// \param __m 815 /// A 64-bit integer vector interpreted as a single 64-bit integer. 816 /// \param __count 817 /// A 64-bit integer vector interpreted as a single 64-bit integer. 818 /// \returns A 64-bit integer vector containing the left-shifted value. If 819 /// \a __count is greater or equal to 64, the result is set to 0. 820 static __inline__ __m64 __DEFAULT_FN_ATTRS 821 _mm_sll_si64(__m64 __m, __m64 __count) 822 { 823 return (__m64)__builtin_ia32_psllq((__v1di)__m, __count); 824 } 825 826 /// Left-shifts the first parameter, which is a 64-bit integer, by the 827 /// number of bits specified by the second parameter, which is a 32-bit 828 /// integer. The lower 64 bits of result are returned. 829 /// 830 /// \headerfile <x86intrin.h> 831 /// 832 /// This intrinsic corresponds to the <c> PSLLQ </c> instruction. 833 /// 834 /// \param __m 835 /// A 64-bit integer vector interpreted as a single 64-bit integer. 836 /// \param __count 837 /// A 32-bit integer value. 838 /// \returns A 64-bit integer vector containing the left-shifted value. If 839 /// \a __count is greater or equal to 64, the result is set to 0. 840 static __inline__ __m64 __DEFAULT_FN_ATTRS 841 _mm_slli_si64(__m64 __m, int __count) 842 { 843 return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count); 844 } 845 846 /// Right-shifts each 16-bit integer element of the first parameter, 847 /// which is a 64-bit integer vector of [4 x i16], by the number of bits 848 /// specified by the second parameter, which is a 64-bit integer. 849 /// 850 /// High-order bits are filled with the sign bit of the initial value of each 851 /// 16-bit element. The 16-bit results are packed into a 64-bit integer 852 /// vector of [4 x i16]. 853 /// 854 /// \headerfile <x86intrin.h> 855 /// 856 /// This intrinsic corresponds to the <c> PSRAW </c> instruction. 857 /// 858 /// \param __m 859 /// A 64-bit integer vector of [4 x i16]. 860 /// \param __count 861 /// A 64-bit integer vector interpreted as a single 64-bit integer. 862 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 863 /// values. 864 static __inline__ __m64 __DEFAULT_FN_ATTRS 865 _mm_sra_pi16(__m64 __m, __m64 __count) 866 { 867 return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count); 868 } 869 870 /// Right-shifts each 16-bit integer element of a 64-bit integer vector 871 /// of [4 x i16] by the number of bits specified by a 32-bit integer. 872 /// 873 /// High-order bits are filled with the sign bit of the initial value of each 874 /// 16-bit element. The 16-bit results are packed into a 64-bit integer 875 /// vector of [4 x i16]. 876 /// 877 /// \headerfile <x86intrin.h> 878 /// 879 /// This intrinsic corresponds to the <c> PSRAW </c> instruction. 880 /// 881 /// \param __m 882 /// A 64-bit integer vector of [4 x i16]. 883 /// \param __count 884 /// A 32-bit integer value. 885 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 886 /// values. 887 static __inline__ __m64 __DEFAULT_FN_ATTRS 888 _mm_srai_pi16(__m64 __m, int __count) 889 { 890 return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count); 891 } 892 893 /// Right-shifts each 32-bit integer element of the first parameter, 894 /// which is a 64-bit integer vector of [2 x i32], by the number of bits 895 /// specified by the second parameter, which is a 64-bit integer. 896 /// 897 /// High-order bits are filled with the sign bit of the initial value of each 898 /// 32-bit element. The 32-bit results are packed into a 64-bit integer 899 /// vector of [2 x i32]. 900 /// 901 /// \headerfile <x86intrin.h> 902 /// 903 /// This intrinsic corresponds to the <c> PSRAD </c> instruction. 904 /// 905 /// \param __m 906 /// A 64-bit integer vector of [2 x i32]. 907 /// \param __count 908 /// A 64-bit integer vector interpreted as a single 64-bit integer. 909 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 910 /// values. 911 static __inline__ __m64 __DEFAULT_FN_ATTRS 912 _mm_sra_pi32(__m64 __m, __m64 __count) 913 { 914 return (__m64)__builtin_ia32_psrad((__v2si)__m, __count); 915 } 916 917 /// Right-shifts each 32-bit integer element of a 64-bit integer vector 918 /// of [2 x i32] by the number of bits specified by a 32-bit integer. 919 /// 920 /// High-order bits are filled with the sign bit of the initial value of each 921 /// 32-bit element. The 32-bit results are packed into a 64-bit integer 922 /// vector of [2 x i32]. 923 /// 924 /// \headerfile <x86intrin.h> 925 /// 926 /// This intrinsic corresponds to the <c> PSRAD </c> instruction. 927 /// 928 /// \param __m 929 /// A 64-bit integer vector of [2 x i32]. 930 /// \param __count 931 /// A 32-bit integer value. 932 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 933 /// values. 934 static __inline__ __m64 __DEFAULT_FN_ATTRS 935 _mm_srai_pi32(__m64 __m, int __count) 936 { 937 return (__m64)__builtin_ia32_psradi((__v2si)__m, __count); 938 } 939 940 /// Right-shifts each 16-bit integer element of the first parameter, 941 /// which is a 64-bit integer vector of [4 x i16], by the number of bits 942 /// specified by the second parameter, which is a 64-bit integer. 943 /// 944 /// High-order bits are cleared. The 16-bit results are packed into a 64-bit 945 /// integer vector of [4 x i16]. 946 /// 947 /// \headerfile <x86intrin.h> 948 /// 949 /// This intrinsic corresponds to the <c> PSRLW </c> instruction. 950 /// 951 /// \param __m 952 /// A 64-bit integer vector of [4 x i16]. 953 /// \param __count 954 /// A 64-bit integer vector interpreted as a single 64-bit integer. 955 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 956 /// values. 957 static __inline__ __m64 __DEFAULT_FN_ATTRS 958 _mm_srl_pi16(__m64 __m, __m64 __count) 959 { 960 return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count); 961 } 962 963 /// Right-shifts each 16-bit integer element of a 64-bit integer vector 964 /// of [4 x i16] by the number of bits specified by a 32-bit integer. 965 /// 966 /// High-order bits are cleared. The 16-bit results are packed into a 64-bit 967 /// integer vector of [4 x i16]. 968 /// 969 /// \headerfile <x86intrin.h> 970 /// 971 /// This intrinsic corresponds to the <c> PSRLW </c> instruction. 972 /// 973 /// \param __m 974 /// A 64-bit integer vector of [4 x i16]. 975 /// \param __count 976 /// A 32-bit integer value. 977 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 978 /// values. 979 static __inline__ __m64 __DEFAULT_FN_ATTRS 980 _mm_srli_pi16(__m64 __m, int __count) 981 { 982 return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count); 983 } 984 985 /// Right-shifts each 32-bit integer element of the first parameter, 986 /// which is a 64-bit integer vector of [2 x i32], by the number of bits 987 /// specified by the second parameter, which is a 64-bit integer. 988 /// 989 /// High-order bits are cleared. The 32-bit results are packed into a 64-bit 990 /// integer vector of [2 x i32]. 991 /// 992 /// \headerfile <x86intrin.h> 993 /// 994 /// This intrinsic corresponds to the <c> PSRLD </c> instruction. 995 /// 996 /// \param __m 997 /// A 64-bit integer vector of [2 x i32]. 998 /// \param __count 999 /// A 64-bit integer vector interpreted as a single 64-bit integer. 1000 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 1001 /// values. 1002 static __inline__ __m64 __DEFAULT_FN_ATTRS 1003 _mm_srl_pi32(__m64 __m, __m64 __count) 1004 { 1005 return (__m64)__builtin_ia32_psrld((__v2si)__m, __count); 1006 } 1007 1008 /// Right-shifts each 32-bit integer element of a 64-bit integer vector 1009 /// of [2 x i32] by the number of bits specified by a 32-bit integer. 1010 /// 1011 /// High-order bits are cleared. The 32-bit results are packed into a 64-bit 1012 /// integer vector of [2 x i32]. 1013 /// 1014 /// \headerfile <x86intrin.h> 1015 /// 1016 /// This intrinsic corresponds to the <c> PSRLD </c> instruction. 1017 /// 1018 /// \param __m 1019 /// A 64-bit integer vector of [2 x i32]. 1020 /// \param __count 1021 /// A 32-bit integer value. 1022 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 1023 /// values. 1024 static __inline__ __m64 __DEFAULT_FN_ATTRS 1025 _mm_srli_pi32(__m64 __m, int __count) 1026 { 1027 return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count); 1028 } 1029 1030 /// Right-shifts the first 64-bit integer parameter by the number of bits 1031 /// specified by the second 64-bit integer parameter. 1032 /// 1033 /// High-order bits are cleared. 1034 /// 1035 /// \headerfile <x86intrin.h> 1036 /// 1037 /// This intrinsic corresponds to the <c> PSRLQ </c> instruction. 1038 /// 1039 /// \param __m 1040 /// A 64-bit integer vector interpreted as a single 64-bit integer. 1041 /// \param __count 1042 /// A 64-bit integer vector interpreted as a single 64-bit integer. 1043 /// \returns A 64-bit integer vector containing the right-shifted value. 1044 static __inline__ __m64 __DEFAULT_FN_ATTRS 1045 _mm_srl_si64(__m64 __m, __m64 __count) 1046 { 1047 return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count); 1048 } 1049 1050 /// Right-shifts the first parameter, which is a 64-bit integer, by the 1051 /// number of bits specified by the second parameter, which is a 32-bit 1052 /// integer. 1053 /// 1054 /// High-order bits are cleared. 1055 /// 1056 /// \headerfile <x86intrin.h> 1057 /// 1058 /// This intrinsic corresponds to the <c> PSRLQ </c> instruction. 1059 /// 1060 /// \param __m 1061 /// A 64-bit integer vector interpreted as a single 64-bit integer. 1062 /// \param __count 1063 /// A 32-bit integer value. 1064 /// \returns A 64-bit integer vector containing the right-shifted value. 1065 static __inline__ __m64 __DEFAULT_FN_ATTRS 1066 _mm_srli_si64(__m64 __m, int __count) 1067 { 1068 return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count); 1069 } 1070 1071 /// Performs a bitwise AND of two 64-bit integer vectors. 1072 /// 1073 /// \headerfile <x86intrin.h> 1074 /// 1075 /// This intrinsic corresponds to the <c> PAND </c> instruction. 1076 /// 1077 /// \param __m1 1078 /// A 64-bit integer vector. 1079 /// \param __m2 1080 /// A 64-bit integer vector. 1081 /// \returns A 64-bit integer vector containing the bitwise AND of both 1082 /// parameters. 1083 static __inline__ __m64 __DEFAULT_FN_ATTRS 1084 _mm_and_si64(__m64 __m1, __m64 __m2) 1085 { 1086 return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2); 1087 } 1088 1089 /// Performs a bitwise NOT of the first 64-bit integer vector, and then 1090 /// performs a bitwise AND of the intermediate result and the second 64-bit 1091 /// integer vector. 1092 /// 1093 /// \headerfile <x86intrin.h> 1094 /// 1095 /// This intrinsic corresponds to the <c> PANDN </c> instruction. 1096 /// 1097 /// \param __m1 1098 /// A 64-bit integer vector. The one's complement of this parameter is used 1099 /// in the bitwise AND. 1100 /// \param __m2 1101 /// A 64-bit integer vector. 1102 /// \returns A 64-bit integer vector containing the bitwise AND of the second 1103 /// parameter and the one's complement of the first parameter. 1104 static __inline__ __m64 __DEFAULT_FN_ATTRS 1105 _mm_andnot_si64(__m64 __m1, __m64 __m2) 1106 { 1107 return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2); 1108 } 1109 1110 /// Performs a bitwise OR of two 64-bit integer vectors. 1111 /// 1112 /// \headerfile <x86intrin.h> 1113 /// 1114 /// This intrinsic corresponds to the <c> POR </c> instruction. 1115 /// 1116 /// \param __m1 1117 /// A 64-bit integer vector. 1118 /// \param __m2 1119 /// A 64-bit integer vector. 1120 /// \returns A 64-bit integer vector containing the bitwise OR of both 1121 /// parameters. 1122 static __inline__ __m64 __DEFAULT_FN_ATTRS 1123 _mm_or_si64(__m64 __m1, __m64 __m2) 1124 { 1125 return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2); 1126 } 1127 1128 /// Performs a bitwise exclusive OR of two 64-bit integer vectors. 1129 /// 1130 /// \headerfile <x86intrin.h> 1131 /// 1132 /// This intrinsic corresponds to the <c> PXOR </c> instruction. 1133 /// 1134 /// \param __m1 1135 /// A 64-bit integer vector. 1136 /// \param __m2 1137 /// A 64-bit integer vector. 1138 /// \returns A 64-bit integer vector containing the bitwise exclusive OR of both 1139 /// parameters. 1140 static __inline__ __m64 __DEFAULT_FN_ATTRS 1141 _mm_xor_si64(__m64 __m1, __m64 __m2) 1142 { 1143 return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2); 1144 } 1145 1146 /// Compares the 8-bit integer elements of two 64-bit integer vectors of 1147 /// [8 x i8] to determine if the element of the first vector is equal to the 1148 /// corresponding element of the second vector. 1149 /// 1150 /// The comparison yields 0 for false, 0xFF for true. 1151 /// 1152 /// \headerfile <x86intrin.h> 1153 /// 1154 /// This intrinsic corresponds to the <c> PCMPEQB </c> instruction. 1155 /// 1156 /// \param __m1 1157 /// A 64-bit integer vector of [8 x i8]. 1158 /// \param __m2 1159 /// A 64-bit integer vector of [8 x i8]. 1160 /// \returns A 64-bit integer vector of [8 x i8] containing the comparison 1161 /// results. 1162 static __inline__ __m64 __DEFAULT_FN_ATTRS 1163 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) 1164 { 1165 return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2); 1166 } 1167 1168 /// Compares the 16-bit integer elements of two 64-bit integer vectors of 1169 /// [4 x i16] to determine if the element of the first vector is equal to the 1170 /// corresponding element of the second vector. 1171 /// 1172 /// The comparison yields 0 for false, 0xFFFF for true. 1173 /// 1174 /// \headerfile <x86intrin.h> 1175 /// 1176 /// This intrinsic corresponds to the <c> PCMPEQW </c> instruction. 1177 /// 1178 /// \param __m1 1179 /// A 64-bit integer vector of [4 x i16]. 1180 /// \param __m2 1181 /// A 64-bit integer vector of [4 x i16]. 1182 /// \returns A 64-bit integer vector of [4 x i16] containing the comparison 1183 /// results. 1184 static __inline__ __m64 __DEFAULT_FN_ATTRS 1185 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) 1186 { 1187 return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2); 1188 } 1189 1190 /// Compares the 32-bit integer elements of two 64-bit integer vectors of 1191 /// [2 x i32] to determine if the element of the first vector is equal to the 1192 /// corresponding element of the second vector. 1193 /// 1194 /// The comparison yields 0 for false, 0xFFFFFFFF for true. 1195 /// 1196 /// \headerfile <x86intrin.h> 1197 /// 1198 /// This intrinsic corresponds to the <c> PCMPEQD </c> instruction. 1199 /// 1200 /// \param __m1 1201 /// A 64-bit integer vector of [2 x i32]. 1202 /// \param __m2 1203 /// A 64-bit integer vector of [2 x i32]. 1204 /// \returns A 64-bit integer vector of [2 x i32] containing the comparison 1205 /// results. 1206 static __inline__ __m64 __DEFAULT_FN_ATTRS 1207 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) 1208 { 1209 return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2); 1210 } 1211 1212 /// Compares the 8-bit integer elements of two 64-bit integer vectors of 1213 /// [8 x i8] to determine if the element of the first vector is greater than 1214 /// the corresponding element of the second vector. 1215 /// 1216 /// The comparison yields 0 for false, 0xFF for true. 1217 /// 1218 /// \headerfile <x86intrin.h> 1219 /// 1220 /// This intrinsic corresponds to the <c> PCMPGTB </c> instruction. 1221 /// 1222 /// \param __m1 1223 /// A 64-bit integer vector of [8 x i8]. 1224 /// \param __m2 1225 /// A 64-bit integer vector of [8 x i8]. 1226 /// \returns A 64-bit integer vector of [8 x i8] containing the comparison 1227 /// results. 1228 static __inline__ __m64 __DEFAULT_FN_ATTRS 1229 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) 1230 { 1231 return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2); 1232 } 1233 1234 /// Compares the 16-bit integer elements of two 64-bit integer vectors of 1235 /// [4 x i16] to determine if the element of the first vector is greater than 1236 /// the corresponding element of the second vector. 1237 /// 1238 /// The comparison yields 0 for false, 0xFFFF for true. 1239 /// 1240 /// \headerfile <x86intrin.h> 1241 /// 1242 /// This intrinsic corresponds to the <c> PCMPGTW </c> instruction. 1243 /// 1244 /// \param __m1 1245 /// A 64-bit integer vector of [4 x i16]. 1246 /// \param __m2 1247 /// A 64-bit integer vector of [4 x i16]. 1248 /// \returns A 64-bit integer vector of [4 x i16] containing the comparison 1249 /// results. 1250 static __inline__ __m64 __DEFAULT_FN_ATTRS 1251 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) 1252 { 1253 return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2); 1254 } 1255 1256 /// Compares the 32-bit integer elements of two 64-bit integer vectors of 1257 /// [2 x i32] to determine if the element of the first vector is greater than 1258 /// the corresponding element of the second vector. 1259 /// 1260 /// The comparison yields 0 for false, 0xFFFFFFFF for true. 1261 /// 1262 /// \headerfile <x86intrin.h> 1263 /// 1264 /// This intrinsic corresponds to the <c> PCMPGTD </c> instruction. 1265 /// 1266 /// \param __m1 1267 /// A 64-bit integer vector of [2 x i32]. 1268 /// \param __m2 1269 /// A 64-bit integer vector of [2 x i32]. 1270 /// \returns A 64-bit integer vector of [2 x i32] containing the comparison 1271 /// results. 1272 static __inline__ __m64 __DEFAULT_FN_ATTRS 1273 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) 1274 { 1275 return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2); 1276 } 1277 1278 /// Constructs a 64-bit integer vector initialized to zero. 1279 /// 1280 /// \headerfile <x86intrin.h> 1281 /// 1282 /// This intrinsic corresponds to the <c> PXOR </c> instruction. 1283 /// 1284 /// \returns An initialized 64-bit integer vector with all elements set to zero. 1285 static __inline__ __m64 __DEFAULT_FN_ATTRS 1286 _mm_setzero_si64(void) 1287 { 1288 return __extension__ (__m64){ 0LL }; 1289 } 1290 1291 /// Constructs a 64-bit integer vector initialized with the specified 1292 /// 32-bit integer values. 1293 /// 1294 /// \headerfile <x86intrin.h> 1295 /// 1296 /// This intrinsic is a utility function and does not correspond to a specific 1297 /// instruction. 1298 /// 1299 /// \param __i1 1300 /// A 32-bit integer value used to initialize the upper 32 bits of the 1301 /// result. 1302 /// \param __i0 1303 /// A 32-bit integer value used to initialize the lower 32 bits of the 1304 /// result. 1305 /// \returns An initialized 64-bit integer vector. 1306 static __inline__ __m64 __DEFAULT_FN_ATTRS 1307 _mm_set_pi32(int __i1, int __i0) 1308 { 1309 return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1); 1310 } 1311 1312 /// Constructs a 64-bit integer vector initialized with the specified 1313 /// 16-bit integer values. 1314 /// 1315 /// \headerfile <x86intrin.h> 1316 /// 1317 /// This intrinsic is a utility function and does not correspond to a specific 1318 /// instruction. 1319 /// 1320 /// \param __s3 1321 /// A 16-bit integer value used to initialize bits [63:48] of the result. 1322 /// \param __s2 1323 /// A 16-bit integer value used to initialize bits [47:32] of the result. 1324 /// \param __s1 1325 /// A 16-bit integer value used to initialize bits [31:16] of the result. 1326 /// \param __s0 1327 /// A 16-bit integer value used to initialize bits [15:0] of the result. 1328 /// \returns An initialized 64-bit integer vector. 1329 static __inline__ __m64 __DEFAULT_FN_ATTRS 1330 _mm_set_pi16(short __s3, short __s2, short __s1, short __s0) 1331 { 1332 return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3); 1333 } 1334 1335 /// Constructs a 64-bit integer vector initialized with the specified 1336 /// 8-bit integer values. 1337 /// 1338 /// \headerfile <x86intrin.h> 1339 /// 1340 /// This intrinsic is a utility function and does not correspond to a specific 1341 /// instruction. 1342 /// 1343 /// \param __b7 1344 /// An 8-bit integer value used to initialize bits [63:56] of the result. 1345 /// \param __b6 1346 /// An 8-bit integer value used to initialize bits [55:48] of the result. 1347 /// \param __b5 1348 /// An 8-bit integer value used to initialize bits [47:40] of the result. 1349 /// \param __b4 1350 /// An 8-bit integer value used to initialize bits [39:32] of the result. 1351 /// \param __b3 1352 /// An 8-bit integer value used to initialize bits [31:24] of the result. 1353 /// \param __b2 1354 /// An 8-bit integer value used to initialize bits [23:16] of the result. 1355 /// \param __b1 1356 /// An 8-bit integer value used to initialize bits [15:8] of the result. 1357 /// \param __b0 1358 /// An 8-bit integer value used to initialize bits [7:0] of the result. 1359 /// \returns An initialized 64-bit integer vector. 1360 static __inline__ __m64 __DEFAULT_FN_ATTRS 1361 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, 1362 char __b1, char __b0) 1363 { 1364 return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, 1365 __b4, __b5, __b6, __b7); 1366 } 1367 1368 /// Constructs a 64-bit integer vector of [2 x i32], with each of the 1369 /// 32-bit integer vector elements set to the specified 32-bit integer 1370 /// value. 1371 /// 1372 /// \headerfile <x86intrin.h> 1373 /// 1374 /// This intrinsic is a utility function and does not correspond to a specific 1375 /// instruction. 1376 /// 1377 /// \param __i 1378 /// A 32-bit integer value used to initialize each vector element of the 1379 /// result. 1380 /// \returns An initialized 64-bit integer vector of [2 x i32]. 1381 static __inline__ __m64 __DEFAULT_FN_ATTRS 1382 _mm_set1_pi32(int __i) 1383 { 1384 return _mm_set_pi32(__i, __i); 1385 } 1386 1387 /// Constructs a 64-bit integer vector of [4 x i16], with each of the 1388 /// 16-bit integer vector elements set to the specified 16-bit integer 1389 /// value. 1390 /// 1391 /// \headerfile <x86intrin.h> 1392 /// 1393 /// This intrinsic is a utility function and does not correspond to a specific 1394 /// instruction. 1395 /// 1396 /// \param __w 1397 /// A 16-bit integer value used to initialize each vector element of the 1398 /// result. 1399 /// \returns An initialized 64-bit integer vector of [4 x i16]. 1400 static __inline__ __m64 __DEFAULT_FN_ATTRS 1401 _mm_set1_pi16(short __w) 1402 { 1403 return _mm_set_pi16(__w, __w, __w, __w); 1404 } 1405 1406 /// Constructs a 64-bit integer vector of [8 x i8], with each of the 1407 /// 8-bit integer vector elements set to the specified 8-bit integer value. 1408 /// 1409 /// \headerfile <x86intrin.h> 1410 /// 1411 /// This intrinsic is a utility function and does not correspond to a specific 1412 /// instruction. 1413 /// 1414 /// \param __b 1415 /// An 8-bit integer value used to initialize each vector element of the 1416 /// result. 1417 /// \returns An initialized 64-bit integer vector of [8 x i8]. 1418 static __inline__ __m64 __DEFAULT_FN_ATTRS 1419 _mm_set1_pi8(char __b) 1420 { 1421 return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); 1422 } 1423 1424 /// Constructs a 64-bit integer vector, initialized in reverse order with 1425 /// the specified 32-bit integer values. 1426 /// 1427 /// \headerfile <x86intrin.h> 1428 /// 1429 /// This intrinsic is a utility function and does not correspond to a specific 1430 /// instruction. 1431 /// 1432 /// \param __i0 1433 /// A 32-bit integer value used to initialize the lower 32 bits of the 1434 /// result. 1435 /// \param __i1 1436 /// A 32-bit integer value used to initialize the upper 32 bits of the 1437 /// result. 1438 /// \returns An initialized 64-bit integer vector. 1439 static __inline__ __m64 __DEFAULT_FN_ATTRS 1440 _mm_setr_pi32(int __i0, int __i1) 1441 { 1442 return _mm_set_pi32(__i1, __i0); 1443 } 1444 1445 /// Constructs a 64-bit integer vector, initialized in reverse order with 1446 /// the specified 16-bit integer values. 1447 /// 1448 /// \headerfile <x86intrin.h> 1449 /// 1450 /// This intrinsic is a utility function and does not correspond to a specific 1451 /// instruction. 1452 /// 1453 /// \param __w0 1454 /// A 16-bit integer value used to initialize bits [15:0] of the result. 1455 /// \param __w1 1456 /// A 16-bit integer value used to initialize bits [31:16] of the result. 1457 /// \param __w2 1458 /// A 16-bit integer value used to initialize bits [47:32] of the result. 1459 /// \param __w3 1460 /// A 16-bit integer value used to initialize bits [63:48] of the result. 1461 /// \returns An initialized 64-bit integer vector. 1462 static __inline__ __m64 __DEFAULT_FN_ATTRS 1463 _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) 1464 { 1465 return _mm_set_pi16(__w3, __w2, __w1, __w0); 1466 } 1467 1468 /// Constructs a 64-bit integer vector, initialized in reverse order with 1469 /// the specified 8-bit integer values. 1470 /// 1471 /// \headerfile <x86intrin.h> 1472 /// 1473 /// This intrinsic is a utility function and does not correspond to a specific 1474 /// instruction. 1475 /// 1476 /// \param __b0 1477 /// An 8-bit integer value used to initialize bits [7:0] of the result. 1478 /// \param __b1 1479 /// An 8-bit integer value used to initialize bits [15:8] of the result. 1480 /// \param __b2 1481 /// An 8-bit integer value used to initialize bits [23:16] of the result. 1482 /// \param __b3 1483 /// An 8-bit integer value used to initialize bits [31:24] of the result. 1484 /// \param __b4 1485 /// An 8-bit integer value used to initialize bits [39:32] of the result. 1486 /// \param __b5 1487 /// An 8-bit integer value used to initialize bits [47:40] of the result. 1488 /// \param __b6 1489 /// An 8-bit integer value used to initialize bits [55:48] of the result. 1490 /// \param __b7 1491 /// An 8-bit integer value used to initialize bits [63:56] of the result. 1492 /// \returns An initialized 64-bit integer vector. 1493 static __inline__ __m64 __DEFAULT_FN_ATTRS 1494 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, 1495 char __b6, char __b7) 1496 { 1497 return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); 1498 } 1499 1500 #undef __DEFAULT_FN_ATTRS 1501 1502 /* Aliases for compatibility. */ 1503 #define _m_empty _mm_empty 1504 #define _m_from_int _mm_cvtsi32_si64 1505 #define _m_from_int64 _mm_cvtsi64_m64 1506 #define _m_to_int _mm_cvtsi64_si32 1507 #define _m_to_int64 _mm_cvtm64_si64 1508 #define _m_packsswb _mm_packs_pi16 1509 #define _m_packssdw _mm_packs_pi32 1510 #define _m_packuswb _mm_packs_pu16 1511 #define _m_punpckhbw _mm_unpackhi_pi8 1512 #define _m_punpckhwd _mm_unpackhi_pi16 1513 #define _m_punpckhdq _mm_unpackhi_pi32 1514 #define _m_punpcklbw _mm_unpacklo_pi8 1515 #define _m_punpcklwd _mm_unpacklo_pi16 1516 #define _m_punpckldq _mm_unpacklo_pi32 1517 #define _m_paddb _mm_add_pi8 1518 #define _m_paddw _mm_add_pi16 1519 #define _m_paddd _mm_add_pi32 1520 #define _m_paddsb _mm_adds_pi8 1521 #define _m_paddsw _mm_adds_pi16 1522 #define _m_paddusb _mm_adds_pu8 1523 #define _m_paddusw _mm_adds_pu16 1524 #define _m_psubb _mm_sub_pi8 1525 #define _m_psubw _mm_sub_pi16 1526 #define _m_psubd _mm_sub_pi32 1527 #define _m_psubsb _mm_subs_pi8 1528 #define _m_psubsw _mm_subs_pi16 1529 #define _m_psubusb _mm_subs_pu8 1530 #define _m_psubusw _mm_subs_pu16 1531 #define _m_pmaddwd _mm_madd_pi16 1532 #define _m_pmulhw _mm_mulhi_pi16 1533 #define _m_pmullw _mm_mullo_pi16 1534 #define _m_psllw _mm_sll_pi16 1535 #define _m_psllwi _mm_slli_pi16 1536 #define _m_pslld _mm_sll_pi32 1537 #define _m_pslldi _mm_slli_pi32 1538 #define _m_psllq _mm_sll_si64 1539 #define _m_psllqi _mm_slli_si64 1540 #define _m_psraw _mm_sra_pi16 1541 #define _m_psrawi _mm_srai_pi16 1542 #define _m_psrad _mm_sra_pi32 1543 #define _m_psradi _mm_srai_pi32 1544 #define _m_psrlw _mm_srl_pi16 1545 #define _m_psrlwi _mm_srli_pi16 1546 #define _m_psrld _mm_srl_pi32 1547 #define _m_psrldi _mm_srli_pi32 1548 #define _m_psrlq _mm_srl_si64 1549 #define _m_psrlqi _mm_srli_si64 1550 #define _m_pand _mm_and_si64 1551 #define _m_pandn _mm_andnot_si64 1552 #define _m_por _mm_or_si64 1553 #define _m_pxor _mm_xor_si64 1554 #define _m_pcmpeqb _mm_cmpeq_pi8 1555 #define _m_pcmpeqw _mm_cmpeq_pi16 1556 #define _m_pcmpeqd _mm_cmpeq_pi32 1557 #define _m_pcmpgtb _mm_cmpgt_pi8 1558 #define _m_pcmpgtw _mm_cmpgt_pi16 1559 #define _m_pcmpgtd _mm_cmpgt_pi32 1560 1561 #endif /* __MMINTRIN_H */ 1562 1563