/* ===-------- ia32intrin.h ---------------------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __X86INTRIN_H
#error "Never use <ia32intrin.h> directly; include <x86intrin.h> instead."
#endif

#ifndef __IA32INTRIN_H
#define __IA32INTRIN_H

/** Find the first set bit starting from the lsb. Result is undefined if
 *  input is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSF </c> instruction or the
 *  <c> TZCNT </c> instruction.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer containing the bit number.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsfd(int __A) {
  return __builtin_ctz(__A);
}

/** Find the first set bit starting from the msb. Result is undefined if
 *  input is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSR </c> instruction or the
 *  <c> LZCNT </c> instruction and an <c> XOR </c>.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer containing the bit number.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsrd(int __A) {
  /* clz counts from the msb; convert it to a bit index counted from the lsb. */
  return 31 - __builtin_clz(__A);
}

/** Swaps the bytes in the input. Converting little endian to big endian or
 *  vice versa.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSWAP </c> instruction.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer containing the swapped bytes.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bswapd(int __A) {
  return __builtin_bswap32(__A);
}

/** Swaps the bytes in the input. Converting little endian to big endian or
 *  vice versa. Performs the same operation as __bswapd.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSWAP </c> instruction.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer containing the swapped bytes.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_bswap(int __A) {
  return __builtin_bswap32(__A);
}

/* Aliases for the 32-bit bit-scan intrinsics defined above. */
#define _bit_scan_forward(A) __bsfd((A))
#define _bit_scan_reverse(A) __bsrd((A))

#ifdef __x86_64__
/** Find the first set bit starting from the lsb. Result is undefined if
 *  input is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSF </c> instruction or the
 *  <c> TZCNT </c> instruction.
 *
 *  \param __A
 *     A 64-bit integer operand.
 *  \returns A 32-bit integer containing the bit number.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsfq(long long __A) {
  return __builtin_ctzll(__A);
}

/** Find the first set bit starting from the msb. Result is undefined if
 *  input is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSR </c> instruction or the
 *  <c> LZCNT </c> instruction and an <c> XOR </c>.
 *
 *  \param __A
 *     A 64-bit integer operand.
 *  \returns A 32-bit integer containing the bit number.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__bsrq(long long __A) {
  /* clz counts from the msb; convert it to a bit index counted from the lsb. */
  return 63 - __builtin_clzll(__A);
}

/** Swaps the bytes in the input. Converting little endian to big endian or
 *  vice versa.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSWAP </c> instruction.
 *
 *  \param __A
 *     A 64-bit integer operand.
 *  \returns A 64-bit integer containing the swapped bytes.
 */
static __inline__ long long __attribute__((__always_inline__, __nodebug__))
__bswapq(long long __A) {
  return __builtin_bswap64(__A);
}

/* Alias for the 64-bit byte-swap intrinsic defined above. */
#define _bswap64(A) __bswapq((A))
#endif

/** Counts the number of bits in the source operand having a value of 1.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> POPCNT </c> instruction or a
 *  sequence of arithmetic and logic ops to calculate it.
 *
 *  \param __A
 *     An unsigned 32-bit integer operand.
 *  \returns A 32-bit integer containing the number of bits with value 1 in the
 *     source operand.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
__popcntd(unsigned int __A)
{
  return __builtin_popcount(__A);
}

/* Alias for the 32-bit population-count intrinsic defined above. */
#define _popcnt32(A) __popcntd((A))

#ifdef __x86_64__
/** Counts the number of bits in the source operand having a value of 1.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> POPCNT </c> instruction or a
 *  sequence of arithmetic and logic ops to calculate it.
 *
 *  \param __A
 *     An unsigned 64-bit integer operand.
 *  \returns A 64-bit integer containing the number of bits with value 1 in the
 *     source operand.
 */
static __inline__ long long __attribute__((__always_inline__, __nodebug__))
__popcntq(unsigned long long __A)
{
  return __builtin_popcountll(__A);
}

/* Alias for the 64-bit population-count intrinsic defined above. */
#define _popcnt64(A) __popcntq((A))
#endif /* __x86_64__ */

#ifdef __x86_64__
/** Reads the processor flags register.
 *
 *  \headerfile <x86intrin.h>
 *
 *  \returns A 64-bit unsigned integer containing the value produced by
 *     <c> __builtin_ia32_readeflags_u64 </c>.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__readeflags(void)
{
  return __builtin_ia32_readeflags_u64();
}

/** Writes the specified value to the processor flags register.
 *
 *  \headerfile <x86intrin.h>
 *
 *  \param __f
 *     The 64-bit value passed to <c> __builtin_ia32_writeeflags_u64 </c>.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__writeeflags(unsigned long long __f)
{
  __builtin_ia32_writeeflags_u64(__f);
}

#else /* !__x86_64__ */
/** Reads the processor flags register.
 *
 *  \headerfile <x86intrin.h>
 *
 *  \returns A 32-bit unsigned integer containing the value produced by
 *     <c> __builtin_ia32_readeflags_u32 </c>.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__readeflags(void)
{
  return __builtin_ia32_readeflags_u32();
}

/** Writes the specified value to the processor flags register.
 *
 *  \headerfile <x86intrin.h>
 *
 *  \param __f
 *     The 32-bit value passed to <c> __builtin_ia32_writeeflags_u32 </c>.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
__writeeflags(unsigned int __f)
{
  __builtin_ia32_writeeflags_u32(__f);
}
#endif /* !__x86_64__ */

/** Cast a 32-bit float value to a 32-bit unsigned integer value
 *
 *  \headerfile <x86intrin.h>
 *  This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction in x86_64,
 *  and corresponds to the <c> VMOVL / MOVL </c> instruction in ia32.
 *
 *  \param __A
 *     A 32-bit float value.
 *  \returns a 32-bit unsigned integer containing the converted value.
 */
static __inline__ unsigned int __attribute__((__always_inline__))
_castf32_u32(float __A) {
  /* Bit-for-bit copy; the local uses a reserved name so that user macros
     cannot clash with it. */
  unsigned int __D;
  __builtin_memcpy(&__D, &__A, sizeof(__A));
  return __D;
}

/** Cast a 64-bit float value to a 64-bit unsigned integer value
 *
 *  \headerfile <x86intrin.h>
 *  This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in x86_64,
 *  and corresponds to the <c> VMOVL / MOVL </c> instruction in ia32.
 *
 *  \param __A
 *     A 64-bit float value.
 *  \returns a 64-bit unsigned integer containing the converted value.
 */
static __inline__ unsigned long long __attribute__((__always_inline__))
_castf64_u64(double __A) {
  /* Bit-for-bit copy; the local uses a reserved name so that user macros
     cannot clash with it. */
  unsigned long long __D;
  __builtin_memcpy(&__D, &__A, sizeof(__A));
  return __D;
}

/** Cast a 32-bit unsigned integer value to a 32-bit float value
 *
 *  \headerfile <x86intrin.h>
 *  This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction in x86_64,
 *  and corresponds to the <c> FLDS </c> instruction in ia32.
 *
 *  \param __A
 *     A 32-bit unsigned integer value.
 *  \returns a 32-bit float value containing the converted value.
 */
static __inline__ float __attribute__((__always_inline__))
_castu32_f32(unsigned int __A) {
  /* Bit-for-bit copy; the local uses a reserved name so that user macros
     cannot clash with it. */
  float __D;
  __builtin_memcpy(&__D, &__A, sizeof(__A));
  return __D;
}

/** Cast a 64-bit unsigned integer value to a 64-bit float value
 *
 *  \headerfile <x86intrin.h>
 *  This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in x86_64,
 *  and corresponds to the <c> FLDL </c> instruction in ia32.
 *
 *  \param __A
 *     A 64-bit unsigned integer value.
 *  \returns a 64-bit float value containing the converted value.
 */
static __inline__ double __attribute__((__always_inline__))
_castu64_f64(unsigned long long __A) {
  /* Bit-for-bit copy; the local uses a reserved name so that user macros
     cannot clash with it. */
  double __D;
  __builtin_memcpy(&__D, &__A, sizeof(__A));
  return __D;
}

/** Adds the unsigned integer operand to the CRC-32C checksum of the
 *  unsigned char operand.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32B </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand to add to the CRC-32C checksum of operand
 *     \a __D.
 *  \param __D
 *     An unsigned 8-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32b(unsigned int __C, unsigned char __D)
{
  return __builtin_ia32_crc32qi(__C, __D);
}

/** Adds the unsigned integer operand to the CRC-32C checksum of the
 *  unsigned short operand.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32W </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand to add to the CRC-32C checksum of operand
 *     \a __D.
 *  \param __D
 *     An unsigned 16-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32w(unsigned int __C, unsigned short __D)
{
  return __builtin_ia32_crc32hi(__C, __D);
}

/** Adds the unsigned integer operand to the CRC-32C checksum of the
 *  second unsigned integer operand.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32D </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand to add to the CRC-32C checksum of operand
 *     \a __D.
 *  \param __D
 *     An unsigned 32-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32d(unsigned int __C, unsigned int __D)
{
  return __builtin_ia32_crc32si(__C, __D);
}

#ifdef __x86_64__
/** Adds the unsigned integer operand to the CRC-32C checksum of the
 *  unsigned 64-bit integer operand.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32Q </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand to add to the CRC-32C checksum of operand
 *     \a __D.
 *  \param __D
 *     An unsigned 64-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32q(unsigned long long __C, unsigned long long __D)
{
  return __builtin_ia32_crc32di(__C, __D);
}
#endif /* __x86_64__ */

/** Reads the specified performance monitoring counter.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> RDPMC </c> instruction.
 *
 *  \param __A
 *     The performance counter to read.
 *  \returns The 64-bit value read from the performance counter.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__rdpmc(int __A) {
  return __builtin_ia32_rdpmc(__A);
}

/** Reads the processor time stamp counter together with an auxiliary
 *  32-bit value.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> RDTSCP </c> instruction.
 *
 *  \param __A
 *     Address of the 32-bit location that receives the auxiliary value.
 *  \returns The 64-bit value of the time stamp counter.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__rdtscp(unsigned int *__A) {
  return __builtin_ia32_rdtscp(__A);
}

/* __rdtsc is not defined in this header; it is expected to be provided
   elsewhere (presumably as a compiler builtin). */
#define _rdtsc() __rdtsc()

/* Alias for __rdpmc defined above. */
#define _rdpmc(A) __rdpmc(A)

/** Writes back and invalidates the contents of the cache.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> WBINVD </c> instruction.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_wbinvd(void) {
  __builtin_ia32_wbinvd();
}

/** Rotates an 8-bit value to the left by the specified number of bits.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> ROL </c> instruction.
 *
 *  \param __X
 *     The unsigned 8-bit value to be rotated.
 *  \param __C
 *     The number of bits to rotate the value.
 *  \returns The rotated value.
 */
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
__rolb(unsigned char __X, int __C) {
  return __builtin_rotateleft8(__X, __C);
}

/** Rotates an 8-bit value to the right by the specified number of bits.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> ROR </c> instruction.
 *
 *  \param __X
 *     The unsigned 8-bit value to be rotated.
 *  \param __C
 *     The number of bits to rotate the value.
 *  \returns The rotated value.
 */
static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
__rorb(unsigned char __X, int __C) {
  return __builtin_rotateright8(__X, __C);
}

/** Rotates a 16-bit value to the left by the specified number of bits.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> ROL </c> instruction.
 *
 *  \param __X
 *     The unsigned 16-bit value to be rotated.
 *  \param __C
 *     The number of bits to rotate the value.
 *  \returns The rotated value.
 */
static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
__rolw(unsigned short __X, int __C) {
  return __builtin_rotateleft16(__X, __C);
}

/** Rotates a 16-bit value to the right by the specified number of bits.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> ROR </c> instruction.
 *
 *  \param __X
 *     The unsigned 16-bit value to be rotated.
 *  \param __C
 *     The number of bits to rotate the value.
 *  \returns The rotated value.
 */
static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
__rorw(unsigned short __X, int __C) {
  return __builtin_rotateright16(__X, __C);
}

/** Rotates a 32-bit value to the left by the specified number of bits.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> ROL </c> instruction.
 *
 *  \param __X
 *     The unsigned 32-bit value to be rotated.
 *  \param __C
 *     The number of bits to rotate the value.
 *  \returns The rotated value.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__rold(unsigned int __X, int __C) {
  return __builtin_rotateleft32(__X, __C);
}

/** Rotates a 32-bit value to the right by the specified number of bits.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> ROR </c> instruction.
 *
 *  \param __X
 *     The unsigned 32-bit value to be rotated.
 *  \param __C
 *     The number of bits to rotate the value.
 *  \returns The rotated value.
 */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
__rord(unsigned int __X, int __C) {
  return __builtin_rotateright32(__X, __C);
}

#ifdef __x86_64__
/** Rotates a 64-bit value to the left by the specified number of bits.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> ROL </c> instruction.
 *
 *  \param __X
 *     The unsigned 64-bit value to be rotated.
 *  \param __C
 *     The number of bits to rotate the value.
 *  \returns The rotated value.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__rolq(unsigned long long __X, int __C) {
  return __builtin_rotateleft64(__X, __C);
}

/** Rotates a 64-bit value to the right by the specified number of bits.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> ROR </c> instruction.
 *
 *  \param __X
 *     The unsigned 64-bit value to be rotated.
 *  \param __C
 *     The number of bits to rotate the value.
 *  \returns The rotated value.
 */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
__rorq(unsigned long long __X, int __C) {
  return __builtin_rotateright64(__X, __C);
}
#endif /* __x86_64__ */

#ifndef _MSC_VER
/* These are already provided as builtins for MSVC. */
/* Select the correct function based on the size of long. */
#ifdef __LP64__
#define _lrotl(a,b) __rolq((a), (b))
#define _lrotr(a,b) __rorq((a), (b))
#else
#define _lrotl(a,b) __rold((a), (b))
#define _lrotr(a,b) __rord((a), (b))
#endif
#define _rotl(a,b) __rold((a), (b))
#define _rotr(a,b) __rord((a), (b))
#endif /* !_MSC_VER */

/* These are not builtins so need to be provided in all modes. */
#define _rotwl(a,b) __rolw((a), (b))
#define _rotwr(a,b) __rorw((a), (b))

#endif /* __IA32INTRIN_H */