/*===---- bmiintrin.h - BMI intrinsics -------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

10 #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
11 #error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
12 #endif
13
14 #ifndef __BMIINTRIN_H
15 #define __BMIINTRIN_H
16
/* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT
   instruction behaves as BSF on non-BMI targets, there is code that expects
   to use it as a potentially faster version of BSF. For that reason this
   attribute set carries no __target__ requirement. */
#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))

22 /// Counts the number of trailing zero bits in the operand.
23 ///
24 /// \headerfile <x86intrin.h>
25 ///
26 /// This intrinsic corresponds to the \c TZCNT instruction.
27 ///
28 /// \param __X
29 /// An unsigned 16-bit integer whose trailing zeros are to be counted.
30 /// \returns An unsigned 16-bit integer containing the number of trailing zero
31 /// bits in the operand.
32 /// \see _tzcnt_u16
33 static __inline__ unsigned short __RELAXED_FN_ATTRS
__tzcnt_u16(unsigned short __X)34 __tzcnt_u16(unsigned short __X)
35 {
36 return __builtin_ia32_tzcnt_u16(__X);
37 }
38
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned short _tzcnt_u16(unsigned short __X);
/// \endcode
///
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
///    An unsigned 16-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 16-bit integer containing the number of trailing zero
///    bits in the operand.
/// \see __tzcnt_u16
#define _tzcnt_u16 __tzcnt_u16

56 /// Counts the number of trailing zero bits in the operand.
57 ///
58 /// \headerfile <x86intrin.h>
59 ///
60 /// This intrinsic corresponds to the \c TZCNT instruction.
61 ///
62 /// \param __X
63 /// An unsigned 32-bit integer whose trailing zeros are to be counted.
64 /// \returns An unsigned 32-bit integer containing the number of trailing zero
65 /// bits in the operand.
66 /// \see { _mm_tzcnt_32 _tzcnt_u32 }
67 static __inline__ unsigned int __RELAXED_FN_ATTRS
__tzcnt_u32(unsigned int __X)68 __tzcnt_u32(unsigned int __X)
69 {
70 return __builtin_ia32_tzcnt_u32(__X);
71 }
72
73 /// Counts the number of trailing zero bits in the operand.
74 ///
75 /// \headerfile <x86intrin.h>
76 ///
77 /// This intrinsic corresponds to the \c TZCNT instruction.
78 ///
79 /// \param __X
80 /// An unsigned 32-bit integer whose trailing zeros are to be counted.
81 /// \returns A 32-bit integer containing the number of trailing zero bits in
82 /// the operand.
83 /// \see { __tzcnt_u32 _tzcnt_u32 }
84 static __inline__ int __RELAXED_FN_ATTRS
_mm_tzcnt_32(unsigned int __X)85 _mm_tzcnt_32(unsigned int __X)
86 {
87 return (int)__builtin_ia32_tzcnt_u32(__X);
88 }
89
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _tzcnt_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
///    An unsigned 32-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 32-bit integer containing the number of trailing zero
///    bits in the operand.
/// \see { _mm_tzcnt_32 __tzcnt_u32 }
#define _tzcnt_u32 __tzcnt_u32

107 #ifdef __x86_64__
108
109 /// Counts the number of trailing zero bits in the operand.
110 ///
111 /// \headerfile <x86intrin.h>
112 ///
113 /// This intrinsic corresponds to the \c TZCNT instruction.
114 ///
115 /// \param __X
116 /// An unsigned 64-bit integer whose trailing zeros are to be counted.
117 /// \returns An unsigned 64-bit integer containing the number of trailing zero
118 /// bits in the operand.
119 /// \see { _mm_tzcnt_64 _tzcnt_u64 }
120 static __inline__ unsigned long long __RELAXED_FN_ATTRS
__tzcnt_u64(unsigned long long __X)121 __tzcnt_u64(unsigned long long __X)
122 {
123 return __builtin_ia32_tzcnt_u64(__X);
124 }
125
126 /// Counts the number of trailing zero bits in the operand.
127 ///
128 /// \headerfile <x86intrin.h>
129 ///
130 /// This intrinsic corresponds to the \c TZCNT instruction.
131 ///
132 /// \param __X
133 /// An unsigned 64-bit integer whose trailing zeros are to be counted.
134 /// \returns An 64-bit integer containing the number of trailing zero bits in
135 /// the operand.
136 /// \see { __tzcnt_u64 _tzcnt_u64 }
137 static __inline__ long long __RELAXED_FN_ATTRS
_mm_tzcnt_64(unsigned long long __X)138 _mm_tzcnt_64(unsigned long long __X)
139 {
140 return (long long)__builtin_ia32_tzcnt_u64(__X);
141 }
142
/// Counts the number of trailing zero bits in the operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _tzcnt_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c TZCNT instruction.
///
/// \param __X
///    An unsigned 64-bit integer whose trailing zeros are to be counted.
/// \returns An unsigned 64-bit integer containing the number of trailing zero
///    bits in the operand.
/// \see { _mm_tzcnt_64 __tzcnt_u64 }
#define _tzcnt_u64 __tzcnt_u64

160 #endif /* __x86_64__ */
161
162 #undef __RELAXED_FN_ATTRS
163
164 #if !defined(__SCE__) || __has_feature(modules) || defined(__BMI__)
165
/* Define the default attributes for the functions in this file. Functions
   declared with these attributes are compiled for the "bmi" target feature. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi")))

169 /// Performs a bitwise AND of the second operand with the one's
170 /// complement of the first operand.
171 ///
172 /// \headerfile <x86intrin.h>
173 ///
174 /// This intrinsic corresponds to the \c ANDN instruction.
175 ///
176 /// \param __X
177 /// An unsigned integer containing one of the operands.
178 /// \param __Y
179 /// An unsigned integer containing one of the operands.
180 /// \returns An unsigned integer containing the bitwise AND of the second
181 /// operand with the one's complement of the first operand.
182 /// \see _andn_u32
183 static __inline__ unsigned int __DEFAULT_FN_ATTRS
__andn_u32(unsigned int __X,unsigned int __Y)184 __andn_u32(unsigned int __X, unsigned int __Y)
185 {
186 return ~__X & __Y;
187 }
188
/// Performs a bitwise AND of the second operand with the one's
///    complement of the first operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _andn_u32(unsigned int __X, unsigned int __Y);
/// \endcode
///
/// This intrinsic corresponds to the \c ANDN instruction.
///
/// \param __X
///    An unsigned integer containing one of the operands.
/// \param __Y
///    An unsigned integer containing one of the operands.
/// \returns An unsigned integer containing the bitwise AND of the second
///    operand with the one's complement of the first operand.
/// \see __andn_u32
#define _andn_u32 __andn_u32

209 /* AMD-specified, double-leading-underscore version of BEXTR */
210 /// Extracts the specified bits from the first operand and returns them
211 /// in the least significant bits of the result.
212 ///
213 /// \headerfile <x86intrin.h>
214 ///
215 /// This intrinsic corresponds to the \c BEXTR instruction.
216 ///
217 /// \param __X
218 /// An unsigned integer whose bits are to be extracted.
219 /// \param __Y
220 /// An unsigned integer used to specify which bits are extracted. Bits [7:0]
221 /// specify the index of the least significant bit. Bits [15:8] specify the
222 /// number of bits to be extracted.
223 /// \returns An unsigned integer whose least significant bits contain the
224 /// extracted bits.
225 /// \see _bextr_u32
226 static __inline__ unsigned int __DEFAULT_FN_ATTRS
__bextr_u32(unsigned int __X,unsigned int __Y)227 __bextr_u32(unsigned int __X, unsigned int __Y)
228 {
229 return __builtin_ia32_bextr_u32(__X, __Y);
230 }
231
232 /* Intel-specified, single-leading-underscore version of BEXTR */
233 /// Extracts the specified bits from the first operand and returns them
234 /// in the least significant bits of the result.
235 ///
236 /// \headerfile <x86intrin.h>
237 ///
238 /// This intrinsic corresponds to the \c BEXTR instruction.
239 ///
240 /// \param __X
241 /// An unsigned integer whose bits are to be extracted.
242 /// \param __Y
243 /// An unsigned integer used to specify the index of the least significant
244 /// bit for the bits to be extracted. Bits [7:0] specify the index.
245 /// \param __Z
246 /// An unsigned integer used to specify the number of bits to be extracted.
247 /// Bits [7:0] specify the number of bits.
248 /// \returns An unsigned integer whose least significant bits contain the
249 /// extracted bits.
250 /// \see __bextr_u32
251 static __inline__ unsigned int __DEFAULT_FN_ATTRS
_bextr_u32(unsigned int __X,unsigned int __Y,unsigned int __Z)252 _bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)
253 {
254 return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
255 }
256
257 /* Intel-specified, single-leading-underscore version of BEXTR2 */
258 /// Extracts the specified bits from the first operand and returns them
259 /// in the least significant bits of the result.
260 ///
261 /// \headerfile <x86intrin.h>
262 ///
263 /// This intrinsic corresponds to the \c BEXTR instruction.
264 ///
265 /// \param __X
266 /// An unsigned integer whose bits are to be extracted.
267 /// \param __Y
268 /// An unsigned integer used to specify which bits are extracted. Bits [7:0]
269 /// specify the index of the least significant bit. Bits [15:8] specify the
270 /// number of bits to be extracted.
271 /// \returns An unsigned integer whose least significant bits contain the
272 /// extracted bits.
273 /// \see __bextr_u32
274 static __inline__ unsigned int __DEFAULT_FN_ATTRS
_bextr2_u32(unsigned int __X,unsigned int __Y)275 _bextr2_u32(unsigned int __X, unsigned int __Y) {
276 return __builtin_ia32_bextr_u32(__X, __Y);
277 }
278
279 /// Clears all bits in the source except for the least significant bit
280 /// containing a value of 1 and returns the result.
281 ///
282 /// \headerfile <x86intrin.h>
283 ///
284 /// This intrinsic corresponds to the \c BLSI instruction.
285 ///
286 /// \param __X
287 /// An unsigned integer whose bits are to be cleared.
288 /// \returns An unsigned integer containing the result of clearing the bits from
289 /// the source operand.
290 /// \see _blsi_u32
291 static __inline__ unsigned int __DEFAULT_FN_ATTRS
__blsi_u32(unsigned int __X)292 __blsi_u32(unsigned int __X)
293 {
294 return __X & -__X;
295 }
296
/// Clears all bits in the source except for the least significant bit
///    containing a value of 1 and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _blsi_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSI instruction.
///
/// \param __X
///    An unsigned integer whose bits are to be cleared.
/// \returns An unsigned integer containing the result of clearing the bits from
///    the source operand.
/// \see __blsi_u32
#define _blsi_u32 __blsi_u32

315 /// Creates a mask whose bits are set to 1, using bit 0 up to and
316 /// including the least significant bit that is set to 1 in the source
317 /// operand and returns the result.
318 ///
319 /// \headerfile <x86intrin.h>
320 ///
321 /// This intrinsic corresponds to the \c BLSMSK instruction.
322 ///
323 /// \param __X
324 /// An unsigned integer used to create the mask.
325 /// \returns An unsigned integer containing the newly created mask.
326 /// \see _blsmsk_u32
327 static __inline__ unsigned int __DEFAULT_FN_ATTRS
__blsmsk_u32(unsigned int __X)328 __blsmsk_u32(unsigned int __X)
329 {
330 return __X ^ (__X - 1);
331 }
332
/// Creates a mask whose bits are set to 1, using bit 0 up to and
///    including the least significant bit that is set to 1 in the source
///    operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _blsmsk_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSMSK instruction.
///
/// \param __X
///    An unsigned integer used to create the mask.
/// \returns An unsigned integer containing the newly created mask.
/// \see __blsmsk_u32
#define _blsmsk_u32 __blsmsk_u32

351 /// Clears the least significant bit that is set to 1 in the source
352 /// operand and returns the result.
353 ///
354 /// \headerfile <x86intrin.h>
355 ///
356 /// This intrinsic corresponds to the \c BLSR instruction.
357 ///
358 /// \param __X
359 /// An unsigned integer containing the operand to be cleared.
360 /// \returns An unsigned integer containing the result of clearing the source
361 /// operand.
362 /// \see _blsr_u32
363 static __inline__ unsigned int __DEFAULT_FN_ATTRS
__blsr_u32(unsigned int __X)364 __blsr_u32(unsigned int __X)
365 {
366 return __X & (__X - 1);
367 }
368
/// Clears the least significant bit that is set to 1 in the source
///    operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned int _blsr_u32(unsigned int __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSR instruction.
///
/// \param __X
///    An unsigned integer containing the operand to be cleared.
/// \returns An unsigned integer containing the result of clearing the source
///    operand.
/// \see __blsr_u32
#define _blsr_u32 __blsr_u32

387 #ifdef __x86_64__
388
389 /// Performs a bitwise AND of the second operand with the one's
390 /// complement of the first operand.
391 ///
392 /// \headerfile <x86intrin.h>
393 ///
394 /// This intrinsic corresponds to the \c ANDN instruction.
395 ///
396 /// \param __X
397 /// An unsigned 64-bit integer containing one of the operands.
398 /// \param __Y
399 /// An unsigned 64-bit integer containing one of the operands.
400 /// \returns An unsigned 64-bit integer containing the bitwise AND of the second
401 /// operand with the one's complement of the first operand.
402 /// \see _andn_u64
403 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__andn_u64(unsigned long long __X,unsigned long long __Y)404 __andn_u64 (unsigned long long __X, unsigned long long __Y)
405 {
406 return ~__X & __Y;
407 }
408
/// Performs a bitwise AND of the second operand with the one's
///    complement of the first operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _andn_u64(unsigned long long __X,
///                              unsigned long long __Y);
/// \endcode
///
/// This intrinsic corresponds to the \c ANDN instruction.
///
/// \param __X
///    An unsigned 64-bit integer containing one of the operands.
/// \param __Y
///    An unsigned 64-bit integer containing one of the operands.
/// \returns An unsigned 64-bit integer containing the bitwise AND of the second
///    operand with the one's complement of the first operand.
/// \see __andn_u64
#define _andn_u64 __andn_u64

430 /* AMD-specified, double-leading-underscore version of BEXTR */
431 /// Extracts the specified bits from the first operand and returns them
432 /// in the least significant bits of the result.
433 ///
434 /// \headerfile <x86intrin.h>
435 ///
436 /// This intrinsic corresponds to the \c BEXTR instruction.
437 ///
438 /// \param __X
439 /// An unsigned 64-bit integer whose bits are to be extracted.
440 /// \param __Y
441 /// An unsigned 64-bit integer used to specify which bits are extracted. Bits
442 /// [7:0] specify the index of the least significant bit. Bits [15:8] specify
443 /// the number of bits to be extracted.
444 /// \returns An unsigned 64-bit integer whose least significant bits contain the
445 /// extracted bits.
446 /// \see _bextr_u64
447 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__bextr_u64(unsigned long long __X,unsigned long long __Y)448 __bextr_u64(unsigned long long __X, unsigned long long __Y)
449 {
450 return __builtin_ia32_bextr_u64(__X, __Y);
451 }
452
453 /* Intel-specified, single-leading-underscore version of BEXTR */
454 /// Extracts the specified bits from the first operand and returns them
455 /// in the least significant bits of the result.
456 ///
457 /// \headerfile <x86intrin.h>
458 ///
459 /// This intrinsic corresponds to the \c BEXTR instruction.
460 ///
461 /// \param __X
462 /// An unsigned 64-bit integer whose bits are to be extracted.
463 /// \param __Y
464 /// An unsigned integer used to specify the index of the least significant
465 /// bit for the bits to be extracted. Bits [7:0] specify the index.
466 /// \param __Z
467 /// An unsigned integer used to specify the number of bits to be extracted.
468 /// Bits [7:0] specify the number of bits.
469 /// \returns An unsigned 64-bit integer whose least significant bits contain the
470 /// extracted bits.
471 /// \see __bextr_u64
472 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_bextr_u64(unsigned long long __X,unsigned int __Y,unsigned int __Z)473 _bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)
474 {
475 return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
476 }
477
478 /* Intel-specified, single-leading-underscore version of BEXTR2 */
479 /// Extracts the specified bits from the first operand and returns them
480 /// in the least significant bits of the result.
481 ///
482 /// \headerfile <x86intrin.h>
483 ///
484 /// This intrinsic corresponds to the \c BEXTR instruction.
485 ///
486 /// \param __X
487 /// An unsigned 64-bit integer whose bits are to be extracted.
488 /// \param __Y
489 /// An unsigned 64-bit integer used to specify which bits are extracted. Bits
490 /// [7:0] specify the index of the least significant bit. Bits [15:8] specify
491 /// the number of bits to be extracted.
492 /// \returns An unsigned 64-bit integer whose least significant bits contain the
493 /// extracted bits.
494 /// \see __bextr_u64
495 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_bextr2_u64(unsigned long long __X,unsigned long long __Y)496 _bextr2_u64(unsigned long long __X, unsigned long long __Y) {
497 return __builtin_ia32_bextr_u64(__X, __Y);
498 }
499
500 /// Clears all bits in the source except for the least significant bit
501 /// containing a value of 1 and returns the result.
502 ///
503 /// \headerfile <x86intrin.h>
504 ///
505 /// This intrinsic corresponds to the \c BLSI instruction.
506 ///
507 /// \param __X
508 /// An unsigned 64-bit integer whose bits are to be cleared.
509 /// \returns An unsigned 64-bit integer containing the result of clearing the
510 /// bits from the source operand.
511 /// \see _blsi_u64
512 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__blsi_u64(unsigned long long __X)513 __blsi_u64(unsigned long long __X)
514 {
515 return __X & -__X;
516 }
517
/// Clears all bits in the source except for the least significant bit
///    containing a value of 1 and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _blsi_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSI instruction.
///
/// \param __X
///    An unsigned 64-bit integer whose bits are to be cleared.
/// \returns An unsigned 64-bit integer containing the result of clearing the
///    bits from the source operand.
/// \see __blsi_u64
#define _blsi_u64 __blsi_u64

536 /// Creates a mask whose bits are set to 1, using bit 0 up to and
537 /// including the least significant bit that is set to 1 in the source
538 /// operand and returns the result.
539 ///
540 /// \headerfile <x86intrin.h>
541 ///
542 /// This intrinsic corresponds to the \c BLSMSK instruction.
543 ///
544 /// \param __X
545 /// An unsigned 64-bit integer used to create the mask.
546 /// \returns An unsigned 64-bit integer containing the newly created mask.
547 /// \see _blsmsk_u64
548 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__blsmsk_u64(unsigned long long __X)549 __blsmsk_u64(unsigned long long __X)
550 {
551 return __X ^ (__X - 1);
552 }
553
/// Creates a mask whose bits are set to 1, using bit 0 up to and
///    including the least significant bit that is set to 1 in the source
///    operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _blsmsk_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSMSK instruction.
///
/// \param __X
///    An unsigned 64-bit integer used to create the mask.
/// \returns An unsigned 64-bit integer containing the newly created mask.
/// \see __blsmsk_u64
#define _blsmsk_u64 __blsmsk_u64

572 /// Clears the least significant bit that is set to 1 in the source
573 /// operand and returns the result.
574 ///
575 /// \headerfile <x86intrin.h>
576 ///
577 /// This intrinsic corresponds to the \c BLSR instruction.
578 ///
579 /// \param __X
580 /// An unsigned 64-bit integer containing the operand to be cleared.
581 /// \returns An unsigned 64-bit integer containing the result of clearing the
582 /// source operand.
583 /// \see _blsr_u64
584 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
__blsr_u64(unsigned long long __X)585 __blsr_u64(unsigned long long __X)
586 {
587 return __X & (__X - 1);
588 }
589
/// Clears the least significant bit that is set to 1 in the source
///    operand and returns the result.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned long long _blsr_u64(unsigned long long __X);
/// \endcode
///
/// This intrinsic corresponds to the \c BLSR instruction.
///
/// \param __X
///    An unsigned 64-bit integer containing the operand to be cleared.
/// \returns An unsigned 64-bit integer containing the result of clearing the
///    source operand.
/// \see __blsr_u64
#define _blsr_u64 __blsr_u64

608 #endif /* __x86_64__ */
609
610 #undef __DEFAULT_FN_ATTRS
611
612 #endif /* !defined(__SCE__) || __has_feature(modules) || defined(__BMI__) */
613
614 #endif /* __BMIINTRIN_H */
615