xref: /freebsd/contrib/llvm-project/clang/lib/Headers/bmiintrin.h (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 /*===---- bmiintrin.h - BMI intrinsics -------------------------------------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 
10 #if !defined __X86INTRIN_H && !defined __IMMINTRIN_H
11 #error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
12 #endif
13 
14 #ifndef __BMIINTRIN_H
15 #define __BMIINTRIN_H
16 
17 /* Attributes for the tzcnt intrinsics, which stay usable on non-BMI targets
18    (no __target__("bmi") here). Since the TZCNT instruction behaves as BSF on
19    such targets, there is code that uses it as a potentially faster BSF. */
20 #define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
21 
22 /// Counts the number of trailing zero bits in the operand.
23 ///
24 /// \headerfile <x86intrin.h>
25 ///
26 /// This intrinsic corresponds to the \c TZCNT instruction.
27 ///
28 /// \param __X
29 ///    An unsigned 16-bit integer whose trailing zeros are to be counted.
30 /// \returns An unsigned 16-bit integer containing the number of trailing zero
31 ///    bits in the operand.
32 /// \see _tzcnt_u16
33 static __inline__ unsigned short __RELAXED_FN_ATTRS
34 __tzcnt_u16(unsigned short __X)
35 {
36   return __builtin_ia32_tzcnt_u16(__X);
37 }
38 
39 /// Counts the number of trailing zero bits in the operand.
40 ///
41 /// \headerfile <x86intrin.h>
42 ///
43 /// \code
44 /// unsigned short _tzcnt_u16(unsigned short __X);
45 /// \endcode
46 ///
47 /// Macro alias of \c __tzcnt_u16; corresponds to the \c TZCNT instruction.
48 ///
49 /// \param __X
50 ///    An unsigned 16-bit integer whose trailing zeros are to be counted.
51 /// \returns An unsigned 16-bit integer containing the number of trailing zero
52 ///    bits in the operand.
53 /// \see __tzcnt_u16
54 #define _tzcnt_u16 __tzcnt_u16
55 
56 /// Counts the number of trailing zero bits in the operand.
57 ///
58 /// \headerfile <x86intrin.h>
59 ///
60 /// This intrinsic corresponds to the \c TZCNT instruction.
61 ///
62 /// \param __X
63 ///    An unsigned 32-bit integer whose trailing zeros are to be counted.
64 /// \returns An unsigned 32-bit integer containing the number of trailing zero
65 ///    bits in the operand.
66 /// \see { _mm_tzcnt_32 _tzcnt_u32 }
67 static __inline__ unsigned int __RELAXED_FN_ATTRS
68 __tzcnt_u32(unsigned int __X)
69 {
70   return __builtin_ia32_tzcnt_u32(__X);
71 }
72 
73 /// Counts the number of trailing zero bits in the operand.
74 ///
75 /// \headerfile <x86intrin.h>
76 ///
77 /// This intrinsic corresponds to the \c TZCNT instruction.
78 ///
79 /// \param __X
80 ///    An unsigned 32-bit integer whose trailing zeros are to be counted.
81 /// \returns A 32-bit integer containing the number of trailing zero bits in
82 ///    the operand.
83 /// \see { __tzcnt_u32 _tzcnt_u32 }
84 static __inline__ int __RELAXED_FN_ATTRS
85 _mm_tzcnt_32(unsigned int __X)
86 {
87   return (int)__builtin_ia32_tzcnt_u32(__X);
88 }
89 
90 /// Counts the number of trailing zero bits in the operand.
91 ///
92 /// \headerfile <x86intrin.h>
93 ///
94 /// \code
95 /// unsigned int _tzcnt_u32(unsigned int __X);
96 /// \endcode
97 ///
98 /// Macro alias of \c __tzcnt_u32; corresponds to the \c TZCNT instruction.
99 ///
100 /// \param __X
101 ///    An unsigned 32-bit integer whose trailing zeros are to be counted.
102 /// \returns An unsigned 32-bit integer containing the number of trailing zero
103 ///    bits in the operand.
104 /// \see { _mm_tzcnt_32 __tzcnt_u32 }
105 #define _tzcnt_u32 __tzcnt_u32
106 
107 #ifdef __x86_64__
108 
109 /// Counts the number of trailing zero bits in the operand.
110 ///
111 /// \headerfile <x86intrin.h>
112 ///
113 /// This intrinsic corresponds to the \c TZCNT instruction.
114 ///
115 /// \param __X
116 ///    An unsigned 64-bit integer whose trailing zeros are to be counted.
117 /// \returns An unsigned 64-bit integer containing the number of trailing zero
118 ///    bits in the operand.
119 /// \see { _mm_tzcnt_64 _tzcnt_u64 }
120 static __inline__ unsigned long long __RELAXED_FN_ATTRS
121 __tzcnt_u64(unsigned long long __X)
122 {
123   return __builtin_ia32_tzcnt_u64(__X);
124 }
125 
126 /// Counts the number of trailing zero bits in the operand.
127 ///
128 /// \headerfile <x86intrin.h>
129 ///
130 /// This intrinsic corresponds to the \c TZCNT instruction.
131 ///
132 /// \param __X
133 ///    An unsigned 64-bit integer whose trailing zeros are to be counted.
134 /// \returns An 64-bit integer containing the number of trailing zero bits in
135 ///    the operand.
136 /// \see { __tzcnt_u64 _tzcnt_u64 }
137 static __inline__ long long __RELAXED_FN_ATTRS
138 _mm_tzcnt_64(unsigned long long __X)
139 {
140   return (long long)__builtin_ia32_tzcnt_u64(__X);
141 }
142 
143 /// Counts the number of trailing zero bits in the operand.
144 ///
145 /// \headerfile <x86intrin.h>
146 ///
147 /// \code
148 /// unsigned long long _tzcnt_u64(unsigned long long __X);
149 /// \endcode
150 ///
151 /// Macro alias of \c __tzcnt_u64; corresponds to the \c TZCNT instruction.
152 ///
153 /// \param __X
154 ///    An unsigned 64-bit integer whose trailing zeros are to be counted.
155 /// \returns An unsigned 64-bit integer containing the number of trailing zero
156 ///    bits in the operand.
157 /// \see { _mm_tzcnt_64 __tzcnt_u64 }
158 #define _tzcnt_u64 __tzcnt_u64
159 
160 #endif /* __x86_64__ */
161 
162 #undef __RELAXED_FN_ATTRS
163 
164 #if !defined(__SCE__) || __has_feature(modules) || defined(__BMI__)
165 
166 /* Attributes for the intrinsics below; __target__("bmi") enables BMI for them. */
167 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
168 
169 /// Performs a bitwise AND of the second operand with the one's
170 ///    complement of the first operand.
171 ///
172 /// \headerfile <x86intrin.h>
173 ///
174 /// This intrinsic corresponds to the \c ANDN instruction.
175 ///
176 /// \param __X
177 ///    An unsigned integer containing one of the operands.
178 /// \param __Y
179 ///    An unsigned integer containing one of the operands.
180 /// \returns An unsigned integer containing the bitwise AND of the second
181 ///    operand with the one's complement of the first operand.
182 /// \see _andn_u32
183 static __inline__ unsigned int __DEFAULT_FN_ATTRS
184 __andn_u32(unsigned int __X, unsigned int __Y)
185 {
186   return ~__X & __Y;
187 }
188 
189 /// Performs a bitwise AND of the second operand with the one's
190 ///    complement of the first operand.
191 ///
192 /// \headerfile <x86intrin.h>
193 ///
194 /// \code
195 /// unsigned int _andn_u32(unsigned int __X, unsigned int __Y);
196 /// \endcode
197 ///
198 /// Macro alias of \c __andn_u32; corresponds to the \c ANDN instruction.
199 ///
200 /// \param __X
201 ///    An unsigned integer containing one of the operands.
202 /// \param __Y
203 ///    An unsigned integer containing one of the operands.
204 /// \returns An unsigned integer containing the bitwise AND of the second
205 ///    operand with the one's complement of the first operand.
206 /// \see __andn_u32
207 #define _andn_u32 __andn_u32
208 
209 /* AMD-specified, double-leading-underscore version of BEXTR */
210 /// Extracts the specified bits from the first operand and returns them
211 ///    in the least significant bits of the result.
212 ///
213 /// \headerfile <x86intrin.h>
214 ///
215 /// This intrinsic corresponds to the \c BEXTR instruction.
216 ///
217 /// \param __X
218 ///    An unsigned integer whose bits are to be extracted.
219 /// \param __Y
220 ///    An unsigned integer used to specify which bits are extracted. Bits [7:0]
221 ///    specify the index of the least significant bit. Bits [15:8] specify the
222 ///    number of bits to be extracted.
223 /// \returns An unsigned integer whose least significant bits contain the
224 ///    extracted bits.
225 /// \see _bextr_u32
226 static __inline__ unsigned int __DEFAULT_FN_ATTRS
227 __bextr_u32(unsigned int __X, unsigned int __Y)
228 {
229   return __builtin_ia32_bextr_u32(__X, __Y);
230 }
231 
232 /* Intel-specified, single-leading-underscore version of BEXTR */
233 /// Extracts the specified bits from the first operand and returns them
234 ///    in the least significant bits of the result.
235 ///
236 /// \headerfile <x86intrin.h>
237 ///
238 /// This intrinsic corresponds to the \c BEXTR instruction.
239 ///
240 /// \param __X
241 ///    An unsigned integer whose bits are to be extracted.
242 /// \param __Y
243 ///    An unsigned integer used to specify the index of the least significant
244 ///    bit for the bits to be extracted. Bits [7:0] specify the index.
245 /// \param __Z
246 ///    An unsigned integer used to specify the number of bits to be extracted.
247 ///    Bits [7:0] specify the number of bits.
248 /// \returns An unsigned integer whose least significant bits contain the
249 ///    extracted bits.
250 /// \see __bextr_u32
251 static __inline__ unsigned int __DEFAULT_FN_ATTRS
252 _bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z)
253 {
254   return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
255 }
256 
257 /* Intel-specified, single-leading-underscore version of BEXTR2 */
258 /// Extracts the specified bits from the first operand and returns them
259 ///    in the least significant bits of the result.
260 ///
261 /// \headerfile <x86intrin.h>
262 ///
263 /// This intrinsic corresponds to the \c BEXTR instruction.
264 ///
265 /// \param __X
266 ///    An unsigned integer whose bits are to be extracted.
267 /// \param __Y
268 ///    An unsigned integer used to specify which bits are extracted. Bits [7:0]
269 ///    specify the index of the least significant bit. Bits [15:8] specify the
270 ///    number of bits to be extracted.
271 /// \returns An unsigned integer whose least significant bits contain the
272 ///    extracted bits.
273 /// \see __bextr_u32
274 static __inline__ unsigned int __DEFAULT_FN_ATTRS
275 _bextr2_u32(unsigned int __X, unsigned int __Y) {
276   return __builtin_ia32_bextr_u32(__X, __Y);
277 }
278 
279 /// Clears all bits in the source except for the least significant bit
280 ///    containing a value of 1 and returns the result.
281 ///
282 /// \headerfile <x86intrin.h>
283 ///
284 /// This intrinsic corresponds to the \c BLSI instruction.
285 ///
286 /// \param __X
287 ///    An unsigned integer whose bits are to be cleared.
288 /// \returns An unsigned integer containing the result of clearing the bits from
289 ///    the source operand.
290 /// \see _blsi_u32
291 static __inline__ unsigned int __DEFAULT_FN_ATTRS
292 __blsi_u32(unsigned int __X)
293 {
294   return __X & -__X;
295 }
296 
297 /// Clears all bits in the source except for the least significant bit
298 ///    containing a value of 1 and returns the result.
299 ///
300 /// \headerfile <x86intrin.h>
301 ///
302 /// \code
303 /// unsigned int _blsi_u32(unsigned int __X);
304 /// \endcode
305 ///
306 /// Macro alias of \c __blsi_u32; corresponds to the \c BLSI instruction.
307 ///
308 /// \param __X
309 ///    An unsigned integer whose bits are to be cleared.
310 /// \returns An unsigned integer containing the result of clearing the bits from
311 ///    the source operand.
312 /// \see __blsi_u32
313 #define _blsi_u32 __blsi_u32
314 
315 /// Creates a mask whose bits are set to 1, using bit 0 up to and
316 ///    including the least significant bit that is set to 1 in the source
317 ///    operand and returns the result.
318 ///
319 /// \headerfile <x86intrin.h>
320 ///
321 /// This intrinsic corresponds to the \c BLSMSK instruction.
322 ///
323 /// \param __X
324 ///    An unsigned integer used to create the mask.
325 /// \returns An unsigned integer containing the newly created mask.
326 /// \see _blsmsk_u32
327 static __inline__ unsigned int __DEFAULT_FN_ATTRS
328 __blsmsk_u32(unsigned int __X)
329 {
330   return __X ^ (__X - 1);
331 }
332 
333 /// Creates a mask whose bits are set to 1, using bit 0 up to and
334 ///    including the least significant bit that is set to 1 in the source
335 ///    operand and returns the result.
336 ///
337 /// \headerfile <x86intrin.h>
338 ///
339 /// \code
340 /// unsigned int _blsmsk_u32(unsigned int __X);
341 /// \endcode
342 ///
343 /// Macro alias of \c __blsmsk_u32; corresponds to the \c BLSMSK instruction.
344 ///
345 /// \param __X
346 ///    An unsigned integer used to create the mask.
347 /// \returns An unsigned integer containing the newly created mask.
348 /// \see __blsmsk_u32
349 #define _blsmsk_u32 __blsmsk_u32
350 
351 /// Clears the least significant bit that is set to 1 in the source
352 ///    operand and returns the result.
353 ///
354 /// \headerfile <x86intrin.h>
355 ///
356 /// This intrinsic corresponds to the \c BLSR instruction.
357 ///
358 /// \param __X
359 ///    An unsigned integer containing the operand to be cleared.
360 /// \returns An unsigned integer containing the result of clearing the source
361 ///    operand.
362 /// \see _blsr_u32
363 static __inline__ unsigned int __DEFAULT_FN_ATTRS
364 __blsr_u32(unsigned int __X)
365 {
366   return __X & (__X - 1);
367 }
368 
369 /// Clears the least significant bit that is set to 1 in the source
370 ///    operand and returns the result.
371 ///
372 /// \headerfile <x86intrin.h>
373 ///
374 /// \code
375 /// unsigned int _blsr_u32(unsigned int __X);
376 /// \endcode
377 ///
378 /// Macro alias of \c __blsr_u32; corresponds to the \c BLSR instruction.
379 ///
380 /// \param __X
381 ///    An unsigned integer containing the operand to be cleared.
382 /// \returns An unsigned integer containing the result of clearing the source
383 ///    operand.
384 /// \see __blsr_u32
385 #define _blsr_u32 __blsr_u32
386 
387 #ifdef __x86_64__
388 
389 /// Performs a bitwise AND of the second operand with the one's
390 ///    complement of the first operand.
391 ///
392 /// \headerfile <x86intrin.h>
393 ///
394 /// This intrinsic corresponds to the \c ANDN instruction.
395 ///
396 /// \param __X
397 ///    An unsigned 64-bit integer containing one of the operands.
398 /// \param __Y
399 ///    An unsigned 64-bit integer containing one of the operands.
400 /// \returns An unsigned 64-bit integer containing the bitwise AND of the second
401 ///    operand with the one's complement of the first operand.
402 /// \see _andn_u64
403 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
404 __andn_u64 (unsigned long long __X, unsigned long long __Y)
405 {
406   return ~__X & __Y;
407 }
408 
409 /// Performs a bitwise AND of the second operand with the one's
410 ///    complement of the first operand.
411 ///
412 /// \headerfile <x86intrin.h>
413 ///
414 /// \code
415 /// unsigned long long _andn_u64(unsigned long long __X,
416 ///                              unsigned long long __Y);
417 /// \endcode
418 ///
419 /// Macro alias of \c __andn_u64; corresponds to the \c ANDN instruction.
420 ///
421 /// \param __X
422 ///    An unsigned 64-bit integer containing one of the operands.
423 /// \param __Y
424 ///    An unsigned 64-bit integer containing one of the operands.
425 /// \returns An unsigned 64-bit integer containing the bitwise AND of the second
426 ///    operand with the one's complement of the first operand.
427 /// \see __andn_u64
428 #define _andn_u64 __andn_u64
429 
430 /* AMD-specified, double-leading-underscore version of BEXTR */
431 /// Extracts the specified bits from the first operand and returns them
432 ///    in the least significant bits of the result.
433 ///
434 /// \headerfile <x86intrin.h>
435 ///
436 /// This intrinsic corresponds to the \c BEXTR instruction.
437 ///
438 /// \param __X
439 ///    An unsigned 64-bit integer whose bits are to be extracted.
440 /// \param __Y
441 ///    An unsigned 64-bit integer used to specify which bits are extracted. Bits
442 ///    [7:0] specify the index of the least significant bit. Bits [15:8] specify
443 ///    the number of bits to be extracted.
444 /// \returns An unsigned 64-bit integer whose least significant bits contain the
445 ///    extracted bits.
446 /// \see _bextr_u64
447 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
448 __bextr_u64(unsigned long long __X, unsigned long long __Y)
449 {
450   return __builtin_ia32_bextr_u64(__X, __Y);
451 }
452 
453 /* Intel-specified, single-leading-underscore version of BEXTR */
454 /// Extracts the specified bits from the first operand and returns them
455 ///     in the least significant bits of the result.
456 ///
457 /// \headerfile <x86intrin.h>
458 ///
459 /// This intrinsic corresponds to the \c BEXTR instruction.
460 ///
461 /// \param __X
462 ///    An unsigned 64-bit integer whose bits are to be extracted.
463 /// \param __Y
464 ///    An unsigned integer used to specify the index of the least significant
465 ///    bit for the bits to be extracted. Bits [7:0] specify the index.
466 /// \param __Z
467 ///    An unsigned integer used to specify the number of bits to be extracted.
468 ///    Bits [7:0] specify the number of bits.
469 /// \returns An unsigned 64-bit integer whose least significant bits contain the
470 ///    extracted bits.
471 /// \see __bextr_u64
472 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
473 _bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z)
474 {
475   return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
476 }
477 
478 /* Intel-specified, single-leading-underscore version of BEXTR2 */
479 /// Extracts the specified bits from the first operand and returns them
480 ///    in the least significant bits of the result.
481 ///
482 /// \headerfile <x86intrin.h>
483 ///
484 /// This intrinsic corresponds to the \c BEXTR instruction.
485 ///
486 /// \param __X
487 ///    An unsigned 64-bit integer whose bits are to be extracted.
488 /// \param __Y
489 ///    An unsigned 64-bit integer used to specify which bits are extracted. Bits
490 ///    [7:0] specify the index of the least significant bit. Bits [15:8] specify
491 ///    the number of bits to be extracted.
492 /// \returns An unsigned 64-bit integer whose least significant bits contain the
493 ///    extracted bits.
494 /// \see __bextr_u64
495 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
496 _bextr2_u64(unsigned long long __X, unsigned long long __Y) {
497   return __builtin_ia32_bextr_u64(__X, __Y);
498 }
499 
500 /// Clears all bits in the source except for the least significant bit
501 ///    containing a value of 1 and returns the result.
502 ///
503 /// \headerfile <x86intrin.h>
504 ///
505 /// This intrinsic corresponds to the \c BLSI instruction.
506 ///
507 /// \param __X
508 ///    An unsigned 64-bit integer whose bits are to be cleared.
509 /// \returns An unsigned 64-bit integer containing the result of clearing the
510 ///    bits from the source operand.
511 /// \see _blsi_u64
512 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
513 __blsi_u64(unsigned long long __X)
514 {
515   return __X & -__X;
516 }
517 
518 /// Clears all bits in the source except for the least significant bit
519 ///    containing a value of 1 and returns the result.
520 ///
521 /// \headerfile <x86intrin.h>
522 ///
523 /// \code
524 /// unsigned long long _blsi_u64(unsigned long long __X);
525 /// \endcode
526 ///
527 /// Macro alias of \c __blsi_u64; corresponds to the \c BLSI instruction.
528 ///
529 /// \param __X
530 ///    An unsigned 64-bit integer whose bits are to be cleared.
531 /// \returns An unsigned 64-bit integer containing the result of clearing the
532 ///    bits from the source operand.
533 /// \see __blsi_u64
534 #define _blsi_u64 __blsi_u64
535 
536 /// Creates a mask whose bits are set to 1, using bit 0 up to and
537 ///    including the least significant bit that is set to 1 in the source
538 ///    operand and returns the result.
539 ///
540 /// \headerfile <x86intrin.h>
541 ///
542 /// This intrinsic corresponds to the \c BLSMSK instruction.
543 ///
544 /// \param __X
545 ///    An unsigned 64-bit integer used to create the mask.
546 /// \returns An unsigned 64-bit integer containing the newly created mask.
547 /// \see _blsmsk_u64
548 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
549 __blsmsk_u64(unsigned long long __X)
550 {
551   return __X ^ (__X - 1);
552 }
553 
554 /// Creates a mask whose bits are set to 1, using bit 0 up to and
555 ///    including the least significant bit that is set to 1 in the source
556 ///    operand and returns the result.
557 ///
558 /// \headerfile <x86intrin.h>
559 ///
560 /// \code
561 /// unsigned long long _blsmsk_u64(unsigned long long __X);
562 /// \endcode
563 ///
564 /// Macro alias of \c __blsmsk_u64; corresponds to the \c BLSMSK instruction.
565 ///
566 /// \param __X
567 ///    An unsigned 64-bit integer used to create the mask.
568 /// \returns An unsigned 64-bit integer containing the newly created mask.
569 /// \see __blsmsk_u64
570 #define _blsmsk_u64 __blsmsk_u64
571 
572 /// Clears the least significant bit that is set to 1 in the source
573 ///    operand and returns the result.
574 ///
575 /// \headerfile <x86intrin.h>
576 ///
577 /// This intrinsic corresponds to the \c BLSR instruction.
578 ///
579 /// \param __X
580 ///    An unsigned 64-bit integer containing the operand to be cleared.
581 /// \returns An unsigned 64-bit integer containing the result of clearing the
582 ///    source operand.
583 /// \see _blsr_u64
584 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
585 __blsr_u64(unsigned long long __X)
586 {
587   return __X & (__X - 1);
588 }
589 
590 /// Clears the least significant bit that is set to 1 in the source
591 ///    operand and returns the result.
592 ///
593 /// \headerfile <x86intrin.h>
594 ///
595 /// \code
596 /// unsigned long long _blsr_u64(unsigned long long __X);
597 /// \endcode
598 ///
599 /// Macro alias of \c __blsr_u64; corresponds to the \c BLSR instruction.
600 ///
601 /// \param __X
602 ///    An unsigned 64-bit integer containing the operand to be cleared.
603 /// \returns An unsigned 64-bit integer containing the result of clearing the
604 ///    source operand.
605 /// \see __blsr_u64
606 #define _blsr_u64 __blsr_u64
607 
608 #endif /* __x86_64__ */
609 
610 #undef __DEFAULT_FN_ATTRS
611 
612 #endif /* !defined(__SCE__) || __has_feature(modules) || defined(__BMI__) */
613 
614 #endif /* __BMIINTRIN_H */
615