/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_BITOPS_H
#define _ASM_X86_BITOPS_H

/*
 * Copyright 1992, Linus Torvalds.
 *
 * Note: inlines with more than a single statement should be marked
 * __always_inline to avoid problems with older gcc's inlining heuristics.
 */

#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
#endif

#include <linux/compiler.h>
#include <asm/alternative.h>
#include <asm/rmwcc.h>
#include <asm/barrier.h>

#if BITS_PER_LONG == 32
# define _BITOPS_LONG_SHIFT 5
#elif BITS_PER_LONG == 64
# define _BITOPS_LONG_SHIFT 6
#else
# error "Unexpected BITS_PER_LONG"
#endif

#define BIT_64(n)			(U64_C(1) << (n))

/*
 * These have to be done with inline assembly: that way the bit-setting
 * is guaranteed to be atomic. The test_and_*() operations return 0 if
 * the bit was clear before the operation and != 0 if it was set.
 *
 * bit 0 is the LSB of addr; bit BITS_PER_LONG is the LSB of (addr+1).
 */
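
/*
 * Example (editorial illustration, not part of the original header):
 * with 64-bit longs, bit 63 is the MSB of addr[0] and bit 65 is bit 1
 * of addr[1]:
 *
 *	DECLARE_BITMAP(map, 128);	// two unsigned longs on 64-bit
 *	arch_set_bit(65, map);		// atomically sets bit 1 of map[1]
 */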

#define RLONG_ADDR(x)			 "m" (*(volatile long *) (x))
#define WBYTE_ADDR(x)			"+m" (*(volatile char *) (x))

#define ADDR				RLONG_ADDR(addr)

/*
 * Locked ops that don't return the old value are implemented as a
 * mask operation on the byte that contains the bit.
 */
#define CONST_MASK_ADDR(nr, addr)	WBYTE_ADDR((void *)(addr) + ((nr)>>3))
#define CONST_MASK(nr)			(1 << ((nr) & 7))
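
/*
 * Worked example (editorial illustration): for a constant nr == 13,
 * CONST_MASK_ADDR(13, addr) names byte (13 >> 3) == 1 of the bitmap and
 * CONST_MASK(13) == 1 << (13 & 7) == 0x20, so the locked byte op below
 * only ever touches that one byte.
 */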

static __always_inline void
arch_set_bit(long nr, volatile unsigned long *addr)
{
	if (__builtin_constant_p(nr)) {
		asm_inline volatile(LOCK_PREFIX "orb %b1,%0"
			: CONST_MASK_ADDR(nr, addr)
			: "iq" (CONST_MASK(nr))
			: "memory");
	} else {
		asm_inline volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
			: : RLONG_ADDR(addr), "Ir" (nr) : "memory");
	}
}
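
/*
 * Illustrative sketch (editorial addition): a compile-time-constant bit
 * number takes the LOCK ORB path above, a variable one takes LOCK BTS:
 *
 *	arch_set_bit(5, map);		// lock orb $0x20,(map)
 *	arch_set_bit(nr, map);		// lock bts nr,(map) when nr is not constant
 */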

static __always_inline void
arch___set_bit(unsigned long nr, volatile unsigned long *addr)
{
	asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}

static __always_inline void
arch_clear_bit(long nr, volatile unsigned long *addr)
{
	if (__builtin_constant_p(nr)) {
		asm_inline volatile(LOCK_PREFIX "andb %b1,%0"
			: CONST_MASK_ADDR(nr, addr)
			: "iq" (~CONST_MASK(nr)));
	} else {
		asm_inline volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
			: : RLONG_ADDR(addr), "Ir" (nr) : "memory");
	}
}

static __always_inline void
arch_clear_bit_unlock(long nr, volatile unsigned long *addr)
{
	barrier();
	arch_clear_bit(nr, addr);
}

static __always_inline void
arch___clear_bit(unsigned long nr, volatile unsigned long *addr)
{
	asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}

static __always_inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
		volatile unsigned long *addr)
{
	bool negative;
	asm_inline volatile(LOCK_PREFIX "xorb %2,%1"
		: "=@ccs" (negative), WBYTE_ADDR(addr)
		: "iq" ((char)mask) : "memory");
	return negative;
}
#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte
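
/*
 * Usage sketch (editorial, illustrative only; names are made up): clear
 * a lock-style bit and, in the same locked instruction, learn whether
 * bit 7 of that byte (say, a "waiters" flag) was left set:
 *
 *	if (arch_xor_unlock_is_negative_byte(1UL << MY_LOCK_BIT, &word))
 *		wake_up_my_waiters();
 *
 * This assumes MY_LOCK_BIT lives in the low byte and is not bit 7; the
 * sign flag of the XORed byte is what "negative" reports.
 */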

static __always_inline void
arch___clear_bit_unlock(long nr, volatile unsigned long *addr)
{
	arch___clear_bit(nr, addr);
}

static __always_inline void
arch___change_bit(unsigned long nr, volatile unsigned long *addr)
{
	asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}

static __always_inline void
arch_change_bit(long nr, volatile unsigned long *addr)
{
	if (__builtin_constant_p(nr)) {
		asm_inline volatile(LOCK_PREFIX "xorb %b1,%0"
			: CONST_MASK_ADDR(nr, addr)
			: "iq" (CONST_MASK(nr)));
	} else {
		asm_inline volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
			: : RLONG_ADDR(addr), "Ir" (nr) : "memory");
	}
}

static __always_inline bool
arch_test_and_set_bit(long nr, volatile unsigned long *addr)
{
	return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, c, "Ir", nr);
}

static __always_inline bool
arch_test_and_set_bit_lock(long nr, volatile unsigned long *addr)
{
	return arch_test_and_set_bit(nr, addr);
}
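
/*
 * Usage sketch (editorial, illustrative only; names are made up):
 * test_and_set_bit_lock() paired with clear_bit_unlock() gives a simple
 * bit lock:
 *
 *	if (!arch_test_and_set_bit_lock(MY_LOCK_BIT, &flags)) {
 *		// lock acquired: critical section
 *		arch_clear_bit_unlock(MY_LOCK_BIT, &flags);
 *	}
 *
 * Kernel code would normally use the instrumented wrappers pulled in
 * from <asm-generic/bitops/instrumented-lock.h> at the end of this file.
 */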

static __always_inline bool
arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
{
	bool oldbit;

	asm(__ASM_SIZE(bts) " %2,%1"
	    : "=@ccc" (oldbit)
	    : ADDR, "Ir" (nr) : "memory");
	return oldbit;
}

static __always_inline bool
arch_test_and_clear_bit(long nr, volatile unsigned long *addr)
{
	return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), *addr, c, "Ir", nr);
}

/*
 * Note: the operation is performed atomically with respect to
 * the local CPU, but not other CPUs. Portable code should not
 * rely on this behaviour.
 * KVM relies on this behaviour on x86 for modifying memory that is also
 * accessed from a hypervisor on the same CPU when running in a VM; don't
 * change this without also updating arch/x86/kernel/kvm.c.
 */
static __always_inline bool
arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
{
	bool oldbit;

	asm volatile(__ASM_SIZE(btr) " %2,%1"
		     : "=@ccc" (oldbit)
		     : ADDR, "Ir" (nr) : "memory");
	return oldbit;
}

static __always_inline bool
arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
{
	bool oldbit;

	asm volatile(__ASM_SIZE(btc) " %2,%1"
		     : "=@ccc" (oldbit)
		     : ADDR, "Ir" (nr) : "memory");

	return oldbit;
}

static __always_inline bool
arch_test_and_change_bit(long nr, volatile unsigned long *addr)
{
	return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr);
}

static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr)
{
	return ((1UL << (nr & (BITS_PER_LONG-1))) &
		(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
}

static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr)
{
	bool oldbit;

	asm volatile("testb %2,%1"
		     : "=@ccnz" (oldbit)
		     : "m" (((unsigned char *)addr)[nr >> 3]),
		       "i" (1 << (nr & 7))
		     :"memory");

	return oldbit;
}

static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr)
{
	bool oldbit;

	asm volatile(__ASM_SIZE(bt) " %2,%1"
		     : "=@ccc" (oldbit)
		     : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");

	return oldbit;
}

static __always_inline bool
arch_test_bit(unsigned long nr, const volatile unsigned long *addr)
{
	return __builtin_constant_p(nr) ? constant_test_bit(nr, addr) :
					  variable_test_bit(nr, addr);
}

static __always_inline bool
arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr)
{
	return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) :
					  variable_test_bit(nr, addr);
}
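
/*
 * Illustrative note (editorial addition): arch_test_bit() folds to a
 * plain load-and-mask for constant bit numbers and to a BT instruction
 * otherwise; arch_test_bit_acquire() forces the load into a TESTB asm so
 * the compiler cannot reorder or elide it (x86 loads already provide
 * acquire ordering in hardware):
 *
 *	if (arch_test_bit(3, map))	// constant: (map[0] >> 3) & 1
 *		...;
 */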

static __always_inline __attribute_const__ unsigned long variable__ffs(unsigned long word)
{
	asm("tzcnt %1,%0"
		: "=r" (word)
		: ASM_INPUT_RM (word));
	return word;
}

/**
 * __ffs - find first set bit in word
 * @word: The word to search
 *
 * Undefined if no bit exists, so code should check against 0 first.
 */
#define __ffs(word)				\
	(__builtin_constant_p(word) ?		\
	 (unsigned long)__builtin_ctzl(word) :	\
	 variable__ffs(word))
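
/*
 * Worked example (editorial illustration): __ffs() is 0-based:
 *
 *	__ffs(0x50UL) == 4		// lowest set bit is bit 4
 *	__ffs(1UL)    == 0
 *
 * __ffs(0) is undefined: TZCNT would return BITS_PER_LONG, but the
 * constant-folded __builtin_ctzl(0) is undefined behaviour.
 */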

static __always_inline __attribute_const__ unsigned long variable_ffz(unsigned long word)
{
	return variable__ffs(~word);
}

/**
 * ffz - find first zero bit in word
 * @word: The word to search
 *
 * Undefined if no zero exists, so code should check against ~0UL first.
 */
#define ffz(word)				\
	(__builtin_constant_p(word) ?		\
	 (unsigned long)__builtin_ctzl(~word) :	\
	 variable_ffz(word))
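
/*
 * Worked example (editorial illustration): ffz(word) is __ffs(~word):
 *
 *	ffz(0x0fUL) == 4		// lowest clear bit is bit 4
 *	ffz(0UL)    == 0
 *
 * ffz(~0UL) is undefined, as documented above.
 */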

/*
 * __fls: find last set bit in word
 * @word: The word to search
 *
 * Undefined if no set bit exists, so code should check against 0 first.
 */
static __always_inline __attribute_const__ unsigned long __fls(unsigned long word)
{
	if (__builtin_constant_p(word))
		return BITS_PER_LONG - 1 - __builtin_clzl(word);

	asm("bsr %1,%0"
	    : "=r" (word)
	    : ASM_INPUT_RM (word));
	return word;
}
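
/*
 * Worked example (editorial illustration): __fls() is 0-based:
 *
 *	__fls(0x50UL) == 6		// highest set bit is bit 6
 *	__fls(1UL)    == 0
 *
 * __fls(0) is undefined: Intel documents the BSR output as undefined for
 * a zero input, and __builtin_clzl(0) is undefined behaviour.
 */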

#undef ADDR

#ifdef __KERNEL__
static __always_inline __attribute_const__ int variable_ffs(int x)
{
	int r;

#ifdef CONFIG_X86_64
	/*
	 * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the
	 * dest reg is undefined if x==0, but their CPU architects say it is in
	 * fact written with its previous value, except that the top 32 bits
	 * are cleared.
	 *
	 * We cannot do this on 32 bits because at the very least some
	 * 486 CPUs did not behave this way.
	 */
	asm("bsfl %1,%0"
	    : "=r" (r)
	    : ASM_INPUT_RM (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV)
	asm("bsfl %1,%0\n\t"
	    "cmovzl %2,%0"
	    : "=&r" (r) : "rm" (x), "r" (-1));
#else
	asm("bsfl %1,%0\n\t"
	    "jnz 1f\n\t"
	    "movl $-1,%0\n"
	    "1:" : "=r" (r) : "rm" (x));
#endif
	return r + 1;
}

/**
 * ffs - find first set bit in word
 * @x: the word to search
 *
 * This is defined the same way as the libc and compiler builtin ffs
 * routines and therefore differs in spirit from the other bitops.
 *
 * ffs(value) returns 0 if value is 0 or the position of the first
 * set bit if value is nonzero. The first (least significant) bit
 * is at position 1.
 */
#define ffs(x) (__builtin_constant_p(x) ? __builtin_ffs(x) : variable_ffs(x))
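
/*
 * Worked examples (editorial illustration): ffs() is 1-based and defined
 * for 0, unlike __ffs():
 *
 *	ffs(0)    == 0
 *	ffs(0x10) == 5		// bit 4 set -> position 5
 *	ffs(-1)   == 1
 *
 * On 64-bit, the "0" (-1) constraint above preloads the output with -1
 * so that r + 1 evaluates to 0 when BSFL leaves the register untouched
 * for x == 0; the CMOV and branch variants reach the same result
 * explicitly.
 */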

/**
 * fls - find last set bit in word
 * @x: the word to search
 *
 * This is defined in a similar way to the libc and compiler builtin
 * ffs, but returns the position of the most significant set bit.
 *
 * fls(value) returns 0 if value is 0 or the position of the last
 * set bit if value is nonzero. The last (most significant) bit is
 * at position 32.
 */
static __always_inline __attribute_const__ int fls(unsigned int x)
{
	int r;

	if (__builtin_constant_p(x))
		return x ? 32 - __builtin_clz(x) : 0;

#ifdef CONFIG_X86_64
	/*
	 * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the
	 * dest reg is undefined if x==0, but their CPU architects say it is in
	 * fact written with its previous value, except that the top 32 bits
	 * are cleared.
	 *
	 * We cannot do this on 32 bits because at the very least some
	 * 486 CPUs did not behave this way.
	 */
	asm("bsrl %1,%0"
	    : "=r" (r)
	    : ASM_INPUT_RM (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV)
	asm("bsrl %1,%0\n\t"
	    "cmovzl %2,%0"
	    : "=&r" (r) : "rm" (x), "rm" (-1));
#else
	asm("bsrl %1,%0\n\t"
	    "jnz 1f\n\t"
	    "movl $-1,%0\n"
	    "1:" : "=r" (r) : "rm" (x));
#endif
	return r + 1;
}
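
/*
 * Worked examples (editorial illustration): fls() is 1-based:
 *
 *	fls(0)           == 0
 *	fls(1)           == 1
 *	fls(0x80000000u) == 32
 *
 * so for nonzero 32-bit x, fls(x) == __fls(x) + 1.
 */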

/**
 * fls64 - find last set bit in a 64-bit word
 * @x: the word to search
 *
 * This is defined in a similar way to the libc and compiler builtin
 * ffsll, but returns the position of the most significant set bit.
 *
 * fls64(value) returns 0 if value is 0 or the position of the last
 * set bit if value is nonzero. The last (most significant) bit is
 * at position 64.
 */
#ifdef CONFIG_X86_64
static __always_inline __attribute_const__ int fls64(__u64 x)
{
	int bitpos = -1;

	if (__builtin_constant_p(x))
		return x ? 64 - __builtin_clzll(x) : 0;
	/*
	 * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
	 * dest reg is undefined if x==0, but their CPU architects say it is in
	 * fact written with its previous value.
	 */
	asm("bsrq %1,%q0"
	    : "+r" (bitpos)
	    : ASM_INPUT_RM (x));
	return bitpos + 1;
}
#else
#include <asm-generic/bitops/fls64.h>
#endif
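
/*
 * Worked examples (editorial illustration): fls64() is 1-based:
 *
 *	fls64(0)          == 0
 *	fls64(1ULL << 63) == 64
 *
 * On 64-bit, bitpos starts at -1 so bitpos + 1 is 0 when BSRQ leaves it
 * untouched for x == 0; 32-bit kernels use the generic fls64() instead.
 */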

#include <asm-generic/bitops/sched.h>

#include <asm/arch_hweight.h>

#include <asm-generic/bitops/const_hweight.h>

#include <asm-generic/bitops/instrumented-atomic.h>
#include <asm-generic/bitops/instrumented-non-atomic.h>
#include <asm-generic/bitops/instrumented-lock.h>

#include <asm-generic/bitops/le.h>

#include <asm-generic/bitops/ext2-atomic-setbit.h>

#endif /* __KERNEL__ */
#endif /* _ASM_X86_BITOPS_H */