xref: /freebsd/sys/i386/include/atomic.h (revision c1839039b193b48c8eb7520c75487f0bd4340c3b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 1998 Doug Rabson
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 #ifndef _MACHINE_ATOMIC_H_
31 #define	_MACHINE_ATOMIC_H_
32 
33 #include <sys/atomic_common.h>
34 
35 #ifdef _KERNEL
36 #include <machine/md_var.h>
37 #include <machine/specialreg.h>
38 #endif
39 
40 #ifndef __OFFSETOF_MONITORBUF
41 /*
42  * __OFFSETOF_MONITORBUF == __pcpu_offset(pc_monitorbuf).
43  *
44  * The open-coded number is used instead of the symbolic expression to
45  * avoid a dependency on sys/pcpu.h in machine/atomic.h consumers.
46  * An assertion in i386/vm_machdep.c ensures that the value is correct.
47  */
48 #define	__OFFSETOF_MONITORBUF	0x80
49 
50 static __inline void
51 __mbk(void)
52 {
53 
54 	__asm __volatile("lock; addl $0,%%fs:%0"
55 	    : "+m" (*(u_int *)__OFFSETOF_MONITORBUF) : : "memory", "cc");
56 }
57 
/*
 * User-space Store/Load barrier: a locked add of zero to the word that
 * %esp points at.  The "memory" clobber also makes it a compiler barrier.
 */
static __inline void
__mbu(void)
{
	__asm __volatile(
	    "lock; addl $0,(%%esp)"
	    : /* no outputs */
	    : /* no inputs */
	    : "cc", "memory");
}
64 #endif
65 
66 /*
67  * Various simple operations on memory, each of which is atomic in the
68  * presence of interrupts and multiple processors.
69  *
70  * atomic_set_char(P, V)	(*(u_char *)(P) |= (V))
71  * atomic_clear_char(P, V)	(*(u_char *)(P) &= ~(V))
72  * atomic_add_char(P, V)	(*(u_char *)(P) += (V))
73  * atomic_subtract_char(P, V)	(*(u_char *)(P) -= (V))
74  *
75  * atomic_set_short(P, V)	(*(u_short *)(P) |= (V))
76  * atomic_clear_short(P, V)	(*(u_short *)(P) &= ~(V))
77  * atomic_add_short(P, V)	(*(u_short *)(P) += (V))
78  * atomic_subtract_short(P, V)	(*(u_short *)(P) -= (V))
79  *
80  * atomic_set_int(P, V)		(*(u_int *)(P) |= (V))
81  * atomic_clear_int(P, V)	(*(u_int *)(P) &= ~(V))
82  * atomic_add_int(P, V)		(*(u_int *)(P) += (V))
83  * atomic_subtract_int(P, V)	(*(u_int *)(P) -= (V))
84  * atomic_swap_int(P, V)	(return (*(u_int *)(P)); *(u_int *)(P) = (V);)
85  * atomic_readandclear_int(P)	(return (*(u_int *)(P)); *(u_int *)(P) = 0;)
86  *
87  * atomic_set_long(P, V)	(*(u_long *)(P) |= (V))
88  * atomic_clear_long(P, V)	(*(u_long *)(P) &= ~(V))
89  * atomic_add_long(P, V)	(*(u_long *)(P) += (V))
90  * atomic_subtract_long(P, V)	(*(u_long *)(P) -= (V))
91  * atomic_swap_long(P, V)	(return (*(u_long *)(P)); *(u_long *)(P) = (V);)
92  * atomic_readandclear_long(P)	(return (*(u_long *)(P)); *(u_long *)(P) = 0;)
93  */
94 
95 /*
96  * Always use lock prefixes.  The result is slightly less optimal for
97  * UP systems, but it matters less now, and sometimes UP is emulated
98  * over SMP.
99  *
100  * The assembly is volatilized to avoid code chunk removal by the compiler.
101  * GCC aggressively reorders operations and memory clobbering is necessary
102  * in order to avoid that for memory barriers.
103  */
/*
 * ATOMIC_ASM(NAME, TYPE, OP, CONS, V) expands to two functions:
 *
 *   atomic_NAME_barr_TYPE(p, v)  locked OP on *p with a "memory" clobber,
 *                                i.e. also a compiler barrier;
 *   atomic_NAME_TYPE(p, v)       the same locked OP without that clobber.
 *
 * OP is the instruction template (operand 0 is *p, operand 1 is V), CONS
 * is the constraint string for V, and V is an expression written in terms
 * of the function argument v.  The trailing "struct __hack" absorbs the
 * semicolon that follows each instantiation.
 */
#define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)				\
static __inline void							\
atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)		\
{									\
	__asm __volatile("lock; " OP					\
	    : "+m" (*p)							\
	    : CONS (V)							\
	    : "cc", "memory");						\
}									\
									\
static __inline void							\
atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)		\
{									\
	__asm __volatile("lock; " OP					\
	    : "+m" (*p)							\
	    : CONS (V)							\
	    : "cc");							\
}									\
struct __hack
123 
124 /*
125  * Atomic compare and set, used by the mutex functions.
126  *
127  * cmpset:
128  *	if (*dst == expect)
129  *		*dst = src
130  *
131  * fcmpset:
132  *	if (*dst == *expect)
133  *		*dst = src
134  *	else
135  *		*expect = *dst
136  *
137  * Returns 0 on failure, non-zero on success.
138  */
/*
 * ATOMIC_CMPSET(TYPE, CONS) expands to atomic_cmpset_TYPE() and
 * atomic_fcmpset_TYPE(), both built on "lock; cmpxchg".
 *
 * The expected value lives in the accumulator ("+a").  cmpxchg stores src
 * into *dst when the accumulator equals *dst and otherwise loads *dst into
 * the accumulator; ZF reports which happened and is captured with sete.
 * For fcmpset the accumulator operand is *expect, so a failed compare
 * leaves the observed value in *expect.  CONS is the register constraint
 * for src.
 *
 * Both functions return non-zero on success and 0 on failure.
 *
 * Like the other generator macros in this file, the expansion ends in
 * "struct __hack" so that the semicolon written after each instantiation
 * completes a declaration instead of being a stray file-scope ';'.
 */
#define	ATOMIC_CMPSET(TYPE, CONS)			\
static __inline int					\
atomic_cmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE expect, u_##TYPE src) \
{							\
	u_char res;					\
							\
	__asm __volatile(				\
	"	lock; cmpxchg	%3,%1 ;	"		\
	"	sete	%0 ;		"		\
	"# atomic_cmpset_" #TYPE "	"		\
	: "=q" (res),			/* 0 */		\
	  "+m" (*dst),			/* 1 */		\
	  "+a" (expect)			/* 2 */		\
	: CONS (src)			/* 3 */		\
	: "memory", "cc");				\
	return (res);					\
}							\
							\
static __inline int					\
atomic_fcmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE *expect, u_##TYPE src) \
{							\
	u_char res;					\
							\
	__asm __volatile(				\
	"	lock; cmpxchg	%3,%1 ;	"		\
	"	sete	%0 ;		"		\
	"# atomic_fcmpset_" #TYPE "	"		\
	: "=q" (res),			/* 0 */		\
	  "+m" (*dst),			/* 1 */		\
	  "+a" (*expect)		/* 2 */		\
	: CONS (src)			/* 3 */		\
	: "memory", "cc");				\
	return (res);					\
}							\
struct __hack
173 
174 ATOMIC_CMPSET(char, "q");
175 ATOMIC_CMPSET(short, "r");
176 ATOMIC_CMPSET(int, "r");
177 
178 /*
179  * Atomically add the value of v to the integer pointed to by p and return
180  * the previous value of *p.
181  */
182 static __inline u_int
183 atomic_fetchadd_int(volatile u_int *p, u_int v)
184 {
185 
186 	__asm __volatile(
187 	"	lock; xaddl	%0,%1 ;	"
188 	"# atomic_fetchadd_int"
189 	: "+r" (v),			/* 0 */
190 	  "+m" (*p)			/* 1 */
191 	: : "cc");
192 	return (v);
193 }
194 
195 static __inline int
196 atomic_testandset_int(volatile u_int *p, u_int v)
197 {
198 	u_char res;
199 
200 	__asm __volatile(
201 	"	lock; btsl	%2,%1 ;	"
202 	"	setc	%0 ;		"
203 	"# atomic_testandset_int"
204 	: "=q" (res),			/* 0 */
205 	  "+m" (*p)			/* 1 */
206 	: "Ir" (v & 0x1f)		/* 2 */
207 	: "cc");
208 	return (res);
209 }
210 
211 static __inline int
212 atomic_testandclear_int(volatile u_int *p, u_int v)
213 {
214 	u_char res;
215 
216 	__asm __volatile(
217 	"	lock; btrl	%2,%1 ;	"
218 	"	setc	%0 ;		"
219 	"# atomic_testandclear_int"
220 	: "=q" (res),			/* 0 */
221 	  "+m" (*p)			/* 1 */
222 	: "Ir" (v & 0x1f)		/* 2 */
223 	: "cc");
224 	return (res);
225 }
226 
227 /*
228  * We assume that a = b will do atomic loads and stores.  Due to the
229  * IA32 memory model, a simple store guarantees release semantics.
230  *
231  * However, a load may pass a store if they are performed on distinct
232  * addresses, so we need Store/Load barrier for sequentially
233  * consistent fences in SMP kernels.  We use "lock addl $0,mem" for a
234  * Store/Load barrier, as recommended by the AMD Software Optimization
235  * Guide, and not mfence.  In the kernel, we use a private per-cpu
236  * cache line for "mem", to avoid introducing false data
237  * dependencies.  In user space, we use the word at the top of the
238  * stack.
239  *
240  * For UP kernels, however, the memory of the single processor is
241  * always consistent, so we only need to stop the compiler from
242  * reordering accesses in a way that violates the semantics of acquire
243  * and release.
244  */
245 
/* Pick the Store/Load barrier primitive that matches the build target. */
#ifndef _KERNEL
#define	__storeload_barrier()	__mbu()
#else /* _KERNEL */
#define	__storeload_barrier()	__mbk()
#endif /* !_KERNEL */
251 
/*
 * ATOMIC_LOAD(TYPE) expands to atomic_load_acq_TYPE(): a plain volatile
 * load followed by a compiler barrier, so later accesses are not moved
 * ahead of the load by the compiler.
 */
#define	ATOMIC_LOAD(TYPE)					\
static __inline u_##TYPE					\
atomic_load_acq_##TYPE(volatile u_##TYPE *p)			\
{								\
	u_##TYPE res = *p;					\
								\
	__compiler_membar();					\
	return (res);						\
}								\
struct __hack
263 
/*
 * ATOMIC_STORE(TYPE) expands to atomic_store_rel_TYPE(): a compiler
 * barrier followed by a plain volatile store, so earlier accesses are not
 * moved past the store by the compiler.
 */
#define	ATOMIC_STORE(TYPE)					\
static __inline void						\
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)	\
{								\
	__compiler_membar();					\
	*p = v;							\
}								\
struct __hack
273 
/* Acquire fence: implemented here as a compiler barrier only. */
static __inline void
atomic_thread_fence_acq(void)
{
	__compiler_membar();
}
280 
/* Release fence: implemented here as a compiler barrier only. */
static __inline void
atomic_thread_fence_rel(void)
{
	__compiler_membar();
}
287 
/* Acquire+release fence: implemented here as a compiler barrier only. */
static __inline void
atomic_thread_fence_acq_rel(void)
{
	__compiler_membar();
}
294 
/* Sequentially consistent fence: issues the Store/Load barrier. */
static __inline void
atomic_thread_fence_seq_cst(void)
{
	__storeload_barrier();
}
301 
302 #ifdef _KERNEL
303 
#ifdef WANT_FUNCTIONS
/*
 * Prototypes for the 64-bit primitives defined below.  Each operation has
 * an _i386 flavour (no CMPXCHG8B) and an _i586 flavour (CMPXCHG8B).  The
 * fcmpset pair is declared as well so that every such primitive has a
 * prototype in scope before its definition.
 */
int		atomic_cmpset_64_i386(volatile uint64_t *, uint64_t, uint64_t);
int		atomic_cmpset_64_i586(volatile uint64_t *, uint64_t, uint64_t);
int		atomic_fcmpset_64_i386(volatile uint64_t *, uint64_t *,
		    uint64_t);
int		atomic_fcmpset_64_i586(volatile uint64_t *, uint64_t *,
		    uint64_t);
uint64_t	atomic_load_acq_64_i386(volatile uint64_t *);
uint64_t	atomic_load_acq_64_i586(volatile uint64_t *);
void		atomic_store_rel_64_i386(volatile uint64_t *, uint64_t);
void		atomic_store_rel_64_i586(volatile uint64_t *, uint64_t);
uint64_t	atomic_swap_64_i386(volatile uint64_t *, uint64_t);
uint64_t	atomic_swap_64_i586(volatile uint64_t *, uint64_t);
#endif
314 
315 /* I486 does not support SMP or CMPXCHG8B. */
316 static __inline int
317 atomic_cmpset_64_i386(volatile uint64_t *dst, uint64_t expect, uint64_t src)
318 {
319 	volatile uint32_t *p;
320 	u_char res;
321 
322 	p = (volatile uint32_t *)dst;
323 	__asm __volatile(
324 	"	pushfl ;		"
325 	"	cli ;			"
326 	"	xorl	%1,%%eax ;	"
327 	"	xorl	%2,%%edx ;	"
328 	"	orl	%%edx,%%eax ;	"
329 	"	jne	1f ;		"
330 	"	movl	%4,%1 ;		"
331 	"	movl	%5,%2 ;		"
332 	"1:				"
333 	"	sete	%3 ;		"
334 	"	popfl"
335 	: "+A" (expect),		/* 0 */
336 	  "+m" (*p),			/* 1 */
337 	  "+m" (*(p + 1)),		/* 2 */
338 	  "=q" (res)			/* 3 */
339 	: "r" ((uint32_t)src),		/* 4 */
340 	  "r" ((uint32_t)(src >> 32))	/* 5 */
341 	: "memory", "cc");
342 	return (res);
343 }
344 
/*
 * fcmpset built on atomic_cmpset_64_i386(): on failure the current value
 * of *dst is copied into *expect.  Returns 1 on success, 0 on failure.
 */
static __inline int
atomic_fcmpset_64_i386(volatile uint64_t *dst, uint64_t *expect, uint64_t src)
{
	if (!atomic_cmpset_64_i386(dst, *expect, src)) {
		*expect = *dst;
		return (0);
	}
	return (1);
}
356 
/*
 * 64-bit load without CMPXCHG8B: both 32-bit halves are read into edx:eax
 * between cli and popfl, i.e. with interrupts disabled for the duration.
 */
static __inline uint64_t
atomic_load_acq_64_i386(volatile uint64_t *p)
{
	volatile uint32_t *const lo = (volatile uint32_t *)p;
	volatile uint32_t *const hi = lo + 1;
	uint64_t val;

	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%1,%%eax ;	"
	"	movl	%2,%%edx ;	"
	"	popfl"
	: "=&A" (val)			/* 0 */
	: "m" (*lo),			/* 1 */
	  "m" (*hi)			/* 2 */
	: "memory");
	return (val);
}
376 
/*
 * 64-bit store without CMPXCHG8B: edx:eax (v) is written to the two
 * 32-bit halves of *p between cli and popfl.
 */
static __inline void
atomic_store_rel_64_i386(volatile uint64_t *p, uint64_t v)
{
	volatile uint32_t *const lo = (volatile uint32_t *)p;
	volatile uint32_t *const hi = lo + 1;

	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%%eax,%0 ;	"
	"	movl	%%edx,%1 ;	"
	"	popfl"
	: "=m" (*lo),			/* 0 */
	  "=m" (*hi)			/* 1 */
	: "A" (v)			/* 2 */
	: "memory");
}
394 
/*
 * 64-bit exchange without CMPXCHG8B: between cli and popfl the old halves
 * of *p are read into edx:eax and the halves of v are stored in their
 * place.  Returns the previous value of *p.
 */
static __inline uint64_t
atomic_swap_64_i386(volatile uint64_t *p, uint64_t v)
{
	volatile uint32_t *const lo = (volatile uint32_t *)p;
	volatile uint32_t *const hi = lo + 1;
	const uint32_t new_lo = (uint32_t)v;
	const uint32_t new_hi = (uint32_t)(v >> 32);
	uint64_t old;

	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%1,%%eax ;	"
	"	movl	%2,%%edx ;	"
	"	movl	%4,%2 ;		"
	"	movl	%3,%1 ;		"
	"	popfl"
	: "=&A" (old),			/* 0 */
	  "+m" (*lo),			/* 1 */
	  "+m" (*hi)			/* 2 */
	: "r" (new_lo),			/* 3 */
	  "r" (new_hi));		/* 4 */
	return (old);
}
417 
418 static __inline int
419 atomic_cmpset_64_i586(volatile uint64_t *dst, uint64_t expect, uint64_t src)
420 {
421 	u_char res;
422 
423 	__asm __volatile(
424 	"	lock; cmpxchg8b %1 ;	"
425 	"	sete	%0"
426 	: "=q" (res),			/* 0 */
427 	  "+m" (*dst),			/* 1 */
428 	  "+A" (expect)			/* 2 */
429 	: "b" ((uint32_t)src),		/* 3 */
430 	  "c" ((uint32_t)(src >> 32))	/* 4 */
431 	: "memory", "cc");
432 	return (res);
433 }
434 
435 static __inline int
436 atomic_fcmpset_64_i586(volatile uint64_t *dst, uint64_t *expect, uint64_t src)
437 {
438 	u_char res;
439 
440 	__asm __volatile(
441 	"	lock; cmpxchg8b %1 ;	"
442 	"	sete	%0"
443 	: "=q" (res),			/* 0 */
444 	  "+m" (*dst),			/* 1 */
445 	  "+A" (*expect)		/* 2 */
446 	: "b" ((uint32_t)src),		/* 3 */
447 	  "c" ((uint32_t)(src >> 32))	/* 4 */
448 	: "memory", "cc");
449 	return (res);
450 }
451 
/*
 * 64-bit load via "lock; cmpxchg8b".  edx:eax is first made equal to
 * ecx:ebx, so the instruction either rewrites *p with the value it already
 * holds or loads *p into edx:eax; in both cases edx:eax ends up holding
 * *p.  Because *p is a read-write operand the location must be writable.
 */
static __inline uint64_t
atomic_load_acq_64_i586(volatile uint64_t *p)
{
	uint64_t val;

	__asm __volatile(
	"	movl	%%ebx,%%eax ;	"
	"	movl	%%ecx,%%edx ;	"
	"	lock; cmpxchg8b %1"
	: "=&A" (val),			/* 0 */
	  "+m" (*p)			/* 1 */
	: /* no inputs */
	: "memory", "cc");
	return (val);
}
466 
/*
 * 64-bit store via a "lock; cmpxchg8b" loop.  v (edx:eax) is copied to
 * ecx:ebx as the value to store; each failed attempt reloads edx:eax with
 * the current *p, and the loop repeats until the exchange succeeds.
 */
static __inline void
atomic_store_rel_64_i586(volatile uint64_t *p, uint64_t v)
{
	__asm __volatile(
	"	movl	%%eax,%%ebx ;	"
	"	movl	%%edx,%%ecx ;	"
	"1:				"
	"	lock; cmpxchg8b %0 ;	"
	"	jne	1b"
	: "+m" (*p),			/* 0 */
	  "+A" (v)			/* 1 */
	: /* no inputs */
	: "ebx", "ecx", "memory", "cc");
}
481 
/*
 * 64-bit exchange via a "lock; cmpxchg8b" loop.  v is copied to ecx:ebx
 * as the new value.  When the exchange finally succeeds, edx:eax (the
 * in/out operand v) holds the value that was replaced, which is returned.
 */
static __inline uint64_t
atomic_swap_64_i586(volatile uint64_t *p, uint64_t v)
{
	__asm __volatile(
	"	movl	%%eax,%%ebx ;	"
	"	movl	%%edx,%%ecx ;	"
	"1:				"
	"	lock; cmpxchg8b %0 ;	"
	"	jne	1b"
	: "+m" (*p),			/* 0 */
	  "+A" (v)			/* 1 */
	: /* no inputs */
	: "ebx", "ecx", "memory", "cc");
	return (v);
}
497 
498 static __inline int
499 atomic_cmpset_64(volatile uint64_t *dst, uint64_t expect, uint64_t src)
500 {
501 
502 	if ((cpu_feature & CPUID_CX8) == 0)
503 		return (atomic_cmpset_64_i386(dst, expect, src));
504 	else
505 		return (atomic_cmpset_64_i586(dst, expect, src));
506 }
507 
508 static __inline int
509 atomic_fcmpset_64(volatile uint64_t *dst, uint64_t *expect, uint64_t src)
510 {
511 
512   	if ((cpu_feature & CPUID_CX8) == 0)
513 		return (atomic_fcmpset_64_i386(dst, expect, src));
514 	else
515 		return (atomic_fcmpset_64_i586(dst, expect, src));
516 }
517 
518 static __inline uint64_t
519 atomic_load_acq_64(volatile uint64_t *p)
520 {
521 
522 	if ((cpu_feature & CPUID_CX8) == 0)
523 		return (atomic_load_acq_64_i386(p));
524 	else
525 		return (atomic_load_acq_64_i586(p));
526 }
527 
528 static __inline void
529 atomic_store_rel_64(volatile uint64_t *p, uint64_t v)
530 {
531 
532 	if ((cpu_feature & CPUID_CX8) == 0)
533 		atomic_store_rel_64_i386(p, v);
534 	else
535 		atomic_store_rel_64_i586(p, v);
536 }
537 
538 static __inline uint64_t
539 atomic_swap_64(volatile uint64_t *p, uint64_t v)
540 {
541 
542 	if ((cpu_feature & CPUID_CX8) == 0)
543 		return (atomic_swap_64_i386(p, v));
544 	else
545 		return (atomic_swap_64_i586(p, v));
546 }
547 
/*
 * Add v to *p with a compare-and-set retry loop and return the value *p
 * held immediately before the successful update.
 */
static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t old;

	do {
		old = *p;
	} while (atomic_cmpset_64(p, old, old + v) == 0);
	return (old);
}
558 
/* Add v to *p, retrying the compare-and-set until it succeeds. */
static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t old;

	do {
		old = *p;
	} while (atomic_cmpset_64(p, old, old + v) == 0);
}
570 
/* Subtract v from *p, retrying the compare-and-set until it succeeds. */
static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t old;

	do {
		old = *p;
	} while (atomic_cmpset_64(p, old, old - v) == 0);
}
582 
583 #endif /* _KERNEL */
584 
585 ATOMIC_ASM(set,	     char,  "orb %b1,%0",  "iq",  v);
586 ATOMIC_ASM(clear,    char,  "andb %b1,%0", "iq", ~v);
587 ATOMIC_ASM(add,	     char,  "addb %b1,%0", "iq",  v);
588 ATOMIC_ASM(subtract, char,  "subb %b1,%0", "iq",  v);
589 
590 ATOMIC_ASM(set,	     short, "orw %w1,%0",  "ir",  v);
591 ATOMIC_ASM(clear,    short, "andw %w1,%0", "ir", ~v);
592 ATOMIC_ASM(add,	     short, "addw %w1,%0", "ir",  v);
593 ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir",  v);
594 
595 ATOMIC_ASM(set,	     int,   "orl %1,%0",   "ir",  v);
596 ATOMIC_ASM(clear,    int,   "andl %1,%0",  "ir", ~v);
597 ATOMIC_ASM(add,	     int,   "addl %1,%0",  "ir",  v);
598 ATOMIC_ASM(subtract, int,   "subl %1,%0",  "ir",  v);
599 
600 ATOMIC_ASM(set,	     long,  "orl %1,%0",   "ir",  v);
601 ATOMIC_ASM(clear,    long,  "andl %1,%0",  "ir", ~v);
602 ATOMIC_ASM(add,	     long,  "addl %1,%0",  "ir",  v);
603 ATOMIC_ASM(subtract, long,  "subl %1,%0",  "ir",  v);
604 
605 #define	ATOMIC_LOADSTORE(TYPE)				\
606 	ATOMIC_LOAD(TYPE);				\
607 	ATOMIC_STORE(TYPE)
608 
609 ATOMIC_LOADSTORE(char);
610 ATOMIC_LOADSTORE(short);
611 ATOMIC_LOADSTORE(int);
612 ATOMIC_LOADSTORE(long);
613 
614 #undef ATOMIC_ASM
615 #undef ATOMIC_LOAD
616 #undef ATOMIC_STORE
617 #undef ATOMIC_LOADSTORE
618 
619 #ifndef WANT_FUNCTIONS
620 
621 static __inline int
622 atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src)
623 {
624 
625 	return (atomic_cmpset_int((volatile u_int *)dst, (u_int)expect,
626 	    (u_int)src));
627 }
628 
629 static __inline int
630 atomic_fcmpset_long(volatile u_long *dst, u_long *expect, u_long src)
631 {
632 
633 	return (atomic_fcmpset_int((volatile u_int *)dst, (u_int *)expect,
634 	    (u_int)src));
635 }
636 
637 static __inline u_long
638 atomic_fetchadd_long(volatile u_long *p, u_long v)
639 {
640 
641 	return (atomic_fetchadd_int((volatile u_int *)p, (u_int)v));
642 }
643 
644 static __inline int
645 atomic_testandset_long(volatile u_long *p, u_int v)
646 {
647 
648 	return (atomic_testandset_int((volatile u_int *)p, v));
649 }
650 
651 static __inline int
652 atomic_testandclear_long(volatile u_long *p, u_int v)
653 {
654 
655 	return (atomic_testandclear_int((volatile u_int *)p, v));
656 }
657 
658 /* Read the current value and store a new value in the destination. */
659 static __inline u_int
660 atomic_swap_int(volatile u_int *p, u_int v)
661 {
662 
663 	__asm __volatile(
664 	"	xchgl	%1,%0 ;		"
665 	"# atomic_swap_int"
666 	: "+r" (v),			/* 0 */
667 	  "+m" (*p));			/* 1 */
668 	return (v);
669 }
670 
671 static __inline u_long
672 atomic_swap_long(volatile u_long *p, u_long v)
673 {
674 
675 	return (atomic_swap_int((volatile u_int *)p, (u_int)v));
676 }
677 
/* u_char: acquire and release flavours share one implementation each. */
#define atomic_set_acq_char       atomic_set_barr_char
#define atomic_clear_acq_char     atomic_clear_barr_char
#define atomic_add_acq_char       atomic_add_barr_char
#define atomic_subtract_acq_char  atomic_subtract_barr_char
#define atomic_cmpset_acq_char    atomic_cmpset_char
#define atomic_fcmpset_acq_char   atomic_fcmpset_char

#define atomic_set_rel_char       atomic_set_barr_char
#define atomic_clear_rel_char     atomic_clear_barr_char
#define atomic_add_rel_char       atomic_add_barr_char
#define atomic_subtract_rel_char  atomic_subtract_barr_char
#define atomic_cmpset_rel_char    atomic_cmpset_char
#define atomic_fcmpset_rel_char   atomic_fcmpset_char
690 
/* u_short: acquire and release flavours share one implementation each. */
#define atomic_set_acq_short       atomic_set_barr_short
#define atomic_clear_acq_short     atomic_clear_barr_short
#define atomic_add_acq_short       atomic_add_barr_short
#define atomic_subtract_acq_short  atomic_subtract_barr_short
#define atomic_cmpset_acq_short    atomic_cmpset_short
#define atomic_fcmpset_acq_short   atomic_fcmpset_short

#define atomic_set_rel_short       atomic_set_barr_short
#define atomic_clear_rel_short     atomic_clear_barr_short
#define atomic_add_rel_short       atomic_add_barr_short
#define atomic_subtract_rel_short  atomic_subtract_barr_short
#define atomic_cmpset_rel_short    atomic_cmpset_short
#define atomic_fcmpset_rel_short   atomic_fcmpset_short
703 
/* u_int: acquire and release flavours share one implementation each. */
#define atomic_set_acq_int       atomic_set_barr_int
#define atomic_clear_acq_int     atomic_clear_barr_int
#define atomic_add_acq_int       atomic_add_barr_int
#define atomic_subtract_acq_int  atomic_subtract_barr_int
#define atomic_cmpset_acq_int    atomic_cmpset_int
#define atomic_fcmpset_acq_int   atomic_fcmpset_int

#define atomic_set_rel_int       atomic_set_barr_int
#define atomic_clear_rel_int     atomic_clear_barr_int
#define atomic_add_rel_int       atomic_add_barr_int
#define atomic_subtract_rel_int  atomic_subtract_barr_int
#define atomic_cmpset_rel_int    atomic_cmpset_int
#define atomic_fcmpset_rel_int   atomic_fcmpset_int
716 
/* u_long: acquire and release flavours share one implementation each. */
#define atomic_set_acq_long       atomic_set_barr_long
#define atomic_clear_acq_long     atomic_clear_barr_long
#define atomic_add_acq_long       atomic_add_barr_long
#define atomic_subtract_acq_long  atomic_subtract_barr_long
#define atomic_cmpset_acq_long    atomic_cmpset_long
#define atomic_fcmpset_acq_long   atomic_fcmpset_long

#define atomic_set_rel_long       atomic_set_barr_long
#define atomic_clear_rel_long     atomic_clear_barr_long
#define atomic_add_rel_long       atomic_add_barr_long
#define atomic_subtract_rel_long  atomic_subtract_barr_long
#define atomic_cmpset_rel_long    atomic_cmpset_long
#define atomic_fcmpset_rel_long   atomic_fcmpset_long
729 
/* Read-and-clear is an exchange with zero. */
#define atomic_readandclear_long(p)  atomic_swap_long(p, 0)
#define atomic_readandclear_int(p)   atomic_swap_int(p, 0)
732 #define	atomic_testandset_acq_long	atomic_testandset_long
733 
/* 8-bit operations: aliases of the u_char routines. */
#define atomic_set_8           atomic_set_char
#define atomic_clear_8         atomic_clear_char
#define atomic_add_8           atomic_add_char
#define atomic_subtract_8      atomic_subtract_char
#define atomic_cmpset_8        atomic_cmpset_char
#define atomic_fcmpset_8       atomic_fcmpset_char

/* ...with acquire semantics. */
#define atomic_set_acq_8       atomic_set_acq_char
#define atomic_clear_acq_8     atomic_clear_acq_char
#define atomic_add_acq_8       atomic_add_acq_char
#define atomic_subtract_acq_8  atomic_subtract_acq_char
#define atomic_load_acq_8      atomic_load_acq_char
#define atomic_cmpset_acq_8    atomic_cmpset_acq_char
#define atomic_fcmpset_acq_8   atomic_fcmpset_acq_char

/* ...with release semantics. */
#define atomic_set_rel_8       atomic_set_rel_char
#define atomic_clear_rel_8     atomic_clear_rel_char
#define atomic_add_rel_8       atomic_add_rel_char
#define atomic_subtract_rel_8  atomic_subtract_rel_char
#define atomic_store_rel_8     atomic_store_rel_char
#define atomic_cmpset_rel_8    atomic_cmpset_rel_char
#define atomic_fcmpset_rel_8   atomic_fcmpset_rel_char
755 
/* 16-bit operations: aliases of the u_short routines. */
#define atomic_set_16           atomic_set_short
#define atomic_clear_16         atomic_clear_short
#define atomic_add_16           atomic_add_short
#define atomic_subtract_16      atomic_subtract_short
#define atomic_cmpset_16        atomic_cmpset_short
#define atomic_fcmpset_16       atomic_fcmpset_short

/* ...with acquire semantics. */
#define atomic_set_acq_16       atomic_set_acq_short
#define atomic_clear_acq_16     atomic_clear_acq_short
#define atomic_add_acq_16       atomic_add_acq_short
#define atomic_subtract_acq_16  atomic_subtract_acq_short
#define atomic_load_acq_16      atomic_load_acq_short
#define atomic_cmpset_acq_16    atomic_cmpset_acq_short
#define atomic_fcmpset_acq_16   atomic_fcmpset_acq_short

/* ...with release semantics. */
#define atomic_set_rel_16       atomic_set_rel_short
#define atomic_clear_rel_16     atomic_clear_rel_short
#define atomic_add_rel_16       atomic_add_rel_short
#define atomic_subtract_rel_16  atomic_subtract_rel_short
#define atomic_store_rel_16     atomic_store_rel_short
#define atomic_cmpset_rel_16    atomic_cmpset_rel_short
#define atomic_fcmpset_rel_16   atomic_fcmpset_rel_short
777 
/* 32-bit operations: aliases of the u_int routines. */
#define atomic_set_32           atomic_set_int
#define atomic_clear_32         atomic_clear_int
#define atomic_add_32           atomic_add_int
#define atomic_subtract_32      atomic_subtract_int
#define atomic_cmpset_32        atomic_cmpset_int
#define atomic_fcmpset_32       atomic_fcmpset_int
#define atomic_swap_32          atomic_swap_int
#define atomic_readandclear_32  atomic_readandclear_int
#define atomic_fetchadd_32      atomic_fetchadd_int
#define atomic_testandset_32    atomic_testandset_int
#define atomic_testandclear_32  atomic_testandclear_int

/* ...with acquire semantics. */
#define atomic_set_acq_32       atomic_set_acq_int
#define atomic_clear_acq_32     atomic_clear_acq_int
#define atomic_add_acq_32       atomic_add_acq_int
#define atomic_subtract_acq_32  atomic_subtract_acq_int
#define atomic_load_acq_32      atomic_load_acq_int
#define atomic_cmpset_acq_32    atomic_cmpset_acq_int
#define atomic_fcmpset_acq_32   atomic_fcmpset_acq_int

/* ...with release semantics. */
#define atomic_set_rel_32       atomic_set_rel_int
#define atomic_clear_rel_32     atomic_clear_rel_int
#define atomic_add_rel_32       atomic_add_rel_int
#define atomic_subtract_rel_32  atomic_subtract_rel_int
#define atomic_store_rel_32     atomic_store_rel_int
#define atomic_cmpset_rel_32    atomic_cmpset_rel_int
#define atomic_fcmpset_rel_32   atomic_fcmpset_rel_int
804 
#ifdef _KERNEL
/*
 * 64-bit operations (kernel only).  The acquire and release names map to
 * the base routines, and the plain load/store names map to the acq/rel
 * routines.
 */
#define atomic_cmpset_acq_64    atomic_cmpset_64
#define atomic_fcmpset_acq_64   atomic_fcmpset_64
#define atomic_fetchadd_acq_64  atomic_fetchadd_64
#define atomic_add_acq_64       atomic_add_64
#define atomic_subtract_acq_64  atomic_subtract_64

#define atomic_cmpset_rel_64    atomic_cmpset_64
#define atomic_fcmpset_rel_64   atomic_fcmpset_64
#define atomic_fetchadd_rel_64  atomic_fetchadd_64
#define atomic_add_rel_64       atomic_add_64
#define atomic_subtract_rel_64  atomic_subtract_64

#define atomic_load_64          atomic_load_acq_64
#define atomic_store_64         atomic_store_rel_64
#endif
820 
/*
 * Operations on pointers.  Each wrapper casts the target to
 * (volatile u_int *) and the value to u_int, then calls the u_int routine.
 */
#define	atomic_set_ptr(p, v)						\
	atomic_set_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_set_acq_ptr(p, v)					\
	atomic_set_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_set_rel_ptr(p, v)					\
	atomic_set_rel_int((volatile u_int *)(p), (u_int)(v))

#define	atomic_clear_ptr(p, v)						\
	atomic_clear_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_acq_ptr(p, v)					\
	atomic_clear_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_rel_ptr(p, v)					\
	atomic_clear_rel_int((volatile u_int *)(p), (u_int)(v))

#define	atomic_add_ptr(p, v)						\
	atomic_add_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_acq_ptr(p, v)					\
	atomic_add_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_rel_ptr(p, v)					\
	atomic_add_rel_int((volatile u_int *)(p), (u_int)(v))

#define	atomic_subtract_ptr(p, v)					\
	atomic_subtract_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_acq_ptr(p, v)					\
	atomic_subtract_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_rel_ptr(p, v)					\
	atomic_subtract_rel_int((volatile u_int *)(p), (u_int)(v))

#define	atomic_load_acq_ptr(p)						\
	atomic_load_acq_int((volatile u_int *)(p))
/*
 * Release-store of a pointer-sized value.  The value is cast to u_int, as
 * in the other atomic_*_ptr() wrappers, so that a pointer-typed argument
 * is accepted without an implicit pointer-to-integer conversion.
 */
#define	atomic_store_rel_ptr(p, v) \
	atomic_store_rel_int((volatile u_int *)(p), (u_int)(v))
/* Pointer compare-and-set: values are cast to u_int. */
#define	atomic_cmpset_ptr(dst, old, new)				\
	atomic_cmpset_int((volatile u_int *)(dst), (u_int)(old),	\
	    (u_int)(new))
#define	atomic_cmpset_acq_ptr(dst, old, new)				\
	atomic_cmpset_acq_int((volatile u_int *)(dst), (u_int)(old),	\
	    (u_int)(new))
#define	atomic_cmpset_rel_ptr(dst, old, new)				\
	atomic_cmpset_rel_int((volatile u_int *)(dst), (u_int)(old),	\
	    (u_int)(new))

/* Pointer fcmpset: "old" is a pointer to the expected value. */
#define	atomic_fcmpset_ptr(dst, old, new)				\
	atomic_fcmpset_int((volatile u_int *)(dst), (u_int *)(old),	\
	    (u_int)(new))
#define	atomic_fcmpset_acq_ptr(dst, old, new)				\
	atomic_fcmpset_acq_int((volatile u_int *)(dst), (u_int *)(old),	\
	    (u_int)(new))
#define	atomic_fcmpset_rel_ptr(dst, old, new)				\
	atomic_fcmpset_rel_int((volatile u_int *)(dst), (u_int *)(old),	\
	    (u_int)(new))

/* Pointer exchange and read-and-clear. */
#define	atomic_swap_ptr(p, v)						\
	atomic_swap_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_readandclear_ptr(p)					\
	atomic_readandclear_int((volatile u_int *)(p))
870 
871 #endif /* !WANT_FUNCTIONS */
872 
/*
 * mb(), wmb() and rmb() all expand to the same barrier: __mbk() in the
 * kernel and __mbu() in user space.
 */
#ifndef _KERNEL
#define	mb()	__mbu()
#define	wmb()	__mbu()
#define	rmb()	__mbu()
#else
#define	mb()	__mbk()
#define	wmb()	__mbk()
#define	rmb()	__mbk()
#endif
882 
883 #endif /* !_MACHINE_ATOMIC_H_ */
884