xref: /freebsd/sys/i386/include/atomic.h (revision f1ed5c000c688cf9781b486134baf4ba25415efd)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1998 Doug Rabson
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 #ifndef _MACHINE_ATOMIC_H_
31 #define	_MACHINE_ATOMIC_H_
32 
33 #ifndef _SYS_CDEFS_H_
34 #error this file needs sys/cdefs.h as a prerequisite
35 #endif
36 
37 #include <sys/atomic_common.h>
38 
39 #ifdef _KERNEL
40 #include <machine/md_var.h>
41 #include <machine/specialreg.h>
42 #endif
43 
44 #ifndef __OFFSETOF_MONITORBUF
45 /*
46  * __OFFSETOF_MONITORBUF == __pcpu_offset(pc_monitorbuf).
47  *
48  * The open-coded number is used instead of the symbolic expression to
49  * avoid a dependency on sys/pcpu.h in machine/atomic.h consumers.
50  * An assertion in i386/vm_machdep.c ensures that the value is correct.
51  */
52 #define	__OFFSETOF_MONITORBUF	0x80
53 
54 static __inline void
55 __mbk(void)
56 {
57 
58 	__asm __volatile("lock; addl $0,%%fs:%0"
59 	    : "+m" (*(u_int *)__OFFSETOF_MONITORBUF) : : "memory", "cc");
60 }
61 
/*
 * Userland flavour of the Store/Load barrier: a locked no-op add on the
 * word at the top of the stack.  The "memory" clobber additionally stops
 * the compiler from moving memory accesses across the barrier.
 */
static __inline void
__mbu(void)
{
	__asm __volatile("lock; addl $0,(%%esp)"
	    :
	    :
	    : "memory", "cc");
}
68 #endif
69 
70 /*
71  * Various simple operations on memory, each of which is atomic in the
72  * presence of interrupts and multiple processors.
73  *
74  * atomic_set_char(P, V)	(*(u_char *)(P) |= (V))
75  * atomic_clear_char(P, V)	(*(u_char *)(P) &= ~(V))
76  * atomic_add_char(P, V)	(*(u_char *)(P) += (V))
77  * atomic_subtract_char(P, V)	(*(u_char *)(P) -= (V))
78  *
79  * atomic_set_short(P, V)	(*(u_short *)(P) |= (V))
80  * atomic_clear_short(P, V)	(*(u_short *)(P) &= ~(V))
81  * atomic_add_short(P, V)	(*(u_short *)(P) += (V))
82  * atomic_subtract_short(P, V)	(*(u_short *)(P) -= (V))
83  *
84  * atomic_set_int(P, V)		(*(u_int *)(P) |= (V))
85  * atomic_clear_int(P, V)	(*(u_int *)(P) &= ~(V))
86  * atomic_add_int(P, V)		(*(u_int *)(P) += (V))
87  * atomic_subtract_int(P, V)	(*(u_int *)(P) -= (V))
88  * atomic_swap_int(P, V)	(return (*(u_int *)(P)); *(u_int *)(P) = (V);)
89  * atomic_readandclear_int(P)	(return (*(u_int *)(P)); *(u_int *)(P) = 0;)
90  *
91  * atomic_set_long(P, V)	(*(u_long *)(P) |= (V))
92  * atomic_clear_long(P, V)	(*(u_long *)(P) &= ~(V))
93  * atomic_add_long(P, V)	(*(u_long *)(P) += (V))
94  * atomic_subtract_long(P, V)	(*(u_long *)(P) -= (V))
95  * atomic_swap_long(P, V)	(return (*(u_long *)(P)); *(u_long *)(P) = (V);)
96  * atomic_readandclear_long(P)	(return (*(u_long *)(P)); *(u_long *)(P) = 0;)
97  */
98 
99 /*
100  * The above functions are expanded inline in the statically-linked
101  * kernel.  Lock prefixes are generated if an SMP kernel is being
102  * built.
103  *
104  * Kernel modules call real functions which are built into the kernel.
105  * This allows kernel modules to be portable between UP and SMP systems.
106  */
107 #if defined(KLD_MODULE) || !defined(__GNUCLIKE_ASM)
108 #define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)			\
109 void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v);	\
110 void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
111 
112 int	atomic_cmpset_char(volatile u_char *dst, u_char expect, u_char src);
113 int	atomic_cmpset_short(volatile u_short *dst, u_short expect, u_short src);
114 int	atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src);
115 int	atomic_fcmpset_char(volatile u_char *dst, u_char *expect, u_char src);
116 int	atomic_fcmpset_short(volatile u_short *dst, u_short *expect,
117 	    u_short src);
118 int	atomic_fcmpset_int(volatile u_int *dst, u_int *expect, u_int src);
119 u_int	atomic_fetchadd_int(volatile u_int *p, u_int v);
120 int	atomic_testandset_int(volatile u_int *p, u_int v);
121 int	atomic_testandclear_int(volatile u_int *p, u_int v);
122 void	atomic_thread_fence_acq(void);
123 void	atomic_thread_fence_acq_rel(void);
124 void	atomic_thread_fence_rel(void);
125 void	atomic_thread_fence_seq_cst(void);
126 
127 #define	ATOMIC_LOAD(TYPE)					\
128 u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p)
129 #define	ATOMIC_STORE(TYPE)					\
130 void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
131 
132 int		atomic_cmpset_64(volatile uint64_t *, uint64_t, uint64_t);
133 uint64_t	atomic_load_acq_64(volatile uint64_t *);
134 void		atomic_store_rel_64(volatile uint64_t *, uint64_t);
135 uint64_t	atomic_swap_64(volatile uint64_t *, uint64_t);
136 uint64_t	atomic_fetchadd_64(volatile uint64_t *, uint64_t);
137 void		atomic_add_64(volatile uint64_t *, uint64_t);
138 void		atomic_subtract_64(volatile uint64_t *, uint64_t);
139 
140 #else /* !KLD_MODULE && __GNUCLIKE_ASM */
141 
/*
 * MPLOCKED is pasted in front of the read-modify-write instructions.
 * Userland always gets the lock prefix so that binaries run on both SMP
 * and !SMP systems; the kernel needs it only when built for SMP.
 */
#if !defined(_KERNEL) || defined(SMP)
#define	MPLOCKED	"lock ; "
#else
#define	MPLOCKED
#endif
151 
/*
 * ATOMIC_ASM(NAME, TYPE, OP, CONS, V) defines two inline functions:
 *
 *	atomic_NAME_TYPE(p, v)		- OP applied to *p, no compiler barrier
 *	atomic_NAME_barr_TYPE(p, v)	- the same, plus a "memory" clobber
 *
 * OP is the instruction template (operand 0 is *p, operand 1 is V), CONS
 * is the constraint string for V, and V is an expression in terms of the
 * function argument v.
 *
 * The asm statements are volatile so that the compiler does not discard
 * them.  GCC aggressively reorders operations; the "memory" clobber in
 * the _barr_ variant is what prevents that when the operation has to act
 * as a memory barrier.
 */
#define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)			\
static __inline void						\
atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)	\
{								\
	__asm __volatile(MPLOCKED OP				\
	    : "+m" (*p)			/* 0 */			\
	    : CONS (V)			/* 1 */			\
	    : "cc");						\
}								\
								\
static __inline void						\
atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)	\
{								\
	__asm __volatile(MPLOCKED OP				\
	    : "+m" (*p)			/* 0 */			\
	    : CONS (V)			/* 1 */			\
	    : "memory", "cc");					\
}								\
struct __hack
176 
177 /*
178  * Atomic compare and set, used by the mutex functions.
179  *
180  * cmpset:
181  *	if (*dst == expect)
182  *		*dst = src
183  *
184  * fcmpset:
185  *	if (*dst == *expect)
186  *		*dst = src
187  *	else
188  *		*expect = *dst
189  *
190  * Returns 0 on failure, non-zero on success.
191  */
192 #define	ATOMIC_CMPSET(TYPE, CONS)			\
193 static __inline int					\
194 atomic_cmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE expect, u_##TYPE src) \
195 {							\
196 	u_char res;					\
197 							\
198 	__asm __volatile(				\
199 	"	" MPLOCKED "		"		\
200 	"	cmpxchg	%3,%1 ;		"		\
201 	"	sete	%0 ;		"		\
202 	"# atomic_cmpset_" #TYPE "	"		\
203 	: "=q" (res),			/* 0 */		\
204 	  "+m" (*dst),			/* 1 */		\
205 	  "+a" (expect)			/* 2 */		\
206 	: CONS (src)			/* 3 */		\
207 	: "memory", "cc");				\
208 	return (res);					\
209 }							\
210 							\
211 static __inline int					\
212 atomic_fcmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE *expect, u_##TYPE src) \
213 {							\
214 	u_char res;					\
215 							\
216 	__asm __volatile(				\
217 	"	" MPLOCKED "		"		\
218 	"	cmpxchg	%3,%1 ;		"		\
219 	"	sete	%0 ;		"		\
220 	"# atomic_fcmpset_" #TYPE "	"		\
221 	: "=q" (res),			/* 0 */		\
222 	  "+m" (*dst),			/* 1 */		\
223 	  "+a" (*expect)		/* 2 */		\
224 	: CONS (src)			/* 3 */		\
225 	: "memory", "cc");				\
226 	return (res);					\
227 }
228 
229 ATOMIC_CMPSET(char, "q");
230 ATOMIC_CMPSET(short, "r");
231 ATOMIC_CMPSET(int, "r");
232 
233 /*
234  * Atomically add the value of v to the integer pointed to by p and return
235  * the previous value of *p.
236  */
237 static __inline u_int
238 atomic_fetchadd_int(volatile u_int *p, u_int v)
239 {
240 
241 	__asm __volatile(
242 	"	" MPLOCKED "		"
243 	"	xaddl	%0,%1 ;		"
244 	"# atomic_fetchadd_int"
245 	: "+r" (v),			/* 0 */
246 	  "+m" (*p)			/* 1 */
247 	: : "cc");
248 	return (v);
249 }
250 
251 static __inline int
252 atomic_testandset_int(volatile u_int *p, u_int v)
253 {
254 	u_char res;
255 
256 	__asm __volatile(
257 	"	" MPLOCKED "		"
258 	"	btsl	%2,%1 ;		"
259 	"	setc	%0 ;		"
260 	"# atomic_testandset_int"
261 	: "=q" (res),			/* 0 */
262 	  "+m" (*p)			/* 1 */
263 	: "Ir" (v & 0x1f)		/* 2 */
264 	: "cc");
265 	return (res);
266 }
267 
268 static __inline int
269 atomic_testandclear_int(volatile u_int *p, u_int v)
270 {
271 	u_char res;
272 
273 	__asm __volatile(
274 	"	" MPLOCKED "		"
275 	"	btrl	%2,%1 ;		"
276 	"	setc	%0 ;		"
277 	"# atomic_testandclear_int"
278 	: "=q" (res),			/* 0 */
279 	  "+m" (*p)			/* 1 */
280 	: "Ir" (v & 0x1f)		/* 2 */
281 	: "cc");
282 	return (res);
283 }
284 
285 /*
286  * We assume that a = b will do atomic loads and stores.  Due to the
287  * IA32 memory model, a simple store guarantees release semantics.
288  *
289  * However, a load may pass a store if they are performed on distinct
290  * addresses, so we need Store/Load barrier for sequentially
291  * consistent fences in SMP kernels.  We use "lock addl $0,mem" for a
292  * Store/Load barrier, as recommended by the AMD Software Optimization
293  * Guide, and not mfence.  In the kernel, we use a private per-cpu
294  * cache line for "mem", to avoid introducing false data
295  * dependencies.  In user space, we use the word at the top of the
296  * stack.
297  *
298  * For UP kernels, however, the memory of the single processor is
299  * always consistent, so we only need to stop the compiler from
300  * reordering accesses in a way that violates the semantics of acquire
301  * and release.
302  */
303 
/*
 * Select the Store/Load barrier implementation:
 *   userland	- locked add on the stack word
 *   SMP kernel	- locked add on the per-CPU monitor buffer word
 *   UP kernel	- compiler barrier only
 */
#if !defined(_KERNEL)
#define	__storeload_barrier()	__mbu()
#elif defined(SMP)
#define	__storeload_barrier()	__mbk()
#else /* _KERNEL && UP */
#define	__storeload_barrier()	__compiler_membar()
#endif /* !_KERNEL */
313 
/*
 * ATOMIC_LOAD(TYPE) defines atomic_load_acq_TYPE(): a plain load of *p
 * followed by a compiler barrier, so that the compiler cannot move
 * later memory accesses ahead of the load.
 */
#define	ATOMIC_LOAD(TYPE)					\
static __inline u_##TYPE					\
atomic_load_acq_##TYPE(volatile u_##TYPE *p)			\
{								\
	u_##TYPE loaded = *p;					\
								\
	__compiler_membar();					\
	return (loaded);					\
}								\
struct __hack
325 
/*
 * ATOMIC_STORE(TYPE) defines atomic_store_rel_TYPE(): a compiler
 * barrier followed by a plain store to *p, so that the compiler cannot
 * move earlier memory accesses past the store.
 */
#define	ATOMIC_STORE(TYPE)					\
static __inline void						\
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)	\
{								\
	__compiler_membar();	/* earlier accesses stay above */ \
	*p = v;							\
}								\
struct __hack
335 
/* Acquire fence: implemented as a compiler barrier only. */
static __inline void
atomic_thread_fence_acq(void)
{
	__compiler_membar();
}
342 
/* Release fence: implemented as a compiler barrier only. */
static __inline void
atomic_thread_fence_rel(void)
{
	__compiler_membar();
}
349 
/* Acquire+release fence: implemented as a compiler barrier only. */
static __inline void
atomic_thread_fence_acq_rel(void)
{
	__compiler_membar();
}
356 
/*
 * Sequentially consistent fence: the only fence that needs the
 * Store/Load barrier selected by __storeload_barrier().
 */
static __inline void
atomic_thread_fence_seq_cst(void)
{
	__storeload_barrier();
}
363 
364 #ifdef _KERNEL
365 
#ifdef WANT_FUNCTIONS
/*
 * Explicit prototypes for the per-CPU-class 64-bit primitives that are
 * defined inline below.
 */
int		atomic_cmpset_64_i386(volatile uint64_t *dst, uint64_t expect,
		    uint64_t src);
int		atomic_cmpset_64_i586(volatile uint64_t *dst, uint64_t expect,
		    uint64_t src);
uint64_t	atomic_load_acq_64_i386(volatile uint64_t *p);
uint64_t	atomic_load_acq_64_i586(volatile uint64_t *p);
void		atomic_store_rel_64_i386(volatile uint64_t *p, uint64_t v);
void		atomic_store_rel_64_i586(volatile uint64_t *p, uint64_t v);
uint64_t	atomic_swap_64_i386(volatile uint64_t *p, uint64_t v);
uint64_t	atomic_swap_64_i586(volatile uint64_t *p, uint64_t v);
#endif
376 
377 /* I486 does not support SMP or CMPXCHG8B. */
378 static __inline int
379 atomic_cmpset_64_i386(volatile uint64_t *dst, uint64_t expect, uint64_t src)
380 {
381 	volatile uint32_t *p;
382 	u_char res;
383 
384 	p = (volatile uint32_t *)dst;
385 	__asm __volatile(
386 	"	pushfl ;		"
387 	"	cli ;			"
388 	"	xorl	%1,%%eax ;	"
389 	"	xorl	%2,%%edx ;	"
390 	"	orl	%%edx,%%eax ;	"
391 	"	jne	1f ;		"
392 	"	movl	%4,%1 ;		"
393 	"	movl	%5,%2 ;		"
394 	"1:				"
395 	"	sete	%3 ;		"
396 	"	popfl"
397 	: "+A" (expect),		/* 0 */
398 	  "+m" (*p),			/* 1 */
399 	  "+m" (*(p + 1)),		/* 2 */
400 	  "=q" (res)			/* 3 */
401 	: "r" ((uint32_t)src),		/* 4 */
402 	  "r" ((uint32_t)(src >> 32))	/* 5 */
403 	: "memory", "cc");
404 	return (res);
405 }
406 
/*
 * 64-bit load for CPUs without CMPXCHG8B: both 32-bit halves of *p are
 * read into edx:eax while interrupts are disabled; popfl restores the
 * saved flags afterwards.
 */
static __inline uint64_t
atomic_load_acq_64_i386(volatile uint64_t *p)
{
	volatile uint32_t *halves;
	uint64_t value;

	halves = (volatile uint32_t *)p;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%1,%%eax ;	"
	"	movl	%2,%%edx ;	"
	"	popfl"
	: "=&A" (value)			/* 0 */
	: "m" (halves[0]),		/* 1 */
	  "m" (halves[1])		/* 2 */
	: "memory");
	return (value);
}
426 
/*
 * 64-bit store for CPUs without CMPXCHG8B: v is passed in edx:eax and
 * written to the two 32-bit halves of *p while interrupts are disabled;
 * popfl restores the saved flags afterwards.
 */
static __inline void
atomic_store_rel_64_i386(volatile uint64_t *p, uint64_t v)
{
	volatile uint32_t *halves;

	halves = (volatile uint32_t *)p;
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%%eax,%0 ;	"
	"	movl	%%edx,%1 ;	"
	"	popfl"
	: "=m" (halves[0]),		/* 0 */
	  "=m" (halves[1])		/* 1 */
	: "A" (v)			/* 2 */
	: "memory");
}
444 
/*
 * 64-bit exchange for CPUs without CMPXCHG8B: with interrupts disabled,
 * the old halves of *p are read into edx:eax and then replaced by the
 * halves of v.  Returns the previous contents of *p.
 */
static __inline uint64_t
atomic_swap_64_i386(volatile uint64_t *p, uint64_t v)
{
	volatile uint32_t *halves;
	uint32_t new_lo, new_hi;
	uint64_t previous;

	halves = (volatile uint32_t *)p;
	new_lo = (uint32_t)v;
	new_hi = (uint32_t)(v >> 32);
	__asm __volatile(
	"	pushfl ;		"
	"	cli ;			"
	"	movl	%1,%%eax ;	"
	"	movl	%2,%%edx ;	"
	"	movl	%4,%2 ;		"
	"	movl	%3,%1 ;		"
	"	popfl"
	: "=&A" (previous),		/* 0 */
	  "+m" (halves[0]),		/* 1 */
	  "+m" (halves[1])		/* 2 */
	: "r" (new_lo),			/* 3 */
	  "r" (new_hi));		/* 4 */
	return (previous);
}
467 
468 static __inline int
469 atomic_cmpset_64_i586(volatile uint64_t *dst, uint64_t expect, uint64_t src)
470 {
471 	u_char res;
472 
473 	__asm __volatile(
474 	"	" MPLOCKED "		"
475 	"	cmpxchg8b %1 ;		"
476 	"	sete	%0"
477 	: "=q" (res),			/* 0 */
478 	  "+m" (*dst),			/* 1 */
479 	  "+A" (expect)			/* 2 */
480 	: "b" ((uint32_t)src),		/* 3 */
481 	  "c" ((uint32_t)(src >> 32))	/* 4 */
482 	: "memory", "cc");
483 	return (res);
484 }
485 
486 static __inline uint64_t
487 atomic_load_acq_64_i586(volatile uint64_t *p)
488 {
489 	uint64_t res;
490 
491 	__asm __volatile(
492 	"	movl	%%ebx,%%eax ;	"
493 	"	movl	%%ecx,%%edx ;	"
494 	"	" MPLOCKED "		"
495 	"	cmpxchg8b %1"
496 	: "=&A" (res),			/* 0 */
497 	  "+m" (*p)			/* 1 */
498 	: : "memory", "cc");
499 	return (res);
500 }
501 
502 static __inline void
503 atomic_store_rel_64_i586(volatile uint64_t *p, uint64_t v)
504 {
505 
506 	__asm __volatile(
507 	"	movl	%%eax,%%ebx ;	"
508 	"	movl	%%edx,%%ecx ;	"
509 	"1:				"
510 	"	" MPLOCKED "		"
511 	"	cmpxchg8b %0 ;		"
512 	"	jne	1b"
513 	: "+m" (*p),			/* 0 */
514 	  "+A" (v)			/* 1 */
515 	: : "ebx", "ecx", "memory", "cc");
516 }
517 
518 static __inline uint64_t
519 atomic_swap_64_i586(volatile uint64_t *p, uint64_t v)
520 {
521 
522 	__asm __volatile(
523 	"	movl	%%eax,%%ebx ;	"
524 	"	movl	%%edx,%%ecx ;	"
525 	"1:				"
526 	"	" MPLOCKED "		"
527 	"	cmpxchg8b %0 ;		"
528 	"	jne	1b"
529 	: "+m" (*p),			/* 0 */
530 	  "+A" (v)			/* 1 */
531 	: : "ebx", "ecx", "memory", "cc");
532 	return (v);
533 }
534 
535 static __inline int
536 atomic_cmpset_64(volatile uint64_t *dst, uint64_t expect, uint64_t src)
537 {
538 
539 	if ((cpu_feature & CPUID_CX8) == 0)
540 		return (atomic_cmpset_64_i386(dst, expect, src));
541 	else
542 		return (atomic_cmpset_64_i586(dst, expect, src));
543 }
544 
545 static __inline uint64_t
546 atomic_load_acq_64(volatile uint64_t *p)
547 {
548 
549 	if ((cpu_feature & CPUID_CX8) == 0)
550 		return (atomic_load_acq_64_i386(p));
551 	else
552 		return (atomic_load_acq_64_i586(p));
553 }
554 
555 static __inline void
556 atomic_store_rel_64(volatile uint64_t *p, uint64_t v)
557 {
558 
559 	if ((cpu_feature & CPUID_CX8) == 0)
560 		atomic_store_rel_64_i386(p, v);
561 	else
562 		atomic_store_rel_64_i586(p, v);
563 }
564 
565 static __inline uint64_t
566 atomic_swap_64(volatile uint64_t *p, uint64_t v)
567 {
568 
569 	if ((cpu_feature & CPUID_CX8) == 0)
570 		return (atomic_swap_64_i386(p, v));
571 	else
572 		return (atomic_swap_64_i586(p, v));
573 }
574 
/*
 * Atomically add v to *p and return the value *p held beforehand.
 * Implemented as a compare-and-set retry loop: snapshot *p, try to
 * replace the snapshot with snapshot + v, and repeat until the
 * compare-and-set succeeds.
 */
static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t old;

	do {
		old = *p;
	} while (!atomic_cmpset_64(p, old, old + v));
	return (old);
}
585 
/*
 * Atomically add v to *p.  Implemented as a compare-and-set retry loop
 * that repeats until no other update slips in between the snapshot of
 * *p and the compare-and-set.
 */
static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t old;

	do {
		old = *p;
	} while (!atomic_cmpset_64(p, old, old + v));
}
597 
/*
 * Atomically subtract v from *p.  Implemented as a compare-and-set
 * retry loop that repeats until no other update slips in between the
 * snapshot of *p and the compare-and-set.
 */
static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t old;

	do {
		old = *p;
	} while (!atomic_cmpset_64(p, old, old - v));
}
609 
610 #endif /* _KERNEL */
611 
612 #endif /* KLD_MODULE || !__GNUCLIKE_ASM */
613 
614 ATOMIC_ASM(set,	     char,  "orb %b1,%0",  "iq",  v);
615 ATOMIC_ASM(clear,    char,  "andb %b1,%0", "iq", ~v);
616 ATOMIC_ASM(add,	     char,  "addb %b1,%0", "iq",  v);
617 ATOMIC_ASM(subtract, char,  "subb %b1,%0", "iq",  v);
618 
619 ATOMIC_ASM(set,	     short, "orw %w1,%0",  "ir",  v);
620 ATOMIC_ASM(clear,    short, "andw %w1,%0", "ir", ~v);
621 ATOMIC_ASM(add,	     short, "addw %w1,%0", "ir",  v);
622 ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir",  v);
623 
624 ATOMIC_ASM(set,	     int,   "orl %1,%0",   "ir",  v);
625 ATOMIC_ASM(clear,    int,   "andl %1,%0",  "ir", ~v);
626 ATOMIC_ASM(add,	     int,   "addl %1,%0",  "ir",  v);
627 ATOMIC_ASM(subtract, int,   "subl %1,%0",  "ir",  v);
628 
629 ATOMIC_ASM(set,	     long,  "orl %1,%0",   "ir",  v);
630 ATOMIC_ASM(clear,    long,  "andl %1,%0",  "ir", ~v);
631 ATOMIC_ASM(add,	     long,  "addl %1,%0",  "ir",  v);
632 ATOMIC_ASM(subtract, long,  "subl %1,%0",  "ir",  v);
633 
634 #define	ATOMIC_LOADSTORE(TYPE)				\
635 	ATOMIC_LOAD(TYPE);				\
636 	ATOMIC_STORE(TYPE)
637 
638 ATOMIC_LOADSTORE(char);
639 ATOMIC_LOADSTORE(short);
640 ATOMIC_LOADSTORE(int);
641 ATOMIC_LOADSTORE(long);
642 
643 #undef ATOMIC_ASM
644 #undef ATOMIC_LOAD
645 #undef ATOMIC_STORE
646 #undef ATOMIC_LOADSTORE
647 
648 #ifndef WANT_FUNCTIONS
649 
650 static __inline int
651 atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src)
652 {
653 
654 	return (atomic_cmpset_int((volatile u_int *)dst, (u_int)expect,
655 	    (u_int)src));
656 }
657 
658 static __inline u_long
659 atomic_fetchadd_long(volatile u_long *p, u_long v)
660 {
661 
662 	return (atomic_fetchadd_int((volatile u_int *)p, (u_int)v));
663 }
664 
665 static __inline int
666 atomic_testandset_long(volatile u_long *p, u_int v)
667 {
668 
669 	return (atomic_testandset_int((volatile u_int *)p, v));
670 }
671 
672 static __inline int
673 atomic_testandclear_long(volatile u_long *p, u_int v)
674 {
675 
676 	return (atomic_testandclear_int((volatile u_int *)p, v));
677 }
678 
679 /* Read the current value and store a new value in the destination. */
680 #ifdef __GNUCLIKE_ASM
681 
682 static __inline u_int
683 atomic_swap_int(volatile u_int *p, u_int v)
684 {
685 
686 	__asm __volatile(
687 	"	xchgl	%1,%0 ;		"
688 	"# atomic_swap_int"
689 	: "+r" (v),			/* 0 */
690 	  "+m" (*p));			/* 1 */
691 	return (v);
692 }
693 
694 static __inline u_long
695 atomic_swap_long(volatile u_long *p, u_long v)
696 {
697 
698 	return (atomic_swap_int((volatile u_int *)p, (u_int)v));
699 }
700 
701 #else /* !__GNUCLIKE_ASM */
702 
703 u_int	atomic_swap_int(volatile u_int *p, u_int v);
704 u_long	atomic_swap_long(volatile u_long *p, u_long v);
705 
706 #endif /* __GNUCLIKE_ASM */
707 
/*
 * Acquire/release aliases.
 *
 * For set/clear/add/subtract both the _acq_ and the _rel_ name map to
 * the _barr_ function of the same operation.  For cmpset/fcmpset both
 * names map to the plain function.
 */

/* char */
#define	atomic_set_acq_char		atomic_set_barr_char
#define	atomic_set_rel_char		atomic_set_barr_char
#define	atomic_clear_acq_char		atomic_clear_barr_char
#define	atomic_clear_rel_char		atomic_clear_barr_char
#define	atomic_add_acq_char		atomic_add_barr_char
#define	atomic_add_rel_char		atomic_add_barr_char
#define	atomic_subtract_acq_char	atomic_subtract_barr_char
#define	atomic_subtract_rel_char	atomic_subtract_barr_char
#define	atomic_cmpset_acq_char		atomic_cmpset_char
#define	atomic_cmpset_rel_char		atomic_cmpset_char
#define	atomic_fcmpset_acq_char		atomic_fcmpset_char
#define	atomic_fcmpset_rel_char		atomic_fcmpset_char

/* short */
#define	atomic_set_acq_short		atomic_set_barr_short
#define	atomic_set_rel_short		atomic_set_barr_short
#define	atomic_clear_acq_short		atomic_clear_barr_short
#define	atomic_clear_rel_short		atomic_clear_barr_short
#define	atomic_add_acq_short		atomic_add_barr_short
#define	atomic_add_rel_short		atomic_add_barr_short
#define	atomic_subtract_acq_short	atomic_subtract_barr_short
#define	atomic_subtract_rel_short	atomic_subtract_barr_short
#define	atomic_cmpset_acq_short		atomic_cmpset_short
#define	atomic_cmpset_rel_short		atomic_cmpset_short
#define	atomic_fcmpset_acq_short	atomic_fcmpset_short
#define	atomic_fcmpset_rel_short	atomic_fcmpset_short

/* int */
#define	atomic_set_acq_int		atomic_set_barr_int
#define	atomic_set_rel_int		atomic_set_barr_int
#define	atomic_clear_acq_int		atomic_clear_barr_int
#define	atomic_clear_rel_int		atomic_clear_barr_int
#define	atomic_add_acq_int		atomic_add_barr_int
#define	atomic_add_rel_int		atomic_add_barr_int
#define	atomic_subtract_acq_int		atomic_subtract_barr_int
#define	atomic_subtract_rel_int		atomic_subtract_barr_int
#define	atomic_cmpset_acq_int		atomic_cmpset_int
#define	atomic_cmpset_rel_int		atomic_cmpset_int
#define	atomic_fcmpset_acq_int		atomic_fcmpset_int
#define	atomic_fcmpset_rel_int		atomic_fcmpset_int

/* long */
#define	atomic_set_acq_long		atomic_set_barr_long
#define	atomic_set_rel_long		atomic_set_barr_long
#define	atomic_clear_acq_long		atomic_clear_barr_long
#define	atomic_clear_rel_long		atomic_clear_barr_long
#define	atomic_add_acq_long		atomic_add_barr_long
#define	atomic_add_rel_long		atomic_add_barr_long
#define	atomic_subtract_acq_long	atomic_subtract_barr_long
#define	atomic_subtract_rel_long	atomic_subtract_barr_long
#define	atomic_cmpset_acq_long		atomic_cmpset_long
#define	atomic_cmpset_rel_long		atomic_cmpset_long
#define	atomic_fcmpset_acq_long		atomic_fcmpset_long
#define	atomic_fcmpset_rel_long		atomic_fcmpset_long
759 
/* Read-and-clear is an exchange with zero; yields the previous value. */
#define	atomic_readandclear_int(p)	atomic_swap_int((p), 0)
#define	atomic_readandclear_long(p)	atomic_swap_long((p), 0)
762 
/*
 * Operations on 8-bit bytes: fixed-width names for the u_char
 * operations.
 */
#define	atomic_set_8			atomic_set_char
#define	atomic_set_acq_8		atomic_set_acq_char
#define	atomic_set_rel_8		atomic_set_rel_char
#define	atomic_clear_8			atomic_clear_char
#define	atomic_clear_acq_8		atomic_clear_acq_char
#define	atomic_clear_rel_8		atomic_clear_rel_char
#define	atomic_add_8			atomic_add_char
#define	atomic_add_acq_8		atomic_add_acq_char
#define	atomic_add_rel_8		atomic_add_rel_char
#define	atomic_subtract_8		atomic_subtract_char
#define	atomic_subtract_acq_8		atomic_subtract_acq_char
#define	atomic_subtract_rel_8		atomic_subtract_rel_char
#define	atomic_load_acq_8		atomic_load_acq_char
#define	atomic_store_rel_8		atomic_store_rel_char
#define	atomic_cmpset_8			atomic_cmpset_char
#define	atomic_cmpset_acq_8		atomic_cmpset_acq_char
#define	atomic_cmpset_rel_8		atomic_cmpset_rel_char
#define	atomic_fcmpset_8		atomic_fcmpset_char
#define	atomic_fcmpset_acq_8		atomic_fcmpset_acq_char
#define	atomic_fcmpset_rel_8		atomic_fcmpset_rel_char
784 
/*
 * Operations on 16-bit words: fixed-width names for the u_short
 * operations.
 */
#define	atomic_set_16			atomic_set_short
#define	atomic_set_acq_16		atomic_set_acq_short
#define	atomic_set_rel_16		atomic_set_rel_short
#define	atomic_clear_16			atomic_clear_short
#define	atomic_clear_acq_16		atomic_clear_acq_short
#define	atomic_clear_rel_16		atomic_clear_rel_short
#define	atomic_add_16			atomic_add_short
#define	atomic_add_acq_16		atomic_add_acq_short
#define	atomic_add_rel_16		atomic_add_rel_short
#define	atomic_subtract_16		atomic_subtract_short
#define	atomic_subtract_acq_16		atomic_subtract_acq_short
#define	atomic_subtract_rel_16		atomic_subtract_rel_short
#define	atomic_load_acq_16		atomic_load_acq_short
#define	atomic_store_rel_16		atomic_store_rel_short
#define	atomic_cmpset_16		atomic_cmpset_short
#define	atomic_cmpset_acq_16		atomic_cmpset_acq_short
#define	atomic_cmpset_rel_16		atomic_cmpset_rel_short
#define	atomic_fcmpset_16		atomic_fcmpset_short
#define	atomic_fcmpset_acq_16		atomic_fcmpset_acq_short
#define	atomic_fcmpset_rel_16		atomic_fcmpset_rel_short
806 
/*
 * Operations on 32-bit double words: fixed-width names for the u_int
 * operations.
 */
#define	atomic_set_32			atomic_set_int
#define	atomic_set_acq_32		atomic_set_acq_int
#define	atomic_set_rel_32		atomic_set_rel_int
#define	atomic_clear_32			atomic_clear_int
#define	atomic_clear_acq_32		atomic_clear_acq_int
#define	atomic_clear_rel_32		atomic_clear_rel_int
#define	atomic_add_32			atomic_add_int
#define	atomic_add_acq_32		atomic_add_acq_int
#define	atomic_add_rel_32		atomic_add_rel_int
#define	atomic_subtract_32		atomic_subtract_int
#define	atomic_subtract_acq_32		atomic_subtract_acq_int
#define	atomic_subtract_rel_32		atomic_subtract_rel_int
#define	atomic_load_acq_32		atomic_load_acq_int
#define	atomic_store_rel_32		atomic_store_rel_int
#define	atomic_cmpset_32		atomic_cmpset_int
#define	atomic_cmpset_acq_32		atomic_cmpset_acq_int
#define	atomic_cmpset_rel_32		atomic_cmpset_rel_int
#define	atomic_fcmpset_32		atomic_fcmpset_int
#define	atomic_fcmpset_acq_32		atomic_fcmpset_acq_int
#define	atomic_fcmpset_rel_32		atomic_fcmpset_rel_int
#define	atomic_swap_32			atomic_swap_int
#define	atomic_readandclear_32		atomic_readandclear_int
#define	atomic_fetchadd_32		atomic_fetchadd_int
#define	atomic_testandset_32		atomic_testandset_int
#define	atomic_testandclear_32		atomic_testandclear_int
833 
/*
 * Operations on 64-bit quad words: the _acq_ and _rel_ names all map to
 * the plain 64-bit functions.
 */
#define	atomic_cmpset_acq_64		atomic_cmpset_64
#define	atomic_cmpset_rel_64		atomic_cmpset_64
#define	atomic_fetchadd_acq_64		atomic_fetchadd_64
#define	atomic_fetchadd_rel_64		atomic_fetchadd_64
#define	atomic_add_acq_64		atomic_add_64
#define	atomic_add_rel_64		atomic_add_64
#define	atomic_subtract_acq_64		atomic_subtract_64
#define	atomic_subtract_rel_64		atomic_subtract_64
843 
/*
 * Operations on pointers.
 *
 * Each macro forwards to the u_int operation of the same name: the
 * target is cast to (volatile u_int *), value arguments are cast to
 * u_int, and the "old" argument of the fcmpset forms is cast to
 * (u_int *) because it is written through on failure.
 *
 * atomic_store_rel_ptr() casts its value like every other macro here, so
 * a pointer-typed v is accepted the same way as for atomic_set_ptr(),
 * atomic_swap_ptr() and friends.
 */
#define	atomic_set_ptr(p, v) \
	atomic_set_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_set_acq_ptr(p, v) \
	atomic_set_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_set_rel_ptr(p, v) \
	atomic_set_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_ptr(p, v) \
	atomic_clear_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_acq_ptr(p, v) \
	atomic_clear_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_rel_ptr(p, v) \
	atomic_clear_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_ptr(p, v) \
	atomic_add_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_acq_ptr(p, v) \
	atomic_add_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_rel_ptr(p, v) \
	atomic_add_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_ptr(p, v) \
	atomic_subtract_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_acq_ptr(p, v) \
	atomic_subtract_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_rel_ptr(p, v) \
	atomic_subtract_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_load_acq_ptr(p) \
	atomic_load_acq_int((volatile u_int *)(p))
#define	atomic_store_rel_ptr(p, v) \
	atomic_store_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_cmpset_ptr(dst, old, new) \
	atomic_cmpset_int((volatile u_int *)(dst), (u_int)(old), (u_int)(new))
#define	atomic_cmpset_acq_ptr(dst, old, new) \
	atomic_cmpset_acq_int((volatile u_int *)(dst), (u_int)(old), \
	    (u_int)(new))
#define	atomic_cmpset_rel_ptr(dst, old, new) \
	atomic_cmpset_rel_int((volatile u_int *)(dst), (u_int)(old), \
	    (u_int)(new))
#define	atomic_fcmpset_ptr(dst, old, new) \
	atomic_fcmpset_int((volatile u_int *)(dst), (u_int *)(old), (u_int)(new))
#define	atomic_fcmpset_acq_ptr(dst, old, new) \
	atomic_fcmpset_acq_int((volatile u_int *)(dst), (u_int *)(old), \
	    (u_int)(new))
#define	atomic_fcmpset_rel_ptr(dst, old, new) \
	atomic_fcmpset_rel_int((volatile u_int *)(dst), (u_int *)(old), \
	    (u_int)(new))
#define	atomic_swap_ptr(p, v) \
	atomic_swap_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_readandclear_ptr(p) \
	atomic_readandclear_int((volatile u_int *)(p))
893 
894 #endif /* !WANT_FUNCTIONS */
895 
/*
 * Full, write and read memory barriers.  All three expand to the same
 * locked-add barrier: the per-CPU variant in the kernel, the stack
 * variant in userland.
 */
#ifdef _KERNEL
#define	mb()	__mbk()
#define	wmb()	__mbk()
#define	rmb()	__mbk()
#else /* !_KERNEL */
#define	mb()	__mbu()
#define	wmb()	__mbu()
#define	rmb()	__mbu()
#endif /* _KERNEL */
905 
906 #endif /* !_MACHINE_ATOMIC_H_ */
907