xref: /freebsd/sys/i386/include/atomic.h (revision 13ec1e3155c7e9bf037b12af186351b7fa9b9450)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1998 Doug Rabson
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 #ifndef _MACHINE_ATOMIC_H_
31 #define	_MACHINE_ATOMIC_H_
32 
33 #ifndef _SYS_CDEFS_H_
34 #error this file needs sys/cdefs.h as a prerequisite
35 #endif
36 
37 #include <sys/atomic_common.h>
38 
39 #ifdef _KERNEL
40 #include <machine/md_var.h>
41 #include <machine/specialreg.h>
42 #endif
43 
44 #ifndef __OFFSETOF_MONITORBUF
45 /*
46  * __OFFSETOF_MONITORBUF == __pcpu_offset(pc_monitorbuf).
47  *
48  * The open-coded number is used instead of the symbolic expression to
49  * avoid a dependency on sys/pcpu.h in machine/atomic.h consumers.
50  * An assertion in i386/vm_machdep.c ensures that the value is correct.
 *
 * __mbk() uses this value as an %fs-relative address: it is the target
 * of that function's locked no-op add.
51  */
52 #define	__OFFSETOF_MONITORBUF	0x80
53 
/*
 * Kernel memory barrier.  Executes a locked "addl $0" on the u_int found
 * at %fs:__OFFSETOF_MONITORBUF.  Adding zero leaves the word unchanged;
 * the "memory" clobber additionally keeps the compiler from moving memory
 * accesses across the statement.
 */
54 static __inline void
55 __mbk(void)
56 {
57 
58 	__asm __volatile("lock; addl $0,%%fs:%0"
59 	    : "+m" (*(u_int *)__OFFSETOF_MONITORBUF) : : "memory", "cc");
60 }
61 
/*
 * Userland memory barrier.  Executes a locked "addl $0" on the word at
 * the top of the stack, (%esp).  Adding zero leaves the word unchanged;
 * the "memory" clobber additionally keeps the compiler from moving memory
 * accesses across the statement.
 */
62 static __inline void
63 __mbu(void)
64 {
65 
66 	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc");
67 }
68 #endif
69 
70 /*
71  * Various simple operations on memory, each of which is atomic in the
72  * presence of interrupts and multiple processors.
73  *
74  * atomic_set_char(P, V)	(*(u_char *)(P) |= (V))
75  * atomic_clear_char(P, V)	(*(u_char *)(P) &= ~(V))
76  * atomic_add_char(P, V)	(*(u_char *)(P) += (V))
77  * atomic_subtract_char(P, V)	(*(u_char *)(P) -= (V))
78  *
79  * atomic_set_short(P, V)	(*(u_short *)(P) |= (V))
80  * atomic_clear_short(P, V)	(*(u_short *)(P) &= ~(V))
81  * atomic_add_short(P, V)	(*(u_short *)(P) += (V))
82  * atomic_subtract_short(P, V)	(*(u_short *)(P) -= (V))
83  *
84  * atomic_set_int(P, V)		(*(u_int *)(P) |= (V))
85  * atomic_clear_int(P, V)	(*(u_int *)(P) &= ~(V))
86  * atomic_add_int(P, V)		(*(u_int *)(P) += (V))
87  * atomic_subtract_int(P, V)	(*(u_int *)(P) -= (V))
88  * atomic_swap_int(P, V)	(return (*(u_int *)(P)); *(u_int *)(P) = (V);)
89  * atomic_readandclear_int(P)	(return (*(u_int *)(P)); *(u_int *)(P) = 0;)
90  *
91  * atomic_set_long(P, V)	(*(u_long *)(P) |= (V))
92  * atomic_clear_long(P, V)	(*(u_long *)(P) &= ~(V))
93  * atomic_add_long(P, V)	(*(u_long *)(P) += (V))
94  * atomic_subtract_long(P, V)	(*(u_long *)(P) -= (V))
95  * atomic_swap_long(P, V)	(return (*(u_long *)(P)); *(u_long *)(P) = (V);)
96  * atomic_readandclear_long(P)	(return (*(u_long *)(P)); *(u_long *)(P) = 0;)
97  */
98 
99 #if !defined(__GNUCLIKE_ASM)
/*
 * Without GNU-style inline assembly the operations cannot be defined
 * inline in this header, so ATOMIC_ASM only declares the plain and _barr_
 * variants of each operation.  OP, CONS and V are unused in this form.
 */
100 #define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)			\
101 void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v);	\
102 void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
103 
/*
 * Prototypes for the compare-and-set, fetch-and-add, bit test and fence
 * operations.  In the __GNUCLIKE_ASM branch of this header the same names
 * are provided as static inline functions instead.
 */
104 int	atomic_cmpset_char(volatile u_char *dst, u_char expect, u_char src);
105 int	atomic_cmpset_short(volatile u_short *dst, u_short expect, u_short src);
106 int	atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src);
107 int	atomic_fcmpset_char(volatile u_char *dst, u_char *expect, u_char src);
108 int	atomic_fcmpset_short(volatile u_short *dst, u_short *expect,
109 	    u_short src);
110 int	atomic_fcmpset_int(volatile u_int *dst, u_int *expect, u_int src);
111 u_int	atomic_fetchadd_int(volatile u_int *p, u_int v);
112 int	atomic_testandset_int(volatile u_int *p, u_int v);
113 int	atomic_testandclear_int(volatile u_int *p, u_int v);
114 void	atomic_thread_fence_acq(void);
115 void	atomic_thread_fence_acq_rel(void);
116 void	atomic_thread_fence_rel(void);
117 void	atomic_thread_fence_seq_cst(void);
118 
/*
 * Declaration-only forms of ATOMIC_LOAD/ATOMIC_STORE: each expands to the
 * prototype of atomic_load_acq_TYPE() or atomic_store_rel_TYPE().
 */
119 #define	ATOMIC_LOAD(TYPE)					\
120 u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p)
121 #define	ATOMIC_STORE(TYPE)					\
122 void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
123 
/* Prototypes for the 64-bit operations. */
124 int		atomic_cmpset_64(volatile uint64_t *, uint64_t, uint64_t);
125 int		atomic_fcmpset_64(volatile uint64_t *, uint64_t *, uint64_t);
126 uint64_t	atomic_load_acq_64(volatile uint64_t *);
127 void		atomic_store_rel_64(volatile uint64_t *, uint64_t);
128 uint64_t	atomic_swap_64(volatile uint64_t *, uint64_t);
129 uint64_t	atomic_fetchadd_64(volatile uint64_t *, uint64_t);
130 void		atomic_add_64(volatile uint64_t *, uint64_t);
131 void		atomic_subtract_64(volatile uint64_t *, uint64_t);
132 
133 #else /* !__GNUCLIKE_ASM */
134 
135 /*
136  * Always use lock prefixes.  The result is slightly less optimal for
137  * UP systems, but it matters less now, and sometimes UP is emulated
138  * over SMP.
139  *
140  * The assembly is volatilized to avoid code chunk removal by the compiler.
141  * GCC aggressively reorders operations and memory clobbering is necessary
142  * in order to avoid that for memory barriers.
143  */
/*
 * ATOMIC_ASM(NAME, TYPE, OP, CONS, V) defines two functions:
 *
 *   atomic_NAME_TYPE(p, v)       locked OP on *p; only "cc" is clobbered
 *   atomic_NAME_barr_TYPE(p, v)  the same instruction with an additional
 *                                "memory" clobber, making it a compiler
 *                                barrier as well
 *
 * OP is the instruction template (operand 0 is *p, operand 1 is V), CONS
 * is the constraint string for V, and V is the expression used as the
 * source operand (the clear operations pass ~v).
 *
 * The trailing "struct __hack" lets every use of the macro be terminated
 * with a semicolon.
 */
144 #define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)		\
145 static __inline void					\
146 atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
147 {							\
148 	__asm __volatile("lock; " OP			\
149 	: "+m" (*p)					\
150 	: CONS (V)					\
151 	: "cc");					\
152 }							\
153 							\
154 static __inline void					\
155 atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
156 {							\
157 	__asm __volatile("lock; " OP			\
158 	: "+m" (*p)					\
159 	: CONS (V)					\
160 	: "memory", "cc");				\
161 }							\
162 struct __hack
163 
164 /*
165  * Atomic compare and set, used by the mutex functions.
166  *
167  * cmpset:
168  *	if (*dst == expect)
169  *		*dst = src
170  *
171  * fcmpset:
172  *	if (*dst == *expect)
173  *		*dst = src
174  *	else
175  *		*expect = *dst
176  *
177  * Returns 0 on failure, non-zero on success.
178  */
/*
 * ATOMIC_CMPSET(TYPE, CONS) defines atomic_cmpset_TYPE() and
 * atomic_fcmpset_TYPE() around a locked cmpxchg.  CONS is the register
 * constraint used for src.
 *
 * The expected value is tied to the accumulator ("+a") because cmpxchg
 * compares the accumulator with the destination and, on a mismatch, loads
 * the destination's current value into it.  In cmpset that update lands
 * in the by-value parameter and is discarded; in fcmpset the operand is
 * *expect, so the caller sees the value that was found.  sete converts ZF
 * into the 0/1 result.
 */
179 #define	ATOMIC_CMPSET(TYPE, CONS)			\
180 static __inline int					\
181 atomic_cmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE expect, u_##TYPE src) \
182 {							\
183 	u_char res;					\
184 							\
185 	__asm __volatile(				\
186 	"	lock; cmpxchg	%3,%1 ;	"		\
187 	"	sete	%0 ;		"		\
188 	"# atomic_cmpset_" #TYPE "	"		\
189 	: "=q" (res),			/* 0 */		\
190 	  "+m" (*dst),			/* 1 */		\
191 	  "+a" (expect)			/* 2 */		\
192 	: CONS (src)			/* 3 */		\
193 	: "memory", "cc");				\
194 	return (res);					\
195 }							\
196 							\
197 static __inline int					\
198 atomic_fcmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE *expect, u_##TYPE src) \
199 {							\
200 	u_char res;					\
201 							\
202 	__asm __volatile(				\
203 	"	lock; cmpxchg	%3,%1 ;	"		\
204 	"	sete	%0 ;		"		\
205 	"# atomic_fcmpset_" #TYPE "	"		\
206 	: "=q" (res),			/* 0 */		\
207 	  "+m" (*dst),			/* 1 */		\
208 	  "+a" (*expect)		/* 2 */		\
209 	: CONS (src)			/* 3 */		\
210 	: "memory", "cc");				\
211 	return (res);					\
212 }
213 
/* The char variant needs a byte-addressable register ("q") for src. */
214 ATOMIC_CMPSET(char, "q");
215 ATOMIC_CMPSET(short, "r");
216 ATOMIC_CMPSET(int, "r");
217 
218 /*
219  * Atomically add the value of v to the integer pointed to by p and return
220  * the previous value of *p.
221  */
/*
 * xadd exchanges the register operand with *p and stores their sum in *p,
 * so on return v holds the value *p had before the addition.  Only "cc"
 * is clobbered; the asm is not a compiler memory barrier.
 */
222 static __inline u_int
223 atomic_fetchadd_int(volatile u_int *p, u_int v)
224 {
225 
226 	__asm __volatile(
227 	"	lock; xaddl	%0,%1 ;	"
228 	"# atomic_fetchadd_int"
229 	: "+r" (v),			/* 0 */
230 	  "+m" (*p)			/* 1 */
231 	: : "cc");
232 	return (v);
233 }
234 
/*
 * Atomically set bit (v & 0x1f) of *p and return that bit's previous
 * state: bts copies the old bit into CF before setting it, and setc
 * materializes CF as 0 or 1.
 */
235 static __inline int
236 atomic_testandset_int(volatile u_int *p, u_int v)
237 {
238 	u_char res;
239 
240 	__asm __volatile(
241 	"	lock; btsl	%2,%1 ;	"
242 	"	setc	%0 ;		"
243 	"# atomic_testandset_int"
244 	: "=q" (res),			/* 0 */
245 	  "+m" (*p)			/* 1 */
246 	: "Ir" (v & 0x1f)		/* 2 */
247 	: "cc");
248 	return (res);
249 }
250 
/*
 * Atomically clear bit (v & 0x1f) of *p and return that bit's previous
 * state: btr copies the old bit into CF before clearing it, and setc
 * materializes CF as 0 or 1.
 */
251 static __inline int
252 atomic_testandclear_int(volatile u_int *p, u_int v)
253 {
254 	u_char res;
255 
256 	__asm __volatile(
257 	"	lock; btrl	%2,%1 ;	"
258 	"	setc	%0 ;		"
259 	"# atomic_testandclear_int"
260 	: "=q" (res),			/* 0 */
261 	  "+m" (*p)			/* 1 */
262 	: "Ir" (v & 0x1f)		/* 2 */
263 	: "cc");
264 	return (res);
265 }
266 
267 /*
268  * We assume that a = b will do atomic loads and stores.  Due to the
269  * IA32 memory model, a simple store guarantees release semantics.
270  *
271  * However, a load may pass a store if they are performed on distinct
272  * addresses, so we need Store/Load barrier for sequentially
273  * consistent fences in SMP kernels.  We use "lock addl $0,mem" for a
274  * Store/Load barrier, as recommended by the AMD Software Optimization
275  * Guide, and not mfence.  In the kernel, we use a private per-cpu
276  * cache line for "mem", to avoid introducing false data
277  * dependencies.  In user space, we use the word at the top of the
278  * stack.
279  *
280  * For UP kernels, however, the memory of the single processor is
281  * always consistent, so we only need to stop the compiler from
282  * reordering accesses in a way that violates the semantics of acquire
283  * and release.
284  */
285 
/*
 * Select the Store/Load barrier primitive: the per-CPU variant in the
 * kernel, the stack-based variant in userland.
 */
286 #if defined(_KERNEL)
287 #define	__storeload_barrier()	__mbk()
288 #else /* !_KERNEL */
289 #define	__storeload_barrier()	__mbu()
290 #endif /* _KERNEL*/
291 
/*
 * ATOMIC_LOAD(TYPE) defines atomic_load_acq_TYPE(): a plain load through
 * the volatile pointer followed by __compiler_membar(), so that the
 * compiler does not move later memory accesses ahead of the load.  No
 * fence instruction is emitted here.
 */
292 #define	ATOMIC_LOAD(TYPE)					\
293 static __inline u_##TYPE					\
294 atomic_load_acq_##TYPE(volatile u_##TYPE *p)			\
295 {								\
296 	u_##TYPE res;						\
297 								\
298 	res = *p;						\
299 	__compiler_membar();					\
300 	return (res);						\
301 }								\
302 struct __hack
303 
/*
 * ATOMIC_STORE(TYPE) defines atomic_store_rel_TYPE(): __compiler_membar()
 * followed by a plain store through the volatile pointer, so that the
 * compiler does not sink earlier memory accesses below the store.  No
 * fence instruction is emitted here.
 */
304 #define	ATOMIC_STORE(TYPE)					\
305 static __inline void						\
306 atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)	\
307 {								\
308 								\
309 	__compiler_membar();					\
310 	*p = v;							\
311 }								\
312 struct __hack
313 
/*
 * Acquire fence.  Only the compiler is restrained; no fence instruction
 * is emitted.
 */
static __inline void
atomic_thread_fence_acq(void)
{
	__compiler_membar();
}
320 
/*
 * Release fence.  Only the compiler is restrained; no fence instruction
 * is emitted.
 */
static __inline void
atomic_thread_fence_rel(void)
{
	__compiler_membar();
}
327 
/*
 * Combined acquire/release fence.  Only the compiler is restrained; no
 * fence instruction is emitted.
 */
static __inline void
atomic_thread_fence_acq_rel(void)
{
	__compiler_membar();
}
334 
/*
 * Sequentially consistent fence.  This is the one fence that needs a
 * Store/Load barrier, so it issues __storeload_barrier().
 */
static __inline void
atomic_thread_fence_seq_cst(void)
{
	__storeload_barrier();
}
341 
342 #ifdef _KERNEL
343 
/*
 * When WANT_FUNCTIONS is defined, declare the CPU-specific 64-bit cmpset,
 * load, store and swap variants ahead of their definitions.
 */
344 #ifdef WANT_FUNCTIONS
345 int		atomic_cmpset_64_i386(volatile uint64_t *, uint64_t, uint64_t);
346 int		atomic_cmpset_64_i586(volatile uint64_t *, uint64_t, uint64_t);
347 uint64_t	atomic_load_acq_64_i386(volatile uint64_t *);
348 uint64_t	atomic_load_acq_64_i586(volatile uint64_t *);
349 void		atomic_store_rel_64_i386(volatile uint64_t *, uint64_t);
350 void		atomic_store_rel_64_i586(volatile uint64_t *, uint64_t);
351 uint64_t	atomic_swap_64_i386(volatile uint64_t *, uint64_t);
352 uint64_t	atomic_swap_64_i586(volatile uint64_t *, uint64_t);
353 #endif
354 
355 /* I486 does not support SMP or CMPXCHG8B. */
/*
 * 64-bit compare-and-set for CPUs without CMPXCHG8B.  The two-word
 * compare and conditional store run with interrupts disabled (cli);
 * pushfl/popfl save and restore the flags register around them.
 *
 * expect arrives in %edx:%eax ("+A") and is XORed with the two halves of
 * *dst; if the OR of both results is zero the halves of src are stored.
 * The movl instructions do not alter flags, so sete still sees ZF from
 * the orl on both paths, and it runs before popfl overwrites the flags.
 *
 * Returns non-zero if the store was performed, 0 otherwise.
 */
356 static __inline int
357 atomic_cmpset_64_i386(volatile uint64_t *dst, uint64_t expect, uint64_t src)
358 {
359 	volatile uint32_t *p;
360 	u_char res;
361 
362 	p = (volatile uint32_t *)dst;
363 	__asm __volatile(
364 	"	pushfl ;		"
365 	"	cli ;			"
366 	"	xorl	%1,%%eax ;	"
367 	"	xorl	%2,%%edx ;	"
368 	"	orl	%%edx,%%eax ;	"
369 	"	jne	1f ;		"
370 	"	movl	%4,%1 ;		"
371 	"	movl	%5,%2 ;		"
372 	"1:				"
373 	"	sete	%3 ;		"
374 	"	popfl"
375 	: "+A" (expect),		/* 0 */
376 	  "+m" (*p),			/* 1 */
377 	  "+m" (*(p + 1)),		/* 2 */
378 	  "=q" (res)			/* 3 */
379 	: "r" ((uint32_t)src),		/* 4 */
380 	  "r" ((uint32_t)(src >> 32))	/* 5 */
381 	: "memory", "cc");
382 	return (res);
383 }
384 
/*
 * 64-bit fcmpset for CPUs without CMPXCHG8B, layered on
 * atomic_cmpset_64_i386().  When the compare-and-set fails, *expect is
 * refreshed with an ordinary read of *dst.
 *
 * Returns 1 on success, 0 on failure.
 */
static __inline int
atomic_fcmpset_64_i386(volatile uint64_t *dst, uint64_t *expect, uint64_t src)
{

	if (!atomic_cmpset_64_i386(dst, *expect, src)) {
		*expect = *dst;
		return (0);
	}
	return (1);
}
396 
/*
 * 64-bit load for CPUs without CMPXCHG8B.  Both 32-bit halves of *p are
 * read into %edx:%eax with interrupts disabled (cli); pushfl/popfl save
 * and restore the flags register.  The "memory" clobber keeps the
 * compiler from moving memory accesses across the asm.
 */
397 static __inline uint64_t
398 atomic_load_acq_64_i386(volatile uint64_t *p)
399 {
400 	volatile uint32_t *q;
401 	uint64_t res;
402 
403 	q = (volatile uint32_t *)p;
404 	__asm __volatile(
405 	"	pushfl ;		"
406 	"	cli ;			"
407 	"	movl	%1,%%eax ;	"
408 	"	movl	%2,%%edx ;	"
409 	"	popfl"
410 	: "=&A" (res)			/* 0 */
411 	: "m" (*q),			/* 1 */
412 	  "m" (*(q + 1))		/* 2 */
413 	: "memory");
414 	return (res);
415 }
416 
/*
 * 64-bit store for CPUs without CMPXCHG8B.  v is supplied in %edx:%eax
 * ("A") and written to the two 32-bit halves of *p with interrupts
 * disabled (cli); pushfl/popfl save and restore the flags register.  The
 * "memory" clobber keeps the compiler from moving memory accesses across
 * the asm.
 */
417 static __inline void
418 atomic_store_rel_64_i386(volatile uint64_t *p, uint64_t v)
419 {
420 	volatile uint32_t *q;
421 
422 	q = (volatile uint32_t *)p;
423 	__asm __volatile(
424 	"	pushfl ;		"
425 	"	cli ;			"
426 	"	movl	%%eax,%0 ;	"
427 	"	movl	%%edx,%1 ;	"
428 	"	popfl"
429 	: "=m" (*q),			/* 0 */
430 	  "=m" (*(q + 1))		/* 1 */
431 	: "A" (v)			/* 2 */
432 	: "memory");
433 }
434 
/*
 * 64-bit swap for CPUs without CMPXCHG8B.  With interrupts disabled
 * (cli), the old halves of *p are read into %edx:%eax and the halves of
 * v are then stored in their place; pushfl/popfl save and restore the
 * flags register.  Returns the previous value of *p.
 *
 * Unlike the other i386 64-bit routines, this asm declares no "memory"
 * clobber; only the two halves of *p are listed as modified.
 */
435 static __inline uint64_t
436 atomic_swap_64_i386(volatile uint64_t *p, uint64_t v)
437 {
438 	volatile uint32_t *q;
439 	uint64_t res;
440 
441 	q = (volatile uint32_t *)p;
442 	__asm __volatile(
443 	"	pushfl ;		"
444 	"	cli ;			"
445 	"	movl	%1,%%eax ;	"
446 	"	movl	%2,%%edx ;	"
447 	"	movl	%4,%2 ;		"
448 	"	movl	%3,%1 ;		"
449 	"	popfl"
450 	: "=&A" (res),			/* 0 */
451 	  "+m" (*q),			/* 1 */
452 	  "+m" (*(q + 1))		/* 2 */
453 	: "r" ((uint32_t)v),		/* 3 */
454 	  "r" ((uint32_t)(v >> 32)));	/* 4 */
455 	return (res);
456 }
457 
/*
 * 64-bit compare-and-set using a locked cmpxchg8b.  expect is passed in
 * %edx:%eax ("A") and src in %ecx:%ebx, the registers the instruction
 * operates on.  sete converts ZF into the return value: non-zero if *dst
 * matched expect and was replaced by src, 0 otherwise.
 */
458 static __inline int
459 atomic_cmpset_64_i586(volatile uint64_t *dst, uint64_t expect, uint64_t src)
460 {
461 	u_char res;
462 
463 	__asm __volatile(
464 	"	lock; cmpxchg8b %1 ;	"
465 	"	sete	%0"
466 	: "=q" (res),			/* 0 */
467 	  "+m" (*dst),			/* 1 */
468 	  "+A" (expect)			/* 2 */
469 	: "b" ((uint32_t)src),		/* 3 */
470 	  "c" ((uint32_t)(src >> 32))	/* 4 */
471 	: "memory", "cc");
472 	return (res);
473 }
474 
/*
 * 64-bit fcmpset using a locked cmpxchg8b.  Same instruction sequence as
 * atomic_cmpset_64_i586(), but the "+A" operand is *expect: on a mismatch
 * cmpxchg8b leaves the current contents of *dst in %edx:%eax, and that
 * value is written back to *expect.
 *
 * Returns non-zero on success, 0 on failure.
 */
475 static __inline int
476 atomic_fcmpset_64_i586(volatile uint64_t *dst, uint64_t *expect, uint64_t src)
477 {
478 	u_char res;
479 
480 	__asm __volatile(
481 	"	lock; cmpxchg8b %1 ;	"
482 	"	sete	%0"
483 	: "=q" (res),			/* 0 */
484 	  "+m" (*dst),			/* 1 */
485 	  "+A" (*expect)		/* 2 */
486 	: "b" ((uint32_t)src),		/* 3 */
487 	  "c" ((uint32_t)(src >> 32))	/* 4 */
488 	: "memory", "cc");
489 	return (res);
490 }
491 
/*
 * 64-bit load using a locked cmpxchg8b.  %edx:%eax is first made equal to
 * whatever %ecx:%ebx happens to contain, so *p keeps its value whichever
 * way the comparison goes: on a match the identical value is stored back,
 * on a mismatch nothing new is stored and the current contents are loaded
 * into %edx:%eax.  Either way %edx:%eax ends up holding *p, which is
 * returned.  *p is declared as a read-write operand ("+m").
 */
492 static __inline uint64_t
493 atomic_load_acq_64_i586(volatile uint64_t *p)
494 {
495 	uint64_t res;
496 
497 	__asm __volatile(
498 	"	movl	%%ebx,%%eax ;	"
499 	"	movl	%%ecx,%%edx ;	"
500 	"	lock; cmpxchg8b %1"
501 	: "=&A" (res),			/* 0 */
502 	  "+m" (*p)			/* 1 */
503 	: : "memory", "cc");
504 	return (res);
505 }
506 
/*
 * 64-bit store using a locked cmpxchg8b loop.  v arrives in %edx:%eax
 * ("+A") and is copied into %ecx:%ebx as the value to store.  A failed
 * cmpxchg8b reloads %edx:%eax with the current *p, so the retry compares
 * against the fresh value; the loop ends once the exchange succeeds.
 * %ebx and %ecx are listed as clobbered because the asm overwrites them.
 */
507 static __inline void
508 atomic_store_rel_64_i586(volatile uint64_t *p, uint64_t v)
509 {
510 
511 	__asm __volatile(
512 	"	movl	%%eax,%%ebx ;	"
513 	"	movl	%%edx,%%ecx ;	"
514 	"1:				"
515 	"	lock; cmpxchg8b %0 ;	"
516 	"	jne	1b"
517 	: "+m" (*p),			/* 0 */
518 	  "+A" (v)			/* 1 */
519 	: : "ebx", "ecx", "memory", "cc");
520 }
521 
/*
 * 64-bit swap using a locked cmpxchg8b loop.  v arrives in %edx:%eax
 * ("+A") and is copied into %ecx:%ebx as the value to store.  A failed
 * cmpxchg8b reloads %edx:%eax with the current *p and the loop retries.
 * When the exchange finally succeeds, %edx:%eax (and therefore v) holds
 * the value that was replaced, which is returned.
 */
522 static __inline uint64_t
523 atomic_swap_64_i586(volatile uint64_t *p, uint64_t v)
524 {
525 
526 	__asm __volatile(
527 	"	movl	%%eax,%%ebx ;	"
528 	"	movl	%%edx,%%ecx ;	"
529 	"1:				"
530 	"	lock; cmpxchg8b %0 ;	"
531 	"	jne	1b"
532 	: "+m" (*p),			/* 0 */
533 	  "+A" (v)			/* 1 */
534 	: : "ebx", "ecx", "memory", "cc");
535 	return (v);
536 }
537 
538 static __inline int
539 atomic_cmpset_64(volatile uint64_t *dst, uint64_t expect, uint64_t src)
540 {
541 
542 	if ((cpu_feature & CPUID_CX8) == 0)
543 		return (atomic_cmpset_64_i386(dst, expect, src));
544 	else
545 		return (atomic_cmpset_64_i586(dst, expect, src));
546 }
547 
548 static __inline int
549 atomic_fcmpset_64(volatile uint64_t *dst, uint64_t *expect, uint64_t src)
550 {
551 
552   	if ((cpu_feature & CPUID_CX8) == 0)
553 		return (atomic_fcmpset_64_i386(dst, expect, src));
554 	else
555 		return (atomic_fcmpset_64_i586(dst, expect, src));
556 }
557 
558 static __inline uint64_t
559 atomic_load_acq_64(volatile uint64_t *p)
560 {
561 
562 	if ((cpu_feature & CPUID_CX8) == 0)
563 		return (atomic_load_acq_64_i386(p));
564 	else
565 		return (atomic_load_acq_64_i586(p));
566 }
567 
568 static __inline void
569 atomic_store_rel_64(volatile uint64_t *p, uint64_t v)
570 {
571 
572 	if ((cpu_feature & CPUID_CX8) == 0)
573 		atomic_store_rel_64_i386(p, v);
574 	else
575 		atomic_store_rel_64_i586(p, v);
576 }
577 
578 static __inline uint64_t
579 atomic_swap_64(volatile uint64_t *p, uint64_t v)
580 {
581 
582 	if ((cpu_feature & CPUID_CX8) == 0)
583 		return (atomic_swap_64_i386(p, v));
584 	else
585 		return (atomic_swap_64_i586(p, v));
586 }
587 
/*
 * Add v to the 64-bit value at *p and return the value it held before the
 * addition.  Implemented as a retry loop: read *p, then try to replace
 * that exact value with the sum via atomic_cmpset_64(), repeating until
 * the compare-and-set succeeds.
 */
static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t old;

	do {
		old = *p;
	} while (!atomic_cmpset_64(p, old, old + v));
	return (old);
}
598 
/*
 * Add v to the 64-bit value at *p.  Implemented as a retry loop: read *p,
 * then try to replace that exact value with the sum via
 * atomic_cmpset_64(), repeating until the compare-and-set succeeds.
 */
static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t old;

	do {
		old = *p;
	} while (!atomic_cmpset_64(p, old, old + v));
}
610 
/*
 * Subtract v from the 64-bit value at *p.  Implemented as a retry loop:
 * read *p, then try to replace that exact value with the difference via
 * atomic_cmpset_64(), repeating until the compare-and-set succeeds.
 */
static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t old;

	do {
		old = *p;
	} while (!atomic_cmpset_64(p, old, old - v));
}
622 
623 #endif /* _KERNEL */
624 
625 #endif /* !__GNUCLIKE_ASM */
626 
/*
 * Instantiate set/clear/add/subtract (plain and _barr_ variants) for each
 * operand width.  The clear operations pass ~v so that the AND removes
 * the requested bits.  The long variants use the same 32-bit instructions
 * as the int variants.
 */
627 ATOMIC_ASM(set,	     char,  "orb %b1,%0",  "iq",  v);
628 ATOMIC_ASM(clear,    char,  "andb %b1,%0", "iq", ~v);
629 ATOMIC_ASM(add,	     char,  "addb %b1,%0", "iq",  v);
630 ATOMIC_ASM(subtract, char,  "subb %b1,%0", "iq",  v);
631 
632 ATOMIC_ASM(set,	     short, "orw %w1,%0",  "ir",  v);
633 ATOMIC_ASM(clear,    short, "andw %w1,%0", "ir", ~v);
634 ATOMIC_ASM(add,	     short, "addw %w1,%0", "ir",  v);
635 ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir",  v);
636 
637 ATOMIC_ASM(set,	     int,   "orl %1,%0",   "ir",  v);
638 ATOMIC_ASM(clear,    int,   "andl %1,%0",  "ir", ~v);
639 ATOMIC_ASM(add,	     int,   "addl %1,%0",  "ir",  v);
640 ATOMIC_ASM(subtract, int,   "subl %1,%0",  "ir",  v);
641 
642 ATOMIC_ASM(set,	     long,  "orl %1,%0",   "ir",  v);
643 ATOMIC_ASM(clear,    long,  "andl %1,%0",  "ir", ~v);
644 ATOMIC_ASM(add,	     long,  "addl %1,%0",  "ir",  v);
645 ATOMIC_ASM(subtract, long,  "subl %1,%0",  "ir",  v);
646 
/*
 * ATOMIC_LOADSTORE(TYPE) instantiates both atomic_load_acq_TYPE() and
 * atomic_store_rel_TYPE().  Once every type has been instantiated the
 * generator macros are #undef'ed.
 */
647 #define	ATOMIC_LOADSTORE(TYPE)				\
648 	ATOMIC_LOAD(TYPE);				\
649 	ATOMIC_STORE(TYPE)
650 
651 ATOMIC_LOADSTORE(char);
652 ATOMIC_LOADSTORE(short);
653 ATOMIC_LOADSTORE(int);
654 ATOMIC_LOADSTORE(long);
655 
656 #undef ATOMIC_ASM
657 #undef ATOMIC_LOAD
658 #undef ATOMIC_STORE
659 #undef ATOMIC_LOADSTORE
660 
661 #ifndef WANT_FUNCTIONS
662 
663 static __inline int
664 atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src)
665 {
666 
667 	return (atomic_cmpset_int((volatile u_int *)dst, (u_int)expect,
668 	    (u_int)src));
669 }
670 
671 static __inline int
672 atomic_fcmpset_long(volatile u_long *dst, u_long *expect, u_long src)
673 {
674 
675 	return (atomic_fcmpset_int((volatile u_int *)dst, (u_int *)expect,
676 	    (u_int)src));
677 }
678 
679 static __inline u_long
680 atomic_fetchadd_long(volatile u_long *p, u_long v)
681 {
682 
683 	return (atomic_fetchadd_int((volatile u_int *)p, (u_int)v));
684 }
685 
686 static __inline int
687 atomic_testandset_long(volatile u_long *p, u_int v)
688 {
689 
690 	return (atomic_testandset_int((volatile u_int *)p, v));
691 }
692 
693 static __inline int
694 atomic_testandclear_long(volatile u_long *p, u_int v)
695 {
696 
697 	return (atomic_testandclear_int((volatile u_int *)p, v));
698 }
699 
700 /* Read the current value and store a new value in the destination. */
701 #ifdef __GNUCLIKE_ASM
702 
/*
 * xchgl swaps the register holding v with *p, so v comes back holding the
 * previous contents of *p, which is returned.  No lock prefix is written
 * because xchg with a memory operand is locked by the processor itself.
 * The asm declares no clobbers, so it is not a compiler memory barrier.
 */
703 static __inline u_int
704 atomic_swap_int(volatile u_int *p, u_int v)
705 {
706 
707 	__asm __volatile(
708 	"	xchgl	%1,%0 ;		"
709 	"# atomic_swap_int"
710 	: "+r" (v),			/* 0 */
711 	  "+m" (*p));			/* 1 */
712 	return (v);
713 }
714 
715 static __inline u_long
716 atomic_swap_long(volatile u_long *p, u_long v)
717 {
718 
719 	return (atomic_swap_int((volatile u_int *)p, (u_int)v));
720 }
721 
722 #else /* !__GNUCLIKE_ASM */
723 
/* Without GNU-style inline asm the swap operations are only declared. */
724 u_int	atomic_swap_int(volatile u_int *p, u_int v);
725 u_long	atomic_swap_long(volatile u_long *p, u_long v);
726 
727 #endif /* __GNUCLIKE_ASM */
728 
/*
 * Acquire and release variants.  For set/clear/add/subtract both map to
 * the _barr_ functions; cmpset and fcmpset variants alias the plain
 * functions of the same type.
 */
729 #define	atomic_set_acq_char		atomic_set_barr_char
730 #define	atomic_set_rel_char		atomic_set_barr_char
731 #define	atomic_clear_acq_char		atomic_clear_barr_char
732 #define	atomic_clear_rel_char		atomic_clear_barr_char
733 #define	atomic_add_acq_char		atomic_add_barr_char
734 #define	atomic_add_rel_char		atomic_add_barr_char
735 #define	atomic_subtract_acq_char	atomic_subtract_barr_char
736 #define	atomic_subtract_rel_char	atomic_subtract_barr_char
737 #define	atomic_cmpset_acq_char		atomic_cmpset_char
738 #define	atomic_cmpset_rel_char		atomic_cmpset_char
739 #define	atomic_fcmpset_acq_char		atomic_fcmpset_char
740 #define	atomic_fcmpset_rel_char		atomic_fcmpset_char
741 
742 #define	atomic_set_acq_short		atomic_set_barr_short
743 #define	atomic_set_rel_short		atomic_set_barr_short
744 #define	atomic_clear_acq_short		atomic_clear_barr_short
745 #define	atomic_clear_rel_short		atomic_clear_barr_short
746 #define	atomic_add_acq_short		atomic_add_barr_short
747 #define	atomic_add_rel_short		atomic_add_barr_short
748 #define	atomic_subtract_acq_short	atomic_subtract_barr_short
749 #define	atomic_subtract_rel_short	atomic_subtract_barr_short
750 #define	atomic_cmpset_acq_short		atomic_cmpset_short
751 #define	atomic_cmpset_rel_short		atomic_cmpset_short
752 #define	atomic_fcmpset_acq_short	atomic_fcmpset_short
753 #define	atomic_fcmpset_rel_short	atomic_fcmpset_short
754 
755 #define	atomic_set_acq_int		atomic_set_barr_int
756 #define	atomic_set_rel_int		atomic_set_barr_int
757 #define	atomic_clear_acq_int		atomic_clear_barr_int
758 #define	atomic_clear_rel_int		atomic_clear_barr_int
759 #define	atomic_add_acq_int		atomic_add_barr_int
760 #define	atomic_add_rel_int		atomic_add_barr_int
761 #define	atomic_subtract_acq_int		atomic_subtract_barr_int
762 #define	atomic_subtract_rel_int		atomic_subtract_barr_int
763 #define	atomic_cmpset_acq_int		atomic_cmpset_int
764 #define	atomic_cmpset_rel_int		atomic_cmpset_int
765 #define	atomic_fcmpset_acq_int		atomic_fcmpset_int
766 #define	atomic_fcmpset_rel_int		atomic_fcmpset_int
767 
768 #define	atomic_set_acq_long		atomic_set_barr_long
769 #define	atomic_set_rel_long		atomic_set_barr_long
770 #define	atomic_clear_acq_long		atomic_clear_barr_long
771 #define	atomic_clear_rel_long		atomic_clear_barr_long
772 #define	atomic_add_acq_long		atomic_add_barr_long
773 #define	atomic_add_rel_long		atomic_add_barr_long
774 #define	atomic_subtract_acq_long	atomic_subtract_barr_long
775 #define	atomic_subtract_rel_long	atomic_subtract_barr_long
776 #define	atomic_cmpset_acq_long		atomic_cmpset_long
777 #define	atomic_cmpset_rel_long		atomic_cmpset_long
778 #define	atomic_fcmpset_acq_long		atomic_fcmpset_long
779 #define	atomic_fcmpset_rel_long		atomic_fcmpset_long
780 
/*
 * Read-and-clear is a swap with 0.  The acquire form of
 * atomic_testandset_long aliases the plain function.
 */
781 #define	atomic_readandclear_int(p)	atomic_swap_int(p, 0)
782 #define	atomic_readandclear_long(p)	atomic_swap_long(p, 0)
783 #define	atomic_testandset_acq_long	atomic_testandset_long
784 
/*
 * Fixed-width names.  Every atomic_*_8, atomic_*_16 and atomic_*_32 macro
 * below is a pure alias for the char, short or int operation of the same
 * name.
 */
785 /* Operations on 8-bit bytes. */
786 #define	atomic_set_8		atomic_set_char
787 #define	atomic_set_acq_8	atomic_set_acq_char
788 #define	atomic_set_rel_8	atomic_set_rel_char
789 #define	atomic_clear_8		atomic_clear_char
790 #define	atomic_clear_acq_8	atomic_clear_acq_char
791 #define	atomic_clear_rel_8	atomic_clear_rel_char
792 #define	atomic_add_8		atomic_add_char
793 #define	atomic_add_acq_8	atomic_add_acq_char
794 #define	atomic_add_rel_8	atomic_add_rel_char
795 #define	atomic_subtract_8	atomic_subtract_char
796 #define	atomic_subtract_acq_8	atomic_subtract_acq_char
797 #define	atomic_subtract_rel_8	atomic_subtract_rel_char
798 #define	atomic_load_acq_8	atomic_load_acq_char
799 #define	atomic_store_rel_8	atomic_store_rel_char
800 #define	atomic_cmpset_8		atomic_cmpset_char
801 #define	atomic_cmpset_acq_8	atomic_cmpset_acq_char
802 #define	atomic_cmpset_rel_8	atomic_cmpset_rel_char
803 #define	atomic_fcmpset_8	atomic_fcmpset_char
804 #define	atomic_fcmpset_acq_8	atomic_fcmpset_acq_char
805 #define	atomic_fcmpset_rel_8	atomic_fcmpset_rel_char
806 
807 /* Operations on 16-bit words. */
808 #define	atomic_set_16		atomic_set_short
809 #define	atomic_set_acq_16	atomic_set_acq_short
810 #define	atomic_set_rel_16	atomic_set_rel_short
811 #define	atomic_clear_16		atomic_clear_short
812 #define	atomic_clear_acq_16	atomic_clear_acq_short
813 #define	atomic_clear_rel_16	atomic_clear_rel_short
814 #define	atomic_add_16		atomic_add_short
815 #define	atomic_add_acq_16	atomic_add_acq_short
816 #define	atomic_add_rel_16	atomic_add_rel_short
817 #define	atomic_subtract_16	atomic_subtract_short
818 #define	atomic_subtract_acq_16	atomic_subtract_acq_short
819 #define	atomic_subtract_rel_16	atomic_subtract_rel_short
820 #define	atomic_load_acq_16	atomic_load_acq_short
821 #define	atomic_store_rel_16	atomic_store_rel_short
822 #define	atomic_cmpset_16	atomic_cmpset_short
823 #define	atomic_cmpset_acq_16	atomic_cmpset_acq_short
824 #define	atomic_cmpset_rel_16	atomic_cmpset_rel_short
825 #define	atomic_fcmpset_16	atomic_fcmpset_short
826 #define	atomic_fcmpset_acq_16	atomic_fcmpset_acq_short
827 #define	atomic_fcmpset_rel_16	atomic_fcmpset_rel_short
828 
829 /* Operations on 32-bit double words. */
830 #define	atomic_set_32		atomic_set_int
831 #define	atomic_set_acq_32	atomic_set_acq_int
832 #define	atomic_set_rel_32	atomic_set_rel_int
833 #define	atomic_clear_32		atomic_clear_int
834 #define	atomic_clear_acq_32	atomic_clear_acq_int
835 #define	atomic_clear_rel_32	atomic_clear_rel_int
836 #define	atomic_add_32		atomic_add_int
837 #define	atomic_add_acq_32	atomic_add_acq_int
838 #define	atomic_add_rel_32	atomic_add_rel_int
839 #define	atomic_subtract_32	atomic_subtract_int
840 #define	atomic_subtract_acq_32	atomic_subtract_acq_int
841 #define	atomic_subtract_rel_32	atomic_subtract_rel_int
842 #define	atomic_load_acq_32	atomic_load_acq_int
843 #define	atomic_store_rel_32	atomic_store_rel_int
844 #define	atomic_cmpset_32	atomic_cmpset_int
845 #define	atomic_cmpset_acq_32	atomic_cmpset_acq_int
846 #define	atomic_cmpset_rel_32	atomic_cmpset_rel_int
847 #define	atomic_fcmpset_32	atomic_fcmpset_int
848 #define	atomic_fcmpset_acq_32	atomic_fcmpset_acq_int
849 #define	atomic_fcmpset_rel_32	atomic_fcmpset_rel_int
850 #define	atomic_swap_32		atomic_swap_int
851 #define	atomic_readandclear_32	atomic_readandclear_int
852 #define	atomic_fetchadd_32	atomic_fetchadd_int
853 #define	atomic_testandset_32	atomic_testandset_int
854 #define	atomic_testandclear_32	atomic_testandclear_int
855 
856 #ifdef _KERNEL
857 /* Operations on 64-bit quad words. */
/*
 * The acquire and release names alias the plain 64-bit functions.  The
 * unqualified atomic_load_64 and atomic_store_64 names map to the
 * acquire load and the release store respectively.
 */
858 #define	atomic_cmpset_acq_64 atomic_cmpset_64
859 #define	atomic_cmpset_rel_64 atomic_cmpset_64
860 #define	atomic_fcmpset_acq_64 atomic_fcmpset_64
861 #define	atomic_fcmpset_rel_64 atomic_fcmpset_64
862 #define	atomic_fetchadd_acq_64	atomic_fetchadd_64
863 #define	atomic_fetchadd_rel_64	atomic_fetchadd_64
864 #define	atomic_add_acq_64 atomic_add_64
865 #define	atomic_add_rel_64 atomic_add_64
866 #define	atomic_subtract_acq_64 atomic_subtract_64
867 #define	atomic_subtract_rel_64 atomic_subtract_64
868 #define	atomic_load_64 atomic_load_acq_64
869 #define	atomic_store_64 atomic_store_rel_64
870 #endif
871 
/*
 * Operations on pointers.
 *
 * Every pointer operation is the corresponding int operation applied
 * through casts: the target is viewed as volatile u_int, each value
 * argument is converted to u_int, and for fcmpset the "old" pointer is
 * viewed as u_int *.
 */
#define	atomic_set_ptr(p, v) \
	atomic_set_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_set_acq_ptr(p, v) \
	atomic_set_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_set_rel_ptr(p, v) \
	atomic_set_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_ptr(p, v) \
	atomic_clear_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_acq_ptr(p, v) \
	atomic_clear_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_clear_rel_ptr(p, v) \
	atomic_clear_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_ptr(p, v) \
	atomic_add_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_acq_ptr(p, v) \
	atomic_add_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_add_rel_ptr(p, v) \
	atomic_add_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_ptr(p, v) \
	atomic_subtract_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_acq_ptr(p, v) \
	atomic_subtract_acq_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_subtract_rel_ptr(p, v) \
	atomic_subtract_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_load_acq_ptr(p) \
	atomic_load_acq_int((volatile u_int *)(p))
/* v is cast here as in every other pointer macro that takes a value. */
#define	atomic_store_rel_ptr(p, v) \
	atomic_store_rel_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_cmpset_ptr(dst, old, new) \
	atomic_cmpset_int((volatile u_int *)(dst), (u_int)(old), (u_int)(new))
#define	atomic_cmpset_acq_ptr(dst, old, new) \
	atomic_cmpset_acq_int((volatile u_int *)(dst), (u_int)(old), \
	    (u_int)(new))
#define	atomic_cmpset_rel_ptr(dst, old, new) \
	atomic_cmpset_rel_int((volatile u_int *)(dst), (u_int)(old), \
	    (u_int)(new))
#define	atomic_fcmpset_ptr(dst, old, new) \
	atomic_fcmpset_int((volatile u_int *)(dst), (u_int *)(old), (u_int)(new))
#define	atomic_fcmpset_acq_ptr(dst, old, new) \
	atomic_fcmpset_acq_int((volatile u_int *)(dst), (u_int *)(old), \
	    (u_int)(new))
#define	atomic_fcmpset_rel_ptr(dst, old, new) \
	atomic_fcmpset_rel_int((volatile u_int *)(dst), (u_int *)(old), \
	    (u_int)(new))
#define	atomic_swap_ptr(p, v) \
	atomic_swap_int((volatile u_int *)(p), (u_int)(v))
#define	atomic_readandclear_ptr(p) \
	atomic_readandclear_int((volatile u_int *)(p))
921 
922 #endif /* !WANT_FUNCTIONS */
923 
/*
 * Barrier macros.  mb(), wmb() and rmb() all expand to the same locked-add
 * primitive: __mbk() in the kernel, __mbu() in userland.
 */
924 #if defined(_KERNEL)
925 #define	mb()	__mbk()
926 #define	wmb()	__mbk()
927 #define	rmb()	__mbk()
928 #else
929 #define	mb()	__mbu()
930 #define	wmb()	__mbu()
931 #define	rmb()	__mbu()
932 #endif
933 
934 #endif /* !_MACHINE_ATOMIC_H_ */
935