/* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */

/*-
 * Copyright (C) 2003-2004 Olivier Houchard
 * Copyright (C) 1994-1997 Mark Brinicombe
 * Copyright (C) 1994 Brini
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Brini.
 * 4. The name of Brini may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef	_MACHINE_ATOMIC_H_
#define	_MACHINE_ATOMIC_H_

#include <sys/types.h>

#ifndef _KERNEL
#include <machine/sysarch.h>
#else
#include <machine/cpuconf.h>
#endif

#if defined (__ARM_ARCH_7__) || defined (__ARM_ARCH_7A__)
#define isb()  __asm __volatile("isb" : : : "memory")
#define dsb()  __asm __volatile("dsb" : : : "memory")
#define dmb()  __asm __volatile("dmb" : : : "memory")
#elif defined (__ARM_ARCH_6__) || defined (__ARM_ARCH_6J__) || \
  defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6T2__) || \
  defined (__ARM_ARCH_6Z__) || defined (__ARM_ARCH_6ZK__)
#define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define dmb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
#else
#define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define dmb()  dsb()
#endif

#define mb()   dmb()
#define wmb()  dmb()
#define rmb()  dmb()
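
/*
 * mb(), wmb() and rmb() all expand to the same full data memory barrier
 * here; this header does not use the finer-grained ARMv7 barrier options.
 * Illustrative use only (the variable names are invented for the example):
 * a producer publishing data for a consumer on another CPU might do
 *
 *	data = value;
 *	wmb();			// order the data store before the flag store
 *	flag = 1;
 *
 * while the consumer does
 *
 *	while (flag == 0)
 *		;
 *	rmb();			// order the flag load before the data loads
 *	use(data);
 */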

#ifndef I32_bit
#define I32_bit (1 << 7)        /* IRQ disable */
#endif
#ifndef F32_bit
#define F32_bit (1 << 6)        /* FIQ disable */
#endif

/*
 * It would be nice to use _HAVE_ARMv6_INSTRUCTIONS from machine/asm.h
 * here, but that header can't be included here because this is C
 * code.  I would like to move the _HAVE_ARMv6_INSTRUCTIONS definition
 * out of asm.h so it can be used in both asm and C code. - kientzle@
 */
#if defined (__ARM_ARCH_7__) || \
	defined (__ARM_ARCH_7A__)  || \
	defined (__ARM_ARCH_6__)   || \
	defined (__ARM_ARCH_6J__)  || \
	defined (__ARM_ARCH_6K__)  || \
	defined (__ARM_ARCH_6T2__) || \
	defined (__ARM_ARCH_6Z__)  || \
	defined (__ARM_ARCH_6ZK__)
#define	ARM_HAVE_ATOMIC64

static __inline void
__do_dmb(void)
{

#if defined (__ARM_ARCH_7__) || defined (__ARM_ARCH_7A__)
	__asm __volatile("dmb" : : : "memory");
#else
	__asm __volatile("mcr p15, 0, r0, c7, c10, 5" : : : "memory");
#endif
}

#define ATOMIC_ACQ_REL_LONG(NAME)					\
static __inline void							\
atomic_##NAME##_acq_long(__volatile u_long *p, u_long v)		\
{									\
	atomic_##NAME##_long(p, v);					\
	__do_dmb();							\
}									\
									\
static __inline void							\
atomic_##NAME##_rel_long(__volatile u_long *p, u_long v)		\
{									\
	__do_dmb();							\
	atomic_##NAME##_long(p, v);					\
}

#define	ATOMIC_ACQ_REL(NAME, WIDTH)					\
static __inline void							\
atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	atomic_##NAME##_##WIDTH(p, v);					\
	__do_dmb();							\
}									\
									\
static __inline void							\
atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	__do_dmb();							\
	atomic_##NAME##_##WIDTH(p, v);					\
}
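
/*
 * For example, ATOMIC_ACQ_REL(add, 32) below generates
 * atomic_add_acq_32() and atomic_add_rel_32(): the acquire variant
 * performs the plain operation and then issues a full barrier, while
 * the release variant issues the barrier first and then performs the
 * operation, so the barrier orders the operation against the critical
 * section it opens or closes.
 */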

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			 "orr %0, %0, %3\n"
			 "strex %1, %0, [%2]\n"
			 "cmp %1, #0\n"
			 "it ne\n"
			 "bne	1b\n"
			 : "=&r" (tmp), "+r" (tmp2)
			 , "+r" (address), "+r" (setmask) : : "cc", "memory");
}
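
/*
 * The pattern above is the standard ARMv6+ load-exclusive/store-exclusive
 * retry loop: ldrex loads the old value and marks the address for exclusive
 * access, the value is modified in a register, and strex attempts the store,
 * writing 0 to its status operand on success and 1 if the exclusive monitor
 * was lost (for example to another CPU or an interrupt), in which case the
 * loop retries.  The it/itt/ite prefixes are needed when this file is built
 * as Thumb-2, where conditional instructions must sit inside an IT block;
 * when assembling for ARM mode they are accepted but emit no code.
 */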

static __inline void
atomic_set_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   orr      %Q[tmp], %Q[val]\n"
		"   orr      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}
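
/*
 * The 64-bit variants use ldrexd/strexd, which operate on a consecutive
 * even/odd register pair.  In the constraints, %Q and %R select the
 * registers holding the least and most significant 32-bit halves of a
 * 64-bit operand, so the two orr instructions above apply the mask to each
 * half separately before the whole pair is stored back exclusively.
 */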

static __inline void
atomic_set_long(volatile u_long *address, u_long setmask)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			 "orr %0, %0, %3\n"
			 "strex %1, %0, [%2]\n"
			 "cmp %1, #0\n"
			 "it ne\n"
			 "bne	1b\n"
			 : "=&r" (tmp), "+r" (tmp2)
			 , "+r" (address), "+r" (setmask) : : "cc", "memory");
}

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			 "bic %0, %0, %3\n"
			 "strex %1, %0, [%2]\n"
			 "cmp %1, #0\n"
			 "it ne\n"
			 "bne	1b\n"
			 : "=&r" (tmp), "+r" (tmp2)
			 , "+r" (address), "+r" (setmask) : : "cc", "memory");
}

static __inline void
atomic_clear_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   bic      %Q[tmp], %Q[val]\n"
		"   bic      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_clear_long(volatile u_long *address, u_long setmask)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			 "bic %0, %0, %3\n"
			 "strex %1, %0, [%2]\n"
			 "cmp %1, #0\n"
			 "it ne\n"
			 "bne	1b\n"
			 : "=&r" (tmp), "+r" (tmp2)
			 , "+r" (address), "+r" (setmask) : : "cc", "memory");
}

static __inline u_int32_t
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	uint32_t ret;

	__asm __volatile("1: ldrex %0, [%1]\n"
			 "cmp %0, %2\n"
			 "itt ne\n"
			 "movne %0, #0\n"
			 "bne 2f\n"
			 "strex %0, %3, [%1]\n"
			 "cmp %0, #0\n"
			 "ite eq\n"
			 "moveq %0, #1\n"
			 "bne	1b\n"
			 "2:"
			 : "=&r" (ret)
			 , "+r" (p), "+r" (cmpval), "+r" (newval) : : "cc",
			 "memory");
	return (ret);
}
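
/*
 * atomic_cmpset_32() returns nonzero if *p matched cmpval and was replaced
 * by newval, and zero otherwise; a strex failure simply restarts the
 * sequence at 1:.  For illustration only (the variable name is invented),
 * a caller typically retries until the operation succeeds, e.g. a trivial
 * spin-wait built on the acquire variant:
 *
 *	while (atomic_cmpset_acq_32(&lock, 0, 1) == 0)
 *		;	// spin until we observe 0 and install 1
 */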

static __inline int
atomic_cmpset_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{
	uint64_t tmp;
	uint32_t ret;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   teq      %Q[tmp], %Q[cmp]\n"
		"   itee eq  \n"
		"   teqeq    %R[tmp], %R[cmp]\n"
		"   movne    %[ret], #0\n"
		"   bne      2f\n"
		"   strexd   %[ret], %[new], [%[ptr]]\n"
		"   teq      %[ret], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		"   mov      %[ret], #1\n"
		"2:          \n"
		:   [ret]    "=&r"  (ret),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [cmp]    "r"    (cmpval),
		    [new]    "r"    (newval)
		:   "cc", "memory");
	return (ret);
}
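
/*
 * The 64-bit compare is done a word at a time: the first teq compares the
 * low halves, and the "itee eq" block makes the following teqeq run only
 * when they were equal, while the movne/bne pair runs only when the flags
 * show a mismatch, so a difference in either half takes the early exit
 * with a zero result.
 */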

static __inline u_long
atomic_cmpset_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{
	u_long ret;

	__asm __volatile("1: ldrex %0, [%1]\n"
			 "cmp %0, %2\n"
			 "itt ne\n"
			 "movne %0, #0\n"
			 "bne 2f\n"
			 "strex %0, %3, [%1]\n"
			 "cmp %0, #0\n"
			 "ite eq\n"
			 "moveq %0, #1\n"
			 "bne	1b\n"
			 "2:"
			 : "=&r" (ret)
			 , "+r" (p), "+r" (cmpval), "+r" (newval) : : "cc",
			 "memory");
	return (ret);
}

static __inline u_int32_t
atomic_cmpset_acq_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	u_int32_t ret = atomic_cmpset_32(p, cmpval, newval);

	__do_dmb();
	return (ret);
}

static __inline uint64_t
atomic_cmpset_acq_64(volatile uint64_t *p, volatile uint64_t cmpval, volatile uint64_t newval)
{
	uint64_t ret = atomic_cmpset_64(p, cmpval, newval);

	__do_dmb();
	return (ret);
}

static __inline u_long
atomic_cmpset_acq_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{
	u_long ret = atomic_cmpset_long(p, cmpval, newval);

	__do_dmb();
	return (ret);
}

static __inline u_int32_t
atomic_cmpset_rel_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{

	__do_dmb();
	return (atomic_cmpset_32(p, cmpval, newval));
}

static __inline uint64_t
atomic_cmpset_rel_64(volatile uint64_t *p, volatile uint64_t cmpval, volatile uint64_t newval)
{

	__do_dmb();
	return (atomic_cmpset_64(p, cmpval, newval));
}

static __inline u_long
atomic_cmpset_rel_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{

	__do_dmb();
	return (atomic_cmpset_long(p, cmpval, newval));
}

static __inline void
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			 "add %0, %0, %3\n"
			 "strex %1, %0, [%2]\n"
			 "cmp %1, #0\n"
			 "it ne\n"
			 "bne	1b\n"
			 : "=&r" (tmp), "+r" (tmp2)
			 , "+r" (p), "+r" (val) : : "cc", "memory");
}

static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   adds     %Q[tmp], %Q[val]\n"
		"   adc      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_add_long(volatile u_long *p, u_long val)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			 "add %0, %0, %3\n"
			 "strex %1, %0, [%2]\n"
			 "cmp %1, #0\n"
			 "it ne\n"
			 "bne	1b\n"
			 : "=&r" (tmp), "+r" (tmp2)
			 , "+r" (p), "+r" (val) : : "cc", "memory");
}

static __inline void
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			 "sub %0, %0, %3\n"
			 "strex %1, %0, [%2]\n"
			 "cmp %1, #0\n"
			 "it ne\n"
			 "bne	1b\n"
			 : "=&r" (tmp), "+r" (tmp2)
			 , "+r" (p), "+r" (val) : : "cc", "memory");
}

static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   subs     %Q[tmp], %Q[val]\n"
		"   sbc      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long val)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
			 "sub %0, %0, %3\n"
			 "strex %1, %0, [%2]\n"
			 "cmp %1, #0\n"
			 "it ne\n"
			 "bne	1b\n"
			 : "=&r" (tmp), "+r" (tmp2)
			 , "+r" (p), "+r" (val) : : "cc", "memory");
}

ATOMIC_ACQ_REL(clear, 32)
ATOMIC_ACQ_REL(add, 32)
ATOMIC_ACQ_REL(subtract, 32)
ATOMIC_ACQ_REL(set, 32)
ATOMIC_ACQ_REL(clear, 64)
ATOMIC_ACQ_REL(add, 64)
ATOMIC_ACQ_REL(subtract, 64)
ATOMIC_ACQ_REL(set, 64)
ATOMIC_ACQ_REL_LONG(clear)
ATOMIC_ACQ_REL_LONG(add)
ATOMIC_ACQ_REL_LONG(subtract)
ATOMIC_ACQ_REL_LONG(set)

#undef ATOMIC_ACQ_REL
#undef ATOMIC_ACQ_REL_LONG

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp = 0, tmp2 = 0, ret = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
			 "add %1, %0, %4\n"
			 "strex %2, %1, [%3]\n"
			 "cmp %2, #0\n"
			 "it ne\n"
			 "bne	1b\n"
			 : "+r" (ret), "=&r" (tmp), "+r" (tmp2)
			 , "+r" (p), "+r" (val) : : "cc", "memory");
	return (ret);
}
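
/*
 * atomic_fetchadd_32() returns the value that was in *p before the add.
 * An illustrative use (names invented for the example) is handing out
 * sequence numbers or ticket-lock tickets:
 *
 *	my_ticket = atomic_fetchadd_32(&next_ticket, 1);
 */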

static __inline uint32_t
atomic_readandclear_32(volatile u_int32_t *p)
{
	uint32_t ret, tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
			 "mov %1, #0\n"
			 "strex %2, %1, [%3]\n"
			 "cmp %2, #0\n"
			 "it ne\n"
			 "bne 1b\n"
			 : "=r" (ret), "=&r" (tmp), "+r" (tmp2)
			 , "+r" (p) : : "cc", "memory");
	return (ret);
}

static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
	uint32_t v;

	v = *p;
	__do_dmb();
	return (v);
}

static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
{

	__do_dmb();
	*p = v;
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t ret, tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[ret], [%[ptr]]\n"
		"   adds     %Q[tmp], %Q[ret], %Q[val]\n"
		"   adc      %R[tmp], %R[ret], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [ret]    "=&r"  (ret),
		    [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_readandclear_64(volatile uint64_t *p)
{
	uint64_t ret, tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[ret], [%[ptr]]\n"
		"   mov      %Q[tmp], #0\n"
		"   mov      %R[tmp], #0\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [ret]    "=&r"  (ret),
		    [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p)
		:   "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
	uint64_t ret;

	/*
	 * The only way to atomically load 64 bits is with LDREXD, which puts
	 * the exclusive monitor into the exclusive state, so reset it with
	 * CLREX because we don't actually need to store anything.
	 */
	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[ret], [%[ptr]]\n"
		"   clrex    \n"
		:   [ret]    "=&r"  (ret)
		:   [ptr]    "r"    (p)
		:   "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
	uint64_t ret;

	ret = atomic_load_64(p);
	__do_dmb();
	return (ret);
}

static __inline void
atomic_store_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	/*
	 * The only way to atomically store 64 bits is with STREXD, which will
	 * succeed only if paired up with a preceding LDREXD using the same
	 * address, so we read and discard the existing value before storing.
	 */
	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   strexd   %[exf], %[val], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [tmp]    "=&r"  (tmp),
		    [exf]    "=&r"  (exflag)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{

	__do_dmb();
	atomic_store_64(p, val);
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long val)
{
	u_long tmp = 0, tmp2 = 0, ret = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
			 "add %1, %0, %4\n"
			 "strex %2, %1, [%3]\n"
			 "cmp %2, #0\n"
			 "it ne\n"
			 "bne	1b\n"
			 : "+r" (ret), "=&r" (tmp), "+r" (tmp2)
			 , "+r" (p), "+r" (val) : : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_readandclear_long(volatile u_long *p)
{
	u_long ret, tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
			 "mov %1, #0\n"
			 "strex %2, %1, [%3]\n"
			 "cmp %2, #0\n"
			 "it ne\n"
			 "bne 1b\n"
			 : "=r" (ret), "=&r" (tmp), "+r" (tmp2)
			 , "+r" (p) : : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_load_acq_long(volatile u_long *p)
{
	u_long v;

	v = *p;
	__do_dmb();
	return (v);
}

static __inline void
atomic_store_rel_long(volatile u_long *p, u_long v)
{

	__do_dmb();
	*p = v;
}
#else /* < armv6 */

#define __with_interrupts_disabled(expr) \
	do {						\
		u_int cpsr_save, tmp;			\
							\
		__asm __volatile(			\
			"mrs  %0, cpsr;"		\
			"orr  %1, %0, %2;"		\
			"msr  cpsr_fsxc, %1;"		\
			: "=r" (cpsr_save), "=r" (tmp)	\
			: "I" (I32_bit | F32_bit)	\
			: "cc" );			\
		(expr);					\
		__asm __volatile(			\
			"msr  cpsr_fsxc, %0"		\
			: /* no output */		\
			: "r" (cpsr_save)		\
			: "cc" );			\
	} while(0)
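
/*
 * Pre-ARMv6 CPUs have no load/store-exclusive instructions, so in the
 * kernel atomicity is obtained by masking IRQs and FIQs in the CPSR around
 * the plain C expression: nothing else can run between the two msr
 * instructions, so the read-modify-write cannot be interrupted.  This is
 * sufficient only because these platforms are uniprocessor.
 */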

static __inline uint32_t
__swp(uint32_t val, volatile uint32_t *ptr)
{
	__asm __volatile("swp	%0, %2, [%3]"
	    : "=&r" (val), "=m" (*ptr)
	    : "r" (val), "r" (ptr), "m" (*ptr)
	    : "memory");
	return (val);
}
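
/*
 * SWP atomically exchanges a register with a word in memory and is the only
 * atomic read-modify-write primitive available before ARMv6; it is what
 * atomic_readandclear_32() below is built on.  (SWP is deprecated from
 * ARMv6 onwards, which is why the exclusive-monitor code above never
 * uses it.)
 */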

#ifdef _KERNEL
#define	ARM_HAVE_ATOMIC64

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	__with_interrupts_disabled(*address |= setmask);
}

static __inline void
atomic_set_64(volatile uint64_t *address, uint64_t setmask)
{
	__with_interrupts_disabled(*address |= setmask);
}

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t clearmask)
{
	__with_interrupts_disabled(*address &= ~clearmask);
}

static __inline void
atomic_clear_64(volatile uint64_t *address, uint64_t clearmask)
{
	__with_interrupts_disabled(*address &= ~clearmask);
}

static __inline u_int32_t
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	int ret;

	__with_interrupts_disabled(
	{
		if (*p == cmpval) {
			*p = newval;
			ret = 1;
		} else {
			ret = 0;
		}
	});
	return (ret);
}

static __inline u_int64_t
atomic_cmpset_64(volatile u_int64_t *p, volatile u_int64_t cmpval, volatile u_int64_t newval)
{
	int ret;

	__with_interrupts_disabled(
	{
		if (*p == cmpval) {
			*p = newval;
			ret = 1;
		} else {
			ret = 0;
		}
	});
	return (ret);
}

static __inline void
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
{
	__with_interrupts_disabled(*p += val);
}

static __inline void
atomic_add_64(volatile u_int64_t *p, u_int64_t val)
{
	__with_interrupts_disabled(*p += val);
}

static __inline void
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
{
	__with_interrupts_disabled(*p -= val);
}

static __inline void
atomic_subtract_64(volatile u_int64_t *p, u_int64_t val)
{
	__with_interrupts_disabled(*p -= val);
}

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t v)
{
	uint32_t value;

	__with_interrupts_disabled(
	{
		value = *p;
		*p += v;
	});
	return (value);
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t value;

	__with_interrupts_disabled(
	{
		value = *p;
		*p += v;
	});
	return (value);
}

static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
	uint64_t value;

	__with_interrupts_disabled(value = *p);
	return (value);
}

static __inline void
atomic_store_64(volatile uint64_t *p, uint64_t value)
{
	__with_interrupts_disabled(*p = value);
}

#else /* !_KERNEL */

static __inline u_int32_t
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	register int done, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "cmp	%1, %3\n"
	    "streq	%4, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    "moveq	%1, #1\n"
	    "movne	%1, #0\n"
	    : "+r" (ras_start), "=r" (done)
	    , "+r" (p), "+r" (cmpval), "+r" (newval) : : "cc", "memory");
	return (done);
}
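
/*
 * The userland pre-ARMv6 primitives use a restartable atomic sequence
 * (RAS): the start and end addresses of the critical instructions are
 * stored at ARM_RAS_START and ARM_RAS_START + 4 (defined in
 * machine/sysarch.h).  If the kernel preempts the thread while its PC lies
 * inside that window, it rolls the thread back to the recorded start, so
 * the load/modify/store sequences below behave atomically on a
 * uniprocessor without disabling interrupts; the trailing stores reset the
 * window to an empty (0 / 0xffffffff) range.
 */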

static __inline void
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "add	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "+r" (p), "+r" (val)
	    : : "memory");
}

static __inline void
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "sub	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "+r" (p), "+r" (val)
	    : : "memory");
}

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "orr	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "+r" (address), "+r" (setmask)
	    : : "memory");
}

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t clearmask)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "bic	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "+r" (address), "+r" (clearmask)
	    : : "memory");
}

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t v)
{
	uint32_t start, tmp, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%3]\n"
	    "mov	%2, %1\n"
	    "add	%2, %2, %4\n"
	    "str	%2, [%3]\n"
	    "2:\n"
	    "mov	%2, #0\n"
	    "str	%2, [%0]\n"
	    "mov	%2, #0xffffffff\n"
	    "str	%2, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "=r" (tmp), "+r" (p), "+r" (v)
	    : : "memory");
	return (start);
}

#endif /* _KERNEL */

static __inline uint32_t
atomic_readandclear_32(volatile u_int32_t *p)
{

	return (__swp(0, p));
}

#define atomic_cmpset_rel_32	atomic_cmpset_32
#define atomic_cmpset_acq_32	atomic_cmpset_32
#define atomic_set_rel_32	atomic_set_32
#define atomic_set_acq_32	atomic_set_32
#define atomic_clear_rel_32	atomic_clear_32
#define atomic_clear_acq_32	atomic_clear_32
#define atomic_add_rel_32	atomic_add_32
#define atomic_add_acq_32	atomic_add_32
#define atomic_subtract_rel_32	atomic_subtract_32
#define atomic_subtract_acq_32	atomic_subtract_32
#define atomic_store_rel_32	atomic_store_32
#define atomic_store_rel_long	atomic_store_long
#define atomic_load_acq_32	atomic_load_32
#define atomic_load_acq_long	atomic_load_long
#define atomic_add_acq_long		atomic_add_long
#define atomic_add_rel_long		atomic_add_long
#define atomic_subtract_acq_long	atomic_subtract_long
#define atomic_subtract_rel_long	atomic_subtract_long
#define atomic_clear_acq_long		atomic_clear_long
#define atomic_clear_rel_long		atomic_clear_long
#define atomic_set_acq_long		atomic_set_long
#define atomic_set_rel_long		atomic_set_long
#define atomic_cmpset_acq_long		atomic_cmpset_long
#define atomic_cmpset_rel_long		atomic_cmpset_long
#undef __with_interrupts_disabled

static __inline void
atomic_add_long(volatile u_long *p, u_long v)
{

	atomic_add_32((volatile uint32_t *)p, v);
}

static __inline void
atomic_clear_long(volatile u_long *p, u_long v)
{

	atomic_clear_32((volatile uint32_t *)p, v);
}

static __inline int
atomic_cmpset_long(volatile u_long *dst, u_long old, u_long newe)
{

	return (atomic_cmpset_32((volatile uint32_t *)dst, old, newe));
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long v)
{

	return (atomic_fetchadd_32((volatile uint32_t *)p, v));
}

static __inline u_long
atomic_readandclear_long(volatile u_long *p)
{

	return (atomic_readandclear_32((volatile uint32_t *)p));
}

static __inline void
atomic_set_long(volatile u_long *p, u_long v)
{

	atomic_set_32((volatile uint32_t *)p, v);
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long v)
{

	atomic_subtract_32((volatile uint32_t *)p, v);
}

#endif /* Arch >= v6 */

static __inline uint32_t
atomic_load_32(volatile uint32_t *v)
{

	return (*v);
}

static __inline void
atomic_store_32(volatile uint32_t *dst, uint32_t src)
{
	*dst = src;
}

static __inline u_long
atomic_load_long(volatile u_long *v)
{

	return (*v);
}

static __inline void
atomic_store_long(volatile u_long *dst, u_long src)
{
	*dst = src;
}

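/*
 * Pointers and int are both 32 bits wide on ARM, so the _ptr and _int
 * operations can simply alias the corresponding 32-bit primitives.
 */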
#define atomic_clear_ptr		atomic_clear_32
#define atomic_set_ptr			atomic_set_32
#define atomic_cmpset_ptr		atomic_cmpset_32
#define atomic_cmpset_rel_ptr		atomic_cmpset_rel_32
#define atomic_cmpset_acq_ptr		atomic_cmpset_acq_32
#define atomic_store_ptr		atomic_store_32
#define atomic_store_rel_ptr		atomic_store_rel_32

#define atomic_add_int			atomic_add_32
#define atomic_add_acq_int		atomic_add_acq_32
#define atomic_add_rel_int		atomic_add_rel_32
#define atomic_subtract_int		atomic_subtract_32
#define atomic_subtract_acq_int		atomic_subtract_acq_32
#define atomic_subtract_rel_int		atomic_subtract_rel_32
#define atomic_clear_int		atomic_clear_32
#define atomic_clear_acq_int		atomic_clear_acq_32
#define atomic_clear_rel_int		atomic_clear_rel_32
#define atomic_set_int			atomic_set_32
#define atomic_set_acq_int		atomic_set_acq_32
#define atomic_set_rel_int		atomic_set_rel_32
#define atomic_cmpset_int		atomic_cmpset_32
#define atomic_cmpset_acq_int		atomic_cmpset_acq_32
#define atomic_cmpset_rel_int		atomic_cmpset_rel_32
#define atomic_fetchadd_int		atomic_fetchadd_32
#define atomic_readandclear_int		atomic_readandclear_32
#define atomic_load_acq_int		atomic_load_acq_32
#define atomic_store_rel_int		atomic_store_rel_32

#endif /* _MACHINE_ATOMIC_H_ */