/* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */

/*-
 * Copyright (C) 2003-2004 Olivier Houchard
 * Copyright (C) 1994-1997 Mark Brinicombe
 * Copyright (C) 1994 Brini
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Brini.
 * 4. The name of Brini may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef	_MACHINE_ATOMIC_H_
#define	_MACHINE_ATOMIC_H_

#include <sys/types.h>
#include <machine/armreg.h>

#ifndef _KERNEL
#include <machine/sysarch.h>
#else
#include <machine/cpuconf.h>
#endif

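/*
 * Memory barrier primitives.  ARMv7 provides dedicated ISB/DSB/DMB
 * instructions; ARMv6 expresses the same barriers as CP15 c7 operations.
 * Older cores have no data memory barrier, so dmb() falls back to the
 * drain-write-buffer operation used for dsb().
 */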
#if defined (__ARM_ARCH_7__) || defined (__ARM_ARCH_7A__)
#define isb()  __asm __volatile("isb" : : : "memory")
#define dsb()  __asm __volatile("dsb" : : : "memory")
#define dmb()  __asm __volatile("dmb" : : : "memory")
#elif defined (__ARM_ARCH_6__) || defined (__ARM_ARCH_6J__) || \
  defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6T2__) || \
  defined (__ARM_ARCH_6Z__) || defined (__ARM_ARCH_6ZK__)
#define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define dmb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
#else
#define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define dmb()  dsb()
#endif

#define mb()   dmb()
#define wmb()  dmb()
#define rmb()  dmb()



/*
 * It would be nice to use _HAVE_ARMv6_INSTRUCTIONS from machine/asm.h
 * here, but that header can't be included here because this is C
 * code.  I would like to move the _HAVE_ARMv6_INSTRUCTIONS definition
 * out of asm.h so it can be used in both asm and C code. - kientzle@
 */
#if defined (__ARM_ARCH_7__) || \
	defined (__ARM_ARCH_7A__)  || \
	defined (__ARM_ARCH_6__)   || \
	defined (__ARM_ARCH_6J__)  || \
	defined (__ARM_ARCH_6K__)  || \
	defined (__ARM_ARCH_6T2__) || \
	defined (__ARM_ARCH_6Z__)  || \
	defined (__ARM_ARCH_6ZK__)
#define	ARM_HAVE_ATOMIC64

static __inline void
__do_dmb(void)
{

#if defined (__ARM_ARCH_7__) || defined (__ARM_ARCH_7A__)
	__asm __volatile("dmb" : : : "memory");
#else
	__asm __volatile("mcr p15, 0, r0, c7, c10, 5" : : : "memory");
#endif
}

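/*
 * Acquire/release wrappers: an acquire variant performs the operation and
 * then issues a barrier so later accesses cannot move above it; a release
 * variant issues the barrier first so earlier accesses complete before the
 * operation becomes visible.
 */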
#define ATOMIC_ACQ_REL_LONG(NAME)					\
static __inline void							\
atomic_##NAME##_acq_long(__volatile u_long *p, u_long v)		\
{									\
	atomic_##NAME##_long(p, v);					\
	__do_dmb();							\
}									\
									\
static __inline  void							\
atomic_##NAME##_rel_long(__volatile u_long *p, u_long v)		\
{									\
	__do_dmb();							\
	atomic_##NAME##_long(p, v);					\
}

#define	ATOMIC_ACQ_REL(NAME, WIDTH)					\
static __inline  void							\
atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	atomic_##NAME##_##WIDTH(p, v);					\
	__do_dmb();							\
}									\
									\
static __inline  void							\
atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	__do_dmb();							\
	atomic_##NAME##_##WIDTH(p, v);					\
}

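/*
 * The 32-bit and long-sized operations below all follow the same pattern:
 * LDREX loads the value and marks the location for exclusive access, the
 * new value is computed, and STREX attempts the store, writing 0 to its
 * result register on success or 1 if the exclusive monitor was lost, in
 * which case the sequence retries from the LDREX.
 */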
static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
	    		    "orr %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
	                    "it ne\n"
			    "bne	1b\n"
			   : "=&r" (tmp), "+r" (tmp2)
			   , "+r" (address), "+r" (setmask) : : "cc", "memory");

}

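/*
 * The 64-bit variants use LDREXD/STREXD on a doubleword.  The %Q and %R
 * operand modifiers select the registers holding the low and high 32 bits
 * of a 64-bit operand, respectively, so the carry/borrow can be threaded
 * through the two halves where needed.
 */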
static __inline void
atomic_set_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   orr      %Q[tmp], %Q[val]\n"
		"   orr      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_set_long(volatile u_long *address, u_long setmask)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
	    		    "orr %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
	                    "it ne\n"
			    "bne	1b\n"
			   : "=&r" (tmp), "+r" (tmp2)
			   , "+r" (address), "+r" (setmask) : : "cc", "memory");

}

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
	    		    "bic %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
	                    "it ne\n"
			    "bne	1b\n"
			   : "=&r" (tmp), "+r" (tmp2)
			   ,"+r" (address), "+r" (setmask) : : "cc", "memory");
}

static __inline void
atomic_clear_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   bic      %Q[tmp], %Q[val]\n"
		"   bic      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_clear_long(volatile u_long *address, u_long setmask)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
	    		    "bic %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
	                    "it ne\n"
			    "bne	1b\n"
			   : "=&r" (tmp), "+r" (tmp2)
			   ,"+r" (address), "+r" (setmask) : : "cc", "memory");
}

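/*
 * Compare-and-set: if *p equals cmpval, store newval and return nonzero;
 * otherwise leave *p unchanged and return zero.  A mismatch branches
 * straight to the exit label, so no store is attempted.
 */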
static __inline u_int32_t
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	uint32_t ret;

	__asm __volatile("1: ldrex %0, [%1]\n"
	                 "cmp %0, %2\n"
	                 "itt ne\n"
			 "movne %0, #0\n"
			 "bne 2f\n"
			 "strex %0, %3, [%1]\n"
			 "cmp %0, #0\n"
	                 "ite eq\n"
			 "moveq %0, #1\n"
			 "bne	1b\n"
			 "2:"
			 : "=&r" (ret)
			 ,"+r" (p), "+r" (cmpval), "+r" (newval) : : "cc",
			 "memory");
	return (ret);
}

static __inline int
atomic_cmpset_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{
	uint64_t tmp;
	uint32_t ret;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   teq      %Q[tmp], %Q[cmpval]\n"
		"   itee eq  \n"
		"   teqeq    %R[tmp], %R[cmpval]\n"
		"   movne    %[ret], #0\n"
		"   bne      2f\n"
		"   strexd   %[ret], %[newval], [%[ptr]]\n"
		"   teq      %[ret], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		"   mov      %[ret], #1\n"
		"2:          \n"
		:   [ret]    "=&r"  (ret),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [cmpval] "r"    (cmpval),
		    [newval] "r"    (newval)
		:   "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_cmpset_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{
	u_long ret;

	__asm __volatile("1: ldrex %0, [%1]\n"
	                 "cmp %0, %2\n"
	                 "itt ne\n"
			 "movne %0, #0\n"
			 "bne 2f\n"
			 "strex %0, %3, [%1]\n"
			 "cmp %0, #0\n"
	                 "ite eq\n"
			 "moveq %0, #1\n"
			 "bne	1b\n"
			 "2:"
			 : "=&r" (ret)
			 ,"+r" (p), "+r" (cmpval), "+r" (newval) : : "cc",
			 "memory");
	return (ret);
}

static __inline u_int32_t
atomic_cmpset_acq_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	u_int32_t ret = atomic_cmpset_32(p, cmpval, newval);

	__do_dmb();
	return (ret);
}

static __inline uint64_t
atomic_cmpset_acq_64(volatile uint64_t *p, volatile uint64_t cmpval, volatile uint64_t newval)
{
	uint64_t ret = atomic_cmpset_64(p, cmpval, newval);

	__do_dmb();
	return (ret);
}

static __inline u_long
atomic_cmpset_acq_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{
	u_long ret = atomic_cmpset_long(p, cmpval, newval);

	__do_dmb();
	return (ret);
}

static __inline u_int32_t
atomic_cmpset_rel_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{

	__do_dmb();
	return (atomic_cmpset_32(p, cmpval, newval));
}

static __inline uint64_t
atomic_cmpset_rel_64(volatile uint64_t *p, volatile uint64_t cmpval, volatile uint64_t newval)
{

	__do_dmb();
	return (atomic_cmpset_64(p, cmpval, newval));
}

static __inline u_long
atomic_cmpset_rel_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{

	__do_dmb();
	return (atomic_cmpset_long(p, cmpval, newval));
}


static __inline void
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
	    		    "add %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
	                    "it ne\n"
			    "bne	1b\n"
			    : "=&r" (tmp), "+r" (tmp2)
			    ,"+r" (p), "+r" (val) : : "cc", "memory");
}

static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   adds     %Q[tmp], %Q[val]\n"
		"   adc      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_add_long(volatile u_long *p, u_long val)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
	    		    "add %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
	                    "it ne\n"
			    "bne	1b\n"
			    : "=&r" (tmp), "+r" (tmp2)
			    ,"+r" (p), "+r" (val) : : "cc", "memory");
}

static __inline void
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
	    		    "sub %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
	                    "it ne\n"
			    "bne	1b\n"
			    : "=&r" (tmp), "+r" (tmp2)
			    ,"+r" (p), "+r" (val) : : "cc", "memory");
}

static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   subs     %Q[tmp], %Q[val]\n"
		"   sbc      %R[tmp], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long val)
{
	u_long tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%2]\n"
	    		    "sub %0, %0, %3\n"
			    "strex %1, %0, [%2]\n"
			    "cmp %1, #0\n"
	                    "it ne\n"
			    "bne	1b\n"
			    : "=&r" (tmp), "+r" (tmp2)
			    ,"+r" (p), "+r" (val) : : "cc", "memory");
}

ATOMIC_ACQ_REL(clear, 32)
ATOMIC_ACQ_REL(add, 32)
ATOMIC_ACQ_REL(subtract, 32)
ATOMIC_ACQ_REL(set, 32)
ATOMIC_ACQ_REL(clear, 64)
ATOMIC_ACQ_REL(add, 64)
ATOMIC_ACQ_REL(subtract, 64)
ATOMIC_ACQ_REL(set, 64)
ATOMIC_ACQ_REL_LONG(clear)
ATOMIC_ACQ_REL_LONG(add)
ATOMIC_ACQ_REL_LONG(subtract)
ATOMIC_ACQ_REL_LONG(set)

#undef ATOMIC_ACQ_REL
#undef ATOMIC_ACQ_REL_LONG

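/*
 * atomic_fetchadd_*() returns the value the location held before the
 * addition; atomic_readandclear_*() returns the old value while storing
 * zero.
 */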
static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp = 0, tmp2 = 0, ret = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
	    		    "add %1, %0, %4\n"
			    "strex %2, %1, [%3]\n"
			    "cmp %2, #0\n"
	                    "it ne\n"
			    "bne	1b\n"
			   : "+r" (ret), "=&r" (tmp), "+r" (tmp2)
			   ,"+r" (p), "+r" (val) : : "cc", "memory");
	return (ret);
}

static __inline uint32_t
atomic_readandclear_32(volatile u_int32_t *p)
{
	uint32_t ret, tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
	    		 "mov %1, #0\n"
			 "strex %2, %1, [%3]\n"
			 "cmp %2, #0\n"
	                 "it ne\n"
			 "bne 1b\n"
			 : "=r" (ret), "=&r" (tmp), "+r" (tmp2)
			 ,"+r" (p) : : "cc", "memory");
	return (ret);
}

static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
	uint32_t v;

	v = *p;
	__do_dmb();
	return (v);
}

static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
{

	__do_dmb();
	*p = v;
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t ret, tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[ret], [%[ptr]]\n"
		"   adds     %Q[tmp], %Q[ret], %Q[val]\n"
		"   adc      %R[tmp], %R[ret], %R[val]\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [ret]    "=&r"  (ret),
		    [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_readandclear_64(volatile uint64_t *p)
{
	uint64_t ret, tmp;
	uint32_t exflag;

	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[ret], [%[ptr]]\n"
		"   mov      %Q[tmp], #0\n"
		"   mov      %R[tmp], #0\n"
		"   strexd   %[exf], %[tmp], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [ret]    "=&r"  (ret),
		    [exf]    "=&r"  (exflag),
		    [tmp]    "=&r"  (tmp)
		:   [ptr]    "r"    (p)
		:   "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
	uint64_t ret;

584 	 * The only way to atomically load 64 bits is with LDREXD which puts the
585 	 * exclusive monitor into the open state, so reset it with CLREX because
586 	 * we don't actually need to store anything.
587 	 */
588 	__asm __volatile(
589 		"1:          \n"
590 		"   ldrexd   %[ret], [%[ptr]]\n"
591 		"   clrex    \n"
592 		:   [ret]    "=&r"  (ret)
593 		:   [ptr]    "r"    (p)
594 		:   "cc", "memory");
595 	return (ret);
596 }
597 
598 static __inline uint64_t
599 atomic_load_acq_64(volatile uint64_t *p)
600 {
601 	uint64_t ret;
602 
603 	ret = atomic_load_64(p);
604 	__do_dmb();
605 	return (ret);
606 }
607 
608 static __inline void
609 atomic_store_64(volatile uint64_t *p, uint64_t val)
610 {
611 	uint64_t tmp;
612 	uint32_t exflag;
613 
614 	/*
615 	 * The only way to atomically store 64 bits is with STREXD, which will
	 * succeed only if paired up with a preceding LDREXD using the same
	 * address, so we read and discard the existing value before storing.
	 */
	__asm __volatile(
		"1:          \n"
		"   ldrexd   %[tmp], [%[ptr]]\n"
		"   strexd   %[exf], %[val], [%[ptr]]\n"
		"   teq      %[exf], #0\n"
		"   it ne    \n"
		"   bne      1b\n"
		:   [tmp]    "=&r"  (tmp),
		    [exf]    "=&r"  (exflag)
		:   [ptr]    "r"    (p),
		    [val]    "r"    (val)
		:   "cc", "memory");
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{

	__do_dmb();
	atomic_store_64(p, val);
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long val)
{
	u_long tmp = 0, tmp2 = 0, ret = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
	    		    "add %1, %0, %4\n"
			    "strex %2, %1, [%3]\n"
			    "cmp %2, #0\n"
	                    "it ne\n"
			    "bne	1b\n"
			   : "+r" (ret), "=&r" (tmp), "+r" (tmp2)
			   ,"+r" (p), "+r" (val) : : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_readandclear_long(volatile u_long *p)
{
	u_long ret, tmp = 0, tmp2 = 0;

	__asm __volatile("1: ldrex %0, [%3]\n"
	    		 "mov %1, #0\n"
			 "strex %2, %1, [%3]\n"
			 "cmp %2, #0\n"
	                 "it ne\n"
			 "bne 1b\n"
			 : "=r" (ret), "=&r" (tmp), "+r" (tmp2)
			 ,"+r" (p) : : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_load_acq_long(volatile u_long *p)
{
	u_long v;

	v = *p;
	__do_dmb();
	return (v);
}

static __inline void
atomic_store_rel_long(volatile u_long *p, u_long v)
{

	__do_dmb();
	*p = v;
}
#else /* < armv6 */

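/*
 * Pre-ARMv6 CPUs have no load/store-exclusive instructions.  In the kernel
 * the operations below simply run with interrupts (IRQ and FIQ) masked,
 * which is sufficient on the uniprocessor systems these CPUs appear in.
 * Userland cannot mask interrupts, so it uses SWP and restartable atomic
 * sequences instead (see the !_KERNEL section further down).
 */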
#define __with_interrupts_disabled(expr) \
	do {						\
		u_int cpsr_save, tmp;			\
							\
		__asm __volatile(			\
			"mrs  %0, cpsr;"		\
			"orr  %1, %0, %2;"		\
			"msr  cpsr_fsxc, %1;"		\
			: "=r" (cpsr_save), "=r" (tmp)	\
			: "I" (PSR_I | PSR_F)		\
		        : "cc" );		\
		(expr);				\
		 __asm __volatile(		\
			"msr  cpsr_fsxc, %0"	\
			: /* no output */	\
			: "r" (cpsr_save)	\
			: "cc" );		\
	} while(0)

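/*
 * SWP atomically exchanges a register with a word in memory; it is the
 * classic pre-ARMv6 atomic primitive (deprecated on later architectures).
 */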
static __inline uint32_t
__swp(uint32_t val, volatile uint32_t *ptr)
{
	__asm __volatile("swp	%0, %2, [%3]"
	    : "=&r" (val), "=m" (*ptr)
	    : "r" (val), "r" (ptr), "m" (*ptr)
	    : "memory");
	return (val);
}


#ifdef _KERNEL
#define	ARM_HAVE_ATOMIC64

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	__with_interrupts_disabled(*address |= setmask);
}

static __inline void
atomic_set_64(volatile uint64_t *address, uint64_t setmask)
{
	__with_interrupts_disabled(*address |= setmask);
}

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t clearmask)
{
	__with_interrupts_disabled(*address &= ~clearmask);
}

static __inline void
atomic_clear_64(volatile uint64_t *address, uint64_t clearmask)
{
	__with_interrupts_disabled(*address &= ~clearmask);
}

static __inline u_int32_t
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	int ret;

	__with_interrupts_disabled(
	 {
	    	if (*p == cmpval) {
			*p = newval;
			ret = 1;
		} else {
			ret = 0;
		}
	});
	return (ret);
}

static __inline u_int64_t
atomic_cmpset_64(volatile u_int64_t *p, volatile u_int64_t cmpval, volatile u_int64_t newval)
{
	int ret;

	__with_interrupts_disabled(
	 {
	    	if (*p == cmpval) {
			*p = newval;
			ret = 1;
		} else {
			ret = 0;
		}
	});
	return (ret);
}

static __inline void
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
{
	__with_interrupts_disabled(*p += val);
}

static __inline void
atomic_add_64(volatile u_int64_t *p, u_int64_t val)
{
	__with_interrupts_disabled(*p += val);
}

static __inline void
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
{
	__with_interrupts_disabled(*p -= val);
}

static __inline void
atomic_subtract_64(volatile u_int64_t *p, u_int64_t val)
{
	__with_interrupts_disabled(*p -= val);
}

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t v)
{
	uint32_t value;

	__with_interrupts_disabled(
	{
	    	value = *p;
		*p += v;
	});
	return (value);
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t value;

	__with_interrupts_disabled(
	{
	    	value = *p;
		*p += v;
	});
	return (value);
}

static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
	uint64_t value;

	__with_interrupts_disabled(value = *p);
	return (value);
}

static __inline void
atomic_store_64(volatile uint64_t *p, uint64_t value)
{
	__with_interrupts_disabled(*p = value);
}

#else /* !_KERNEL */

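/*
 * Userland on pre-ARMv6 uses restartable atomic sequences (RAS): the start
 * and end addresses of the critical sequence are published at ARM_RAS_START
 * and ARM_RAS_START + 4, and if the thread is interrupted inside that window
 * the kernel restarts it from the beginning.  Each routine closes the window
 * afterwards by storing 0 and 0xffffffff.
 */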
static __inline u_int32_t
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
	register int done, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "cmp	%1, %3\n"
	    "streq	%4, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    "moveq	%1, #1\n"
	    "movne	%1, #0\n"
	    : "+r" (ras_start), "=r" (done)
	    ,"+r" (p), "+r" (cmpval), "+r" (newval) : : "cc", "memory");
	return (done);
}

static __inline void
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "add	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "+r" (p), "+r" (val)
	    : : "memory");
}

static __inline void
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "sub	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"

	    : "+r" (ras_start), "=r" (start), "+r" (p), "+r" (val)
	    : : "memory");
}

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "orr	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"

	    : "+r" (ras_start), "=r" (start), "+r" (address), "+r" (setmask)
	    : : "memory");
}

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t clearmask)
{
	int start, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%2]\n"
	    "bic	%1, %1, %3\n"
	    "str	%1, [%2]\n"
	    "2:\n"
	    "mov	%1, #0\n"
	    "str	%1, [%0]\n"
	    "mov	%1, #0xffffffff\n"
	    "str	%1, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "+r" (address), "+r" (clearmask)
	    : : "memory");

}

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t v)
{
	uint32_t start, tmp, ras_start = ARM_RAS_START;

	__asm __volatile("1:\n"
	    "adr	%1, 1b\n"
	    "str	%1, [%0]\n"
	    "adr	%1, 2f\n"
	    "str	%1, [%0, #4]\n"
	    "ldr	%1, [%3]\n"
	    "mov	%2, %1\n"
	    "add	%2, %2, %4\n"
	    "str	%2, [%3]\n"
	    "2:\n"
	    "mov	%2, #0\n"
	    "str	%2, [%0]\n"
	    "mov	%2, #0xffffffff\n"
	    "str	%2, [%0, #4]\n"
	    : "+r" (ras_start), "=r" (start), "=r" (tmp), "+r" (p), "+r" (v)
	    : : "memory");
	return (start);
}

#endif /* _KERNEL */


static __inline uint32_t
atomic_readandclear_32(volatile u_int32_t *p)
{

	return (__swp(0, p));
}

#define atomic_cmpset_rel_32	atomic_cmpset_32
#define atomic_cmpset_acq_32	atomic_cmpset_32
#define atomic_set_rel_32	atomic_set_32
#define atomic_set_acq_32	atomic_set_32
#define atomic_clear_rel_32	atomic_clear_32
#define atomic_clear_acq_32	atomic_clear_32
#define atomic_add_rel_32	atomic_add_32
#define atomic_add_acq_32	atomic_add_32
#define atomic_subtract_rel_32	atomic_subtract_32
#define atomic_subtract_acq_32	atomic_subtract_32
#define atomic_store_rel_32	atomic_store_32
#define atomic_store_rel_long	atomic_store_long
#define atomic_load_acq_32	atomic_load_32
#define atomic_load_acq_long	atomic_load_long
#define atomic_add_acq_long		atomic_add_long
#define atomic_add_rel_long		atomic_add_long
#define atomic_subtract_acq_long	atomic_subtract_long
#define atomic_subtract_rel_long	atomic_subtract_long
#define atomic_clear_acq_long		atomic_clear_long
#define atomic_clear_rel_long		atomic_clear_long
#define atomic_set_acq_long		atomic_set_long
#define atomic_set_rel_long		atomic_set_long
#define atomic_cmpset_acq_long		atomic_cmpset_long
#define atomic_cmpset_rel_long		atomic_cmpset_long
#define atomic_load_acq_long		atomic_load_long
#undef __with_interrupts_disabled

static __inline void
atomic_add_long(volatile u_long *p, u_long v)
{

	atomic_add_32((volatile uint32_t *)p, v);
}

static __inline void
atomic_clear_long(volatile u_long *p, u_long v)
{

	atomic_clear_32((volatile uint32_t *)p, v);
}

static __inline int
atomic_cmpset_long(volatile u_long *dst, u_long old, u_long newe)
{

	return (atomic_cmpset_32((volatile uint32_t *)dst, old, newe));
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long v)
{

	return (atomic_fetchadd_32((volatile uint32_t *)p, v));
}

static __inline void
atomic_readandclear_long(volatile u_long *p)
{

	atomic_readandclear_32((volatile uint32_t *)p);
}

static __inline void
atomic_set_long(volatile u_long *p, u_long v)
{

	atomic_set_32((volatile uint32_t *)p, v);
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long v)
{

	atomic_subtract_32((volatile uint32_t *)p, v);
}



#endif /* Arch >= v6 */

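/*
 * Aligned 32-bit (and long, which is 32 bits on ARM) loads and stores are
 * naturally atomic, so plain accesses suffice here.
 */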
static __inline int
atomic_load_32(volatile uint32_t *v)
{

	return (*v);
}

static __inline void
atomic_store_32(volatile uint32_t *dst, uint32_t src)
{
	*dst = src;
}

static __inline int
atomic_load_long(volatile u_long *v)
{

	return (*v);
}

static __inline void
atomic_store_long(volatile u_long *dst, u_long src)
{
	*dst = src;
}

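/*
 * int and pointer-sized operations are aliases for the 32-bit
 * implementations above.
 */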
#define atomic_clear_ptr		atomic_clear_32
#define atomic_set_ptr			atomic_set_32
#define atomic_cmpset_ptr		atomic_cmpset_32
#define atomic_cmpset_rel_ptr		atomic_cmpset_rel_32
#define atomic_cmpset_acq_ptr		atomic_cmpset_acq_32
#define atomic_store_ptr		atomic_store_32
#define atomic_store_rel_ptr		atomic_store_rel_32

#define atomic_add_int			atomic_add_32
#define atomic_add_acq_int		atomic_add_acq_32
#define atomic_add_rel_int		atomic_add_rel_32
#define atomic_subtract_int		atomic_subtract_32
#define atomic_subtract_acq_int		atomic_subtract_acq_32
#define atomic_subtract_rel_int		atomic_subtract_rel_32
#define atomic_clear_int		atomic_clear_32
#define atomic_clear_acq_int		atomic_clear_acq_32
#define atomic_clear_rel_int		atomic_clear_rel_32
#define atomic_set_int			atomic_set_32
#define atomic_set_acq_int		atomic_set_acq_32
#define atomic_set_rel_int		atomic_set_rel_32
#define atomic_cmpset_int		atomic_cmpset_32
#define atomic_cmpset_acq_int		atomic_cmpset_acq_32
#define atomic_cmpset_rel_int		atomic_cmpset_rel_32
#define atomic_fetchadd_int		atomic_fetchadd_32
#define atomic_readandclear_int		atomic_readandclear_32
#define atomic_load_acq_int		atomic_load_acq_32
#define atomic_store_rel_int		atomic_store_rel_32

#endif /* _MACHINE_ATOMIC_H_ */