xref: /freebsd/sys/arm/include/atomic.h (revision dd21556857e8d40f66bf5ad54754d9d52669ebf7)
1 /* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */
2 
3 /*-
4  * SPDX-License-Identifier: BSD-4-Clause
5  *
6  * Copyright (C) 2003-2004 Olivier Houchard
7  * Copyright (C) 1994-1997 Mark Brinicombe
8  * Copyright (C) 1994 Brini
9  * All rights reserved.
10  *
11  * This code is derived from software written for Brini by Mark Brinicombe
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by Brini.
24  * 4. The name of Brini may not be used to endorse or promote products
25  *    derived from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
28  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
29  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
30  * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
33  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
34  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
35  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
36  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 #ifndef	_MACHINE_ATOMIC_H_
40 #define	_MACHINE_ATOMIC_H_
41 
42 #include <sys/atomic_common.h>
43 
/*
 * Barrier primitives.  Each macro emits the ARM instruction of the same
 * name; the "memory" clobber additionally keeps the compiler from moving
 * memory accesses across the statement.
 */
44 #define isb()  __asm __volatile("isb" : : : "memory")
45 #define dsb()  __asm __volatile("dsb" : : : "memory")
46 #define dmb()  __asm __volatile("dmb" : : : "memory")
47 
/* Full, write and read memory barriers are all implemented with dmb(). */
48 #define mb()   dmb()
49 #define wmb()  dmb()
50 #define rmb()  dmb()
51 
/*
 * NOTE(review): presumably advertises to other code that the 64-bit
 * atomic_*_64() operations defined in this header exist -- confirm users.
 */
52 #define	ARM_HAVE_ATOMIC64
53 
/*
 * Generate atomic_<NAME>_acq_long() and atomic_<NAME>_rel_long() on top of
 * an existing atomic_<NAME>_long(p, v).  The acquire variant performs the
 * operation and then issues dmb(); the release variant issues dmb() first
 * and then performs the operation.
 */
54 #define ATOMIC_ACQ_REL_LONG(NAME)					\
55 static __inline void							\
56 atomic_##NAME##_acq_long(__volatile u_long *p, u_long v)		\
57 {									\
58 	atomic_##NAME##_long(p, v);					\
59 	dmb();								\
60 }									\
61 									\
62 static __inline  void							\
63 atomic_##NAME##_rel_long(__volatile u_long *p, u_long v)		\
64 {									\
65 	dmb();								\
66 	atomic_##NAME##_long(p, v);					\
67 }
68 
/*
 * Generate atomic_<NAME>_acq_<WIDTH>() and atomic_<NAME>_rel_<WIDTH>() on
 * top of an existing atomic_<NAME>_<WIDTH>(p, v) taking uint<WIDTH>_t
 * operands.  The acquire variant performs the operation and then issues
 * dmb(); the release variant issues dmb() first.
 */
69 #define	ATOMIC_ACQ_REL(NAME, WIDTH)					\
70 static __inline  void							\
71 atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
72 {									\
73 	atomic_##NAME##_##WIDTH(p, v);					\
74 	dmb();								\
75 }									\
76 									\
77 static __inline  void							\
78 atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
79 {									\
80 	dmb();								\
81 	atomic_##NAME##_##WIDTH(p, v);					\
82 }
83 
/*
 * Atomically add val to the 32-bit word at p.
 *
 * LDREX/STREX retry loop: tmp receives the loaded value and then the sum,
 * tmp2 receives the STREX status, and the loop repeats while that status is
 * non-zero.  No memory barrier is issued.
 */
84 static __inline void
85 atomic_add_32(volatile uint32_t *p, uint32_t val)
86 {
87 	uint32_t tmp = 0, tmp2 = 0;
88 
89 	__asm __volatile(
90 	    "1: ldrex	%0, [%2]	\n"
91 	    "   add	%0, %0, %3	\n"
92 	    "   strex	%1, %0, [%2]	\n"
93 	    "   cmp	%1, #0		\n"
94 	    "   it	ne		\n"
95 	    "   bne	1b		\n"
96 	    : "=&r" (tmp), "+r" (tmp2)
97 	    ,"+r" (p), "+r" (val) : : "cc", "memory");
98 }
99 
/*
 * Atomically add val to the 64-bit value at p.
 *
 * LDREXD/STREXD retry loop.  The sum is formed with ADDS on the low words
 * (%Q) followed by ADC on the high words (%R) so that the carry propagates.
 * exflag receives the STREXD status; a non-zero status restarts the loop.
 * No memory barrier is issued.
 */
100 static __inline void
101 atomic_add_64(volatile uint64_t *p, uint64_t val)
102 {
103 	uint64_t tmp;
104 	uint32_t exflag;
105 
106 	__asm __volatile(
107 	    "1:							\n"
108 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
109 	    "   adds	%Q[tmp], %Q[val]			\n"
110 	    "   adc	%R[tmp], %R[tmp], %R[val]		\n"
111 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
112 	    "   teq	%[exf], #0				\n"
113 	    "   it	ne					\n"
114 	    "   bne	1b					\n"
115 	    : [exf] "=&r" (exflag),
116 	      [tmp] "=&r" (tmp)
117 	    : [ptr] "r"   (p),
118 	      [val] "r"   (val)
119 	    : "cc", "memory");
120 }
121 
/*
 * u_long flavour of atomic_add_32(): forwards to it with p cast to a 32-bit
 * pointer.  Relies on u_long being 32 bits wide on this platform.
 */
122 static __inline void
123 atomic_add_long(volatile u_long *p, u_long val)
124 {
125 
126 	atomic_add_32((volatile uint32_t *)p, val);
127 }
128 
/* Instantiate atomic_add_{acq,rel}_{32,64,long}(). */
129 ATOMIC_ACQ_REL(add, 32)
130 ATOMIC_ACQ_REL(add, 64)
131 ATOMIC_ACQ_REL_LONG(add)
132 
/*
 * Atomically clear, in the 32-bit word at address, every bit that is set in
 * setmask (BIC).
 *
 * LDREX/STREX retry loop: tmp holds the loaded and then the masked value,
 * tmp2 the STREX status.  No memory barrier is issued.
 */
133 static __inline void
134 atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
135 {
136 	uint32_t tmp = 0, tmp2 = 0;
137 
138 	__asm __volatile(
139 	    "1: ldrex	%0, [%2]	\n"
140 	    "   bic	%0, %0, %3	\n"
141 	    "   strex	%1, %0, [%2]	\n"
142 	    "   cmp	%1, #0		\n"
143 	    "   it	ne		\n"
144 	    "   bne	1b		\n"
145 	    : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
146 	    : : "cc", "memory");
147 }
148 
/*
 * Atomically clear, in the 64-bit value at p, every bit that is set in val.
 *
 * LDREXD/STREXD retry loop; BIC is applied separately to the low (%Q) and
 * high (%R) words.  exflag receives the STREXD status; non-zero retries.
 * No memory barrier is issued.
 */
149 static __inline void
150 atomic_clear_64(volatile uint64_t *p, uint64_t val)
151 {
152 	uint64_t tmp;
153 	uint32_t exflag;
154 
155 	__asm __volatile(
156 	    "1:							\n"
157 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
158 	    "   bic	%Q[tmp], %Q[val]			\n"
159 	    "   bic	%R[tmp], %R[val]			\n"
160 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
161 	    "   teq	%[exf], #0				\n"
162 	    "   it	ne					\n"
163 	    "   bne	1b					\n"
164 	    : [exf] "=&r" (exflag),
165 	      [tmp] "=&r" (tmp)
166 	    : [ptr] "r"   (p),
167 	      [val] "r"   (val)
168 	    : "cc", "memory");
169 }
170 
/*
 * u_long flavour of atomic_clear_32(): forwards to it with address cast to
 * a 32-bit pointer.  Relies on u_long being 32 bits wide on this platform.
 */
171 static __inline void
172 atomic_clear_long(volatile u_long *address, u_long setmask)
173 {
174 
175 	atomic_clear_32((volatile uint32_t *)address, setmask);
176 }
177 
/* Instantiate atomic_clear_{acq,rel}_{32,64,long}(). */
178 ATOMIC_ACQ_REL(clear, 32)
179 ATOMIC_ACQ_REL(clear, 64)
180 ATOMIC_ACQ_REL_LONG(clear)
181 
/*
 * Body shared by the 8-, 16-, 32-bit and long atomic_fcmpset_*() functions.
 *
 * Expects _ptr, _old and _new to be in scope in the expanding function.
 * RET is the int that receives the result, TYPE is the operand type and SUF
 * is the load/store size suffix ("b", "h" or "").
 *
 * The value at _ptr is loaded exclusively into tmp and compared with *_old.
 * If they differ, tmp is written to *_old and RET is set to 0.  If they
 * match, _new is stored with STREX; EORS inverts the status so that RET is
 * 1 on success, while a failed exclusive store (result 0, Z set) branches
 * back to retry.
 */
182 #define ATOMIC_FCMPSET_CODE(RET, TYPE, SUF)                   \
183     {                                                         \
184 	TYPE tmp;                                             \
185                                                               \
186 	__asm __volatile(                                     \
187 	    "1: ldrex" SUF "   %[tmp], [%[ptr]]          \n"  \
188 	    "   ldr" SUF "     %[ret], [%[oldv]]         \n"  \
189 	    "   teq            %[tmp], %[ret]            \n"  \
190 	    "   ittee          ne                        \n"  \
191 	    "   str" SUF "ne   %[tmp], [%[oldv]]         \n"  \
192 	    "   movne          %[ret], #0                \n"  \
193 	    "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n"  \
194 	    "   eorseq         %[ret], #1                \n"  \
195 	    "   beq            1b                        \n"  \
196 	    : [ret] "=&r" (RET),                              \
197 	      [tmp] "=&r" (tmp)                               \
198 	    : [ptr] "r"   (_ptr),                             \
199 	      [oldv] "r"  (_old),                             \
200 	      [newv] "r"  (_new)                              \
201 	    : "cc", "memory");                                \
202     }
203 
/*
 * Body shared by the atomic_fcmpset_*64() functions.
 *
 * Expects _ptr, _old and _new to be in scope in the expanding function; RET
 * is the int that receives the result.
 *
 * The value at _ptr is loaded with LDREXD into tmp and *_old is loaded with
 * LDRD into cmp; both halves are then compared.  On a mismatch RET is set
 * to 0 and the value that was observed at _ptr (tmp) is written back to
 * *_old, matching what ATOMIC_FCMPSET_CODE does for the narrower widths;
 * storing cmp there would leave *_old unchanged.  On a match _new is stored
 * with STREXD; EORS inverts the status so that RET is 1 on success, while a
 * failed exclusive store branches back to retry.
 */
204 #define ATOMIC_FCMPSET_CODE64(RET)                                 \
205     {                                                              \
206 	uint64_t cmp, tmp;                                         \
207                                                                    \
208 	__asm __volatile(                                          \
209 	    "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n"  \
210 	    "   ldrd     %Q[cmp], %R[cmp], [%[oldv]]          \n"  \
211 	    "   teq      %Q[tmp], %Q[cmp]                     \n"  \
212 	    "   it       eq                                   \n"  \
213 	    "   teqeq    %R[tmp], %R[cmp]                     \n"  \
214 	    "   ittee    ne                                   \n"  \
215 	    "   movne    %[ret], #0                           \n"  \
216 	    "   strdne   %Q[tmp], %R[tmp], [%[oldv]]          \n"  \
217 	    "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n"  \
218 	    "   eorseq   %[ret], #1                           \n"  \
219 	    "   beq      1b                                   \n"  \
220 	    : [ret] "=&r" (RET),                                   \
221 	      [cmp] "=&r" (cmp),                                   \
222 	      [tmp] "=&r" (tmp)                                    \
223 	    : [ptr] "r"   (_ptr),                                  \
224 	      [oldv] "r"  (_old),                                  \
225 	      [newv] "r"  (_new)                                   \
226 	    : "cc", "memory");                                     \
227     }
228 
/*
 * 8-bit compare-and-set that reports the observed value.
 *
 * If the byte at _ptr equals *_old, store _new and return 1; otherwise
 * write the byte found at _ptr to *_old and return 0.  No memory barrier is
 * issued.
 */
229 static __inline int
230 atomic_fcmpset_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
231 {
232 	int ret;
233 
234 	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
235 	return (ret);
236 }
/*
 * NOTE(review): the self-referential define presumably lets
 * <sys/_atomic_subword.h> detect with #ifdef that a native version is
 * provided -- confirm against that header.
 */
237 #define	atomic_fcmpset_8	atomic_fcmpset_8
238 
/*
 * Acquire variant of the 8-bit fcmpset: same operation and return value as
 * ATOMIC_FCMPSET_CODE produces, followed by dmb().
 */
239 static __inline int
240 atomic_fcmpset_acq_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
241 {
242 	int ret;
243 
244 	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
245 	dmb();
246 	return (ret);
247 }
248 
/*
 * Release variant of the 8-bit fcmpset: issues dmb() and then performs the
 * same operation as ATOMIC_FCMPSET_CODE, returning its result.
 */
249 static __inline int
250 atomic_fcmpset_rel_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
251 {
252 	int ret;
253 
254 	dmb();
255 	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
256 	return (ret);
257 }
258 
/*
 * 16-bit compare-and-set that reports the observed value.
 *
 * If the halfword at _ptr equals *_old, store _new and return 1; otherwise
 * write the halfword found at _ptr to *_old and return 0.  No memory
 * barrier is issued.
 */
259 static __inline int
260 atomic_fcmpset_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
261 {
262 	int ret;
263 
264 	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
265 	return (ret);
266 }
/*
 * NOTE(review): the self-referential define presumably lets
 * <sys/_atomic_subword.h> detect with #ifdef that a native version is
 * provided -- confirm against that header.
 */
267 #define	atomic_fcmpset_16	atomic_fcmpset_16
268 
/*
 * Acquire variant of the 16-bit fcmpset: same operation and return value as
 * ATOMIC_FCMPSET_CODE produces, followed by dmb().
 */
269 static __inline int
270 atomic_fcmpset_acq_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
271 {
272 	int ret;
273 
274 	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
275 	dmb();
276 	return (ret);
277 }
278 
/*
 * Release variant of the 16-bit fcmpset: issues dmb() and then performs the
 * same operation as ATOMIC_FCMPSET_CODE, returning its result.
 */
279 static __inline int
280 atomic_fcmpset_rel_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
281 {
282 	int ret;
283 
284 	dmb();
285 	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
286 	return (ret);
287 }
288 
/*
 * 32-bit compare-and-set that reports the observed value.
 *
 * If the word at _ptr equals *_old, store _new and return 1; otherwise
 * write the word found at _ptr to *_old and return 0.  No memory barrier is
 * issued.
 */
289 static __inline int
290 atomic_fcmpset_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
291 {
292 	int ret;
293 
294 	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
295 	return (ret);
296 }
297 
/*
 * Acquire variant of the 32-bit fcmpset: same operation and return value as
 * ATOMIC_FCMPSET_CODE produces, followed by dmb().
 */
298 static __inline int
299 atomic_fcmpset_acq_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
300 {
301 	int ret;
302 
303 	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
304 	dmb();
305 	return (ret);
306 }
307 
/*
 * Release variant of the 32-bit fcmpset: issues dmb() and then performs the
 * same operation as ATOMIC_FCMPSET_CODE, returning its result.
 */
308 static __inline int
309 atomic_fcmpset_rel_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
310 {
311 	int ret;
312 
313 	dmb();
314 	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
315 	return (ret);
316 }
317 
/*
 * u_long compare-and-set that reports the observed value, using the
 * word-sized (no suffix) LDREX/STREX forms.
 *
 * If *_ptr equals *_old, store _new and return 1; otherwise write the value
 * found at _ptr to *_old and return 0.  No memory barrier is issued.
 */
318 static __inline int
319 atomic_fcmpset_long(volatile u_long *_ptr, u_long *_old, u_long _new)
320 {
321 	int ret;
322 
323 	ATOMIC_FCMPSET_CODE(ret, u_long, "");
324 	return (ret);
325 }
326 
/*
 * Acquire variant of the u_long fcmpset: same operation and return value as
 * ATOMIC_FCMPSET_CODE produces, followed by dmb().
 */
327 static __inline int
328 atomic_fcmpset_acq_long(volatile u_long *_ptr, u_long *_old, u_long _new)
329 {
330 	int ret;
331 
332 	ATOMIC_FCMPSET_CODE(ret, u_long, "");
333 	dmb();
334 	return (ret);
335 }
336 
/*
 * Release variant of the u_long fcmpset: issues dmb() and then performs the
 * same operation as ATOMIC_FCMPSET_CODE, returning its result.
 */
337 static __inline int
338 atomic_fcmpset_rel_long(volatile u_long *_ptr, u_long *_old, u_long _new)
339 {
340 	int ret;
341 
342 	dmb();
343 	ATOMIC_FCMPSET_CODE(ret, u_long, "");
344 	return (ret);
345 }
346 
/*
 * 64-bit fcmpset: expands ATOMIC_FCMPSET_CODE64 on _ptr, _old and _new and
 * returns its result, which is 1 when *_ptr matched *_old and _new was
 * stored, and 0 when the comparison failed.  No memory barrier is issued.
 */
347 static __inline int
348 atomic_fcmpset_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
349 {
350 	int ret;
351 
352 	ATOMIC_FCMPSET_CODE64(ret);
353 	return (ret);
354 }
355 
/*
 * Acquire variant of the 64-bit fcmpset: expands ATOMIC_FCMPSET_CODE64,
 * then issues dmb(), and returns the macro's result.
 */
356 static __inline int
357 atomic_fcmpset_acq_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
358 {
359 	int ret;
360 
361 	ATOMIC_FCMPSET_CODE64(ret);
362 	dmb();
363 	return (ret);
364 }
365 
/*
 * Release variant of the 64-bit fcmpset: issues dmb(), then expands
 * ATOMIC_FCMPSET_CODE64 and returns its result.
 */
366 static __inline int
367 atomic_fcmpset_rel_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
368 {
369 	int ret;
370 
371 	dmb();
372 	ATOMIC_FCMPSET_CODE64(ret);
373 	return (ret);
374 }
375 
/*
 * Body shared by the 8-, 16-, 32-bit and long atomic_cmpset_*() functions.
 *
 * Expects _ptr, _old and _new to be in scope in the expanding function.
 * RET is the int that receives the result and SUF is the load/store size
 * suffix ("b", "h" or "").
 *
 * The value at _ptr is loaded exclusively and compared with _old.  If they
 * differ RET is set to 0.  If they match, _new is stored with STREX; EORS
 * inverts the status so that RET is 1 on success, while a failed exclusive
 * store (result 0, Z set) branches back to retry.
 */
376 #define ATOMIC_CMPSET_CODE(RET, SUF)                         \
377     {                                                        \
378 	__asm __volatile(                                    \
379 	    "1: ldrex" SUF "   %[ret], [%[ptr]]          \n" \
380 	    "   teq            %[ret], %[oldv]           \n" \
381 	    "   itee           ne                        \n" \
382 	    "   movne          %[ret], #0                \n" \
383 	    "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n" \
384 	    "   eorseq         %[ret], #1                \n" \
385 	    "   beq            1b                        \n" \
386 	    : [ret] "=&r" (RET)                              \
387 	    : [ptr] "r"   (_ptr),                            \
388 	      [oldv] "r"  (_old),                            \
389 	      [newv] "r"  (_new)                             \
390 	    : "cc", "memory");                               \
391     }
392 
/*
 * Body shared by the atomic_cmpset_*64() functions.
 *
 * Expects _ptr, _old and _new to be in scope in the expanding function; RET
 * is the int that receives the result.
 *
 * The value at _ptr is loaded with LDREXD into tmp and both halves are
 * compared with _old.  If they differ RET is set to 0.  If they match, _new
 * is stored with STREXD; EORS inverts the status so that RET is 1 on
 * success, while a failed exclusive store branches back to retry.
 */
393 #define ATOMIC_CMPSET_CODE64(RET)                                 \
394     {                                                             \
395 	uint64_t tmp;                                             \
396 	                                                          \
397 	__asm __volatile(                                         \
398 	    "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n" \
399 	    "   teq      %Q[tmp], %Q[oldv]                    \n" \
400 	    "   it       eq                                   \n" \
401 	    "   teqeq    %R[tmp], %R[oldv]                    \n" \
402 	    "   itee     ne                                   \n" \
403 	    "   movne    %[ret], #0                           \n" \
404 	    "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n" \
405 	    "   eorseq   %[ret], #1                           \n" \
406 	    "   beq      1b                                   \n" \
407 	    : [ret] "=&r" (RET),                                  \
408 	      [tmp] "=&r" (tmp)                                   \
409 	    : [ptr] "r"   (_ptr),                                 \
410 	      [oldv] "r"  (_old),                                 \
411 	      [newv] "r"  (_new)                                  \
412 	    : "cc", "memory");                                    \
413     }
414 
/*
 * 8-bit compare-and-set: if the byte at _ptr equals _old, store _new and
 * return 1; otherwise return 0.  No memory barrier is issued.
 */
415 static __inline int
416 atomic_cmpset_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
417 {
418 	int ret;
419 
420 	ATOMIC_CMPSET_CODE(ret, "b");
421 	return (ret);
422 }
/*
 * NOTE(review): the self-referential define presumably lets
 * <sys/_atomic_subword.h> detect with #ifdef that a native version is
 * provided -- confirm against that header.
 */
423 #define	atomic_cmpset_8		atomic_cmpset_8
424 
/*
 * Acquire variant of the 8-bit cmpset: performs the compare-and-set, then
 * issues dmb(), and returns 1 if _new was stored, 0 otherwise.
 */
425 static __inline int
426 atomic_cmpset_acq_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
427 {
428 	int ret;
429 
430 	ATOMIC_CMPSET_CODE(ret, "b");
431 	dmb();
432 	return (ret);
433 }
434 
/*
 * Release variant of the 8-bit cmpset: issues dmb(), then performs the
 * compare-and-set and returns 1 if _new was stored, 0 otherwise.
 */
435 static __inline int
436 atomic_cmpset_rel_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
437 {
438 	int ret;
439 
440 	dmb();
441 	ATOMIC_CMPSET_CODE(ret, "b");
442 	return (ret);
443 }
444 
/*
 * 16-bit compare-and-set: if the halfword at _ptr equals _old, store _new
 * and return 1; otherwise return 0.  No memory barrier is issued.
 */
445 static __inline int
446 atomic_cmpset_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
447 {
448 	int ret;
449 
450 	ATOMIC_CMPSET_CODE(ret, "h");
451 	return (ret);
452 }
/*
 * NOTE(review): the self-referential define presumably lets
 * <sys/_atomic_subword.h> detect with #ifdef that a native version is
 * provided -- confirm against that header.
 */
453 #define	atomic_cmpset_16	atomic_cmpset_16
454 
/*
 * Acquire variant of the 16-bit cmpset: performs the compare-and-set, then
 * issues dmb(), and returns 1 if _new was stored, 0 otherwise.
 */
455 static __inline int
456 atomic_cmpset_acq_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
457 {
458 	int ret;
459 
460 	ATOMIC_CMPSET_CODE(ret, "h");
461 	dmb();
462 	return (ret);
463 }
464 
/*
 * Release variant of the 16-bit cmpset: issues dmb(), then performs the
 * compare-and-set and returns 1 if _new was stored, 0 otherwise.
 */
465 static __inline int
466 atomic_cmpset_rel_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
467 {
468 	int ret;
469 
470 	dmb();
471 	ATOMIC_CMPSET_CODE(ret, "h");
472 	return (ret);
473 }
474 
/*
 * 32-bit compare-and-set: if the word at _ptr equals _old, store _new and
 * return 1; otherwise return 0.  No memory barrier is issued.
 */
475 static __inline int
476 atomic_cmpset_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
477 {
478 	int ret;
479 
480 	ATOMIC_CMPSET_CODE(ret, "");
481 	return (ret);
482 }
483 
/*
 * Acquire variant of the 32-bit cmpset: performs the compare-and-set, then
 * issues dmb(), and returns 1 if _new was stored, 0 otherwise.
 */
484 static __inline int
485 atomic_cmpset_acq_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
486 {
487 	int ret;
488 
489 	ATOMIC_CMPSET_CODE(ret, "");
490 	dmb();
491 	return (ret);
492 }
493 
/*
 * Release variant of the 32-bit cmpset: issues dmb(), then performs the
 * compare-and-set and returns 1 if _new was stored, 0 otherwise.
 */
494 static __inline int
495 atomic_cmpset_rel_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
496 {
497 	int ret;
498 
499 	dmb();
500 	ATOMIC_CMPSET_CODE(ret, "");
501 	return (ret);
502 }
503 
/*
 * u_long compare-and-set using the word-sized (no suffix) LDREX/STREX
 * forms: if *_ptr equals _old, store _new and return 1; otherwise return 0.
 * No memory barrier is issued.
 */
504 static __inline int
505 atomic_cmpset_long(volatile u_long *_ptr, u_long _old, u_long _new)
506 {
507 	int ret;
508 
509 	ATOMIC_CMPSET_CODE(ret, "");
510 	return (ret);
511 }
512 
/*
 * Acquire variant of the u_long cmpset: performs the compare-and-set, then
 * issues dmb(), and returns 1 if _new was stored, 0 otherwise.
 */
513 static __inline int
514 atomic_cmpset_acq_long(volatile u_long *_ptr, u_long _old, u_long _new)
515 {
516 	int ret;
517 
518 	ATOMIC_CMPSET_CODE(ret, "");
519 	dmb();
520 	return (ret);
521 }
522 
/*
 * Release variant of the u_long cmpset: issues dmb(), then performs the
 * compare-and-set and returns 1 if _new was stored, 0 otherwise.
 */
523 static __inline int
524 atomic_cmpset_rel_long(volatile u_long *_ptr, u_long _old, u_long _new)
525 {
526 	int ret;
527 
528 	dmb();
529 	ATOMIC_CMPSET_CODE(ret, "");
530 	return (ret);
531 }
532 
/*
 * 64-bit compare-and-set: if the value at _ptr equals _old, store _new and
 * return 1; otherwise return 0.  No memory barrier is issued.
 */
533 static __inline int
534 atomic_cmpset_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
535 {
536 	int ret;
537 
538 	ATOMIC_CMPSET_CODE64(ret);
539 	return (ret);
540 }
541 
/*
 * Acquire variant of the 64-bit cmpset: performs the compare-and-set, then
 * issues dmb(), and returns 1 if _new was stored, 0 otherwise.
 */
542 static __inline int
543 atomic_cmpset_acq_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
544 {
545 	int ret;
546 
547 	ATOMIC_CMPSET_CODE64(ret);
548 	dmb();
549 	return (ret);
550 }
551 
/*
 * Release variant of the 64-bit cmpset: issues dmb(), then performs the
 * compare-and-set and returns 1 if _new was stored, 0 otherwise.
 */
552 static __inline int
553 atomic_cmpset_rel_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
554 {
555 	int ret;
556 
557 	dmb();
558 	ATOMIC_CMPSET_CODE64(ret);
559 	return (ret);
560 }
561 
/*
 * Atomically add val to the 32-bit word at p and return the value the word
 * held before the addition.
 *
 * LDREX/STREX retry loop: ret receives the loaded value, tmp the sum that
 * is stored, and tmp2 the STREX status (non-zero retries).  No memory
 * barrier is issued.
 */
562 static __inline uint32_t
563 atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
564 {
565 	uint32_t tmp = 0, tmp2 = 0, ret = 0;
566 
567 	__asm __volatile(
568 	    "1: ldrex	%0, [%3]	\n"
569 	    "   add	%1, %0, %4	\n"
570 	    "   strex	%2, %1, [%3]	\n"
571 	    "   cmp	%2, #0		\n"
572 	    "   it	ne		\n"
573 	    "   bne	1b		\n"
574 	    : "+r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
575 	    : : "cc", "memory");
576 	return (ret);
577 }
578 
/*
 * Atomically add val to the 64-bit value at p and return the value it held
 * before the addition.
 *
 * LDREXD/STREXD retry loop: ret receives the loaded value and tmp the sum,
 * computed with ADDS on the low words and ADC on the high words.  exflag
 * receives the STREXD status; non-zero retries.  No memory barrier is
 * issued.
 */
579 static __inline uint64_t
580 atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
581 {
582 	uint64_t ret, tmp;
583 	uint32_t exflag;
584 
585 	__asm __volatile(
586 	    "1:							\n"
587 	    "   ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
588 	    "   adds	%Q[tmp], %Q[ret], %Q[val]		\n"
589 	    "   adc	%R[tmp], %R[ret], %R[val]		\n"
590 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
591 	    "   teq	%[exf], #0				\n"
592 	    "   it	ne					\n"
593 	    "   bne	1b					\n"
594 	    : [ret] "=&r" (ret),
595 	      [exf] "=&r" (exflag),
596 	      [tmp] "=&r" (tmp)
597 	    : [ptr] "r"   (p),
598 	      [val] "r"   (val)
599 	    : "cc", "memory");
600 	return (ret);
601 }
602 
/*
 * u_long flavour of atomic_fetchadd_32(): forwards to it with p cast to a
 * 32-bit pointer and returns its result.  Relies on u_long being 32 bits
 * wide on this platform.
 */
603 static __inline u_long
604 atomic_fetchadd_long(volatile u_long *p, u_long val)
605 {
606 
607 	return (atomic_fetchadd_32((volatile uint32_t *)p, val));
608 }
609 
/*
 * Load the 32-bit word at p with acquire semantics: a plain volatile read
 * followed by dmb().  Returns the value read.
 */
610 static __inline uint32_t
611 atomic_load_acq_32(const volatile uint32_t *p)
612 {
613 	uint32_t v;
614 
615 	v = *p;
616 	dmb();
617 	return (v);
618 }
619 
/*
 * Atomically load and return the 64-bit value at p.  No memory barrier is
 * issued.
 */
620 static __inline uint64_t
621 atomic_load_64(const volatile uint64_t *p)
622 {
623 	uint64_t ret;
624 
625 	/*
626 	 * The only way to atomically load 64 bits is with LDREXD which puts the
627 	 * exclusive monitor into the exclusive state, so reset it to open state
628 	 * with CLREX because we don't actually need to store anything.
629 	 */
630 	__asm __volatile(
631 	    "ldrexd	%Q[ret], %R[ret], [%[ptr]]	\n"
632 	    "clrex					\n"
633 	    : [ret] "=&r" (ret)
634 	    : [ptr] "r"   (p)
635 	    : "cc", "memory");
636 	return (ret);
637 }
638 
/*
 * Load the 64-bit value at p with acquire semantics: atomic_load_64()
 * followed by dmb().  Returns the value read.
 */
639 static __inline uint64_t
640 atomic_load_acq_64(const volatile uint64_t *p)
641 {
642 	uint64_t ret;
643 
644 	ret = atomic_load_64(p);
645 	dmb();
646 	return (ret);
647 }
648 
/*
 * Load the u_long at p with acquire semantics: a plain volatile read
 * followed by dmb().  Returns the value read.
 */
649 static __inline u_long
650 atomic_load_acq_long(const volatile u_long *p)
651 {
652 	u_long v;
653 
654 	v = *p;
655 	dmb();
656 	return (v);
657 }
658 
/*
 * Atomically replace the 32-bit word at p with zero and return the value it
 * held before.
 *
 * LDREX/STREX retry loop: ret receives the loaded value, tmp is the zero
 * that is stored, and tmp2 the STREX status (non-zero retries).  No memory
 * barrier is issued.
 */
659 static __inline uint32_t
660 atomic_readandclear_32(volatile uint32_t *p)
661 {
662 	uint32_t ret, tmp = 0, tmp2 = 0;
663 
664 	__asm __volatile(
665 	    "1: ldrex	%0, [%3]	\n"
666 	    "   mov	%1, #0		\n"
667 	    "   strex	%2, %1, [%3]	\n"
668 	    "   cmp	%2, #0		\n"
669 	    "   it	ne		\n"
670 	    "   bne	1b		\n"
671 	    : "=r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p)
672 	    : : "cc", "memory");
673 	return (ret);
674 }
675 
/*
 * Atomically replace the 64-bit value at p with zero and return the value
 * it held before.
 *
 * LDREXD/STREXD retry loop: ret receives the loaded value, both halves of
 * tmp are zeroed and stored, and exflag receives the STREXD status
 * (non-zero retries).  No memory barrier is issued.
 */
676 static __inline uint64_t
677 atomic_readandclear_64(volatile uint64_t *p)
678 {
679 	uint64_t ret, tmp;
680 	uint32_t exflag;
681 
682 	__asm __volatile(
683 	    "1:							\n"
684 	    "   ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
685 	    "   mov	%Q[tmp], #0				\n"
686 	    "   mov	%R[tmp], #0				\n"
687 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
688 	    "   teq	%[exf], #0				\n"
689 	    "   it	ne					\n"
690 	    "   bne	1b					\n"
691 	    : [ret] "=&r" (ret),
692 	      [exf] "=&r" (exflag),
693 	      [tmp] "=&r" (tmp)
694 	    : [ptr] "r"   (p)
695 	    : "cc", "memory");
696 	return (ret);
697 }
698 
/*
 * u_long flavour of atomic_readandclear_32(): forwards to it with p cast to
 * a 32-bit pointer and returns its result.  Relies on u_long being 32 bits
 * wide on this platform.
 */
699 static __inline u_long
700 atomic_readandclear_long(volatile u_long *p)
701 {
702 
703 	return (atomic_readandclear_32((volatile uint32_t *)p));
704 }
705 
/*
 * Atomically set, in the 32-bit word at address, every bit that is set in
 * setmask (ORR).
 *
 * LDREX/STREX retry loop: tmp holds the loaded and then the OR-ed value,
 * tmp2 the STREX status.  No memory barrier is issued.
 */
706 static __inline void
707 atomic_set_32(volatile uint32_t *address, uint32_t setmask)
708 {
709 	uint32_t tmp = 0, tmp2 = 0;
710 
711 	__asm __volatile(
712 	    "1: ldrex	%0, [%2]	\n"
713 	    "   orr	%0, %0, %3	\n"
714 	    "   strex	%1, %0, [%2]	\n"
715 	    "   cmp	%1, #0		\n"
716 	    "   it	ne		\n"
717 	    "   bne	1b		\n"
718 	    : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
719 	    : : "cc", "memory");
720 }
721 
/*
 * Atomically set, in the 64-bit value at p, every bit that is set in val.
 *
 * LDREXD/STREXD retry loop; ORR is applied separately to the low (%Q) and
 * high (%R) words.  exflag receives the STREXD status; non-zero retries.
 * No memory barrier is issued.
 */
722 static __inline void
723 atomic_set_64(volatile uint64_t *p, uint64_t val)
724 {
725 	uint64_t tmp;
726 	uint32_t exflag;
727 
728 	__asm __volatile(
729 	    "1:							\n"
730 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
731 	    "   orr	%Q[tmp], %Q[val]			\n"
732 	    "   orr	%R[tmp], %R[val]			\n"
733 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
734 	    "   teq	%[exf], #0				\n"
735 	    "   it	ne					\n"
736 	    "   bne	1b					\n"
737 	    : [exf] "=&r" (exflag),
738 	      [tmp] "=&r" (tmp)
739 	    : [ptr] "r"   (p),
740 	      [val] "r"   (val)
741 	    : "cc", "memory");
742 }
743 
/*
 * u_long flavour of atomic_set_32(): forwards to it with address cast to a
 * 32-bit pointer.  Relies on u_long being 32 bits wide on this platform.
 */
744 static __inline void
745 atomic_set_long(volatile u_long *address, u_long setmask)
746 {
747 
748 	atomic_set_32((volatile uint32_t *)address, setmask);
749 }
750 
/* Instantiate atomic_set_{acq,rel}_{32,64,long}(). */
751 ATOMIC_ACQ_REL(set, 32)
752 ATOMIC_ACQ_REL(set, 64)
753 ATOMIC_ACQ_REL_LONG(set)
754 
/*
 * Atomically subtract val from the 32-bit word at p.
 *
 * LDREX/STREX retry loop: tmp holds the loaded value and then the
 * difference, tmp2 the STREX status (non-zero retries).  No memory barrier
 * is issued.
 */
755 static __inline void
756 atomic_subtract_32(volatile uint32_t *p, uint32_t val)
757 {
758 	uint32_t tmp = 0, tmp2 = 0;
759 
760 	__asm __volatile(
761 	    "1: ldrex	%0, [%2]	\n"
762 	    "   sub	%0, %0, %3	\n"
763 	    "   strex	%1, %0, [%2]	\n"
764 	    "   cmp	%1, #0		\n"
765 	    "   it	ne		\n"
766 	    "   bne	1b		\n"
767 	    : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
768 	    : : "cc", "memory");
769 }
770 
/*
 * Atomically subtract val from the 64-bit value at p.
 *
 * LDREXD/STREXD retry loop.  The difference is formed with SUBS on the low
 * words (%Q) followed by SBC on the high words (%R) so that the borrow
 * propagates.  exflag receives the STREXD status; non-zero retries.  No
 * memory barrier is issued.
 */
771 static __inline void
772 atomic_subtract_64(volatile uint64_t *p, uint64_t val)
773 {
774 	uint64_t tmp;
775 	uint32_t exflag;
776 
777 	__asm __volatile(
778 	    "1:							\n"
779 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
780 	    "   subs	%Q[tmp], %Q[val]			\n"
781 	    "   sbc	%R[tmp], %R[tmp], %R[val]		\n"
782 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
783 	    "   teq	%[exf], #0				\n"
784 	    "   it	ne					\n"
785 	    "   bne	1b					\n"
786 	    : [exf] "=&r" (exflag),
787 	      [tmp] "=&r" (tmp)
788 	    : [ptr] "r"   (p),
789 	      [val] "r"   (val)
790 	    : "cc", "memory");
791 }
792 
/*
 * u_long flavour of atomic_subtract_32(): forwards to it with p cast to a
 * 32-bit pointer.  Relies on u_long being 32 bits wide on this platform.
 */
793 static __inline void
794 atomic_subtract_long(volatile u_long *p, u_long val)
795 {
796 
797 	atomic_subtract_32((volatile uint32_t *)p, val);
798 }
799 
/* Instantiate atomic_subtract_{acq,rel}_{32,64,long}(). */
800 ATOMIC_ACQ_REL(subtract, 32)
801 ATOMIC_ACQ_REL(subtract, 64)
802 ATOMIC_ACQ_REL_LONG(subtract)
803 
/*
 * Atomically store val to the 64-bit location at p.  The loop repeats until
 * STREXD reports success (exflag == 0).  No memory barrier is issued.
 */
804 static __inline void
805 atomic_store_64(volatile uint64_t *p, uint64_t val)
806 {
807 	uint64_t tmp;
808 	uint32_t exflag;
809 
810 	/*
811 	 * The only way to atomically store 64 bits is with STREXD, which will
812 	 * succeed only if paired up with a preceding LDREXD using the same
813 	 * address, so we read and discard the existing value before storing.
814 	 */
815 	__asm __volatile(
816 	    "1:							\n"
817 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
818 	    "   strexd	%[exf], %Q[val], %R[val], [%[ptr]]	\n"
819 	    "   teq	%[exf], #0				\n"
820 	    "   it	ne					\n"
821 	    "   bne	1b					\n"
822 	    : [tmp] "=&r" (tmp),
823 	      [exf] "=&r" (exflag)
824 	    : [ptr] "r"   (p),
825 	      [val] "r"   (val)
826 	    : "cc", "memory");
827 }
828 
/*
 * Store v to the 32-bit word at p with release semantics: dmb() followed by
 * a plain volatile write.
 */
829 static __inline void
830 atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
831 {
832 
833 	dmb();
834 	*p = v;
835 }
836 
/*
 * Store val to the 64-bit location at p with release semantics: dmb()
 * followed by atomic_store_64().
 */
837 static __inline void
838 atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
839 {
840 
841 	dmb();
842 	atomic_store_64(p, val);
843 }
844 
/*
 * Store v to the u_long at p with release semantics: dmb() followed by a
 * plain volatile write.
 */
845 static __inline void
846 atomic_store_rel_long(volatile u_long *p, u_long v)
847 {
848 
849 	dmb();
850 	*p = v;
851 }
852 
/*
 * Atomically clear one bit of the 32-bit word at ptr and report whether it
 * was set beforehand.
 *
 * Only the low five bits of bit are used (bit & 0x1f).  The mask
 * 1 << (bit & 0x1f) is built in the scratch register ip, which is why ip is
 * listed as clobbered.  The register that carries the bit number in is tied
 * to the result operand and is reused for the STREX status and finally for
 * the return value.
 *
 * Returns 1 if the bit was set in the value that was replaced, 0 otherwise.
 * No memory barrier is issued.
 */
853 static __inline int
854 atomic_testandclear_32(volatile uint32_t *ptr, u_int bit)
855 {
856 	int newv, oldv, result;
857 
858 	__asm __volatile(
859 	    "   mov     ip, #1					\n"
860 	    "   lsl     ip, ip, %[bit]				\n"
861 	    /*  Done with %[bit] as input, reuse below as output. */
862 	    "1:							\n"
863 	    "   ldrex	%[oldv], [%[ptr]]			\n"
864 	    "   bic     %[newv], %[oldv], ip			\n"
865 	    "   strex	%[bit], %[newv], [%[ptr]]		\n"
866 	    "   teq	%[bit], #0				\n"
867 	    "   it	ne					\n"
868 	    "   bne	1b					\n"
869 	    "   ands	%[bit], %[oldv], ip			\n"
870 	    "   it	ne					\n"
871 	    "   movne   %[bit], #1                              \n"
872 	    : [bit]  "=&r"   (result),
873 	      [oldv] "=&r"   (oldv),
874 	      [newv] "=&r"   (newv)
875 	    : [ptr]  "r"     (ptr),
876 	             "[bit]" (bit & 0x1f)
877 	    : "cc", "ip", "memory");
878 
879 	return (result);
880 }
881 
/*
 * u_int flavour of atomic_testandclear_32(): forwards to it with p cast to
 * a 32-bit pointer and returns its result.
 */
882 static __inline int
883 atomic_testandclear_int(volatile u_int *p, u_int v)
884 {
885 
886 	return (atomic_testandclear_32((volatile uint32_t *)p, v));
887 }
888 
/*
 * u_long flavour of atomic_testandclear_32(): forwards to it with p cast to
 * a 32-bit pointer and returns its result.  Relies on u_long being 32 bits
 * wide on this platform.
 */
889 static __inline int
890 atomic_testandclear_long(volatile u_long *p, u_int v)
891 {
892 
893 	return (atomic_testandclear_32((volatile uint32_t *)p, v));
894 }
895 
/*
 * Atomically clear bit v of the 64-bit value at p and return
 * atomic_testandclear_32()'s result for the affected word.
 *
 * The 64-bit object is treated as two 32-bit words: when bit 5 of v is set
 * (v & 0x20) the second word is operated on, otherwise the first.
 */
896 static __inline int
897 atomic_testandclear_64(volatile uint64_t *p, u_int v)
898 {
899 	volatile uint32_t *p32;
900 
901 	p32 = (volatile uint32_t *)p;
902 	/*
903 	 * Assume little-endian,
904 	 * atomic_testandclear_32() uses only last 5 bits of v
905 	 */
906 	if ((v & 0x20) != 0)
907 		p32++;
908 	return (atomic_testandclear_32(p32, v));
909 }
910 
/*
 * Atomically set one bit of the 32-bit word at ptr and report whether it
 * was already set.
 *
 * Only the low five bits of bit are used (bit & 0x1f).  The mask
 * 1 << (bit & 0x1f) is built in the scratch register ip, which is why ip is
 * listed as clobbered.  The register that carries the bit number in is tied
 * to the result operand and is reused for the STREX status and finally for
 * the return value.
 *
 * Returns 1 if the bit was set in the value that was replaced, 0 otherwise.
 * No memory barrier is issued.
 */
911 static __inline int
912 atomic_testandset_32(volatile uint32_t *ptr, u_int bit)
913 {
914 	int newv, oldv, result;
915 
916 	__asm __volatile(
917 	    "   mov     ip, #1					\n"
918 	    "   lsl     ip, ip, %[bit]				\n"
919 	    /*  Done with %[bit] as input, reuse below as output. */
920 	    "1:							\n"
921 	    "   ldrex	%[oldv], [%[ptr]]			\n"
922 	    "   orr     %[newv], %[oldv], ip			\n"
923 	    "   strex	%[bit], %[newv], [%[ptr]]		\n"
924 	    "   teq	%[bit], #0				\n"
925 	    "   it	ne					\n"
926 	    "   bne	1b					\n"
927 	    "   ands	%[bit], %[oldv], ip			\n"
928 	    "   it	ne					\n"
929 	    "   movne   %[bit], #1                              \n"
930 	    : [bit]  "=&r"   (result),
931 	      [oldv] "=&r"   (oldv),
932 	      [newv] "=&r"   (newv)
933 	    : [ptr]  "r"     (ptr),
934 	             "[bit]" (bit & 0x1f)
935 	    : "cc", "ip", "memory");
936 
937 	return (result);
938 }
939 
/*
 * u_int flavour of atomic_testandset_32(): forwards to it with p cast to a
 * 32-bit pointer and returns its result.
 */
940 static __inline int
941 atomic_testandset_int(volatile u_int *p, u_int v)
942 {
943 
944 	return (atomic_testandset_32((volatile uint32_t *)p, v));
945 }
946 
/*
 * u_long flavour of atomic_testandset_32(): forwards to it with p cast to a
 * 32-bit pointer and returns its result.  Relies on u_long being 32 bits
 * wide on this platform.
 */
947 static __inline int
948 atomic_testandset_long(volatile u_long *p, u_int v)
949 {
950 
951 	return (atomic_testandset_32((volatile uint32_t *)p, v));
952 }
953 
/*
 * Acquire variant of atomic_testandset_long(): calls atomic_testandset_32()
 * on p cast to a 32-bit pointer, then issues dmb(), and returns the
 * test-and-set result.  Relies on u_long being 32 bits wide on this
 * platform.
 */
954 static __inline int
955 atomic_testandset_acq_long(volatile u_long *p, u_int v)
956 {
957 	int ret;
958 
959 	ret = atomic_testandset_32((volatile uint32_t *)p, v);
960 	dmb();
961 	return (ret);
962 }
963 
/*
 * Atomically set bit v of the 64-bit value at p and return
 * atomic_testandset_32()'s result for the affected word.
 *
 * The 64-bit object is treated as two 32-bit words: when bit 5 of v is set
 * (v & 0x20) the second word is operated on, otherwise the first.
 */
964 static __inline int
965 atomic_testandset_64(volatile uint64_t *p, u_int v)
966 {
967 	volatile uint32_t *p32;
968 
969 	p32 = (volatile uint32_t *)p;
970 	/*
971 	 * Assume little-endian,
972 	 * atomic_testandset_32() uses only last 5 bits of v
973 	 */
974 	if ((v & 0x20) != 0)
975 		p32++;
976 	return (atomic_testandset_32(p32, v));
977 }
978 
/*
 * Atomically replace the 32-bit word at p with v and return the value it
 * held before.
 *
 * LDREX/STREX retry loop: ret receives the loaded value and exflag the
 * STREX status (non-zero retries).  No memory barrier is issued.
 */
979 static __inline uint32_t
980 atomic_swap_32(volatile uint32_t *p, uint32_t v)
981 {
982 	uint32_t ret, exflag;
983 
984 	__asm __volatile(
985 	    "1: ldrex	%[ret], [%[ptr]]		\n"
986 	    "   strex	%[exf], %[val], [%[ptr]]	\n"
987 	    "   teq	%[exf], #0			\n"
988 	    "   it	ne				\n"
989 	    "   bne	1b				\n"
990 	    : [ret] "=&r"  (ret),
991 	      [exf] "=&r" (exflag)
992 	    : [val] "r"  (v),
993 	      [ptr] "r"  (p)
994 	    : "cc", "memory");
995 	return (ret);
996 }
997 
/*
 * u_long flavour of atomic_swap_32(): forwards to it with p cast to a
 * 32-bit pointer and returns its result.  Relies on u_long being 32 bits
 * wide on this platform.
 */
998 static __inline u_long
999 atomic_swap_long(volatile u_long *p, u_long v)
1000 {
1001 
1002 	return (atomic_swap_32((volatile uint32_t *)p, v));
1003 }
1004 
/*
 * Atomically replace the 64-bit value at p with v and return the value it
 * held before.
 *
 * LDREXD/STREXD retry loop: ret receives the loaded value and exflag the
 * STREXD status (non-zero retries).  No memory barrier is issued.
 */
1005 static __inline uint64_t
1006 atomic_swap_64(volatile uint64_t *p, uint64_t v)
1007 {
1008 	uint64_t ret;
1009 	uint32_t exflag;
1010 
1011 	__asm __volatile(
1012 	    "1: ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
1013 	    "   strexd	%[exf], %Q[val], %R[val], [%[ptr]]	\n"
1014 	    "   teq	%[exf], #0				\n"
1015 	    "   it	ne					\n"
1016 	    "   bne	1b					\n"
1017 	    : [ret] "=&r" (ret),
1018 	      [exf] "=&r" (exflag)
1019 	    : [val] "r"   (v),
1020 	      [ptr] "r"   (p)
1021 	    : "cc", "memory");
1022 	return (ret);
1023 }
1024 
/* The generator macros are private to this header; drop them after use. */
1025 #undef ATOMIC_ACQ_REL
1026 #undef ATOMIC_ACQ_REL_LONG
1027 
/* Acquire fence; implemented as a single dmb(). */
1028 static __inline void
1029 atomic_thread_fence_acq(void)
1030 {
1031 
1032 	dmb();
1033 }
1034 
/* Release fence; implemented as a single dmb(). */
1035 static __inline void
1036 atomic_thread_fence_rel(void)
1037 {
1038 
1039 	dmb();
1040 }
1041 
/* Acquire-release fence; implemented as a single dmb(). */
1042 static __inline void
1043 atomic_thread_fence_acq_rel(void)
1044 {
1045 
1046 	dmb();
1047 }
1048 
/* Sequentially consistent fence; implemented as a single dmb(). */
1049 static __inline void
1050 atomic_thread_fence_seq_cst(void)
1051 {
1052 
1053 	dmb();
1054 }
1055 
/*
 * Pointer-sized operations are aliases for the 32-bit implementations.
 * Relies on pointers being 32 bits wide on this platform.
 */
1055 #define atomic_add_ptr			atomic_add_32
1056 #define atomic_add_acq_ptr		atomic_add_acq_32
1057 #define atomic_add_rel_ptr		atomic_add_rel_32
1058 #define atomic_subtract_ptr		atomic_subtract_32
1059 #define atomic_subtract_acq_ptr		atomic_subtract_acq_32
1060 #define atomic_subtract_rel_ptr		atomic_subtract_rel_32
1061 #define atomic_clear_ptr		atomic_clear_32
1062 #define atomic_clear_acq_ptr		atomic_clear_acq_32
1063 #define atomic_clear_rel_ptr		atomic_clear_rel_32
1064 #define atomic_set_ptr			atomic_set_32
1065 #define atomic_set_acq_ptr		atomic_set_acq_32
1066 #define atomic_set_rel_ptr		atomic_set_rel_32
1067 #define atomic_fcmpset_ptr		atomic_fcmpset_32
1068 #define atomic_fcmpset_acq_ptr		atomic_fcmpset_acq_32
1069 #define atomic_fcmpset_rel_ptr		atomic_fcmpset_rel_32
1070 #define atomic_cmpset_ptr		atomic_cmpset_32
1071 #define atomic_cmpset_acq_ptr		atomic_cmpset_acq_32
1072 #define atomic_cmpset_rel_ptr		atomic_cmpset_rel_32
1073 #define atomic_fetchadd_ptr		atomic_fetchadd_32
1074 #define atomic_readandclear_ptr		atomic_readandclear_32
1075 #define atomic_load_acq_ptr		atomic_load_acq_32
1076 #define atomic_store_rel_ptr		atomic_store_rel_32
1077 #define atomic_swap_ptr			atomic_swap_32
1078 #define	atomic_testandset_ptr		atomic_testandset_32
1079 #define	atomic_testandclear_ptr		atomic_testandclear_32
1081 
/*
 * int-sized operations are aliases for the 32-bit implementations.
 * Relies on int being 32 bits wide on this platform.
 */
1081 #define atomic_add_int			atomic_add_32
1082 #define atomic_add_acq_int		atomic_add_acq_32
1083 #define atomic_add_rel_int		atomic_add_rel_32
1084 #define atomic_subtract_int		atomic_subtract_32
1085 #define atomic_subtract_acq_int		atomic_subtract_acq_32
1086 #define atomic_subtract_rel_int		atomic_subtract_rel_32
1087 #define atomic_clear_int		atomic_clear_32
1088 #define atomic_clear_acq_int		atomic_clear_acq_32
1089 #define atomic_clear_rel_int		atomic_clear_rel_32
1090 #define atomic_set_int			atomic_set_32
1091 #define atomic_set_acq_int		atomic_set_acq_32
1092 #define atomic_set_rel_int		atomic_set_rel_32
1093 #define atomic_fcmpset_int		atomic_fcmpset_32
1094 #define atomic_fcmpset_acq_int		atomic_fcmpset_acq_32
1095 #define atomic_fcmpset_rel_int		atomic_fcmpset_rel_32
1096 #define atomic_cmpset_int		atomic_cmpset_32
1097 #define atomic_cmpset_acq_int		atomic_cmpset_acq_32
1098 #define atomic_cmpset_rel_int		atomic_cmpset_rel_32
1099 #define atomic_fetchadd_int		atomic_fetchadd_32
1100 #define atomic_readandclear_int		atomic_readandclear_32
1101 #define atomic_load_acq_int		atomic_load_acq_32
1102 #define atomic_store_rel_int		atomic_store_rel_32
1103 #define atomic_swap_int			atomic_swap_32
1105 
1106 /*
1107  * For:
1108  *  - atomic_load_acq_8
1109  *  - atomic_load_acq_16
1110  */
1111 #include <sys/_atomic_subword.h>
1112 
1113 #endif /* _MACHINE_ATOMIC_H_ */
1114