xref: /freebsd/sys/arm/include/atomic.h (revision bfed2417f472f87e720b37bdac9ffd75ca2abc54)
1 /* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */
2 
3 /*-
4  * SPDX-License-Identifier: BSD-4-Clause
5  *
6  * Copyright (C) 2003-2004 Olivier Houchard
7  * Copyright (C) 1994-1997 Mark Brinicombe
8  * Copyright (C) 1994 Brini
9  * All rights reserved.
10  *
11  * This code is derived from software written for Brini by Mark Brinicombe
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by Brini.
24  * 4. The name of Brini may not be used to endorse or promote products
25  *    derived from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
28  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
29  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
30  * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
33  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
34  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
35  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
36  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37  *
38  * $FreeBSD$
39  */
40 
41 #ifndef	_MACHINE_ATOMIC_H_
42 #define	_MACHINE_ATOMIC_H_
43 
44 #include <sys/atomic_common.h>
45 
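/*
 * Memory barriers.  On ARMv7 and later the ISB/DSB/DMB instructions are
 * available directly; on older cores the equivalent CP15 c7 barrier
 * operations are used instead.  mb(), wmb() and rmb() all expand to a
 * full data memory barrier.
 */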
46 #if __ARM_ARCH >= 7
47 #define isb()  __asm __volatile("isb" : : : "memory")
48 #define dsb()  __asm __volatile("dsb" : : : "memory")
49 #define dmb()  __asm __volatile("dmb" : : : "memory")
50 #else
51 #define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
52 #define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
53 #define dmb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
54 #endif
55 
56 #define mb()   dmb()
57 #define wmb()  dmb()
58 #define rmb()  dmb()
59 
60 #define	ARM_HAVE_ATOMIC64
61 
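/*
 * Helper macros that generate the _acq and _rel variants of an operation
 * from its relaxed form: the acquire variant issues a dmb() after the
 * operation, the release variant issues a dmb() before it.
 */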
62 #define ATOMIC_ACQ_REL_LONG(NAME)					\
63 static __inline void							\
64 atomic_##NAME##_acq_long(__volatile u_long *p, u_long v)		\
65 {									\
66 	atomic_##NAME##_long(p, v);					\
67 	dmb();								\
68 }									\
69 									\
70 static __inline  void							\
71 atomic_##NAME##_rel_long(__volatile u_long *p, u_long v)		\
72 {									\
73 	dmb();								\
74 	atomic_##NAME##_long(p, v);					\
75 }
76 
77 #define	ATOMIC_ACQ_REL(NAME, WIDTH)					\
78 static __inline  void							\
79 atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
80 {									\
81 	atomic_##NAME##_##WIDTH(p, v);					\
82 	dmb();								\
83 }									\
84 									\
85 static __inline  void							\
86 atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
87 {									\
88 	dmb();								\
89 	atomic_##NAME##_##WIDTH(p, v);					\
90 }
91 
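/*
 * The 32-bit read-modify-write operations below are all built on the same
 * load-exclusive/store-exclusive retry loop.  A rough C-level sketch of the
 * pattern (illustrative only, not literal code):
 *
 *	do {
 *		tmp = load_exclusive(p);
 *		tmp = <modify>(tmp, val);
 *	} while (store_exclusive(p, tmp) != 0);
 *
 * STREX reports failure (non-zero) if the exclusive monitor was cleared
 * since the LDREX, e.g. by another CPU's access, and the loop then retries.
 */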
92 static __inline void
93 atomic_add_32(volatile uint32_t *p, uint32_t val)
94 {
95 	uint32_t tmp = 0, tmp2 = 0;
96 
97 	__asm __volatile(
98 	    "1: ldrex	%0, [%2]	\n"
99 	    "   add	%0, %0, %3	\n"
100 	    "   strex	%1, %0, [%2]	\n"
101 	    "   cmp	%1, #0		\n"
102 	    "   it	ne		\n"
103 	    "   bne	1b		\n"
104 	    : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
105 	    : : "cc", "memory");
106 }
107 
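/*
 * The 64-bit variants use LDREXD/STREXD on a doubleword.  In the asm below,
 * the %Q and %R operand modifiers select the registers holding the low and
 * high 32-bit halves of a 64-bit operand, respectively.
 */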
108 static __inline void
109 atomic_add_64(volatile uint64_t *p, uint64_t val)
110 {
111 	uint64_t tmp;
112 	uint32_t exflag;
113 
114 	__asm __volatile(
115 	    "1:							\n"
116 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
117 	    "   adds	%Q[tmp], %Q[val]			\n"
118 	    "   adc	%R[tmp], %R[tmp], %R[val]		\n"
119 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
120 	    "   teq	%[exf], #0				\n"
121 	    "   it	ne					\n"
122 	    "   bne	1b					\n"
123 	    : [exf] "=&r" (exflag),
124 	      [tmp] "=&r" (tmp)
125 	    : [ptr] "r"   (p),
126 	      [val] "r"   (val)
127 	    : "cc", "memory");
128 }
129 
130 static __inline void
131 atomic_add_long(volatile u_long *p, u_long val)
132 {
133 
134 	atomic_add_32((volatile uint32_t *)p, val);
135 }
136 
137 ATOMIC_ACQ_REL(add, 32)
138 ATOMIC_ACQ_REL(add, 64)
139 ATOMIC_ACQ_REL_LONG(add)
140 
141 static __inline void
142 atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
143 {
144 	uint32_t tmp = 0, tmp2 = 0;
145 
146 	__asm __volatile(
147 	    "1: ldrex	%0, [%2]	\n"
148 	    "   bic	%0, %0, %3	\n"
149 	    "   strex	%1, %0, [%2]	\n"
150 	    "   cmp	%1, #0		\n"
151 	    "   it	ne		\n"
152 	    "   bne	1b		\n"
153 	    : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
154 	    : : "cc", "memory");
155 }
156 
157 static __inline void
158 atomic_clear_64(volatile uint64_t *p, uint64_t val)
159 {
160 	uint64_t tmp;
161 	uint32_t exflag;
162 
163 	__asm __volatile(
164 	    "1:							\n"
165 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
166 	    "   bic	%Q[tmp], %Q[val]			\n"
167 	    "   bic	%R[tmp], %R[val]			\n"
168 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
169 	    "   teq	%[exf], #0				\n"
170 	    "   it	ne					\n"
171 	    "   bne	1b					\n"
172 	    : [exf] "=&r" (exflag),
173 	      [tmp] "=&r" (tmp)
174 	    : [ptr] "r"   (p),
175 	      [val] "r"   (val)
176 	    : "cc", "memory");
177 }
178 
179 static __inline void
180 atomic_clear_long(volatile u_long *address, u_long setmask)
181 {
182 
183 	atomic_clear_32((volatile uint32_t *)address, setmask);
184 }
185 
186 ATOMIC_ACQ_REL(clear, 32)
187 ATOMIC_ACQ_REL(clear, 64)
188 ATOMIC_ACQ_REL_LONG(clear)
189 
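/*
 * fcmpset: compare *_ptr against *_old and, if equal, store _new.  Returns
 * non-zero on success; on failure it returns zero and writes the value that
 * was actually observed back through _old.  A typical caller loop looks
 * roughly like this (illustrative sketch only):
 *
 *	old = *p;
 *	do {
 *		new = old | flag;
 *	} while (!atomic_fcmpset_32(p, &old, new));
 */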
190 #define ATOMIC_FCMPSET_CODE(RET, TYPE, SUF)                   \
191     {                                                         \
192 	TYPE tmp;                                             \
193                                                               \
194 	__asm __volatile(                                     \
195 	    "1: ldrex" SUF "   %[tmp], [%[ptr]]          \n"  \
196 	    "   ldr" SUF "     %[ret], [%[oldv]]         \n"  \
197 	    "   teq            %[tmp], %[ret]            \n"  \
198 	    "   ittee          ne                        \n"  \
199 	    "   str" SUF "ne   %[tmp], [%[oldv]]         \n"  \
200 	    "   movne          %[ret], #0                \n"  \
201 	    "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n"  \
202 	    "   eorseq         %[ret], #1                \n"  \
203 	    "   beq            1b                        \n"  \
204 	    : [ret] "=&r" (RET),                              \
205 	      [tmp] "=&r" (tmp)                               \
206 	    : [ptr] "r"   (_ptr),                             \
207 	      [oldv] "r"  (_old),                             \
208 	      [newv] "r"  (_new)                              \
209 	    : "cc", "memory");                                \
210     }
211 
212 #define ATOMIC_FCMPSET_CODE64(RET)                                 \
213     {                                                              \
214 	uint64_t cmp, tmp;                                         \
215                                                                    \
216 	__asm __volatile(                                          \
217 	    "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n"  \
218 	    "   ldrd     %Q[cmp], %R[cmp], [%[oldv]]          \n"  \
219 	    "   teq      %Q[tmp], %Q[cmp]                     \n"  \
220 	    "   it       eq                                   \n"  \
221 	    "   teqeq    %R[tmp], %R[cmp]                     \n"  \
222 	    "   ittee    ne                                   \n"  \
223 	    "   movne    %[ret], #0                           \n"  \
224 	    "   strdne   %[cmp], [%[oldv]]                    \n"  \
225 	    "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n"  \
226 	    "   eorseq   %[ret], #1                           \n"  \
227 	    "   beq      1b                                   \n"  \
228 	    : [ret] "=&r" (RET),                                   \
229 	      [cmp] "=&r" (cmp),                                   \
230 	      [tmp] "=&r" (tmp)                                    \
231 	    : [ptr] "r"   (_ptr),                                  \
232 	      [oldv] "r"  (_old),                                  \
233 	      [newv] "r"  (_new)                                   \
234 	    : "cc", "memory");                                     \
235     }
236 
237 static __inline int
238 atomic_fcmpset_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
239 {
240 	int ret;
241 
242 	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
243 	return (ret);
244 }
245 #define	atomic_fcmpset_8	atomic_fcmpset_8
246 
247 static __inline int
248 atomic_fcmpset_acq_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
249 {
250 	int ret;
251 
252 	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
253 	dmb();
254 	return (ret);
255 }
256 
257 static __inline int
258 atomic_fcmpset_rel_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
259 {
260 	int ret;
261 
262 	dmb();
263 	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
264 	return (ret);
265 }
266 
267 static __inline int
268 atomic_fcmpset_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
269 {
270 	int ret;
271 
272 	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
273 	return (ret);
274 }
275 #define	atomic_fcmpset_16	atomic_fcmpset_16
276 
277 static __inline int
278 atomic_fcmpset_acq_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
279 {
280 	int ret;
281 
282 	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
283 	dmb();
284 	return (ret);
285 }
286 
287 static __inline int
288 atomic_fcmpset_rel_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
289 {
290 	int ret;
291 
292 	dmb();
293 	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
294 	return (ret);
295 }
296 
297 static __inline int
298 atomic_fcmpset_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
299 {
300 	int ret;
301 
302 	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
303 	return (ret);
304 }
305 
306 static __inline int
307 atomic_fcmpset_acq_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
308 {
309 	int ret;
310 
311 	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
312 	dmb();
313 	return (ret);
314 }
315 
316 static __inline int
317 atomic_fcmpset_rel_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
318 {
319 	int ret;
320 
321 	dmb();
322 	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
323 	return (ret);
324 }
325 
326 static __inline int
327 atomic_fcmpset_long(volatile u_long *_ptr, u_long *_old, u_long _new)
328 {
329 	int ret;
330 
331 	ATOMIC_FCMPSET_CODE(ret, u_long, "");
332 	return (ret);
333 }
334 
335 static __inline int
336 atomic_fcmpset_acq_long(volatile u_long *_ptr, u_long *_old, u_long _new)
337 {
338 	int ret;
339 
340 	ATOMIC_FCMPSET_CODE(ret, u_long, "");
341 	dmb();
342 	return (ret);
343 }
344 
345 static __inline int
346 atomic_fcmpset_rel_long(volatile u_long *_ptr, u_long *_old, u_long _new)
347 {
348 	int ret;
349 
350 	dmb();
351 	ATOMIC_FCMPSET_CODE(ret, u_long, "");
352 	return (ret);
353 }
354 
355 static __inline int
356 atomic_fcmpset_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
357 {
358 	int ret;
359 
360 	ATOMIC_FCMPSET_CODE64(ret);
361 	return (ret);
362 }
363 
364 static __inline int
365 atomic_fcmpset_acq_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
366 {
367 	int ret;
368 
369 	ATOMIC_FCMPSET_CODE64(ret);
370 	dmb();
371 	return (ret);
372 }
373 
374 static __inline int
375 atomic_fcmpset_rel_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
376 {
377 	int ret;
378 
379 	dmb();
380 	ATOMIC_FCMPSET_CODE64(ret);
381 	return (ret);
382 }
383 
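/*
 * cmpset: like fcmpset, but the expected value is passed by value and the
 * observed value is not reported back on failure, so callers that retry
 * must re-read *_ptr themselves.  Returns non-zero if the store happened.
 */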
384 #define ATOMIC_CMPSET_CODE(RET, SUF)                         \
385     {                                                        \
386 	__asm __volatile(                                    \
387 	    "1: ldrex" SUF "   %[ret], [%[ptr]]          \n" \
388 	    "   teq            %[ret], %[oldv]           \n" \
389 	    "   itee           ne                        \n" \
390 	    "   movne          %[ret], #0                \n" \
391 	    "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n" \
392 	    "   eorseq         %[ret], #1                \n" \
393 	    "   beq            1b                        \n" \
394 	    : [ret] "=&r" (RET)                              \
395 	    : [ptr] "r"   (_ptr),                            \
396 	      [oldv] "r"  (_old),                            \
397 	      [newv] "r"  (_new)                             \
398 	    : "cc", "memory");                               \
399     }
400 
401 #define ATOMIC_CMPSET_CODE64(RET)                                 \
402     {                                                             \
403 	uint64_t tmp;                                             \
404 	                                                          \
405 	__asm __volatile(                                         \
406 	    "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n" \
407 	    "   teq      %Q[tmp], %Q[oldv]                    \n" \
408 	    "   it       eq                                   \n" \
409 	    "   teqeq    %R[tmp], %R[oldv]                    \n" \
410 	    "   itee     ne                                   \n" \
411 	    "   movne    %[ret], #0                           \n" \
412 	    "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n" \
413 	    "   eorseq   %[ret], #1                           \n" \
414 	    "   beq      1b                                   \n" \
415 	    : [ret] "=&r" (RET),                                  \
416 	      [tmp] "=&r" (tmp)                                   \
417 	    : [ptr] "r"   (_ptr),                                 \
418 	      [oldv] "r"  (_old),                                 \
419 	      [newv] "r"  (_new)                                  \
420 	    : "cc", "memory");                                    \
421     }
422 
423 static __inline int
424 atomic_cmpset_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
425 {
426 	int ret;
427 
428 	ATOMIC_CMPSET_CODE(ret, "b");
429 	return (ret);
430 }
431 #define	atomic_cmpset_8		atomic_cmpset_8
432 
433 static __inline int
434 atomic_cmpset_acq_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
435 {
436 	int ret;
437 
438 	ATOMIC_CMPSET_CODE(ret, "b");
439 	dmb();
440 	return (ret);
441 }
442 
443 static __inline int
444 atomic_cmpset_rel_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
445 {
446 	int ret;
447 
448 	dmb();
449 	ATOMIC_CMPSET_CODE(ret, "b");
450 	return (ret);
451 }
452 
453 static __inline int
454 atomic_cmpset_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
455 {
456 	int ret;
457 
458 	ATOMIC_CMPSET_CODE(ret, "h");
459 	return (ret);
460 }
461 #define	atomic_cmpset_16	atomic_cmpset_16
462 
463 static __inline int
464 atomic_cmpset_acq_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
465 {
466 	int ret;
467 
468 	ATOMIC_CMPSET_CODE(ret, "h");
469 	dmb();
470 	return (ret);
471 }
472 
473 static __inline int
474 atomic_cmpset_rel_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
475 {
476 	int ret;
477 
478 	dmb();
479 	ATOMIC_CMPSET_CODE(ret, "h");
480 	return (ret);
481 }
482 
483 static __inline int
484 atomic_cmpset_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
485 {
486 	int ret;
487 
488 	ATOMIC_CMPSET_CODE(ret, "");
489 	return (ret);
490 }
491 
492 static __inline int
493 atomic_cmpset_acq_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
494 {
495 	int ret;
496 
497 	ATOMIC_CMPSET_CODE(ret, "");
498 	dmb();
499 	return (ret);
500 }
501 
502 static __inline int
503 atomic_cmpset_rel_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
504 {
505 	int ret;
506 
507 	dmb();
508 	ATOMIC_CMPSET_CODE(ret, "");
509 	return (ret);
510 }
511 
512 static __inline int
513 atomic_cmpset_long(volatile u_long *_ptr, u_long _old, u_long _new)
514 {
515 	int ret;
516 
517 	ATOMIC_CMPSET_CODE(ret, "");
518 	return (ret);
519 }
520 
521 static __inline int
522 atomic_cmpset_acq_long(volatile u_long *_ptr, u_long _old, u_long _new)
523 {
524 	int ret;
525 
526 	ATOMIC_CMPSET_CODE(ret, "");
527 	dmb();
528 	return (ret);
529 }
530 
531 static __inline int
532 atomic_cmpset_rel_long(volatile u_long *_ptr, u_long _old, u_long _new)
533 {
534 	int ret;
535 
536 	dmb();
537 	ATOMIC_CMPSET_CODE(ret, "");
538 	return (ret);
539 }
540 
541 static __inline int
542 atomic_cmpset_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
543 {
544 	int ret;
545 
546 	ATOMIC_CMPSET_CODE64(ret);
547 	return (ret);
548 }
549 
550 static __inline int
551 atomic_cmpset_acq_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
552 {
553 	int ret;
554 
555 	ATOMIC_CMPSET_CODE64(ret);
556 	dmb();
557 	return (ret);
558 }
559 
560 static __inline int
561 atomic_cmpset_rel_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
562 {
563 	int ret;
564 
565 	dmb();
566 	ATOMIC_CMPSET_CODE64(ret);
567 	return (ret);
568 }
569 
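/*
 * fetchadd returns the value the location held before the addition, which
 * makes it suitable for ticket-style counters, e.g. (illustrative only):
 *
 *	my_ticket = atomic_fetchadd_32(&next_ticket, 1);
 */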
570 static __inline uint32_t
571 atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
572 {
573 	uint32_t tmp = 0, tmp2 = 0, ret = 0;
574 
575 	__asm __volatile(
576 	    "1: ldrex	%0, [%3]	\n"
577 	    "   add	%1, %0, %4	\n"
578 	    "   strex	%2, %1, [%3]	\n"
579 	    "   cmp	%2, #0		\n"
580 	    "   it	ne		\n"
581 	    "   bne	1b		\n"
582 	    : "+r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
583 	    : : "cc", "memory");
584 	return (ret);
585 }
586 
587 static __inline uint64_t
588 atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
589 {
590 	uint64_t ret, tmp;
591 	uint32_t exflag;
592 
593 	__asm __volatile(
594 	    "1:							\n"
595 	    "   ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
596 	    "   adds	%Q[tmp], %Q[ret], %Q[val]		\n"
597 	    "   adc	%R[tmp], %R[ret], %R[val]		\n"
598 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
599 	    "   teq	%[exf], #0				\n"
600 	    "   it	ne					\n"
601 	    "   bne	1b					\n"
602 	    : [ret] "=&r" (ret),
603 	      [exf] "=&r" (exflag),
604 	      [tmp] "=&r" (tmp)
605 	    : [ptr] "r"   (p),
606 	      [val] "r"   (val)
607 	    : "cc", "memory");
608 	return (ret);
609 }
610 
611 static __inline u_long
612 atomic_fetchadd_long(volatile u_long *p, u_long val)
613 {
614 
615 	return (atomic_fetchadd_32((volatile uint32_t *)p, val));
616 }
617 
618 static __inline uint32_t
619 atomic_load_acq_32(volatile uint32_t *p)
620 {
621 	uint32_t v;
622 
623 	v = *p;
624 	dmb();
625 	return (v);
626 }
627 
628 static __inline uint64_t
629 atomic_load_64(volatile uint64_t *p)
630 {
631 	uint64_t ret;
632 
633 	/*
634 	 * The only way to atomically load 64 bits is with LDREXD which puts the
635 	 * exclusive monitor into the exclusive state, so reset it to open state
636 	 * with CLREX because we don't actually need to store anything.
637 	 */
638 	__asm __volatile(
639 	    "ldrexd	%Q[ret], %R[ret], [%[ptr]]	\n"
640 	    "clrex					\n"
641 	    : [ret] "=&r" (ret)
642 	    : [ptr] "r"   (p)
643 	    : "cc", "memory");
644 	return (ret);
645 }
646 
647 static __inline uint64_t
648 atomic_load_acq_64(volatile uint64_t *p)
649 {
650 	uint64_t ret;
651 
652 	ret = atomic_load_64(p);
653 	dmb();
654 	return (ret);
655 }
656 
657 static __inline u_long
658 atomic_load_acq_long(volatile u_long *p)
659 {
660 	u_long v;
661 
662 	v = *p;
663 	dmb();
664 	return (v);
665 }
666 
667 static __inline uint32_t
668 atomic_readandclear_32(volatile uint32_t *p)
669 {
670 	uint32_t ret, tmp = 0, tmp2 = 0;
671 
672 	__asm __volatile(
673 	    "1: ldrex	%0, [%3]	\n"
674 	    "   mov	%1, #0		\n"
675 	    "   strex	%2, %1, [%3]	\n"
676 	    "   cmp	%2, #0		\n"
677 	    "   it	ne		\n"
678 	    "   bne	1b		\n"
679 	    : "=r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p)
680 	    : : "cc", "memory");
681 	return (ret);
682 }
683 
684 static __inline uint64_t
685 atomic_readandclear_64(volatile uint64_t *p)
686 {
687 	uint64_t ret, tmp;
688 	uint32_t exflag;
689 
690 	__asm __volatile(
691 	    "1:							\n"
692 	    "   ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
693 	    "   mov	%Q[tmp], #0				\n"
694 	    "   mov	%R[tmp], #0				\n"
695 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
696 	    "   teq	%[exf], #0				\n"
697 	    "   it	ne					\n"
698 	    "   bne	1b					\n"
699 	    : [ret] "=&r" (ret),
700 	      [exf] "=&r" (exflag),
701 	      [tmp] "=&r" (tmp)
702 	    : [ptr] "r"   (p)
703 	    : "cc", "memory");
704 	return (ret);
705 }
706 
707 static __inline u_long
708 atomic_readandclear_long(volatile u_long *p)
709 {
710 
711 	return (atomic_readandclear_32((volatile uint32_t *)p));
712 }
713 
714 static __inline void
715 atomic_set_32(volatile uint32_t *address, uint32_t setmask)
716 {
717 	uint32_t tmp = 0, tmp2 = 0;
718 
719 	__asm __volatile(
720 	    "1: ldrex	%0, [%2]	\n"
721 	    "   orr	%0, %0, %3	\n"
722 	    "   strex	%1, %0, [%2]	\n"
723 	    "   cmp	%1, #0		\n"
724 	    "   it	ne		\n"
725 	    "   bne	1b		\n"
726 	    : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
727 	    : : "cc", "memory");
728 }
729 
730 static __inline void
731 atomic_set_64(volatile uint64_t *p, uint64_t val)
732 {
733 	uint64_t tmp;
734 	uint32_t exflag;
735 
736 	__asm __volatile(
737 	    "1:							\n"
738 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
739 	    "   orr	%Q[tmp], %Q[val]			\n"
740 	    "   orr	%R[tmp], %R[val]			\n"
741 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
742 	    "   teq	%[exf], #0				\n"
743 	    "   it	ne					\n"
744 	    "   bne	1b					\n"
745 	    : [exf] "=&r" (exflag),
746 	      [tmp] "=&r" (tmp)
747 	    : [ptr] "r"   (p),
748 	      [val] "r"   (val)
749 	    : "cc", "memory");
750 }
751 
752 static __inline void
753 atomic_set_long(volatile u_long *address, u_long setmask)
754 {
755 
756 	atomic_set_32((volatile uint32_t *)address, setmask);
757 }
758 
759 ATOMIC_ACQ_REL(set, 32)
760 ATOMIC_ACQ_REL(set, 64)
761 ATOMIC_ACQ_REL_LONG(set)
762 
763 static __inline void
764 atomic_subtract_32(volatile uint32_t *p, uint32_t val)
765 {
766 	uint32_t tmp = 0, tmp2 = 0;
767 
768 	__asm __volatile(
769 	    "1: ldrex	%0, [%2]	\n"
770 	    "   sub	%0, %0, %3	\n"
771 	    "   strex	%1, %0, [%2]	\n"
772 	    "   cmp	%1, #0		\n"
773 	    "   it	ne		\n"
774 	    "   bne	1b		\n"
775 	    : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
776 	    : : "cc", "memory");
777 }
778 
779 static __inline void
780 atomic_subtract_64(volatile uint64_t *p, uint64_t val)
781 {
782 	uint64_t tmp;
783 	uint32_t exflag;
784 
785 	__asm __volatile(
786 	    "1:							\n"
787 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
788 	    "   subs	%Q[tmp], %Q[val]			\n"
789 	    "   sbc	%R[tmp], %R[tmp], %R[val]		\n"
790 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
791 	    "   teq	%[exf], #0				\n"
792 	    "   it	ne					\n"
793 	    "   bne	1b					\n"
794 	    : [exf] "=&r" (exflag),
795 	      [tmp] "=&r" (tmp)
796 	    : [ptr] "r"   (p),
797 	      [val] "r"   (val)
798 	    : "cc", "memory");
799 }
800 
801 static __inline void
802 atomic_subtract_long(volatile u_long *p, u_long val)
803 {
804 
805 	atomic_subtract_32((volatile uint32_t *)p, val);
806 }
807 
808 ATOMIC_ACQ_REL(subtract, 32)
809 ATOMIC_ACQ_REL(subtract, 64)
810 ATOMIC_ACQ_REL_LONG(subtract)
811 
812 static __inline void
813 atomic_store_64(volatile uint64_t *p, uint64_t val)
814 {
815 	uint64_t tmp;
816 	uint32_t exflag;
817 
818 	/*
819 	 * The only way to atomically store 64 bits is with STREXD, which will
820 	 * succeed only if paired up with a preceding LDREXD using the same
821 	 * address, so we read and discard the existing value before storing.
822 	 */
823 	__asm __volatile(
824 	    "1:							\n"
825 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
826 	    "   strexd	%[exf], %Q[val], %R[val], [%[ptr]]	\n"
827 	    "   teq	%[exf], #0				\n"
828 	    "   it	ne					\n"
829 	    "   bne	1b					\n"
830 	    : [tmp] "=&r" (tmp),
831 	      [exf] "=&r" (exflag)
832 	    : [ptr] "r"   (p),
833 	      [val] "r"   (val)
834 	    : "cc", "memory");
835 }
836 
837 static __inline void
838 atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
839 {
840 
841 	dmb();
842 	*p = v;
843 }
844 
845 static __inline void
846 atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
847 {
848 
849 	dmb();
850 	atomic_store_64(p, val);
851 }
852 
853 static __inline void
854 atomic_store_rel_long(volatile u_long *p, u_long v)
855 {
856 
857 	dmb();
858 	*p = v;
859 }
860 
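/*
 * testandclear/testandset operate on bit (bit % 32) of the word and return
 * that bit's previous value (0 or 1).  The scratch register ip (r12) holds
 * the bit mask, hence the "ip" clobber.
 */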
861 static __inline int
862 atomic_testandclear_32(volatile uint32_t *ptr, u_int bit)
863 {
864 	int newv, oldv, result;
865 
866 	__asm __volatile(
867 	    "   mov     ip, #1					\n"
868 	    "   lsl     ip, ip, %[bit]				\n"
869 	    /*  Done with %[bit] as input, reuse below as output. */
870 	    "1:							\n"
871 	    "   ldrex	%[oldv], [%[ptr]]			\n"
872 	    "   bic     %[newv], %[oldv], ip			\n"
873 	    "   strex	%[bit], %[newv], [%[ptr]]		\n"
874 	    "   teq	%[bit], #0				\n"
875 	    "   it	ne					\n"
876 	    "   bne	1b					\n"
877 	    "   ands	%[bit], %[oldv], ip			\n"
878 	    "   it	ne					\n"
879 	    "   movne   %[bit], #1                              \n"
880 	    : [bit]  "=&r"   (result),
881 	      [oldv] "=&r"   (oldv),
882 	      [newv] "=&r"   (newv)
883 	    : [ptr]  "r"     (ptr),
884 	             "[bit]" (bit & 0x1f)
885 	    : "cc", "ip", "memory");
886 
887 	return (result);
888 }
889 
890 static __inline int
891 atomic_testandclear_int(volatile u_int *p, u_int v)
892 {
893 
894 	return (atomic_testandclear_32((volatile uint32_t *)p, v));
895 }
896 
897 static __inline int
898 atomic_testandclear_long(volatile u_long *p, u_int v)
899 {
900 
901 	return (atomic_testandclear_32((volatile uint32_t *)p, v));
902 }
903 #define	atomic_testandclear_long	atomic_testandclear_long
904 
905 
906 static __inline int
907 atomic_testandclear_64(volatile uint64_t *p, u_int v)
908 {
909 	volatile uint32_t *p32;
910 
911 	p32 = (volatile uint32_t *)p;
912 	/*
913 	 * Assumes a little-endian layout; atomic_testandclear_32() uses only
914 	 * the low 5 bits of v, so bit 5 of v selects which 32-bit half to touch.
915 	 */
916 	if ((v & 0x20) != 0)
917 		p32++;
918 	return (atomic_testandclear_32(p32, v));
919 }
920 
921 static __inline int
922 atomic_testandset_32(volatile uint32_t *ptr, u_int bit)
923 {
924 	int newv, oldv, result;
925 
926 	__asm __volatile(
927 	    "   mov     ip, #1					\n"
928 	    "   lsl     ip, ip, %[bit]				\n"
929 	    /*  Done with %[bit] as input, reuse below as output. */
930 	    "1:							\n"
931 	    "   ldrex	%[oldv], [%[ptr]]			\n"
932 	    "   orr     %[newv], %[oldv], ip			\n"
933 	    "   strex	%[bit], %[newv], [%[ptr]]		\n"
934 	    "   teq	%[bit], #0				\n"
935 	    "   it	ne					\n"
936 	    "   bne	1b					\n"
937 	    "   ands	%[bit], %[oldv], ip			\n"
938 	    "   it	ne					\n"
939 	    "   movne   %[bit], #1                              \n"
940 	    : [bit]  "=&r"   (result),
941 	      [oldv] "=&r"   (oldv),
942 	      [newv] "=&r"   (newv)
943 	    : [ptr]  "r"     (ptr),
944 	             "[bit]" (bit & 0x1f)
945 	    : "cc", "ip", "memory");
946 
947 	return (result);
948 }
949 
950 static __inline int
951 atomic_testandset_int(volatile u_int *p, u_int v)
952 {
953 
954 	return (atomic_testandset_32((volatile uint32_t *)p, v));
955 }
956 
957 static __inline int
958 atomic_testandset_long(volatile u_long *p, u_int v)
959 {
960 
961 	return (atomic_testandset_32((volatile uint32_t *)p, v));
962 }
963 #define	atomic_testandset_long	atomic_testandset_long
964 
965 static __inline int
966 atomic_testandset_64(volatile uint64_t *p, u_int v)
967 {
968 	volatile uint32_t *p32;
969 
970 	p32 = (volatile uint32_t *)p;
971 	/*
972 	 * Assumes a little-endian layout; atomic_testandset_32() uses only
973 	 * the low 5 bits of v, so bit 5 of v selects which 32-bit half to touch.
974 	 */
975 	if ((v & 0x20) != 0)
976 		p32++;
977 	return (atomic_testandset_32(p32, v));
978 }
979 
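/*
 * swap unconditionally stores the new value and returns the previous
 * contents, e.g. to hand off ownership of a flags word (illustrative only):
 *
 *	old_flags = atomic_swap_32(&flags, 0);
 */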
980 static __inline uint32_t
981 atomic_swap_32(volatile uint32_t *p, uint32_t v)
982 {
983 	uint32_t ret, exflag;
984 
985 	__asm __volatile(
986 	    "1: ldrex	%[ret], [%[ptr]]		\n"
987 	    "   strex	%[exf], %[val], [%[ptr]]	\n"
988 	    "   teq	%[exf], #0			\n"
989 	    "   it	ne				\n"
990 	    "   bne	1b				\n"
991 	    : [ret] "=&r"  (ret),
992 	      [exf] "=&r" (exflag)
993 	    : [val] "r"  (v),
994 	      [ptr] "r"  (p)
995 	    : "cc", "memory");
996 	return (ret);
997 }
998 
999 static __inline u_long
1000 atomic_swap_long(volatile u_long *p, u_long v)
1001 {
1002 
1003 	return (atomic_swap_32((volatile uint32_t *)p, v));
1004 }
1005 
1006 static __inline uint64_t
1007 atomic_swap_64(volatile uint64_t *p, uint64_t v)
1008 {
1009 	uint64_t ret;
1010 	uint32_t exflag;
1011 
1012 	__asm __volatile(
1013 	    "1: ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
1014 	    "   strexd	%[exf], %Q[val], %R[val], [%[ptr]]	\n"
1015 	    "   teq	%[exf], #0				\n"
1016 	    "   it	ne					\n"
1017 	    "   bne	1b					\n"
1018 	    : [ret] "=&r" (ret),
1019 	      [exf] "=&r" (exflag)
1020 	    : [val] "r"   (v),
1021 	      [ptr] "r"   (p)
1022 	    : "cc", "memory");
1023 	return (ret);
1024 }
1025 
1026 #undef ATOMIC_ACQ_REL
1027 #undef ATOMIC_ACQ_REL_LONG
1028 
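/*
 * All of the thread fences collapse to a full dmb(); this header does not
 * use anything weaker than a full data memory barrier for ordering.
 */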
1029 static __inline void
1030 atomic_thread_fence_acq(void)
1031 {
1032 
1033 	dmb();
1034 }
1035 
1036 static __inline void
1037 atomic_thread_fence_rel(void)
1038 {
1039 
1040 	dmb();
1041 }
1042 
1043 static __inline void
1044 atomic_thread_fence_acq_rel(void)
1045 {
1046 
1047 	dmb();
1048 }
1049 
1050 static __inline void
1051 atomic_thread_fence_seq_cst(void)
1052 {
1053 
1054 	dmb();
1055 }
1056 
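/*
 * On 32-bit ARM, pointers, ints and longs are all 32 bits wide, so the
 * _ptr and _int operations simply alias their _32 counterparts.
 */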
1057 #define atomic_clear_ptr		atomic_clear_32
1058 #define atomic_clear_acq_ptr		atomic_clear_acq_32
1059 #define atomic_clear_rel_ptr		atomic_clear_rel_32
1060 #define atomic_set_ptr			atomic_set_32
1061 #define atomic_set_acq_ptr		atomic_set_acq_32
1062 #define atomic_set_rel_ptr		atomic_set_rel_32
1063 #define atomic_fcmpset_ptr		atomic_fcmpset_32
1064 #define atomic_fcmpset_rel_ptr		atomic_fcmpset_rel_32
1065 #define atomic_fcmpset_acq_ptr		atomic_fcmpset_acq_32
1066 #define atomic_cmpset_ptr		atomic_cmpset_32
1067 #define atomic_cmpset_acq_ptr		atomic_cmpset_acq_32
1068 #define atomic_cmpset_rel_ptr		atomic_cmpset_rel_32
1069 #define atomic_load_acq_ptr		atomic_load_acq_32
1070 #define atomic_store_rel_ptr		atomic_store_rel_32
1071 #define atomic_swap_ptr			atomic_swap_32
1072 #define atomic_readandclear_ptr		atomic_readandclear_32
1073 
1074 #define atomic_add_int			atomic_add_32
1075 #define atomic_add_acq_int		atomic_add_acq_32
1076 #define atomic_add_rel_int		atomic_add_rel_32
1077 #define atomic_subtract_int		atomic_subtract_32
1078 #define atomic_subtract_acq_int		atomic_subtract_acq_32
1079 #define atomic_subtract_rel_int		atomic_subtract_rel_32
1080 #define atomic_clear_int		atomic_clear_32
1081 #define atomic_clear_acq_int		atomic_clear_acq_32
1082 #define atomic_clear_rel_int		atomic_clear_rel_32
1083 #define atomic_set_int			atomic_set_32
1084 #define atomic_set_acq_int		atomic_set_acq_32
1085 #define atomic_set_rel_int		atomic_set_rel_32
1086 #define atomic_fcmpset_int		atomic_fcmpset_32
1087 #define atomic_fcmpset_acq_int		atomic_fcmpset_acq_32
1088 #define atomic_fcmpset_rel_int		atomic_fcmpset_rel_32
1089 #define atomic_cmpset_int		atomic_cmpset_32
1090 #define atomic_cmpset_acq_int		atomic_cmpset_acq_32
1091 #define atomic_cmpset_rel_int		atomic_cmpset_rel_32
1092 #define atomic_fetchadd_int		atomic_fetchadd_32
1093 #define atomic_readandclear_int		atomic_readandclear_32
1094 #define atomic_load_acq_int		atomic_load_acq_32
1095 #define atomic_store_rel_int		atomic_store_rel_32
1096 #define atomic_swap_int			atomic_swap_32
1097 
1098 /*
1099  * For:
1100  *  - atomic_load_acq_8
1101  *  - atomic_load_acq_16
1102  *  - atomic_testandset_acq_long
1103  */
1104 #include <sys/_atomic_subword.h>
1105 
1106 #endif /* _MACHINE_ATOMIC_H_ */
1107