xref: /freebsd/sys/arm/include/atomic.h (revision a4e5e0106ac7145f56eb39a691e302cabb4635be)
1 /* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */
2 
3 /*-
4  * SPDX-License-Identifier: BSD-4-Clause
5  *
6  * Copyright (C) 2003-2004 Olivier Houchard
7  * Copyright (C) 1994-1997 Mark Brinicombe
8  * Copyright (C) 1994 Brini
9  * All rights reserved.
10  *
11  * This code is derived from software written for Brini by Mark Brinicombe
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by Brini.
24  * 4. The name of Brini may not be used to endorse or promote products
25  *    derived from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
28  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
29  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
30  * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
33  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
34  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
35  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
36  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 #ifndef	_MACHINE_ATOMIC_H_
40 #define	_MACHINE_ATOMIC_H_
41 
42 #include <sys/atomic_common.h>
43 
44 #if __ARM_ARCH >= 7
45 #define isb()  __asm __volatile("isb" : : : "memory")
46 #define dsb()  __asm __volatile("dsb" : : : "memory")
47 #define dmb()  __asm __volatile("dmb" : : : "memory")
48 #else
49 #define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
50 #define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
51 #define dmb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
52 #endif
53 
/* Full, read and write memory barriers all expand to dmb(). */
#define mb()   dmb()
#define rmb()  dmb()
#define wmb()  dmb()

/*
 * Feature flag; presumably tested by other headers to learn that the 64-bit
 * atomic operations below are available (consumers are not visible here).
 */
#define	ARM_HAVE_ATOMIC64
59 
/*
 * Generate atomic_<OP>_acq_long() and atomic_<OP>_rel_long() on top of the
 * plain atomic_<OP>_long(): the acquire form issues dmb() after the
 * operation, the release form issues dmb() before it.
 */
#define ATOMIC_ACQ_REL_LONG(OP)						\
static __inline void							\
atomic_##OP##_acq_long(__volatile u_long *p, u_long v)			\
{									\
	atomic_##OP##_long(p, v);					\
	dmb();								\
}									\
									\
static __inline void							\
atomic_##OP##_rel_long(__volatile u_long *p, u_long v)			\
{									\
	dmb();								\
	atomic_##OP##_long(p, v);					\
}
74 
/*
 * Generate atomic_<OP>_acq_<BITS>() and atomic_<OP>_rel_<BITS>() on top of
 * the plain atomic_<OP>_<BITS>(): the acquire form issues dmb() after the
 * operation, the release form issues dmb() before it.
 */
#define	ATOMIC_ACQ_REL(OP, BITS)					\
static __inline void							\
atomic_##OP##_acq_##BITS(__volatile uint##BITS##_t *p, uint##BITS##_t v)\
{									\
	atomic_##OP##_##BITS(p, v);					\
	dmb();								\
}									\
									\
static __inline void							\
atomic_##OP##_rel_##BITS(__volatile uint##BITS##_t *p, uint##BITS##_t v)\
{									\
	dmb();								\
	atomic_##OP##_##BITS(p, v);					\
}
89 
90 static __inline void
91 atomic_add_32(volatile uint32_t *p, uint32_t val)
92 {
93 	uint32_t tmp = 0, tmp2 = 0;
94 
95 	__asm __volatile(
96 	    "1: ldrex	%0, [%2]	\n"
97 	    "   add	%0, %0, %3	\n"
98 	    "   strex	%1, %0, [%2]	\n"
99 	    "   cmp	%1, #0		\n"
100 	    "   it	ne		\n"
101 	    "   bne	1b		\n"
102 	    : "=&r" (tmp), "+r" (tmp2)
103 	    ,"+r" (p), "+r" (val) : : "cc", "memory");
104 }
105 
106 static __inline void
107 atomic_add_64(volatile uint64_t *p, uint64_t val)
108 {
109 	uint64_t tmp;
110 	uint32_t exflag;
111 
112 	__asm __volatile(
113 	    "1:							\n"
114 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
115 	    "   adds	%Q[tmp], %Q[val]			\n"
116 	    "   adc	%R[tmp], %R[tmp], %R[val]		\n"
117 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
118 	    "   teq	%[exf], #0				\n"
119 	    "   it	ne					\n"
120 	    "   bne	1b					\n"
121 	    : [exf] "=&r" (exflag),
122 	      [tmp] "=&r" (tmp)
123 	    : [ptr] "r"   (p),
124 	      [val] "r"   (val)
125 	    : "cc", "memory");
126 }
127 
128 static __inline void
129 atomic_add_long(volatile u_long *p, u_long val)
130 {
131 
132 	atomic_add_32((volatile uint32_t *)p, val);
133 }
134 
135 ATOMIC_ACQ_REL(add, 32)
136 ATOMIC_ACQ_REL(add, 64)
137 ATOMIC_ACQ_REL_LONG(add)
138 
139 static __inline void
140 atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
141 {
142 	uint32_t tmp = 0, tmp2 = 0;
143 
144 	__asm __volatile(
145 	    "1: ldrex	%0, [%2]	\n"
146 	    "   bic	%0, %0, %3	\n"
147 	    "   strex	%1, %0, [%2]	\n"
148 	    "   cmp	%1, #0		\n"
149 	    "   it	ne		\n"
150 	    "   bne	1b		\n"
151 	    : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
152 	    : : "cc", "memory");
153 }
154 
155 static __inline void
156 atomic_clear_64(volatile uint64_t *p, uint64_t val)
157 {
158 	uint64_t tmp;
159 	uint32_t exflag;
160 
161 	__asm __volatile(
162 	    "1:							\n"
163 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
164 	    "   bic	%Q[tmp], %Q[val]			\n"
165 	    "   bic	%R[tmp], %R[val]			\n"
166 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
167 	    "   teq	%[exf], #0				\n"
168 	    "   it	ne					\n"
169 	    "   bne	1b					\n"
170 	    : [exf] "=&r" (exflag),
171 	      [tmp] "=&r" (tmp)
172 	    : [ptr] "r"   (p),
173 	      [val] "r"   (val)
174 	    : "cc", "memory");
175 }
176 
177 static __inline void
178 atomic_clear_long(volatile u_long *address, u_long setmask)
179 {
180 
181 	atomic_clear_32((volatile uint32_t *)address, setmask);
182 }
183 
184 ATOMIC_ACQ_REL(clear, 32)
185 ATOMIC_ACQ_REL(clear, 64)
186 ATOMIC_ACQ_REL_LONG(clear)
187 
188 #define ATOMIC_FCMPSET_CODE(RET, TYPE, SUF)                   \
189     {                                                         \
190 	TYPE tmp;                                             \
191                                                               \
192 	__asm __volatile(                                     \
193 	    "1: ldrex" SUF "   %[tmp], [%[ptr]]          \n"  \
194 	    "   ldr" SUF "     %[ret], [%[oldv]]         \n"  \
195 	    "   teq            %[tmp], %[ret]            \n"  \
196 	    "   ittee          ne                        \n"  \
197 	    "   str" SUF "ne   %[tmp], [%[oldv]]         \n"  \
198 	    "   movne          %[ret], #0                \n"  \
199 	    "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n"  \
200 	    "   eorseq         %[ret], #1                \n"  \
201 	    "   beq            1b                        \n"  \
202 	    : [ret] "=&r" (RET),                              \
203 	      [tmp] "=&r" (tmp)                               \
204 	    : [ptr] "r"   (_ptr),                             \
205 	      [oldv] "r"  (_old),                             \
206 	      [newv] "r"  (_new)                              \
207 	    : "cc", "memory");                                \
208     }
209 
210 #define ATOMIC_FCMPSET_CODE64(RET)                                 \
211     {                                                              \
212 	uint64_t cmp, tmp;                                         \
213                                                                    \
214 	__asm __volatile(                                          \
215 	    "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n"  \
216 	    "   ldrd     %Q[cmp], %R[cmp], [%[oldv]]          \n"  \
217 	    "   teq      %Q[tmp], %Q[cmp]                     \n"  \
218 	    "   it       eq                                   \n"  \
219 	    "   teqeq    %R[tmp], %R[cmp]                     \n"  \
220 	    "   ittee    ne                                   \n"  \
221 	    "   movne    %[ret], #0                           \n"  \
222 	    "   strdne   %[cmp], [%[oldv]]                    \n"  \
223 	    "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n"  \
224 	    "   eorseq   %[ret], #1                           \n"  \
225 	    "   beq      1b                                   \n"  \
226 	    : [ret] "=&r" (RET),                                   \
227 	      [cmp] "=&r" (cmp),                                   \
228 	      [tmp] "=&r" (tmp)                                    \
229 	    : [ptr] "r"   (_ptr),                                  \
230 	      [oldv] "r"  (_old),                                  \
231 	      [newv] "r"  (_new)                                   \
232 	    : "cc", "memory");                                     \
233     }
234 
235 static __inline int
236 atomic_fcmpset_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
237 {
238 	int ret;
239 
240 	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
241 	return (ret);
242 }
243 #define	atomic_fcmpset_8	atomic_fcmpset_8
244 
245 static __inline int
246 atomic_fcmpset_acq_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
247 {
248 	int ret;
249 
250 	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
251 	dmb();
252 	return (ret);
253 }
254 
255 static __inline int
256 atomic_fcmpset_rel_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
257 {
258 	int ret;
259 
260 	dmb();
261 	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
262 	return (ret);
263 }
264 
265 static __inline int
266 atomic_fcmpset_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
267 {
268 	int ret;
269 
270 	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
271 	return (ret);
272 }
273 #define	atomic_fcmpset_16	atomic_fcmpset_16
274 
275 static __inline int
276 atomic_fcmpset_acq_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
277 {
278 	int ret;
279 
280 	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
281 	dmb();
282 	return (ret);
283 }
284 
285 static __inline int
286 atomic_fcmpset_rel_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
287 {
288 	int ret;
289 
290 	dmb();
291 	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
292 	return (ret);
293 }
294 
295 static __inline int
296 atomic_fcmpset_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
297 {
298 	int ret;
299 
300 	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
301 	return (ret);
302 }
303 
304 static __inline int
305 atomic_fcmpset_acq_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
306 {
307 	int ret;
308 
309 	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
310 	dmb();
311 	return (ret);
312 }
313 
314 static __inline int
315 atomic_fcmpset_rel_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
316 {
317 	int ret;
318 
319 	dmb();
320 	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
321 	return (ret);
322 }
323 
324 static __inline int
325 atomic_fcmpset_long(volatile u_long *_ptr, u_long *_old, u_long _new)
326 {
327 	int ret;
328 
329 	ATOMIC_FCMPSET_CODE(ret, u_long, "");
330 	return (ret);
331 }
332 
333 static __inline int
334 atomic_fcmpset_acq_long(volatile u_long *_ptr, u_long *_old, u_long _new)
335 {
336 	int ret;
337 
338 	ATOMIC_FCMPSET_CODE(ret, u_long, "");
339 	dmb();
340 	return (ret);
341 }
342 
343 static __inline int
344 atomic_fcmpset_rel_long(volatile u_long *_ptr, u_long *_old, u_long _new)
345 {
346 	int ret;
347 
348 	dmb();
349 	ATOMIC_FCMPSET_CODE(ret, u_long, "");
350 	return (ret);
351 }
352 
/*
 * 64-bit fetching compare-and-set, no barrier.  Returns the result produced
 * by ATOMIC_FCMPSET_CODE64.
 */
static __inline int
atomic_fcmpset_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
	int ok;

	ATOMIC_FCMPSET_CODE64(ok);
	return (ok);
}
361 
/* 64-bit fetching compare-and-set; dmb() is issued after the operation. */
static __inline int
atomic_fcmpset_acq_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
	int ok;

	ATOMIC_FCMPSET_CODE64(ok);
	dmb();
	return (ok);
}
371 
/* 64-bit fetching compare-and-set; dmb() is issued before the operation. */
static __inline int
atomic_fcmpset_rel_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
	int ok;

	dmb();
	ATOMIC_FCMPSET_CODE64(ok);
	return (ok);
}
381 
382 #define ATOMIC_CMPSET_CODE(RET, SUF)                         \
383     {                                                        \
384 	__asm __volatile(                                    \
385 	    "1: ldrex" SUF "   %[ret], [%[ptr]]          \n" \
386 	    "   teq            %[ret], %[oldv]           \n" \
387 	    "   itee           ne                        \n" \
388 	    "   movne          %[ret], #0                \n" \
389 	    "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n" \
390 	    "   eorseq         %[ret], #1                \n" \
391 	    "   beq            1b                        \n" \
392 	    : [ret] "=&r" (RET)                              \
393 	    : [ptr] "r"   (_ptr),                            \
394 	      [oldv] "r"  (_old),                            \
395 	      [newv] "r"  (_new)                             \
396 	    : "cc", "memory");                               \
397     }
398 
399 #define ATOMIC_CMPSET_CODE64(RET)                                 \
400     {                                                             \
401 	uint64_t tmp;                                             \
402 	                                                          \
403 	__asm __volatile(                                         \
404 	    "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n" \
405 	    "   teq      %Q[tmp], %Q[oldv]                    \n" \
406 	    "   it       eq                                   \n" \
407 	    "   teqeq    %R[tmp], %R[oldv]                    \n" \
408 	    "   itee     ne                                   \n" \
409 	    "   movne    %[ret], #0                           \n" \
410 	    "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n" \
411 	    "   eorseq   %[ret], #1                           \n" \
412 	    "   beq      1b                                   \n" \
413 	    : [ret] "=&r" (RET),                                  \
414 	      [tmp] "=&r" (tmp)                                   \
415 	    : [ptr] "r"   (_ptr),                                 \
416 	      [oldv] "r"  (_old),                                 \
417 	      [newv] "r"  (_new)                                  \
418 	    : "cc", "memory");                                    \
419     }
420 
/*
 * 8-bit compare-and-set, no barrier.  Returns the result produced by
 * ATOMIC_CMPSET_CODE with the byte ("b") access suffix.
 */
static __inline int
atomic_cmpset_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
	int done;

	ATOMIC_CMPSET_CODE(done, "b");
	return (done);
}
#define	atomic_cmpset_8		atomic_cmpset_8
430 
/* 8-bit compare-and-set; dmb() is issued after the operation. */
static __inline int
atomic_cmpset_acq_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
	int done;

	ATOMIC_CMPSET_CODE(done, "b");
	dmb();
	return (done);
}
440 
/* 8-bit compare-and-set; dmb() is issued before the operation. */
static __inline int
atomic_cmpset_rel_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
	int done;

	dmb();
	ATOMIC_CMPSET_CODE(done, "b");
	return (done);
}
450 
/*
 * 16-bit compare-and-set, no barrier.  Returns the result produced by
 * ATOMIC_CMPSET_CODE with the halfword ("h") access suffix.
 */
static __inline int
atomic_cmpset_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
	int done;

	ATOMIC_CMPSET_CODE(done, "h");
	return (done);
}
#define	atomic_cmpset_16	atomic_cmpset_16
460 
/* 16-bit compare-and-set; dmb() is issued after the operation. */
static __inline int
atomic_cmpset_acq_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
	int done;

	ATOMIC_CMPSET_CODE(done, "h");
	dmb();
	return (done);
}
470 
/* 16-bit compare-and-set; dmb() is issued before the operation. */
static __inline int
atomic_cmpset_rel_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
	int done;

	dmb();
	ATOMIC_CMPSET_CODE(done, "h");
	return (done);
}
480 
/*
 * 32-bit compare-and-set, no barrier.  Returns the result produced by
 * ATOMIC_CMPSET_CODE with an empty access suffix.
 */
static __inline int
atomic_cmpset_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
	int done;

	ATOMIC_CMPSET_CODE(done, "");
	return (done);
}
489 
/* 32-bit compare-and-set; dmb() is issued after the operation. */
static __inline int
atomic_cmpset_acq_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
	int done;

	ATOMIC_CMPSET_CODE(done, "");
	dmb();
	return (done);
}
499 
/* 32-bit compare-and-set; dmb() is issued before the operation. */
static __inline int
atomic_cmpset_rel_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
	int done;

	dmb();
	ATOMIC_CMPSET_CODE(done, "");
	return (done);
}
509 
510 static __inline int
511 atomic_cmpset_long(volatile u_long *_ptr, u_long _old, u_long _new)
512 {
513 	int ret;
514 
515 	ATOMIC_CMPSET_CODE(ret, "");
516 	return (ret);
517 }
518 
519 static __inline int
520 atomic_cmpset_acq_long(volatile u_long *_ptr, u_long _old, u_long _new)
521 {
522 	int ret;
523 
524 	ATOMIC_CMPSET_CODE(ret, "");
525 	dmb();
526 	return (ret);
527 }
528 
529 static __inline int
530 atomic_cmpset_rel_long(volatile u_long *_ptr, u_long _old, u_long _new)
531 {
532 	int ret;
533 
534 	dmb();
535 	ATOMIC_CMPSET_CODE(ret, "");
536 	return (ret);
537 }
538 
/*
 * 64-bit compare-and-set, no barrier.  Returns the result produced by
 * ATOMIC_CMPSET_CODE64.
 */
static __inline int
atomic_cmpset_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
	int done;

	ATOMIC_CMPSET_CODE64(done);
	return (done);
}
547 
/* 64-bit compare-and-set; dmb() is issued after the operation. */
static __inline int
atomic_cmpset_acq_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
	int done;

	ATOMIC_CMPSET_CODE64(done);
	dmb();
	return (done);
}
557 
/* 64-bit compare-and-set; dmb() is issued before the operation. */
static __inline int
atomic_cmpset_rel_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
	int done;

	dmb();
	ATOMIC_CMPSET_CODE64(done);
	return (done);
}
567 
568 static __inline uint32_t
569 atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
570 {
571 	uint32_t tmp = 0, tmp2 = 0, ret = 0;
572 
573 	__asm __volatile(
574 	    "1: ldrex	%0, [%3]	\n"
575 	    "   add	%1, %0, %4	\n"
576 	    "   strex	%2, %1, [%3]	\n"
577 	    "   cmp	%2, #0		\n"
578 	    "   it	ne		\n"
579 	    "   bne	1b		\n"
580 	    : "+r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
581 	    : : "cc", "memory");
582 	return (ret);
583 }
584 
585 static __inline uint64_t
586 atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
587 {
588 	uint64_t ret, tmp;
589 	uint32_t exflag;
590 
591 	__asm __volatile(
592 	    "1:							\n"
593 	    "   ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
594 	    "   adds	%Q[tmp], %Q[ret], %Q[val]		\n"
595 	    "   adc	%R[tmp], %R[ret], %R[val]		\n"
596 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
597 	    "   teq	%[exf], #0				\n"
598 	    "   it	ne					\n"
599 	    "   bne	1b					\n"
600 	    : [ret] "=&r" (ret),
601 	      [exf] "=&r" (exflag),
602 	      [tmp] "=&r" (tmp)
603 	    : [ptr] "r"   (p),
604 	      [val] "r"   (val)
605 	    : "cc", "memory");
606 	return (ret);
607 }
608 
609 static __inline u_long
610 atomic_fetchadd_long(volatile u_long *p, u_long val)
611 {
612 
613 	return (atomic_fetchadd_32((volatile uint32_t *)p, val));
614 }
615 
/* Read the 32-bit word at *p, then issue dmb() before returning it. */
static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
	uint32_t value = *p;

	dmb();
	return (value);
}
625 
626 static __inline uint64_t
627 atomic_load_64(volatile uint64_t *p)
628 {
629 	uint64_t ret;
630 
631 	/*
632 	 * The only way to atomically load 64 bits is with LDREXD which puts the
633 	 * exclusive monitor into the exclusive state, so reset it to open state
634 	 * with CLREX because we don't actually need to store anything.
635 	 */
636 	__asm __volatile(
637 	    "ldrexd	%Q[ret], %R[ret], [%[ptr]]	\n"
638 	    "clrex					\n"
639 	    : [ret] "=&r" (ret)
640 	    : [ptr] "r"   (p)
641 	    : "cc", "memory");
642 	return (ret);
643 }
644 
/*
 * Atomically read the 64-bit quantity at *p via atomic_load_64(), then issue
 * dmb() before returning it.
 */
static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
	uint64_t value = atomic_load_64(p);

	dmb();
	return (value);
}
654 
655 static __inline u_long
656 atomic_load_acq_long(volatile u_long *p)
657 {
658 	u_long v;
659 
660 	v = *p;
661 	dmb();
662 	return (v);
663 }
664 
665 static __inline uint32_t
666 atomic_readandclear_32(volatile uint32_t *p)
667 {
668 	uint32_t ret, tmp = 0, tmp2 = 0;
669 
670 	__asm __volatile(
671 	    "1: ldrex	%0, [%3]	\n"
672 	    "   mov	%1, #0		\n"
673 	    "   strex	%2, %1, [%3]	\n"
674 	    "   cmp	%2, #0		\n"
675 	    "   it	ne		\n"
676 	    "   bne	1b		\n"
677 	    : "=r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p)
678 	    : : "cc", "memory");
679 	return (ret);
680 }
681 
682 static __inline uint64_t
683 atomic_readandclear_64(volatile uint64_t *p)
684 {
685 	uint64_t ret, tmp;
686 	uint32_t exflag;
687 
688 	__asm __volatile(
689 	    "1:							\n"
690 	    "   ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
691 	    "   mov	%Q[tmp], #0				\n"
692 	    "   mov	%R[tmp], #0				\n"
693 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
694 	    "   teq	%[exf], #0				\n"
695 	    "   it	ne					\n"
696 	    "   bne	1b					\n"
697 	    : [ret] "=&r" (ret),
698 	      [exf] "=&r" (exflag),
699 	      [tmp] "=&r" (tmp)
700 	    : [ptr] "r"   (p)
701 	    : "cc", "memory");
702 	return (ret);
703 }
704 
705 static __inline u_long
706 atomic_readandclear_long(volatile u_long *p)
707 {
708 
709 	return (atomic_readandclear_32((volatile uint32_t *)p));
710 }
711 
712 static __inline void
713 atomic_set_32(volatile uint32_t *address, uint32_t setmask)
714 {
715 	uint32_t tmp = 0, tmp2 = 0;
716 
717 	__asm __volatile(
718 	    "1: ldrex	%0, [%2]	\n"
719 	    "   orr	%0, %0, %3	\n"
720 	    "   strex	%1, %0, [%2]	\n"
721 	    "   cmp	%1, #0		\n"
722 	    "   it	ne		\n"
723 	    "   bne	1b		\n"
724 	    : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
725 	    : : "cc", "memory");
726 }
727 
728 static __inline void
729 atomic_set_64(volatile uint64_t *p, uint64_t val)
730 {
731 	uint64_t tmp;
732 	uint32_t exflag;
733 
734 	__asm __volatile(
735 	    "1:							\n"
736 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
737 	    "   orr	%Q[tmp], %Q[val]			\n"
738 	    "   orr	%R[tmp], %R[val]			\n"
739 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
740 	    "   teq	%[exf], #0				\n"
741 	    "   it	ne					\n"
742 	    "   bne	1b					\n"
743 	    : [exf] "=&r" (exflag),
744 	      [tmp] "=&r" (tmp)
745 	    : [ptr] "r"   (p),
746 	      [val] "r"   (val)
747 	    : "cc", "memory");
748 }
749 
750 static __inline void
751 atomic_set_long(volatile u_long *address, u_long setmask)
752 {
753 
754 	atomic_set_32((volatile uint32_t *)address, setmask);
755 }
756 
757 ATOMIC_ACQ_REL(set, 32)
758 ATOMIC_ACQ_REL(set, 64)
759 ATOMIC_ACQ_REL_LONG(set)
760 
761 static __inline void
762 atomic_subtract_32(volatile uint32_t *p, uint32_t val)
763 {
764 	uint32_t tmp = 0, tmp2 = 0;
765 
766 	__asm __volatile(
767 	    "1: ldrex	%0, [%2]	\n"
768 	    "   sub	%0, %0, %3	\n"
769 	    "   strex	%1, %0, [%2]	\n"
770 	    "   cmp	%1, #0		\n"
771 	    "   it	ne		\n"
772 	    "   bne	1b		\n"
773 	    : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
774 	    : : "cc", "memory");
775 }
776 
777 static __inline void
778 atomic_subtract_64(volatile uint64_t *p, uint64_t val)
779 {
780 	uint64_t tmp;
781 	uint32_t exflag;
782 
783 	__asm __volatile(
784 	    "1:							\n"
785 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
786 	    "   subs	%Q[tmp], %Q[val]			\n"
787 	    "   sbc	%R[tmp], %R[tmp], %R[val]		\n"
788 	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
789 	    "   teq	%[exf], #0				\n"
790 	    "   it	ne					\n"
791 	    "   bne	1b					\n"
792 	    : [exf] "=&r" (exflag),
793 	      [tmp] "=&r" (tmp)
794 	    : [ptr] "r"   (p),
795 	      [val] "r"   (val)
796 	    : "cc", "memory");
797 }
798 
799 static __inline void
800 atomic_subtract_long(volatile u_long *p, u_long val)
801 {
802 
803 	atomic_subtract_32((volatile uint32_t *)p, val);
804 }
805 
806 ATOMIC_ACQ_REL(subtract, 32)
807 ATOMIC_ACQ_REL(subtract, 64)
808 ATOMIC_ACQ_REL_LONG(subtract)
809 
810 static __inline void
811 atomic_store_64(volatile uint64_t *p, uint64_t val)
812 {
813 	uint64_t tmp;
814 	uint32_t exflag;
815 
816 	/*
817 	 * The only way to atomically store 64 bits is with STREXD, which will
818 	 * succeed only if paired up with a preceeding LDREXD using the same
819 	 * address, so we read and discard the existing value before storing.
820 	 */
821 	__asm __volatile(
822 	    "1:							\n"
823 	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
824 	    "   strexd	%[exf], %Q[val], %R[val], [%[ptr]]	\n"
825 	    "   teq	%[exf], #0				\n"
826 	    "   it	ne					\n"
827 	    "   bne	1b					\n"
828 	    : [tmp] "=&r" (tmp),
829 	      [exf] "=&r" (exflag)
830 	    : [ptr] "r"   (p),
831 	      [val] "r"   (val)
832 	    : "cc", "memory");
833 }
834 
/* Issue dmb(), then write v to the 32-bit word at *p. */
static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
{
	dmb();
	*p = v;
}
842 
/*
 * Issue dmb(), then atomically write val to the 64-bit quantity at *p via
 * atomic_store_64().
 */
static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{
	dmb();
	atomic_store_64(p, val);
}
850 
851 static __inline void
852 atomic_store_rel_long(volatile u_long *p, u_long v)
853 {
854 
855 	dmb();
856 	*p = v;
857 }
858 
859 static __inline int
860 atomic_testandclear_32(volatile uint32_t *ptr, u_int bit)
861 {
862 	int newv, oldv, result;
863 
864 	__asm __volatile(
865 	    "   mov     ip, #1					\n"
866 	    "   lsl     ip, ip, %[bit]				\n"
867 	    /*  Done with %[bit] as input, reuse below as output. */
868 	    "1:							\n"
869 	    "   ldrex	%[oldv], [%[ptr]]			\n"
870 	    "   bic     %[newv], %[oldv], ip			\n"
871 	    "   strex	%[bit], %[newv], [%[ptr]]		\n"
872 	    "   teq	%[bit], #0				\n"
873 	    "   it	ne					\n"
874 	    "   bne	1b					\n"
875 	    "   ands	%[bit], %[oldv], ip			\n"
876 	    "   it	ne					\n"
877 	    "   movne   %[bit], #1                              \n"
878 	    : [bit]  "=&r"   (result),
879 	      [oldv] "=&r"   (oldv),
880 	      [newv] "=&r"   (newv)
881 	    : [ptr]  "r"     (ptr),
882 	             "[bit]" (bit & 0x1f)
883 	    : "cc", "ip", "memory");
884 
885 	return (result);
886 }
887 
888 static __inline int
889 atomic_testandclear_int(volatile u_int *p, u_int v)
890 {
891 
892 	return (atomic_testandclear_32((volatile uint32_t *)p, v));
893 }
894 
895 static __inline int
896 atomic_testandclear_long(volatile u_long *p, u_int v)
897 {
898 
899 	return (atomic_testandclear_32((volatile uint32_t *)p, v));
900 }
901 #define	atomic_testandclear_long	atomic_testandclear_long
902 
903 
904 static __inline int
905 atomic_testandclear_64(volatile uint64_t *p, u_int v)
906 {
907 	volatile uint32_t *p32;
908 
909 	p32 = (volatile uint32_t *)p;
910 	/*
911 	 * Assume little-endian,
912 	 * atomic_testandclear_32() uses only last 5 bits of v
913 	 */
914 	if ((v & 0x20) != 0)
915 		p32++;
916 	return (atomic_testandclear_32(p32, v));
917 }
918 
919 static __inline int
920 atomic_testandset_32(volatile uint32_t *ptr, u_int bit)
921 {
922 	int newv, oldv, result;
923 
924 	__asm __volatile(
925 	    "   mov     ip, #1					\n"
926 	    "   lsl     ip, ip, %[bit]				\n"
927 	    /*  Done with %[bit] as input, reuse below as output. */
928 	    "1:							\n"
929 	    "   ldrex	%[oldv], [%[ptr]]			\n"
930 	    "   orr     %[newv], %[oldv], ip			\n"
931 	    "   strex	%[bit], %[newv], [%[ptr]]		\n"
932 	    "   teq	%[bit], #0				\n"
933 	    "   it	ne					\n"
934 	    "   bne	1b					\n"
935 	    "   ands	%[bit], %[oldv], ip			\n"
936 	    "   it	ne					\n"
937 	    "   movne   %[bit], #1                              \n"
938 	    : [bit]  "=&r"   (result),
939 	      [oldv] "=&r"   (oldv),
940 	      [newv] "=&r"   (newv)
941 	    : [ptr]  "r"     (ptr),
942 	             "[bit]" (bit & 0x1f)
943 	    : "cc", "ip", "memory");
944 
945 	return (result);
946 }
947 
948 static __inline int
949 atomic_testandset_int(volatile u_int *p, u_int v)
950 {
951 
952 	return (atomic_testandset_32((volatile uint32_t *)p, v));
953 }
954 
955 static __inline int
956 atomic_testandset_long(volatile u_long *p, u_int v)
957 {
958 
959 	return (atomic_testandset_32((volatile uint32_t *)p, v));
960 }
961 #define	atomic_testandset_long	atomic_testandset_long
962 
963 static __inline int
964 atomic_testandset_64(volatile uint64_t *p, u_int v)
965 {
966 	volatile uint32_t *p32;
967 
968 	p32 = (volatile uint32_t *)p;
969 	/*
970 	 * Assume little-endian,
971 	 * atomic_testandset_32() uses only last 5 bits of v
972 	 */
973 	if ((v & 0x20) != 0)
974 		p32++;
975 	return (atomic_testandset_32(p32, v));
976 }
977 
978 static __inline uint32_t
979 atomic_swap_32(volatile uint32_t *p, uint32_t v)
980 {
981 	uint32_t ret, exflag;
982 
983 	__asm __volatile(
984 	    "1: ldrex	%[ret], [%[ptr]]		\n"
985 	    "   strex	%[exf], %[val], [%[ptr]]	\n"
986 	    "   teq	%[exf], #0			\n"
987 	    "   it	ne				\n"
988 	    "   bne	1b				\n"
989 	    : [ret] "=&r"  (ret),
990 	      [exf] "=&r" (exflag)
991 	    : [val] "r"  (v),
992 	      [ptr] "r"  (p)
993 	    : "cc", "memory");
994 	return (ret);
995 }
996 
997 static __inline u_long
998 atomic_swap_long(volatile u_long *p, u_long v)
999 {
1000 
1001 	return (atomic_swap_32((volatile uint32_t *)p, v));
1002 }
1003 
1004 static __inline uint64_t
1005 atomic_swap_64(volatile uint64_t *p, uint64_t v)
1006 {
1007 	uint64_t ret;
1008 	uint32_t exflag;
1009 
1010 	__asm __volatile(
1011 	    "1: ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
1012 	    "   strexd	%[exf], %Q[val], %R[val], [%[ptr]]	\n"
1013 	    "   teq	%[exf], #0				\n"
1014 	    "   it	ne					\n"
1015 	    "   bne	1b					\n"
1016 	    : [ret] "=&r" (ret),
1017 	      [exf] "=&r" (exflag)
1018 	    : [val] "r"   (v),
1019 	      [ptr] "r"   (p)
1020 	    : "cc", "memory");
1021 	return (ret);
1022 }
1023 
/* The generator macros are private to this header; retire them here. */
#undef ATOMIC_ACQ_REL_LONG
#undef ATOMIC_ACQ_REL
1026 
/* Acquire fence; implemented with dmb(). */
static __inline void
atomic_thread_fence_acq(void)
{
	dmb();
}
1033 
/* Release fence; implemented with dmb(). */
static __inline void
atomic_thread_fence_rel(void)
{
	dmb();
}
1040 
/* Acquire-release fence; implemented with dmb(). */
static __inline void
atomic_thread_fence_acq_rel(void)
{
	dmb();
}
1047 
/* Sequentially consistent fence; implemented with dmb(). */
static __inline void
atomic_thread_fence_seq_cst(void)
{
	dmb();
}
1054 
/* Pointer-sized operations are provided by the 32-bit implementations. */
#define atomic_set_ptr			atomic_set_32
#define atomic_set_acq_ptr		atomic_set_acq_32
#define atomic_set_rel_ptr		atomic_set_rel_32
#define atomic_clear_ptr		atomic_clear_32
#define atomic_clear_acq_ptr		atomic_clear_acq_32
#define atomic_clear_rel_ptr		atomic_clear_rel_32
#define atomic_cmpset_ptr		atomic_cmpset_32
#define atomic_cmpset_acq_ptr		atomic_cmpset_acq_32
#define atomic_cmpset_rel_ptr		atomic_cmpset_rel_32
#define atomic_fcmpset_ptr		atomic_fcmpset_32
#define atomic_fcmpset_acq_ptr		atomic_fcmpset_acq_32
#define atomic_fcmpset_rel_ptr		atomic_fcmpset_rel_32
#define atomic_load_acq_ptr		atomic_load_acq_32
#define atomic_store_rel_ptr		atomic_store_rel_32
#define atomic_readandclear_ptr		atomic_readandclear_32
#define atomic_swap_ptr			atomic_swap_32
1071 
/* int-sized operations are provided by the 32-bit implementations. */
#define atomic_add_int			atomic_add_32
#define atomic_add_acq_int		atomic_add_acq_32
#define atomic_add_rel_int		atomic_add_rel_32
#define atomic_subtract_int		atomic_subtract_32
#define atomic_subtract_acq_int		atomic_subtract_acq_32
#define atomic_subtract_rel_int		atomic_subtract_rel_32
#define atomic_set_int			atomic_set_32
#define atomic_set_acq_int		atomic_set_acq_32
#define atomic_set_rel_int		atomic_set_rel_32
#define atomic_clear_int		atomic_clear_32
#define atomic_clear_acq_int		atomic_clear_acq_32
#define atomic_clear_rel_int		atomic_clear_rel_32
#define atomic_cmpset_int		atomic_cmpset_32
#define atomic_cmpset_acq_int		atomic_cmpset_acq_32
#define atomic_cmpset_rel_int		atomic_cmpset_rel_32
#define atomic_fcmpset_int		atomic_fcmpset_32
#define atomic_fcmpset_acq_int		atomic_fcmpset_acq_32
#define atomic_fcmpset_rel_int		atomic_fcmpset_rel_32
#define atomic_load_acq_int		atomic_load_acq_32
#define atomic_store_rel_int		atomic_store_rel_32
#define atomic_fetchadd_int		atomic_fetchadd_32
#define atomic_readandclear_int		atomic_readandclear_32
#define atomic_swap_int			atomic_swap_32
1095 
1096 /*
1097  * For:
1098  *  - atomic_load_acq_8
1099  *  - atomic_load_acq_16
1100  *  - atomic_testandset_acq_long
1101  */
1102 #include <sys/_atomic_subword.h>
1103 
1104 #endif /* _MACHINE_ATOMIC_H_ */
1105