/*-
 * Copyright (c) 2013 Andrew Turner <andrew@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef	_MACHINE_ATOMIC_H_
#define	_MACHINE_ATOMIC_H_

#define	isb()		__asm __volatile("isb" : : : "memory")

/*
 * Options for DMB and DSB:
 *	oshld	Outer Shareable, load
 *	oshst	Outer Shareable, store
 *	osh	Outer Shareable, all
 *	nshld	Non-shareable, load
 *	nshst	Non-shareable, store
 *	nsh	Non-shareable, all
 *	ishld	Inner Shareable, load
 *	ishst	Inner Shareable, store
 *	ish	Inner Shareable, all
 *	ld	Full system, load
 *	st	Full system, store
 *	sy	Full system, all
 */
#define	dsb(opt)	__asm __volatile("dsb " __STRING(opt) : : : "memory")
#define	dmb(opt)	__asm __volatile("dmb " __STRING(opt) : : : "memory")

#define	mb()	dmb(sy)	/* Full system memory barrier all */
#define	wmb()	dmb(st)	/* Full system memory barrier store */
#define	rmb()	dmb(ld)	/* Full system memory barrier load */
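
/*
 * mb(), wmb() and rmb() expand to full-system DMB barriers.  An
 * illustrative pairing (the variable names are examples only): a
 * producer does "data = val; wmb(); flag = 1;" while the consumer does
 * "while (flag == 0) ; rmb(); use(data);", so the data write is made
 * visible before the flag write and is read only after the flag is seen.
 */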
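/*
 * The read-modify-write atomics below are built from exclusive
 * load/store pairs: ldxr loads the value and marks the address for
 * exclusive access, stxr attempts the store and sets its status
 * register to non-zero if the reservation was lost, and cbnz retries
 * the sequence until the store succeeds.
 */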
static __inline void
atomic_add_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%w0, [%2]      \n"
	    "   add	%w0, %w0, %w3  \n"
	    "   stxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc"
	);
}

static __inline void
atomic_clear_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%w0, [%2]      \n"
	    "   bic	%w0, %w0, %w3  \n"
	    "   stxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc"
	);
}

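/*
 * Compare-and-set: if *p equals cmpval, store newval and return
 * non-zero; otherwise leave *p unchanged and return zero.  An
 * illustrative retry loop, where compute() stands in for any
 * hypothetical update function:
 *
 *	do {
 *		old = *p;
 *		new = compute(old);
 *	} while (atomic_cmpset_32(p, old, new) == 0);
 */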
static __inline int
atomic_cmpset_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: mov	%w1, #1        \n"
	    "   ldxr	%w0, [%2]      \n"
	    "   cmp	%w0, %w3       \n"
	    "   b.ne	2f             \n"
	    "   stxr	%w1, %w4, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    "2:"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (cmpval), "+r" (newval)
	    : : "cc"
	);

	return (!res);
}

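/*
 * atomic_fetchadd_32() adds val to *p and returns the value *p held
 * before the addition, so, for example, handing out sequence numbers
 * with "id = atomic_fetchadd_32(&counter, 1)" yields each old value
 * exactly once (counter being an illustrative variable).
 */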
static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp, ret;
	int res;

	__asm __volatile(
	    "1: ldxr	%w4, [%2]      \n"
	    "   add	%w0, %w4, %w3  \n"
	    "   stxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val), "=&r"(ret) : : "cc"
	);

	return (ret);
}

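/* Atomically replace *p with zero and return the previous value. */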
static __inline uint32_t
atomic_readandclear_32(volatile uint32_t *p)
{
	uint32_t tmp, ret;
	int res;

	__asm __volatile(
	    "   mov	%w0, #0        \n"
	    "1: ldxr	%w3, [%2]      \n"
	    "   stxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "=&r"(ret) : : "cc"
	);

	return (ret);
}

static __inline void
atomic_set_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%w0, [%2]      \n"
	    "   orr	%w0, %w0, %w3  \n"
	    "   stxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc"
	);
}

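/* Atomically store val into *p and return the previous value of *p. */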
static __inline uint32_t
atomic_swap_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%w0, [%2]      \n"
	    "   stxr	%w1, %w3, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);

	return (tmp);
}

static __inline void
atomic_subtract_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%w0, [%2]      \n"
	    "   sub	%w0, %w0, %w3  \n"
	    "   stxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc"
	);
}

#define	atomic_add_int		atomic_add_32
#define	atomic_clear_int	atomic_clear_32
#define	atomic_cmpset_int	atomic_cmpset_32
#define	atomic_fetchadd_int	atomic_fetchadd_32
#define	atomic_readandclear_int	atomic_readandclear_32
#define	atomic_set_int		atomic_set_32
#define	atomic_swap_int		atomic_swap_32
#define	atomic_subtract_int	atomic_subtract_32

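/*
 * Acquire variants: ldaxr performs the exclusive load with acquire
 * semantics, so memory accesses that are program-ordered after the
 * atomic operation cannot be observed before it.
 */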
static __inline void
atomic_add_acq_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldaxr	%w0, [%2]      \n"
	    "   add	%w0, %w0, %w3  \n"
	    "   stxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline void
atomic_clear_acq_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldaxr	%w0, [%2]      \n"
	    "   bic	%w0, %w0, %w3  \n"
	    "   stxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline int
atomic_cmpset_acq_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: mov	%w1, #1        \n"
	    "   ldaxr	%w0, [%2]      \n"
	    "   cmp	%w0, %w3       \n"
	    "   b.ne	2f             \n"
	    "   stxr	%w1, %w4, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    "2:"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (cmpval), "+r" (newval)
	    : : "cc", "memory"
	);

	return (!res);
}

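/* ldar is a load-acquire: later accesses cannot be reordered before it. */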
static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
	uint32_t ret;

	__asm __volatile(
	    "ldar	%w0, [%1] \n"
	    : "=&r" (ret) : "r" (p) : "memory");

	return (ret);
}

static __inline void
atomic_set_acq_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldaxr	%w0, [%2]      \n"
	    "   orr	%w0, %w0, %w3  \n"
	    "   stxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline void
atomic_subtract_acq_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldaxr	%w0, [%2]      \n"
	    "   sub	%w0, %w0, %w3  \n"
	    "   stxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

#define	atomic_add_acq_int	atomic_add_acq_32
#define	atomic_clear_acq_int	atomic_clear_acq_32
#define	atomic_cmpset_acq_int	atomic_cmpset_acq_32
#define	atomic_load_acq_int	atomic_load_acq_32
#define	atomic_set_acq_int	atomic_set_acq_32
#define	atomic_subtract_acq_int	atomic_subtract_acq_32

/*
 * TODO: The atomic functions currently are both acq and rel, we should fix
 * this.
 */
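/*
 * Release variants: stlxr (and stlr for plain stores) performs the
 * store with release semantics, so memory accesses that are
 * program-ordered before the atomic operation are observed before it.
 */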
static __inline void
atomic_add_rel_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%w0, [%2]      \n"
	    "   add	%w0, %w0, %w3  \n"
	    "   stlxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline void
atomic_clear_rel_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%w0, [%2]      \n"
	    "   bic	%w0, %w0, %w3  \n"
	    "   stlxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline int
atomic_cmpset_rel_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: mov	%w1, #1        \n"
	    "   ldxr	%w0, [%2]      \n"
	    "   cmp	%w0, %w3       \n"
	    "   b.ne	2f             \n"
	    "   stlxr	%w1, %w4, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    "2:"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (cmpval), "+r" (newval)
	    : : "cc", "memory"
	);

	return (!res);
}

static __inline void
atomic_set_rel_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%w0, [%2]      \n"
	    "   orr	%w0, %w0, %w3  \n"
	    "   stlxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t val)
{

	__asm __volatile(
	    "stlr	%w0, [%1] \n"
	    : : "r" (val), "r" (p) : "memory");
}

static __inline void
atomic_subtract_rel_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%w0, [%2]      \n"
	    "   sub	%w0, %w0, %w3  \n"
	    "   stlxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

#define	atomic_add_rel_int	atomic_add_rel_32
#define	atomic_clear_rel_int	atomic_clear_rel_32
#define	atomic_cmpset_rel_int	atomic_cmpset_rel_32
#define	atomic_set_rel_int	atomic_set_rel_32
#define	atomic_subtract_rel_int	atomic_subtract_rel_32
#define	atomic_store_rel_int	atomic_store_rel_32
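/*
 * 64-bit variants.  These mirror the 32-bit routines above but operate
 * on the full X registers; they also back the _long and _ptr aliases
 * below, since both types are 64 bits wide on arm64.
 */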
static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%0, [%2]      \n"
	    "   add	%0, %0, %3    \n"
	    "   stxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r" (tmp), "=&r" (res), "+r" (p), "+r" (val) : : "cc"
	);
}

static __inline void
atomic_clear_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%0, [%2]      \n"
	    "   bic	%0, %0, %3    \n"
	    "   stxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc"
	);
}

static __inline int
atomic_cmpset_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: mov	%w1, #1       \n"
	    "   ldxr	%0, [%2]      \n"
	    "   cmp	%0, %3        \n"
	    "   b.ne	2f            \n"
	    "   stxr	%w1, %4, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    "2:"
	    : "=&r" (tmp), "=&r"(res), "+r" (p), "+r" (cmpval), "+r" (newval)
	    : : "cc", "memory"
	);

	return (!res);
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp, ret;
	int res;

	__asm __volatile(
	    "1: ldxr	%4, [%2]      \n"
	    "   add	%0, %4, %3    \n"
	    "   stxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val), "=&r"(ret) : : "cc"
	);

	return (ret);
}

static __inline uint64_t
atomic_readandclear_64(volatile uint64_t *p)
{
	uint64_t tmp, ret;
	int res;

	__asm __volatile(
	    "   mov	%0, #0        \n"
	    "1: ldxr	%3, [%2]      \n"
	    "   stxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "=&r"(ret) : : "cc"
	);

	return (ret);
}

static __inline void
atomic_set_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%0, [%2]      \n"
	    "   orr	%0, %0, %3    \n"
	    "   stxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc"
	);
}

static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%0, [%2]      \n"
	    "   sub	%0, %0, %3    \n"
	    "   stxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc"
	);
}

static __inline uint64_t
atomic_swap_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t old;
	int res;

	__asm __volatile(
	    "1: ldxr	%0, [%2]      \n"
	    "   stxr	%w1, %3, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(old), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);

	return (old);
}

#define	atomic_add_long			atomic_add_64
#define	atomic_clear_long		atomic_clear_64
#define	atomic_cmpset_long		atomic_cmpset_64
#define	atomic_fetchadd_long		atomic_fetchadd_64
#define	atomic_readandclear_long	atomic_readandclear_64
#define	atomic_set_long			atomic_set_64
#define	atomic_swap_long		atomic_swap_64
#define	atomic_subtract_long		atomic_subtract_64

#define	atomic_add_ptr			atomic_add_64
#define	atomic_clear_ptr		atomic_clear_64
#define	atomic_cmpset_ptr		atomic_cmpset_64
#define	atomic_fetchadd_ptr		atomic_fetchadd_64
#define	atomic_readandclear_ptr		atomic_readandclear_64
#define	atomic_set_ptr			atomic_set_64
#define	atomic_swap_ptr			atomic_swap_64
#define	atomic_subtract_ptr		atomic_subtract_64

static __inline void
atomic_add_acq_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldaxr	%0, [%2]      \n"
	    "   add	%0, %0, %3    \n"
	    "   stxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline void
atomic_clear_acq_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldaxr	%0, [%2]      \n"
	    "   bic	%0, %0, %3    \n"
	    "   stxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline int
atomic_cmpset_acq_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: mov	%w1, #1       \n"
	    "   ldaxr	%0, [%2]      \n"
	    "   cmp	%0, %3        \n"
	    "   b.ne	2f            \n"
	    "   stxr	%w1, %4, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    "2:"
	    : "=&r" (tmp), "=&r" (res), "+r" (p), "+r" (cmpval), "+r" (newval)
	    : : "cc", "memory"
	);

	return (!res);
}

static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
	uint64_t ret;

	__asm __volatile(
	    "ldar	%0, [%1] \n"
	    : "=&r" (ret) : "r" (p) : "memory");

	return (ret);
}

static __inline void
atomic_set_acq_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldaxr	%0, [%2]      \n"
	    "   orr	%0, %0, %3    \n"
	    "   stxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline void
atomic_subtract_acq_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldaxr	%0, [%2]      \n"
	    "   sub	%0, %0, %3    \n"
	    "   stxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

#define	atomic_add_acq_long		atomic_add_acq_64
#define	atomic_clear_acq_long		atomic_clear_acq_64
#define	atomic_cmpset_acq_long		atomic_cmpset_acq_64
#define	atomic_load_acq_long		atomic_load_acq_64
#define	atomic_set_acq_long		atomic_set_acq_64
#define	atomic_subtract_acq_long	atomic_subtract_acq_64

#define	atomic_add_acq_ptr		atomic_add_acq_64
#define	atomic_clear_acq_ptr		atomic_clear_acq_64
#define	atomic_cmpset_acq_ptr		atomic_cmpset_acq_64
#define	atomic_load_acq_ptr		atomic_load_acq_64
#define	atomic_set_acq_ptr		atomic_set_acq_64
#define	atomic_subtract_acq_ptr		atomic_subtract_acq_64

/*
 * TODO: The atomic functions currently are both acq and rel, we should fix
 * this.
 */
static __inline void
atomic_add_rel_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%0, [%2]      \n"
	    "   add	%0, %0, %3    \n"
	    "   stlxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline void
atomic_clear_rel_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%0, [%2]      \n"
	    "   bic	%0, %0, %3    \n"
	    "   stlxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline int
atomic_cmpset_rel_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: mov	%w1, #1       \n"
	    "   ldxr	%0, [%2]      \n"
	    "   cmp	%0, %3        \n"
	    "   b.ne	2f            \n"
	    "   stlxr	%w1, %4, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    "2:"
	    : "=&r" (tmp), "=&r" (res), "+r" (p), "+r" (cmpval), "+r" (newval)
	    : : "cc", "memory"
	);

	return (!res);
}

static __inline void
atomic_set_rel_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%0, [%2]      \n"
	    "   orr	%0, %0, %3    \n"
	    "   stlxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{

	__asm __volatile(
	    "stlr	%0, [%1] \n"
	    : : "r" (val), "r" (p) : "memory");
}

static __inline void
atomic_subtract_rel_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%0, [%2]      \n"
	    "   sub	%0, %0, %3    \n"
	    "   stlxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

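/*
 * Thread fences: an acquire fence only needs to order earlier loads
 * against later accesses, so "dmb ld" suffices; the release, acq_rel
 * and seq_cst fences use the full "dmb sy" barrier.
 */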
static __inline void
atomic_thread_fence_acq(void)
{

	dmb(ld);
}

static __inline void
atomic_thread_fence_rel(void)
{

	dmb(sy);
}

static __inline void
atomic_thread_fence_acq_rel(void)
{

	dmb(sy);
}

static __inline void
atomic_thread_fence_seq_cst(void)
{

	dmb(sy);
}

#define	atomic_add_rel_long		atomic_add_rel_64
#define	atomic_clear_rel_long		atomic_clear_rel_64
#define	atomic_cmpset_rel_long		atomic_cmpset_rel_64
#define	atomic_set_rel_long		atomic_set_rel_64
#define	atomic_subtract_rel_long	atomic_subtract_rel_64
#define	atomic_store_rel_long		atomic_store_rel_64

#define	atomic_add_rel_ptr		atomic_add_rel_64
#define	atomic_clear_rel_ptr		atomic_clear_rel_64
#define	atomic_cmpset_rel_ptr		atomic_cmpset_rel_64
#define	atomic_set_rel_ptr		atomic_set_rel_64
#define	atomic_subtract_rel_ptr		atomic_subtract_rel_64
#define	atomic_store_rel_ptr		atomic_store_rel_64

#endif /* _MACHINE_ATOMIC_H_ */