/* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */

/*-
 * SPDX-License-Identifier: BSD-4-Clause
 *
 * Copyright (C) 2003-2004 Olivier Houchard
 * Copyright (C) 1994-1997 Mark Brinicombe
 * Copyright (C) 1994 Brini
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Brini.
 * 4. The name of Brini may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef	_MACHINE_ATOMIC_H_
#define	_MACHINE_ATOMIC_H_

#include <sys/atomic_common.h>

#define	isb()  __asm __volatile("isb" : : : "memory")
#define	dsb()  __asm __volatile("dsb" : : : "memory")
#define	dmb()  __asm __volatile("dmb" : : : "memory")

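/*
 * The mb()/wmb()/rmb() barriers below all expand to a full DMB; this
 * implementation does not distinguish read barriers from write barriers.
 */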
#define	mb()   dmb()
#define	wmb()  dmb()
#define	rmb()  dmb()

#define	ARM_HAVE_ATOMIC64

#define	ATOMIC_ACQ_REL_LONG(NAME)					\
static __inline void							\
atomic_##NAME##_acq_long(__volatile u_long *p, u_long v)		\
{									\
	atomic_##NAME##_long(p, v);					\
	dmb();								\
}									\
									\
static __inline void							\
atomic_##NAME##_rel_long(__volatile u_long *p, u_long v)		\
{									\
	dmb();								\
	atomic_##NAME##_long(p, v);					\
}

#define	ATOMIC_ACQ_REL(NAME, WIDTH)					\
static __inline void							\
atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	atomic_##NAME##_##WIDTH(p, v);					\
	dmb();								\
}									\
									\
static __inline void							\
atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{									\
	dmb();								\
	atomic_##NAME##_##WIDTH(p, v);					\
}

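/*
 * ATOMIC_ACQ_REL(NAME, WIDTH) and ATOMIC_ACQ_REL_LONG(NAME) generate the
 * _acq_ and _rel_ variants of an operation from its plain form: the acquire
 * variant performs the operation and then issues a dmb(), while the release
 * variant issues a dmb() first.  For example, ATOMIC_ACQ_REL(add, 32) below
 * produces atomic_add_acq_32() and atomic_add_rel_32(), both wrapping
 * atomic_add_32().
 *
 * The plain 32-bit primitives that follow use the usual LDREX/STREX pattern:
 * load-exclusive the old value, compute the new value, store-exclusive it
 * back, and branch back to retry if the exclusive store failed because the
 * location was touched in the meantime.
 */
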
static __inline void
atomic_add_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%2]	\n"
	    "   add	%0, %0, %3	\n"
	    "   strex	%1, %0, [%2]	\n"
	    "   cmp	%1, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
	    : : "cc", "memory");
}

static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   adds	%Q[tmp], %Q[val]			\n"
	    "   adc	%R[tmp], %R[tmp], %R[val]		\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r"   (p),
	      [val] "r"   (val)
	    : "cc", "memory");
}

static __inline void
atomic_add_long(volatile u_long *p, u_long val)
{

	atomic_add_32((volatile uint32_t *)p, val);
}

ATOMIC_ACQ_REL(add, 32)
ATOMIC_ACQ_REL(add, 64)
ATOMIC_ACQ_REL_LONG(add)

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%2]	\n"
	    "   bic	%0, %0, %3	\n"
	    "   strex	%1, %0, [%2]	\n"
	    "   cmp	%1, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
	    : : "cc", "memory");
}

static __inline void
atomic_clear_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   bic	%Q[tmp], %Q[val]			\n"
	    "   bic	%R[tmp], %R[val]			\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r"   (p),
	      [val] "r"   (val)
	    : "cc", "memory");
}

static __inline void
atomic_clear_long(volatile u_long *address, u_long setmask)
{

	atomic_clear_32((volatile uint32_t *)address, setmask);
}

ATOMIC_ACQ_REL(clear, 32)
ATOMIC_ACQ_REL(clear, 64)
ATOMIC_ACQ_REL_LONG(clear)

#define	ATOMIC_FCMPSET_CODE(RET, TYPE, SUF)			\
{								\
	TYPE tmp;						\
								\
	__asm __volatile(					\
	    "1: ldrex" SUF "   %[tmp], [%[ptr]]		\n"	\
	    "   ldr" SUF "     %[ret], [%[oldv]]	\n"	\
	    "   teq	       %[tmp], %[ret]		\n"	\
	    "   ittee	       ne			\n"	\
	    "   str" SUF "ne   %[tmp], [%[oldv]]	\n"	\
	    "   movne	       %[ret], #0		\n"	\
	    "   strex" SUF "eq %[ret], %[newv], [%[ptr]]\n"	\
	    "   eorseq	       %[ret], #1		\n"	\
	    "   beq	       1b			\n"	\
	    : [ret] "=&r" (RET),				\
	      [tmp] "=&r" (tmp)					\
	    : [ptr] "r"   (_ptr),				\
	      [oldv] "r"  (_old),				\
	      [newv] "r"  (_new)				\
	    : "cc", "memory");					\
}

#define	ATOMIC_FCMPSET_CODE64(RET)				\
{								\
	uint64_t cmp, tmp;					\
								\
	__asm __volatile(					\
	    "1: ldrexd	%Q[tmp], %R[tmp], [%[ptr]]	\n"	\
	    "   ldrd	%Q[cmp], %R[cmp], [%[oldv]]	\n"	\
	    "   teq	%Q[tmp], %Q[cmp]		\n"	\
	    "   it	eq				\n"	\
	    "   teqeq	%R[tmp], %R[cmp]		\n"	\
	    "   ittee	ne				\n"	\
	    "   movne	%[ret], #0			\n"	\
	    "   strdne	%[cmp], [%[oldv]]		\n"	\
	    "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]]\n"\
	    "   eorseq	%[ret], #1			\n"	\
	    "   beq	1b				\n"	\
	    : [ret] "=&r" (RET),				\
	      [cmp] "=&r" (cmp),				\
	      [tmp] "=&r" (tmp)					\
	    : [ptr] "r"   (_ptr),				\
	      [oldv] "r"  (_old),				\
	      [newv] "r"  (_new)				\
	    : "cc", "memory");					\
}
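
/*
 * The fcmpset functions defined below compare *_ptr against *_old and, when
 * they match, store _new and return nonzero.  When they do not match, the
 * value found at *_ptr is written back into *_old and zero is returned, so
 * the caller can retry without issuing a separate reload.  A sketch of
 * typical caller code (the names are illustrative only):
 *
 *	uint32_t old = *counter;
 *	while (!atomic_fcmpset_32(counter, &old, old + 1))
 *		;	on failure, "old" already holds the refreshed value
 */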

static __inline int
atomic_fcmpset_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
	return (ret);
}
#define	atomic_fcmpset_8	atomic_fcmpset_8

static __inline int
atomic_fcmpset_acq_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
	dmb();
	return (ret);
}

static __inline int
atomic_fcmpset_rel_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
	int ret;

	dmb();
	ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
	return (ret);
}

static __inline int
atomic_fcmpset_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
	return (ret);
}
#define	atomic_fcmpset_16	atomic_fcmpset_16

static __inline int
atomic_fcmpset_acq_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
	dmb();
	return (ret);
}

static __inline int
atomic_fcmpset_rel_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
	int ret;

	dmb();
	ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
	return (ret);
}

static __inline int
atomic_fcmpset_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
	return (ret);
}

static __inline int
atomic_fcmpset_acq_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
	dmb();
	return (ret);
}

static __inline int
atomic_fcmpset_rel_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
	int ret;

	dmb();
	ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
	return (ret);
}

static __inline int
atomic_fcmpset_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, u_long, "");
	return (ret);
}

static __inline int
atomic_fcmpset_acq_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE(ret, u_long, "");
	dmb();
	return (ret);
}

static __inline int
atomic_fcmpset_rel_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
	int ret;

	dmb();
	ATOMIC_FCMPSET_CODE(ret, u_long, "");
	return (ret);
}

static __inline int
atomic_fcmpset_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE64(ret);
	return (ret);
}

static __inline int
atomic_fcmpset_acq_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
	int ret;

	ATOMIC_FCMPSET_CODE64(ret);
	dmb();
	return (ret);
}

static __inline int
atomic_fcmpset_rel_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
	int ret;

	dmb();
	ATOMIC_FCMPSET_CODE64(ret);
	return (ret);
}

#define	ATOMIC_CMPSET_CODE(RET, SUF)				\
{								\
	__asm __volatile(					\
	    "1: ldrex" SUF "   %[ret], [%[ptr]]		\n"	\
	    "   teq	       %[ret], %[oldv]		\n"	\
	    "   itee	       ne			\n"	\
	    "   movne	       %[ret], #0		\n"	\
	    "   strex" SUF "eq %[ret], %[newv], [%[ptr]]\n"	\
	    "   eorseq	       %[ret], #1		\n"	\
	    "   beq	       1b			\n"	\
	    : [ret] "=&r" (RET)					\
	    : [ptr] "r"   (_ptr),				\
	      [oldv] "r"  (_old),				\
	      [newv] "r"  (_new)				\
	    : "cc", "memory");					\
}

#define	ATOMIC_CMPSET_CODE64(RET)				\
{								\
	uint64_t tmp;						\
								\
	__asm __volatile(					\
	    "1: ldrexd	 %Q[tmp], %R[tmp], [%[ptr]]	\n"	\
	    "   teq	 %Q[tmp], %Q[oldv]		\n"	\
	    "   it	 eq				\n"	\
	    "   teqeq	 %R[tmp], %R[oldv]		\n"	\
	    "   itee	 ne				\n"	\
	    "   movne	 %[ret], #0			\n"	\
	    "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]]\n"\
	    "   eorseq	 %[ret], #1			\n"	\
	    "   beq	 1b				\n"	\
	    : [ret] "=&r" (RET),				\
	      [tmp] "=&r" (tmp)					\
	    : [ptr] "r"   (_ptr),				\
	      [oldv] "r"  (_old),				\
	      [newv] "r"  (_new)				\
	    : "cc", "memory");					\
}
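
/*
 * The cmpset functions defined below are the simpler form: they compare
 * *_ptr against the value _old, store _new on a match, and return nonzero
 * only if the store took place.  Unlike fcmpset they do not report the
 * value that was found, so a retrying caller must reload it itself.
 */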

static __inline int
atomic_cmpset_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "b");
	return (ret);
}
#define	atomic_cmpset_8		atomic_cmpset_8

static __inline int
atomic_cmpset_acq_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "b");
	dmb();
	return (ret);
}

static __inline int
atomic_cmpset_rel_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
	int ret;

	dmb();
	ATOMIC_CMPSET_CODE(ret, "b");
	return (ret);
}

static __inline int
atomic_cmpset_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "h");
	return (ret);
}
#define	atomic_cmpset_16	atomic_cmpset_16

static __inline int
atomic_cmpset_acq_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "h");
	dmb();
	return (ret);
}

static __inline int
atomic_cmpset_rel_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
	int ret;

	dmb();
	ATOMIC_CMPSET_CODE(ret, "h");
	return (ret);
}

static __inline int
atomic_cmpset_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "");
	return (ret);
}

static __inline int
atomic_cmpset_acq_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "");
	dmb();
	return (ret);
}

static __inline int
atomic_cmpset_rel_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
	int ret;

	dmb();
	ATOMIC_CMPSET_CODE(ret, "");
	return (ret);
}

static __inline int
atomic_cmpset_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "");
	return (ret);
}

static __inline int
atomic_cmpset_acq_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
	int ret;

	ATOMIC_CMPSET_CODE(ret, "");
	dmb();
	return (ret);
}

static __inline int
atomic_cmpset_rel_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
	int ret;

	dmb();
	ATOMIC_CMPSET_CODE(ret, "");
	return (ret);
}

static __inline int
atomic_cmpset_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE64(ret);
	return (ret);
}

static __inline int
atomic_cmpset_acq_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
	int ret;

	ATOMIC_CMPSET_CODE64(ret);
	dmb();
	return (ret);
}

static __inline int
atomic_cmpset_rel_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
	int ret;

	dmb();
	ATOMIC_CMPSET_CODE64(ret);
	return (ret);
}

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp = 0, tmp2 = 0, ret = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%3]	\n"
	    "   add	%1, %0, %4	\n"
	    "   strex	%2, %1, [%3]	\n"
	    "   cmp	%2, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "+r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
	    : : "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t ret, tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
	    "   adds	%Q[tmp], %Q[ret], %Q[val]		\n"
	    "   adc	%R[tmp], %R[ret], %R[val]		\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [ret] "=&r" (ret),
	      [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r"   (p),
	      [val] "r"   (val)
	    : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long val)
{

	return (atomic_fetchadd_32((volatile uint32_t *)p, val));
}
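
/*
 * atomic_fetchadd_*() returns the value the target held before the addition,
 * which makes it convenient for handing out monotonically increasing values.
 * A sketch of typical caller code (the names are illustrative only):
 *
 *	uint32_t ticket = atomic_fetchadd_32(&next_ticket, 1);
 *	"ticket" is the pre-increment value; next_ticket has been advanced.
 */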

static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
	uint32_t v;

	v = *p;
	dmb();
	return (v);
}

static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
	uint64_t ret;

	/*
	 * The only way to atomically load 64 bits is with LDREXD, which puts
	 * the exclusive monitor into the exclusive state, so reset it to the
	 * open state with CLREX because we don't actually need to store
	 * anything.
	 */
	__asm __volatile(
	    "ldrexd	%Q[ret], %R[ret], [%[ptr]]	\n"
	    "clrex					\n"
	    : [ret] "=&r" (ret)
	    : [ptr] "r"   (p)
	    : "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
	uint64_t ret;

	ret = atomic_load_64(p);
	dmb();
	return (ret);
}

static __inline u_long
atomic_load_acq_long(volatile u_long *p)
{
	u_long v;

	v = *p;
	dmb();
	return (v);
}

static __inline uint32_t
atomic_readandclear_32(volatile uint32_t *p)
{
	uint32_t ret, tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%3]	\n"
	    "   mov	%1, #0		\n"
	    "   strex	%2, %1, [%3]	\n"
	    "   cmp	%2, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p)
	    : : "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_readandclear_64(volatile uint64_t *p)
{
	uint64_t ret, tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
	    "   mov	%Q[tmp], #0				\n"
	    "   mov	%R[tmp], #0				\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [ret] "=&r" (ret),
	      [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r"   (p)
	    : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_readandclear_long(volatile u_long *p)
{

	return (atomic_readandclear_32((volatile uint32_t *)p));
}

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%2]	\n"
	    "   orr	%0, %0, %3	\n"
	    "   strex	%1, %0, [%2]	\n"
	    "   cmp	%1, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
	    : : "cc", "memory");
}

static __inline void
atomic_set_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   orr	%Q[tmp], %Q[val]			\n"
	    "   orr	%R[tmp], %R[val]			\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r"   (p),
	      [val] "r"   (val)
	    : "cc", "memory");
}

static __inline void
atomic_set_long(volatile u_long *address, u_long setmask)
{

	atomic_set_32((volatile uint32_t *)address, setmask);
}

ATOMIC_ACQ_REL(set, 32)
ATOMIC_ACQ_REL(set, 64)
ATOMIC_ACQ_REL_LONG(set)

static __inline void
atomic_subtract_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%2]	\n"
	    "   sub	%0, %0, %3	\n"
	    "   strex	%1, %0, [%2]	\n"
	    "   cmp	%1, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
	    : : "cc", "memory");
}

static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   subs	%Q[tmp], %Q[val]			\n"
	    "   sbc	%R[tmp], %R[tmp], %R[val]		\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r"   (p),
	      [val] "r"   (val)
	    : "cc", "memory");
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long val)
{

	atomic_subtract_32((volatile uint32_t *)p, val);
}

ATOMIC_ACQ_REL(subtract, 32)
ATOMIC_ACQ_REL(subtract, 64)
ATOMIC_ACQ_REL_LONG(subtract)

static __inline void
atomic_store_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	/*
	 * The only way to atomically store 64 bits is with STREXD, which will
	 * succeed only if paired up with a preceding LDREXD using the same
	 * address, so we read and discard the existing value before storing.
	 */
	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   strexd	%[exf], %Q[val], %R[val], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [tmp] "=&r" (tmp),
	      [exf] "=&r" (exflag)
	    : [ptr] "r"   (p),
	      [val] "r"   (val)
	    : "cc", "memory");
}

static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
{

	dmb();
	*p = v;
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{

	dmb();
	atomic_store_64(p, val);
}

static __inline void
atomic_store_rel_long(volatile u_long *p, u_long v)
{

	dmb();
	*p = v;
}

static __inline int
atomic_testandclear_32(volatile uint32_t *ptr, u_int bit)
{
	int newv, oldv, result;

	__asm __volatile(
	    "   mov	ip, #1					\n"
	    "   lsl	ip, ip, %[bit]				\n"
	    /* Done with %[bit] as input, reuse below as output. */
	    "1:							\n"
	    "   ldrex	%[oldv], [%[ptr]]			\n"
	    "   bic	%[newv], %[oldv], ip			\n"
	    "   strex	%[bit], %[newv], [%[ptr]]		\n"
	    "   teq	%[bit], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    "   ands	%[bit], %[oldv], ip			\n"
	    "   it	ne					\n"
	    "   movne	%[bit], #1				\n"
	    : [bit] "=&r" (result),
	      [oldv] "=&r" (oldv),
	      [newv] "=&r" (newv)
	    : [ptr] "r"   (ptr),
	      "[bit]" (bit & 0x1f)
	    : "cc", "ip", "memory");

	return (result);
}

static __inline int
atomic_testandclear_int(volatile u_int *p, u_int v)
{

	return (atomic_testandclear_32((volatile uint32_t *)p, v));
}

static __inline int
atomic_testandclear_long(volatile u_long *p, u_int v)
{

	return (atomic_testandclear_32((volatile uint32_t *)p, v));
}

static __inline int
atomic_testandclear_64(volatile uint64_t *p, u_int v)
{
	volatile uint32_t *p32;

	p32 = (volatile uint32_t *)p;
	/*
	 * Assume little-endian; atomic_testandclear_32() uses only the
	 * low 5 bits of v.
	 */
	if ((v & 0x20) != 0)
		p32++;
	return (atomic_testandclear_32(p32, v));
}

static __inline int
atomic_testandset_32(volatile uint32_t *ptr, u_int bit)
{
	int newv, oldv, result;

	__asm __volatile(
	    "   mov	ip, #1					\n"
	    "   lsl	ip, ip, %[bit]				\n"
	    /* Done with %[bit] as input, reuse below as output. */
	    "1:							\n"
	    "   ldrex	%[oldv], [%[ptr]]			\n"
	    "   orr	%[newv], %[oldv], ip			\n"
	    "   strex	%[bit], %[newv], [%[ptr]]		\n"
	    "   teq	%[bit], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    "   ands	%[bit], %[oldv], ip			\n"
	    "   it	ne					\n"
	    "   movne	%[bit], #1				\n"
	    : [bit] "=&r" (result),
	      [oldv] "=&r" (oldv),
	      [newv] "=&r" (newv)
	    : [ptr] "r"   (ptr),
	      "[bit]" (bit & 0x1f)
	    : "cc", "ip", "memory");

	return (result);
}

static __inline int
atomic_testandset_int(volatile u_int *p, u_int v)
{

	return (atomic_testandset_32((volatile uint32_t *)p, v));
}

static __inline int
atomic_testandset_long(volatile u_long *p, u_int v)
{

	return (atomic_testandset_32((volatile uint32_t *)p, v));
}

static __inline int
atomic_testandset_acq_long(volatile u_long *p, u_int v)
{
	int ret;

	ret = atomic_testandset_32((volatile uint32_t *)p, v);
	dmb();
	return (ret);
}

static __inline int
atomic_testandset_64(volatile uint64_t *p, u_int v)
{
	volatile uint32_t *p32;

	p32 = (volatile uint32_t *)p;
	/*
	 * Assume little-endian; atomic_testandset_32() uses only the
	 * low 5 bits of v.
	 */
	if ((v & 0x20) != 0)
		p32++;
	return (atomic_testandset_32(p32, v));
}

static __inline uint32_t
atomic_swap_32(volatile uint32_t *p, uint32_t v)
{
	uint32_t ret, exflag;

	__asm __volatile(
	    "1: ldrex	%[ret], [%[ptr]]		\n"
	    "   strex	%[exf], %[val], [%[ptr]]	\n"
	    "   teq	%[exf], #0			\n"
	    "   it	ne				\n"
	    "   bne	1b				\n"
	    : [ret] "=&r" (ret),
	      [exf] "=&r" (exflag)
	    : [val] "r"  (v),
	      [ptr] "r"  (p)
	    : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_swap_long(volatile u_long *p, u_long v)
{

	return (atomic_swap_32((volatile uint32_t *)p, v));
}

static __inline uint64_t
atomic_swap_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t ret;
	uint32_t exflag;

	__asm __volatile(
	    "1: ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
	    "   strexd	%[exf], %Q[val], %R[val], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [ret] "=&r" (ret),
	      [exf] "=&r" (exflag)
	    : [val] "r"   (v),
	      [ptr] "r"   (p)
	    : "cc", "memory");
	return (ret);
}

#undef ATOMIC_ACQ_REL
#undef ATOMIC_ACQ_REL_LONG

static __inline void
atomic_thread_fence_acq(void)
{

	dmb();
}

static __inline void
atomic_thread_fence_rel(void)
{

	dmb();
}

static __inline void
atomic_thread_fence_acq_rel(void)
{

	dmb();
}

static __inline void
atomic_thread_fence_seq_cst(void)
{

	dmb();
}

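/*
 * int, long and pointers are all 32 bits wide on 32-bit ARM, so the _int,
 * _long and _ptr operations simply alias the 32-bit primitives above.
 */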
#define	atomic_add_ptr			atomic_add_32
#define	atomic_add_acq_ptr		atomic_add_acq_32
#define	atomic_add_rel_ptr		atomic_add_rel_32
#define	atomic_subtract_ptr		atomic_subtract_32
#define	atomic_subtract_acq_ptr		atomic_subtract_acq_32
#define	atomic_subtract_rel_ptr		atomic_subtract_rel_32
#define	atomic_clear_ptr		atomic_clear_32
#define	atomic_clear_acq_ptr		atomic_clear_acq_32
#define	atomic_clear_rel_ptr		atomic_clear_rel_32
#define	atomic_set_ptr			atomic_set_32
#define	atomic_set_acq_ptr		atomic_set_acq_32
#define	atomic_set_rel_ptr		atomic_set_rel_32
#define	atomic_fcmpset_ptr		atomic_fcmpset_32
#define	atomic_fcmpset_acq_ptr		atomic_fcmpset_acq_32
#define	atomic_fcmpset_rel_ptr		atomic_fcmpset_rel_32
#define	atomic_cmpset_ptr		atomic_cmpset_32
#define	atomic_cmpset_acq_ptr		atomic_cmpset_acq_32
#define	atomic_cmpset_rel_ptr		atomic_cmpset_rel_32
#define	atomic_fetchadd_ptr		atomic_fetchadd_32
#define	atomic_readandclear_ptr		atomic_readandclear_32
#define	atomic_load_acq_ptr		atomic_load_acq_32
#define	atomic_store_rel_ptr		atomic_store_rel_32
#define	atomic_swap_ptr			atomic_swap_32
#define	atomic_testandset_ptr		atomic_testandset_32
#define	atomic_testandclear_ptr		atomic_testandclear_32

#define	atomic_add_int			atomic_add_32
#define	atomic_add_acq_int		atomic_add_acq_32
#define	atomic_add_rel_int		atomic_add_rel_32
#define	atomic_subtract_int		atomic_subtract_32
#define	atomic_subtract_acq_int		atomic_subtract_acq_32
#define	atomic_subtract_rel_int		atomic_subtract_rel_32
#define	atomic_clear_int		atomic_clear_32
#define	atomic_clear_acq_int		atomic_clear_acq_32
#define	atomic_clear_rel_int		atomic_clear_rel_32
#define	atomic_set_int			atomic_set_32
#define	atomic_set_acq_int		atomic_set_acq_32
#define	atomic_set_rel_int		atomic_set_rel_32
#define	atomic_fcmpset_int		atomic_fcmpset_32
#define	atomic_fcmpset_acq_int		atomic_fcmpset_acq_32
#define	atomic_fcmpset_rel_int		atomic_fcmpset_rel_32
#define	atomic_cmpset_int		atomic_cmpset_32
#define	atomic_cmpset_acq_int		atomic_cmpset_acq_32
#define	atomic_cmpset_rel_int		atomic_cmpset_rel_32
#define	atomic_fetchadd_int		atomic_fetchadd_32
#define	atomic_readandclear_int		atomic_readandclear_32
#define	atomic_load_acq_int		atomic_load_acq_32
#define	atomic_store_rel_int		atomic_store_rel_32
#define	atomic_swap_int			atomic_swap_32

/*
 * For:
 *  - atomic_load_acq_8
 *  - atomic_load_acq_16
 */
#include <sys/_atomic_subword.h>

#endif /* _MACHINE_ATOMIC_H_ */