/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/sysarch.h>

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define WITHOUT_INTERRUPTS(s) do { \
	register_t regs; \
 \
	regs = intr_disable(); \
	do s while (0); \
	intr_restore(regs); \
} while (0)
#endif /* _KERNEL && !SMP */
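
/*
 * Illustrative sketch only (not part of this file's code): the macro is
 * meant to wrap a block of statements so that, on a uniprocessor kernel,
 * they execute without being interleaved with an interrupt handler:
 *
 *	uint32_t old;
 *
 *	WITHOUT_INTERRUPTS({
 *		old = *mem;
 *		*mem = old + val;
 *	});
 *
 * The "mem" and "val" names above are placeholders for the arguments of
 * whatever atomic fallback would use the macro.
 */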

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

	__asm volatile ("" : : : "memory");
}
#else
static inline void
do_sync(void)
{

	dmb();
}
#endif
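
/*
 * Note: on a uniprocessor kernel the barrier only needs to keep the
 * compiler from reordering memory accesses across it, so an empty asm
 * with a "memory" clobber suffices; every other configuration also has
 * to order the accesses seen by other observers, which requires a real
 * DMB.  A rough user-level analogy, for illustration only:
 *
 *	atomic_signal_fence(memory_order_seq_cst);	// compiler-only barrier
 *	atomic_thread_fence(memory_order_seq_cst);	// emits dmb on ARMv7
 */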

#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((intptr_t)ptr & ~3));
}
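
/*
 * For example, an 8-bit or 16-bit variable at address 0x1002 lives in
 * the 32-bit word at 0x1000; round_to_word() simply clears the two low
 * address bits:
 *
 *	round_to_word((void *)0x1001) == (uint32_t *)0x1000
 *	round_to_word((void *)0x1002) == (uint32_t *)0x1000
 */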

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
	size_t offset;

	offset = (intptr_t)offset_ptr & 3;
	return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
	size_t offset;
	union {
		uint16_t in;
		uint8_t out[2];
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in = val;
	r->v8[offset] = bytes.out[0];
	r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
	size_t offset;
	union {
		uint8_t in[2];
		uint16_t out;
	} bytes;

	offset = (intptr_t)offset_ptr & 3;
	bytes.in[0] = r->v8[offset];
	bytes.in[1] = r->v8[offset + 1];
	return (bytes.out);
}
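
/*
 * Example (little-endian): a uint16_t at offset 2 of its word is backed
 * by v8[2] and v8[3], so placing a value into an all-zero register with
 * put_2() produces a 32-bit image that lines up with the variable's
 * position in memory:
 *
 *	reg_t r;
 *	uint16_t *p = (uint16_t *)0x1002;	// address only used for its low bits
 *
 *	r.v32 = 0x00000000;
 *	put_2(&r, p, 0xabcd);		// r.v32 == 0xabcd0000
 *	get_2(&r, p);			// == 0xabcd
 *
 * Illustration only; the helpers never dereference the offset pointer.
 */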

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */

#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t) \
uintN_t \
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val) \
{ \
	uint32_t *mem32; \
	reg_t val32, negmask, old; \
	uint32_t temp1, temp2; \
 \
	mem32 = round_to_word(mem); \
	val32.v32 = 0x00000000; \
	put_##N(&val32, mem, val); \
	negmask.v32 = 0xffffffff; \
	put_##N(&negmask, mem, 0); \
 \
	do_sync(); \
	__asm volatile ( \
		"1:" \
		"\tldrex %0, %6\n"	/* Load old value. */ \
		"\tand %2, %5, %0\n"	/* Remove the old value. */ \
		"\torr %2, %2, %4\n"	/* Put in the new value. */ \
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */ \
		"\tcmp %3, #0\n"	/* Did it succeed? */ \
		"\tbne 1b\n"		/* Spin if failed. */ \
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \
		  "=&r" (temp2) \
		: "r" (val32.v32), "r" (negmask.v32), "m" (*mem32)); \
	return (get_##N(&old, mem)); \
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)
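
/*
 * In the loop above, negmask has one bits everywhere except in the
 * bytes occupied by the variable, so "and %2, %5, %0" preserves the
 * neighbouring bytes of the word while clearing the variable's own
 * bytes, and the following "orr" merges in the new value.  In plain C
 * the stored word is therefore (sketch, using the macro's local names):
 *
 *	temp1 = (old.v32 & negmask.v32) | val32.v32;
 */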

#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \
uintN_t \
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected, \
    uintN_t desired) \
{ \
	uint32_t *mem32; \
	reg_t expected32, desired32, posmask, old; \
	uint32_t negmask, temp1, temp2; \
 \
	mem32 = round_to_word(mem); \
	expected32.v32 = 0x00000000; \
	put_##N(&expected32, mem, expected); \
	desired32.v32 = 0x00000000; \
	put_##N(&desired32, mem, desired); \
	posmask.v32 = 0x00000000; \
	put_##N(&posmask, mem, ~0); \
	negmask = ~posmask.v32; \
 \
	do_sync(); \
	__asm volatile ( \
		"1:" \
		"\tldrex %0, %8\n"	/* Load old value. */ \
		"\tand %2, %6, %0\n"	/* Isolate the old value. */ \
		"\tcmp %2, %4\n"	/* Compare to expected value. */ \
		"\tbne 2f\n"		/* Values are unequal. */ \
		"\tand %2, %7, %0\n"	/* Remove the old value. */ \
		"\torr %2, %5\n"	/* Put in the new value. */ \
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */ \
		"\tcmp %3, #0\n"	/* Did it succeed? */ \
		"\tbne 1b\n"		/* Spin if failed. */ \
		"2:" \
		: "=&r" (old), "=m" (*mem32), "=&r" (temp1), \
		  "=&r" (temp2) \
		: "r" (expected32.v32), "r" (desired32.v32), \
		  "r" (posmask.v32), "r" (negmask), "m" (*mem32)); \
	return (get_##N(&old, mem)); \
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)
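
/*
 * Here posmask carries one bits only in the variable's own bytes, so
 * the "and"/"cmp" pair compares just those bytes against the expected
 * value and ignores the rest of the word.  A boolean-style wrapper can
 * be layered on top of the value-returning form, e.g. (sketch, not part
 * of this file):
 *
 *	static int
 *	sketch_bool_cas_1(uint8_t *mem, uint8_t expected, uint8_t desired)
 *	{
 *
 *		return (__sync_val_compare_and_swap(mem, expected,
 *		    desired) == expected);
 *	}
 */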

#define EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op) \
uintN_t \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \
{ \
	uint32_t *mem32; \
	reg_t val32, posmask, old; \
	uint32_t negmask, temp1, temp2; \
 \
	mem32 = round_to_word(mem); \
	val32.v32 = 0x00000000; \
	put_##N(&val32, mem, val); \
	posmask.v32 = 0x00000000; \
	put_##N(&posmask, mem, ~0); \
	negmask = ~posmask.v32; \
 \
	do_sync(); \
	__asm volatile ( \
		"1:" \
		"\tldrex %0, %7\n"	/* Load old value. */ \
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */ \
		"\tand %2, %5\n"	/* Isolate the new value. */ \
		"\tand %3, %6, %0\n"	/* Remove the old value. */ \
		"\torr %2, %2, %3\n"	/* Put in the new value. */ \
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */ \
		"\tcmp %3, #0\n"	/* Did it succeed? */ \
		"\tbne 1b\n"		/* Spin if failed. */ \
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \
		  "=&r" (temp2) \
		: "r" (val32.v32), "r" (posmask.v32), "r" (negmask), \
		  "m" (*mem32)); \
	return (get_##N(&old, mem)); \
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")
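
/*
 * Unlike the bitwise operations below, "add" and "sub" can carry or
 * borrow into the neighbouring bytes of the word, so the freshly
 * computed value is masked back down with posmask before the untouched
 * bytes of the old word are merged in.  Conceptually, for an add
 * (sketch, using the macro's local names):
 *
 *	temp1 = ((old.v32 + val32.v32) & posmask.v32) | (old.v32 & negmask);
 */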

#define EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence) \
uintN_t \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \
{ \
	uint32_t *mem32; \
	reg_t val32, old; \
	uint32_t temp1, temp2; \
 \
	mem32 = round_to_word(mem); \
	val32.v32 = idempotence ? 0xffffffff : 0x00000000; \
	put_##N(&val32, mem, val); \
 \
	do_sync(); \
	__asm volatile ( \
		"1:" \
		"\tldrex %0, %5\n"	/* Load old value. */ \
		"\t"op" %2, %4, %0\n"	/* Calculate new value. */ \
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */ \
		"\tcmp %3, #0\n"	/* Did it succeed? */ \
		"\tbne 1b\n"		/* Spin if failed. */ \
		: "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \
		  "=&r" (temp2) \
		: "r" (val32.v32), "m" (*mem32)); \
	return (get_##N(&old, mem)); \
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)
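
/*
 * The idempotence flag selects the fill value for the bytes of the word
 * that do not belong to the variable: all ones for "and" ((x & ~0) == x)
 * and all zeroes for "orr" and "eor" ((x | 0) == x, (x ^ 0) == x), so
 * the neighbouring bytes pass through the operation unchanged and no
 * extra masking is needed.  For example, a fetch-and-or of 0x0f on a
 * uint8_t at offset 1 of its word uses (little-endian):
 *
 *	val32.v32 == 0x00000f00
 */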

/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem));
	return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp %0, %3\n"	/* Compare to expected value. */
		"\tbne 2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}
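
/*
 * The ldrex/strex pairs above form the usual load-exclusive/
 * store-exclusive retry loop: strex only writes its register back if no
 * other observer touched the location since the matching ldrex, and it
 * reports success (0) or failure (1) in the status register that the
 * "cmp; bne 1b" sequence retests.  A rough C rendering of the 32-bit
 * compare-and-swap, for illustration only (load_exclusive() and
 * store_exclusive() are hypothetical helpers, not real APIs):
 *
 *	do {
 *		old = load_exclusive(mem);
 *		if (old != expected)
 *			break;
 *	} while (store_exclusive(mem, desired) != 0);
 *	return (old);
 */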

#define EMIT_FETCH_AND_OP_4(name, op) \
uint32_t \
__sync_##name##_4##_c(uint32_t *mem, uint32_t val) \
{ \
	uint32_t old, temp1, temp2; \
 \
	do_sync(); \
	__asm volatile ( \
		"1:" \
		"\tldrex %0, %5\n"	/* Load old value. */ \
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */ \
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */ \
		"\tcmp %3, #0\n"	/* Did it succeed? */ \
		"\tbne 1b\n"		/* Spin if failed. */ \
		: "=&r" (old), "=m" (*mem), "=&r" (temp1), \
		  "=&r" (temp2) \
		: "r" (val), "m" (*mem)); \
	return (old); \
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")
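
/*
 * Sketch of a caller, not part of this file: when the compiler does not
 * inline the atomic operation for the target CPU, a plain __sync
 * builtin ends up calling the corresponding out-of-line function
 * emitted above, e.g.:
 *
 *	static uint32_t
 *	sketch_counter_bump(uint32_t *counter)
 *	{
 *
 *		return (__sync_fetch_and_add(counter, 1));
 *	}
 */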

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#endif /* __SYNC_ATOMICS */