//===-- atomic.c - Implement support functions for atomic operations.------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// atomic.c defines a set of functions for performing atomic accesses on
// arbitrary-sized memory locations. This design uses locks that should
// be fast in the uncontended case, for two reasons:
//
// 1) This code must work with C programs that do not link to anything
//    (including pthreads) and so it should not depend on any pthread
//    functions.
// 2) Atomic operations, rather than explicit mutexes, are most commonly used
//    on code where contended operations are rare.
//
// To avoid needing a per-object lock, this code allocates an array of
// locks and hashes the object pointers to find the one that it should use.
// For operations that must be atomic on two locations, the lower lock is
// always acquired first, to avoid deadlock.
//
//===----------------------------------------------------------------------===//

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include "assembly.h"

// We use __builtin_mem* here to avoid dependencies on libc-provided headers.
#define memcpy __builtin_memcpy
#define memcmp __builtin_memcmp

// Clang objects if you redefine a builtin. This little hack allows us to
// define a function with the same name as an intrinsic.
#pragma redefine_extname __atomic_load_c SYMBOL_NAME(__atomic_load)
#pragma redefine_extname __atomic_store_c SYMBOL_NAME(__atomic_store)
#pragma redefine_extname __atomic_exchange_c SYMBOL_NAME(__atomic_exchange)
#pragma redefine_extname __atomic_compare_exchange_c SYMBOL_NAME(             \
    __atomic_compare_exchange)
#pragma redefine_extname __atomic_is_lock_free_c SYMBOL_NAME(                 \
    __atomic_is_lock_free)

/// Number of locks. This allocates one page on 32-bit platforms, two on
/// 64-bit. This can be specified externally if a different trade-off between
/// memory usage and contention probability is required for a given platform.
#ifndef SPINLOCK_COUNT
#define SPINLOCK_COUNT (1 << 10)
#endif
static const long SPINLOCK_MASK = SPINLOCK_COUNT - 1;

////////////////////////////////////////////////////////////////////////////////
// Platform-specific lock implementation. Falls back to spinlocks if none is
// defined. Each platform should define the Lock type, and corresponding
// lock() and unlock() functions.
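//
// Illustrative sketch only (not compiled): a port for a hypothetical platform
// would supply definitions of roughly this shape, where my_platform_mutex_*
// are placeholders for whatever uncontended-fast locking primitive the
// platform offers:
//
//   typedef my_platform_mutex_t Lock;
//   __inline static void lock(Lock *l) { my_platform_mutex_lock(l); }
//   __inline static void unlock(Lock *l) { my_platform_mutex_unlock(l); }
//   static Lock locks[SPINLOCK_COUNT];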
////////////////////////////////////////////////////////////////////////////////
#if defined(__FreeBSD__) || defined(__DragonFly__)
#include <errno.h>
// clang-format off
#include <sys/types.h>
#include <machine/atomic.h>
#include <sys/umtx.h>
// clang-format on
typedef struct _usem Lock;
__inline static void unlock(Lock *l) {
  __c11_atomic_store((_Atomic(uint32_t) *)&l->_count, 1, __ATOMIC_RELEASE);
  __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
  if (l->_has_waiters)
    _umtx_op(l, UMTX_OP_SEM_WAKE, 1, 0, 0);
}
__inline static void lock(Lock *l) {
  uint32_t old = 1;
  while (!__c11_atomic_compare_exchange_weak((_Atomic(uint32_t) *)&l->_count,
                                             &old, 0, __ATOMIC_ACQUIRE,
                                             __ATOMIC_RELAXED)) {
    _umtx_op(l, UMTX_OP_SEM_WAIT, 0, 0, 0);
    old = 1;
  }
}
/// locks for atomic operations
static Lock locks[SPINLOCK_COUNT] = {[0 ... SPINLOCK_COUNT - 1] = {0, 1, 0}};

#elif defined(__APPLE__)
#include <libkern/OSAtomic.h>
typedef OSSpinLock Lock;
__inline static void unlock(Lock *l) { OSSpinLockUnlock(l); }
/// Locks a lock. In the current implementation, this is potentially
/// unbounded in the contended case.
__inline static void lock(Lock *l) { OSSpinLockLock(l); }
static Lock locks[SPINLOCK_COUNT]; // initialized to OS_SPINLOCK_INIT which is 0

#else
_Static_assert(__atomic_always_lock_free(sizeof(uintptr_t), 0),
               "Implementation assumes lock-free pointer-size cmpxchg");
typedef _Atomic(uintptr_t) Lock;
/// Unlock a lock. This is a release operation.
__inline static void unlock(Lock *l) {
  __c11_atomic_store(l, 0, __ATOMIC_RELEASE);
}
/// Locks a lock. In the current implementation, this is potentially
/// unbounded in the contended case.
__inline static void lock(Lock *l) {
  uintptr_t old = 0;
  while (!__c11_atomic_compare_exchange_weak(l, &old, 1, __ATOMIC_ACQUIRE,
                                             __ATOMIC_RELAXED))
    old = 0;
}
/// locks for atomic operations
static Lock locks[SPINLOCK_COUNT];
#endif

/// Returns a lock to use for a given pointer.
static __inline Lock *lock_for_pointer(void *ptr) {
  intptr_t hash = (intptr_t)ptr;
  // Disregard the lowest 4 bits. We want all values that may be part of the
  // same memory operation to hash to the same value and therefore use the same
  // lock.
  hash >>= 4;
  // Use the next bits as the basis for the hash
  intptr_t low = hash & SPINLOCK_MASK;
  // Now use the high(er) set of bits to perturb the hash, so that we don't
  // get collisions from atomic fields in a single object
  hash >>= 16;
  hash ^= low;
  // Return a pointer to the word to use
  return locks + (hash & SPINLOCK_MASK);
}

/// Macros for determining whether a size is lock free.
#define ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(size, p)                 \
  (__atomic_always_lock_free(size, p) ||                                      \
   (__atomic_always_lock_free(size, 0) && ((uintptr_t)p % size) == 0))
#define IS_LOCK_FREE_1(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(1, p)
#define IS_LOCK_FREE_2(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(2, p)
#define IS_LOCK_FREE_4(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(4, p)
#define IS_LOCK_FREE_8(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(8, p)
#define IS_LOCK_FREE_16(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(16, p)

/// Macro that calls the compiler-generated lock-free versions of functions
/// when they exist.
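///
/// As an illustration, TRY_LOCK_FREE_CASE(4, uint32_t, ptr) expands to
/// roughly:
///
///   case 4:
///     if (IS_LOCK_FREE_4(ptr)) {
///       LOCK_FREE_ACTION(uint32_t);
///     }
///     break;
///
/// with LOCK_FREE_ACTION defined by each caller of LOCK_FREE_CASES before the
/// macro is used and #undef'd afterwards.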
#define TRY_LOCK_FREE_CASE(n, type, ptr)                                      \
  case n:                                                                     \
    if (IS_LOCK_FREE_##n(ptr)) {                                              \
      LOCK_FREE_ACTION(type);                                                 \
    }                                                                         \
    break;
#ifdef __SIZEOF_INT128__
#define TRY_LOCK_FREE_CASE_16(p) TRY_LOCK_FREE_CASE(16, __uint128_t, p)
#else
#define TRY_LOCK_FREE_CASE_16(p) /* __uint128_t not available */
#endif

#define LOCK_FREE_CASES(ptr)                                                  \
  do {                                                                        \
    switch (size) {                                                           \
      TRY_LOCK_FREE_CASE(1, uint8_t, ptr)                                     \
      TRY_LOCK_FREE_CASE(2, uint16_t, ptr)                                    \
      TRY_LOCK_FREE_CASE(4, uint32_t, ptr)                                    \
      TRY_LOCK_FREE_CASE(8, uint64_t, ptr)                                    \
      TRY_LOCK_FREE_CASE_16(ptr) /* __uint128_t may not be supported */       \
    default:                                                                  \
      break;                                                                  \
    }                                                                         \
  } while (0)

/// Whether atomic operations for the given size (and alignment) are lock-free.
bool __atomic_is_lock_free_c(size_t size, void *ptr) {
#define LOCK_FREE_ACTION(type) return true;
  LOCK_FREE_CASES(ptr);
#undef LOCK_FREE_ACTION
  return false;
}

/// An atomic load operation. This is atomic with respect to the source
/// pointer only.
void __atomic_load_c(int size, void *src, void *dest, int model) {
#define LOCK_FREE_ACTION(type)                                                \
  *((type *)dest) = __c11_atomic_load((_Atomic(type) *)src, model);           \
  return;
  LOCK_FREE_CASES(src);
#undef LOCK_FREE_ACTION
  Lock *l = lock_for_pointer(src);
  lock(l);
  memcpy(dest, src, size);
  unlock(l);
}

/// An atomic store operation. This is atomic with respect to the destination
/// pointer only.
void __atomic_store_c(int size, void *dest, void *src, int model) {
#define LOCK_FREE_ACTION(type)                                                \
  __c11_atomic_store((_Atomic(type) *)dest, *(type *)src, model);             \
  return;
  LOCK_FREE_CASES(dest);
#undef LOCK_FREE_ACTION
  Lock *l = lock_for_pointer(dest);
  lock(l);
  memcpy(dest, src, size);
  unlock(l);
}

/// Atomic compare and exchange operation. If the value at *ptr is identical
/// to the value at *expected, then this copies the value at *desired to *ptr.
/// If they are not, then this stores the current value from *ptr in *expected.
///
/// This function returns 1 if the exchange takes place or 0 if it fails.
int __atomic_compare_exchange_c(int size, void *ptr, void *expected,
                                void *desired, int success, int failure) {
#define LOCK_FREE_ACTION(type)                                                \
  return __c11_atomic_compare_exchange_strong(                                \
      (_Atomic(type) *)ptr, (type *)expected, *(type *)desired, success,      \
      failure)
  LOCK_FREE_CASES(ptr);
#undef LOCK_FREE_ACTION
  Lock *l = lock_for_pointer(ptr);
  lock(l);
  if (memcmp(ptr, expected, size) == 0) {
    memcpy(ptr, desired, size);
    unlock(l);
    return 1;
  }
  memcpy(expected, ptr, size);
  unlock(l);
  return 0;
}

/// Performs an atomic exchange operation between two pointers. This is atomic
/// with respect to the target address.
void __atomic_exchange_c(int size, void *ptr, void *val, void *old,
                         int model) {
#define LOCK_FREE_ACTION(type)                                                \
  *(type *)old =                                                              \
      __c11_atomic_exchange((_Atomic(type) *)ptr, *(type *)val, model);       \
  return;
  LOCK_FREE_CASES(ptr);
#undef LOCK_FREE_ACTION
  Lock *l = lock_for_pointer(ptr);
  lock(l);
  memcpy(old, ptr, size);
  memcpy(ptr, val, size);
  unlock(l);
}

////////////////////////////////////////////////////////////////////////////////
// Where the size is known at compile time, the compiler may emit calls to
// specialised versions of the above functions.
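//
// For example (illustrative only), on a target that cannot inline a 4-byte
// atomic load, code such as
//
//   _Atomic(uint32_t) counter;
//   uint32_t v = __c11_atomic_load(&counter, __ATOMIC_ACQUIRE);
//
// may be lowered to a call to __atomic_load_4(&counter, __ATOMIC_ACQUIRE),
// one of the sized entry points generated below.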
////////////////////////////////////////////////////////////////////////////////
#ifdef __SIZEOF_INT128__
#define OPTIMISED_CASES                                                       \
  OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)                                  \
  OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)                                 \
  OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)                                 \
  OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t)                                 \
  OPTIMISED_CASE(16, IS_LOCK_FREE_16, __uint128_t)
#else
#define OPTIMISED_CASES                                                       \
  OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)                                  \
  OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)                                 \
  OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)                                 \
  OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t)
#endif

#define OPTIMISED_CASE(n, lockfree, type)                                     \
  type __atomic_load_##n(type *src, int model) {                              \
    if (lockfree(src))                                                        \
      return __c11_atomic_load((_Atomic(type) *)src, model);                  \
    Lock *l = lock_for_pointer(src);                                          \
    lock(l);                                                                  \
    type val = *src;                                                          \
    unlock(l);                                                                \
    return val;                                                               \
  }
OPTIMISED_CASES
#undef OPTIMISED_CASE

#define OPTIMISED_CASE(n, lockfree, type)                                     \
  void __atomic_store_##n(type *dest, type val, int model) {                  \
    if (lockfree(dest)) {                                                     \
      __c11_atomic_store((_Atomic(type) *)dest, val, model);                  \
      return;                                                                 \
    }                                                                         \
    Lock *l = lock_for_pointer(dest);                                         \
    lock(l);                                                                  \
    *dest = val;                                                              \
    unlock(l);                                                                \
    return;                                                                   \
  }
OPTIMISED_CASES
#undef OPTIMISED_CASE

#define OPTIMISED_CASE(n, lockfree, type)                                     \
  type __atomic_exchange_##n(type *dest, type val, int model) {               \
    if (lockfree(dest))                                                       \
      return __c11_atomic_exchange((_Atomic(type) *)dest, val, model);        \
    Lock *l = lock_for_pointer(dest);                                         \
    lock(l);                                                                  \
    type tmp = *dest;                                                         \
    *dest = val;                                                              \
    unlock(l);                                                                \
    return tmp;                                                               \
  }
OPTIMISED_CASES
#undef OPTIMISED_CASE

#define OPTIMISED_CASE(n, lockfree, type)                                     \
  bool __atomic_compare_exchange_##n(type *ptr, type *expected, type desired, \
                                     int success, int failure) {              \
    if (lockfree(ptr))                                                        \
      return __c11_atomic_compare_exchange_strong(                            \
          (_Atomic(type) *)ptr, expected, desired, success, failure);         \
    Lock *l = lock_for_pointer(ptr);                                          \
    lock(l);                                                                  \
    if (*ptr == *expected) {                                                  \
      *ptr = desired;                                                         \
      unlock(l);                                                              \
      return true;                                                            \
    }                                                                         \
    *expected = *ptr;                                                         \
    unlock(l);                                                                \
    return false;                                                             \
  }
OPTIMISED_CASES
#undef OPTIMISED_CASE

////////////////////////////////////////////////////////////////////////////////
// Atomic read-modify-write operations for integers of various sizes.
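//
// Illustrative expansion: ATOMIC_RMW(4, IS_LOCK_FREE_4, uint32_t, add, +)
// defines
//
//   uint32_t __atomic_fetch_add_4(uint32_t *ptr, uint32_t val, int model);
//
// which returns the value *ptr held before the addition, using the
// __c11_atomic_fetch_add builtin when the access is lock-free and the hashed
// lock otherwise.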
////////////////////////////////////////////////////////////////////////////////
#define ATOMIC_RMW(n, lockfree, type, opname, op)                             \
  type __atomic_fetch_##opname##_##n(type *ptr, type val, int model) {        \
    if (lockfree(ptr))                                                        \
      return __c11_atomic_fetch_##opname((_Atomic(type) *)ptr, val, model);   \
    Lock *l = lock_for_pointer(ptr);                                          \
    lock(l);                                                                  \
    type tmp = *ptr;                                                          \
    *ptr = tmp op val;                                                        \
    unlock(l);                                                                \
    return tmp;                                                               \
  }

#define ATOMIC_RMW_NAND(n, lockfree, type)                                    \
  type __atomic_fetch_nand_##n(type *ptr, type val, int model) {              \
    if (lockfree(ptr))                                                        \
      return __c11_atomic_fetch_nand((_Atomic(type) *)ptr, val, model);       \
    Lock *l = lock_for_pointer(ptr);                                          \
    lock(l);                                                                  \
    type tmp = *ptr;                                                          \
    *ptr = ~(tmp & val);                                                      \
    unlock(l);                                                                \
    return tmp;                                                               \
  }

#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, add, +)
OPTIMISED_CASES
#undef OPTIMISED_CASE
#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, sub, -)
OPTIMISED_CASES
#undef OPTIMISED_CASE
#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, and, &)
OPTIMISED_CASES
#undef OPTIMISED_CASE
#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, or, |)
OPTIMISED_CASES
#undef OPTIMISED_CASE
#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, xor, ^)
OPTIMISED_CASES
#undef OPTIMISED_CASE
// Allow build with clang without __c11_atomic_fetch_nand builtin (pre-14)
#if __has_builtin(__c11_atomic_fetch_nand)
#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW_NAND(n, lockfree, type)
OPTIMISED_CASES
#undef OPTIMISED_CASE
#endif
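
// Illustrative note (comment only; blob_t and its size are assumptions): an
// atomic access to a type with no lock-free support is lowered by clang to
// the generic entry points in this file, e.g. with <stdatomic.h>:
//
//   typedef struct { char bytes[32]; } blob_t; // hypothetical 32-byte type
//   _Atomic blob_t shared;
//   blob_t snapshot = atomic_load(&shared);
//   // roughly becomes:
//   //   __atomic_load(sizeof(blob_t), &shared, &snapshot, __ATOMIC_SEQ_CST);
//   // which the redefine_extname pragmas above bind to __atomic_load_c.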