//===-- atomic.c - Implement support functions for atomic operations.------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// atomic.c defines a set of functions for performing atomic accesses on
// arbitrary-sized memory locations. This design uses locks that should
// be fast in the uncontended case, for two reasons:
//
// 1) This code must work with C programs that do not link to anything
//    (including pthreads) and so it should not depend on any pthread
//    functions. If the user wishes to opt into using pthreads, they may do so.
// 2) Atomic operations, rather than explicit mutexes, are most commonly used
//    in code where contended operations are rare.
//
// To avoid needing a per-object lock, this code allocates an array of
// locks and hashes the object pointers to find the one that it should use.
// For operations that must be atomic on two locations, the lower lock is
// always acquired first, to avoid deadlock.
//
//===----------------------------------------------------------------------===//

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include "assembly.h"

// We use __builtin_mem* here to avoid dependencies on libc-provided headers.
#define memcpy __builtin_memcpy
#define memcmp __builtin_memcmp

// Clang objects if you redefine a builtin. This little hack allows us to
// define a function with the same name as an intrinsic.
#pragma redefine_extname __atomic_load_c SYMBOL_NAME(__atomic_load)
#pragma redefine_extname __atomic_store_c SYMBOL_NAME(__atomic_store)
#pragma redefine_extname __atomic_exchange_c SYMBOL_NAME(__atomic_exchange)
#pragma redefine_extname __atomic_compare_exchange_c SYMBOL_NAME(              \
    __atomic_compare_exchange)
#pragma redefine_extname __atomic_is_lock_free_c SYMBOL_NAME(                  \
    __atomic_is_lock_free)

/// Number of locks. This allocates one page on 32-bit platforms, two on
/// 64-bit. This can be specified externally if a different trade-off between
/// memory usage and contention probability is required for a given platform.
#ifndef SPINLOCK_COUNT
#define SPINLOCK_COUNT (1 << 10)
#endif
static const long SPINLOCK_MASK = SPINLOCK_COUNT - 1;

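// Sizing sketch for the default above (an illustration, assuming 4 KiB pages
// and the pointer-sized spinlock fallback defined later in this file):
//   32-bit: (1 << 10) locks * 4 bytes = 4 KiB  (one page)
//   64-bit: (1 << 10) locks * 8 bytes = 8 KiB  (two pages)
// A platform that prefers a lower collision probability over memory could
// build this file with something like `-DSPINLOCK_COUNT='(1 << 12)'`; the
// value should remain a power of two so that SPINLOCK_MASK works as a mask.
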
////////////////////////////////////////////////////////////////////////////////
// Platform-specific lock implementation. Falls back to spinlocks if none is
// defined. Each platform should define the Lock type, and corresponding
// lock() and unlock() functions.
////////////////////////////////////////////////////////////////////////////////
#if defined(_LIBATOMIC_USE_PTHREAD)
#include <pthread.h>
typedef pthread_mutex_t Lock;
/// Unlock a lock. This is a release operation.
__inline static void unlock(Lock *l) { pthread_mutex_unlock(l); }
/// Locks a lock.
__inline static void lock(Lock *l) { pthread_mutex_lock(l); }
/// locks for atomic operations
static Lock locks[SPINLOCK_COUNT];

#elif defined(__FreeBSD__) || defined(__DragonFly__)
#include <errno.h>
// clang-format off
#include <sys/types.h>
#include <machine/atomic.h>
#include <sys/umtx.h>
// clang-format on
typedef struct _usem Lock;
__inline static void unlock(Lock *l) {
  __c11_atomic_store((_Atomic(uint32_t) *)&l->_count, 1, __ATOMIC_RELEASE);
  __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
  if (l->_has_waiters)
    _umtx_op(l, UMTX_OP_SEM_WAKE, 1, 0, 0);
}
__inline static void lock(Lock *l) {
  uint32_t old = 1;
  while (!__c11_atomic_compare_exchange_weak((_Atomic(uint32_t) *)&l->_count,
                                             &old, 0, __ATOMIC_ACQUIRE,
                                             __ATOMIC_RELAXED)) {
    _umtx_op(l, UMTX_OP_SEM_WAIT, 0, 0, 0);
    old = 1;
  }
}
/// locks for atomic operations
static Lock locks[SPINLOCK_COUNT] = {[0 ... SPINLOCK_COUNT - 1] = {0, 1, 0}};

#elif defined(__APPLE__)
#include <libkern/OSAtomic.h>
typedef OSSpinLock Lock;
__inline static void unlock(Lock *l) { OSSpinLockUnlock(l); }
/// Locks a lock. In the current implementation, this is potentially
/// unbounded in the contended case.
__inline static void lock(Lock *l) { OSSpinLockLock(l); }
// initialized to OS_SPINLOCK_INIT, which is 0
static Lock locks[SPINLOCK_COUNT];

#else
_Static_assert(__atomic_always_lock_free(sizeof(uintptr_t), 0),
               "Implementation assumes lock-free pointer-size cmpxchg");
typedef _Atomic(uintptr_t) Lock;
/// Unlock a lock. This is a release operation.
__inline static void unlock(Lock *l) {
  __c11_atomic_store(l, 0, __ATOMIC_RELEASE);
}
/// Locks a lock. In the current implementation, this is potentially
/// unbounded in the contended case.
__inline static void lock(Lock *l) {
  uintptr_t old = 0;
  while (!__c11_atomic_compare_exchange_weak(l, &old, 1, __ATOMIC_ACQUIRE,
                                             __ATOMIC_RELAXED))
    old = 0;
}
/// locks for atomic operations
static Lock locks[SPINLOCK_COUNT];
#endif

/// Returns a lock to use for a given pointer.
static __inline Lock *lock_for_pointer(void *ptr) {
  intptr_t hash = (intptr_t)ptr;
  // Disregard the lowest 4 bits. We want all values that may be part of the
  // same memory operation to hash to the same value and therefore use the same
  // lock.
  hash >>= 4;
  // Use the next bits as the basis for the hash.
  intptr_t low = hash & SPINLOCK_MASK;
  // Now use the high(er) set of bits to perturb the hash, so that we don't
  // get collisions from atomic fields in a single object.
  hash >>= 16;
  hash ^= low;
  // Return a pointer to the word to use.
  return locks + (hash & SPINLOCK_MASK);
}

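// Worked example (illustration only, with the default SPINLOCK_COUNT of 1024):
// pointers 0x1000 and 0x1008 both become 0x100 after the `>> 4`, so two fields
// in the same 16-byte granule share a lock; the `>> 16` perturbation is 0 for
// such small addresses, leaving index 0x100. For a larger pointer such as
// 0x12345678, the steps are (0x12345678 >> 4) = 0x1234567, low = 0x167,
// perturbation (0x1234567 >> 16) = 0x123, index (0x123 ^ 0x167) & 0x3FF = 0x44.
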
/// Macros for determining whether a size is lock free.
#define ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(size, p)                  \
  (__atomic_always_lock_free(size, p) ||                                       \
   (__atomic_always_lock_free(size, 0) && ((uintptr_t)p % size) == 0))
#define IS_LOCK_FREE_1(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(1, p)
#define IS_LOCK_FREE_2(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(2, p)
#define IS_LOCK_FREE_4(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(4, p)
#define IS_LOCK_FREE_8(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(8, p)
#define IS_LOCK_FREE_16(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(16, p)

/// Macro that calls the compiler-generated lock-free versions of functions
/// when they exist.
#define TRY_LOCK_FREE_CASE(n, type, ptr)                                       \
  case n:                                                                      \
    if (IS_LOCK_FREE_##n(ptr)) {                                               \
      LOCK_FREE_ACTION(type);                                                  \
    }                                                                          \
    break;
#ifdef __SIZEOF_INT128__
#define TRY_LOCK_FREE_CASE_16(p) TRY_LOCK_FREE_CASE(16, __uint128_t, p)
#else
#define TRY_LOCK_FREE_CASE_16(p) /* __uint128_t not available */
#endif

#define LOCK_FREE_CASES(ptr)                                                   \
  do {                                                                         \
    switch (size) {                                                            \
      TRY_LOCK_FREE_CASE(1, uint8_t, ptr)                                      \
      TRY_LOCK_FREE_CASE(2, uint16_t, ptr)                                     \
      TRY_LOCK_FREE_CASE(4, uint32_t, ptr)                                     \
      TRY_LOCK_FREE_CASE(8, uint64_t, ptr)                                     \
      TRY_LOCK_FREE_CASE_16(ptr) /* __uint128_t may not be supported */        \
    default:                                                                   \
      break;                                                                   \
    }                                                                          \
  } while (0)

/// Whether atomic operations for the given size (and alignment) are lock-free.
bool __atomic_is_lock_free_c(size_t size, void *ptr) {
#define LOCK_FREE_ACTION(type) return true;
  LOCK_FREE_CASES(ptr);
#undef LOCK_FREE_ACTION
  return false;
}

/// An atomic load operation. This is atomic with respect to the source
/// pointer only.
void __atomic_load_c(int size, void *src, void *dest, int model) {
#define LOCK_FREE_ACTION(type)                                                 \
  *((type *)dest) = __c11_atomic_load((_Atomic(type) *)src, model);            \
  return;
  LOCK_FREE_CASES(src);
#undef LOCK_FREE_ACTION
  Lock *l = lock_for_pointer(src);
  lock(l);
  memcpy(dest, src, size);
  unlock(l);
}

/// An atomic store operation. This is atomic with respect to the destination
/// pointer only.
void __atomic_store_c(int size, void *dest, void *src, int model) {
#define LOCK_FREE_ACTION(type)                                                 \
  __c11_atomic_store((_Atomic(type) *)dest, *(type *)src, model);              \
  return;
  LOCK_FREE_CASES(dest);
#undef LOCK_FREE_ACTION
  Lock *l = lock_for_pointer(dest);
  lock(l);
  memcpy(dest, src, size);
  unlock(l);
}

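// Usage sketch (an illustration, not part of the original file): for a type
// with no lock-free path, clang lowers the C11 operations to calls into the
// generic functions above (exported as __atomic_load/__atomic_store through
// the #pragma redefine_extname directives at the top of this file). Roughly:
//
//   typedef struct { char bytes[32]; } Blob;   // hypothetical 32-byte type
//   _Atomic Blob shared;
//   Blob snapshot = shared;                    // seq_cst load
//
// becomes approximately
//
//   __atomic_load(sizeof(Blob), &shared, &snapshot, __ATOMIC_SEQ_CST);
//
// which lands in __atomic_load_c, takes the hashed lock for &shared, and
// copies the 32 bytes with memcpy while the lock is held.
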
/// Atomic compare and exchange operation. If the value at *ptr is identical
/// to the value at *expected, then this copies the value at *desired to *ptr.
/// If they are not, then this stores the current value from *ptr in
/// *expected.
///
/// This function returns 1 if the exchange takes place or 0 if it fails.
int __atomic_compare_exchange_c(int size, void *ptr, void *expected,
                                void *desired, int success, int failure) {
#define LOCK_FREE_ACTION(type)                                                 \
  return __c11_atomic_compare_exchange_strong(                                 \
      (_Atomic(type) *)ptr, (type *)expected, *(type *)desired, success,       \
      failure)
  LOCK_FREE_CASES(ptr);
#undef LOCK_FREE_ACTION
  Lock *l = lock_for_pointer(ptr);
  lock(l);
  if (memcmp(ptr, expected, size) == 0) {
    memcpy(ptr, desired, size);
    unlock(l);
    return 1;
  }
  memcpy(expected, ptr, size);
  unlock(l);
  return 0;
}

/// Performs an atomic exchange operation between two pointers. This is atomic
/// with respect to the target address.
void __atomic_exchange_c(int size, void *ptr, void *val, void *old, int model) {
#define LOCK_FREE_ACTION(type)                                                 \
  *(type *)old =                                                               \
      __c11_atomic_exchange((_Atomic(type) *)ptr, *(type *)val, model);        \
  return;
  LOCK_FREE_CASES(ptr);
#undef LOCK_FREE_ACTION
  Lock *l = lock_for_pointer(ptr);
  lock(l);
  memcpy(old, ptr, size);
  memcpy(ptr, val, size);
  unlock(l);
}

////////////////////////////////////////////////////////////////////////////////
// Where the size is known at compile time, the compiler may emit calls to
// specialised versions of the above functions.
////////////////////////////////////////////////////////////////////////////////
#ifdef __SIZEOF_INT128__
#define OPTIMISED_CASES                                                        \
  OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)                                   \
  OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)                                  \
  OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)                                  \
  OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t)                                  \
  OPTIMISED_CASE(16, IS_LOCK_FREE_16, __uint128_t)
#else
#define OPTIMISED_CASES                                                        \
  OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)                                   \
  OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)                                  \
  OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)                                  \
  OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t)
#endif

#define OPTIMISED_CASE(n, lockfree, type)                                      \
  type __atomic_load_##n(type *src, int model) {                               \
    if (lockfree(src))                                                         \
      return __c11_atomic_load((_Atomic(type) *)src, model);                   \
    Lock *l = lock_for_pointer(src);                                           \
    lock(l);                                                                   \
    type val = *src;                                                           \
    unlock(l);                                                                 \
    return val;                                                                \
  }
OPTIMISED_CASES
#undef OPTIMISED_CASE

#define OPTIMISED_CASE(n, lockfree, type)                                      \
  void __atomic_store_##n(type *dest, type val, int model) {                   \
    if (lockfree(dest)) {                                                      \
      __c11_atomic_store((_Atomic(type) *)dest, val, model);                   \
      return;                                                                  \
    }                                                                          \
    Lock *l = lock_for_pointer(dest);                                          \
    lock(l);                                                                   \
    *dest = val;                                                               \
    unlock(l);                                                                 \
    return;                                                                    \
  }
OPTIMISED_CASES
#undef OPTIMISED_CASE

#define OPTIMISED_CASE(n, lockfree, type)                                      \
  type __atomic_exchange_##n(type *dest, type val, int model) {                \
    if (lockfree(dest))                                                        \
      return __c11_atomic_exchange((_Atomic(type) *)dest, val, model);         \
    Lock *l = lock_for_pointer(dest);                                          \
    lock(l);                                                                   \
    type tmp = *dest;                                                          \
    *dest = val;                                                               \
    unlock(l);                                                                 \
    return tmp;                                                                \
  }
OPTIMISED_CASES
#undef OPTIMISED_CASE

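// Expansion sketch (illustration only): each OPTIMISED_CASES use above stamps
// out one definition per size, so the exchange macro, for example, produces
//
//   uint32_t __atomic_exchange_4(uint32_t *dest, uint32_t val, int model) {
//     if (IS_LOCK_FREE_4(dest))
//       return __c11_atomic_exchange((_Atomic(uint32_t) *)dest, val, model);
//     ... locked fallback as in the macro body ...
//   }
//
// alongside the 1-, 2-, 8-, and (when __uint128_t exists) 16-byte variants.
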
#define OPTIMISED_CASE(n, lockfree, type)                                      \
  bool __atomic_compare_exchange_##n(type *ptr, type *expected, type desired,  \
                                     int success, int failure) {               \
    if (lockfree(ptr))                                                         \
      return __c11_atomic_compare_exchange_strong(                             \
          (_Atomic(type) *)ptr, expected, desired, success, failure);          \
    Lock *l = lock_for_pointer(ptr);                                           \
    lock(l);                                                                   \
    if (*ptr == *expected) {                                                   \
      *ptr = desired;                                                          \
      unlock(l);                                                               \
      return true;                                                             \
    }                                                                          \
    *expected = *ptr;                                                          \
    unlock(l);                                                                 \
    return false;                                                              \
  }
OPTIMISED_CASES
#undef OPTIMISED_CASE

////////////////////////////////////////////////////////////////////////////////
// Atomic read-modify-write operations for integers of various sizes.
////////////////////////////////////////////////////////////////////////////////
#define ATOMIC_RMW(n, lockfree, type, opname, op)                              \
  type __atomic_fetch_##opname##_##n(type *ptr, type val, int model) {         \
    if (lockfree(ptr))                                                         \
      return __c11_atomic_fetch_##opname((_Atomic(type) *)ptr, val, model);    \
    Lock *l = lock_for_pointer(ptr);                                           \
    lock(l);                                                                   \
    type tmp = *ptr;                                                           \
    *ptr = tmp op val;                                                         \
    unlock(l);                                                                 \
    return tmp;                                                                \
  }

#define ATOMIC_RMW_NAND(n, lockfree, type)                                     \
  type __atomic_fetch_nand_##n(type *ptr, type val, int model) {               \
    if (lockfree(ptr))                                                         \
      return __c11_atomic_fetch_nand((_Atomic(type) *)ptr, val, model);        \
    Lock *l = lock_for_pointer(ptr);                                           \
    lock(l);                                                                   \
    type tmp = *ptr;                                                           \
    *ptr = ~(tmp & val);                                                       \
    unlock(l);                                                                 \
    return tmp;                                                                \
  }

#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, add, +)
OPTIMISED_CASES
#undef OPTIMISED_CASE
#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, sub, -)
OPTIMISED_CASES
#undef OPTIMISED_CASE
#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, and, &)
OPTIMISED_CASES
#undef OPTIMISED_CASE
#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, or, |)
OPTIMISED_CASES
#undef OPTIMISED_CASE
#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, xor, ^)
OPTIMISED_CASES
#undef OPTIMISED_CASE
// Allow building with clang versions that lack the __c11_atomic_fetch_nand
// builtin (pre-14).
#if __has_builtin(__c11_atomic_fetch_nand)
#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW_NAND(n, lockfree, type)
OPTIMISED_CASES
#undef OPTIMISED_CASE
#endif
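
// End-to-end sketch (illustration only, not part of the original file): when
// the compiler cannot inline a 4-byte fetch-add (for example, on a target
// without native atomics, or when it cannot prove alignment), code such as
//
//   uint32_t prev = __atomic_fetch_add(&counter, 1u, __ATOMIC_SEQ_CST);
//
// is lowered to a call to the __atomic_fetch_add_4 generated by ATOMIC_RMW
// above. That function uses the lock-free __c11_atomic_fetch_add when
// IS_LOCK_FREE_4 holds for the pointer, and otherwise performs the
// read-modify-write under the hashed lock, returning the value observed
// before the addition.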