/*
 * Copyright 2018-2022 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

/*
 * Contemporary compilers implement lock-free atomic memory access
 * primitives that facilitate writing "thread-opportunistic" or even real
 * multi-threading low-overhead code. "Thread-opportunistic" is when
 * exact result is not required, e.g. some statistics, or execution flow
 * doesn't have to be unambiguous. Simplest example is lazy "constant"
 * initialization when one can synchronize on variable itself, e.g.
 *
 * if (var == NOT_YET_INITIALIZED)
 *     var = function_returning_same_value();
 *
 * This does work provided that loads and stores are single-instruction
 * operations (and integer ones are on *all* supported platforms), but
 * it upsets Thread Sanitizer. Suggested solution is
 *
 * if (tsan_load(&var) == NOT_YET_INITIALIZED)
 *     tsan_store(&var, function_returning_same_value());
 *
 * Production machine code would be the same, so one can wonder why
 * bother. Having Thread Sanitizer accept "thread-opportunistic" code
 * allows one to move on to trouble-shooting real bugs.
 *
 * Resolving Thread Sanitizer nits was the initial purpose for this module,
 * but it was later extended with more nuanced primitives that are useful
 * even in "non-opportunistic" scenarios. Most notably verifying if a shared
 * structure is fully initialized and bypassing the initialization lock.
 * It's suggested to view macros defined in this module as "annotations" for
 * thread-safe lock-free code, "Thread-Safe ANnotations"...
 *
 * It's assumed that ATOMIC_{LONG|INT}_LOCK_FREE are assigned same value as
 * ATOMIC_POINTER_LOCK_FREE. And check for >= 2 ensures that corresponding
 * code is inlined. It should be noted that statistics counters become
 * accurate in such case.
 *
 * Special note about TSAN_QUALIFIER. It might be undesired to use it in
 * a shared header. Because whether operation on specific variable or member
 * is atomic or not might be irrelevant in other modules. In such case one
 * can use TSAN_QUALIFIER in cast specifically when it has to count.
 */

/*
 * Summary of what this header defines:
 *
 *   TSAN_QUALIFIER         type qualifier for variables accessed through the
 *                          macros below (_Atomic, volatile, or empty).
 *   tsan_load(ptr)         read *ptr.
 *   tsan_store(ptr, val)   write val to *ptr.
 *   tsan_counter(ptr)      add 1 to *ptr; evaluates to the value held
 *                          before the update.
 *   tsan_decr(ptr)         subtract 1 from *ptr; evaluates to the value held
 *                          before the update.
 *   tsan_ld_acq(ptr)       load with acquire semantics    } defined only when
 *   tsan_st_rel(ptr, val)  store with release semantics   } the toolchain
 *                                                           can provide them.
 *   TSAN_REQUIRES_LOCKING  defined only in the plain-C fallback built with
 *                          OPENSSL_THREADS, where the macros above are
 *                          ordinary, non-atomic accesses.
 *
 * All of these are macros: ptr and val may be evaluated more than once in
 * some configurations (the sizeof-dispatching MSVC variants), so arguments
 * must be free of side effects.
 */

/* C11 <stdatomic.h>, used only when pointer-sized atomics are lock-free. */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L \
    && !defined(__STDC_NO_ATOMICS__)
# include <stdatomic.h>

# if defined(ATOMIC_POINTER_LOCK_FREE) \
          && ATOMIC_POINTER_LOCK_FREE >= 2
#  define TSAN_QUALIFIER _Atomic
#  define tsan_load(ptr) atomic_load_explicit((ptr), memory_order_relaxed)
#  define tsan_store(ptr, val) atomic_store_explicit((ptr), (val), memory_order_relaxed)
#  define tsan_counter(ptr) atomic_fetch_add_explicit((ptr), 1, memory_order_relaxed)
#  define tsan_decr(ptr) atomic_fetch_add_explicit((ptr), -1, memory_order_relaxed)
#  define tsan_ld_acq(ptr) atomic_load_explicit((ptr), memory_order_acquire)
#  define tsan_st_rel(ptr, val) atomic_store_explicit((ptr), (val), memory_order_release)
# endif

/* GCC-compatible __atomic builtins, same lock-free requirement. */
#elif defined(__GNUC__) && defined(__ATOMIC_RELAXED)

# if defined(__GCC_ATOMIC_POINTER_LOCK_FREE) \
          && __GCC_ATOMIC_POINTER_LOCK_FREE >= 2
#  define TSAN_QUALIFIER volatile
#  define tsan_load(ptr) __atomic_load_n((ptr), __ATOMIC_RELAXED)
#  define tsan_store(ptr, val) __atomic_store_n((ptr), (val), __ATOMIC_RELAXED)
#  define tsan_counter(ptr) __atomic_fetch_add((ptr), 1, __ATOMIC_RELAXED)
#  define tsan_decr(ptr) __atomic_fetch_add((ptr), -1, __ATOMIC_RELAXED)
#  define tsan_ld_acq(ptr) __atomic_load_n((ptr), __ATOMIC_ACQUIRE)
#  define tsan_st_rel(ptr, val) __atomic_store_n((ptr), (val), __ATOMIC_RELEASE)
# endif

#elif defined(_MSC_VER) && _MSC_VER>=1200 \
      && (defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64) || \
          defined(_M_ARM64) || (defined(_M_ARM) && _M_ARM >= 7 && !defined(_WIN32_WCE)))
/*
 * There is subtle dependency on /volatile:<iso|ms> command-line option.
 * "ms" implies same semantic as memory_order_acquire for loads and
 * memory_order_release for stores, while "iso" - memory_order_relaxed for
 * either. Real complication is that defaults are different on x86 and ARM.
 * There is explanation for that, "ms" is backward compatible with earlier
 * compiler versions, while multi-processor ARM can be viewed as brand new
 * platform to MSC and its users, and with non-relaxed semantic taking toll
 * with additional instructions and penalties, it kind of makes sense to
 * default to "iso"...
 */
# define TSAN_QUALIFIER volatile
# if defined(_M_ARM) || defined(_M_ARM64)
   /* On ARM, map the interlocked adds to their "_nf" (no fence) variants. */
#  define _InterlockedExchangeAdd _InterlockedExchangeAdd_nf
#  pragma intrinsic(_InterlockedExchangeAdd_nf)
#  pragma intrinsic(__iso_volatile_load32, __iso_volatile_store32)
#  ifdef _WIN64
#   define _InterlockedExchangeAdd64 _InterlockedExchangeAdd64_nf
#   pragma intrinsic(_InterlockedExchangeAdd64_nf)
#   pragma intrinsic(__iso_volatile_load64, __iso_volatile_store64)
    /* Pick the 32- or 64-bit intrinsic from the size of the pointee. */
#   define tsan_load(ptr) (sizeof(*(ptr)) == 8 ? __iso_volatile_load64(ptr) \
                                               : __iso_volatile_load32(ptr))
#   define tsan_store(ptr, val) (sizeof(*(ptr)) == 8 ? __iso_volatile_store64((ptr), (val)) \
                                                     : __iso_volatile_store32((ptr), (val)))
#  else
#   define tsan_load(ptr) __iso_volatile_load32(ptr)
#   define tsan_store(ptr, val) __iso_volatile_store32((ptr), (val))
#  endif
# else
#  define tsan_load(ptr) (*(ptr))
#  define tsan_store(ptr, val) (*(ptr) = (val))
# endif
# pragma intrinsic(_InterlockedExchangeAdd)
# ifdef _WIN64
#  pragma intrinsic(_InterlockedExchangeAdd64)
#  define tsan_counter(ptr) (sizeof(*(ptr)) == 8 ? _InterlockedExchangeAdd64((ptr), 1) \
                                                 : _InterlockedExchangeAdd((ptr), 1))
#  define tsan_decr(ptr) (sizeof(*(ptr)) == 8 ? _InterlockedExchangeAdd64((ptr), -1) \
                                              : _InterlockedExchangeAdd((ptr), -1))
# else
#  define tsan_counter(ptr) _InterlockedExchangeAdd((ptr), 1)
#  define tsan_decr(ptr) _InterlockedExchangeAdd((ptr), -1)
# endif
  /*
   * Plain volatile accesses only carry acquire/release semantics under
   * /volatile:ms (see the note above), so the acquire/release macros are
   * withheld when the compiler reports /volatile:iso via _ISO_VOLATILE.
   */
# if !defined(_ISO_VOLATILE)
#  define tsan_ld_acq(ptr) (*(ptr))
#  define tsan_st_rel(ptr, val) (*(ptr) = (val))
# endif

#endif

/*
 * Fallback: none of the branches above supplied atomics, so the macros
 * degrade to ordinary accesses. TSAN_REQUIRES_LOCKING is defined for
 * threaded builds in this case.
 */
#ifndef TSAN_QUALIFIER

# ifdef OPENSSL_THREADS
#  define TSAN_QUALIFIER volatile
#  define TSAN_REQUIRES_LOCKING
# else  /* OPENSSL_THREADS */
#  define TSAN_QUALIFIER
# endif /* OPENSSL_THREADS */

# define tsan_load(ptr) (*(ptr))
# define tsan_store(ptr, val) (*(ptr) = (val))
# define tsan_counter(ptr) ((*(ptr))++)
# define tsan_decr(ptr) ((*(ptr))--)
/*
 * Lack of tsan_ld_acq and tsan_st_rel means that compiler support is not
 * sophisticated enough to support them. Code that relies on them should be
 * protected with #ifdef tsan_ld_acq with locked fallback.
 */

#endif