#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
#define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H

#define ATOMIC_INIT(...) {__VA_ARGS__}

typedef enum {
	atomic_memory_order_relaxed,
	atomic_memory_order_acquire,
	atomic_memory_order_release,
	atomic_memory_order_acq_rel,
	atomic_memory_order_seq_cst
} atomic_memory_order_t;

ATOMIC_INLINE void
atomic_fence(atomic_memory_order_t mo) {
	/* Easy cases first: no barrier, and full barrier. */
	if (mo == atomic_memory_order_relaxed) {
		asm volatile("" ::: "memory");
		return;
	}
	if (mo == atomic_memory_order_seq_cst) {
		asm volatile("" ::: "memory");
		__sync_synchronize();
		asm volatile("" ::: "memory");
		return;
	}
	asm volatile("" ::: "memory");
#  if defined(__i386__) || defined(__x86_64__)
	/* This is implicit on x86. */
#  elif defined(__ppc64__)
	asm volatile("lwsync");
#  elif defined(__ppc__)
	asm volatile("sync");
#  elif defined(__sparc__) && defined(__arch64__)
	if (mo == atomic_memory_order_acquire) {
		asm volatile("membar #LoadLoad | #LoadStore");
	} else if (mo == atomic_memory_order_release) {
		asm volatile("membar #LoadStore | #StoreStore");
	} else {
		asm volatile("membar #LoadLoad | #LoadStore | #StoreStore");
	}
#  else
	__sync_synchronize();
#  endif
	asm volatile("" ::: "memory");
}

/*
 * A correct implementation of seq_cst loads and stores on weakly ordered
 * architectures could do either of the following:
 *   1. store() is weak-fence -> store -> strong-fence, load() is load ->
 *      strong-fence.
 *   2. store() is strong-fence -> store, load() is strong-fence -> load ->
 *      weak-fence.
 * The tricky thing is, load() and store() above can be the load or store
 * portions of a gcc __sync builtin, so we have to follow GCC's lead, which
 * means going with strategy 2.
 * On strongly ordered architectures, the natural strategy is to stick a strong
 * fence after seq_cst stores, and have naked loads.  So we want the strong
 * fences in different places on different architectures.
 * atomic_pre_sc_load_fence and atomic_post_sc_store_fence allow us to
 * accomplish this.
 */
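
/*
 * Editorial sketch (not upstream text) of what the macros further down
 * generate for mo == atomic_memory_order_seq_cst, given the helpers below:
 *
 *   Weakly ordered architectures (strategy 2):
 *     store: atomic_fence(release); a->repr = val;
 *     load:  atomic_fence(seq_cst); result = a->repr; atomic_fence(acquire);
 *
 *   Strongly ordered architectures (x86, SPARC64):
 *     store: atomic_fence(release); a->repr = val; atomic_fence(seq_cst);
 *     load:  result = a->repr; atomic_fence(acquire);
 *
 * i.e. the strong fence sits before the load on weak architectures and after
 * the store on strong ones; the remaining fences are cheap or compile down to
 * nothing there.
 */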

ATOMIC_INLINE void
atomic_pre_sc_load_fence() {
#  if defined(__i386__) || defined(__x86_64__) || \
    (defined(__sparc__) && defined(__arch64__))
	atomic_fence(atomic_memory_order_relaxed);
#  else
	atomic_fence(atomic_memory_order_seq_cst);
#  endif
}

ATOMIC_INLINE void
atomic_post_sc_store_fence() {
#  if defined(__i386__) || defined(__x86_64__) || \
    (defined(__sparc__) && defined(__arch64__))
	atomic_fence(atomic_memory_order_seq_cst);
#  else
	atomic_fence(atomic_memory_order_relaxed);
#  endif
}

#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \
    /* unused */ lg_size) \
typedef struct { \
	type volatile repr; \
} atomic_##short_type##_t; \
\
ATOMIC_INLINE type \
atomic_load_##short_type(const atomic_##short_type##_t *a, \
    atomic_memory_order_t mo) { \
	if (mo == atomic_memory_order_seq_cst) { \
		atomic_pre_sc_load_fence(); \
	} \
	type result = a->repr; \
	if (mo != atomic_memory_order_relaxed) { \
		atomic_fence(atomic_memory_order_acquire); \
	} \
	return result; \
} \
\
ATOMIC_INLINE void \
atomic_store_##short_type(atomic_##short_type##_t *a, \
    type val, atomic_memory_order_t mo) { \
	if (mo != atomic_memory_order_relaxed) { \
		atomic_fence(atomic_memory_order_release); \
	} \
	a->repr = val; \
	if (mo == atomic_memory_order_seq_cst) { \
		atomic_post_sc_store_fence(); \
	} \
} \
\
ATOMIC_INLINE type \
atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	/* \
	 * Because of FreeBSD, we care about gcc 4.2, which doesn't have \
	 * an atomic exchange builtin.  We fake it with a CAS loop. \
	 */ \
	while (true) { \
		type old = a->repr; \
		if (__sync_bool_compare_and_swap(&a->repr, old, val)) { \
			return old; \
		} \
	} \
} \
\
ATOMIC_INLINE bool \
atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
    type *expected, type desired, \
    atomic_memory_order_t success_mo, \
    atomic_memory_order_t failure_mo) { \
	type prev = __sync_val_compare_and_swap(&a->repr, *expected, \
	    desired); \
	if (prev == *expected) { \
		return true; \
	} else { \
		*expected = prev; \
		return false; \
	} \
} \
\
ATOMIC_INLINE bool \
atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \
    type *expected, type desired, \
    atomic_memory_order_t success_mo, \
    atomic_memory_order_t failure_mo) { \
	type prev = __sync_val_compare_and_swap(&a->repr, *expected, \
	    desired); \
	if (prev == *expected) { \
		return true; \
	} else { \
		*expected = prev; \
		return false; \
	} \
}

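/*
 * Usage sketch (editorial, not upstream text): the generated compare-exchange
 * follows the C11 convention that on failure *expected is overwritten with
 * the value actually observed, so a retry loop can feed it straight back in.
 * Assuming an instantiation such as JEMALLOC_GENERATE_INT_ATOMICS(uint32_t,
 * u32, 2) (see the note at the end of this header):
 *
 *	atomic_u32_t x = ATOMIC_INIT(0);
 *	uint32_t cur = atomic_load_u32(&x, atomic_memory_order_relaxed);
 *	while (!atomic_compare_exchange_weak_u32(&x, &cur, cur + 1,
 *	    atomic_memory_order_relaxed, atomic_memory_order_relaxed)) {
 *	}
 *
 * Note that this __sync-based backend implements both the weak and strong
 * variants with __sync_val_compare_and_swap and ignores success_mo and
 * failure_mo, since the __sync builtins act as full barriers.
 */
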
#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \
    /* unused */ lg_size) \
JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \
\
ATOMIC_INLINE type \
atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	return __sync_fetch_and_add(&a->repr, val); \
} \
\
ATOMIC_INLINE type \
atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	return __sync_fetch_and_sub(&a->repr, val); \
} \
\
ATOMIC_INLINE type \
atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	return __sync_fetch_and_and(&a->repr, val); \
} \
\
ATOMIC_INLINE type \
atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	return __sync_fetch_and_or(&a->repr, val); \
} \
\
ATOMIC_INLINE type \
atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \
    atomic_memory_order_t mo) { \
	return __sync_fetch_and_xor(&a->repr, val); \
}
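
/*
 * Instantiation sketch (editorial; the actual instantiations live in
 * jemalloc's atomic.h, not here).  lg_size is unused by this __sync-based
 * backend and is kept only for interface parity with the other atomic
 * backends.  Typical uses look like:
 *
 *	JEMALLOC_GENERATE_ATOMICS(void *, p, LG_SIZEOF_PTR)
 *	JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2)
 *	JEMALLOC_GENERATE_INT_ATOMICS(uint64_t, u64, 3)
 *
 * each of which defines atomic_<short_type>_t plus load, store, exchange, and
 * compare-exchange operations; the INT variant adds the fetch_add/sub/and/or/
 * xor operations.  The mo arguments to the fetch operations are accepted but
 * never weaken anything, since the __sync builtins act as full barriers.
 */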

#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */