/* SPDX-License-Identifier: GPL-2.0-only */

/*
 * ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD)
 *  - The address HAS to be 64-bit aligned
 */

#ifndef _ASM_ARC_ATOMIC64_ARCV2_H
#define _ASM_ARC_ATOMIC64_ARCV2_H

typedef struct {
	s64 __aligned(8) counter;
} atomic64_t;

#define ATOMIC64_INIT(a) { (a) }
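/*
 * A single LDD of the 8-byte aligned counter moves both 32-bit halves in
 * one access, which is why a plain load (no LLOCKD/SCONDD retry loop) is
 * sufficient for the read.
 */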
static inline s64 arch_atomic64_read(const atomic64_t *v)
{
	s64 val;

	__asm__ __volatile__(
	"	ldd   %0, [%1]	\n"
	: "=r"(val)
	: "r"(&v->counter));

	return val;
}

static inline void arch_atomic64_set(atomic64_t *v, s64 a)
{
	/*
	 * This could have been a simple assignment in "C" but would need an
	 * explicit volatile. Otherwise the gcc optimizers could elide the
	 * store, which borked the atomic64 self-test.
	 * In the inline asm version, the memory clobber is needed for the
	 * exact same reason: to tell gcc about the store.
	 *
	 * This however is not needed for the sibling atomic64_add() etc since
	 * both load and store are explicitly done in inline asm. As long as
	 * the API is used for each access, gcc has no way to optimize away
	 * any load/store.
	 */
	__asm__ __volatile__(
	"	std   %0, [%1]	\n"
	:
	: "r"(a), "r"(&v->counter)
	: "memory");
}
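/*
 * The op generators below follow the usual LL/SC pattern: LLOCKD does an
 * exclusive 64-bit load, the 32-bit op1/op2 pair operates on the low and
 * high words (the %L / %H operand modifiers select the low / high register
 * of the 64-bit pair), and SCONDD publishes the result; bnz loops back to
 * retry if the store-conditional failed.
 */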
#define ATOMIC64_OP(op, op1, op2)					\
static inline void arch_atomic64_##op(s64 a, atomic64_t *v)		\
{									\
	s64 val;							\
									\
	__asm__ __volatile__(						\
	"1:				\n"				\
	"	llockd  %0, [%1]	\n"				\
	"	" #op1 " %L0, %L0, %L2	\n"				\
	"	" #op2 " %H0, %H0, %H2	\n"				\
	"	scondd  %0, [%1]	\n"				\
	"	bnz     1b		\n"				\
	: "=&r"(val)							\
	: "r"(&v->counter), "ir"(a)					\
	: "cc", "memory");						\
}
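/*
 * Same loop as ATOMIC64_OP, but the new (post-op) value is returned. Only
 * the _relaxed form is provided here; the generic atomic headers build the
 * fully ordered/acquire/release variants on top of it.
 */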
#define ATOMIC64_OP_RETURN(op, op1, op2)				\
static inline s64 arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v)	\
{									\
	s64 val;							\
									\
	__asm__ __volatile__(						\
	"1:				\n"				\
	"	llockd  %0, [%1]	\n"				\
	"	" #op1 " %L0, %L0, %L2	\n"				\
	"	" #op2 " %H0, %H0, %H2	\n"				\
	"	scondd  %0, [%1]	\n"				\
	"	bnz     1b		\n"				\
	: [val] "=&r"(val)						\
	: "r"(&v->counter), "ir"(a)					\
	: "cc", "memory");						\
									\
	return val;							\
}

#define arch_atomic64_add_return_relaxed	arch_atomic64_add_return_relaxed
#define arch_atomic64_sub_return_relaxed	arch_atomic64_sub_return_relaxed
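/*
 * fetch_##op variants: same read-modify-write loop, but the value that was
 * loaded (i.e. the pre-op value) is kept in a second register and returned
 * to the caller.
 */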
#define ATOMIC64_FETCH_OP(op, op1, op2)					\
static inline s64 arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v)	\
{									\
	s64 val, orig;							\
									\
	__asm__ __volatile__(						\
	"1:				\n"				\
	"	llockd  %0, [%2]	\n"				\
	"	" #op1 " %L1, %L0, %L3	\n"				\
	"	" #op2 " %H1, %H0, %H3	\n"				\
	"	scondd  %1, [%2]	\n"				\
	"	bnz     1b		\n"				\
	: "=&r"(orig), "=&r"(val)					\
	: "r"(&v->counter), "ir"(a)					\
	: "cc", "memory");						\
									\
	return orig;							\
}

#define arch_atomic64_fetch_add_relaxed		arch_atomic64_fetch_add_relaxed
#define arch_atomic64_fetch_sub_relaxed		arch_atomic64_fetch_sub_relaxed

#define arch_atomic64_fetch_and_relaxed		arch_atomic64_fetch_and_relaxed
#define arch_atomic64_fetch_andnot_relaxed	arch_atomic64_fetch_andnot_relaxed
#define arch_atomic64_fetch_or_relaxed		arch_atomic64_fetch_or_relaxed
#define arch_atomic64_fetch_xor_relaxed		arch_atomic64_fetch_xor_relaxed

#define ATOMIC64_OPS(op, op1, op2)					\
	ATOMIC64_OP(op, op1, op2)					\
	ATOMIC64_OP_RETURN(op, op1, op2)				\
	ATOMIC64_FETCH_OP(op, op1, op2)
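/*
 * 64-bit add/sub are composed from two 32-bit instructions: add.f/sub.f
 * operate on the low word and set the carry/borrow flag, which adc/sbc
 * then folds into the high word.
 */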
ATOMIC64_OPS(add, add.f, adc)
ATOMIC64_OPS(sub, sub.f, sbc)

#undef ATOMIC64_OPS
#define ATOMIC64_OPS(op, op1, op2)					\
	ATOMIC64_OP(op, op1, op2)					\
	ATOMIC64_FETCH_OP(op, op1, op2)
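/*
 * The bitwise ops have no cross-word carry, so the same 32-bit instruction
 * is simply applied to each half (bic implements andnot). No _return
 * variants are generated for these.
 */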
ATOMIC64_OPS(and, and, and)
ATOMIC64_OPS(andnot, bic, bic)
ATOMIC64_OPS(or, or, or)
ATOMIC64_OPS(xor, xor, xor)

#define arch_atomic64_andnot		arch_atomic64_andnot

#undef ATOMIC64_OPS
#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_OP_RETURN
#undef ATOMIC64_OP
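/*
 * cmpxchg: compare both 32-bit halves against @expected and bail out to
 * label 2 without storing if either differs; otherwise SCONDD attempts to
 * install @new, retrying on contention. The smp_mb() before and after give
 * the operation fully ordered semantics.
 */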
static inline s64
arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
{
	s64 prev;

	smp_mb();

	__asm__ __volatile__(
	"1:	llockd  %0, [%1]	\n"
	"	brne    %L0, %L2, 2f	\n"
	"	brne    %H0, %H2, 2f	\n"
	"	scondd  %3, [%1]	\n"
	"	bnz     1b		\n"
	"2:				\n"
	: "=&r"(prev)
	: "r"(ptr), "ir"(expected), "r"(new)
	: "cc");	/* memory clobber comes from smp_mb() */

	smp_mb();

	return prev;
}
#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
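/*
 * xchg: unconditionally swap in @new and return the previous value; the
 * only reason to loop is a failed store-conditional.
 */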
static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new)
{
	s64 prev;

	smp_mb();

	__asm__ __volatile__(
	"1:	llockd  %0, [%1]	\n"
	"	scondd  %2, [%1]	\n"
	"	bnz     1b		\n"
	"2:				\n"
	: "=&r"(prev)
	: "r"(ptr), "r"(new)
	: "cc");	/* memory clobber comes from smp_mb() */

	smp_mb();

	return prev;
}
#define arch_atomic64_xchg arch_atomic64_xchg
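/*
 * Decrement and return the new value. If the result went negative the
 * store is skipped, but the (negative) value is still returned so the
 * caller can test it.
 */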
static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
	s64 val;

	smp_mb();

	__asm__ __volatile__(
	"1:	llockd  %0, [%1]	\n"
	"	sub.f   %L0, %L0, 1	# w0 - 1, set C on borrow\n"
	"	sub.c   %H0, %H0, 1	# if C set, w1 - 1\n"
	"	brlt    %H0, 0, 2f	\n"
	"	scondd  %0, [%1]	\n"
	"	bnz     1b		\n"
	"2:				\n"
	: "=&r"(val)
	: "r"(&v->counter)
	: "cc");	/* memory clobber comes from smp_mb() */

	smp_mb();

	return val;
}
#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
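/*
 * Add @a to @v unless @v is already @u; the original value is returned
 * either way. If the loaded value matches @u in both halves, breq.d
 * branches to 3f without storing (the add.f in its delay slot still runs,
 * but its result in the temp register is simply discarded); otherwise we
 * fall through to the carry-propagating add and store via SCONDD as usual.
 */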
static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
	s64 old, temp;

	smp_mb();

	__asm__ __volatile__(
	"1:	llockd  %0, [%2]	\n"
	"	brne    %L0, %L4, 2f	# continue to add since v != u \n"
	"	breq.d  %H0, %H4, 3f	# return since v == u \n"
	"2:				\n"
	"	add.f   %L1, %L0, %L3	\n"
	"	adc     %H1, %H0, %H3	\n"
	"	scondd  %1, [%2]	\n"
	"	bnz     1b		\n"
	"3:				\n"
	: "=&r"(old), "=&r"(temp)
	: "r"(&v->counter), "r"(a), "r"(u)
	: "cc");	/* memory clobber comes from smp_mb() */

	smp_mb();

	return old;
}
#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless

#endif