1 /* SPDX-License-Identifier: GPL-2.0 */
2
3 #ifndef _LINUX_SIX_H
4 #define _LINUX_SIX_H
5
6 /**
7 * DOC: SIX locks overview
8 *
9 * Shared/intent/exclusive locks: sleepable read/write locks, like rw semaphores
10 * but with an additional state: read/shared, intent, exclusive/write
11 *
12 * The purpose of the intent state is to allow for greater concurrency on tree
13 * structures without deadlocking. In general, a read can't be upgraded to a
14 * write lock without deadlocking, so an operation that updates multiple nodes
15 * will have to take write locks for the full duration of the operation.
16 *
17 * But by adding an intent state, which is exclusive with other intent locks but
18 * not with readers, we can take intent locks at the start of the operation,
 * and then take write locks only for the actual update to each individual
 * node, without deadlocking.
21 *
22 * Example usage:
23 * six_lock_read(&foo->lock);
24 * six_unlock_read(&foo->lock);
25 *
26 * An intent lock must be held before taking a write lock:
27 * six_lock_intent(&foo->lock);
28 * six_lock_write(&foo->lock);
29 * six_unlock_write(&foo->lock);
30 * six_unlock_intent(&foo->lock);
31 *
32 * Other operations:
33 * six_trylock_read()
34 * six_trylock_intent()
35 * six_trylock_write()
36 *
37 * six_lock_downgrade() convert from intent to read
38 * six_lock_tryupgrade() attempt to convert from read to intent, may fail
39 *
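 * There are also interfaces that take the lock type as an enum (as elsewhere
 * in this overview, the should_sleep_fn and p arguments taken by the blocking
 * lock functions are omitted from the examples for brevity):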
41 *
42 * six_lock_type(&foo->lock, SIX_LOCK_read);
43 * six_trylock_convert(&foo->lock, SIX_LOCK_read, SIX_LOCK_intent)
44 * six_lock_type(&foo->lock, SIX_LOCK_write);
45 * six_unlock_type(&foo->lock, SIX_LOCK_write);
46 * six_unlock_type(&foo->lock, SIX_LOCK_intent);
47 *
48 * Lock sequence numbers - unlock(), relock():
49 *
 * Locks embed sequence numbers, which are incremented on write lock/unlock.
 * This allows locks to be dropped and then retaken iff the state they protect
52 * hasn't changed; this makes it much easier to avoid holding locks while e.g.
53 * doing IO or allocating memory.
54 *
55 * Example usage:
56 * six_lock_read(&foo->lock);
57 * u32 seq = six_lock_seq(&foo->lock);
58 * six_unlock_read(&foo->lock);
59 *
60 * some_operation_that_may_block();
61 *
62 * if (six_relock_read(&foo->lock, seq)) { ... }
63 *
64 * If the relock operation succeeds, it is as if the lock was never unlocked.
65 *
66 * Reentrancy:
67 *
68 * Six locks are not by themselves reentrant, but have counters for both the
69 * read and intent states that can be used to provide reentrancy by an upper
70 * layer that tracks held locks. If a lock is known to already be held in the
71 * read or intent state, six_lock_increment() can be used to bump the "lock
72 * held in this state" counter, increasing the number of unlock calls that
73 * will be required to fully unlock it.
74 *
75 * Example usage:
76 * six_lock_read(&foo->lock);
77 * six_lock_increment(&foo->lock, SIX_LOCK_read);
78 * six_unlock_read(&foo->lock);
79 * six_unlock_read(&foo->lock);
80 * foo->lock is now fully unlocked.
81 *
 * Since the intent state supersedes read, it's legal to increment the read
83 * counter when holding an intent lock, but not the reverse.
84 *
85 * A lock may only be held once for write: six_lock_increment(.., SIX_LOCK_write)
86 * is not legal.
87 *
88 * should_sleep_fn:
89 *
90 * There is a six_lock() variant that takes a function pointer that is called
91 * immediately prior to schedule() when blocking, and may return an error to
92 * abort.
93 *
 * One possible use for this feature is when objects being locked are part of
 * a cache and may be reused, and lock ordering is based on a property of the
96 * object that will change when the object is reused - i.e. logical key order.
97 *
98 * If looking up an object in the cache may race with object reuse, and lock
99 * ordering is required to prevent deadlock, object reuse may change the
100 * correct lock order for that object and cause a deadlock. should_sleep_fn
101 * can be used to check if the object is still the object we want and avoid
102 * this deadlock.
103 *
104 * Wait list entry interface:
105 *
106 * There is a six_lock() variant, six_lock_waiter(), that takes a pointer to a
107 * wait list entry. By embedding six_lock_waiter into another object, and by
108 * traversing lock waitlists, it is then possible for an upper layer to
109 * implement full cycle detection for deadlock avoidance.
110 *
111 * should_sleep_fn should be used for invoking the cycle detector, walking the
112 * graph of held locks to check for a deadlock. The upper layer must track
113 * held locks for each thread, and each thread's held locks must be reachable
114 * from its six_lock_waiter object.
115 *
 * six_lock_waiter() will add the wait object to the waitlist before re-trying
 * to take the lock, and before calling should_sleep_fn, and the wait object will not
118 * be removed from the waitlist until either the lock has been successfully
119 * acquired, or we aborted because should_sleep_fn returned an error.
120 *
121 * Also, six_lock_waiter contains a timestamp, and waiters on a waitlist will
 * have timestamps in strictly ascending order - this is so the timestamp can
 * be used as a cursor for lock graph traversal.
124 */
125
126 #include <linux/lockdep.h>
127 #include <linux/sched.h>
128 #include <linux/types.h>
129
/**
 * enum six_lock_type - the states a six lock can be taken in
 * @SIX_LOCK_read:	shared; may be held concurrently with other readers and
 *			with an intent holder
 * @SIX_LOCK_intent:	exclusive with other intent locks, but not with readers
 * @SIX_LOCK_write:	exclusive; an intent lock must already be held
 *
 * The lowercase suffixes are deliberate: __SIX_LOCK() pastes them onto
 * "SIX_LOCK_" when generating the per-state wrapper functions.
 */
enum six_lock_type {
	SIX_LOCK_read	= 0,
	SIX_LOCK_intent	= 1,
	SIX_LOCK_write	= 2,
};
135
136 struct six_lock {
137 atomic_t state;
138 u32 seq;
139 unsigned intent_lock_recurse;
140 unsigned write_lock_recurse;
141 struct task_struct *owner;
142 unsigned __percpu *readers;
143 raw_spinlock_t wait_lock;
144 struct list_head wait_list;
145 #ifdef CONFIG_DEBUG_LOCK_ALLOC
146 struct lockdep_map dep_map;
147 #endif
148 };
149
150 struct six_lock_waiter {
151 struct list_head list;
152 struct task_struct *task;
153 enum six_lock_type lock_want;
154 bool lock_acquired;
155 u64 start_time;
156 };
157
/*
 * Callback type accepted by the blocking lock functions.
 *
 * It is invoked with the lock being waited on and the opaque pointer the
 * caller supplied, immediately before the locker would schedule(). Returning
 * an error aborts the lock attempt, and that value is handed back to the
 * caller of the lock function.
 */
typedef int (*six_lock_should_sleep_fn)(struct six_lock *lock, void *p);
159
/*
 * Counterpart to six_lock_init(). Defined out of line; presumably releases
 * whatever six_lock_init() allocated for @lock (e.g. the percpu reader
 * counters) - confirm against the implementation.
 */
void six_lock_exit(struct six_lock *lock);
161
/**
 * enum six_lock_init_flags - optional behaviour flags for six_lock_init()
 * @SIX_LOCK_INIT_PCPU:	presumably selects percpu reader counters
 *			(&six_lock.readers) - confirm against the implementation
 */
enum six_lock_init_flags {
	SIX_LOCK_INIT_PCPU	= (1U << 0),
};
165
166 void __six_lock_init(struct six_lock *lock, const char *name,
167 struct lock_class_key *key, enum six_lock_init_flags flags,
168 gfp_t gfp);
169
/**
 * six_lock_init - initialize a six lock
 * @lock:	lock to initialize
 * @flags:	optional flags, i.e. SIX_LOCK_INIT_PCPU
 * @gfp:	allocation flags, passed through to __six_lock_init()
 *
 * Every expansion of this macro defines its own static lock_class_key, and
 * the stringified @lock expression is passed along as the lock's name.
 */
#define six_lock_init(lock, flags, gfp)					\
do {									\
	static struct lock_class_key __key;				\
									\
	__six_lock_init((lock), #lock, &__key, (flags), (gfp));		\
} while (0)
181
182 /**
183 * six_lock_seq - obtain current lock sequence number
184 * @lock: six_lock to obtain sequence number for
185 *
186 * @lock should be held for read or intent, and not write
187 *
188 * By saving the lock sequence number, we can unlock @lock and then (typically
189 * after some blocking operation) attempt to relock it: the relock will succeed
190 * if the sequence number hasn't changed, meaning no write locks have been taken
191 * and state corresponding to what @lock protects is still valid.
192 */
six_lock_seq(const struct six_lock * lock)193 static inline u32 six_lock_seq(const struct six_lock *lock)
194 {
195 return lock->seq;
196 }
197
198 bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip);
199
200 /**
201 * six_trylock_type - attempt to take a six lock without blocking
202 * @lock: lock to take
203 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
204 *
205 * Return: true on success, false on failure.
206 */
six_trylock_type(struct six_lock * lock,enum six_lock_type type)207 static inline bool six_trylock_type(struct six_lock *lock, enum six_lock_type type)
208 {
209 return six_trylock_ip(lock, type, _THIS_IP_);
210 }
211
212 int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
213 struct six_lock_waiter *wait,
214 six_lock_should_sleep_fn should_sleep_fn, void *p,
215 unsigned long ip);
216
217 /**
218 * six_lock_waiter - take a lock, with full waitlist interface
219 * @lock: lock to take
220 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
221 * @wait: pointer to wait object, which will be added to lock's waitlist
222 * @should_sleep_fn: callback run after adding to waitlist, immediately prior
223 * to scheduling
224 * @p: passed through to @should_sleep_fn
225 *
226 * This is a convenience wrapper around six_lock_ip_waiter(), see that function
227 * for full documentation.
228 *
229 * Return: 0 on success, or the return code from @should_sleep_fn on failure.
230 */
six_lock_waiter(struct six_lock * lock,enum six_lock_type type,struct six_lock_waiter * wait,six_lock_should_sleep_fn should_sleep_fn,void * p)231 static inline int six_lock_waiter(struct six_lock *lock, enum six_lock_type type,
232 struct six_lock_waiter *wait,
233 six_lock_should_sleep_fn should_sleep_fn, void *p)
234 {
235 return six_lock_ip_waiter(lock, type, wait, should_sleep_fn, p, _THIS_IP_);
236 }
237
238 /**
239 * six_lock_ip - take a six lock lock
240 * @lock: lock to take
241 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
242 * @should_sleep_fn: callback run after adding to waitlist, immediately prior
243 * to scheduling
244 * @p: passed through to @should_sleep_fn
245 * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
246 *
247 * Return: 0 on success, or the return code from @should_sleep_fn on failure.
248 */
six_lock_ip(struct six_lock * lock,enum six_lock_type type,six_lock_should_sleep_fn should_sleep_fn,void * p,unsigned long ip)249 static inline int six_lock_ip(struct six_lock *lock, enum six_lock_type type,
250 six_lock_should_sleep_fn should_sleep_fn, void *p,
251 unsigned long ip)
252 {
253 struct six_lock_waiter wait;
254
255 return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, ip);
256 }
257
258 /**
259 * six_lock_type - take a six lock lock
260 * @lock: lock to take
261 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
262 * @should_sleep_fn: callback run after adding to waitlist, immediately prior
263 * to scheduling
264 * @p: passed through to @should_sleep_fn
265 *
266 * Return: 0 on success, or the return code from @should_sleep_fn on failure.
267 */
six_lock_type(struct six_lock * lock,enum six_lock_type type,six_lock_should_sleep_fn should_sleep_fn,void * p)268 static inline int six_lock_type(struct six_lock *lock, enum six_lock_type type,
269 six_lock_should_sleep_fn should_sleep_fn, void *p)
270 {
271 struct six_lock_waiter wait;
272
273 return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, _THIS_IP_);
274 }
275
276 bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
277 unsigned seq, unsigned long ip);
278
279 /**
280 * six_relock_type - attempt to re-take a lock that was held previously
281 * @lock: lock to take
282 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
283 * @seq: lock sequence number obtained from six_lock_seq() while lock was
284 * held previously
285 *
286 * Return: true on success, false on failure.
287 */
six_relock_type(struct six_lock * lock,enum six_lock_type type,unsigned seq)288 static inline bool six_relock_type(struct six_lock *lock, enum six_lock_type type,
289 unsigned seq)
290 {
291 return six_relock_ip(lock, type, seq, _THIS_IP_);
292 }
293
/*
 * Unlock primitive behind six_unlock_type() and the generated
 * six_unlock_<type>() helpers: drops one hold of @lock in state @type.
 * @ip is the caller address for lockdep/lockstat, i.e. _THIS_IP_.
 */
void six_unlock_ip(struct six_lock *lock,
		   enum six_lock_type type,
		   unsigned long ip);
295
296 /**
297 * six_unlock_type - drop a six lock
298 * @lock: lock to unlock
299 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
300 *
301 * When a lock is held multiple times (because six_lock_incement()) was used),
302 * this decrements the 'lock held' counter by one.
303 *
304 * For example:
305 * six_lock_read(&foo->lock); read count 1
306 * six_lock_increment(&foo->lock, SIX_LOCK_read); read count 2
307 * six_lock_unlock(&foo->lock, SIX_LOCK_read); read count 1
308 * six_lock_unlock(&foo->lock, SIX_LOCK_read); read count 0
309 */
six_unlock_type(struct six_lock * lock,enum six_lock_type type)310 static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type)
311 {
312 six_unlock_ip(lock, type, _THIS_IP_);
313 }
314
315 #define __SIX_LOCK(type) \
316 static inline bool six_trylock_ip_##type(struct six_lock *lock, unsigned long ip)\
317 { \
318 return six_trylock_ip(lock, SIX_LOCK_##type, ip); \
319 } \
320 \
321 static inline bool six_trylock_##type(struct six_lock *lock) \
322 { \
323 return six_trylock_ip(lock, SIX_LOCK_##type, _THIS_IP_); \
324 } \
325 \
326 static inline int six_lock_ip_waiter_##type(struct six_lock *lock, \
327 struct six_lock_waiter *wait, \
328 six_lock_should_sleep_fn should_sleep_fn, void *p,\
329 unsigned long ip) \
330 { \
331 return six_lock_ip_waiter(lock, SIX_LOCK_##type, wait, should_sleep_fn, p, ip);\
332 } \
333 \
334 static inline int six_lock_ip_##type(struct six_lock *lock, \
335 six_lock_should_sleep_fn should_sleep_fn, void *p, \
336 unsigned long ip) \
337 { \
338 return six_lock_ip(lock, SIX_LOCK_##type, should_sleep_fn, p, ip);\
339 } \
340 \
341 static inline bool six_relock_ip_##type(struct six_lock *lock, u32 seq, unsigned long ip)\
342 { \
343 return six_relock_ip(lock, SIX_LOCK_##type, seq, ip); \
344 } \
345 \
346 static inline bool six_relock_##type(struct six_lock *lock, u32 seq) \
347 { \
348 return six_relock_ip(lock, SIX_LOCK_##type, seq, _THIS_IP_); \
349 } \
350 \
351 static inline int six_lock_##type(struct six_lock *lock, \
352 six_lock_should_sleep_fn fn, void *p)\
353 { \
354 return six_lock_ip_##type(lock, fn, p, _THIS_IP_); \
355 } \
356 \
357 static inline void six_unlock_ip_##type(struct six_lock *lock, unsigned long ip) \
358 { \
359 six_unlock_ip(lock, SIX_LOCK_##type, ip); \
360 } \
361 \
362 static inline void six_unlock_##type(struct six_lock *lock) \
363 { \
364 six_unlock_ip(lock, SIX_LOCK_##type, _THIS_IP_); \
365 }
366
367 __SIX_LOCK(read)
368 __SIX_LOCK(intent)
369 __SIX_LOCK(write)
370 #undef __SIX_LOCK
371
372 void six_lock_downgrade(struct six_lock *);
373 bool six_lock_tryupgrade(struct six_lock *);
374 bool six_trylock_convert(struct six_lock *, enum six_lock_type,
375 enum six_lock_type);
376
/*
 * Bump the "lock held in this state" counter of a lock that is already held
 * for read or intent, so that one more unlock call is needed to fully unlock
 * it. Per the overview at the top of this file, SIX_LOCK_write is not a
 * legal @type here.
 */
void six_lock_increment(struct six_lock *lock, enum six_lock_type type);
378
/*
 * Defined out of line. NOTE(review): judging by the name, this wakes every
 * waiter on @lock's wait list - confirm against the implementation.
 */
void six_lock_wakeup_all(struct six_lock *lock);
380
/*
 * Return type of six_lock_counts(): three counters. Presumably one per
 * enum six_lock_type value and indexed by it - confirm against the
 * implementation.
 */
struct six_lock_count {
	unsigned int	n[3];
};
384
/*
 * Returns a struct six_lock_count by value for @lock. Defined out of line;
 * presumably reports how many times the lock is currently held in each
 * state - confirm against the implementation.
 */
struct six_lock_count six_lock_counts(struct six_lock *lock);
/*
 * Defined out of line. NOTE(review): looks like this adjusts @lock's reader
 * count by the signed amount @nr - confirm against the implementation.
 */
void six_lock_readers_add(struct six_lock *lock, int nr);
387
388 #endif /* _LINUX_SIX_H */
389