xref: /linux/fs/bcachefs/six.h (revision dd83757f6e686a2188997cb58b5975f744bb7786)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 
3 #ifndef _LINUX_SIX_H
4 #define _LINUX_SIX_H
5 
6 /**
7  * DOC: SIX locks overview
8  *
9  * Shared/intent/exclusive locks: sleepable read/write locks, like rw semaphores
10  * but with an additional state: read/shared, intent, exclusive/write
11  *
12  * The purpose of the intent state is to allow for greater concurrency on tree
13  * structures without deadlocking. In general, a read can't be upgraded to a
14  * write lock without deadlocking, so an operation that updates multiple nodes
15  * will have to take write locks for the full duration of the operation.
16  *
17  * But by adding an intent state, which is exclusive with other intent locks but
18  * not with readers, we can take intent locks at the start of the operation,
19  * and then take write locks only for the actual update to each individual
20  * node, without deadlocking.
21  *
22  * Example usage:
23  *   six_lock_read(&foo->lock);
24  *   six_unlock_read(&foo->lock);
25  *
26  * An intent lock must be held before taking a write lock:
27  *   six_lock_intent(&foo->lock);
28  *   six_lock_write(&foo->lock);
29  *   six_unlock_write(&foo->lock);
30  *   six_unlock_intent(&foo->lock);
31  *
32  * Other operations:
33  *   six_trylock_read()
34  *   six_trylock_intent()
35  *   six_trylock_write()
36  *
37  *   six_lock_downgrade()	convert from intent to read
38  *   six_lock_tryupgrade()	attempt to convert from read to intent, may fail
39  *
40  * There are also interfaces that take the lock type as an enum:
41  *
42  *   six_lock_type(&foo->lock, SIX_LOCK_read);
43  *   six_trylock_convert(&foo->lock, SIX_LOCK_read, SIX_LOCK_intent)
44  *   six_lock_type(&foo->lock, SIX_LOCK_write);
45  *   six_unlock_type(&foo->lock, SIX_LOCK_write);
46  *   six_unlock_type(&foo->lock, SIX_LOCK_intent);
47  *
48  * Lock sequence numbers - unlock(), relock():
49  *
50  *   Locks embed sequence numbers, which are incremented on write lock/unlock.
51  *   This allows locks to be dropped and then retaken iff the state they protect
52  *   hasn't changed; this makes it much easier to avoid holding locks while e.g.
53  *   doing IO or allocating memory.
54  *
55  *   Example usage:
56  *     six_lock_read(&foo->lock);
57  *     u32 seq = six_lock_seq(&foo->lock);
58  *     six_unlock_read(&foo->lock);
59  *
60  *     some_operation_that_may_block();
61  *
62  *     if (six_relock_read(&foo->lock, seq)) { ... }
63  *
64  *   If the relock operation succeeds, it is as if the lock was never unlocked.
65  *
66  * Reentrancy:
67  *
68  *   Six locks are not by themselves reentrant, but have counters for both the
69  *   read and intent states that can be used to provide reentrancy by an upper
70  *   layer that tracks held locks. If a lock is known to already be held in the
71  *   read or intent state, six_lock_increment() can be used to bump the "lock
72  *   held in this state" counter, increasing the number of unlock calls that
73  *   will be required to fully unlock it.
74  *
75  *   Example usage:
76  *     six_lock_read(&foo->lock);
77  *     six_lock_increment(&foo->lock, SIX_LOCK_read);
78  *     six_unlock_read(&foo->lock);
79  *     six_unlock_read(&foo->lock);
80  *   foo->lock is now fully unlocked.
81  *
82  *   Since the intent state supersedes read, it's legal to increment the read
83  *   counter when holding an intent lock, but not the reverse.
84  *
85  *   A lock may only be held once for write: six_lock_increment(.., SIX_LOCK_write)
86  *   is not legal.
87  *
88  * should_sleep_fn:
89  *
90  *   There is a six_lock() variant that takes a function pointer that is called
91  *   immediately prior to schedule() when blocking, and may return an error to
92  *   abort.
93  *
94  *   One possible use for this feature is when objects being locked are part of
95  *   a cache and may be reused, and lock ordering is based on a property of the
96  *   object that will change when the object is reused - i.e. logical key order.
97  *
98  *   If looking up an object in the cache may race with object reuse, and lock
99  *   ordering is required to prevent deadlock, object reuse may change the
100  *   correct lock order for that object and cause a deadlock. should_sleep_fn
101  *   can be used to check if the object is still the object we want and avoid
102  *   this deadlock.
103  *
104  * Wait list entry interface:
105  *
106  *   There is a six_lock() variant, six_lock_waiter(), that takes a pointer to a
107  *   wait list entry. By embedding six_lock_waiter into another object, and by
108  *   traversing lock waitlists, it is then possible for an upper layer to
109  *   implement full cycle detection for deadlock avoidance.
110  *
111  *   should_sleep_fn should be used for invoking the cycle detector, walking the
112  *   graph of held locks to check for a deadlock. The upper layer must track
113  *   held locks for each thread, and each thread's held locks must be reachable
114  *   from its six_lock_waiter object.
115  *
116  *   six_lock_waiter() will add the wait object to the waitlist before re-trying
117  *   the lock, and before calling should_sleep_fn, and the wait object will not
118  *   be removed from the waitlist until either the lock has been successfully
119  *   acquired, or we aborted because should_sleep_fn returned an error.
120  *
121  *   Also, six_lock_waiter contains a timestamp, and waiters on a waitlist will
122  *   have timestamps in strictly ascending order - this is so the timestamp can
123  *   be used as a cursor for lock graph traversal.
124  */
125 
126 #include <linux/lockdep.h>
127 #include <linux/sched.h>
128 #include <linux/types.h>
129 
/**
 * enum six_lock_type - the states a six lock can be taken in
 * @SIX_LOCK_read:	read/shared
 * @SIX_LOCK_intent:	intent; exclusive with other intent locks but not with
 *			readers
 * @SIX_LOCK_write:	exclusive/write; an intent lock must be held before
 *			taking a write lock
 *
 * The values are written out explicitly; they are the same ones the compiler
 * would assign by default.
 */
enum six_lock_type {
	SIX_LOCK_read	= 0,
	SIX_LOCK_intent	= 1,
	SIX_LOCK_write	= 2,
};
135 
136 struct six_lock {
137 	atomic_t		state;
138 	u32			seq;
139 	unsigned		intent_lock_recurse;
140 	unsigned		write_lock_recurse;
141 	struct task_struct	*owner;
142 	unsigned __percpu	*readers;
143 	raw_spinlock_t		wait_lock;
144 	struct list_head	wait_list;
145 #ifdef CONFIG_DEBUG_LOCK_ALLOC
146 	struct lockdep_map	dep_map;
147 #endif
148 };
149 
150 struct six_lock_waiter {
151 	struct list_head	list;
152 	struct task_struct	*task;
153 	enum six_lock_type	lock_want;
154 	bool			lock_acquired;
155 	u64			start_time;
156 };
157 
/**
 * typedef six_lock_should_sleep_fn - hook run before a lock attempt sleeps
 * @lock:	NOTE(review): presumably the lock the caller is blocking on -
 *		confirm against the implementation
 * @p:		opaque pointer, passed through from the locking call
 *
 * Called immediately prior to schedule() when blocking. The callback may
 * return an error to abort the lock attempt; the locking functions in this
 * header then return that code to their caller.
 */
typedef int (*six_lock_should_sleep_fn)(struct six_lock *lock, void *p);
159 
/*
 * six_lock_exit() - counterpart to six_lock_init(); defined outside this header.
 *
 * NOTE(review): presumably releases whatever __six_lock_init() allocated for
 * @lock (e.g. the per-CPU reader counters) - confirm against the
 * implementation.
 */
void six_lock_exit(struct six_lock *lock);
161 
/**
 * enum six_lock_init_flags - optional behaviour selected at six_lock_init() time
 * @SIX_LOCK_INIT_PCPU:	bit 0; NOTE(review): presumably requests the per-CPU
 *			reader counter (six_lock.readers) - confirm
 *
 * Values are individual bits so that flags can be OR'd together.
 */
enum six_lock_init_flags {
	SIX_LOCK_INIT_PCPU = 1U << 0,
};
165 
166 void __six_lock_init(struct six_lock *lock, const char *name,
167 		     struct lock_class_key *key, enum six_lock_init_flags flags,
168 		     gfp_t gfp);
169 
170 /**
171  * six_lock_init - initialize a six lock
172  * @lock:	lock to initialize
173  * @flags:	optional flags, i.e. SIX_LOCK_INIT_PCPU
174  */
175 #define six_lock_init(lock, flags, gfp)					\
176 do {									\
177 	static struct lock_class_key __key;				\
178 									\
179 	__six_lock_init((lock), #lock, &__key, flags, gfp);			\
180 } while (0)
181 
182 /**
183  * six_lock_seq - obtain current lock sequence number
184  * @lock:	six_lock to obtain sequence number for
185  *
186  * @lock should be held for read or intent, and not write
187  *
188  * By saving the lock sequence number, we can unlock @lock and then (typically
189  * after some blocking operation) attempt to relock it: the relock will succeed
190  * if the sequence number hasn't changed, meaning no write locks have been taken
191  * and state corresponding to what @lock protects is still valid.
192  */
six_lock_seq(const struct six_lock * lock)193 static inline u32 six_lock_seq(const struct six_lock *lock)
194 {
195 	return lock->seq;
196 }
197 
198 bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip);
199 
200 /**
201  * six_trylock_type - attempt to take a six lock without blocking
202  * @lock:	lock to take
203  * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
204  *
205  * Return: true on success, false on failure.
206  */
six_trylock_type(struct six_lock * lock,enum six_lock_type type)207 static inline bool six_trylock_type(struct six_lock *lock, enum six_lock_type type)
208 {
209 	return six_trylock_ip(lock, type, _THIS_IP_);
210 }
211 
212 int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
213 		       struct six_lock_waiter *wait,
214 		       six_lock_should_sleep_fn should_sleep_fn, void *p,
215 		       unsigned long ip);
216 
217 /**
218  * six_lock_waiter - take a lock, with full waitlist interface
219  * @lock:	lock to take
220  * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
221  * @wait:	pointer to wait object, which will be added to lock's waitlist
222  * @should_sleep_fn: callback run after adding to waitlist, immediately prior
223  *		to scheduling
224  * @p:		passed through to @should_sleep_fn
225  *
226  * This is a convenience wrapper around six_lock_ip_waiter(), see that function
227  * for full documentation.
228  *
229  * Return: 0 on success, or the return code from @should_sleep_fn on failure.
230  */
six_lock_waiter(struct six_lock * lock,enum six_lock_type type,struct six_lock_waiter * wait,six_lock_should_sleep_fn should_sleep_fn,void * p)231 static inline int six_lock_waiter(struct six_lock *lock, enum six_lock_type type,
232 				  struct six_lock_waiter *wait,
233 				  six_lock_should_sleep_fn should_sleep_fn, void *p)
234 {
235 	return six_lock_ip_waiter(lock, type, wait, should_sleep_fn, p, _THIS_IP_);
236 }
237 
238 /**
239  * six_lock_ip - take a six lock lock
240  * @lock:	lock to take
241  * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
242  * @should_sleep_fn: callback run after adding to waitlist, immediately prior
243  *		to scheduling
244  * @p:		passed through to @should_sleep_fn
245  * @ip:		ip parameter for lockdep/lockstat, i.e. _THIS_IP_
246  *
247  * Return: 0 on success, or the return code from @should_sleep_fn on failure.
248  */
six_lock_ip(struct six_lock * lock,enum six_lock_type type,six_lock_should_sleep_fn should_sleep_fn,void * p,unsigned long ip)249 static inline int six_lock_ip(struct six_lock *lock, enum six_lock_type type,
250 			      six_lock_should_sleep_fn should_sleep_fn, void *p,
251 			      unsigned long ip)
252 {
253 	struct six_lock_waiter wait;
254 
255 	return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, ip);
256 }
257 
258 /**
259  * six_lock_type - take a six lock lock
260  * @lock:	lock to take
261  * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
262  * @should_sleep_fn: callback run after adding to waitlist, immediately prior
263  *		to scheduling
264  * @p:		passed through to @should_sleep_fn
265  *
266  * Return: 0 on success, or the return code from @should_sleep_fn on failure.
267  */
six_lock_type(struct six_lock * lock,enum six_lock_type type,six_lock_should_sleep_fn should_sleep_fn,void * p)268 static inline int six_lock_type(struct six_lock *lock, enum six_lock_type type,
269 				six_lock_should_sleep_fn should_sleep_fn, void *p)
270 {
271 	struct six_lock_waiter wait;
272 
273 	return six_lock_ip_waiter(lock, type, &wait, should_sleep_fn, p, _THIS_IP_);
274 }
275 
276 bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
277 		   unsigned seq, unsigned long ip);
278 
279 /**
280  * six_relock_type - attempt to re-take a lock that was held previously
281  * @lock:	lock to take
282  * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
283  * @seq:	lock sequence number obtained from six_lock_seq() while lock was
284  *		held previously
285  *
286  * Return: true on success, false on failure.
287  */
six_relock_type(struct six_lock * lock,enum six_lock_type type,unsigned seq)288 static inline bool six_relock_type(struct six_lock *lock, enum six_lock_type type,
289 				   unsigned seq)
290 {
291 	return six_relock_ip(lock, type, seq, _THIS_IP_);
292 }
293 
294 void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip);
295 
296 /**
297  * six_unlock_type - drop a six lock
298  * @lock:	lock to unlock
299  * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
300  *
301  * When a lock is held multiple times (because six_lock_incement()) was used),
302  * this decrements the 'lock held' counter by one.
303  *
304  * For example:
305  * six_lock_read(&foo->lock);				read count 1
306  * six_lock_increment(&foo->lock, SIX_LOCK_read);	read count 2
307  * six_lock_unlock(&foo->lock, SIX_LOCK_read);		read count 1
308  * six_lock_unlock(&foo->lock, SIX_LOCK_read);		read count 0
309  */
six_unlock_type(struct six_lock * lock,enum six_lock_type type)310 static inline void six_unlock_type(struct six_lock *lock, enum six_lock_type type)
311 {
312 	six_unlock_ip(lock, type, _THIS_IP_);
313 }
314 
315 #define __SIX_LOCK(type)						\
316 static inline bool six_trylock_ip_##type(struct six_lock *lock, unsigned long ip)\
317 {									\
318 	return six_trylock_ip(lock, SIX_LOCK_##type, ip);		\
319 }									\
320 									\
321 static inline bool six_trylock_##type(struct six_lock *lock)		\
322 {									\
323 	return six_trylock_ip(lock, SIX_LOCK_##type, _THIS_IP_);	\
324 }									\
325 									\
326 static inline int six_lock_ip_waiter_##type(struct six_lock *lock,	\
327 			   struct six_lock_waiter *wait,		\
328 			   six_lock_should_sleep_fn should_sleep_fn, void *p,\
329 			   unsigned long ip)				\
330 {									\
331 	return six_lock_ip_waiter(lock, SIX_LOCK_##type, wait, should_sleep_fn, p, ip);\
332 }									\
333 									\
334 static inline int six_lock_ip_##type(struct six_lock *lock,		\
335 		    six_lock_should_sleep_fn should_sleep_fn, void *p,	\
336 		    unsigned long ip)					\
337 {									\
338 	return six_lock_ip(lock, SIX_LOCK_##type, should_sleep_fn, p, ip);\
339 }									\
340 									\
341 static inline bool six_relock_ip_##type(struct six_lock *lock, u32 seq, unsigned long ip)\
342 {									\
343 	return six_relock_ip(lock, SIX_LOCK_##type, seq, ip);		\
344 }									\
345 									\
346 static inline bool six_relock_##type(struct six_lock *lock, u32 seq)	\
347 {									\
348 	return six_relock_ip(lock, SIX_LOCK_##type, seq, _THIS_IP_);	\
349 }									\
350 									\
351 static inline int six_lock_##type(struct six_lock *lock,		\
352 				  six_lock_should_sleep_fn fn, void *p)\
353 {									\
354 	return six_lock_ip_##type(lock, fn, p, _THIS_IP_);		\
355 }									\
356 									\
357 static inline void six_unlock_ip_##type(struct six_lock *lock, unsigned long ip)	\
358 {									\
359 	six_unlock_ip(lock, SIX_LOCK_##type, ip);			\
360 }									\
361 									\
362 static inline void six_unlock_##type(struct six_lock *lock)		\
363 {									\
364 	six_unlock_ip(lock, SIX_LOCK_##type, _THIS_IP_);		\
365 }
366 
367 __SIX_LOCK(read)
368 __SIX_LOCK(intent)
369 __SIX_LOCK(write)
370 #undef __SIX_LOCK
371 
372 void six_lock_downgrade(struct six_lock *);
373 bool six_lock_tryupgrade(struct six_lock *);
374 bool six_trylock_convert(struct six_lock *, enum six_lock_type,
375 			 enum six_lock_type);
376 
/*
 * six_lock_increment() - bump the "lock held in this state" counter of a lock
 * that is already known to be held, so that one more unlock call is needed to
 * fully unlock it. Legal for SIX_LOCK_read and SIX_LOCK_intent only; a lock may
 * only be held once for write. Defined outside this header.
 */
void six_lock_increment(struct six_lock *lock, enum six_lock_type type);
378 
/*
 * six_lock_wakeup_all() - defined outside this header.
 *
 * NOTE(review): presumably wakes every task on @lock's waitlist - confirm
 * against the implementation.
 */
void six_lock_wakeup_all(struct six_lock *lock);
380 
/**
 * struct six_lock_count - value type returned by six_lock_counts()
 * @n:	three counters; NOTE(review): presumably indexed by
 *	enum six_lock_type (read, intent, write) - confirm against the
 *	implementation
 */
struct six_lock_count {
	unsigned int n[3];
};
384 
/*
 * Both defined outside this header.
 *
 * six_lock_counts()		returns a struct six_lock_count for @lock, by
 *				value
 * six_lock_readers_add()	NOTE(review): presumably adjusts @lock's reader
 *				count by @nr, which may be negative - confirm
 *				against the implementation
 */
struct six_lock_count six_lock_counts(struct six_lock *lock);
void six_lock_readers_add(struct six_lock *lock, int nr);
387 
388 #endif /* _LINUX_SIX_H */
389