xref: /linux/fs/bcachefs/six.c (revision c6e9dba3be5ef3b701b29b143609561915e5d0e9)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include <linux/export.h>
4 #include <linux/log2.h>
5 #include <linux/percpu.h>
6 #include <linux/preempt.h>
7 #include <linux/rcupdate.h>
8 #include <linux/sched.h>
9 #include <linux/sched/clock.h>
10 #include <linux/sched/rt.h>
11 #include <linux/sched/task.h>
12 #include <linux/slab.h>
13 
14 #include <trace/events/lock.h>
15 
16 #include "six.h"
17 
/*
 * EBUG_ON() - debug-only assertion.
 *
 * With DEBUG defined this is BUG_ON(); otherwise it expands to an empty
 * statement and @cond is not evaluated at all, so @cond must not have side
 * effects the code depends on.
 */
#ifdef DEBUG
#define EBUG_ON(cond)			BUG_ON(cond)
#else
#define EBUG_ON(cond)			do {} while (0)
#endif

/*
 * Lockdep annotations: six_acquire() forwards to lock_acquire() with @t and
 * @r passed through and the remaining arguments fixed; six_release() forwards
 * to lock_release().
 */
#define six_acquire(l, t, r, ip)	lock_acquire(l, 0, t, r, 1, NULL, ip)
#define six_release(l, ip)		lock_release(l, ip)

/* Forward declaration: six_lock_slowpath() needs this before its definition. */
static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);
28 
/*
 * Bit layout of the 32 bit lock->state word:
 *
 *   bits 0-25:	number of read locks held (only used when the lock is not in
 *		percpu reader mode)
 *   bit 26:	lock is held for intent
 *   bit 27:	lock is held for write
 *   bit 28+:	"there are waiters" flags, one per lock type; the flag for
 *		lock type t is written as SIX_LOCK_WAITING_read << t elsewhere
 *		in this file
 *   bit 31:	set by six_spin_on_owner() when an optimistic spinner timed
 *		out; subtracted again when an intent or write lock is released
 *
 * Every expansion is fully parenthesized so the macros can be used safely
 * inside any larger expression.
 */
#define SIX_LOCK_HELD_read_OFFSET	0
#define SIX_LOCK_HELD_read		(~(~0U << 26))
#define SIX_LOCK_HELD_intent		(1U << 26)
#define SIX_LOCK_HELD_write		(1U << 27)
#define SIX_LOCK_WAITING_read		(1U << (28 + SIX_LOCK_read))
#define SIX_LOCK_WAITING_write		(1U << (28 + SIX_LOCK_write))
#define SIX_LOCK_NOSPIN			(1U << 31)
36 
/*
 * Per lock type constants describing how that type interacts with
 * lock->state; one entry per lock type lives in the l[] table.
 */
struct six_lock_vals {
	/* Value we add to the lock in order to take the lock: */
	u32			lock_val;

	/* If the lock has this value (used as a mask), taking the lock fails: */
	u32			lock_fail;

	/* Mask that indicates lock is held for this type: */
	u32			held_mask;

	/* Waitlist we wakeup when releasing the lock: */
	enum six_lock_type	unlock_wakeup;
};
50 
/*
 * Lock type table, indexed by enum six_lock_type:
 *
 *  - read:   adds one to the reader count, fails while held for write, and
 *            releasing it wakes write waiters
 *  - intent: sets the intent bit, fails while already held for intent, and
 *            releasing it wakes intent waiters
 *  - write:  sets the write bit, fails while any read lock is counted in
 *            lock->state, and releasing it wakes read waiters
 */
static const struct six_lock_vals l[] = {
	[SIX_LOCK_read] = {
		.lock_val	= 1U << SIX_LOCK_HELD_read_OFFSET,
		.lock_fail	= SIX_LOCK_HELD_write,
		.held_mask	= SIX_LOCK_HELD_read,
		.unlock_wakeup	= SIX_LOCK_write,
	},
	[SIX_LOCK_intent] = {
		.lock_val	= SIX_LOCK_HELD_intent,
		.lock_fail	= SIX_LOCK_HELD_intent,
		.held_mask	= SIX_LOCK_HELD_intent,
		.unlock_wakeup	= SIX_LOCK_intent,
	},
	[SIX_LOCK_write] = {
		.lock_val	= SIX_LOCK_HELD_write,
		.lock_fail	= SIX_LOCK_HELD_read,
		.held_mask	= SIX_LOCK_HELD_write,
		.unlock_wakeup	= SIX_LOCK_read,
	},
};
71 
72 static inline void six_set_bitmask(struct six_lock *lock, u32 mask)
73 {
74 	if ((atomic_read(&lock->state) & mask) != mask)
75 		atomic_or(mask, &lock->state);
76 }
77 
78 static inline void six_clear_bitmask(struct six_lock *lock, u32 mask)
79 {
80 	if (atomic_read(&lock->state) & mask)
81 		atomic_and(~mask, &lock->state);
82 }
83 
84 static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
85 				 u32 old, struct task_struct *owner)
86 {
87 	if (type != SIX_LOCK_intent)
88 		return;
89 
90 	if (!(old & SIX_LOCK_HELD_intent)) {
91 		EBUG_ON(lock->owner);
92 		lock->owner = owner;
93 	} else {
94 		EBUG_ON(lock->owner != current);
95 	}
96 }
97 
98 static inline unsigned pcpu_read_count(struct six_lock *lock)
99 {
100 	unsigned read_count = 0;
101 	int cpu;
102 
103 	for_each_possible_cpu(cpu)
104 		read_count += *per_cpu_ptr(lock->readers, cpu);
105 	return read_count;
106 }
107 
108 /*
109  * __do_six_trylock() - main trylock routine
110  *
111  * Returns 1 on success, 0 on failure
112  *
113  * In percpu reader mode, a failed trylock may cause a spurious trylock failure
114  * for anoter thread taking the competing lock type, and we may havve to do a
115  * wakeup: when a wakeup is required, we return -1 - wakeup_type.
116  */
117 static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type,
118 			    struct task_struct *task, bool try)
119 {
120 	int ret;
121 	u32 old;
122 
123 	EBUG_ON(type == SIX_LOCK_write && lock->owner != task);
124 	EBUG_ON(type == SIX_LOCK_write &&
125 		(try != !(atomic_read(&lock->state) & SIX_LOCK_HELD_write)));
126 
127 	/*
128 	 * Percpu reader mode:
129 	 *
130 	 * The basic idea behind this algorithm is that you can implement a lock
131 	 * between two threads without any atomics, just memory barriers:
132 	 *
133 	 * For two threads you'll need two variables, one variable for "thread a
134 	 * has the lock" and another for "thread b has the lock".
135 	 *
136 	 * To take the lock, a thread sets its variable indicating that it holds
137 	 * the lock, then issues a full memory barrier, then reads from the
138 	 * other thread's variable to check if the other thread thinks it has
139 	 * the lock. If we raced, we backoff and retry/sleep.
140 	 *
141 	 * Failure to take the lock may cause a spurious trylock failure in
142 	 * another thread, because we temporarily set the lock to indicate that
143 	 * we held it. This would be a problem for a thread in six_lock(), when
144 	 * they are calling trylock after adding themself to the waitlist and
145 	 * prior to sleeping.
146 	 *
147 	 * Therefore, if we fail to get the lock, and there were waiters of the
148 	 * type we conflict with, we will have to issue a wakeup.
149 	 *
150 	 * Since we may be called under wait_lock (and by the wakeup code
151 	 * itself), we return that the wakeup has to be done instead of doing it
152 	 * here.
153 	 */
154 	if (type == SIX_LOCK_read && lock->readers) {
155 		preempt_disable();
156 		this_cpu_inc(*lock->readers); /* signal that we own lock */
157 
158 		smp_mb();
159 
160 		old = atomic_read(&lock->state);
161 		ret = !(old & l[type].lock_fail);
162 
163 		this_cpu_sub(*lock->readers, !ret);
164 		preempt_enable();
165 
166 		if (!ret && (old & SIX_LOCK_WAITING_write))
167 			ret = -1 - SIX_LOCK_write;
168 	} else if (type == SIX_LOCK_write && lock->readers) {
169 		if (try) {
170 			atomic_add(SIX_LOCK_HELD_write, &lock->state);
171 			smp_mb__after_atomic();
172 		}
173 
174 		ret = !pcpu_read_count(lock);
175 
176 		if (try && !ret) {
177 			old = atomic_sub_return(SIX_LOCK_HELD_write, &lock->state);
178 			if (old & SIX_LOCK_WAITING_read)
179 				ret = -1 - SIX_LOCK_read;
180 		}
181 	} else {
182 		old = atomic_read(&lock->state);
183 		do {
184 			ret = !(old & l[type].lock_fail);
185 			if (!ret || (type == SIX_LOCK_write && !try)) {
186 				smp_mb();
187 				break;
188 			}
189 		} while (!atomic_try_cmpxchg_acquire(&lock->state, &old, old + l[type].lock_val));
190 
191 		EBUG_ON(ret && !(atomic_read(&lock->state) & l[type].held_mask));
192 	}
193 
194 	if (ret > 0)
195 		six_set_owner(lock, type, old, task);
196 
197 	EBUG_ON(type == SIX_LOCK_write && try && ret <= 0 &&
198 		(atomic_read(&lock->state) & SIX_LOCK_HELD_write));
199 
200 	return ret;
201 }
202 
/*
 * __six_lock_wakeup() - hand the lock to waiters of @lock_type and wake them
 *
 * Walks the wait list under wait_lock. For every waiter that wants @lock_type
 * the lock is taken on the waiter's behalf with __do_six_trylock(); on success
 * the waiter is removed from the list, marked as having acquired the lock and
 * woken. Multiple read waiters may be served in one pass; for the other lock
 * types the walk stops at the second matching waiter.
 *
 * The "waiters present" bit for @lock_type is only cleared when the walk
 * reaches the end of the list.
 *
 * If the trylock reports that a wakeup of a different type is required
 * (negative return), the whole procedure is repeated for that type after
 * dropping wait_lock.
 */
static void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type)
{
	struct six_lock_waiter *w, *next;
	struct task_struct *task;
	bool saw_one;
	int ret;
again:
	ret = 0;
	saw_one = false;
	raw_spin_lock(&lock->wait_lock);

	list_for_each_entry_safe(w, next, &lock->wait_list, list) {
		if (w->lock_want != lock_type)
			continue;

		/* only read locks can be granted to more than one waiter */
		if (saw_one && lock_type != SIX_LOCK_read)
			goto unlock;
		saw_one = true;

		ret = __do_six_trylock(lock, lock_type, w->task, false);
		if (ret <= 0)
			goto unlock;

		/*
		 * Similar to percpu_rwsem_wake_function(), we need to guard
		 * against the wakee noticing w->lock_acquired, returning, and
		 * then exiting before we do the wakeup:
		 */
		task = get_task_struct(w->task);
		__list_del(w->list.prev, w->list.next);
		/*
		 * The release barrier here ensures the ordering of the
		 * __list_del before setting w->lock_acquired; @w is on the
		 * stack of the thread doing the waiting and will be reused
		 * after it sees w->lock_acquired with no other locking:
		 * pairs with smp_load_acquire() in six_lock_slowpath()
		 */
		smp_store_release(&w->lock_acquired, true);
		wake_up_process(task);
		put_task_struct(task);
	}

	six_clear_bitmask(lock, SIX_LOCK_WAITING_read << lock_type);
unlock:
	raw_spin_unlock(&lock->wait_lock);

	/* a negative trylock result encodes the wait list to wake next */
	if (ret < 0) {
		lock_type = -ret - 1;
		goto again;
	}
}
254 
255 __always_inline
256 static void six_lock_wakeup(struct six_lock *lock, u32 state,
257 			    enum six_lock_type lock_type)
258 {
259 	if (lock_type == SIX_LOCK_write && (state & SIX_LOCK_HELD_read))
260 		return;
261 
262 	if (!(state & (SIX_LOCK_WAITING_read << lock_type)))
263 		return;
264 
265 	__six_lock_wakeup(lock, lock_type);
266 }
267 
268 __always_inline
269 static bool do_six_trylock(struct six_lock *lock, enum six_lock_type type, bool try)
270 {
271 	int ret;
272 
273 	ret = __do_six_trylock(lock, type, current, try);
274 	if (ret < 0)
275 		__six_lock_wakeup(lock, -ret - 1);
276 
277 	return ret > 0;
278 }
279 
280 /**
281  * six_trylock_ip - attempt to take a six lock without blocking
282  * @lock:	lock to take
283  * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
284  * @ip:		ip parameter for lockdep/lockstat, i.e. _THIS_IP_
285  *
286  * Return: true on success, false on failure.
287  */
288 bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
289 {
290 	if (!do_six_trylock(lock, type, true))
291 		return false;
292 
293 	if (type != SIX_LOCK_write)
294 		six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);
295 	return true;
296 }
297 EXPORT_SYMBOL_GPL(six_trylock_ip);
298 
299 /**
300  * six_relock_ip - attempt to re-take a lock that was held previously
301  * @lock:	lock to take
302  * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
303  * @seq:	lock sequence number obtained from six_lock_seq() while lock was
304  *		held previously
305  * @ip:		ip parameter for lockdep/lockstat, i.e. _THIS_IP_
306  *
307  * Return: true on success, false on failure.
308  */
309 bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
310 		   unsigned seq, unsigned long ip)
311 {
312 	if (six_lock_seq(lock) != seq || !six_trylock_ip(lock, type, ip))
313 		return false;
314 
315 	if (six_lock_seq(lock) != seq) {
316 		six_unlock_ip(lock, type, ip);
317 		return false;
318 	}
319 
320 	return true;
321 }
322 EXPORT_SYMBOL_GPL(six_relock_ip);
323 
324 #ifdef CONFIG_SIX_LOCK_SPIN_ON_OWNER
325 
326 static inline bool six_can_spin_on_owner(struct six_lock *lock)
327 {
328 	struct task_struct *owner;
329 	bool ret;
330 
331 	if (need_resched())
332 		return false;
333 
334 	rcu_read_lock();
335 	owner = READ_ONCE(lock->owner);
336 	ret = !owner || owner_on_cpu(owner);
337 	rcu_read_unlock();
338 
339 	return ret;
340 }
341 
342 static inline bool six_spin_on_owner(struct six_lock *lock,
343 				     struct task_struct *owner,
344 				     u64 end_time)
345 {
346 	bool ret = true;
347 	unsigned loop = 0;
348 
349 	rcu_read_lock();
350 	while (lock->owner == owner) {
351 		/*
352 		 * Ensure we emit the owner->on_cpu, dereference _after_
353 		 * checking lock->owner still matches owner. If that fails,
354 		 * owner might point to freed memory. If it still matches,
355 		 * the rcu_read_lock() ensures the memory stays valid.
356 		 */
357 		barrier();
358 
359 		if (!owner_on_cpu(owner) || need_resched()) {
360 			ret = false;
361 			break;
362 		}
363 
364 		if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) {
365 			six_set_bitmask(lock, SIX_LOCK_NOSPIN);
366 			ret = false;
367 			break;
368 		}
369 
370 		cpu_relax();
371 	}
372 	rcu_read_unlock();
373 
374 	return ret;
375 }
376 
377 static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
378 {
379 	struct task_struct *task = current;
380 	u64 end_time;
381 
382 	if (type == SIX_LOCK_write)
383 		return false;
384 
385 	preempt_disable();
386 	if (!six_can_spin_on_owner(lock))
387 		goto fail;
388 
389 	if (!osq_lock(&lock->osq))
390 		goto fail;
391 
392 	end_time = sched_clock() + 10 * NSEC_PER_USEC;
393 
394 	while (1) {
395 		struct task_struct *owner;
396 
397 		/*
398 		 * If there's an owner, wait for it to either
399 		 * release the lock or go to sleep.
400 		 */
401 		owner = READ_ONCE(lock->owner);
402 		if (owner && !six_spin_on_owner(lock, owner, end_time))
403 			break;
404 
405 		if (do_six_trylock(lock, type, false)) {
406 			osq_unlock(&lock->osq);
407 			preempt_enable();
408 			return true;
409 		}
410 
411 		/*
412 		 * When there's no owner, we might have preempted between the
413 		 * owner acquiring the lock and setting the owner field. If
414 		 * we're an RT task that will live-lock because we won't let
415 		 * the owner complete.
416 		 */
417 		if (!owner && (need_resched() || rt_task(task)))
418 			break;
419 
420 		/*
421 		 * The cpu_relax() call is a compiler barrier which forces
422 		 * everything in this loop to be re-loaded. We don't need
423 		 * memory barriers as we'll eventually observe the right
424 		 * values at the cost of a few extra spins.
425 		 */
426 		cpu_relax();
427 	}
428 
429 	osq_unlock(&lock->osq);
430 fail:
431 	preempt_enable();
432 
433 	/*
434 	 * If we fell out of the spin path because of need_resched(),
435 	 * reschedule now, before we try-lock again. This avoids getting
436 	 * scheduled out right after we obtained the lock.
437 	 */
438 	if (need_resched())
439 		schedule();
440 
441 	return false;
442 }
443 
444 #else /* CONFIG_SIX_LOCK_SPIN_ON_OWNER */
445 
446 static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
447 {
448 	return false;
449 }
450 
451 #endif
452 
/*
 * six_lock_slowpath() - blocking path of six_lock_ip_waiter()
 * @lock:	lock to take
 * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @wait:	wait list entry to use while blocked
 * @should_sleep_fn: optional callback invoked before each sleep; a nonzero
 *		return aborts the lock attempt
 * @p:		passed through to @should_sleep_fn
 * @ip:		ip parameter for lockdep/lockstat
 *
 * For write locks SIX_LOCK_HELD_write is set up front and only cleared again
 * if the attempt is aborted. The lock is then acquired either by optimistic
 * spinning, by a final trylock under wait_lock, or by sleeping on the wait
 * list until a waker takes the lock on our behalf and sets
 * wait->lock_acquired.
 *
 * Returns 0 once the lock is held, or the nonzero value returned by
 * @should_sleep_fn, in which case the lock is not held.
 */
noinline
static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type,
			     struct six_lock_waiter *wait,
			     six_lock_should_sleep_fn should_sleep_fn, void *p,
			     unsigned long ip)
{
	int ret = 0;

	if (type == SIX_LOCK_write) {
		EBUG_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
		atomic_add(SIX_LOCK_HELD_write, &lock->state);
		smp_mb__after_atomic();
	}

	trace_contention_begin(lock, 0);
	lock_contended(&lock->dep_map, ip);

	if (six_optimistic_spin(lock, type))
		goto out;

	wait->task		= current;
	wait->lock_want		= type;
	wait->lock_acquired	= false;

	raw_spin_lock(&lock->wait_lock);
	six_set_bitmask(lock, SIX_LOCK_WAITING_read << type);
	/*
	 * Retry taking the lock after taking waitlist lock, in case we raced
	 * with an unlock:
	 */
	ret = __do_six_trylock(lock, type, current, false);
	if (ret <= 0) {
		wait->start_time = local_clock();

		/*
		 * Keep start_time strictly increasing along the wait list,
		 * even if the clock did not advance past the last entry:
		 */
		if (!list_empty(&lock->wait_list)) {
			struct six_lock_waiter *last =
				list_last_entry(&lock->wait_list,
					struct six_lock_waiter, list);

			if (time_before_eq64(wait->start_time, last->start_time))
				wait->start_time = last->start_time + 1;
		}

		list_add_tail(&wait->list, &lock->wait_list);
	}
	raw_spin_unlock(&lock->wait_lock);

	/* the retry under wait_lock succeeded: we were never queued */
	if (unlikely(ret > 0)) {
		ret = 0;
		goto out;
	}

	/* the failed retry asks for a wakeup of another waiter type */
	if (unlikely(ret < 0)) {
		__six_lock_wakeup(lock, -ret - 1);
		ret = 0;
	}

	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		/*
		 * Ensures that writes to the waitlist entry happen after we see
		 * wait->lock_acquired: pairs with the smp_store_release in
		 * __six_lock_wakeup
		 */
		if (smp_load_acquire(&wait->lock_acquired))
			break;

		ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
		if (unlikely(ret)) {
			bool acquired;

			/*
			 * If should_sleep_fn() returns an error, we are
			 * required to return that error even if we already
			 * acquired the lock - should_sleep_fn() might have
			 * modified external state (e.g. when the deadlock cycle
			 * detector in bcachefs issued a transaction restart)
			 */
			raw_spin_lock(&lock->wait_lock);
			acquired = wait->lock_acquired;
			if (!acquired)
				list_del(&wait->list);
			raw_spin_unlock(&lock->wait_lock);

			if (unlikely(acquired))
				do_six_unlock_type(lock, type);
			break;
		}

		schedule();
	}

	__set_current_state(TASK_RUNNING);
out:
	/* aborted write lock: drop the write bit set on entry, wake readers */
	if (ret && type == SIX_LOCK_write) {
		six_clear_bitmask(lock, SIX_LOCK_HELD_write);
		six_lock_wakeup(lock, atomic_read(&lock->state), SIX_LOCK_read);
	}
	trace_contention_end(lock, 0);

	return ret;
}
556 
557 /**
558  * six_lock_ip_waiter - take a lock, with full waitlist interface
559  * @lock:	lock to take
560  * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
561  * @wait:	pointer to wait object, which will be added to lock's waitlist
562  * @should_sleep_fn: callback run after adding to waitlist, immediately prior
563  *		to scheduling
564  * @p:		passed through to @should_sleep_fn
565  * @ip:		ip parameter for lockdep/lockstat, i.e. _THIS_IP_
566  *
567  * This is the most general six_lock() variant, with parameters to support full
568  * cycle detection for deadlock avoidance.
569  *
570  * The code calling this function must implement tracking of held locks, and the
571  * @wait object should be embedded into the struct that tracks held locks -
572  * which must also be accessible in a thread-safe way.
573  *
574  * @should_sleep_fn should invoke the cycle detector; it should walk each
575  * lock's waiters, and for each waiter recursively walk their held locks.
576  *
577  * When this function must block, @wait will be added to @lock's waitlist before
578  * calling trylock, and before calling @should_sleep_fn, and @wait will not be
579  * removed from the lock waitlist until the lock has been successfully acquired,
580  * or we abort.
581  *
582  * @wait.start_time will be monotonically increasing for any given waitlist, and
583  * thus may be used as a loop cursor.
584  *
585  * Return: 0 on success, or the return code from @should_sleep_fn on failure.
586  */
587 int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
588 		       struct six_lock_waiter *wait,
589 		       six_lock_should_sleep_fn should_sleep_fn, void *p,
590 		       unsigned long ip)
591 {
592 	int ret;
593 
594 	wait->start_time = 0;
595 
596 	if (type != SIX_LOCK_write)
597 		six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, ip);
598 
599 	ret = do_six_trylock(lock, type, true) ? 0
600 		: six_lock_slowpath(lock, type, wait, should_sleep_fn, p, ip);
601 
602 	if (ret && type != SIX_LOCK_write)
603 		six_release(&lock->dep_map, ip);
604 	if (!ret)
605 		lock_acquired(&lock->dep_map, ip);
606 
607 	return ret;
608 }
609 EXPORT_SYMBOL_GPL(six_lock_ip_waiter);
610 
611 __always_inline
612 static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
613 {
614 	u32 state;
615 
616 	if (type == SIX_LOCK_intent)
617 		lock->owner = NULL;
618 
619 	if (type == SIX_LOCK_read &&
620 	    lock->readers) {
621 		smp_mb(); /* unlock barrier */
622 		this_cpu_dec(*lock->readers);
623 		smp_mb(); /* between unlocking and checking for waiters */
624 		state = atomic_read(&lock->state);
625 	} else {
626 		u32 v = l[type].lock_val;
627 
628 		if (type != SIX_LOCK_read)
629 			v += atomic_read(&lock->state) & SIX_LOCK_NOSPIN;
630 
631 		EBUG_ON(!(atomic_read(&lock->state) & l[type].held_mask));
632 		state = atomic_sub_return_release(v, &lock->state);
633 	}
634 
635 	six_lock_wakeup(lock, state, l[type].unlock_wakeup);
636 }
637 
638 /**
639  * six_unlock_ip - drop a six lock
640  * @lock:	lock to unlock
641  * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
642  * @ip:		ip parameter for lockdep/lockstat, i.e. _THIS_IP_
643  *
644  * When a lock is held multiple times (because six_lock_incement()) was used),
645  * this decrements the 'lock held' counter by one.
646  *
647  * For example:
648  * six_lock_read(&foo->lock);				read count 1
649  * six_lock_increment(&foo->lock, SIX_LOCK_read);	read count 2
650  * six_lock_unlock(&foo->lock, SIX_LOCK_read);		read count 1
651  * six_lock_unlock(&foo->lock, SIX_LOCK_read);		read count 0
652  */
653 void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
654 {
655 	EBUG_ON(type == SIX_LOCK_write &&
656 		!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
657 	EBUG_ON((type == SIX_LOCK_write ||
658 		 type == SIX_LOCK_intent) &&
659 		lock->owner != current);
660 
661 	if (type != SIX_LOCK_write)
662 		six_release(&lock->dep_map, ip);
663 	else
664 		lock->seq++;
665 
666 	if (type == SIX_LOCK_intent &&
667 	    lock->intent_lock_recurse) {
668 		--lock->intent_lock_recurse;
669 		return;
670 	}
671 
672 	do_six_unlock_type(lock, type);
673 }
674 EXPORT_SYMBOL_GPL(six_unlock_ip);
675 
676 /**
677  * six_lock_downgrade - convert an intent lock to a read lock
678  * @lock:	lock to dowgrade
679  *
680  * @lock will have read count incremented and intent count decremented
681  */
682 void six_lock_downgrade(struct six_lock *lock)
683 {
684 	six_lock_increment(lock, SIX_LOCK_read);
685 	six_unlock_intent(lock);
686 }
687 EXPORT_SYMBOL_GPL(six_lock_downgrade);
688 
689 /**
690  * six_lock_tryupgrade - attempt to convert read lock to an intent lock
691  * @lock:	lock to upgrade
692  *
693  * On success, @lock will have intent count incremented and read count
694  * decremented
695  *
696  * Return: true on success, false on failure
697  */
698 bool six_lock_tryupgrade(struct six_lock *lock)
699 {
700 	u32 old = atomic_read(&lock->state), new;
701 
702 	do {
703 		new = old;
704 
705 		if (new & SIX_LOCK_HELD_intent)
706 			return false;
707 
708 		if (!lock->readers) {
709 			EBUG_ON(!(new & SIX_LOCK_HELD_read));
710 			new -= l[SIX_LOCK_read].lock_val;
711 		}
712 
713 		new |= SIX_LOCK_HELD_intent;
714 	} while (!atomic_try_cmpxchg_acquire(&lock->state, &old, new));
715 
716 	if (lock->readers)
717 		this_cpu_dec(*lock->readers);
718 
719 	six_set_owner(lock, SIX_LOCK_intent, old, current);
720 
721 	return true;
722 }
723 EXPORT_SYMBOL_GPL(six_lock_tryupgrade);
724 
725 /**
726  * six_trylock_convert - attempt to convert a held lock from one type to another
727  * @lock:	lock to upgrade
728  * @from:	SIX_LOCK_read or SIX_LOCK_intent
729  * @to:		SIX_LOCK_read or SIX_LOCK_intent
730  *
731  * On success, @lock will have intent count incremented and read count
732  * decremented
733  *
734  * Return: true on success, false on failure
735  */
736 bool six_trylock_convert(struct six_lock *lock,
737 			 enum six_lock_type from,
738 			 enum six_lock_type to)
739 {
740 	EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write);
741 
742 	if (to == from)
743 		return true;
744 
745 	if (to == SIX_LOCK_read) {
746 		six_lock_downgrade(lock);
747 		return true;
748 	} else {
749 		return six_lock_tryupgrade(lock);
750 	}
751 }
752 EXPORT_SYMBOL_GPL(six_trylock_convert);
753 
754 /**
755  * six_lock_increment - increase held lock count on a lock that is already held
756  * @lock:	lock to increment
757  * @type:	SIX_LOCK_read or SIX_LOCK_intent
758  *
759  * @lock must already be held, with a lock type that is greater than or equal to
760  * @type
761  *
762  * A corresponding six_unlock_type() call will be required for @lock to be fully
763  * unlocked.
764  */
765 void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
766 {
767 	six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, _RET_IP_);
768 
769 	/* XXX: assert already locked, and that we don't overflow: */
770 
771 	switch (type) {
772 	case SIX_LOCK_read:
773 		if (lock->readers) {
774 			this_cpu_inc(*lock->readers);
775 		} else {
776 			EBUG_ON(!(atomic_read(&lock->state) &
777 				  (SIX_LOCK_HELD_read|
778 				   SIX_LOCK_HELD_intent)));
779 			atomic_add(l[type].lock_val, &lock->state);
780 		}
781 		break;
782 	case SIX_LOCK_intent:
783 		EBUG_ON(!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
784 		lock->intent_lock_recurse++;
785 		break;
786 	case SIX_LOCK_write:
787 		BUG();
788 		break;
789 	}
790 }
791 EXPORT_SYMBOL_GPL(six_lock_increment);
792 
793 /**
794  * six_lock_wakeup_all - wake up all waiters on @lock
795  * @lock:	lock to wake up waiters for
796  *
797  * Wakeing up waiters will cause them to re-run should_sleep_fn, which may then
798  * abort the lock operation.
799  *
800  * This function is never needed in a bug-free program; it's only useful in
801  * debug code, e.g. to determine if a cycle detector is at fault.
802  */
803 void six_lock_wakeup_all(struct six_lock *lock)
804 {
805 	u32 state = atomic_read(&lock->state);
806 	struct six_lock_waiter *w;
807 
808 	six_lock_wakeup(lock, state, SIX_LOCK_read);
809 	six_lock_wakeup(lock, state, SIX_LOCK_intent);
810 	six_lock_wakeup(lock, state, SIX_LOCK_write);
811 
812 	raw_spin_lock(&lock->wait_lock);
813 	list_for_each_entry(w, &lock->wait_list, list)
814 		wake_up_process(w->task);
815 	raw_spin_unlock(&lock->wait_lock);
816 }
817 EXPORT_SYMBOL_GPL(six_lock_wakeup_all);
818 
819 /**
820  * six_lock_counts - return held lock counts, for each lock type
821  * @lock:	lock to return counters for
822  *
823  * Return: the number of times a lock is held for read, intent and write.
824  */
825 struct six_lock_count six_lock_counts(struct six_lock *lock)
826 {
827 	struct six_lock_count ret;
828 
829 	ret.n[SIX_LOCK_read]	= !lock->readers
830 		? atomic_read(&lock->state) & SIX_LOCK_HELD_read
831 		: pcpu_read_count(lock);
832 	ret.n[SIX_LOCK_intent]	= !!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent) +
833 		lock->intent_lock_recurse;
834 	ret.n[SIX_LOCK_write]	= !!(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
835 
836 	return ret;
837 }
838 EXPORT_SYMBOL_GPL(six_lock_counts);
839 
840 /**
841  * six_lock_readers_add - directly manipulate reader count of a lock
842  * @lock:	lock to add/subtract readers for
843  * @nr:		reader count to add/subtract
844  *
845  * When an upper layer is implementing lock reentrency, we may have both read
846  * and intent locks on the same lock.
847  *
848  * When we need to take a write lock, the read locks will cause self-deadlock,
849  * because six locks themselves do not track which read locks are held by the
850  * current thread and which are held by a different thread - it does no
851  * per-thread tracking of held locks.
852  *
853  * The upper layer that is tracking held locks may however, if trylock() has
854  * failed, count up its own read locks, subtract them, take the write lock, and
855  * then re-add them.
856  *
857  * As in any other situation when taking a write lock, @lock must be held for
858  * intent one (or more) times, so @lock will never be left unlocked.
859  */
860 void six_lock_readers_add(struct six_lock *lock, int nr)
861 {
862 	if (lock->readers) {
863 		this_cpu_add(*lock->readers, nr);
864 	} else {
865 		EBUG_ON((int) (atomic_read(&lock->state) & SIX_LOCK_HELD_read) + nr < 0);
866 		/* reader count starts at bit 0 */
867 		atomic_add(nr, &lock->state);
868 	}
869 }
870 EXPORT_SYMBOL_GPL(six_lock_readers_add);
871 
872 /**
873  * six_lock_exit - release resources held by a lock prior to freeing
874  * @lock:	lock to exit
875  *
876  * When a lock was initialized in percpu mode (SIX_OLCK_INIT_PCPU), this is
877  * required to free the percpu read counts.
878  */
879 void six_lock_exit(struct six_lock *lock)
880 {
881 	WARN_ON(lock->readers && pcpu_read_count(lock));
882 	WARN_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_read);
883 
884 	free_percpu(lock->readers);
885 	lock->readers = NULL;
886 }
887 EXPORT_SYMBOL_GPL(six_lock_exit);
888 
889 void __six_lock_init(struct six_lock *lock, const char *name,
890 		     struct lock_class_key *key, enum six_lock_init_flags flags)
891 {
892 	atomic_set(&lock->state, 0);
893 	raw_spin_lock_init(&lock->wait_lock);
894 	INIT_LIST_HEAD(&lock->wait_list);
895 #ifdef CONFIG_DEBUG_LOCK_ALLOC
896 	debug_check_no_locks_freed((void *) lock, sizeof(*lock));
897 	lockdep_init_map(&lock->dep_map, name, key, 0);
898 #endif
899 
900 	/*
901 	 * Don't assume that we have real percpu variables available in
902 	 * userspace:
903 	 */
904 #ifdef __KERNEL__
905 	if (flags & SIX_LOCK_INIT_PCPU) {
906 		/*
907 		 * We don't return an error here on memory allocation failure
908 		 * since percpu is an optimization, and locks will work with the
909 		 * same semantics in non-percpu mode: callers can check for
910 		 * failure if they wish by checking lock->readers, but generally
911 		 * will not want to treat it as an error.
912 		 */
913 		lock->readers = alloc_percpu(unsigned);
914 	}
915 #endif
916 }
917 EXPORT_SYMBOL_GPL(__six_lock_init);
918