xref: /linux/fs/bcachefs/six.c (revision 031fba65fc202abf1f193e321be7a2c274fd88ba)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include <linux/export.h>
4 #include <linux/log2.h>
5 #include <linux/percpu.h>
6 #include <linux/preempt.h>
7 #include <linux/rcupdate.h>
8 #include <linux/sched.h>
9 #include <linux/sched/clock.h>
10 #include <linux/sched/rt.h>
11 #include <linux/sched/task.h>
12 #include <linux/slab.h>
13 
14 #include "six.h"
15 
/*
 * EBUG_ON() - assertion that is only compiled in when DEBUG is defined;
 * otherwise it expands to an empty statement and @cond is not evaluated.
 */
#ifdef DEBUG
#define EBUG_ON(cond)			BUG_ON(cond)
#else
#define EBUG_ON(cond)			do {} while (0)
#endif

/* Shorthands for the lock_acquire()/lock_release() annotations: */
#define six_acquire(l, t, r, ip)	lock_acquire(l, 0, t, r, 1, NULL, ip)
#define six_release(l, ip)		lock_release(l, ip)

/* Forward declaration: used by six_lock_slowpath(), defined further down. */
static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);
26 
/*
 * Layout of the 32 bit lock->state word:
 *
 *   bits 0-25	count of read locks held (used when not in percpu reader mode)
 *   bit  26	lock is held for intent
 *   bit  27	lock is held for write
 *   bit  28+n	there are waiters for lock type n (n is an enum six_lock_type
 *		value; other code shifts SIX_LOCK_WAITING_read left by the type)
 *   bit  31	SIX_LOCK_NOSPIN, set when optimistic spinning timed out
 *
 * Every expansion is fully parenthesized so that the masks can be dropped
 * into any larger expression without precedence surprises.
 */
#define SIX_LOCK_HELD_read_OFFSET	0
#define SIX_LOCK_HELD_read		(~(~0U << 26))
#define SIX_LOCK_HELD_intent		(1U << 26)
#define SIX_LOCK_HELD_write		(1U << 27)
#define SIX_LOCK_WAITING_read		(1U << (28 + SIX_LOCK_read))
#define SIX_LOCK_WAITING_write		(1U << (28 + SIX_LOCK_write))
#define SIX_LOCK_NOSPIN			(1U << 31)
34 
/*
 * Per lock type parameters used when operating on lock->state; the l[] table
 * below holds one instance for each enum six_lock_type value.
 */
struct six_lock_vals {
	/* Value we add to the lock in order to take the lock: */
	u32			lock_val;

	/* If the lock has this value (used as a mask), taking the lock fails: */
	u32			lock_fail;

	/* Mask that indicates lock is held for this type: */
	u32			held_mask;

	/* Waitlist we wakeup when releasing the lock: */
	enum six_lock_type	unlock_wakeup;
};
48 
/*
 * Lock parameters, indexed by enum six_lock_type:
 *
 *  - read:   adds one to the reader count, fails while the write bit is set,
 *            and wakes waiters for write when dropped
 *  - intent: sets the intent bit, fails while the intent bit is already set,
 *            and wakes waiters for intent when dropped
 *  - write:  sets the write bit, fails while any read lock is counted, and
 *            wakes waiters for read when dropped
 */
static const struct six_lock_vals l[] = {
	[SIX_LOCK_read] = {
		.lock_val	= 1U << SIX_LOCK_HELD_read_OFFSET,
		.lock_fail	= SIX_LOCK_HELD_write,
		.held_mask	= SIX_LOCK_HELD_read,
		.unlock_wakeup	= SIX_LOCK_write,
	},
	[SIX_LOCK_intent] = {
		.lock_val	= SIX_LOCK_HELD_intent,
		.lock_fail	= SIX_LOCK_HELD_intent,
		.held_mask	= SIX_LOCK_HELD_intent,
		.unlock_wakeup	= SIX_LOCK_intent,
	},
	[SIX_LOCK_write] = {
		.lock_val	= SIX_LOCK_HELD_write,
		.lock_fail	= SIX_LOCK_HELD_read,
		.held_mask	= SIX_LOCK_HELD_write,
		.unlock_wakeup	= SIX_LOCK_read,
	},
};
69 
70 static inline void six_set_bitmask(struct six_lock *lock, u32 mask)
71 {
72 	if ((atomic_read(&lock->state) & mask) != mask)
73 		atomic_or(mask, &lock->state);
74 }
75 
76 static inline void six_clear_bitmask(struct six_lock *lock, u32 mask)
77 {
78 	if (atomic_read(&lock->state) & mask)
79 		atomic_and(~mask, &lock->state);
80 }
81 
82 static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
83 				 u32 old, struct task_struct *owner)
84 {
85 	if (type != SIX_LOCK_intent)
86 		return;
87 
88 	if (!(old & SIX_LOCK_HELD_intent)) {
89 		EBUG_ON(lock->owner);
90 		lock->owner = owner;
91 	} else {
92 		EBUG_ON(lock->owner != current);
93 	}
94 }
95 
96 static inline unsigned pcpu_read_count(struct six_lock *lock)
97 {
98 	unsigned read_count = 0;
99 	int cpu;
100 
101 	for_each_possible_cpu(cpu)
102 		read_count += *per_cpu_ptr(lock->readers, cpu);
103 	return read_count;
104 }
105 
106 /*
107  * __do_six_trylock() - main trylock routine
108  *
109  * Returns 1 on success, 0 on failure
110  *
111  * In percpu reader mode, a failed trylock may cause a spurious trylock failure
112  * for anoter thread taking the competing lock type, and we may havve to do a
113  * wakeup: when a wakeup is required, we return -1 - wakeup_type.
114  */
115 static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type,
116 			    struct task_struct *task, bool try)
117 {
118 	int ret;
119 	u32 old;
120 
121 	EBUG_ON(type == SIX_LOCK_write && lock->owner != task);
122 	EBUG_ON(type == SIX_LOCK_write &&
123 		(try != !(atomic_read(&lock->state) & SIX_LOCK_HELD_write)));
124 
125 	/*
126 	 * Percpu reader mode:
127 	 *
128 	 * The basic idea behind this algorithm is that you can implement a lock
129 	 * between two threads without any atomics, just memory barriers:
130 	 *
131 	 * For two threads you'll need two variables, one variable for "thread a
132 	 * has the lock" and another for "thread b has the lock".
133 	 *
134 	 * To take the lock, a thread sets its variable indicating that it holds
135 	 * the lock, then issues a full memory barrier, then reads from the
136 	 * other thread's variable to check if the other thread thinks it has
137 	 * the lock. If we raced, we backoff and retry/sleep.
138 	 *
139 	 * Failure to take the lock may cause a spurious trylock failure in
140 	 * another thread, because we temporarily set the lock to indicate that
141 	 * we held it. This would be a problem for a thread in six_lock(), when
142 	 * they are calling trylock after adding themself to the waitlist and
143 	 * prior to sleeping.
144 	 *
145 	 * Therefore, if we fail to get the lock, and there were waiters of the
146 	 * type we conflict with, we will have to issue a wakeup.
147 	 *
148 	 * Since we may be called under wait_lock (and by the wakeup code
149 	 * itself), we return that the wakeup has to be done instead of doing it
150 	 * here.
151 	 */
152 	if (type == SIX_LOCK_read && lock->readers) {
153 		preempt_disable();
154 		this_cpu_inc(*lock->readers); /* signal that we own lock */
155 
156 		smp_mb();
157 
158 		old = atomic_read(&lock->state);
159 		ret = !(old & l[type].lock_fail);
160 
161 		this_cpu_sub(*lock->readers, !ret);
162 		preempt_enable();
163 
164 		if (!ret && (old & SIX_LOCK_WAITING_write))
165 			ret = -1 - SIX_LOCK_write;
166 	} else if (type == SIX_LOCK_write && lock->readers) {
167 		if (try) {
168 			atomic_add(SIX_LOCK_HELD_write, &lock->state);
169 			smp_mb__after_atomic();
170 		}
171 
172 		ret = !pcpu_read_count(lock);
173 
174 		if (try && !ret) {
175 			old = atomic_sub_return(SIX_LOCK_HELD_write, &lock->state);
176 			if (old & SIX_LOCK_WAITING_read)
177 				ret = -1 - SIX_LOCK_read;
178 		}
179 	} else {
180 		old = atomic_read(&lock->state);
181 		do {
182 			ret = !(old & l[type].lock_fail);
183 			if (!ret || (type == SIX_LOCK_write && !try)) {
184 				smp_mb();
185 				break;
186 			}
187 		} while (!atomic_try_cmpxchg_acquire(&lock->state, &old, old + l[type].lock_val));
188 
189 		EBUG_ON(ret && !(atomic_read(&lock->state) & l[type].held_mask));
190 	}
191 
192 	if (ret > 0)
193 		six_set_owner(lock, type, old, task);
194 
195 	EBUG_ON(type == SIX_LOCK_write && try && ret <= 0 &&
196 		(atomic_read(&lock->state) & SIX_LOCK_HELD_write));
197 
198 	return ret;
199 }
200 
/*
 * __six_lock_wakeup() - hand the lock to waiters of type @lock_type
 *
 * Walks the waitlist under wait_lock; for each waiter that wants @lock_type,
 * takes the lock on the waiter's behalf with __do_six_trylock() and, on
 * success, removes it from the waitlist, sets w->lock_acquired and wakes it.
 *
 * If the trylock reports that a wakeup of a different lock type is required
 * (negative return), the whole procedure is repeated for that type.
 */
static void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type)
{
	struct six_lock_waiter *w, *next;
	struct task_struct *task;
	bool saw_one;
	int ret;
again:
	ret = 0;
	saw_one = false;
	raw_spin_lock(&lock->wait_lock);

	list_for_each_entry_safe(w, next, &lock->wait_list, list) {
		if (w->lock_want != lock_type)
			continue;

		/* Only read locks are handed to more than one waiter per pass: */
		if (saw_one && lock_type != SIX_LOCK_read)
			goto unlock;
		saw_one = true;

		ret = __do_six_trylock(lock, lock_type, w->task, false);
		if (ret <= 0)
			goto unlock;

		/*
		 * Similar to percpu_rwsem_wake_function(), we need to guard
		 * against the wakee noticing w->lock_acquired, returning, and
		 * then exiting before we do the wakeup:
		 */
		task = get_task_struct(w->task);
		__list_del(w->list.prev, w->list.next);
		/*
		 * The release barrier here ensures the ordering of the
		 * __list_del before setting w->lock_acquired; @w is on the
		 * stack of the thread doing the waiting and will be reused
		 * after it sees w->lock_acquired with no other locking:
		 * pairs with smp_load_acquire() in six_lock_slowpath()
		 */
		smp_store_release(&w->lock_acquired, true);
		wake_up_process(task);
		put_task_struct(task);
	}

	/*
	 * Only reached when the walk completed without bailing out, i.e. no
	 * waiter of this type is left on the list: clear its waiting bit.
	 */
	six_clear_bitmask(lock, SIX_LOCK_WAITING_read << lock_type);
unlock:
	raw_spin_unlock(&lock->wait_lock);

	if (ret < 0) {
		/* Decode the lock type __do_six_trylock() asked us to wake: */
		lock_type = -ret - 1;
		goto again;
	}
}
252 
253 __always_inline
254 static void six_lock_wakeup(struct six_lock *lock, u32 state,
255 			    enum six_lock_type lock_type)
256 {
257 	if (lock_type == SIX_LOCK_write && (state & SIX_LOCK_HELD_read))
258 		return;
259 
260 	if (!(state & (SIX_LOCK_WAITING_read << lock_type)))
261 		return;
262 
263 	__six_lock_wakeup(lock, lock_type);
264 }
265 
266 __always_inline
267 static bool do_six_trylock(struct six_lock *lock, enum six_lock_type type, bool try)
268 {
269 	int ret;
270 
271 	ret = __do_six_trylock(lock, type, current, try);
272 	if (ret < 0)
273 		__six_lock_wakeup(lock, -ret - 1);
274 
275 	return ret > 0;
276 }
277 
278 /**
279  * six_trylock_ip - attempt to take a six lock without blocking
280  * @lock:	lock to take
281  * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
282  * @ip:		ip parameter for lockdep/lockstat, i.e. _THIS_IP_
283  *
284  * Return: true on success, false on failure.
285  */
286 bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
287 {
288 	if (!do_six_trylock(lock, type, true))
289 		return false;
290 
291 	if (type != SIX_LOCK_write)
292 		six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);
293 	return true;
294 }
295 EXPORT_SYMBOL_GPL(six_trylock_ip);
296 
297 /**
298  * six_relock_ip - attempt to re-take a lock that was held previously
299  * @lock:	lock to take
300  * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
301  * @seq:	lock sequence number obtained from six_lock_seq() while lock was
302  *		held previously
303  * @ip:		ip parameter for lockdep/lockstat, i.e. _THIS_IP_
304  *
305  * Return: true on success, false on failure.
306  */
307 bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
308 		   unsigned seq, unsigned long ip)
309 {
310 	if (six_lock_seq(lock) != seq || !six_trylock_ip(lock, type, ip))
311 		return false;
312 
313 	if (six_lock_seq(lock) != seq) {
314 		six_unlock_ip(lock, type, ip);
315 		return false;
316 	}
317 
318 	return true;
319 }
320 EXPORT_SYMBOL_GPL(six_relock_ip);
321 
322 #ifdef CONFIG_SIX_LOCK_SPIN_ON_OWNER
323 
324 static inline bool six_can_spin_on_owner(struct six_lock *lock)
325 {
326 	struct task_struct *owner;
327 	bool ret;
328 
329 	if (need_resched())
330 		return false;
331 
332 	rcu_read_lock();
333 	owner = READ_ONCE(lock->owner);
334 	ret = !owner || owner_on_cpu(owner);
335 	rcu_read_unlock();
336 
337 	return ret;
338 }
339 
340 static inline bool six_spin_on_owner(struct six_lock *lock,
341 				     struct task_struct *owner,
342 				     u64 end_time)
343 {
344 	bool ret = true;
345 	unsigned loop = 0;
346 
347 	rcu_read_lock();
348 	while (lock->owner == owner) {
349 		/*
350 		 * Ensure we emit the owner->on_cpu, dereference _after_
351 		 * checking lock->owner still matches owner. If that fails,
352 		 * owner might point to freed memory. If it still matches,
353 		 * the rcu_read_lock() ensures the memory stays valid.
354 		 */
355 		barrier();
356 
357 		if (!owner_on_cpu(owner) || need_resched()) {
358 			ret = false;
359 			break;
360 		}
361 
362 		if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) {
363 			six_set_bitmask(lock, SIX_LOCK_NOSPIN);
364 			ret = false;
365 			break;
366 		}
367 
368 		cpu_relax();
369 	}
370 	rcu_read_unlock();
371 
372 	return ret;
373 }
374 
375 static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
376 {
377 	struct task_struct *task = current;
378 	u64 end_time;
379 
380 	if (type == SIX_LOCK_write)
381 		return false;
382 
383 	preempt_disable();
384 	if (!six_can_spin_on_owner(lock))
385 		goto fail;
386 
387 	if (!osq_lock(&lock->osq))
388 		goto fail;
389 
390 	end_time = sched_clock() + 10 * NSEC_PER_USEC;
391 
392 	while (1) {
393 		struct task_struct *owner;
394 
395 		/*
396 		 * If there's an owner, wait for it to either
397 		 * release the lock or go to sleep.
398 		 */
399 		owner = READ_ONCE(lock->owner);
400 		if (owner && !six_spin_on_owner(lock, owner, end_time))
401 			break;
402 
403 		if (do_six_trylock(lock, type, false)) {
404 			osq_unlock(&lock->osq);
405 			preempt_enable();
406 			return true;
407 		}
408 
409 		/*
410 		 * When there's no owner, we might have preempted between the
411 		 * owner acquiring the lock and setting the owner field. If
412 		 * we're an RT task that will live-lock because we won't let
413 		 * the owner complete.
414 		 */
415 		if (!owner && (need_resched() || rt_task(task)))
416 			break;
417 
418 		/*
419 		 * The cpu_relax() call is a compiler barrier which forces
420 		 * everything in this loop to be re-loaded. We don't need
421 		 * memory barriers as we'll eventually observe the right
422 		 * values at the cost of a few extra spins.
423 		 */
424 		cpu_relax();
425 	}
426 
427 	osq_unlock(&lock->osq);
428 fail:
429 	preempt_enable();
430 
431 	/*
432 	 * If we fell out of the spin path because of need_resched(),
433 	 * reschedule now, before we try-lock again. This avoids getting
434 	 * scheduled out right after we obtained the lock.
435 	 */
436 	if (need_resched())
437 		schedule();
438 
439 	return false;
440 }
441 
442 #else /* CONFIG_SIX_LOCK_SPIN_ON_OWNER */
443 
/*
 * Stub used when CONFIG_SIX_LOCK_SPIN_ON_OWNER is not set: never spins, so
 * callers always fall through to their non-spinning path.
 */
static inline bool six_optimistic_spin(struct six_lock *lock, enum six_lock_type type)
{
	return false;
}
448 
449 #endif
450 
/*
 * six_lock_slowpath() - blocking path for taking a lock
 * @lock:	lock to take
 * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
 * @wait:	waitlist entry to use if we have to sleep
 * @should_sleep_fn: optional callback run before each schedule(); a nonzero
 *		return value aborts the lock attempt
 * @p:		passed through to @should_sleep_fn
 * @ip:		ip parameter for lockdep/lockstat
 *
 * For write locks, SIX_LOCK_HELD_write is set in the lock state up front.
 * We then try optimistic spinning, and failing that add @wait to the lock's
 * waitlist and sleep until __six_lock_wakeup() hands us the lock (signalled
 * via wait->lock_acquired) or @should_sleep_fn returns an error.
 *
 * Returns 0 with the lock held, or the nonzero return value of
 * @should_sleep_fn with the lock not held.
 */
noinline
static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type,
			     struct six_lock_waiter *wait,
			     six_lock_should_sleep_fn should_sleep_fn, void *p,
			     unsigned long ip)
{
	int ret = 0;

	if (type == SIX_LOCK_write) {
		EBUG_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
		atomic_add(SIX_LOCK_HELD_write, &lock->state);
		smp_mb__after_atomic();
	}

	if (six_optimistic_spin(lock, type))
		goto out;

	lock_contended(&lock->dep_map, ip);

	wait->task		= current;
	wait->lock_want		= type;
	wait->lock_acquired	= false;

	raw_spin_lock(&lock->wait_lock);
	six_set_bitmask(lock, SIX_LOCK_WAITING_read << type);
	/*
	 * Retry taking the lock after taking waitlist lock, in case we raced
	 * with an unlock:
	 */
	ret = __do_six_trylock(lock, type, current, false);
	if (ret <= 0) {
		wait->start_time = local_clock();

		/*
		 * Keep start_time strictly increasing along the waitlist, even
		 * if the clock reading is not later than the last waiter's:
		 */
		if (!list_empty(&lock->wait_list)) {
			struct six_lock_waiter *last =
				list_last_entry(&lock->wait_list,
					struct six_lock_waiter, list);

			if (time_before_eq64(wait->start_time, last->start_time))
				wait->start_time = last->start_time + 1;
		}

		list_add_tail(&wait->list, &lock->wait_list);
	}
	raw_spin_unlock(&lock->wait_lock);

	/* The retry under wait_lock succeeded: we were never put on the list */
	if (unlikely(ret > 0)) {
		ret = 0;
		goto out;
	}

	/* The failed retry requires a wakeup of the competing lock type: */
	if (unlikely(ret < 0)) {
		__six_lock_wakeup(lock, -ret - 1);
		ret = 0;
	}

	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		/*
		 * Ensures that writes to the waitlist entry happen after we see
		 * wait->lock_acquired: pairs with the smp_store_release in
		 * __six_lock_wakeup
		 */
		if (smp_load_acquire(&wait->lock_acquired))
			break;

		ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
		if (unlikely(ret)) {
			bool acquired;

			/*
			 * If should_sleep_fn() returns an error, we are
			 * required to return that error even if we already
			 * acquired the lock - should_sleep_fn() might have
			 * modified external state (e.g. when the deadlock cycle
			 * detector in bcachefs issued a transaction restart)
			 */
			raw_spin_lock(&lock->wait_lock);
			acquired = wait->lock_acquired;
			if (!acquired)
				list_del(&wait->list);
			raw_spin_unlock(&lock->wait_lock);

			/* We were handed the lock in the meantime: give it back */
			if (unlikely(acquired))
				do_six_unlock_type(lock, type);
			break;
		}

		schedule();
	}

	__set_current_state(TASK_RUNNING);
out:
	/*
	 * Aborted write lock: make sure the SIX_LOCK_HELD_write bit set on
	 * entry is cleared again, and wake waiters for read.
	 */
	if (ret && type == SIX_LOCK_write) {
		six_clear_bitmask(lock, SIX_LOCK_HELD_write);
		six_lock_wakeup(lock, atomic_read(&lock->state), SIX_LOCK_read);
	}

	return ret;
}
552 
553 /**
554  * six_lock_ip_waiter - take a lock, with full waitlist interface
555  * @lock:	lock to take
556  * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
557  * @wait:	pointer to wait object, which will be added to lock's waitlist
558  * @should_sleep_fn: callback run after adding to waitlist, immediately prior
559  *		to scheduling
560  * @p:		passed through to @should_sleep_fn
561  * @ip:		ip parameter for lockdep/lockstat, i.e. _THIS_IP_
562  *
563  * This is the most general six_lock() variant, with parameters to support full
564  * cycle detection for deadlock avoidance.
565  *
566  * The code calling this function must implement tracking of held locks, and the
567  * @wait object should be embedded into the struct that tracks held locks -
568  * which must also be accessible in a thread-safe way.
569  *
570  * @should_sleep_fn should invoke the cycle detector; it should walk each
571  * lock's waiters, and for each waiter recursively walk their held locks.
572  *
573  * When this function must block, @wait will be added to @lock's waitlist before
574  * calling trylock, and before calling @should_sleep_fn, and @wait will not be
575  * removed from the lock waitlist until the lock has been successfully acquired,
576  * or we abort.
577  *
578  * @wait.start_time will be monotonically increasing for any given waitlist, and
579  * thus may be used as a loop cursor.
580  *
581  * Return: 0 on success, or the return code from @should_sleep_fn on failure.
582  */
583 int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
584 		       struct six_lock_waiter *wait,
585 		       six_lock_should_sleep_fn should_sleep_fn, void *p,
586 		       unsigned long ip)
587 {
588 	int ret;
589 
590 	wait->start_time = 0;
591 
592 	if (type != SIX_LOCK_write)
593 		six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, ip);
594 
595 	ret = do_six_trylock(lock, type, true) ? 0
596 		: six_lock_slowpath(lock, type, wait, should_sleep_fn, p, ip);
597 
598 	if (ret && type != SIX_LOCK_write)
599 		six_release(&lock->dep_map, ip);
600 	if (!ret)
601 		lock_acquired(&lock->dep_map, ip);
602 
603 	return ret;
604 }
605 EXPORT_SYMBOL_GPL(six_lock_ip_waiter);
606 
607 __always_inline
608 static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
609 {
610 	u32 state;
611 
612 	if (type == SIX_LOCK_intent)
613 		lock->owner = NULL;
614 
615 	if (type == SIX_LOCK_read &&
616 	    lock->readers) {
617 		smp_mb(); /* unlock barrier */
618 		this_cpu_dec(*lock->readers);
619 		smp_mb(); /* between unlocking and checking for waiters */
620 		state = atomic_read(&lock->state);
621 	} else {
622 		u32 v = l[type].lock_val;
623 
624 		if (type != SIX_LOCK_read)
625 			v += atomic_read(&lock->state) & SIX_LOCK_NOSPIN;
626 
627 		EBUG_ON(!(atomic_read(&lock->state) & l[type].held_mask));
628 		state = atomic_sub_return_release(v, &lock->state);
629 	}
630 
631 	six_lock_wakeup(lock, state, l[type].unlock_wakeup);
632 }
633 
634 /**
635  * six_unlock_ip - drop a six lock
636  * @lock:	lock to unlock
637  * @type:	SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
638  * @ip:		ip parameter for lockdep/lockstat, i.e. _THIS_IP_
639  *
640  * When a lock is held multiple times (because six_lock_incement()) was used),
641  * this decrements the 'lock held' counter by one.
642  *
643  * For example:
644  * six_lock_read(&foo->lock);				read count 1
645  * six_lock_increment(&foo->lock, SIX_LOCK_read);	read count 2
646  * six_lock_unlock(&foo->lock, SIX_LOCK_read);		read count 1
647  * six_lock_unlock(&foo->lock, SIX_LOCK_read);		read count 0
648  */
649 void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
650 {
651 	EBUG_ON(type == SIX_LOCK_write &&
652 		!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
653 	EBUG_ON((type == SIX_LOCK_write ||
654 		 type == SIX_LOCK_intent) &&
655 		lock->owner != current);
656 
657 	if (type != SIX_LOCK_write)
658 		six_release(&lock->dep_map, ip);
659 	else
660 		lock->seq++;
661 
662 	if (type == SIX_LOCK_intent &&
663 	    lock->intent_lock_recurse) {
664 		--lock->intent_lock_recurse;
665 		return;
666 	}
667 
668 	do_six_unlock_type(lock, type);
669 }
670 EXPORT_SYMBOL_GPL(six_unlock_ip);
671 
/**
 * six_lock_downgrade - convert an intent lock to a read lock
 * @lock:	lock to downgrade
 *
 * @lock will have read count incremented and intent count decremented
 */
void six_lock_downgrade(struct six_lock *lock)
{
	/* Take the read lock before dropping intent, so we always hold @lock: */
	six_lock_increment(lock, SIX_LOCK_read);
	six_unlock_intent(lock);
}
EXPORT_SYMBOL_GPL(six_lock_downgrade);
684 
685 /**
686  * six_lock_tryupgrade - attempt to convert read lock to an intent lock
687  * @lock:	lock to upgrade
688  *
689  * On success, @lock will have intent count incremented and read count
690  * decremented
691  *
692  * Return: true on success, false on failure
693  */
694 bool six_lock_tryupgrade(struct six_lock *lock)
695 {
696 	u32 old = atomic_read(&lock->state), new;
697 
698 	do {
699 		new = old;
700 
701 		if (new & SIX_LOCK_HELD_intent)
702 			return false;
703 
704 		if (!lock->readers) {
705 			EBUG_ON(!(new & SIX_LOCK_HELD_read));
706 			new -= l[SIX_LOCK_read].lock_val;
707 		}
708 
709 		new |= SIX_LOCK_HELD_intent;
710 	} while (!atomic_try_cmpxchg_acquire(&lock->state, &old, new));
711 
712 	if (lock->readers)
713 		this_cpu_dec(*lock->readers);
714 
715 	six_set_owner(lock, SIX_LOCK_intent, old, current);
716 
717 	return true;
718 }
719 EXPORT_SYMBOL_GPL(six_lock_tryupgrade);
720 
721 /**
722  * six_trylock_convert - attempt to convert a held lock from one type to another
723  * @lock:	lock to upgrade
724  * @from:	SIX_LOCK_read or SIX_LOCK_intent
725  * @to:		SIX_LOCK_read or SIX_LOCK_intent
726  *
727  * On success, @lock will have intent count incremented and read count
728  * decremented
729  *
730  * Return: true on success, false on failure
731  */
732 bool six_trylock_convert(struct six_lock *lock,
733 			 enum six_lock_type from,
734 			 enum six_lock_type to)
735 {
736 	EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write);
737 
738 	if (to == from)
739 		return true;
740 
741 	if (to == SIX_LOCK_read) {
742 		six_lock_downgrade(lock);
743 		return true;
744 	} else {
745 		return six_lock_tryupgrade(lock);
746 	}
747 }
748 EXPORT_SYMBOL_GPL(six_trylock_convert);
749 
750 /**
751  * six_lock_increment - increase held lock count on a lock that is already held
752  * @lock:	lock to increment
753  * @type:	SIX_LOCK_read or SIX_LOCK_intent
754  *
755  * @lock must already be held, with a lock type that is greater than or equal to
756  * @type
757  *
758  * A corresponding six_unlock_type() call will be required for @lock to be fully
759  * unlocked.
760  */
761 void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
762 {
763 	six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, _RET_IP_);
764 
765 	/* XXX: assert already locked, and that we don't overflow: */
766 
767 	switch (type) {
768 	case SIX_LOCK_read:
769 		if (lock->readers) {
770 			this_cpu_inc(*lock->readers);
771 		} else {
772 			EBUG_ON(!(atomic_read(&lock->state) &
773 				  (SIX_LOCK_HELD_read|
774 				   SIX_LOCK_HELD_intent)));
775 			atomic_add(l[type].lock_val, &lock->state);
776 		}
777 		break;
778 	case SIX_LOCK_intent:
779 		EBUG_ON(!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
780 		lock->intent_lock_recurse++;
781 		break;
782 	case SIX_LOCK_write:
783 		BUG();
784 		break;
785 	}
786 }
787 EXPORT_SYMBOL_GPL(six_lock_increment);
788 
789 /**
790  * six_lock_wakeup_all - wake up all waiters on @lock
791  * @lock:	lock to wake up waiters for
792  *
793  * Wakeing up waiters will cause them to re-run should_sleep_fn, which may then
794  * abort the lock operation.
795  *
796  * This function is never needed in a bug-free program; it's only useful in
797  * debug code, e.g. to determine if a cycle detector is at fault.
798  */
799 void six_lock_wakeup_all(struct six_lock *lock)
800 {
801 	u32 state = atomic_read(&lock->state);
802 	struct six_lock_waiter *w;
803 
804 	six_lock_wakeup(lock, state, SIX_LOCK_read);
805 	six_lock_wakeup(lock, state, SIX_LOCK_intent);
806 	six_lock_wakeup(lock, state, SIX_LOCK_write);
807 
808 	raw_spin_lock(&lock->wait_lock);
809 	list_for_each_entry(w, &lock->wait_list, list)
810 		wake_up_process(w->task);
811 	raw_spin_unlock(&lock->wait_lock);
812 }
813 EXPORT_SYMBOL_GPL(six_lock_wakeup_all);
814 
815 /**
816  * six_lock_counts - return held lock counts, for each lock type
817  * @lock:	lock to return counters for
818  *
819  * Return: the number of times a lock is held for read, intent and write.
820  */
821 struct six_lock_count six_lock_counts(struct six_lock *lock)
822 {
823 	struct six_lock_count ret;
824 
825 	ret.n[SIX_LOCK_read]	= !lock->readers
826 		? atomic_read(&lock->state) & SIX_LOCK_HELD_read
827 		: pcpu_read_count(lock);
828 	ret.n[SIX_LOCK_intent]	= !!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent) +
829 		lock->intent_lock_recurse;
830 	ret.n[SIX_LOCK_write]	= !!(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
831 
832 	return ret;
833 }
834 EXPORT_SYMBOL_GPL(six_lock_counts);
835 
836 /**
837  * six_lock_readers_add - directly manipulate reader count of a lock
838  * @lock:	lock to add/subtract readers for
839  * @nr:		reader count to add/subtract
840  *
841  * When an upper layer is implementing lock reentrency, we may have both read
842  * and intent locks on the same lock.
843  *
844  * When we need to take a write lock, the read locks will cause self-deadlock,
845  * because six locks themselves do not track which read locks are held by the
846  * current thread and which are held by a different thread - it does no
847  * per-thread tracking of held locks.
848  *
849  * The upper layer that is tracking held locks may however, if trylock() has
850  * failed, count up its own read locks, subtract them, take the write lock, and
851  * then re-add them.
852  *
853  * As in any other situation when taking a write lock, @lock must be held for
854  * intent one (or more) times, so @lock will never be left unlocked.
855  */
856 void six_lock_readers_add(struct six_lock *lock, int nr)
857 {
858 	if (lock->readers) {
859 		this_cpu_add(*lock->readers, nr);
860 	} else {
861 		EBUG_ON((int) (atomic_read(&lock->state) & SIX_LOCK_HELD_read) + nr < 0);
862 		/* reader count starts at bit 0 */
863 		atomic_add(nr, &lock->state);
864 	}
865 }
866 EXPORT_SYMBOL_GPL(six_lock_readers_add);
867 
/**
 * six_lock_exit - release resources held by a lock prior to freeing
 * @lock:	lock to exit
 *
 * When a lock was initialized in percpu mode (SIX_LOCK_INIT_PCPU), this is
 * required to free the percpu read counts.
 *
 * Warns if @lock still appears to be held for read, by either the percpu
 * counters or the lock state.
 */
void six_lock_exit(struct six_lock *lock)
{
	WARN_ON(lock->readers && pcpu_read_count(lock));
	WARN_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_read);

	free_percpu(lock->readers);
	/* Clear the pointer so @lock no longer looks like a percpu mode lock: */
	lock->readers = NULL;
}
EXPORT_SYMBOL_GPL(six_lock_exit);
884 
/*
 * __six_lock_init() - initialize a six lock
 * @lock:	lock to initialize
 * @name:	name passed to lockdep_init_map()
 * @key:	lock class key passed to lockdep_init_map()
 * @flags:	SIX_LOCK_INIT_PCPU requests percpu reader mode
 *
 * Resets the lock state to zero (unlocked, no waiters) and initializes the
 * waitlist and its spinlock.
 *
 * NOTE(review): lock->readers is only assigned on the SIX_LOCK_INIT_PCPU path,
 * and owner/seq/intent_lock_recurse are not touched here at all - this
 * presumably relies on callers passing in zeroed memory; confirm against
 * callers.
 */
void __six_lock_init(struct six_lock *lock, const char *name,
		     struct lock_class_key *key, enum six_lock_init_flags flags)
{
	atomic_set(&lock->state, 0);
	raw_spin_lock_init(&lock->wait_lock);
	INIT_LIST_HEAD(&lock->wait_list);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	debug_check_no_locks_freed((void *) lock, sizeof(*lock));
	lockdep_init_map(&lock->dep_map, name, key, 0);
#endif

	/*
	 * Don't assume that we have real percpu variables available in
	 * userspace:
	 */
#ifdef __KERNEL__
	if (flags & SIX_LOCK_INIT_PCPU) {
		/*
		 * We don't return an error here on memory allocation failure
		 * since percpu is an optimization, and locks will work with the
		 * same semantics in non-percpu mode: callers can check for
		 * failure if they wish by checking lock->readers, but generally
		 * will not want to treat it as an error.
		 */
		lock->readers = alloc_percpu(unsigned);
	}
#endif
}
EXPORT_SYMBOL_GPL(__six_lock_init);
914