xref: /freebsd/sys/kern/subr_turnstile.c (revision 4b3b0413d2c5cb4f2a570fa3bed3c134d5377608)
10384fff8SJason Evans /*-
20384fff8SJason Evans  * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
30384fff8SJason Evans  *
40384fff8SJason Evans  * Redistribution and use in source and binary forms, with or without
50384fff8SJason Evans  * modification, are permitted provided that the following conditions
60384fff8SJason Evans  * are met:
70384fff8SJason Evans  * 1. Redistributions of source code must retain the above copyright
80384fff8SJason Evans  *    notice, this list of conditions and the following disclaimer.
90384fff8SJason Evans  * 2. Redistributions in binary form must reproduce the above copyright
100384fff8SJason Evans  *    notice, this list of conditions and the following disclaimer in the
110384fff8SJason Evans  *    documentation and/or other materials provided with the distribution.
120384fff8SJason Evans  * 3. Berkeley Software Design Inc's name may not be used to endorse or
130384fff8SJason Evans  *    promote products derived from this software without specific prior
140384fff8SJason Evans  *    written permission.
150384fff8SJason Evans  *
160384fff8SJason Evans  * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
170384fff8SJason Evans  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
180384fff8SJason Evans  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
190384fff8SJason Evans  * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
200384fff8SJason Evans  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
210384fff8SJason Evans  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
220384fff8SJason Evans  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
230384fff8SJason Evans  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
240384fff8SJason Evans  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
250384fff8SJason Evans  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
260384fff8SJason Evans  * SUCH DAMAGE.
270384fff8SJason Evans  *
280384fff8SJason Evans  *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
2936412d79SJohn Baldwin  *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
300384fff8SJason Evans  */
310384fff8SJason Evans 
320384fff8SJason Evans /*
33961a7b24SJohn Baldwin  * Implementation of turnstiles used to hold queue of threads blocked on
34961a7b24SJohn Baldwin  * non-sleepable locks.  Sleepable locks use condition variables to
35961a7b24SJohn Baldwin  * implement their queues.  Turnstiles differ from a sleep queue in that
36961a7b24SJohn Baldwin  * turnstile queue's are assigned to a lock held by an owning thread.  Thus,
37961a7b24SJohn Baldwin  * when one thread is enqueued onto a turnstile, it can lend its priority
38961a7b24SJohn Baldwin  * to the owning thread.
39961a7b24SJohn Baldwin  *
40961a7b24SJohn Baldwin  * We wish to avoid bloating locks with an embedded turnstile and we do not
41961a7b24SJohn Baldwin  * want to use back-pointers in the locks for the same reason.  Thus, we
42961a7b24SJohn Baldwin  * use a similar approach to that of Solaris 7 as described in Solaris
43961a7b24SJohn Baldwin  * Internals by Jim Mauro and Richard McDougall.  Turnstiles are looked up
44961a7b24SJohn Baldwin  * in a hash table based on the address of the lock.  Each entry in the
45961a7b24SJohn Baldwin  * hash table is a linked-lists of turnstiles and is called a turnstile
46961a7b24SJohn Baldwin  * chain.  Each chain contains a spin mutex that protects all of the
47961a7b24SJohn Baldwin  * turnstiles in the chain.
48961a7b24SJohn Baldwin  *
49961a7b24SJohn Baldwin  * Each time a thread is created, a turnstile is malloc'd and attached to
50961a7b24SJohn Baldwin  * that thread.  When a thread blocks on a lock, if it is the first thread
51961a7b24SJohn Baldwin  * to block, it lends its turnstile to the lock.  If the lock already has
52961a7b24SJohn Baldwin  * a turnstile, then it gives its turnstile to the lock's turnstile's free
53861a7db5SJohn Baldwin  * list.  When a thread is woken up, it takes a turnstile from the free list
54961a7b24SJohn Baldwin  * if there are any other waiters.  If it is the only thread blocked on the
55961a7b24SJohn Baldwin  * lock, then it reclaims the turnstile associated with the lock and removes
56961a7b24SJohn Baldwin  * it from the hash table.
570384fff8SJason Evans  */
580384fff8SJason Evans 
59677b542eSDavid E. O'Brien #include <sys/cdefs.h>
60677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$");
61677b542eSDavid E. O'Brien 
627aa4f685SJohn Baldwin #include "opt_ddb.h"
637aa4f685SJohn Baldwin #include "opt_turnstile_profiling.h"
647aa4f685SJohn Baldwin 
650384fff8SJason Evans #include <sys/param.h>
666c35e809SDag-Erling Smørgrav #include <sys/systm.h>
6736412d79SJohn Baldwin #include <sys/kernel.h>
686c35e809SDag-Erling Smørgrav #include <sys/ktr.h>
6919284646SJohn Baldwin #include <sys/lock.h>
70fb919e4dSMark Murray #include <sys/malloc.h>
7119284646SJohn Baldwin #include <sys/mutex.h>
720384fff8SJason Evans #include <sys/proc.h>
73961a7b24SJohn Baldwin #include <sys/queue.h>
74b43179fbSJeff Roberson #include <sys/sched.h>
75ef0ebfc3SJohn Baldwin #include <sys/sysctl.h>
76ef0ebfc3SJohn Baldwin #include <sys/turnstile.h>
7736412d79SJohn Baldwin 
787aa4f685SJohn Baldwin #ifdef DDB
797aa4f685SJohn Baldwin #include <ddb/ddb.h>
807aa4f685SJohn Baldwin #endif
817aa4f685SJohn Baldwin 
820cde2e34SJason Evans /*
83961a7b24SJohn Baldwin  * Constants for the hash table of turnstile chains.  TC_SHIFT is a magic
84961a7b24SJohn Baldwin  * number chosen because the sleep queue's use the same value for the
85961a7b24SJohn Baldwin  * shift.  Basically, we ignore the lower 8 bits of the address.
86961a7b24SJohn Baldwin  * TC_TABLESIZE must be a power of two for TC_MASK to work properly.
870cde2e34SJason Evans  */
88961a7b24SJohn Baldwin #define	TC_TABLESIZE	128			/* Must be power of 2. */
89961a7b24SJohn Baldwin #define	TC_MASK		(TC_TABLESIZE - 1)
90961a7b24SJohn Baldwin #define	TC_SHIFT	8
91961a7b24SJohn Baldwin #define	TC_HASH(lock)	(((uintptr_t)(lock) >> TC_SHIFT) & TC_MASK)
92961a7b24SJohn Baldwin #define	TC_LOOKUP(lock)	&turnstile_chains[TC_HASH(lock)]
939ed346baSBosko Milekic 
940cde2e34SJason Evans /*
95961a7b24SJohn Baldwin  * There are three different lists of turnstiles as follows.  The list
96961a7b24SJohn Baldwin  * connected by ts_link entries is a per-thread list of all the turnstiles
97961a7b24SJohn Baldwin  * attached to locks that we own.  This is used to fixup our priority when
98961a7b24SJohn Baldwin  * a lock is released.  The other two lists use the ts_hash entries.  The
995b7de7e1SJohn Baldwin  * first of these two is the turnstile chain list that a turnstile is on
1005b7de7e1SJohn Baldwin  * when it is attached to a lock.  The second list to use ts_hash is the
1015b7de7e1SJohn Baldwin  * free list hung off of a turnstile that is attached to a lock.
102961a7b24SJohn Baldwin  *
1037aa4f685SJohn Baldwin  * Each turnstile contains three lists of threads.  The two ts_blocked lists
1047aa4f685SJohn Baldwin  * are linked list of threads blocked on the turnstile's lock.  One list is
1057aa4f685SJohn Baldwin  * for exclusive waiters, and the other is for shared waiters.  The
106595bc82aSJohn Baldwin  * ts_pending list is a linked list of threads previously awakened by
107961a7b24SJohn Baldwin  * turnstile_signal() or turnstile_wait() that are waiting to be put on
108961a7b24SJohn Baldwin  * the run queue.
109961a7b24SJohn Baldwin  *
110961a7b24SJohn Baldwin  * Locking key:
111961a7b24SJohn Baldwin  *  c - turnstile chain lock
112961a7b24SJohn Baldwin  *  q - td_contested lock
1130cde2e34SJason Evans  */
114961a7b24SJohn Baldwin struct turnstile {
1157aa4f685SJohn Baldwin 	struct threadqueue ts_blocked[2];	/* (c + q) Blocked threads. */
1167aa4f685SJohn Baldwin 	struct threadqueue ts_pending;		/* (c) Pending threads. */
117961a7b24SJohn Baldwin 	LIST_ENTRY(turnstile) ts_hash;		/* (c) Chain and free list. */
118961a7b24SJohn Baldwin 	LIST_ENTRY(turnstile) ts_link;		/* (q) Contested locks. */
119961a7b24SJohn Baldwin 	LIST_HEAD(, turnstile) ts_free;		/* (c) Free turnstiles. */
120961a7b24SJohn Baldwin 	struct lock_object *ts_lockobj;		/* (c) Lock we reference. */
12179a13d01SJohn Baldwin 	struct thread *ts_owner;		/* (c + q) Who owns the lock. */
1228484de75SJohn Baldwin };
1238484de75SJohn Baldwin 
124961a7b24SJohn Baldwin struct turnstile_chain {
125961a7b24SJohn Baldwin 	LIST_HEAD(, turnstile) tc_turnstiles;	/* List of turnstiles. */
126961a7b24SJohn Baldwin 	struct mtx tc_lock;			/* Spin lock for this chain. */
127ef0ebfc3SJohn Baldwin #ifdef TURNSTILE_PROFILING
128ef0ebfc3SJohn Baldwin 	u_int	tc_depth;			/* Length of tc_queues. */
129ef0ebfc3SJohn Baldwin 	u_int	tc_max_depth;			/* Max length of tc_queues. */
130ef0ebfc3SJohn Baldwin #endif
131961a7b24SJohn Baldwin };
132961a7b24SJohn Baldwin 
133ef0ebfc3SJohn Baldwin #ifdef TURNSTILE_PROFILING
134ef0ebfc3SJohn Baldwin u_int turnstile_max_depth;
135ef0ebfc3SJohn Baldwin SYSCTL_NODE(_debug, OID_AUTO, turnstile, CTLFLAG_RD, 0, "turnstile profiling");
136ef0ebfc3SJohn Baldwin SYSCTL_NODE(_debug_turnstile, OID_AUTO, chains, CTLFLAG_RD, 0,
137ef0ebfc3SJohn Baldwin     "turnstile chain stats");
138ef0ebfc3SJohn Baldwin SYSCTL_UINT(_debug_turnstile, OID_AUTO, max_depth, CTLFLAG_RD,
139ef0ebfc3SJohn Baldwin     &turnstile_max_depth, 0, "maxmimum depth achieved of a single chain");
140ef0ebfc3SJohn Baldwin #endif
141961a7b24SJohn Baldwin static struct mtx td_contested_lock;
142961a7b24SJohn Baldwin static struct turnstile_chain turnstile_chains[TC_TABLESIZE];
143961a7b24SJohn Baldwin 
144c711aea6SPoul-Henning Kamp static MALLOC_DEFINE(M_TURNSTILE, "turnstiles", "turnstiles");
145c53c013bSJohn Baldwin 
146c53c013bSJohn Baldwin /*
1479ed346baSBosko Milekic  * Prototypes for non-exported routines.
1489ed346baSBosko Milekic  */
149961a7b24SJohn Baldwin static void	init_turnstile0(void *dummy);
15001bd10e1SJohn Baldwin #ifdef TURNSTILE_PROFILING
15101bd10e1SJohn Baldwin static void	init_turnstile_profiling(void *arg);
15201bd10e1SJohn Baldwin #endif
153f5c157d9SJohn Baldwin static void	propagate_priority(struct thread *td);
154f5c157d9SJohn Baldwin static int	turnstile_adjust_thread(struct turnstile *ts,
155f5c157d9SJohn Baldwin 		    struct thread *td);
1567aa4f685SJohn Baldwin static struct thread *turnstile_first_waiter(struct turnstile *ts);
157961a7b24SJohn Baldwin static void	turnstile_setowner(struct turnstile *ts, struct thread *owner);
15836412d79SJohn Baldwin 
159961a7b24SJohn Baldwin /*
160961a7b24SJohn Baldwin  * Walks the chain of turnstiles and their owners to propagate the priority
161961a7b24SJohn Baldwin  * of the thread being blocked to all the threads holding locks that have to
162961a7b24SJohn Baldwin  * release their locks before this thread can run again.
163961a7b24SJohn Baldwin  */
16436412d79SJohn Baldwin static void
165b40ce416SJulian Elischer propagate_priority(struct thread *td)
16636412d79SJohn Baldwin {
167961a7b24SJohn Baldwin 	struct turnstile_chain *tc;
168961a7b24SJohn Baldwin 	struct turnstile *ts;
169961a7b24SJohn Baldwin 	int pri;
17036412d79SJohn Baldwin 
1711bd0eefbSJohn Baldwin 	mtx_assert(&sched_lock, MA_OWNED);
172961a7b24SJohn Baldwin 	pri = td->td_priority;
173961a7b24SJohn Baldwin 	ts = td->td_blocked;
17436412d79SJohn Baldwin 	for (;;) {
175961a7b24SJohn Baldwin 		td = ts->ts_owner;
17636412d79SJohn Baldwin 
177b40ce416SJulian Elischer 		if (td == NULL) {
17836412d79SJohn Baldwin 			/*
1797aa4f685SJohn Baldwin 			 * This might be a read lock with no owner.  There's
1807aa4f685SJohn Baldwin 			 * not much we can do, so just bail.
18136412d79SJohn Baldwin 			 */
18236412d79SJohn Baldwin 			return;
18336412d79SJohn Baldwin 		}
1849ed346baSBosko Milekic 
185e602ba25SJulian Elischer 		MPASS(td->td_proc != NULL);
186b40ce416SJulian Elischer 		MPASS(td->td_proc->p_magic == P_MAGIC);
1871bd0eefbSJohn Baldwin 
188961a7b24SJohn Baldwin 		/*
1894b3b0413SJohn Baldwin 		 * If the thread is asleep, then we are probably about
1904b3b0413SJohn Baldwin 		 * to deadlock.  To make debugging this easier, just
1914b3b0413SJohn Baldwin 		 * panic and tell the user which thread misbehaved so
1924b3b0413SJohn Baldwin 		 * they can hopefully get a stack trace from the truly
1934b3b0413SJohn Baldwin 		 * misbehaving thread.
194961a7b24SJohn Baldwin 		 */
1954b3b0413SJohn Baldwin 		if (TD_IS_SLEEPING(td)) {
1964b3b0413SJohn Baldwin 			printf(
1974b3b0413SJohn Baldwin 		"Sleeping thread (tid %d, pid %d) owns a non-sleepable lock\n",
1984b3b0413SJohn Baldwin 			    td->td_tid, td->td_proc->p_pid);
1994b3b0413SJohn Baldwin #ifdef DDB
2004b3b0413SJohn Baldwin 			db_trace_thread(td, -1);
2014b3b0413SJohn Baldwin #endif
2024b3b0413SJohn Baldwin 			panic("sleeping thread");
2034b3b0413SJohn Baldwin 		}
204961a7b24SJohn Baldwin 
205961a7b24SJohn Baldwin 		/*
206961a7b24SJohn Baldwin 		 * If this thread already has higher priority than the
207961a7b24SJohn Baldwin 		 * thread that is being blocked, we are finished.
208961a7b24SJohn Baldwin 		 */
209961a7b24SJohn Baldwin 		if (td->td_priority <= pri)
210961a7b24SJohn Baldwin 			return;
2111bd0eefbSJohn Baldwin 
21236412d79SJohn Baldwin 		/*
213f5c157d9SJohn Baldwin 		 * Bump this thread's priority.
21436412d79SJohn Baldwin 		 */
215f5c157d9SJohn Baldwin 		sched_lend_prio(td, pri);
216f5c157d9SJohn Baldwin 
217f5c157d9SJohn Baldwin 		/*
218f5c157d9SJohn Baldwin 		 * If lock holder is actually running or on the run queue
219f5c157d9SJohn Baldwin 		 * then we are done.
220f5c157d9SJohn Baldwin 		 */
221f5c157d9SJohn Baldwin 		if (TD_IS_RUNNING(td) || TD_ON_RUNQ(td)) {
222f5c157d9SJohn Baldwin 			MPASS(td->td_blocked == NULL);
22336412d79SJohn Baldwin 			return;
22436412d79SJohn Baldwin 		}
225d5a08a60SJake Burkholder 
2261b43703bSJohn Baldwin #ifndef SMP
2271b43703bSJohn Baldwin 		/*
228b40ce416SJulian Elischer 		 * For UP, we check to see if td is curthread (this shouldn't
2291b43703bSJohn Baldwin 		 * ever happen however as it would mean we are in a deadlock.)
2301b43703bSJohn Baldwin 		 */
231b40ce416SJulian Elischer 		KASSERT(td != curthread, ("Deadlock detected"));
2321b43703bSJohn Baldwin #endif
2331b43703bSJohn Baldwin 
23436412d79SJohn Baldwin 		/*
235961a7b24SJohn Baldwin 		 * If we aren't blocked on a lock, we should be.
23636412d79SJohn Baldwin 		 */
237551cf4e1SJohn Baldwin 		KASSERT(TD_ON_LOCK(td), (
238f5c157d9SJohn Baldwin 		    "thread %d(%s):%d holds %s but isn't blocked on a lock\n",
239f5c157d9SJohn Baldwin 		    td->td_tid, td->td_proc->p_comm, td->td_state,
240961a7b24SJohn Baldwin 		    ts->ts_lockobj->lo_name));
24136412d79SJohn Baldwin 
24236412d79SJohn Baldwin 		/*
243961a7b24SJohn Baldwin 		 * Pick up the lock that td is blocked on.
24436412d79SJohn Baldwin 		 */
245961a7b24SJohn Baldwin 		ts = td->td_blocked;
246961a7b24SJohn Baldwin 		MPASS(ts != NULL);
247961a7b24SJohn Baldwin 		tc = TC_LOOKUP(ts->ts_lockobj);
248961a7b24SJohn Baldwin 		mtx_lock_spin(&tc->tc_lock);
24936412d79SJohn Baldwin 
250f5c157d9SJohn Baldwin 		/* Resort td on the list if needed. */
251f5c157d9SJohn Baldwin 		if (!turnstile_adjust_thread(ts, td)) {
252f5c157d9SJohn Baldwin 			mtx_unlock_spin(&tc->tc_lock);
253f5c157d9SJohn Baldwin 			return;
254f5c157d9SJohn Baldwin 		}
255f5c157d9SJohn Baldwin 		mtx_unlock_spin(&tc->tc_lock);
256f5c157d9SJohn Baldwin 	}
257f5c157d9SJohn Baldwin }
258f5c157d9SJohn Baldwin 
259f5c157d9SJohn Baldwin /*
260f5c157d9SJohn Baldwin  * Adjust the thread's position on a turnstile after its priority has been
261f5c157d9SJohn Baldwin  * changed.
262f5c157d9SJohn Baldwin  */
263f5c157d9SJohn Baldwin static int
264f5c157d9SJohn Baldwin turnstile_adjust_thread(struct turnstile *ts, struct thread *td)
265f5c157d9SJohn Baldwin {
266f5c157d9SJohn Baldwin 	struct turnstile_chain *tc;
267f5c157d9SJohn Baldwin 	struct thread *td1, *td2;
2687aa4f685SJohn Baldwin 	int queue;
269f5c157d9SJohn Baldwin 
270f5c157d9SJohn Baldwin 	mtx_assert(&sched_lock, MA_OWNED);
271f5c157d9SJohn Baldwin 	MPASS(TD_ON_LOCK(td));
272f5c157d9SJohn Baldwin 
27336412d79SJohn Baldwin 	/*
2746b6bd95eSJohn Baldwin 	 * This thread may not be blocked on this turnstile anymore
2756b6bd95eSJohn Baldwin 	 * but instead might already be woken up on another CPU
2766b6bd95eSJohn Baldwin 	 * that is waiting on sched_lock in turnstile_unpend() to
2776b6bd95eSJohn Baldwin 	 * finish waking this thread up.  We can detect this case
2786b6bd95eSJohn Baldwin 	 * by checking to see if this thread has been given a
2796b6bd95eSJohn Baldwin 	 * turnstile by either turnstile_signal() or
280ef2c0ba7SJohn Baldwin 	 * turnstile_broadcast().  In this case, treat the thread as
2816b6bd95eSJohn Baldwin 	 * if it was already running.
28279a13d01SJohn Baldwin 	 */
283f5c157d9SJohn Baldwin 	if (td->td_turnstile != NULL)
284f5c157d9SJohn Baldwin 		return (0);
28579a13d01SJohn Baldwin 
28679a13d01SJohn Baldwin 	/*
287f5c157d9SJohn Baldwin 	 * Check if the thread needs to be moved on the blocked chain.
288f5c157d9SJohn Baldwin 	 * It needs to be moved if either its priority is lower than
289f5c157d9SJohn Baldwin 	 * the previous thread or higher than the next thread.
29036412d79SJohn Baldwin 	 */
291f5c157d9SJohn Baldwin 	tc = TC_LOOKUP(ts->ts_lockobj);
292f5c157d9SJohn Baldwin 	mtx_assert(&tc->tc_lock, MA_OWNED);
293551cf4e1SJohn Baldwin 	td1 = TAILQ_PREV(td, threadqueue, td_lockq);
294f5c157d9SJohn Baldwin 	td2 = TAILQ_NEXT(td, td_lockq);
295f5c157d9SJohn Baldwin 	if ((td1 != NULL && td->td_priority < td1->td_priority) ||
296f5c157d9SJohn Baldwin 	    (td2 != NULL && td->td_priority > td2->td_priority)) {
29736412d79SJohn Baldwin 
29836412d79SJohn Baldwin 		/*
299b40ce416SJulian Elischer 		 * Remove thread from blocked chain and determine where
300f5c157d9SJohn Baldwin 		 * it should be moved to.
30136412d79SJohn Baldwin 		 */
3027aa4f685SJohn Baldwin 		queue = td->td_tsqueue;
3037aa4f685SJohn Baldwin 		MPASS(queue == TS_EXCLUSIVE_QUEUE || queue == TS_SHARED_QUEUE);
304961a7b24SJohn Baldwin 		mtx_lock_spin(&td_contested_lock);
3057aa4f685SJohn Baldwin 		TAILQ_REMOVE(&ts->ts_blocked[queue], td, td_lockq);
3067aa4f685SJohn Baldwin 		TAILQ_FOREACH(td1, &ts->ts_blocked[queue], td_lockq) {
307b40ce416SJulian Elischer 			MPASS(td1->td_proc->p_magic == P_MAGIC);
308f5c157d9SJohn Baldwin 			if (td1->td_priority > td->td_priority)
30936412d79SJohn Baldwin 				break;
31036412d79SJohn Baldwin 		}
3119ed346baSBosko Milekic 
312f5c157d9SJohn Baldwin 		if (td1 == NULL)
3137aa4f685SJohn Baldwin 			TAILQ_INSERT_TAIL(&ts->ts_blocked[queue], td, td_lockq);
314f5c157d9SJohn Baldwin 		else
315551cf4e1SJohn Baldwin 			TAILQ_INSERT_BEFORE(td1, td, td_lockq);
316961a7b24SJohn Baldwin 		mtx_unlock_spin(&td_contested_lock);
317f5c157d9SJohn Baldwin 		if (td1 == NULL)
318f5c157d9SJohn Baldwin 			CTR3(KTR_LOCK,
319f5c157d9SJohn Baldwin 		    "turnstile_adjust_thread: td %d put at tail on [%p] %s",
320f5c157d9SJohn Baldwin 			    td->td_tid, ts->ts_lockobj, ts->ts_lockobj->lo_name);
321f5c157d9SJohn Baldwin 		else
32236412d79SJohn Baldwin 			CTR4(KTR_LOCK,
323f5c157d9SJohn Baldwin 		    "turnstile_adjust_thread: td %d moved before %d on [%p] %s",
324f5c157d9SJohn Baldwin 			    td->td_tid, td1->td_tid, ts->ts_lockobj,
325f5c157d9SJohn Baldwin 			    ts->ts_lockobj->lo_name);
32636412d79SJohn Baldwin 	}
327f5c157d9SJohn Baldwin 	return (1);
32836412d79SJohn Baldwin }
32936412d79SJohn Baldwin 
3306c35e809SDag-Erling Smørgrav /*
331961a7b24SJohn Baldwin  * Early initialization of turnstiles.  This is not done via a SYSINIT()
332961a7b24SJohn Baldwin  * since this needs to be initialized very early when mutexes are first
333961a7b24SJohn Baldwin  * initialized.
3346283b7d0SJohn Baldwin  */
3356283b7d0SJohn Baldwin void
336961a7b24SJohn Baldwin init_turnstiles(void)
3376283b7d0SJohn Baldwin {
338961a7b24SJohn Baldwin 	int i;
3396283b7d0SJohn Baldwin 
340961a7b24SJohn Baldwin 	for (i = 0; i < TC_TABLESIZE; i++) {
341961a7b24SJohn Baldwin 		LIST_INIT(&turnstile_chains[i].tc_turnstiles);
342961a7b24SJohn Baldwin 		mtx_init(&turnstile_chains[i].tc_lock, "turnstile chain",
343961a7b24SJohn Baldwin 		    NULL, MTX_SPIN);
34401bd10e1SJohn Baldwin 	}
34501bd10e1SJohn Baldwin 	mtx_init(&td_contested_lock, "td_contested", NULL, MTX_SPIN);
346550d1c93SJohn Baldwin 	LIST_INIT(&thread0.td_contested);
34701bd10e1SJohn Baldwin 	thread0.td_turnstile = NULL;
34801bd10e1SJohn Baldwin }
34901bd10e1SJohn Baldwin 
350ef0ebfc3SJohn Baldwin #ifdef TURNSTILE_PROFILING
35101bd10e1SJohn Baldwin static void
35201bd10e1SJohn Baldwin init_turnstile_profiling(void *arg)
35301bd10e1SJohn Baldwin {
35401bd10e1SJohn Baldwin 	struct sysctl_oid *chain_oid;
35501bd10e1SJohn Baldwin 	char chain_name[10];
35601bd10e1SJohn Baldwin 	int i;
35701bd10e1SJohn Baldwin 
35801bd10e1SJohn Baldwin 	for (i = 0; i < TC_TABLESIZE; i++) {
359ef0ebfc3SJohn Baldwin 		snprintf(chain_name, sizeof(chain_name), "%d", i);
360ef0ebfc3SJohn Baldwin 		chain_oid = SYSCTL_ADD_NODE(NULL,
361ef0ebfc3SJohn Baldwin 		    SYSCTL_STATIC_CHILDREN(_debug_turnstile_chains), OID_AUTO,
362ef0ebfc3SJohn Baldwin 		    chain_name, CTLFLAG_RD, NULL, "turnstile chain stats");
363ef0ebfc3SJohn Baldwin 		SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
364ef0ebfc3SJohn Baldwin 		    "depth", CTLFLAG_RD, &turnstile_chains[i].tc_depth, 0,
365ef0ebfc3SJohn Baldwin 		    NULL);
366ef0ebfc3SJohn Baldwin 		SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
367ef0ebfc3SJohn Baldwin 		    "max_depth", CTLFLAG_RD, &turnstile_chains[i].tc_max_depth,
368ef0ebfc3SJohn Baldwin 		    0, NULL);
36901bd10e1SJohn Baldwin 	}
37001bd10e1SJohn Baldwin }
37101bd10e1SJohn Baldwin SYSINIT(turnstile_profiling, SI_SUB_LOCK, SI_ORDER_ANY,
37201bd10e1SJohn Baldwin     init_turnstile_profiling, NULL);
373ef0ebfc3SJohn Baldwin #endif
3746283b7d0SJohn Baldwin 
375961a7b24SJohn Baldwin static void
376961a7b24SJohn Baldwin init_turnstile0(void *dummy)
3776283b7d0SJohn Baldwin {
3786283b7d0SJohn Baldwin 
379961a7b24SJohn Baldwin 	thread0.td_turnstile = turnstile_alloc();
380961a7b24SJohn Baldwin }
381961a7b24SJohn Baldwin SYSINIT(turnstile0, SI_SUB_LOCK, SI_ORDER_ANY, init_turnstile0, NULL);
3826c35e809SDag-Erling Smørgrav 
383961a7b24SJohn Baldwin /*
384f5c157d9SJohn Baldwin  * Update a thread on the turnstile list after it's priority has been changed.
385f5c157d9SJohn Baldwin  * The old priority is passed in as an argument.
386f5c157d9SJohn Baldwin  */
387f5c157d9SJohn Baldwin void
388f5c157d9SJohn Baldwin turnstile_adjust(struct thread *td, u_char oldpri)
389f5c157d9SJohn Baldwin {
390f5c157d9SJohn Baldwin 	struct turnstile_chain *tc;
391f5c157d9SJohn Baldwin 	struct turnstile *ts;
392f5c157d9SJohn Baldwin 
393f5c157d9SJohn Baldwin 	mtx_assert(&sched_lock, MA_OWNED);
394f5c157d9SJohn Baldwin 	MPASS(TD_ON_LOCK(td));
395f5c157d9SJohn Baldwin 
396f5c157d9SJohn Baldwin 	/*
397f5c157d9SJohn Baldwin 	 * Pick up the lock that td is blocked on.
398f5c157d9SJohn Baldwin 	 */
399f5c157d9SJohn Baldwin 	ts = td->td_blocked;
400f5c157d9SJohn Baldwin 	MPASS(ts != NULL);
401f5c157d9SJohn Baldwin 	tc = TC_LOOKUP(ts->ts_lockobj);
402f5c157d9SJohn Baldwin 	mtx_lock_spin(&tc->tc_lock);
403f5c157d9SJohn Baldwin 
404f5c157d9SJohn Baldwin 	/* Resort the turnstile on the list. */
405f5c157d9SJohn Baldwin 	if (!turnstile_adjust_thread(ts, td)) {
406f5c157d9SJohn Baldwin 		mtx_unlock_spin(&tc->tc_lock);
407f5c157d9SJohn Baldwin 		return;
408f5c157d9SJohn Baldwin 	}
409f5c157d9SJohn Baldwin 
410f5c157d9SJohn Baldwin 	/*
411f5c157d9SJohn Baldwin 	 * If our priority was lowered and we are at the head of the
412f5c157d9SJohn Baldwin 	 * turnstile, then propagate our new priority up the chain.
413f5c157d9SJohn Baldwin 	 * Note that we currently don't try to revoke lent priorities
414f5c157d9SJohn Baldwin 	 * when our priority goes up.
415f5c157d9SJohn Baldwin 	 */
4167aa4f685SJohn Baldwin 	MPASS(td->td_tsqueue == TS_EXCLUSIVE_QUEUE ||
4177aa4f685SJohn Baldwin 	    td->td_tsqueue == TS_SHARED_QUEUE);
4187aa4f685SJohn Baldwin 	if (td == TAILQ_FIRST(&ts->ts_blocked[td->td_tsqueue]) &&
4197aa4f685SJohn Baldwin 	    td->td_priority < oldpri) {
420f5c157d9SJohn Baldwin 		mtx_unlock_spin(&tc->tc_lock);
421f5c157d9SJohn Baldwin 		propagate_priority(td);
422f5c157d9SJohn Baldwin 	} else
423f5c157d9SJohn Baldwin 		mtx_unlock_spin(&tc->tc_lock);
424f5c157d9SJohn Baldwin }
425f5c157d9SJohn Baldwin 
426f5c157d9SJohn Baldwin /*
427961a7b24SJohn Baldwin  * Set the owner of the lock this turnstile is attached to.
428961a7b24SJohn Baldwin  */
429961a7b24SJohn Baldwin static void
430961a7b24SJohn Baldwin turnstile_setowner(struct turnstile *ts, struct thread *owner)
431961a7b24SJohn Baldwin {
432961a7b24SJohn Baldwin 
433961a7b24SJohn Baldwin 	mtx_assert(&td_contested_lock, MA_OWNED);
434961a7b24SJohn Baldwin 	MPASS(ts->ts_owner == NULL);
4357aa4f685SJohn Baldwin 
4367aa4f685SJohn Baldwin 	/* A shared lock might not have an owner. */
4377aa4f685SJohn Baldwin 	if (owner == NULL)
4387aa4f685SJohn Baldwin 		return;
4397aa4f685SJohn Baldwin 
4407aa4f685SJohn Baldwin 	MPASS(owner->td_proc->p_magic == P_MAGIC);
441961a7b24SJohn Baldwin 	ts->ts_owner = owner;
442961a7b24SJohn Baldwin 	LIST_INSERT_HEAD(&owner->td_contested, ts, ts_link);
443961a7b24SJohn Baldwin }
444961a7b24SJohn Baldwin 
445961a7b24SJohn Baldwin /*
446961a7b24SJohn Baldwin  * Malloc a turnstile for a new thread, initialize it and return it.
447961a7b24SJohn Baldwin  */
448961a7b24SJohn Baldwin struct turnstile *
449961a7b24SJohn Baldwin turnstile_alloc(void)
450961a7b24SJohn Baldwin {
451961a7b24SJohn Baldwin 	struct turnstile *ts;
452961a7b24SJohn Baldwin 
453961a7b24SJohn Baldwin 	ts = malloc(sizeof(struct turnstile), M_TURNSTILE, M_WAITOK | M_ZERO);
4547aa4f685SJohn Baldwin 	TAILQ_INIT(&ts->ts_blocked[TS_EXCLUSIVE_QUEUE]);
4557aa4f685SJohn Baldwin 	TAILQ_INIT(&ts->ts_blocked[TS_SHARED_QUEUE]);
456961a7b24SJohn Baldwin 	TAILQ_INIT(&ts->ts_pending);
457961a7b24SJohn Baldwin 	LIST_INIT(&ts->ts_free);
458961a7b24SJohn Baldwin 	return (ts);
459961a7b24SJohn Baldwin }
460961a7b24SJohn Baldwin 
461961a7b24SJohn Baldwin /*
462961a7b24SJohn Baldwin  * Free a turnstile when a thread is destroyed.
463961a7b24SJohn Baldwin  */
464961a7b24SJohn Baldwin void
465961a7b24SJohn Baldwin turnstile_free(struct turnstile *ts)
466961a7b24SJohn Baldwin {
467961a7b24SJohn Baldwin 
468961a7b24SJohn Baldwin 	MPASS(ts != NULL);
4697aa4f685SJohn Baldwin 	MPASS(TAILQ_EMPTY(&ts->ts_blocked[TS_EXCLUSIVE_QUEUE]));
4707aa4f685SJohn Baldwin 	MPASS(TAILQ_EMPTY(&ts->ts_blocked[TS_SHARED_QUEUE]));
471961a7b24SJohn Baldwin 	MPASS(TAILQ_EMPTY(&ts->ts_pending));
472961a7b24SJohn Baldwin 	free(ts, M_TURNSTILE);
473961a7b24SJohn Baldwin }
474961a7b24SJohn Baldwin 
475961a7b24SJohn Baldwin /*
4762ff0e645SJohn Baldwin  * Lock the turnstile chain associated with the specified lock.
4772ff0e645SJohn Baldwin  */
4782ff0e645SJohn Baldwin void
4792ff0e645SJohn Baldwin turnstile_lock(struct lock_object *lock)
4802ff0e645SJohn Baldwin {
4812ff0e645SJohn Baldwin 	struct turnstile_chain *tc;
4822ff0e645SJohn Baldwin 
4832ff0e645SJohn Baldwin 	tc = TC_LOOKUP(lock);
4842ff0e645SJohn Baldwin 	mtx_lock_spin(&tc->tc_lock);
4852ff0e645SJohn Baldwin }
4862ff0e645SJohn Baldwin 
4872ff0e645SJohn Baldwin /*
488961a7b24SJohn Baldwin  * Look up the turnstile for a lock in the hash table locking the associated
4892ff0e645SJohn Baldwin  * turnstile chain along the way.  If no turnstile is found in the hash
4902ff0e645SJohn Baldwin  * table, NULL is returned.
491961a7b24SJohn Baldwin  */
492961a7b24SJohn Baldwin struct turnstile *
493961a7b24SJohn Baldwin turnstile_lookup(struct lock_object *lock)
494961a7b24SJohn Baldwin {
495961a7b24SJohn Baldwin 	struct turnstile_chain *tc;
496961a7b24SJohn Baldwin 	struct turnstile *ts;
497961a7b24SJohn Baldwin 
498961a7b24SJohn Baldwin 	tc = TC_LOOKUP(lock);
4992ff0e645SJohn Baldwin 	mtx_assert(&tc->tc_lock, MA_OWNED);
500961a7b24SJohn Baldwin 	LIST_FOREACH(ts, &tc->tc_turnstiles, ts_hash)
501961a7b24SJohn Baldwin 		if (ts->ts_lockobj == lock)
502961a7b24SJohn Baldwin 			return (ts);
503961a7b24SJohn Baldwin 	return (NULL);
504961a7b24SJohn Baldwin }
505961a7b24SJohn Baldwin 
506961a7b24SJohn Baldwin /*
507961a7b24SJohn Baldwin  * Unlock the turnstile chain associated with a given lock.
508961a7b24SJohn Baldwin  */
509961a7b24SJohn Baldwin void
510961a7b24SJohn Baldwin turnstile_release(struct lock_object *lock)
511961a7b24SJohn Baldwin {
512961a7b24SJohn Baldwin 	struct turnstile_chain *tc;
513961a7b24SJohn Baldwin 
514961a7b24SJohn Baldwin 	tc = TC_LOOKUP(lock);
515961a7b24SJohn Baldwin 	mtx_unlock_spin(&tc->tc_lock);
516961a7b24SJohn Baldwin }
517961a7b24SJohn Baldwin 
518961a7b24SJohn Baldwin /*
5197aa4f685SJohn Baldwin  * Return a pointer to the thread waiting on this turnstile with the
5207aa4f685SJohn Baldwin  * most important priority or NULL if the turnstile has no waiters.
5217aa4f685SJohn Baldwin  */
5227aa4f685SJohn Baldwin static struct thread *
5237aa4f685SJohn Baldwin turnstile_first_waiter(struct turnstile *ts)
5247aa4f685SJohn Baldwin {
5257aa4f685SJohn Baldwin 	struct thread *std, *xtd;
5267aa4f685SJohn Baldwin 
5277aa4f685SJohn Baldwin 	std = TAILQ_FIRST(&ts->ts_blocked[TS_SHARED_QUEUE]);
5287aa4f685SJohn Baldwin 	xtd = TAILQ_FIRST(&ts->ts_blocked[TS_EXCLUSIVE_QUEUE]);
5297aa4f685SJohn Baldwin 	if (xtd == NULL || (std != NULL && std->td_priority < xtd->td_priority))
5307aa4f685SJohn Baldwin 		return (std);
5317aa4f685SJohn Baldwin 	return (xtd);
5327aa4f685SJohn Baldwin }
5337aa4f685SJohn Baldwin 
5347aa4f685SJohn Baldwin /*
535961a7b24SJohn Baldwin  * Take ownership of a turnstile and adjust the priority of the new
536961a7b24SJohn Baldwin  * owner appropriately.
537961a7b24SJohn Baldwin  */
538961a7b24SJohn Baldwin void
5392ff0e645SJohn Baldwin turnstile_claim(struct lock_object *lock)
540961a7b24SJohn Baldwin {
541961a7b24SJohn Baldwin 	struct turnstile_chain *tc;
5422ff0e645SJohn Baldwin 	struct turnstile *ts;
543961a7b24SJohn Baldwin 	struct thread *td, *owner;
544961a7b24SJohn Baldwin 
5452ff0e645SJohn Baldwin 	tc = TC_LOOKUP(lock);
546961a7b24SJohn Baldwin 	mtx_assert(&tc->tc_lock, MA_OWNED);
5472ff0e645SJohn Baldwin 	ts = turnstile_lookup(lock);
5482ff0e645SJohn Baldwin 	MPASS(ts != NULL);
549961a7b24SJohn Baldwin 
550961a7b24SJohn Baldwin 	owner = curthread;
551961a7b24SJohn Baldwin 	mtx_lock_spin(&td_contested_lock);
552961a7b24SJohn Baldwin 	turnstile_setowner(ts, owner);
553961a7b24SJohn Baldwin 	mtx_unlock_spin(&td_contested_lock);
554961a7b24SJohn Baldwin 
5557aa4f685SJohn Baldwin 	td = turnstile_first_waiter(ts);
556961a7b24SJohn Baldwin 	MPASS(td != NULL);
557961a7b24SJohn Baldwin 	MPASS(td->td_proc->p_magic == P_MAGIC);
558961a7b24SJohn Baldwin 	mtx_unlock_spin(&tc->tc_lock);
559961a7b24SJohn Baldwin 
560961a7b24SJohn Baldwin 	/*
561961a7b24SJohn Baldwin 	 * Update the priority of the new owner if needed.
562961a7b24SJohn Baldwin 	 */
563961a7b24SJohn Baldwin 	mtx_lock_spin(&sched_lock);
564961a7b24SJohn Baldwin 	if (td->td_priority < owner->td_priority)
565f5c157d9SJohn Baldwin 		sched_lend_prio(owner, td->td_priority);
566961a7b24SJohn Baldwin 	mtx_unlock_spin(&sched_lock);
567961a7b24SJohn Baldwin }
568961a7b24SJohn Baldwin 
569961a7b24SJohn Baldwin /*
5702ff0e645SJohn Baldwin  * Block the current thread on the turnstile assicated with 'lock'.  This
5712ff0e645SJohn Baldwin  * function will context switch and not return until this thread has been
5722ff0e645SJohn Baldwin  * woken back up.  This function must be called with the appropriate
5732ff0e645SJohn Baldwin  * turnstile chain locked and will return with it unlocked.
574961a7b24SJohn Baldwin  */
575961a7b24SJohn Baldwin void
5767aa4f685SJohn Baldwin turnstile_wait(struct lock_object *lock, struct thread *owner, int queue)
577961a7b24SJohn Baldwin {
578961a7b24SJohn Baldwin 	struct turnstile_chain *tc;
5792ff0e645SJohn Baldwin 	struct turnstile *ts;
580961a7b24SJohn Baldwin 	struct thread *td, *td1;
581961a7b24SJohn Baldwin 
582961a7b24SJohn Baldwin 	td = curthread;
583961a7b24SJohn Baldwin 	tc = TC_LOOKUP(lock);
584961a7b24SJohn Baldwin 	mtx_assert(&tc->tc_lock, MA_OWNED);
585961a7b24SJohn Baldwin 	MPASS(td->td_turnstile != NULL);
5867aa4f685SJohn Baldwin 	if (queue == TS_SHARED_QUEUE)
587961a7b24SJohn Baldwin 		MPASS(owner != NULL);
5887aa4f685SJohn Baldwin 	if (owner)
589961a7b24SJohn Baldwin 		MPASS(owner->td_proc->p_magic == P_MAGIC);
5907aa4f685SJohn Baldwin 	MPASS(queue == TS_SHARED_QUEUE || queue == TS_EXCLUSIVE_QUEUE);
591961a7b24SJohn Baldwin 
5922ff0e645SJohn Baldwin 	/* Look up the turnstile associated with the lock 'lock'. */
5932ff0e645SJohn Baldwin 	ts = turnstile_lookup(lock);
5942ff0e645SJohn Baldwin 
5952ff0e645SJohn Baldwin 	/*
5962ff0e645SJohn Baldwin 	 * If the lock does not already have a turnstile, use this thread's
5972ff0e645SJohn Baldwin 	 * turnstile.  Otherwise insert the current thread into the
5982ff0e645SJohn Baldwin 	 * turnstile already in use by this lock.
5992ff0e645SJohn Baldwin 	 */
600961a7b24SJohn Baldwin 	if (ts == NULL) {
601ef0ebfc3SJohn Baldwin #ifdef TURNSTILE_PROFILING
602ef0ebfc3SJohn Baldwin 		tc->tc_depth++;
603ef0ebfc3SJohn Baldwin 		if (tc->tc_depth > tc->tc_max_depth) {
604ef0ebfc3SJohn Baldwin 			tc->tc_max_depth = tc->tc_depth;
605ef0ebfc3SJohn Baldwin 			if (tc->tc_max_depth > turnstile_max_depth)
606ef0ebfc3SJohn Baldwin 				turnstile_max_depth = tc->tc_max_depth;
607ef0ebfc3SJohn Baldwin 		}
608ef0ebfc3SJohn Baldwin #endif
609961a7b24SJohn Baldwin 		ts = td->td_turnstile;
610961a7b24SJohn Baldwin 		LIST_INSERT_HEAD(&tc->tc_turnstiles, ts, ts_hash);
611961a7b24SJohn Baldwin 		KASSERT(TAILQ_EMPTY(&ts->ts_pending),
612961a7b24SJohn Baldwin 		    ("thread's turnstile has pending threads"));
6137aa4f685SJohn Baldwin 		KASSERT(TAILQ_EMPTY(&ts->ts_blocked[TS_EXCLUSIVE_QUEUE]),
6147aa4f685SJohn Baldwin 		    ("thread's turnstile has exclusive waiters"));
6157aa4f685SJohn Baldwin 		KASSERT(TAILQ_EMPTY(&ts->ts_blocked[TS_SHARED_QUEUE]),
6167aa4f685SJohn Baldwin 		    ("thread's turnstile has shared waiters"));
617961a7b24SJohn Baldwin 		KASSERT(LIST_EMPTY(&ts->ts_free),
618961a7b24SJohn Baldwin 		    ("thread's turnstile has a non-empty free list"));
619961a7b24SJohn Baldwin 		KASSERT(ts->ts_lockobj == NULL, ("stale ts_lockobj pointer"));
620961a7b24SJohn Baldwin 		ts->ts_lockobj = lock;
621961a7b24SJohn Baldwin 		mtx_lock_spin(&td_contested_lock);
6227aa4f685SJohn Baldwin 		TAILQ_INSERT_TAIL(&ts->ts_blocked[queue], td, td_lockq);
623961a7b24SJohn Baldwin 		turnstile_setowner(ts, owner);
624961a7b24SJohn Baldwin 		mtx_unlock_spin(&td_contested_lock);
625961a7b24SJohn Baldwin 	} else {
6267aa4f685SJohn Baldwin 		TAILQ_FOREACH(td1, &ts->ts_blocked[queue], td_lockq)
627961a7b24SJohn Baldwin 			if (td1->td_priority > td->td_priority)
6286c35e809SDag-Erling Smørgrav 				break;
629961a7b24SJohn Baldwin 		mtx_lock_spin(&td_contested_lock);
630961a7b24SJohn Baldwin 		if (td1 != NULL)
631961a7b24SJohn Baldwin 			TAILQ_INSERT_BEFORE(td1, td, td_lockq);
632961a7b24SJohn Baldwin 		else
6337aa4f685SJohn Baldwin 			TAILQ_INSERT_TAIL(&ts->ts_blocked[queue], td, td_lockq);
6347aa4f685SJohn Baldwin 		MPASS(owner == ts->ts_owner);
635961a7b24SJohn Baldwin 		mtx_unlock_spin(&td_contested_lock);
636961a7b24SJohn Baldwin 		MPASS(td->td_turnstile != NULL);
637961a7b24SJohn Baldwin 		LIST_INSERT_HEAD(&ts->ts_free, td->td_turnstile, ts_hash);
6386c35e809SDag-Erling Smørgrav 	}
639961a7b24SJohn Baldwin 	td->td_turnstile = NULL;
640961a7b24SJohn Baldwin 	mtx_unlock_spin(&tc->tc_lock);
64136412d79SJohn Baldwin 
6429ed346baSBosko Milekic 	mtx_lock_spin(&sched_lock);
64336412d79SJohn Baldwin 	/*
644961a7b24SJohn Baldwin 	 * Handle race condition where a thread on another CPU that owns
645961a7b24SJohn Baldwin 	 * lock 'lock' could have woken us in between us dropping the
646961a7b24SJohn Baldwin 	 * turnstile chain lock and acquiring the sched_lock.
64736412d79SJohn Baldwin 	 */
648961a7b24SJohn Baldwin 	if (td->td_flags & TDF_TSNOBLOCK) {
649961a7b24SJohn Baldwin 		td->td_flags &= ~TDF_TSNOBLOCK;
6509ed346baSBosko Milekic 		mtx_unlock_spin(&sched_lock);
65136412d79SJohn Baldwin 		return;
65236412d79SJohn Baldwin 	}
6539ed346baSBosko Milekic 
65436412d79SJohn Baldwin #ifdef notyet
65536412d79SJohn Baldwin 	/*
6569ed346baSBosko Milekic 	 * If we're borrowing an interrupted thread's VM context, we
6579ed346baSBosko Milekic 	 * must clean up before going to sleep.
65836412d79SJohn Baldwin 	 */
659b40ce416SJulian Elischer 	if (td->td_ithd != NULL) {
660b40ce416SJulian Elischer 		struct ithd *it = td->td_ithd;
66136412d79SJohn Baldwin 
66236412d79SJohn Baldwin 		if (it->it_interrupted) {
663961a7b24SJohn Baldwin 			if (LOCK_LOG_TEST(lock, 0))
664961a7b24SJohn Baldwin 				CTR3(KTR_LOCK, "%s: %p interrupted %p",
665961a7b24SJohn Baldwin 				    __func__, it, it->it_interrupted);
66636412d79SJohn Baldwin 			intr_thd_fixup(it);
66736412d79SJohn Baldwin 		}
66836412d79SJohn Baldwin 	}
66936412d79SJohn Baldwin #endif
67036412d79SJohn Baldwin 
671961a7b24SJohn Baldwin 	/* Save who we are blocked on and switch. */
6727aa4f685SJohn Baldwin 	td->td_tsqueue = queue;
673961a7b24SJohn Baldwin 	td->td_blocked = ts;
674961a7b24SJohn Baldwin 	td->td_lockname = lock->lo_name;
675551cf4e1SJohn Baldwin 	TD_SET_LOCK(td);
676b40ce416SJulian Elischer 	propagate_priority(td);
6779ed346baSBosko Milekic 
678961a7b24SJohn Baldwin 	if (LOCK_LOG_TEST(lock, 0))
679f5c157d9SJohn Baldwin 		CTR4(KTR_LOCK, "%s: td %d blocked on [%p] %s", __func__,
680f5c157d9SJohn Baldwin 		    td->td_tid, lock, lock->lo_name);
6819ed346baSBosko Milekic 
682bf0acc27SJohn Baldwin 	mi_switch(SW_VOL, NULL);
6839ed346baSBosko Milekic 
684961a7b24SJohn Baldwin 	if (LOCK_LOG_TEST(lock, 0))
685f5c157d9SJohn Baldwin 		CTR4(KTR_LOCK, "%s: td %d free from blocked on [%p] %s",
686f5c157d9SJohn Baldwin 		    __func__, td->td_tid, lock, lock->lo_name);
6879ed346baSBosko Milekic 
6889ed346baSBosko Milekic 	mtx_unlock_spin(&sched_lock);
68936412d79SJohn Baldwin }
6909ed346baSBosko Milekic 
691961a7b24SJohn Baldwin /*
692961a7b24SJohn Baldwin  * Pick the highest priority thread on this turnstile and put it on the
693961a7b24SJohn Baldwin  * pending list.  This must be called with the turnstile chain locked.
694961a7b24SJohn Baldwin  */
695961a7b24SJohn Baldwin int
6967aa4f685SJohn Baldwin turnstile_signal(struct turnstile *ts, int queue)
697961a7b24SJohn Baldwin {
698961a7b24SJohn Baldwin 	struct turnstile_chain *tc;
699961a7b24SJohn Baldwin 	struct thread *td;
700961a7b24SJohn Baldwin 	int empty;
701961a7b24SJohn Baldwin 
702961a7b24SJohn Baldwin 	MPASS(ts != NULL);
703961a7b24SJohn Baldwin 	MPASS(curthread->td_proc->p_magic == P_MAGIC);
7047aa4f685SJohn Baldwin 	MPASS(ts->ts_owner == curthread ||
7057aa4f685SJohn Baldwin 	    (queue == TS_EXCLUSIVE_QUEUE && ts->ts_owner == NULL));
706961a7b24SJohn Baldwin 	tc = TC_LOOKUP(ts->ts_lockobj);
707961a7b24SJohn Baldwin 	mtx_assert(&tc->tc_lock, MA_OWNED);
7087aa4f685SJohn Baldwin 	MPASS(queue == TS_SHARED_QUEUE || queue == TS_EXCLUSIVE_QUEUE);
7099ed346baSBosko Milekic 
7109ed346baSBosko Milekic 	/*
711961a7b24SJohn Baldwin 	 * Pick the highest priority thread blocked on this lock and
712961a7b24SJohn Baldwin 	 * move it to the pending list.
7139ed346baSBosko Milekic 	 */
7147aa4f685SJohn Baldwin 	td = TAILQ_FIRST(&ts->ts_blocked[queue]);
715b40ce416SJulian Elischer 	MPASS(td->td_proc->p_magic == P_MAGIC);
716961a7b24SJohn Baldwin 	mtx_lock_spin(&td_contested_lock);
7177aa4f685SJohn Baldwin 	TAILQ_REMOVE(&ts->ts_blocked[queue], td, td_lockq);
718961a7b24SJohn Baldwin 	mtx_unlock_spin(&td_contested_lock);
719961a7b24SJohn Baldwin 	TAILQ_INSERT_TAIL(&ts->ts_pending, td, td_lockq);
7209ed346baSBosko Milekic 
721961a7b24SJohn Baldwin 	/*
722961a7b24SJohn Baldwin 	 * If the turnstile is now empty, remove it from its chain and
723961a7b24SJohn Baldwin 	 * give it to the about-to-be-woken thread.  Otherwise take a
724961a7b24SJohn Baldwin 	 * turnstile from the free list and give it to the thread.
725961a7b24SJohn Baldwin 	 */
7267aa4f685SJohn Baldwin 	empty = TAILQ_EMPTY(&ts->ts_blocked[TS_EXCLUSIVE_QUEUE]) &&
7277aa4f685SJohn Baldwin 	    TAILQ_EMPTY(&ts->ts_blocked[TS_SHARED_QUEUE]);
728ef0ebfc3SJohn Baldwin 	if (empty) {
729961a7b24SJohn Baldwin 		MPASS(LIST_EMPTY(&ts->ts_free));
730ef0ebfc3SJohn Baldwin #ifdef TURNSTILE_PROFILING
731ef0ebfc3SJohn Baldwin 		tc->tc_depth--;
732ef0ebfc3SJohn Baldwin #endif
733ef0ebfc3SJohn Baldwin 	} else
734961a7b24SJohn Baldwin 		ts = LIST_FIRST(&ts->ts_free);
735da1d503bSJohn Baldwin 	MPASS(ts != NULL);
736961a7b24SJohn Baldwin 	LIST_REMOVE(ts, ts_hash);
737961a7b24SJohn Baldwin 	td->td_turnstile = ts;
7389ed346baSBosko Milekic 
739961a7b24SJohn Baldwin 	return (empty);
740961a7b24SJohn Baldwin }
741961a7b24SJohn Baldwin 
742961a7b24SJohn Baldwin /*
743961a7b24SJohn Baldwin  * Put all blocked threads on the pending list.  This must be called with
744961a7b24SJohn Baldwin  * the turnstile chain locked.
745961a7b24SJohn Baldwin  */
746961a7b24SJohn Baldwin void
7477aa4f685SJohn Baldwin turnstile_broadcast(struct turnstile *ts, int queue)
748961a7b24SJohn Baldwin {
749961a7b24SJohn Baldwin 	struct turnstile_chain *tc;
750961a7b24SJohn Baldwin 	struct turnstile *ts1;
751961a7b24SJohn Baldwin 	struct thread *td;
752961a7b24SJohn Baldwin 
753961a7b24SJohn Baldwin 	MPASS(ts != NULL);
754961a7b24SJohn Baldwin 	MPASS(curthread->td_proc->p_magic == P_MAGIC);
7557aa4f685SJohn Baldwin 	MPASS(ts->ts_owner == curthread ||
7567aa4f685SJohn Baldwin 	    (queue == TS_EXCLUSIVE_QUEUE && ts->ts_owner == NULL));
757961a7b24SJohn Baldwin 	tc = TC_LOOKUP(ts->ts_lockobj);
758961a7b24SJohn Baldwin 	mtx_assert(&tc->tc_lock, MA_OWNED);
7597aa4f685SJohn Baldwin 	MPASS(queue == TS_SHARED_QUEUE || queue == TS_EXCLUSIVE_QUEUE);
760961a7b24SJohn Baldwin 
761961a7b24SJohn Baldwin 	/*
762961a7b24SJohn Baldwin 	 * Transfer the blocked list to the pending list.
763961a7b24SJohn Baldwin 	 */
764961a7b24SJohn Baldwin 	mtx_lock_spin(&td_contested_lock);
7657aa4f685SJohn Baldwin 	TAILQ_CONCAT(&ts->ts_pending, &ts->ts_blocked[queue], td_lockq);
766961a7b24SJohn Baldwin 	mtx_unlock_spin(&td_contested_lock);
767961a7b24SJohn Baldwin 
768961a7b24SJohn Baldwin 	/*
769961a7b24SJohn Baldwin 	 * Give a turnstile to each thread.  The last thread gets
7707aa4f685SJohn Baldwin 	 * this turnstile if the turnstile is empty.
771961a7b24SJohn Baldwin 	 */
772961a7b24SJohn Baldwin 	TAILQ_FOREACH(td, &ts->ts_pending, td_lockq) {
773961a7b24SJohn Baldwin 		if (LIST_EMPTY(&ts->ts_free)) {
774961a7b24SJohn Baldwin 			MPASS(TAILQ_NEXT(td, td_lockq) == NULL);
775961a7b24SJohn Baldwin 			ts1 = ts;
776ef0ebfc3SJohn Baldwin #ifdef TURNSTILE_PROFILING
777ef0ebfc3SJohn Baldwin 			tc->tc_depth--;
778ef0ebfc3SJohn Baldwin #endif
77936412d79SJohn Baldwin 		} else
780961a7b24SJohn Baldwin 			ts1 = LIST_FIRST(&ts->ts_free);
781da1d503bSJohn Baldwin 		MPASS(ts1 != NULL);
782961a7b24SJohn Baldwin 		LIST_REMOVE(ts1, ts_hash);
783961a7b24SJohn Baldwin 		td->td_turnstile = ts1;
784961a7b24SJohn Baldwin 	}
785961a7b24SJohn Baldwin }
7869ed346baSBosko Milekic 
787961a7b24SJohn Baldwin /*
788961a7b24SJohn Baldwin  * Wakeup all threads on the pending list and adjust the priority of the
789961a7b24SJohn Baldwin  * current thread appropriately.  This must be called with the turnstile
790961a7b24SJohn Baldwin  * chain locked.
791961a7b24SJohn Baldwin  */
792961a7b24SJohn Baldwin void
7937aa4f685SJohn Baldwin turnstile_unpend(struct turnstile *ts, int owner_type)
794961a7b24SJohn Baldwin {
795961a7b24SJohn Baldwin 	TAILQ_HEAD( ,thread) pending_threads;
796961a7b24SJohn Baldwin 	struct turnstile_chain *tc;
797961a7b24SJohn Baldwin 	struct thread *td;
798f5c157d9SJohn Baldwin 	u_char cp, pri;
799961a7b24SJohn Baldwin 
800961a7b24SJohn Baldwin 	MPASS(ts != NULL);
8017aa4f685SJohn Baldwin 	MPASS(ts->ts_owner == curthread ||
8027aa4f685SJohn Baldwin 	    (owner_type == TS_SHARED_LOCK && ts->ts_owner == NULL));
803961a7b24SJohn Baldwin 	tc = TC_LOOKUP(ts->ts_lockobj);
804961a7b24SJohn Baldwin 	mtx_assert(&tc->tc_lock, MA_OWNED);
805961a7b24SJohn Baldwin 	MPASS(!TAILQ_EMPTY(&ts->ts_pending));
806961a7b24SJohn Baldwin 
807961a7b24SJohn Baldwin 	/*
808961a7b24SJohn Baldwin 	 * Move the list of pending threads out of the turnstile and
809961a7b24SJohn Baldwin 	 * into a local variable.
810961a7b24SJohn Baldwin 	 */
811961a7b24SJohn Baldwin 	TAILQ_INIT(&pending_threads);
812961a7b24SJohn Baldwin 	TAILQ_CONCAT(&pending_threads, &ts->ts_pending, td_lockq);
813961a7b24SJohn Baldwin #ifdef INVARIANTS
8147aa4f685SJohn Baldwin 	if (TAILQ_EMPTY(&ts->ts_blocked[TS_EXCLUSIVE_QUEUE]) &&
8157aa4f685SJohn Baldwin 	    TAILQ_EMPTY(&ts->ts_blocked[TS_SHARED_QUEUE]))
816961a7b24SJohn Baldwin 		ts->ts_lockobj = NULL;
817961a7b24SJohn Baldwin #endif
818961a7b24SJohn Baldwin 
819961a7b24SJohn Baldwin 	/*
820961a7b24SJohn Baldwin 	 * Remove the turnstile from this thread's list of contested locks
821961a7b24SJohn Baldwin 	 * since this thread doesn't own it anymore.  New threads will
822961a7b24SJohn Baldwin 	 * not be blocking on the turnstile until it is claimed by a new
8237aa4f685SJohn Baldwin 	 * owner.  There might not be a current owner if this is a shared
8247aa4f685SJohn Baldwin 	 * lock.
825961a7b24SJohn Baldwin 	 */
8267aa4f685SJohn Baldwin 	if (ts->ts_owner != NULL) {
827961a7b24SJohn Baldwin 		mtx_lock_spin(&td_contested_lock);
828961a7b24SJohn Baldwin 		ts->ts_owner = NULL;
829961a7b24SJohn Baldwin 		LIST_REMOVE(ts, ts_link);
830961a7b24SJohn Baldwin 		mtx_unlock_spin(&td_contested_lock);
8317aa4f685SJohn Baldwin 	}
832b8597527SJohn Baldwin 	critical_enter();
833961a7b24SJohn Baldwin 	mtx_unlock_spin(&tc->tc_lock);
834961a7b24SJohn Baldwin 
835961a7b24SJohn Baldwin 	/*
836961a7b24SJohn Baldwin 	 * Adjust the priority of curthread based on other contested
837961a7b24SJohn Baldwin 	 * locks it owns.  Don't lower the priority below the base
838961a7b24SJohn Baldwin 	 * priority however.
839961a7b24SJohn Baldwin 	 */
840961a7b24SJohn Baldwin 	td = curthread;
841d5a08a60SJake Burkholder 	pri = PRI_MAX;
842961a7b24SJohn Baldwin 	mtx_lock_spin(&sched_lock);
843961a7b24SJohn Baldwin 	mtx_lock_spin(&td_contested_lock);
844961a7b24SJohn Baldwin 	LIST_FOREACH(ts, &td->td_contested, ts_link) {
8457aa4f685SJohn Baldwin 		cp = turnstile_first_waiter(ts)->td_priority;
84636412d79SJohn Baldwin 		if (cp < pri)
84736412d79SJohn Baldwin 			pri = cp;
84836412d79SJohn Baldwin 	}
849961a7b24SJohn Baldwin 	mtx_unlock_spin(&td_contested_lock);
850f5c157d9SJohn Baldwin 	sched_unlend_prio(td, pri);
8519ed346baSBosko Milekic 
852961a7b24SJohn Baldwin 	/*
853961a7b24SJohn Baldwin 	 * Wake up all the pending threads.  If a thread is not blocked
854961a7b24SJohn Baldwin 	 * on a lock, then it is currently executing on another CPU in
85567ba8678SJohn Baldwin 	 * turnstile_wait() or sitting on a run queue waiting to resume
85667ba8678SJohn Baldwin 	 * in turnstile_wait().  Set a flag to force it to try to acquire
857961a7b24SJohn Baldwin 	 * the lock again instead of blocking.
858961a7b24SJohn Baldwin 	 */
859961a7b24SJohn Baldwin 	while (!TAILQ_EMPTY(&pending_threads)) {
860961a7b24SJohn Baldwin 		td = TAILQ_FIRST(&pending_threads);
861961a7b24SJohn Baldwin 		TAILQ_REMOVE(&pending_threads, td, td_lockq);
862961a7b24SJohn Baldwin 		MPASS(td->td_proc->p_magic == P_MAGIC);
863961a7b24SJohn Baldwin 		if (TD_ON_LOCK(td)) {
864961a7b24SJohn Baldwin 			td->td_blocked = NULL;
865961a7b24SJohn Baldwin 			td->td_lockname = NULL;
8667aa4f685SJohn Baldwin #ifdef INVARIANTS
8677aa4f685SJohn Baldwin 			td->td_tsqueue = 0xff;
8687aa4f685SJohn Baldwin #endif
869961a7b24SJohn Baldwin 			TD_CLR_LOCK(td);
870961a7b24SJohn Baldwin 			MPASS(TD_CAN_RUN(td));
8712630e4c9SJulian Elischer 			setrunqueue(td, SRQ_BORING);
872961a7b24SJohn Baldwin 		} else {
873961a7b24SJohn Baldwin 			td->td_flags |= TDF_TSNOBLOCK;
87467ba8678SJohn Baldwin 			MPASS(TD_IS_RUNNING(td) || TD_ON_RUNQ(td));
875961a7b24SJohn Baldwin 		}
876961a7b24SJohn Baldwin 	}
877b8597527SJohn Baldwin 	critical_exit();
878e0817317SJulian Elischer 	mtx_unlock_spin(&sched_lock);
8799ed346baSBosko Milekic }
8809ed346baSBosko Milekic 
8819ed346baSBosko Milekic /*
882961a7b24SJohn Baldwin  * Return the first thread in a turnstile.
8839ed346baSBosko Milekic  */
884961a7b24SJohn Baldwin struct thread *
8857aa4f685SJohn Baldwin turnstile_head(struct turnstile *ts, int queue)
8860cde2e34SJason Evans {
887961a7b24SJohn Baldwin #ifdef INVARIANTS
888961a7b24SJohn Baldwin 	struct turnstile_chain *tc;
8895cb0fbe4SJohn Baldwin 
890961a7b24SJohn Baldwin 	MPASS(ts != NULL);
8917aa4f685SJohn Baldwin 	MPASS(queue == TS_SHARED_QUEUE || queue == TS_EXCLUSIVE_QUEUE);
892961a7b24SJohn Baldwin 	tc = TC_LOOKUP(ts->ts_lockobj);
893961a7b24SJohn Baldwin 	mtx_assert(&tc->tc_lock, MA_OWNED);
8940cde2e34SJason Evans #endif
8957aa4f685SJohn Baldwin 	return (TAILQ_FIRST(&ts->ts_blocked[queue]));
896961a7b24SJohn Baldwin }
8977aa4f685SJohn Baldwin 
8987aa4f685SJohn Baldwin #ifdef DDB
8997aa4f685SJohn Baldwin static void
9007aa4f685SJohn Baldwin print_thread(struct thread *td, const char *prefix)
9017aa4f685SJohn Baldwin {
9027aa4f685SJohn Baldwin 
9037aa4f685SJohn Baldwin 	db_printf("%s%p (tid %d, pid %d, \"%s\")\n", prefix, td, td->td_tid,
9047aa4f685SJohn Baldwin 	    td->td_proc->p_pid, td->td_proc->p_comm);
9057aa4f685SJohn Baldwin }
9067aa4f685SJohn Baldwin 
9077aa4f685SJohn Baldwin static void
9087aa4f685SJohn Baldwin print_queue(struct threadqueue *queue, const char *header, const char *prefix)
9097aa4f685SJohn Baldwin {
9107aa4f685SJohn Baldwin 	struct thread *td;
9117aa4f685SJohn Baldwin 
9127aa4f685SJohn Baldwin 	db_printf("%s:\n", header);
9137aa4f685SJohn Baldwin 	if (TAILQ_EMPTY(queue)) {
9147aa4f685SJohn Baldwin 		db_printf("%sempty\n", prefix);
9157aa4f685SJohn Baldwin 		return;
9167aa4f685SJohn Baldwin 	}
9177aa4f685SJohn Baldwin 	TAILQ_FOREACH(td, queue, td_lockq) {
9187aa4f685SJohn Baldwin 		print_thread(td, prefix);
9197aa4f685SJohn Baldwin 	}
9207aa4f685SJohn Baldwin }
9217aa4f685SJohn Baldwin 
9227aa4f685SJohn Baldwin DB_SHOW_COMMAND(turnstile, db_show_turnstile)
9237aa4f685SJohn Baldwin {
9247aa4f685SJohn Baldwin 	struct turnstile_chain *tc;
9257aa4f685SJohn Baldwin 	struct turnstile *ts;
9267aa4f685SJohn Baldwin 	struct lock_object *lock;
9277aa4f685SJohn Baldwin 	int i;
9287aa4f685SJohn Baldwin 
9297aa4f685SJohn Baldwin 	if (!have_addr)
9307aa4f685SJohn Baldwin 		return;
9317aa4f685SJohn Baldwin 
9327aa4f685SJohn Baldwin 	/*
9337aa4f685SJohn Baldwin 	 * First, see if there is an active turnstile for the lock indicated
9347aa4f685SJohn Baldwin 	 * by the address.
9357aa4f685SJohn Baldwin 	 */
9367aa4f685SJohn Baldwin 	lock = (struct lock_object *)addr;
9377aa4f685SJohn Baldwin 	tc = TC_LOOKUP(lock);
9387aa4f685SJohn Baldwin 	LIST_FOREACH(ts, &tc->tc_turnstiles, ts_hash)
9397aa4f685SJohn Baldwin 		if (ts->ts_lockobj == lock)
9407aa4f685SJohn Baldwin 			goto found;
9417aa4f685SJohn Baldwin 
9427aa4f685SJohn Baldwin 	/*
9437aa4f685SJohn Baldwin 	 * Second, see if there is an active turnstile at the address
9447aa4f685SJohn Baldwin 	 * indicated.
9457aa4f685SJohn Baldwin 	 */
9467aa4f685SJohn Baldwin 	for (i = 0; i < TC_TABLESIZE; i++)
9477aa4f685SJohn Baldwin 		LIST_FOREACH(ts, &turnstile_chains[i].tc_turnstiles, ts_hash) {
9487aa4f685SJohn Baldwin 			if (ts == (struct turnstile *)addr)
9497aa4f685SJohn Baldwin 				goto found;
9507aa4f685SJohn Baldwin 		}
9517aa4f685SJohn Baldwin 
9527aa4f685SJohn Baldwin 	db_printf("Unable to locate a turnstile via %p\n", (void *)addr);
9537aa4f685SJohn Baldwin 	return;
9547aa4f685SJohn Baldwin found:
9557aa4f685SJohn Baldwin 	lock = ts->ts_lockobj;
9567aa4f685SJohn Baldwin 	db_printf("Lock: %p - (%s) %s\n", lock, LOCK_CLASS(lock)->lc_name,
9577aa4f685SJohn Baldwin 	    lock->lo_name);
9587aa4f685SJohn Baldwin 	if (ts->ts_owner)
9597aa4f685SJohn Baldwin 		print_thread(ts->ts_owner, "Lock Owner: ");
9607aa4f685SJohn Baldwin 	else
9617aa4f685SJohn Baldwin 		db_printf("Lock Owner: none\n");
9627aa4f685SJohn Baldwin 	print_queue(&ts->ts_blocked[TS_SHARED_QUEUE], "Shared Waiters", "\t");
9637aa4f685SJohn Baldwin 	print_queue(&ts->ts_blocked[TS_EXCLUSIVE_QUEUE], "Exclusive Waiters",
9647aa4f685SJohn Baldwin 	    "\t");
9657aa4f685SJohn Baldwin 	print_queue(&ts->ts_pending, "Pending Threads", "\t");
9667aa4f685SJohn Baldwin 
9677aa4f685SJohn Baldwin }
9687aa4f685SJohn Baldwin #endif
969