xref: /freebsd/sys/kern/subr_turnstile.c (revision e602ba25fd1f9a7ea2215c01f470c08f140de809)
10384fff8SJason Evans /*-
20384fff8SJason Evans  * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
30384fff8SJason Evans  *
40384fff8SJason Evans  * Redistribution and use in source and binary forms, with or without
50384fff8SJason Evans  * modification, are permitted provided that the following conditions
60384fff8SJason Evans  * are met:
70384fff8SJason Evans  * 1. Redistributions of source code must retain the above copyright
80384fff8SJason Evans  *    notice, this list of conditions and the following disclaimer.
90384fff8SJason Evans  * 2. Redistributions in binary form must reproduce the above copyright
100384fff8SJason Evans  *    notice, this list of conditions and the following disclaimer in the
110384fff8SJason Evans  *    documentation and/or other materials provided with the distribution.
120384fff8SJason Evans  * 3. Berkeley Software Design Inc's name may not be used to endorse or
130384fff8SJason Evans  *    promote products derived from this software without specific prior
140384fff8SJason Evans  *    written permission.
150384fff8SJason Evans  *
160384fff8SJason Evans  * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
170384fff8SJason Evans  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
180384fff8SJason Evans  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
190384fff8SJason Evans  * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
200384fff8SJason Evans  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
210384fff8SJason Evans  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
220384fff8SJason Evans  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
230384fff8SJason Evans  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
240384fff8SJason Evans  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
250384fff8SJason Evans  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
260384fff8SJason Evans  * SUCH DAMAGE.
270384fff8SJason Evans  *
280384fff8SJason Evans  *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
2936412d79SJohn Baldwin  *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
300384fff8SJason Evans  * $FreeBSD$
310384fff8SJason Evans  */
320384fff8SJason Evans 
330384fff8SJason Evans /*
34ba48b69aSJohn Baldwin  * Machine independent bits of mutex implementation.
350384fff8SJason Evans  */
360384fff8SJason Evans 
372498cf8cSJohn Baldwin #include "opt_adaptive_mutexes.h"
389c36c934SJohn Baldwin #include "opt_ddb.h"
39a5a96a19SJohn Baldwin 
400384fff8SJason Evans #include <sys/param.h>
416c35e809SDag-Erling Smørgrav #include <sys/systm.h>
4236412d79SJohn Baldwin #include <sys/bus.h>
4336412d79SJohn Baldwin #include <sys/kernel.h>
446c35e809SDag-Erling Smørgrav #include <sys/ktr.h>
4519284646SJohn Baldwin #include <sys/lock.h>
46fb919e4dSMark Murray #include <sys/malloc.h>
4719284646SJohn Baldwin #include <sys/mutex.h>
480384fff8SJason Evans #include <sys/proc.h>
49c4f7a187SJohn Baldwin #include <sys/resourcevar.h>
506c35e809SDag-Erling Smørgrav #include <sys/sbuf.h>
51db586c8bSDag-Erling Smørgrav #include <sys/stdint.h>
52a5a96a19SJohn Baldwin #include <sys/sysctl.h>
5336412d79SJohn Baldwin #include <sys/vmmeter.h>
540384fff8SJason Evans 
5536412d79SJohn Baldwin #include <machine/atomic.h>
5636412d79SJohn Baldwin #include <machine/bus.h>
5736412d79SJohn Baldwin #include <machine/clock.h>
580384fff8SJason Evans #include <machine/cpu.h>
5936412d79SJohn Baldwin 
609c36c934SJohn Baldwin #include <ddb/ddb.h>
619c36c934SJohn Baldwin 
6236412d79SJohn Baldwin #include <vm/vm.h>
6336412d79SJohn Baldwin #include <vm/vm_extern.h>
6436412d79SJohn Baldwin 
650cde2e34SJason Evans /*
669ed346baSBosko Milekic  * Internal utility macros.
670cde2e34SJason Evans  */
689ed346baSBosko Milekic #define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
690cde2e34SJason Evans 
709ed346baSBosko Milekic #define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
71b40ce416SJulian Elischer 	: (struct thread *)((m)->mtx_lock & MTX_FLAGMASK))
729ed346baSBosko Milekic 
736a95e08fSJohn Baldwin /* XXXKSE This test will change. */
746a95e08fSJohn Baldwin #define	thread_running(td)						\
755853d37dSJohn Baldwin 	((td)->td_kse != NULL && (td)->td_kse->ke_oncpu != NOCPU)
765853d37dSJohn Baldwin 
770cde2e34SJason Evans /*
7819284646SJohn Baldwin  * Lock classes for sleep and spin mutexes.
790cde2e34SJason Evans  */
8019284646SJohn Baldwin struct lock_class lock_class_mtx_sleep = {
8119284646SJohn Baldwin 	"sleep mutex",
8219284646SJohn Baldwin 	LC_SLEEPLOCK | LC_RECURSABLE
8319284646SJohn Baldwin };
8419284646SJohn Baldwin struct lock_class lock_class_mtx_spin = {
8519284646SJohn Baldwin 	"spin mutex",
8619284646SJohn Baldwin 	LC_SPINLOCK | LC_RECURSABLE
878484de75SJohn Baldwin };
888484de75SJohn Baldwin 
899ed346baSBosko Milekic /*
90c53c013bSJohn Baldwin  * System-wide mutexes
91c53c013bSJohn Baldwin  */
92c53c013bSJohn Baldwin struct mtx sched_lock;
93c53c013bSJohn Baldwin struct mtx Giant;
94c53c013bSJohn Baldwin 
95c53c013bSJohn Baldwin /*
969ed346baSBosko Milekic  * Prototypes for non-exported routines.
979ed346baSBosko Milekic  */
98b40ce416SJulian Elischer static void	propagate_priority(struct thread *);
9936412d79SJohn Baldwin 
10036412d79SJohn Baldwin static void
101b40ce416SJulian Elischer propagate_priority(struct thread *td)
10236412d79SJohn Baldwin {
1032c100766SJulian Elischer 	int pri = td->td_priority;
104b40ce416SJulian Elischer 	struct mtx *m = td->td_blocked;
10536412d79SJohn Baldwin 
1061bd0eefbSJohn Baldwin 	mtx_assert(&sched_lock, MA_OWNED);
10736412d79SJohn Baldwin 	for (;;) {
108b40ce416SJulian Elischer 		struct thread *td1;
10936412d79SJohn Baldwin 
110b40ce416SJulian Elischer 		td = mtx_owner(m);
11136412d79SJohn Baldwin 
112b40ce416SJulian Elischer 		if (td == NULL) {
11336412d79SJohn Baldwin 			/*
11436412d79SJohn Baldwin 			 * This really isn't quite right. Really
115b40ce416SJulian Elischer 			 * ought to bump priority of thread that
11636412d79SJohn Baldwin 			 * next acquires the mutex.
11736412d79SJohn Baldwin 			 */
11836412d79SJohn Baldwin 			MPASS(m->mtx_lock == MTX_CONTESTED);
11936412d79SJohn Baldwin 			return;
12036412d79SJohn Baldwin 		}
1219ed346baSBosko Milekic 
122e602ba25SJulian Elischer 		KASSERT(td->td_state != TDS_SURPLUS, ("Mutex owner SURPLUS"));
123e602ba25SJulian Elischer 		MPASS(td->td_proc != NULL);
124b40ce416SJulian Elischer 		MPASS(td->td_proc->p_magic == P_MAGIC);
125e602ba25SJulian Elischer 		KASSERT(td->td_state != TDS_SLP,
126e602ba25SJulian Elischer 		    ("sleeping thread owns a mutex"));
1272c100766SJulian Elischer 		if (td->td_priority <= pri) /* lower is higher priority */
12836412d79SJohn Baldwin 			return;
1291bd0eefbSJohn Baldwin 
1301bd0eefbSJohn Baldwin 
13136412d79SJohn Baldwin 		/*
13236412d79SJohn Baldwin 		 * If lock holder is actually running, just bump priority.
13336412d79SJohn Baldwin 		 */
134e602ba25SJulian Elischer 		if (td->td_state == TDS_RUNNING) {
135e602ba25SJulian Elischer 			td->td_priority = pri;
13636412d79SJohn Baldwin 			return;
13736412d79SJohn Baldwin 		}
138d5a08a60SJake Burkholder 
1391b43703bSJohn Baldwin #ifndef SMP
1401b43703bSJohn Baldwin 		/*
141b40ce416SJulian Elischer 		 * For UP, we check to see if td is curthread (this shouldn't
1421b43703bSJohn Baldwin 		 * ever happen however as it would mean we are in a deadlock.)
1431b43703bSJohn Baldwin 		 */
144b40ce416SJulian Elischer 		KASSERT(td != curthread, ("Deadlock detected"));
1451b43703bSJohn Baldwin #endif
1461b43703bSJohn Baldwin 
14736412d79SJohn Baldwin 		/*
148b40ce416SJulian Elischer 		 * If on run queue move to new run queue, and quit.
149b40ce416SJulian Elischer 		 * XXXKSE this gets a lot more complicated under threads
150b40ce416SJulian Elischer 		 * but try anyhow.
151e602ba25SJulian Elischer 		 * We should have a special call to do this more efficiently.
15236412d79SJohn Baldwin 		 */
153e602ba25SJulian Elischer 		if (td->td_state == TDS_RUNQ) {
154b40ce416SJulian Elischer 			MPASS(td->td_blocked == NULL);
155b40ce416SJulian Elischer 			remrunqueue(td);
156e602ba25SJulian Elischer 			td->td_priority = pri;
157b40ce416SJulian Elischer 			setrunqueue(td);
15836412d79SJohn Baldwin 			return;
15936412d79SJohn Baldwin 		}
160e602ba25SJulian Elischer 		/*
161e602ba25SJulian Elischer 		 * Adjust for any other cases.
162e602ba25SJulian Elischer 		 */
163e602ba25SJulian Elischer 		td->td_priority = pri;
16436412d79SJohn Baldwin 
16536412d79SJohn Baldwin 		/*
1661bd0eefbSJohn Baldwin 		 * If we aren't blocked on a mutex, we should be.
16736412d79SJohn Baldwin 		 */
168e602ba25SJulian Elischer 		KASSERT(td->td_state == TDS_MTX, (
1691bd0eefbSJohn Baldwin 		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
170e602ba25SJulian Elischer 		    td->td_proc->p_pid, td->td_proc->p_comm, td->td_state,
17119284646SJohn Baldwin 		    m->mtx_object.lo_name));
17236412d79SJohn Baldwin 
17336412d79SJohn Baldwin 		/*
174b40ce416SJulian Elischer 		 * Pick up the mutex that td is blocked on.
17536412d79SJohn Baldwin 		 */
176b40ce416SJulian Elischer 		m = td->td_blocked;
17736412d79SJohn Baldwin 		MPASS(m != NULL);
17836412d79SJohn Baldwin 
17936412d79SJohn Baldwin 		/*
180b40ce416SJulian Elischer 		 * Check if the thread needs to be moved up on
18136412d79SJohn Baldwin 		 * the blocked chain
18236412d79SJohn Baldwin 		 */
183b40ce416SJulian Elischer 		if (td == TAILQ_FIRST(&m->mtx_blocked)) {
1841bd0eefbSJohn Baldwin 			continue;
1851bd0eefbSJohn Baldwin 		}
1869ed346baSBosko Milekic 
187b40ce416SJulian Elischer 		td1 = TAILQ_PREV(td, threadqueue, td_blkq);
1882c100766SJulian Elischer 		if (td1->td_priority <= pri) {
18936412d79SJohn Baldwin 			continue;
19036412d79SJohn Baldwin 		}
19136412d79SJohn Baldwin 
19236412d79SJohn Baldwin 		/*
193b40ce416SJulian Elischer 		 * Remove thread from blocked chain and determine where
194b40ce416SJulian Elischer 		 * it should be moved up to.  Since we know that td1 has
195b40ce416SJulian Elischer 		 * a lower priority than td, we know that at least one
196b40ce416SJulian Elischer 		 * thread in the chain has a lower priority and that
197b40ce416SJulian Elischer 		 * td1 will thus not be NULL after the loop.
19836412d79SJohn Baldwin 		 */
199b40ce416SJulian Elischer 		TAILQ_REMOVE(&m->mtx_blocked, td, td_blkq);
200b40ce416SJulian Elischer 		TAILQ_FOREACH(td1, &m->mtx_blocked, td_blkq) {
201b40ce416SJulian Elischer 			MPASS(td1->td_proc->p_magic == P_MAGIC);
2022c100766SJulian Elischer 			if (td1->td_priority > pri)
20336412d79SJohn Baldwin 				break;
20436412d79SJohn Baldwin 		}
2059ed346baSBosko Milekic 
206b40ce416SJulian Elischer 		MPASS(td1 != NULL);
207b40ce416SJulian Elischer 		TAILQ_INSERT_BEFORE(td1, td, td_blkq);
20836412d79SJohn Baldwin 		CTR4(KTR_LOCK,
2098484de75SJohn Baldwin 		    "propagate_priority: p %p moved before %p on [%p] %s",
210b40ce416SJulian Elischer 		    td, td1, m, m->mtx_object.lo_name);
21136412d79SJohn Baldwin 	}
21236412d79SJohn Baldwin }
21336412d79SJohn Baldwin 
2146c35e809SDag-Erling Smørgrav #ifdef MUTEX_PROFILING
2156c35e809SDag-Erling Smørgrav SYSCTL_NODE(_debug, OID_AUTO, mutex, CTLFLAG_RD, NULL, "mutex debugging");
2166c35e809SDag-Erling Smørgrav SYSCTL_NODE(_debug_mutex, OID_AUTO, prof, CTLFLAG_RD, NULL, "mutex profiling");
2176c35e809SDag-Erling Smørgrav static int mutex_prof_enable = 0;
2186c35e809SDag-Erling Smørgrav SYSCTL_INT(_debug_mutex_prof, OID_AUTO, enable, CTLFLAG_RW,
2196c35e809SDag-Erling Smørgrav     &mutex_prof_enable, 0, "Enable tracing of mutex holdtime");
2206c35e809SDag-Erling Smørgrav 
2216c35e809SDag-Erling Smørgrav struct mutex_prof {
2226c35e809SDag-Erling Smørgrav 	const char *name;
2236c35e809SDag-Erling Smørgrav 	const char *file;
2246c35e809SDag-Erling Smørgrav 	int line;
2256c35e809SDag-Erling Smørgrav #define MPROF_MAX 0
2266c35e809SDag-Erling Smørgrav #define MPROF_TOT 1
2276c35e809SDag-Erling Smørgrav #define MPROF_CNT 2
2286c35e809SDag-Erling Smørgrav #define MPROF_AVG 3
229db586c8bSDag-Erling Smørgrav 	uintmax_t counter[4];
230e6330704SDag-Erling Smørgrav 	struct mutex_prof *next;
2316c35e809SDag-Erling Smørgrav };
2326c35e809SDag-Erling Smørgrav 
2336c35e809SDag-Erling Smørgrav /*
2346c35e809SDag-Erling Smørgrav  * mprof_buf is a static pool of profiling records to avoid possible
2356c35e809SDag-Erling Smørgrav  * reentrance of the memory allocation functions.
2366c35e809SDag-Erling Smørgrav  *
2376c35e809SDag-Erling Smørgrav  * Note: NUM_MPROF_BUFFERS must be smaller than MPROF_HASH_SIZE.
2386c35e809SDag-Erling Smørgrav  */
239e6330704SDag-Erling Smørgrav #define NUM_MPROF_BUFFERS 1000
2406c35e809SDag-Erling Smørgrav static struct mutex_prof mprof_buf[NUM_MPROF_BUFFERS];
2416c35e809SDag-Erling Smørgrav static int first_free_mprof_buf;
242e6330704SDag-Erling Smørgrav #define MPROF_HASH_SIZE 1009
2436c35e809SDag-Erling Smørgrav static struct mutex_prof *mprof_hash[MPROF_HASH_SIZE];
2446c35e809SDag-Erling Smørgrav 
2456c35e809SDag-Erling Smørgrav static int mutex_prof_acquisitions;
2466c35e809SDag-Erling Smørgrav SYSCTL_INT(_debug_mutex_prof, OID_AUTO, acquisitions, CTLFLAG_RD,
2476c35e809SDag-Erling Smørgrav     &mutex_prof_acquisitions, 0, "Number of mutex acquistions recorded");
2486c35e809SDag-Erling Smørgrav static int mutex_prof_records;
2496c35e809SDag-Erling Smørgrav SYSCTL_INT(_debug_mutex_prof, OID_AUTO, records, CTLFLAG_RD,
2506c35e809SDag-Erling Smørgrav     &mutex_prof_records, 0, "Number of profiling records");
2516c35e809SDag-Erling Smørgrav static int mutex_prof_maxrecords = NUM_MPROF_BUFFERS;
2526c35e809SDag-Erling Smørgrav SYSCTL_INT(_debug_mutex_prof, OID_AUTO, maxrecords, CTLFLAG_RD,
2536c35e809SDag-Erling Smørgrav     &mutex_prof_maxrecords, 0, "Maximum number of profiling records");
2546c35e809SDag-Erling Smørgrav static int mutex_prof_rejected;
2556c35e809SDag-Erling Smørgrav SYSCTL_INT(_debug_mutex_prof, OID_AUTO, rejected, CTLFLAG_RD,
2566c35e809SDag-Erling Smørgrav     &mutex_prof_rejected, 0, "Number of rejected profiling records");
2576c35e809SDag-Erling Smørgrav static int mutex_prof_hashsize = MPROF_HASH_SIZE;
2586c35e809SDag-Erling Smørgrav SYSCTL_INT(_debug_mutex_prof, OID_AUTO, hashsize, CTLFLAG_RD,
2596c35e809SDag-Erling Smørgrav     &mutex_prof_hashsize, 0, "Hash size");
2606c35e809SDag-Erling Smørgrav static int mutex_prof_collisions = 0;
2616c35e809SDag-Erling Smørgrav SYSCTL_INT(_debug_mutex_prof, OID_AUTO, collisions, CTLFLAG_RD,
2626c35e809SDag-Erling Smørgrav     &mutex_prof_collisions, 0, "Number of hash collisions");
2636c35e809SDag-Erling Smørgrav 
2646c35e809SDag-Erling Smørgrav /*
2656c35e809SDag-Erling Smørgrav  * mprof_mtx protects the profiling buffers and the hash.
2666c35e809SDag-Erling Smørgrav  */
2676c35e809SDag-Erling Smørgrav static struct mtx mprof_mtx;
268e6330704SDag-Erling Smørgrav MTX_SYSINIT(mprof, &mprof_mtx, "mutex profiling lock", MTX_SPIN | MTX_QUIET);
2696c35e809SDag-Erling Smørgrav 
/*
 * Return the time reported by nanotime() as a single 64-bit count of
 * nanoseconds.
 */
static u_int64_t
nanoseconds(void)
{
	struct timespec ts;
	u_int64_t nsec;

	nanotime(&ts);
	nsec = (u_int64_t)ts.tv_sec * 1000000000;
	nsec += ts.tv_nsec;
	return (nsec);
}
278b784ffe9SDag-Erling Smørgrav 
2796c35e809SDag-Erling Smørgrav static int
2806c35e809SDag-Erling Smørgrav dump_mutex_prof_stats(SYSCTL_HANDLER_ARGS)
2816c35e809SDag-Erling Smørgrav {
2826c35e809SDag-Erling Smørgrav 	struct sbuf *sb;
2836c35e809SDag-Erling Smørgrav 	int error, i;
2846c35e809SDag-Erling Smørgrav 
2856c35e809SDag-Erling Smørgrav 	if (first_free_mprof_buf == 0)
2866c35e809SDag-Erling Smørgrav 		return SYSCTL_OUT(req, "No locking recorded",
2876c35e809SDag-Erling Smørgrav 		    sizeof("No locking recorded"));
2886c35e809SDag-Erling Smørgrav 
2896c35e809SDag-Erling Smørgrav 	sb = sbuf_new(NULL, NULL, 1024, SBUF_AUTOEXTEND);
2906c35e809SDag-Erling Smørgrav 	sbuf_printf(sb, "%12s %12s %12s %12s %s\n",
2916c35e809SDag-Erling Smørgrav 	    "max", "total", "count", "average", "name");
2926c35e809SDag-Erling Smørgrav 	mtx_lock_spin(&mprof_mtx);
2936c35e809SDag-Erling Smørgrav 	for (i = 0; i < first_free_mprof_buf; ++i)
294db586c8bSDag-Erling Smørgrav 		sbuf_printf(sb, "%12ju %12ju %12ju %12ju %s:%d (%s)\n",
295b784ffe9SDag-Erling Smørgrav 		    mprof_buf[i].counter[MPROF_MAX] / 1000,
296b784ffe9SDag-Erling Smørgrav 		    mprof_buf[i].counter[MPROF_TOT] / 1000,
297b784ffe9SDag-Erling Smørgrav 		    mprof_buf[i].counter[MPROF_CNT],
298b784ffe9SDag-Erling Smørgrav 		    mprof_buf[i].counter[MPROF_AVG] / 1000,
2996c35e809SDag-Erling Smørgrav 		    mprof_buf[i].file, mprof_buf[i].line, mprof_buf[i].name);
3006c35e809SDag-Erling Smørgrav 	mtx_unlock_spin(&mprof_mtx);
3016c35e809SDag-Erling Smørgrav 	sbuf_finish(sb);
3026c35e809SDag-Erling Smørgrav 	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3036c35e809SDag-Erling Smørgrav 	sbuf_delete(sb);
3046c35e809SDag-Erling Smørgrav 	return (error);
3056c35e809SDag-Erling Smørgrav }
3066c35e809SDag-Erling Smørgrav SYSCTL_PROC(_debug_mutex_prof, OID_AUTO, stats, CTLTYPE_STRING|CTLFLAG_RD,
3076c35e809SDag-Erling Smørgrav     NULL, 0, dump_mutex_prof_stats, "A", "Mutex profiling statistics");
3086c35e809SDag-Erling Smørgrav #endif
3096c35e809SDag-Erling Smørgrav 
3100cde2e34SJason Evans /*
3116283b7d0SJohn Baldwin  * Function versions of the inlined __mtx_* macros.  These are used by
3126283b7d0SJohn Baldwin  * modules and can also be called from assembly language if needed.
3136283b7d0SJohn Baldwin  */
3146283b7d0SJohn Baldwin void
3156283b7d0SJohn Baldwin _mtx_lock_flags(struct mtx *m, int opts, const char *file, int line)
3166283b7d0SJohn Baldwin {
3176283b7d0SJohn Baldwin 
318dde96c99SJohn Baldwin 	MPASS(curthread != NULL);
319dde96c99SJohn Baldwin 	_get_sleep_lock(m, curthread, opts, file, line);
320dde96c99SJohn Baldwin 	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
321dde96c99SJohn Baldwin 	    line);
322dde96c99SJohn Baldwin 	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
3236c35e809SDag-Erling Smørgrav #ifdef MUTEX_PROFILING
3246c35e809SDag-Erling Smørgrav 	/* don't reset the timer when/if recursing */
325b784ffe9SDag-Erling Smørgrav 	if (m->acqtime == 0) {
3266c35e809SDag-Erling Smørgrav 		m->file = file;
3276c35e809SDag-Erling Smørgrav 		m->line = line;
328b784ffe9SDag-Erling Smørgrav 		m->acqtime = mutex_prof_enable ? nanoseconds() : 0;
3296c35e809SDag-Erling Smørgrav 		++mutex_prof_acquisitions;
3306c35e809SDag-Erling Smørgrav 	}
3316c35e809SDag-Erling Smørgrav #endif
3326283b7d0SJohn Baldwin }
3336283b7d0SJohn Baldwin 
3346283b7d0SJohn Baldwin void
3356283b7d0SJohn Baldwin _mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
3366283b7d0SJohn Baldwin {
3376283b7d0SJohn Baldwin 
338dde96c99SJohn Baldwin 	MPASS(curthread != NULL);
33921377ce0SJohn Baldwin 	mtx_assert(m, MA_OWNED);
3406c35e809SDag-Erling Smørgrav #ifdef MUTEX_PROFILING
341b784ffe9SDag-Erling Smørgrav 	if (m->acqtime != 0) {
3426c35e809SDag-Erling Smørgrav 		static const char *unknown = "(unknown)";
3436c35e809SDag-Erling Smørgrav 		struct mutex_prof *mpp;
344b784ffe9SDag-Erling Smørgrav 		u_int64_t acqtime, now;
3456c35e809SDag-Erling Smørgrav 		const char *p, *q;
346e6330704SDag-Erling Smørgrav 		volatile u_int hash;
3476c35e809SDag-Erling Smørgrav 
348b784ffe9SDag-Erling Smørgrav 		now = nanoseconds();
349b784ffe9SDag-Erling Smørgrav 		acqtime = m->acqtime;
350b784ffe9SDag-Erling Smørgrav 		m->acqtime = 0;
351b784ffe9SDag-Erling Smørgrav 		if (now <= acqtime)
3526c35e809SDag-Erling Smørgrav 			goto out;
3536c35e809SDag-Erling Smørgrav 		for (p = file; strncmp(p, "../", 3) == 0; p += 3)
3546c35e809SDag-Erling Smørgrav 			/* nothing */ ;
3556c35e809SDag-Erling Smørgrav 		if (p == NULL || *p == '\0')
3566c35e809SDag-Erling Smørgrav 			p = unknown;
3576c35e809SDag-Erling Smørgrav 		for (hash = line, q = p; *q != '\0'; ++q)
3586c35e809SDag-Erling Smørgrav 			hash = (hash * 2 + *q) % MPROF_HASH_SIZE;
3596c35e809SDag-Erling Smørgrav 		mtx_lock_spin(&mprof_mtx);
360e6330704SDag-Erling Smørgrav 		for (mpp = mprof_hash[hash]; mpp != NULL; mpp = mpp->next)
3616c35e809SDag-Erling Smørgrav 			if (mpp->line == line && strcmp(mpp->file, p) == 0)
3626c35e809SDag-Erling Smørgrav 				break;
3636c35e809SDag-Erling Smørgrav 		if (mpp == NULL) {
3646c35e809SDag-Erling Smørgrav 			/* Just exit if we cannot get a trace buffer */
3656c35e809SDag-Erling Smørgrav 			if (first_free_mprof_buf >= NUM_MPROF_BUFFERS) {
3666c35e809SDag-Erling Smørgrav 				++mutex_prof_rejected;
3676c35e809SDag-Erling Smørgrav 				goto unlock;
3686c35e809SDag-Erling Smørgrav 			}
3696c35e809SDag-Erling Smørgrav 			mpp = &mprof_buf[first_free_mprof_buf++];
3706c35e809SDag-Erling Smørgrav 			mpp->name = mtx_name(m);
3716c35e809SDag-Erling Smørgrav 			mpp->file = p;
3726c35e809SDag-Erling Smørgrav 			mpp->line = line;
373e6330704SDag-Erling Smørgrav 			mpp->next = mprof_hash[hash];
374e6330704SDag-Erling Smørgrav 			if (mprof_hash[hash] != NULL)
375e6330704SDag-Erling Smørgrav 				++mutex_prof_collisions;
3766c35e809SDag-Erling Smørgrav 			mprof_hash[hash] = mpp;
377e6330704SDag-Erling Smørgrav 			++mutex_prof_records;
3786c35e809SDag-Erling Smørgrav 		}
3796c35e809SDag-Erling Smørgrav 		/*
3806c35e809SDag-Erling Smørgrav 		 * Record if the mutex has been held longer now than ever
3816c35e809SDag-Erling Smørgrav 		 * before
3826c35e809SDag-Erling Smørgrav 		 */
383b784ffe9SDag-Erling Smørgrav 		if ((now - acqtime) > mpp->counter[MPROF_MAX])
384b784ffe9SDag-Erling Smørgrav 			mpp->counter[MPROF_MAX] = now - acqtime;
385b784ffe9SDag-Erling Smørgrav 		mpp->counter[MPROF_TOT] += now - acqtime;
386b784ffe9SDag-Erling Smørgrav 		mpp->counter[MPROF_CNT] += 1;
387b784ffe9SDag-Erling Smørgrav 		mpp->counter[MPROF_AVG] =
388b784ffe9SDag-Erling Smørgrav 		    mpp->counter[MPROF_TOT] / mpp->counter[MPROF_CNT];
3896c35e809SDag-Erling Smørgrav unlock:
3906c35e809SDag-Erling Smørgrav 		mtx_unlock_spin(&mprof_mtx);
3916c35e809SDag-Erling Smørgrav 	}
3926c35e809SDag-Erling Smørgrav out:
3936c35e809SDag-Erling Smørgrav #endif
394dde96c99SJohn Baldwin  	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
395dde96c99SJohn Baldwin 	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
396dde96c99SJohn Baldwin 	    line);
397dde96c99SJohn Baldwin 	_rel_sleep_lock(m, curthread, opts, file, line);
3986283b7d0SJohn Baldwin }
3996283b7d0SJohn Baldwin 
4006283b7d0SJohn Baldwin void
4016283b7d0SJohn Baldwin _mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line)
4026283b7d0SJohn Baldwin {
4036283b7d0SJohn Baldwin 
404dde96c99SJohn Baldwin 	MPASS(curthread != NULL);
405e8fdcfb5SJohn Baldwin #if defined(SMP) || LOCK_DEBUG > 0
406dde96c99SJohn Baldwin 	_get_spin_lock(m, curthread, opts, file, line);
407e8fdcfb5SJohn Baldwin #else
408e8fdcfb5SJohn Baldwin 	critical_enter();
409e8fdcfb5SJohn Baldwin #endif
410dde96c99SJohn Baldwin 	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
411dde96c99SJohn Baldwin 	    line);
412dde96c99SJohn Baldwin 	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
4136283b7d0SJohn Baldwin }
4146283b7d0SJohn Baldwin 
4156283b7d0SJohn Baldwin void
4166283b7d0SJohn Baldwin _mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
4176283b7d0SJohn Baldwin {
4186283b7d0SJohn Baldwin 
419dde96c99SJohn Baldwin 	MPASS(curthread != NULL);
42021377ce0SJohn Baldwin 	mtx_assert(m, MA_OWNED);
421dde96c99SJohn Baldwin  	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
422dde96c99SJohn Baldwin 	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
423dde96c99SJohn Baldwin 	    line);
424e8fdcfb5SJohn Baldwin #if defined(SMP) || LOCK_DEBUG > 0
425dde96c99SJohn Baldwin 	_rel_spin_lock(m);
426e8fdcfb5SJohn Baldwin #else
427e8fdcfb5SJohn Baldwin 	critical_exit();
428e8fdcfb5SJohn Baldwin #endif
4296283b7d0SJohn Baldwin }
4306283b7d0SJohn Baldwin 
4316283b7d0SJohn Baldwin /*
4329ed346baSBosko Milekic  * The important part of mtx_trylock{,_flags}()
4339ed346baSBosko Milekic  * Tries to acquire lock `m.' We do NOT handle recursion here; we assume that
4349ed346baSBosko Milekic  * if we're called, it's because we know we don't already own this lock.
4350cde2e34SJason Evans  */
4360cde2e34SJason Evans int
4379ed346baSBosko Milekic _mtx_trylock(struct mtx *m, int opts, const char *file, int line)
4380cde2e34SJason Evans {
4390cde2e34SJason Evans 	int rval;
4400cde2e34SJason Evans 
441b40ce416SJulian Elischer 	MPASS(curthread != NULL);
4429ed346baSBosko Milekic 
443b40ce416SJulian Elischer 	rval = _obtain_lock(m, curthread);
4449ed346baSBosko Milekic 
44519284646SJohn Baldwin 	LOCK_LOG_TRY("LOCK", &m->mtx_object, opts, rval, file, line);
44619284646SJohn Baldwin 	if (rval) {
4479ed346baSBosko Milekic 		/*
4489ed346baSBosko Milekic 		 * We do not handle recursion in _mtx_trylock; see the
4499ed346baSBosko Milekic 		 * note at the top of the routine.
4509ed346baSBosko Milekic 		 */
4515746a1d8SBosko Milekic 		KASSERT(!mtx_recursed(m),
4525746a1d8SBosko Milekic 		    ("mtx_trylock() called on a recursed mutex"));
4532d96f0b1SJohn Baldwin 		WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
4542d96f0b1SJohn Baldwin 		    file, line);
4550cde2e34SJason Evans 	}
4569ed346baSBosko Milekic 
45719284646SJohn Baldwin 	return (rval);
4580cde2e34SJason Evans }
4590cde2e34SJason Evans 
4600cde2e34SJason Evans /*
4619ed346baSBosko Milekic  * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
4629ed346baSBosko Milekic  *
4639ed346baSBosko Milekic  * We call this if the lock is either contested (i.e. we need to go to
4649ed346baSBosko Milekic  * sleep waiting for it), or if we need to recurse on it.
4650cde2e34SJason Evans  */
4660cde2e34SJason Evans void
4679ed346baSBosko Milekic _mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
46836412d79SJohn Baldwin {
469b40ce416SJulian Elischer 	struct thread *td = curthread;
4702498cf8cSJohn Baldwin #if defined(SMP) && defined(ADAPTIVE_MUTEXES)
4712498cf8cSJohn Baldwin 	struct thread *owner;
4722498cf8cSJohn Baldwin #endif
47336412d79SJohn Baldwin 
474b40ce416SJulian Elischer 	if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)td) {
47536412d79SJohn Baldwin 		m->mtx_recurse++;
47608812b39SBosko Milekic 		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
47719284646SJohn Baldwin 		if (LOCK_LOG_TEST(&m->mtx_object, opts))
4785746a1d8SBosko Milekic 			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
47936412d79SJohn Baldwin 		return;
48036412d79SJohn Baldwin 	}
4819ed346baSBosko Milekic 
48219284646SJohn Baldwin 	if (LOCK_LOG_TEST(&m->mtx_object, opts))
48315ec816aSJohn Baldwin 		CTR4(KTR_LOCK,
48415ec816aSJohn Baldwin 		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
48519284646SJohn Baldwin 		    m->mtx_object.lo_name, (void *)m->mtx_lock, file, line);
4861bd0eefbSJohn Baldwin 
487b40ce416SJulian Elischer 	while (!_obtain_lock(m, td)) {
488f5271ebcSJohn Baldwin 		uintptr_t v;
489b40ce416SJulian Elischer 		struct thread *td1;
49036412d79SJohn Baldwin 
4919ed346baSBosko Milekic 		mtx_lock_spin(&sched_lock);
49236412d79SJohn Baldwin 		/*
4939ed346baSBosko Milekic 		 * Check if the lock has been released while spinning for
4949ed346baSBosko Milekic 		 * the sched_lock.
49536412d79SJohn Baldwin 		 */
49636412d79SJohn Baldwin 		if ((v = m->mtx_lock) == MTX_UNOWNED) {
4979ed346baSBosko Milekic 			mtx_unlock_spin(&sched_lock);
498703fc290SJohn Baldwin #ifdef __i386__
4996b8c6989SJohn Baldwin 			ia32_pause();
500703fc290SJohn Baldwin #endif
50136412d79SJohn Baldwin 			continue;
50236412d79SJohn Baldwin 		}
5039ed346baSBosko Milekic 
50436412d79SJohn Baldwin 		/*
5059ed346baSBosko Milekic 		 * The mutex was marked contested on release. This means that
506b40ce416SJulian Elischer 		 * there are threads blocked on it.
50736412d79SJohn Baldwin 		 */
50836412d79SJohn Baldwin 		if (v == MTX_CONTESTED) {
509b40ce416SJulian Elischer 			td1 = TAILQ_FIRST(&m->mtx_blocked);
510b40ce416SJulian Elischer 			MPASS(td1 != NULL);
511b40ce416SJulian Elischer 			m->mtx_lock = (uintptr_t)td | MTX_CONTESTED;
5129ed346baSBosko Milekic 
5132c100766SJulian Elischer 			if (td1->td_priority < td->td_priority)
5142c100766SJulian Elischer 				td->td_priority = td1->td_priority;
5159ed346baSBosko Milekic 			mtx_unlock_spin(&sched_lock);
51636412d79SJohn Baldwin 			return;
51736412d79SJohn Baldwin 		}
5189ed346baSBosko Milekic 
51936412d79SJohn Baldwin 		/*
5209ed346baSBosko Milekic 		 * If the mutex isn't already contested and a failure occurs
5219ed346baSBosko Milekic 		 * setting the contested bit, the mutex was either released
5229ed346baSBosko Milekic 		 * or the state of the MTX_RECURSED bit changed.
52336412d79SJohn Baldwin 		 */
52436412d79SJohn Baldwin 		if ((v & MTX_CONTESTED) == 0 &&
52536412d79SJohn Baldwin 		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
52636412d79SJohn Baldwin 			(void *)(v | MTX_CONTESTED))) {
5279ed346baSBosko Milekic 			mtx_unlock_spin(&sched_lock);
528703fc290SJohn Baldwin #ifdef __i386__
5296b8c6989SJohn Baldwin 			ia32_pause();
530703fc290SJohn Baldwin #endif
53136412d79SJohn Baldwin 			continue;
53236412d79SJohn Baldwin 		}
53336412d79SJohn Baldwin 
5342498cf8cSJohn Baldwin #if defined(SMP) && defined(ADAPTIVE_MUTEXES)
5352498cf8cSJohn Baldwin 		/*
5362498cf8cSJohn Baldwin 		 * If the current owner of the lock is executing on another
5372498cf8cSJohn Baldwin 		 * CPU, spin instead of blocking.
5382498cf8cSJohn Baldwin 		 */
5392498cf8cSJohn Baldwin 		owner = (struct thread *)(v & MTX_FLAGMASK);
5406a95e08fSJohn Baldwin 		if (m != &Giant && thread_running(owner)) {
5412498cf8cSJohn Baldwin 			mtx_unlock_spin(&sched_lock);
5426a95e08fSJohn Baldwin 			while (mtx_owner(m) == owner && thread_running(owner)) {
543703fc290SJohn Baldwin #ifdef __i386__
5446b8c6989SJohn Baldwin 				ia32_pause();
545703fc290SJohn Baldwin #endif
5467fcca609SJohn Baldwin 			}
5472498cf8cSJohn Baldwin 			continue;
5482498cf8cSJohn Baldwin 		}
5492498cf8cSJohn Baldwin #endif	/* SMP && ADAPTIVE_MUTEXES */
5502498cf8cSJohn Baldwin 
5519ed346baSBosko Milekic 		/*
5527feefcd6SJohn Baldwin 		 * We definitely must sleep for this lock.
5539ed346baSBosko Milekic 		 */
55436412d79SJohn Baldwin 		mtx_assert(m, MA_NOTOWNED);
55536412d79SJohn Baldwin 
55636412d79SJohn Baldwin #ifdef notyet
55736412d79SJohn Baldwin 		/*
5589ed346baSBosko Milekic 		 * If we're borrowing an interrupted thread's VM context, we
5599ed346baSBosko Milekic 		 * must clean up before going to sleep.
56036412d79SJohn Baldwin 		 */
561b40ce416SJulian Elischer 		if (td->td_ithd != NULL) {
562b40ce416SJulian Elischer 			struct ithd *it = td->td_ithd;
56336412d79SJohn Baldwin 
56436412d79SJohn Baldwin 			if (it->it_interrupted) {
56519284646SJohn Baldwin 				if (LOCK_LOG_TEST(&m->mtx_object, opts))
56636412d79SJohn Baldwin 					CTR2(KTR_LOCK,
56715ec816aSJohn Baldwin 				    "_mtx_lock_sleep: %p interrupted %p",
56836412d79SJohn Baldwin 					    it, it->it_interrupted);
56936412d79SJohn Baldwin 				intr_thd_fixup(it);
57036412d79SJohn Baldwin 			}
57136412d79SJohn Baldwin 		}
57236412d79SJohn Baldwin #endif
57336412d79SJohn Baldwin 
5749ed346baSBosko Milekic 		/*
5759ed346baSBosko Milekic 		 * Put us on the list of threads blocked on this mutex.
5769ed346baSBosko Milekic 		 */
57736412d79SJohn Baldwin 		if (TAILQ_EMPTY(&m->mtx_blocked)) {
57818fc2ba9SJohn Baldwin 			td1 = mtx_owner(m);
579b40ce416SJulian Elischer 			LIST_INSERT_HEAD(&td1->td_contested, m, mtx_contested);
580b40ce416SJulian Elischer 			TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_blkq);
58136412d79SJohn Baldwin 		} else {
582b40ce416SJulian Elischer 			TAILQ_FOREACH(td1, &m->mtx_blocked, td_blkq)
5832c100766SJulian Elischer 				if (td1->td_priority > td->td_priority)
58436412d79SJohn Baldwin 					break;
585b40ce416SJulian Elischer 			if (td1)
586b40ce416SJulian Elischer 				TAILQ_INSERT_BEFORE(td1, td, td_blkq);
58736412d79SJohn Baldwin 			else
588b40ce416SJulian Elischer 				TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_blkq);
58936412d79SJohn Baldwin 		}
59036412d79SJohn Baldwin 
5919ed346baSBosko Milekic 		/*
5929ed346baSBosko Milekic 		 * Save who we're blocked on.
5939ed346baSBosko Milekic 		 */
594b40ce416SJulian Elischer 		td->td_blocked = m;
595b40ce416SJulian Elischer 		td->td_mtxname = m->mtx_object.lo_name;
596e602ba25SJulian Elischer 		td->td_state = TDS_MTX;
597b40ce416SJulian Elischer 		propagate_priority(td);
5989ed346baSBosko Milekic 
59919284646SJohn Baldwin 		if (LOCK_LOG_TEST(&m->mtx_object, opts))
600562e4ffeSJohn Baldwin 			CTR3(KTR_LOCK,
601b40ce416SJulian Elischer 			    "_mtx_lock_sleep: p %p blocked on [%p] %s", td, m,
60219284646SJohn Baldwin 			    m->mtx_object.lo_name);
6039ed346baSBosko Milekic 
604b40ce416SJulian Elischer 		td->td_proc->p_stats->p_ru.ru_nvcsw++;
60520cdcc5bSJohn Baldwin 		mi_switch();
6069ed346baSBosko Milekic 
60719284646SJohn Baldwin 		if (LOCK_LOG_TEST(&m->mtx_object, opts))
60836412d79SJohn Baldwin 			CTR3(KTR_LOCK,
6099ed346baSBosko Milekic 			  "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
610b40ce416SJulian Elischer 			  td, m, m->mtx_object.lo_name);
6119ed346baSBosko Milekic 
6129ed346baSBosko Milekic 		mtx_unlock_spin(&sched_lock);
61336412d79SJohn Baldwin 	}
6149ed346baSBosko Milekic 
61536412d79SJohn Baldwin 	return;
6169ed346baSBosko Milekic }
6179ed346baSBosko Milekic 
6189ed346baSBosko Milekic /*
6199ed346baSBosko Milekic  * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
6209ed346baSBosko Milekic  *
6219ed346baSBosko Milekic  * This is only called if we need to actually spin for the lock. Recursion
6229ed346baSBosko Milekic  * is handled inline.
6239ed346baSBosko Milekic  */
6249ed346baSBosko Milekic void
6257e1f6dfeSJohn Baldwin _mtx_lock_spin(struct mtx *m, int opts, const char *file, int line)
62636412d79SJohn Baldwin {
62736412d79SJohn Baldwin 	int i = 0;
62836412d79SJohn Baldwin 
62919284646SJohn Baldwin 	if (LOCK_LOG_TEST(&m->mtx_object, opts))
6305746a1d8SBosko Milekic 		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);
6319ed346baSBosko Milekic 
63236412d79SJohn Baldwin 	for (;;) {
633b40ce416SJulian Elischer 		if (_obtain_lock(m, curthread))
63436412d79SJohn Baldwin 			break;
6359ed346baSBosko Milekic 
6367141f2adSJohn Baldwin 		/* Give interrupts a chance while we spin. */
6377e1f6dfeSJohn Baldwin 		critical_exit();
63836412d79SJohn Baldwin 		while (m->mtx_lock != MTX_UNOWNED) {
639703fc290SJohn Baldwin 			if (i++ < 10000000) {
640703fc290SJohn Baldwin #ifdef __i386__
6416b8c6989SJohn Baldwin 				ia32_pause();
642703fc290SJohn Baldwin #endif
64336412d79SJohn Baldwin 				continue;
644703fc290SJohn Baldwin 			}
6450e54ddadSJohn Baldwin 			if (i < 60000000)
64636412d79SJohn Baldwin 				DELAY(1);
64736412d79SJohn Baldwin #ifdef DDB
64836412d79SJohn Baldwin 			else if (!db_active)
64936412d79SJohn Baldwin #else
65036412d79SJohn Baldwin 			else
65136412d79SJohn Baldwin #endif
6529ed346baSBosko Milekic 				panic("spin lock %s held by %p for > 5 seconds",
65319284646SJohn Baldwin 				    m->mtx_object.lo_name, (void *)m->mtx_lock);
654703fc290SJohn Baldwin #ifdef __i386__
6556b8c6989SJohn Baldwin 			ia32_pause();
656703fc290SJohn Baldwin #endif
65736412d79SJohn Baldwin 		}
6587e1f6dfeSJohn Baldwin 		critical_enter();
65936412d79SJohn Baldwin 	}
66036412d79SJohn Baldwin 
66119284646SJohn Baldwin 	if (LOCK_LOG_TEST(&m->mtx_object, opts))
6629ed346baSBosko Milekic 		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);
6639ed346baSBosko Milekic 
66436412d79SJohn Baldwin 	return;
66536412d79SJohn Baldwin }
66636412d79SJohn Baldwin 
6679ed346baSBosko Milekic /*
6689ed346baSBosko Milekic  * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
6699ed346baSBosko Milekic  *
6709ed346baSBosko Milekic  * We are only called here if the lock is recursed or contested (i.e. we
6719ed346baSBosko Milekic  * need to wake up a blocked thread).
6729ed346baSBosko Milekic  */
67336412d79SJohn Baldwin void
6749ed346baSBosko Milekic _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
67536412d79SJohn Baldwin {
676b40ce416SJulian Elischer 	struct thread *td, *td1;
67736412d79SJohn Baldwin 	struct mtx *m1;
67836412d79SJohn Baldwin 	int pri;
67936412d79SJohn Baldwin 
680b40ce416SJulian Elischer 	td = curthread;
6819ed346baSBosko Milekic 
68208812b39SBosko Milekic 	if (mtx_recursed(m)) {
68336412d79SJohn Baldwin 		if (--(m->mtx_recurse) == 0)
68408812b39SBosko Milekic 			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
68519284646SJohn Baldwin 		if (LOCK_LOG_TEST(&m->mtx_object, opts))
6869ed346baSBosko Milekic 			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
68736412d79SJohn Baldwin 		return;
68836412d79SJohn Baldwin 	}
6899ed346baSBosko Milekic 
6909ed346baSBosko Milekic 	mtx_lock_spin(&sched_lock);
69119284646SJohn Baldwin 	if (LOCK_LOG_TEST(&m->mtx_object, opts))
6929ed346baSBosko Milekic 		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);
6939ed346baSBosko Milekic 
694b40ce416SJulian Elischer 	td1 = TAILQ_FIRST(&m->mtx_blocked);
6952498cf8cSJohn Baldwin #if defined(SMP) && defined(ADAPTIVE_MUTEXES)
6962498cf8cSJohn Baldwin 	if (td1 == NULL) {
6972498cf8cSJohn Baldwin 		_release_lock_quick(m);
6982498cf8cSJohn Baldwin 		if (LOCK_LOG_TEST(&m->mtx_object, opts))
6992498cf8cSJohn Baldwin 			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p no sleepers", m);
7002498cf8cSJohn Baldwin 		mtx_unlock_spin(&sched_lock);
7012498cf8cSJohn Baldwin 		return;
7022498cf8cSJohn Baldwin 	}
7032498cf8cSJohn Baldwin #endif
704b40ce416SJulian Elischer 	MPASS(td->td_proc->p_magic == P_MAGIC);
705b40ce416SJulian Elischer 	MPASS(td1->td_proc->p_magic == P_MAGIC);
7069ed346baSBosko Milekic 
707b40ce416SJulian Elischer 	TAILQ_REMOVE(&m->mtx_blocked, td1, td_blkq);
7089ed346baSBosko Milekic 
70936412d79SJohn Baldwin 	if (TAILQ_EMPTY(&m->mtx_blocked)) {
71036412d79SJohn Baldwin 		LIST_REMOVE(m, mtx_contested);
71136412d79SJohn Baldwin 		_release_lock_quick(m);
71219284646SJohn Baldwin 		if (LOCK_LOG_TEST(&m->mtx_object, opts))
7139ed346baSBosko Milekic 			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
71436412d79SJohn Baldwin 	} else
7159ed346baSBosko Milekic 		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);
7169ed346baSBosko Milekic 
717d5a08a60SJake Burkholder 	pri = PRI_MAX;
718b40ce416SJulian Elischer 	LIST_FOREACH(m1, &td->td_contested, mtx_contested) {
7192c100766SJulian Elischer 		int cp = TAILQ_FIRST(&m1->mtx_blocked)->td_priority;
72036412d79SJohn Baldwin 		if (cp < pri)
72136412d79SJohn Baldwin 			pri = cp;
72236412d79SJohn Baldwin 	}
7239ed346baSBosko Milekic 
7242c100766SJulian Elischer 	if (pri > td->td_base_pri)
7252c100766SJulian Elischer 		pri = td->td_base_pri;
7262c100766SJulian Elischer 	td->td_priority = pri;
7279ed346baSBosko Milekic 
72819284646SJohn Baldwin 	if (LOCK_LOG_TEST(&m->mtx_object, opts))
7299ed346baSBosko Milekic 		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
730b40ce416SJulian Elischer 		    m, td1);
7319ed346baSBosko Milekic 
732b40ce416SJulian Elischer 	td1->td_blocked = NULL;
733b40ce416SJulian Elischer 	setrunqueue(td1);
7349ed346baSBosko Milekic 
7352c100766SJulian Elischer 	if (td->td_critnest == 1 && td1->td_priority < pri) {
73636412d79SJohn Baldwin #ifdef notyet
737b40ce416SJulian Elischer 		if (td->td_ithd != NULL) {
738b40ce416SJulian Elischer 			struct ithd *it = td->td_ithd;
73936412d79SJohn Baldwin 
74036412d79SJohn Baldwin 			if (it->it_interrupted) {
74119284646SJohn Baldwin 				if (LOCK_LOG_TEST(&m->mtx_object, opts))
74236412d79SJohn Baldwin 					CTR2(KTR_LOCK,
74315ec816aSJohn Baldwin 				    "_mtx_unlock_sleep: %p interrupted %p",
74436412d79SJohn Baldwin 					    it, it->it_interrupted);
74536412d79SJohn Baldwin 				intr_thd_fixup(it);
74636412d79SJohn Baldwin 			}
74736412d79SJohn Baldwin 		}
74836412d79SJohn Baldwin #endif
74919284646SJohn Baldwin 		if (LOCK_LOG_TEST(&m->mtx_object, opts))
750562e4ffeSJohn Baldwin 			CTR2(KTR_LOCK,
7519ed346baSBosko Milekic 			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
7529ed346baSBosko Milekic 			    (void *)m->mtx_lock);
7539ed346baSBosko Milekic 
754b40ce416SJulian Elischer 		td->td_proc->p_stats->p_ru.ru_nivcsw++;
75536412d79SJohn Baldwin 		mi_switch();
75619284646SJohn Baldwin 		if (LOCK_LOG_TEST(&m->mtx_object, opts))
7579ed346baSBosko Milekic 			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
75831271627SJohn Baldwin 			    m, (void *)m->mtx_lock);
75936412d79SJohn Baldwin 	}
76036412d79SJohn Baldwin 
7619ed346baSBosko Milekic 	mtx_unlock_spin(&sched_lock);
7629ed346baSBosko Milekic 
7639ed346baSBosko Milekic 	return;
7649ed346baSBosko Milekic }
7659ed346baSBosko Milekic 
7669ed346baSBosko Milekic /*
7679ed346baSBosko Milekic  * All the unlocking of MTX_SPIN locks is done inline.
7689ed346baSBosko Milekic  * See the _rel_spin_lock() macro for the details.
7699ed346baSBosko Milekic  */
7709ed346baSBosko Milekic 
7719ed346baSBosko Milekic /*
77215ec816aSJohn Baldwin  * The backing function for the INVARIANTS-enabled mtx_assert()
7739ed346baSBosko Milekic  */
7741103f3b0SJohn Baldwin #ifdef INVARIANT_SUPPORT
7750cde2e34SJason Evans void
77656771ca7SJason Evans _mtx_assert(struct mtx *m, int what, const char *file, int line)
7770cde2e34SJason Evans {
7785cb0fbe4SJohn Baldwin 
7795cb0fbe4SJohn Baldwin 	if (panicstr != NULL)
7805cb0fbe4SJohn Baldwin 		return;
781a10f4966SJake Burkholder 	switch (what) {
7820cde2e34SJason Evans 	case MA_OWNED:
7830cde2e34SJason Evans 	case MA_OWNED | MA_RECURSED:
7840cde2e34SJason Evans 	case MA_OWNED | MA_NOTRECURSED:
785a10f4966SJake Burkholder 		if (!mtx_owned(m))
7860cde2e34SJason Evans 			panic("mutex %s not owned at %s:%d",
78719284646SJohn Baldwin 			    m->mtx_object.lo_name, file, line);
788a10f4966SJake Burkholder 		if (mtx_recursed(m)) {
789a10f4966SJake Burkholder 			if ((what & MA_NOTRECURSED) != 0)
7900cde2e34SJason Evans 				panic("mutex %s recursed at %s:%d",
79119284646SJohn Baldwin 				    m->mtx_object.lo_name, file, line);
792a10f4966SJake Burkholder 		} else if ((what & MA_RECURSED) != 0) {
7930cde2e34SJason Evans 			panic("mutex %s unrecursed at %s:%d",
79419284646SJohn Baldwin 			    m->mtx_object.lo_name, file, line);
7950cde2e34SJason Evans 		}
7960cde2e34SJason Evans 		break;
7970cde2e34SJason Evans 	case MA_NOTOWNED:
798a10f4966SJake Burkholder 		if (mtx_owned(m))
7990cde2e34SJason Evans 			panic("mutex %s owned at %s:%d",
80019284646SJohn Baldwin 			    m->mtx_object.lo_name, file, line);
8010cde2e34SJason Evans 		break;
8020cde2e34SJason Evans 	default:
80356771ca7SJason Evans 		panic("unknown mtx_assert at %s:%d", file, line);
8040cde2e34SJason Evans 	}
8050cde2e34SJason Evans }
8060cde2e34SJason Evans #endif
8070cde2e34SJason Evans 
8089ed346baSBosko Milekic /*
8099ed346baSBosko Milekic  * The MUTEX_DEBUG-enabled mtx_validate()
81019284646SJohn Baldwin  *
81119284646SJohn Baldwin  * Most of these checks have been moved off into the LO_INITIALIZED flag
81219284646SJohn Baldwin  * maintained by the witness code.
8139ed346baSBosko Milekic  */
81436412d79SJohn Baldwin #ifdef MUTEX_DEBUG
81536412d79SJohn Baldwin 
8164d77a549SAlfred Perlstein void	mtx_validate(struct mtx *);
81736412d79SJohn Baldwin 
81819284646SJohn Baldwin void
81919284646SJohn Baldwin mtx_validate(struct mtx *m)
82036412d79SJohn Baldwin {
82136412d79SJohn Baldwin 
82236412d79SJohn Baldwin /*
82336412d79SJohn Baldwin  * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
82436412d79SJohn Baldwin  * we can re-enable the kernacc() checks.
82536412d79SJohn Baldwin  */
82636412d79SJohn Baldwin #ifndef __alpha__
82776dcbd6fSBosko Milekic 	/*
82876dcbd6fSBosko Milekic 	 * Can't call kernacc() from early init386(), especially when
82976dcbd6fSBosko Milekic 	 * initializing Giant mutex, because some stuff in kernacc()
83076dcbd6fSBosko Milekic 	 * requires Giant itself.
83176dcbd6fSBosko Milekic 	 */
832ab07087eSBosko Milekic 	if (!cold)
833ab07087eSBosko Milekic 		if (!kernacc((caddr_t)m, sizeof(m),
834ab07087eSBosko Milekic 		    VM_PROT_READ | VM_PROT_WRITE))
83519284646SJohn Baldwin 			panic("Can't read and write to mutex %p", m);
83636412d79SJohn Baldwin #endif
83736412d79SJohn Baldwin }
83836412d79SJohn Baldwin #endif
83936412d79SJohn Baldwin 
8409ed346baSBosko Milekic /*
841c27b5699SAndrew R. Reiter  * General init routine used by the MTX_SYSINIT() macro.
842c27b5699SAndrew R. Reiter  */
843c27b5699SAndrew R. Reiter void
844c27b5699SAndrew R. Reiter mtx_sysinit(void *arg)
845c27b5699SAndrew R. Reiter {
846c27b5699SAndrew R. Reiter 	struct mtx_args *margs = arg;
847c27b5699SAndrew R. Reiter 
8480c88508aSJohn Baldwin 	mtx_init(margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts);
849c27b5699SAndrew R. Reiter }
850c27b5699SAndrew R. Reiter 
851c27b5699SAndrew R. Reiter /*
8529ed346baSBosko Milekic  * Mutex initialization routine; initialize lock `m' of type contained in
8530c88508aSJohn Baldwin  * `opts' with options contained in `opts' and name `name.'  The optional
8540c88508aSJohn Baldwin  * lock type `type' is used as a general lock category name for use with
8550c88508aSJohn Baldwin  * witness.
8569ed346baSBosko Milekic  */
85736412d79SJohn Baldwin void
8580c88508aSJohn Baldwin mtx_init(struct mtx *m, const char *name, const char *type, int opts)
85936412d79SJohn Baldwin {
86019284646SJohn Baldwin 	struct lock_object *lock;
8619ed346baSBosko Milekic 
86219284646SJohn Baldwin 	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
863f22a4b62SJeff Roberson 	    MTX_SLEEPABLE | MTX_NOWITNESS | MTX_DUPOK)) == 0);
8649ed346baSBosko Milekic 
86536412d79SJohn Baldwin #ifdef MUTEX_DEBUG
8669ed346baSBosko Milekic 	/* Diagnostic and error correction */
86719284646SJohn Baldwin 	mtx_validate(m);
8686936206eSJohn Baldwin #endif
86936412d79SJohn Baldwin 
87019284646SJohn Baldwin 	lock = &m->mtx_object;
8717ada5876SJohn Baldwin 	KASSERT((lock->lo_flags & LO_INITIALIZED) == 0,
8720c88508aSJohn Baldwin 	    ("mutex %s %p already initialized", name, m));
8737ada5876SJohn Baldwin 	bzero(m, sizeof(*m));
87419284646SJohn Baldwin 	if (opts & MTX_SPIN)
87519284646SJohn Baldwin 		lock->lo_class = &lock_class_mtx_spin;
87619284646SJohn Baldwin 	else
87719284646SJohn Baldwin 		lock->lo_class = &lock_class_mtx_sleep;
8780c88508aSJohn Baldwin 	lock->lo_name = name;
8790c88508aSJohn Baldwin 	lock->lo_type = type != NULL ? type : name;
88019284646SJohn Baldwin 	if (opts & MTX_QUIET)
88119284646SJohn Baldwin 		lock->lo_flags = LO_QUIET;
88219284646SJohn Baldwin 	if (opts & MTX_RECURSE)
88319284646SJohn Baldwin 		lock->lo_flags |= LO_RECURSABLE;
88419284646SJohn Baldwin 	if (opts & MTX_SLEEPABLE)
88519284646SJohn Baldwin 		lock->lo_flags |= LO_SLEEPABLE;
88619284646SJohn Baldwin 	if ((opts & MTX_NOWITNESS) == 0)
88719284646SJohn Baldwin 		lock->lo_flags |= LO_WITNESS;
888f22a4b62SJeff Roberson 	if (opts & MTX_DUPOK)
889f22a4b62SJeff Roberson 		lock->lo_flags |= LO_DUPOK;
89019284646SJohn Baldwin 
89119284646SJohn Baldwin 	m->mtx_lock = MTX_UNOWNED;
89236412d79SJohn Baldwin 	TAILQ_INIT(&m->mtx_blocked);
8939ed346baSBosko Milekic 
89419284646SJohn Baldwin 	LOCK_LOG_INIT(lock, opts);
895d1c1b841SJason Evans 
89619284646SJohn Baldwin 	WITNESS_INIT(lock);
89736412d79SJohn Baldwin }
89836412d79SJohn Baldwin 
8999ed346baSBosko Milekic /*
90019284646SJohn Baldwin  * Remove lock `m' from all_mtx queue.  We don't allow MTX_QUIET to be
90119284646SJohn Baldwin  * passed in as a flag here because if the corresponding mtx_init() was
90219284646SJohn Baldwin  * called with MTX_QUIET set, then it will already be set in the mutex's
90319284646SJohn Baldwin  * flags.
9049ed346baSBosko Milekic  */
90536412d79SJohn Baldwin void
90636412d79SJohn Baldwin mtx_destroy(struct mtx *m)
90736412d79SJohn Baldwin {
90836412d79SJohn Baldwin 
90919284646SJohn Baldwin 	LOCK_LOG_DESTROY(&m->mtx_object, 0);
9109ed346baSBosko Milekic 
91119284646SJohn Baldwin 	if (!mtx_owned(m))
91219284646SJohn Baldwin 		MPASS(mtx_unowned(m));
91319284646SJohn Baldwin 	else {
91408812b39SBosko Milekic 		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
9159ed346baSBosko Milekic 
91619284646SJohn Baldwin 		/* Tell witness this isn't locked to make it happy. */
917c86b6ff5SJohn Baldwin 		WITNESS_UNLOCK(&m->mtx_object, LOP_EXCLUSIVE, __FILE__,
918c86b6ff5SJohn Baldwin 		    __LINE__);
91936412d79SJohn Baldwin 	}
9200384fff8SJason Evans 
92119284646SJohn Baldwin 	WITNESS_DESTROY(&m->mtx_object);
9220384fff8SJason Evans }
923d23f5958SMatthew Dillon 
924d23f5958SMatthew Dillon /*
925c53c013bSJohn Baldwin  * Initialize the mutex code and system mutexes.  This is called from the MD
926c53c013bSJohn Baldwin  * startup code prior to mi_startup().  The per-CPU data space needs to be
927c53c013bSJohn Baldwin  * set up before this is called.
 *
 * Giant, sched_lock and proc0's process lock are initialized here, and
 * Giant is acquired before returning; it is not released in this function.
928c53c013bSJohn Baldwin  */
929c53c013bSJohn Baldwin void
930c53c013bSJohn Baldwin mutex_init(void)
931c53c013bSJohn Baldwin {
932c53c013bSJohn Baldwin 
933c53c013bSJohn Baldwin 	/* Setup thread0 so that mutexes work. */
934c53c013bSJohn Baldwin 	LIST_INIT(&thread0.td_contested);
935c53c013bSJohn Baldwin 
936c53c013bSJohn Baldwin 	/*
937c53c013bSJohn Baldwin 	 * Initialize mutexes.
	 * Giant and sched_lock are created recursable (MTX_RECURSE);
	 * sched_lock is a spin mutex, the other two are MTX_DEF mutexes.
938c53c013bSJohn Baldwin 	 */
9390c88508aSJohn Baldwin 	mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE);
9400c88508aSJohn Baldwin 	mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE);
9410c88508aSJohn Baldwin 	mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
	/* Leave Giant held on return. */
942c53c013bSJohn Baldwin 	mtx_lock(&Giant);
943c53c013bSJohn Baldwin }
944c53c013bSJohn Baldwin 
945c53c013bSJohn Baldwin /*
946d23f5958SMatthew Dillon  * Encapsulated Giant mutex routines.  These routines provide encapsulation
947d23f5958SMatthew Dillon  * control for the Giant mutex, allowing sysctls to be used to turn on and
948d23f5958SMatthew Dillon  * off Giant around certain subsystems.  The default values of the sysctls
949d23f5958SMatthew Dillon  * are set to what developers believe is stable and working with regard to
950d23f5958SMatthew Dillon  * the Giant pushdown.  Developers should not turn off Giant via these
951d23f5958SMatthew Dillon  * sysctls unless they know what they are doing.
952d23f5958SMatthew Dillon  *
953d23f5958SMatthew Dillon  * Callers of mtx_lock_giant() are expected to pass the return value to an
954d23f5958SMatthew Dillon  * accompanying mtx_unlock_giant() later on.  If multiple subsystems are
955d23f5958SMatthew Dillon  * affected by a Giant wrap, all related sysctl variables must be zero for
956d23f5958SMatthew Dillon  * the subsystem call to operate without Giant (as determined by the caller).
957d23f5958SMatthew Dillon  */
958d23f5958SMatthew Dillon 
/* Parent node "giant" under _kern for the knobs below (CTLFLAG_RD). */
959d23f5958SMatthew Dillon SYSCTL_NODE(_kern, OID_AUTO, giant, CTLFLAG_RD, NULL, "Giant mutex manipulation");
960d23f5958SMatthew Dillon 
/*
 * Global override: when non-zero, mtx_lock_giant() takes Giant no matter
 * which per-subsystem variable it is passed.  Defaults to 0.
 */
961d23f5958SMatthew Dillon static int kern_giant_all = 0;
962d23f5958SMatthew Dillon SYSCTL_INT(_kern_giant, OID_AUTO, all, CTLFLAG_RW, &kern_giant_all, 0, "");
963d23f5958SMatthew Dillon 
/*
 * Per-subsystem knobs, all defaulting to 1 and exported read-write
 * (CTLFLAG_RW) as "proc", "file" and "ucred" under the node above.
 */
964d23f5958SMatthew Dillon int kern_giant_proc = 1;	/* Giant around PROC locks */
965d23f5958SMatthew Dillon int kern_giant_file = 1;	/* Giant around struct file & filedesc */
966735da6deSMatthew Dillon int kern_giant_ucred = 1;	/* Giant around ucred */
967d23f5958SMatthew Dillon SYSCTL_INT(_kern_giant, OID_AUTO, proc, CTLFLAG_RW, &kern_giant_proc, 0, "");
968d23f5958SMatthew Dillon SYSCTL_INT(_kern_giant, OID_AUTO, file, CTLFLAG_RW, &kern_giant_file, 0, "");
969735da6deSMatthew Dillon SYSCTL_INT(_kern_giant, OID_AUTO, ucred, CTLFLAG_RW, &kern_giant_ucred, 0, "");
970d23f5958SMatthew Dillon 
971d23f5958SMatthew Dillon int
972d23f5958SMatthew Dillon mtx_lock_giant(int sysctlvar)
973d23f5958SMatthew Dillon {
974d23f5958SMatthew Dillon 	if (sysctlvar || kern_giant_all) {
975d23f5958SMatthew Dillon 		mtx_lock(&Giant);
976d23f5958SMatthew Dillon 		return(1);
977d23f5958SMatthew Dillon 	}
978d23f5958SMatthew Dillon 	return(0);
979d23f5958SMatthew Dillon }
980d23f5958SMatthew Dillon 
981d23f5958SMatthew Dillon void
982d23f5958SMatthew Dillon mtx_unlock_giant(int s)
983d23f5958SMatthew Dillon {
984d23f5958SMatthew Dillon 	if (s)
985d23f5958SMatthew Dillon 		mtx_unlock(&Giant);
986d23f5958SMatthew Dillon }
987d23f5958SMatthew Dillon 
988