xref: /freebsd/sys/kern/kern_lock.c (revision 8302d183f3d1a77b104ef136a10ae2a5a3d8e4f5)
153bf4bb2SPeter Wemm /*
253bf4bb2SPeter Wemm  * Copyright (c) 1995
353bf4bb2SPeter Wemm  *	The Regents of the University of California.  All rights reserved.
453bf4bb2SPeter Wemm  *
503e9c6c1SJohn Dyson  * Copyright (C) 1997
603e9c6c1SJohn Dyson  *	John S. Dyson.  All rights reserved.
703e9c6c1SJohn Dyson  *
853bf4bb2SPeter Wemm  * This code contains ideas from software contributed to Berkeley by
953bf4bb2SPeter Wemm  * Avadis Tevanian, Jr., Michael Wayne Young, and the Mach Operating
1053bf4bb2SPeter Wemm  * System project at Carnegie-Mellon University.
1153bf4bb2SPeter Wemm  *
1253bf4bb2SPeter Wemm  * Redistribution and use in source and binary forms, with or without
1353bf4bb2SPeter Wemm  * modification, are permitted provided that the following conditions
1453bf4bb2SPeter Wemm  * are met:
1553bf4bb2SPeter Wemm  * 1. Redistributions of source code must retain the above copyright
1653bf4bb2SPeter Wemm  *    notice, this list of conditions and the following disclaimer.
1753bf4bb2SPeter Wemm  * 2. Redistributions in binary form must reproduce the above copyright
1853bf4bb2SPeter Wemm  *    notice, this list of conditions and the following disclaimer in the
1953bf4bb2SPeter Wemm  *    documentation and/or other materials provided with the distribution.
2053bf4bb2SPeter Wemm  * 3. All advertising materials mentioning features or use of this software
2153bf4bb2SPeter Wemm  *    must display the following acknowledgement:
2253bf4bb2SPeter Wemm  *	This product includes software developed by the University of
2353bf4bb2SPeter Wemm  *	California, Berkeley and its contributors.
2453bf4bb2SPeter Wemm  * 4. Neither the name of the University nor the names of its contributors
2553bf4bb2SPeter Wemm  *    may be used to endorse or promote products derived from this software
2653bf4bb2SPeter Wemm  *    without specific prior written permission.
2753bf4bb2SPeter Wemm  *
2853bf4bb2SPeter Wemm  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2953bf4bb2SPeter Wemm  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
3053bf4bb2SPeter Wemm  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
3153bf4bb2SPeter Wemm  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
3253bf4bb2SPeter Wemm  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
3353bf4bb2SPeter Wemm  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
3453bf4bb2SPeter Wemm  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3553bf4bb2SPeter Wemm  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3653bf4bb2SPeter Wemm  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3753bf4bb2SPeter Wemm  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3853bf4bb2SPeter Wemm  * SUCH DAMAGE.
3953bf4bb2SPeter Wemm  *
4053bf4bb2SPeter Wemm  *	@(#)kern_lock.c	8.18 (Berkeley) 5/21/95
41c3aac50fSPeter Wemm  * $FreeBSD$
4253bf4bb2SPeter Wemm  */
4353bf4bb2SPeter Wemm 
4453bf4bb2SPeter Wemm #include <sys/param.h>
459722d88fSJason Evans #include <sys/kernel.h>
4661d80e90SJohn Baldwin #include <sys/ktr.h>
4753bf4bb2SPeter Wemm #include <sys/lock.h>
488302d183SBruce Evans #include <sys/lockmgr.h>
49d8881ca3SJohn Baldwin #include <sys/mutex.h>
508302d183SBruce Evans #include <sys/proc.h>
514bdb9b11SPeter Wemm #include <sys/systm.h>
5253bf4bb2SPeter Wemm 
/*
 * Locking primitives implementation.
 * Locks provide shared/exclusive synchronization.
 */
5753bf4bb2SPeter Wemm 
5803e9c6c1SJohn Dyson #define LOCK_WAIT_TIME 100
5903e9c6c1SJohn Dyson #define LOCK_SAMPLE_WAIT 7
6053bf4bb2SPeter Wemm 
6103e9c6c1SJohn Dyson #if defined(DIAGNOSTIC)
6203e9c6c1SJohn Dyson #define LOCK_INLINE
6303e9c6c1SJohn Dyson #else
64ab36c3d3SBruce Evans #define LOCK_INLINE __inline
6503e9c6c1SJohn Dyson #endif
6603e9c6c1SJohn Dyson 
6799448ed1SJohn Dyson #define LK_ALL (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE | \
6899448ed1SJohn Dyson 	LK_SHARE_NONZERO | LK_WAIT_NONZERO)
6999448ed1SJohn Dyson 
709722d88fSJason Evans /*
719722d88fSJason Evans  * Mutex array variables.  Rather than each lockmgr lock having its own mutex,
729722d88fSJason Evans  * share a fixed (at boot time) number of mutexes across all lockmgr locks in
739722d88fSJason Evans  * order to keep sizeof(struct lock) down.
749722d88fSJason Evans  */
75f2860039SMatthew Dillon int lock_mtx_valid;
76d1c1b841SJason Evans static struct mtx lock_mtx;
779722d88fSJason Evans 
7803e9c6c1SJohn Dyson static int acquire(struct lock *lkp, int extflags, int wanted);
7999448ed1SJohn Dyson static int apause(struct lock *lkp, int flags);
8099448ed1SJohn Dyson static int acquiredrain(struct lock *lkp, int extflags) ;
8103e9c6c1SJohn Dyson 
829722d88fSJason Evans static void
839722d88fSJason Evans lockmgr_init(void *dummy __unused)
849722d88fSJason Evans {
859722d88fSJason Evans 	/*
869722d88fSJason Evans 	 * Initialize the lockmgr protection mutex if it hasn't already been
879722d88fSJason Evans 	 * done.  Unless something changes about kernel startup order, VM
889722d88fSJason Evans 	 * initialization will always cause this mutex to already be
899722d88fSJason Evans 	 * initialized in a call to lockinit().
909722d88fSJason Evans 	 */
91f2860039SMatthew Dillon 	if (lock_mtx_valid == 0) {
926008862bSJohn Baldwin 		mtx_init(&lock_mtx, "lockmgr", NULL, MTX_DEF);
93f2860039SMatthew Dillon 		lock_mtx_valid = 1;
949722d88fSJason Evans 	}
959722d88fSJason Evans }
969722d88fSJason Evans SYSINIT(lmgrinit, SI_SUB_LOCK, SI_ORDER_FIRST, lockmgr_init, NULL)
979722d88fSJason Evans 
9803e9c6c1SJohn Dyson static LOCK_INLINE void
9903e9c6c1SJohn Dyson sharelock(struct lock *lkp, int incr) {
10003e9c6c1SJohn Dyson 	lkp->lk_flags |= LK_SHARE_NONZERO;
10103e9c6c1SJohn Dyson 	lkp->lk_sharecount += incr;
10203e9c6c1SJohn Dyson }
10303e9c6c1SJohn Dyson 
10403e9c6c1SJohn Dyson static LOCK_INLINE void
10503e9c6c1SJohn Dyson shareunlock(struct lock *lkp, int decr) {
106219cbf59SEivind Eklund 
1075526d2d9SEivind Eklund 	KASSERT(lkp->lk_sharecount >= decr, ("shareunlock: count < decr"));
10803e9c6c1SJohn Dyson 
1099b2e5badSJohn Dyson 	if (lkp->lk_sharecount == decr) {
11003e9c6c1SJohn Dyson 		lkp->lk_flags &= ~LK_SHARE_NONZERO;
1119b2e5badSJohn Dyson 		if (lkp->lk_flags & (LK_WANT_UPGRADE | LK_WANT_EXCL)) {
1129b2e5badSJohn Dyson 			wakeup(lkp);
1139b2e5badSJohn Dyson 		}
1149b2e5badSJohn Dyson 		lkp->lk_sharecount = 0;
1159b2e5badSJohn Dyson 	} else {
1169b2e5badSJohn Dyson 		lkp->lk_sharecount -= decr;
1179b2e5badSJohn Dyson 	}
11803e9c6c1SJohn Dyson }
11903e9c6c1SJohn Dyson 
12099448ed1SJohn Dyson /*
1211b367556SJason Evans  * This is the waitloop optimization.
12299448ed1SJohn Dyson  */
12303e9c6c1SJohn Dyson static int
12499c9d349SAlan Cox apause(struct lock *lkp, int flags)
12599c9d349SAlan Cox {
12699c9d349SAlan Cox #ifdef SMP
12799c9d349SAlan Cox 	int i, lock_wait;
12899c9d349SAlan Cox #endif
12999c9d349SAlan Cox 
13003e9c6c1SJohn Dyson 	if ((lkp->lk_flags & flags) == 0)
13103e9c6c1SJohn Dyson 		return 0;
13299c9d349SAlan Cox #ifdef SMP
13399c9d349SAlan Cox 	for (lock_wait = LOCK_WAIT_TIME; lock_wait > 0; lock_wait--) {
1349ed346baSBosko Milekic 		mtx_unlock(lkp->lk_interlock);
13599c9d349SAlan Cox 		for (i = LOCK_SAMPLE_WAIT; i > 0; i--)
13699c9d349SAlan Cox 			if ((lkp->lk_flags & flags) == 0)
13799c9d349SAlan Cox 				break;
1389ed346baSBosko Milekic 		mtx_lock(lkp->lk_interlock);
13903e9c6c1SJohn Dyson 		if ((lkp->lk_flags & flags) == 0)
14003e9c6c1SJohn Dyson 			return 0;
14103e9c6c1SJohn Dyson 	}
14299c9d349SAlan Cox #endif
14303e9c6c1SJohn Dyson 	return 1;
14403e9c6c1SJohn Dyson }
14553bf4bb2SPeter Wemm 
14603e9c6c1SJohn Dyson static int
14703e9c6c1SJohn Dyson acquire(struct lock *lkp, int extflags, int wanted) {
1489b2e5badSJohn Dyson 	int s, error;
14953bf4bb2SPeter Wemm 
150a18b1f1dSJason Evans 	CTR3(KTR_LOCKMGR,
151a18b1f1dSJason Evans 	    "acquire(): lkp == %p, extflags == 0x%x, wanted == 0x%x\n",
152a18b1f1dSJason Evans 	    lkp, extflags, wanted);
153a18b1f1dSJason Evans 
15403e9c6c1SJohn Dyson 	if ((extflags & LK_NOWAIT) && (lkp->lk_flags & wanted)) {
15503e9c6c1SJohn Dyson 		return EBUSY;
15603e9c6c1SJohn Dyson 	}
15753bf4bb2SPeter Wemm 
15899448ed1SJohn Dyson 	if (((lkp->lk_flags | extflags) & LK_NOPAUSE) == 0) {
15903e9c6c1SJohn Dyson 		error = apause(lkp, wanted);
16003e9c6c1SJohn Dyson 		if (error == 0)
16103e9c6c1SJohn Dyson 			return 0;
16299448ed1SJohn Dyson 	}
16303e9c6c1SJohn Dyson 
1649b2e5badSJohn Dyson 	s = splhigh();
16503e9c6c1SJohn Dyson 	while ((lkp->lk_flags & wanted) != 0) {
16603e9c6c1SJohn Dyson 		lkp->lk_flags |= LK_WAIT_NONZERO;
16703e9c6c1SJohn Dyson 		lkp->lk_waitcount++;
16896fde7daSJake Burkholder 		error = msleep(lkp, lkp->lk_interlock, lkp->lk_prio,
16923b59018SMatthew Dillon 		    lkp->lk_wmesg,
17023b59018SMatthew Dillon 		    ((extflags & LK_TIMELOCK) ? lkp->lk_timo : 0));
1719b2e5badSJohn Dyson 		if (lkp->lk_waitcount == 1) {
17203e9c6c1SJohn Dyson 			lkp->lk_flags &= ~LK_WAIT_NONZERO;
1739b2e5badSJohn Dyson 			lkp->lk_waitcount = 0;
1749b2e5badSJohn Dyson 		} else {
1759b2e5badSJohn Dyson 			lkp->lk_waitcount--;
1769b2e5badSJohn Dyson 		}
1779b2e5badSJohn Dyson 		if (error) {
1789b2e5badSJohn Dyson 			splx(s);
17903e9c6c1SJohn Dyson 			return error;
1809b2e5badSJohn Dyson 		}
18103e9c6c1SJohn Dyson 		if (extflags & LK_SLEEPFAIL) {
1829b2e5badSJohn Dyson 			splx(s);
18303e9c6c1SJohn Dyson 			return ENOLCK;
18403e9c6c1SJohn Dyson 		}
18503e9c6c1SJohn Dyson 	}
1869b2e5badSJohn Dyson 	splx(s);
18703e9c6c1SJohn Dyson 	return 0;
18803e9c6c1SJohn Dyson }
18903e9c6c1SJohn Dyson 
19053bf4bb2SPeter Wemm /*
19153bf4bb2SPeter Wemm  * Set, change, or release a lock.
19253bf4bb2SPeter Wemm  *
19353bf4bb2SPeter Wemm  * Shared requests increment the shared count. Exclusive requests set the
19453bf4bb2SPeter Wemm  * LK_WANT_EXCL flag (preventing further shared locks), and wait for already
19553bf4bb2SPeter Wemm  * accepted shared locks and shared-to-exclusive upgrades to go away.
19653bf4bb2SPeter Wemm  */
19753bf4bb2SPeter Wemm int
19815a1057cSEivind Eklund #ifndef	DEBUG_LOCKS
199b40ce416SJulian Elischer lockmgr(lkp, flags, interlkp, td)
20015a1057cSEivind Eklund #else
201b40ce416SJulian Elischer debuglockmgr(lkp, flags, interlkp, td, name, file, line)
20215a1057cSEivind Eklund #endif
203248fcb66SSteve Passe 	struct lock *lkp;
20453bf4bb2SPeter Wemm 	u_int flags;
205a18b1f1dSJason Evans 	struct mtx *interlkp;
206b40ce416SJulian Elischer 	struct thread *td;
20715a1057cSEivind Eklund #ifdef	DEBUG_LOCKS
20815a1057cSEivind Eklund 	const char *name;	/* Name of lock function */
20915a1057cSEivind Eklund 	const char *file;	/* Name of file call is from */
21015a1057cSEivind Eklund 	int line;		/* Line number in file */
21115a1057cSEivind Eklund #endif
21253bf4bb2SPeter Wemm {
21353bf4bb2SPeter Wemm 	int error;
21453bf4bb2SPeter Wemm 	pid_t pid;
215635962afSJohn Baldwin 	int extflags, lockflags;
21653bf4bb2SPeter Wemm 
217a18b1f1dSJason Evans 	CTR5(KTR_LOCKMGR,
218a18b1f1dSJason Evans 	    "lockmgr(): lkp == %p (lk_wmesg == \"%s\"), flags == 0x%x, "
219b40ce416SJulian Elischer 	    "interlkp == %p, td == %p", lkp, lkp->lk_wmesg, flags, interlkp, td);
220a18b1f1dSJason Evans 
22153bf4bb2SPeter Wemm 	error = 0;
222b40ce416SJulian Elischer 	if (td == NULL)
223891e0f24SJohn Dyson 		pid = LK_KERNPROC;
224891e0f24SJohn Dyson 	else
225b40ce416SJulian Elischer 		pid = td->td_proc->p_pid;
22603e9c6c1SJohn Dyson 
2279ed346baSBosko Milekic 	mtx_lock(lkp->lk_interlock);
22898689e1eSAlfred Perlstein 	if (flags & LK_INTERLOCK) {
2296157b69fSAlfred Perlstein 		mtx_assert(interlkp, MA_OWNED | MA_NOTRECURSED);
2309ed346baSBosko Milekic 		mtx_unlock(interlkp);
23198689e1eSAlfred Perlstein 	}
23203e9c6c1SJohn Dyson 
2333f085c22SJohn Baldwin 	if (panicstr != NULL) {
2343f085c22SJohn Baldwin 		mtx_unlock(lkp->lk_interlock);
2353f085c22SJohn Baldwin 		return (0);
2363f085c22SJohn Baldwin 	}
2373f085c22SJohn Baldwin 
23853bf4bb2SPeter Wemm 	extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK;
23953bf4bb2SPeter Wemm 
24053bf4bb2SPeter Wemm 	switch (flags & LK_TYPE_MASK) {
24153bf4bb2SPeter Wemm 
24253bf4bb2SPeter Wemm 	case LK_SHARED:
243beef8a36SJulian Elischer 		/*
244beef8a36SJulian Elischer 		 * If we are not the exclusive lock holder, we have to block
245beef8a36SJulian Elischer 		 * while there is an exclusive lock holder or while an
246beef8a36SJulian Elischer 		 * exclusive lock request or upgrade request is in progress.
247beef8a36SJulian Elischer 		 *
248b40ce416SJulian Elischer 		 * However, if TDF_DEADLKTREAT is set, we override exclusive
249beef8a36SJulian Elischer 		 * lock requests or upgrade requests ( but not the exclusive
250beef8a36SJulian Elischer 		 * lock itself ).
251beef8a36SJulian Elischer 		 */
25253bf4bb2SPeter Wemm 		if (lkp->lk_lockholder != pid) {
253635962afSJohn Baldwin 			lockflags = LK_HAVE_EXCL;
254bce98419SJohn Baldwin 			mtx_lock_spin(&sched_lock);
255bce98419SJohn Baldwin 			if (td != NULL && !(td->td_flags & TDF_DEADLKTREAT))
256bce98419SJohn Baldwin 				lockflags |= LK_WANT_EXCL | LK_WANT_UPGRADE;
257bce98419SJohn Baldwin 			mtx_unlock_spin(&sched_lock);
258635962afSJohn Baldwin 			error = acquire(lkp, extflags, lockflags);
25953bf4bb2SPeter Wemm 			if (error)
26053bf4bb2SPeter Wemm 				break;
26103e9c6c1SJohn Dyson 			sharelock(lkp, 1);
2627181624aSJeff Roberson #if defined(DEBUG_LOCKS)
2637181624aSJeff Roberson 			lkp->lk_slockholder = pid;
2647181624aSJeff Roberson 			lkp->lk_sfilename = file;
2657181624aSJeff Roberson 			lkp->lk_slineno = line;
2667181624aSJeff Roberson 			lkp->lk_slockername = name;
2677181624aSJeff Roberson #endif
26853bf4bb2SPeter Wemm 			break;
26953bf4bb2SPeter Wemm 		}
27053bf4bb2SPeter Wemm 		/*
27153bf4bb2SPeter Wemm 		 * We hold an exclusive lock, so downgrade it to shared.
27253bf4bb2SPeter Wemm 		 * An alternative would be to fail with EDEADLK.
27353bf4bb2SPeter Wemm 		 */
27403e9c6c1SJohn Dyson 		sharelock(lkp, 1);
27593b0017fSPhilippe Charnier 		/* FALLTHROUGH downgrade */
27653bf4bb2SPeter Wemm 
27753bf4bb2SPeter Wemm 	case LK_DOWNGRADE:
2781375ed7eSAlfred Perlstein 		KASSERT(lkp->lk_lockholder == pid && lkp->lk_exclusivecount != 0,
2791375ed7eSAlfred Perlstein 			("lockmgr: not holding exclusive lock "
2801375ed7eSAlfred Perlstein 			"(owner pid (%d) != pid (%d), exlcnt (%d) != 0",
2811375ed7eSAlfred Perlstein 			lkp->lk_lockholder, pid, lkp->lk_exclusivecount));
28203e9c6c1SJohn Dyson 		sharelock(lkp, lkp->lk_exclusivecount);
28353bf4bb2SPeter Wemm 		lkp->lk_exclusivecount = 0;
28453bf4bb2SPeter Wemm 		lkp->lk_flags &= ~LK_HAVE_EXCL;
28553bf4bb2SPeter Wemm 		lkp->lk_lockholder = LK_NOPROC;
28653bf4bb2SPeter Wemm 		if (lkp->lk_waitcount)
28753bf4bb2SPeter Wemm 			wakeup((void *)lkp);
28853bf4bb2SPeter Wemm 		break;
28953bf4bb2SPeter Wemm 
29053bf4bb2SPeter Wemm 	case LK_EXCLUPGRADE:
29153bf4bb2SPeter Wemm 		/*
29253bf4bb2SPeter Wemm 		 * If another process is ahead of us to get an upgrade,
29353bf4bb2SPeter Wemm 		 * then we want to fail rather than have an intervening
29453bf4bb2SPeter Wemm 		 * exclusive access.
29553bf4bb2SPeter Wemm 		 */
29653bf4bb2SPeter Wemm 		if (lkp->lk_flags & LK_WANT_UPGRADE) {
29703e9c6c1SJohn Dyson 			shareunlock(lkp, 1);
29853bf4bb2SPeter Wemm 			error = EBUSY;
29953bf4bb2SPeter Wemm 			break;
30053bf4bb2SPeter Wemm 		}
30193b0017fSPhilippe Charnier 		/* FALLTHROUGH normal upgrade */
30253bf4bb2SPeter Wemm 
30353bf4bb2SPeter Wemm 	case LK_UPGRADE:
30453bf4bb2SPeter Wemm 		/*
30553bf4bb2SPeter Wemm 		 * Upgrade a shared lock to an exclusive one. If another
30653bf4bb2SPeter Wemm 		 * shared lock has already requested an upgrade to an
30753bf4bb2SPeter Wemm 		 * exclusive lock, our shared lock is released and an
30853bf4bb2SPeter Wemm 		 * exclusive lock is requested (which will be granted
30953bf4bb2SPeter Wemm 		 * after the upgrade). If we return an error, the file
31053bf4bb2SPeter Wemm 		 * will always be unlocked.
31153bf4bb2SPeter Wemm 		 */
31203e9c6c1SJohn Dyson 		if ((lkp->lk_lockholder == pid) || (lkp->lk_sharecount <= 0))
31353bf4bb2SPeter Wemm 			panic("lockmgr: upgrade exclusive lock");
31403e9c6c1SJohn Dyson 		shareunlock(lkp, 1);
31553bf4bb2SPeter Wemm 		/*
31653bf4bb2SPeter Wemm 		 * If we are just polling, check to see if we will block.
31753bf4bb2SPeter Wemm 		 */
31853bf4bb2SPeter Wemm 		if ((extflags & LK_NOWAIT) &&
31953bf4bb2SPeter Wemm 		    ((lkp->lk_flags & LK_WANT_UPGRADE) ||
32053bf4bb2SPeter Wemm 		     lkp->lk_sharecount > 1)) {
32153bf4bb2SPeter Wemm 			error = EBUSY;
32253bf4bb2SPeter Wemm 			break;
32353bf4bb2SPeter Wemm 		}
32453bf4bb2SPeter Wemm 		if ((lkp->lk_flags & LK_WANT_UPGRADE) == 0) {
32553bf4bb2SPeter Wemm 			/*
32653bf4bb2SPeter Wemm 			 * We are first shared lock to request an upgrade, so
32753bf4bb2SPeter Wemm 			 * request upgrade and wait for the shared count to
32853bf4bb2SPeter Wemm 			 * drop to zero, then take exclusive lock.
32953bf4bb2SPeter Wemm 			 */
33053bf4bb2SPeter Wemm 			lkp->lk_flags |= LK_WANT_UPGRADE;
33103e9c6c1SJohn Dyson 			error = acquire(lkp, extflags, LK_SHARE_NONZERO);
33253bf4bb2SPeter Wemm 			lkp->lk_flags &= ~LK_WANT_UPGRADE;
3339b2e5badSJohn Dyson 
33453bf4bb2SPeter Wemm 			if (error)
33553bf4bb2SPeter Wemm 				break;
33653bf4bb2SPeter Wemm 			lkp->lk_flags |= LK_HAVE_EXCL;
33753bf4bb2SPeter Wemm 			lkp->lk_lockholder = pid;
33853bf4bb2SPeter Wemm 			if (lkp->lk_exclusivecount != 0)
33953bf4bb2SPeter Wemm 				panic("lockmgr: non-zero exclusive count");
34053bf4bb2SPeter Wemm 			lkp->lk_exclusivecount = 1;
34115a1057cSEivind Eklund #if defined(DEBUG_LOCKS)
34215a1057cSEivind Eklund 			lkp->lk_filename = file;
34315a1057cSEivind Eklund 			lkp->lk_lineno = line;
34415a1057cSEivind Eklund 			lkp->lk_lockername = name;
34515a1057cSEivind Eklund #endif
34653bf4bb2SPeter Wemm 			break;
34753bf4bb2SPeter Wemm 		}
34853bf4bb2SPeter Wemm 		/*
34953bf4bb2SPeter Wemm 		 * Someone else has requested upgrade. Release our shared
35053bf4bb2SPeter Wemm 		 * lock, awaken upgrade requestor if we are the last shared
35153bf4bb2SPeter Wemm 		 * lock, then request an exclusive lock.
35253bf4bb2SPeter Wemm 		 */
35303e9c6c1SJohn Dyson 		if ( (lkp->lk_flags & (LK_SHARE_NONZERO|LK_WAIT_NONZERO)) ==
35403e9c6c1SJohn Dyson 			LK_WAIT_NONZERO)
35553bf4bb2SPeter Wemm 			wakeup((void *)lkp);
35693b0017fSPhilippe Charnier 		/* FALLTHROUGH exclusive request */
35753bf4bb2SPeter Wemm 
35853bf4bb2SPeter Wemm 	case LK_EXCLUSIVE:
35953bf4bb2SPeter Wemm 		if (lkp->lk_lockholder == pid && pid != LK_KERNPROC) {
36053bf4bb2SPeter Wemm 			/*
36153bf4bb2SPeter Wemm 			 *	Recursive lock.
36253bf4bb2SPeter Wemm 			 */
36333638e93SKirk McKusick 			if ((extflags & (LK_NOWAIT | LK_CANRECURSE)) == 0)
36453bf4bb2SPeter Wemm 				panic("lockmgr: locking against myself");
36533638e93SKirk McKusick 			if ((extflags & LK_CANRECURSE) != 0) {
36653bf4bb2SPeter Wemm 				lkp->lk_exclusivecount++;
36753bf4bb2SPeter Wemm 				break;
36853bf4bb2SPeter Wemm 			}
36933638e93SKirk McKusick 		}
37053bf4bb2SPeter Wemm 		/*
37153bf4bb2SPeter Wemm 		 * If we are just polling, check to see if we will sleep.
37253bf4bb2SPeter Wemm 		 */
37303e9c6c1SJohn Dyson 		if ((extflags & LK_NOWAIT) &&
37403e9c6c1SJohn Dyson 		    (lkp->lk_flags & (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE | LK_SHARE_NONZERO))) {
37553bf4bb2SPeter Wemm 			error = EBUSY;
37653bf4bb2SPeter Wemm 			break;
37753bf4bb2SPeter Wemm 		}
37853bf4bb2SPeter Wemm 		/*
37953bf4bb2SPeter Wemm 		 * Try to acquire the want_exclusive flag.
38053bf4bb2SPeter Wemm 		 */
38103e9c6c1SJohn Dyson 		error = acquire(lkp, extflags, (LK_HAVE_EXCL | LK_WANT_EXCL));
38253bf4bb2SPeter Wemm 		if (error)
38353bf4bb2SPeter Wemm 			break;
38453bf4bb2SPeter Wemm 		lkp->lk_flags |= LK_WANT_EXCL;
38553bf4bb2SPeter Wemm 		/*
38653bf4bb2SPeter Wemm 		 * Wait for shared locks and upgrades to finish.
38753bf4bb2SPeter Wemm 		 */
38803e9c6c1SJohn Dyson 		error = acquire(lkp, extflags, LK_WANT_UPGRADE | LK_SHARE_NONZERO);
38953bf4bb2SPeter Wemm 		lkp->lk_flags &= ~LK_WANT_EXCL;
39053bf4bb2SPeter Wemm 		if (error)
39153bf4bb2SPeter Wemm 			break;
39253bf4bb2SPeter Wemm 		lkp->lk_flags |= LK_HAVE_EXCL;
39353bf4bb2SPeter Wemm 		lkp->lk_lockholder = pid;
39453bf4bb2SPeter Wemm 		if (lkp->lk_exclusivecount != 0)
39553bf4bb2SPeter Wemm 			panic("lockmgr: non-zero exclusive count");
39653bf4bb2SPeter Wemm 		lkp->lk_exclusivecount = 1;
39715a1057cSEivind Eklund #if defined(DEBUG_LOCKS)
39815a1057cSEivind Eklund 			lkp->lk_filename = file;
39915a1057cSEivind Eklund 			lkp->lk_lineno = line;
40015a1057cSEivind Eklund 			lkp->lk_lockername = name;
40115a1057cSEivind Eklund #endif
40253bf4bb2SPeter Wemm 		break;
40353bf4bb2SPeter Wemm 
40453bf4bb2SPeter Wemm 	case LK_RELEASE:
40553bf4bb2SPeter Wemm 		if (lkp->lk_exclusivecount != 0) {
40667812eacSKirk McKusick 			if (lkp->lk_lockholder != pid &&
407e701df7dSMatthew Dillon 			    lkp->lk_lockholder != LK_KERNPROC) {
40853bf4bb2SPeter Wemm 				panic("lockmgr: pid %d, not %s %d unlocking",
40953bf4bb2SPeter Wemm 				    pid, "exclusive lock holder",
41053bf4bb2SPeter Wemm 				    lkp->lk_lockholder);
411e701df7dSMatthew Dillon 			}
4129b2e5badSJohn Dyson 			if (lkp->lk_exclusivecount == 1) {
41353bf4bb2SPeter Wemm 				lkp->lk_flags &= ~LK_HAVE_EXCL;
41453bf4bb2SPeter Wemm 				lkp->lk_lockholder = LK_NOPROC;
4159b2e5badSJohn Dyson 				lkp->lk_exclusivecount = 0;
4169b2e5badSJohn Dyson 			} else {
4179b2e5badSJohn Dyson 				lkp->lk_exclusivecount--;
41853bf4bb2SPeter Wemm 			}
4191b367556SJason Evans 		} else if (lkp->lk_flags & LK_SHARE_NONZERO)
42003e9c6c1SJohn Dyson 			shareunlock(lkp, 1);
42103e9c6c1SJohn Dyson 		if (lkp->lk_flags & LK_WAIT_NONZERO)
42253bf4bb2SPeter Wemm 			wakeup((void *)lkp);
42353bf4bb2SPeter Wemm 		break;
42453bf4bb2SPeter Wemm 
42553bf4bb2SPeter Wemm 	case LK_DRAIN:
42653bf4bb2SPeter Wemm 		/*
42753bf4bb2SPeter Wemm 		 * Check that we do not already hold the lock, as it can
42853bf4bb2SPeter Wemm 		 * never drain if we do. Unfortunately, we have no way to
42953bf4bb2SPeter Wemm 		 * check for holding a shared lock, but at least we can
43053bf4bb2SPeter Wemm 		 * check for an exclusive one.
43153bf4bb2SPeter Wemm 		 */
43253bf4bb2SPeter Wemm 		if (lkp->lk_lockholder == pid)
43353bf4bb2SPeter Wemm 			panic("lockmgr: draining against myself");
43403e9c6c1SJohn Dyson 
43503e9c6c1SJohn Dyson 		error = acquiredrain(lkp, extflags);
43603e9c6c1SJohn Dyson 		if (error)
43753bf4bb2SPeter Wemm 			break;
43853bf4bb2SPeter Wemm 		lkp->lk_flags |= LK_DRAINING | LK_HAVE_EXCL;
43953bf4bb2SPeter Wemm 		lkp->lk_lockholder = pid;
44053bf4bb2SPeter Wemm 		lkp->lk_exclusivecount = 1;
44115a1057cSEivind Eklund #if defined(DEBUG_LOCKS)
44215a1057cSEivind Eklund 			lkp->lk_filename = file;
44315a1057cSEivind Eklund 			lkp->lk_lineno = line;
44415a1057cSEivind Eklund 			lkp->lk_lockername = name;
44515a1057cSEivind Eklund #endif
44653bf4bb2SPeter Wemm 		break;
44753bf4bb2SPeter Wemm 
44853bf4bb2SPeter Wemm 	default:
4499ed346baSBosko Milekic 		mtx_unlock(lkp->lk_interlock);
45053bf4bb2SPeter Wemm 		panic("lockmgr: unknown locktype request %d",
45153bf4bb2SPeter Wemm 		    flags & LK_TYPE_MASK);
45253bf4bb2SPeter Wemm 		/* NOTREACHED */
45353bf4bb2SPeter Wemm 	}
45403e9c6c1SJohn Dyson 	if ((lkp->lk_flags & LK_WAITDRAIN) &&
45503e9c6c1SJohn Dyson 	    (lkp->lk_flags & (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE |
45603e9c6c1SJohn Dyson 		LK_SHARE_NONZERO | LK_WAIT_NONZERO)) == 0) {
45753bf4bb2SPeter Wemm 		lkp->lk_flags &= ~LK_WAITDRAIN;
45853bf4bb2SPeter Wemm 		wakeup((void *)&lkp->lk_flags);
45953bf4bb2SPeter Wemm 	}
4609ed346baSBosko Milekic 	mtx_unlock(lkp->lk_interlock);
46153bf4bb2SPeter Wemm 	return (error);
46253bf4bb2SPeter Wemm }
46353bf4bb2SPeter Wemm 
46499448ed1SJohn Dyson static int
46599448ed1SJohn Dyson acquiredrain(struct lock *lkp, int extflags) {
46699448ed1SJohn Dyson 	int error;
46799448ed1SJohn Dyson 
46899448ed1SJohn Dyson 	if ((extflags & LK_NOWAIT) && (lkp->lk_flags & LK_ALL)) {
46999448ed1SJohn Dyson 		return EBUSY;
47099448ed1SJohn Dyson 	}
47199448ed1SJohn Dyson 
47299448ed1SJohn Dyson 	error = apause(lkp, LK_ALL);
47399448ed1SJohn Dyson 	if (error == 0)
47499448ed1SJohn Dyson 		return 0;
47599448ed1SJohn Dyson 
47699448ed1SJohn Dyson 	while (lkp->lk_flags & LK_ALL) {
47799448ed1SJohn Dyson 		lkp->lk_flags |= LK_WAITDRAIN;
47896fde7daSJake Burkholder 		error = msleep(&lkp->lk_flags, lkp->lk_interlock, lkp->lk_prio,
47923b59018SMatthew Dillon 			lkp->lk_wmesg,
48023b59018SMatthew Dillon 			((extflags & LK_TIMELOCK) ? lkp->lk_timo : 0));
48199448ed1SJohn Dyson 		if (error)
48299448ed1SJohn Dyson 			return error;
48399448ed1SJohn Dyson 		if (extflags & LK_SLEEPFAIL) {
48499448ed1SJohn Dyson 			return ENOLCK;
48599448ed1SJohn Dyson 		}
48699448ed1SJohn Dyson 	}
48799448ed1SJohn Dyson 	return 0;
48899448ed1SJohn Dyson }
48999448ed1SJohn Dyson 
49099448ed1SJohn Dyson /*
49199448ed1SJohn Dyson  * Initialize a lock; required before use.
49299448ed1SJohn Dyson  */
49399448ed1SJohn Dyson void
49499448ed1SJohn Dyson lockinit(lkp, prio, wmesg, timo, flags)
49599448ed1SJohn Dyson 	struct lock *lkp;
49699448ed1SJohn Dyson 	int prio;
49704858e7eSEivind Eklund 	const char *wmesg;
49899448ed1SJohn Dyson 	int timo;
49999448ed1SJohn Dyson 	int flags;
50099448ed1SJohn Dyson {
501a18b1f1dSJason Evans 	CTR5(KTR_LOCKMGR, "lockinit(): lkp == %p, prio == %d, wmesg == \"%s\", "
502a18b1f1dSJason Evans 	    "timo == %d, flags = 0x%x\n", lkp, prio, wmesg, timo, flags);
50399448ed1SJohn Dyson 
504f2860039SMatthew Dillon 	if (lock_mtx_valid == 0) {
5056008862bSJohn Baldwin 		mtx_init(&lock_mtx, "lockmgr", NULL, MTX_DEF);
506f2860039SMatthew Dillon 		lock_mtx_valid = 1;
507f2860039SMatthew Dillon 	}
508f2860039SMatthew Dillon 	/*
509f2860039SMatthew Dillon 	 * XXX cleanup - make sure mtxpool is always initialized before
510f2860039SMatthew Dillon 	 * this is ever called.
511f2860039SMatthew Dillon 	 */
512f2860039SMatthew Dillon 	if (mtx_pool_valid) {
5139ed346baSBosko Milekic 		mtx_lock(&lock_mtx);
514f2860039SMatthew Dillon 		lkp->lk_interlock = mtx_pool_alloc();
5159ed346baSBosko Milekic 		mtx_unlock(&lock_mtx);
5169722d88fSJason Evans 	} else {
5179722d88fSJason Evans 		lkp->lk_interlock = &lock_mtx;
5189722d88fSJason Evans 	}
5199722d88fSJason Evans 	lkp->lk_flags = (flags & LK_EXTFLG_MASK);
52099448ed1SJohn Dyson 	lkp->lk_sharecount = 0;
52199448ed1SJohn Dyson 	lkp->lk_waitcount = 0;
52299448ed1SJohn Dyson 	lkp->lk_exclusivecount = 0;
52399448ed1SJohn Dyson 	lkp->lk_prio = prio;
52499448ed1SJohn Dyson 	lkp->lk_wmesg = wmesg;
52599448ed1SJohn Dyson 	lkp->lk_timo = timo;
52699448ed1SJohn Dyson 	lkp->lk_lockholder = LK_NOPROC;
52799448ed1SJohn Dyson }
52899448ed1SJohn Dyson 
52999448ed1SJohn Dyson /*
530a18b1f1dSJason Evans  * Destroy a lock.
531a18b1f1dSJason Evans  */
532a18b1f1dSJason Evans void
533a18b1f1dSJason Evans lockdestroy(lkp)
534a18b1f1dSJason Evans 	struct lock *lkp;
535a18b1f1dSJason Evans {
536a18b1f1dSJason Evans 	CTR2(KTR_LOCKMGR, "lockdestroy(): lkp == %p (lk_wmesg == \"%s\")",
537a18b1f1dSJason Evans 	    lkp, lkp->lk_wmesg);
538a18b1f1dSJason Evans }
539a18b1f1dSJason Evans 
540a18b1f1dSJason Evans /*
54199448ed1SJohn Dyson  * Determine the status of a lock.
54299448ed1SJohn Dyson  */
54399448ed1SJohn Dyson int
544b40ce416SJulian Elischer lockstatus(lkp, td)
54599448ed1SJohn Dyson 	struct lock *lkp;
546b40ce416SJulian Elischer 	struct thread *td;
54799448ed1SJohn Dyson {
54899448ed1SJohn Dyson 	int lock_type = 0;
54999448ed1SJohn Dyson 
5509ed346baSBosko Milekic 	mtx_lock(lkp->lk_interlock);
5516bdfe06aSEivind Eklund 	if (lkp->lk_exclusivecount != 0) {
552b40ce416SJulian Elischer 		if (td == NULL || lkp->lk_lockholder == td->td_proc->p_pid)
55399448ed1SJohn Dyson 			lock_type = LK_EXCLUSIVE;
5546bdfe06aSEivind Eklund 		else
5556bdfe06aSEivind Eklund 			lock_type = LK_EXCLOTHER;
5566bdfe06aSEivind Eklund 	} else if (lkp->lk_sharecount != 0)
55799448ed1SJohn Dyson 		lock_type = LK_SHARED;
5589ed346baSBosko Milekic 	mtx_unlock(lkp->lk_interlock);
55999448ed1SJohn Dyson 	return (lock_type);
56099448ed1SJohn Dyson }
56199448ed1SJohn Dyson 
56253bf4bb2SPeter Wemm /*
56367812eacSKirk McKusick  * Determine the number of holders of a lock.
56467812eacSKirk McKusick  */
56567812eacSKirk McKusick int
56667812eacSKirk McKusick lockcount(lkp)
56767812eacSKirk McKusick 	struct lock *lkp;
56867812eacSKirk McKusick {
56967812eacSKirk McKusick 	int count;
57067812eacSKirk McKusick 
5719ed346baSBosko Milekic 	mtx_lock(lkp->lk_interlock);
57267812eacSKirk McKusick 	count = lkp->lk_exclusivecount + lkp->lk_sharecount;
5739ed346baSBosko Milekic 	mtx_unlock(lkp->lk_interlock);
57467812eacSKirk McKusick 	return (count);
57567812eacSKirk McKusick }
57667812eacSKirk McKusick 
57767812eacSKirk McKusick /*
57853bf4bb2SPeter Wemm  * Print out information about state of a lock. Used by VOP_PRINT
5790e61ac7bSPoul-Henning Kamp  * routines to display status about contained locks.
58053bf4bb2SPeter Wemm  */
581a1ce9d5cSPeter Wemm void
58253bf4bb2SPeter Wemm lockmgr_printinfo(lkp)
58353bf4bb2SPeter Wemm 	struct lock *lkp;
58453bf4bb2SPeter Wemm {
58553bf4bb2SPeter Wemm 
58653bf4bb2SPeter Wemm 	if (lkp->lk_sharecount)
58753bf4bb2SPeter Wemm 		printf(" lock type %s: SHARED (count %d)", lkp->lk_wmesg,
58853bf4bb2SPeter Wemm 		    lkp->lk_sharecount);
58953bf4bb2SPeter Wemm 	else if (lkp->lk_flags & LK_HAVE_EXCL)
59053bf4bb2SPeter Wemm 		printf(" lock type %s: EXCL (count %d) by pid %d",
59153bf4bb2SPeter Wemm 		    lkp->lk_wmesg, lkp->lk_exclusivecount, lkp->lk_lockholder);
59253bf4bb2SPeter Wemm 	if (lkp->lk_waitcount > 0)
59353bf4bb2SPeter Wemm 		printf(" with %d pending", lkp->lk_waitcount);
59453bf4bb2SPeter Wemm }
595