xref: /freebsd/sys/kern/kern_lock.c (revision 545ddfbe7d4fe8adfb862903b24eac1d5896c1ef)
1 /*-
2  * Copyright (c) 2008 Attilio Rao <attilio@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice(s), this list of conditions and the following disclaimer as
10  *    the first lines of this file unmodified other than the possible
11  *    addition of one or more copyright notices.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice(s), this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
26  * DAMAGE.
27  */
28 
29 #include "opt_adaptive_lockmgrs.h"
30 #include "opt_ddb.h"
31 #include "opt_hwpmc_hooks.h"
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 
36 #include <sys/param.h>
37 #include <sys/kdb.h>
38 #include <sys/ktr.h>
39 #include <sys/lock.h>
40 #include <sys/lock_profile.h>
41 #include <sys/lockmgr.h>
42 #include <sys/mutex.h>
43 #include <sys/proc.h>
44 #include <sys/sleepqueue.h>
45 #ifdef DEBUG_LOCKS
46 #include <sys/stack.h>
47 #endif
48 #include <sys/sysctl.h>
49 #include <sys/systm.h>
50 
51 #include <machine/cpu.h>
52 
53 #ifdef DDB
54 #include <ddb/ddb.h>
55 #endif
56 
57 #ifdef HWPMC_HOOKS
58 #include <sys/pmckern.h>
59 PMC_SOFT_DECLARE( , , lock, failed);
60 #endif
61 
62 CTASSERT(((LK_ADAPTIVE | LK_NOSHARE) & LO_CLASSFLAGS) ==
63     (LK_ADAPTIVE | LK_NOSHARE));
64 CTASSERT(LK_UNLOCKED == (LK_UNLOCKED &
65     ~(LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS)));
66 
67 #define	SQ_EXCLUSIVE_QUEUE	0
68 #define	SQ_SHARED_QUEUE		1
69 
70 #ifndef INVARIANTS
71 #define	_lockmgr_assert(lk, what, file, line)
72 #define	TD_LOCKS_INC(td)
73 #define	TD_LOCKS_DEC(td)
74 #else
75 #define	TD_LOCKS_INC(td)	((td)->td_locks++)
76 #define	TD_LOCKS_DEC(td)	((td)->td_locks--)
77 #endif
78 #define	TD_SLOCKS_INC(td)	((td)->td_lk_slocks++)
79 #define	TD_SLOCKS_DEC(td)	((td)->td_lk_slocks--)
80 
81 #ifndef DEBUG_LOCKS
82 #define	STACK_PRINT(lk)
83 #define	STACK_SAVE(lk)
84 #define	STACK_ZERO(lk)
85 #else
86 #define	STACK_PRINT(lk)	stack_print_ddb(&(lk)->lk_stack)
87 #define	STACK_SAVE(lk)	stack_save(&(lk)->lk_stack)
88 #define	STACK_ZERO(lk)	stack_zero(&(lk)->lk_stack)
89 #endif
90 
91 #define	LOCK_LOG2(lk, string, arg1, arg2)				\
92 	if (LOCK_LOG_TEST(&(lk)->lock_object, 0))			\
93 		CTR2(KTR_LOCK, (string), (arg1), (arg2))
94 #define	LOCK_LOG3(lk, string, arg1, arg2, arg3)				\
95 	if (LOCK_LOG_TEST(&(lk)->lock_object, 0))			\
96 		CTR3(KTR_LOCK, (string), (arg1), (arg2), (arg3))
97 
98 #define	GIANT_DECLARE							\
99 	int _i = 0;							\
100 	WITNESS_SAVE_DECL(Giant)
101 #define	GIANT_RESTORE() do {						\
102 	if (_i > 0) {							\
103 		while (_i--)						\
104 			mtx_lock(&Giant);				\
105 		WITNESS_RESTORE(&Giant.lock_object, Giant);		\
106 	}								\
107 } while (0)
108 #define	GIANT_SAVE() do {						\
109 	if (mtx_owned(&Giant)) {					\
110 		WITNESS_SAVE(&Giant.lock_object, Giant);		\
111 		while (mtx_owned(&Giant)) {				\
112 			_i++;						\
113 			mtx_unlock(&Giant);				\
114 		}							\
115 	}								\
116 } while (0)
117 
118 #define	LK_CAN_SHARE(x, flags)						\
119 	(((x) & LK_SHARE) &&						\
120 	(((x) & (LK_EXCLUSIVE_WAITERS | LK_EXCLUSIVE_SPINNERS)) == 0 ||	\
121 	(curthread->td_lk_slocks != 0 && !(flags & LK_NODDLKTREAT)) ||	\
122 	(curthread->td_pflags & TDP_DEADLKTREAT)))
123 #define	LK_TRYOP(x)							\
124 	((x) & LK_NOWAIT)
125 
126 #define	LK_CAN_WITNESS(x)						\
127 	(((x) & LK_NOWITNESS) == 0 && !LK_TRYOP(x))
128 #define	LK_TRYWIT(x)							\
129 	(LK_TRYOP(x) ? LOP_TRYLOCK : 0)
130 
131 #define	LK_CAN_ADAPT(lk, f)						\
132 	(((lk)->lock_object.lo_flags & LK_ADAPTIVE) != 0 &&		\
133 	((f) & LK_SLEEPFAIL) == 0)
134 
135 #define	lockmgr_disowned(lk)						\
136 	(((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == LK_KERNPROC)
137 
138 #define	lockmgr_xlocked(lk)						\
139 	(((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread)
140 
141 static void	assert_lockmgr(const struct lock_object *lock, int how);
142 #ifdef DDB
143 static void	db_show_lockmgr(const struct lock_object *lock);
144 #endif
145 static void	lock_lockmgr(struct lock_object *lock, uintptr_t how);
146 #ifdef KDTRACE_HOOKS
147 static int	owner_lockmgr(const struct lock_object *lock,
148 		    struct thread **owner);
149 #endif
150 static uintptr_t unlock_lockmgr(struct lock_object *lock);
151 
152 struct lock_class lock_class_lockmgr = {
153 	.lc_name = "lockmgr",
154 	.lc_flags = LC_RECURSABLE | LC_SLEEPABLE | LC_SLEEPLOCK | LC_UPGRADABLE,
155 	.lc_assert = assert_lockmgr,
156 #ifdef DDB
157 	.lc_ddb_show = db_show_lockmgr,
158 #endif
159 	.lc_lock = lock_lockmgr,
160 	.lc_unlock = unlock_lockmgr,
161 #ifdef KDTRACE_HOOKS
162 	.lc_owner = owner_lockmgr,
163 #endif
164 };
165 
166 #ifdef ADAPTIVE_LOCKMGRS
167 static u_int alk_retries = 10;
168 static u_int alk_loops = 10000;
169 static SYSCTL_NODE(_debug, OID_AUTO, lockmgr, CTLFLAG_RD, NULL,
170     "lockmgr debugging");
171 SYSCTL_UINT(_debug_lockmgr, OID_AUTO, retries, CTLFLAG_RW, &alk_retries, 0, "");
172 SYSCTL_UINT(_debug_lockmgr, OID_AUTO, loops, CTLFLAG_RW, &alk_loops, 0, "");
173 #endif
174 
175 static __inline struct thread *
176 lockmgr_xholder(const struct lock *lk)
177 {
178 	uintptr_t x;
179 
180 	x = lk->lk_lock;
181 	return ((x & LK_SHARE) ? NULL : (struct thread *)LK_HOLDER(x));
182 }
183 
184 /*
185  * This function assumes the sleepqueue chain lock is held on entry and
186  * returns with it released.  It also assumes the generic interlock is
187  * sane and has already been checked by the caller.  If LK_INTERLOCK is
188  * specified, the interlock is not reacquired after the sleep.
189  */
190 static __inline int
191 sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk,
192     const char *wmesg, int pri, int timo, int queue)
193 {
194 	GIANT_DECLARE;
195 	struct lock_class *class;
196 	int catch, error;
197 
198 	class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
199 	catch = pri & PCATCH;
200 	pri &= PRIMASK;
201 	error = 0;
202 
203 	LOCK_LOG3(lk, "%s: %p blocking on the %s sleepqueue", __func__, lk,
204 	    (queue == SQ_EXCLUSIVE_QUEUE) ? "exclusive" : "shared");
205 
206 	if (flags & LK_INTERLOCK)
207 		class->lc_unlock(ilk);
208 	if (queue == SQ_EXCLUSIVE_QUEUE && (flags & LK_SLEEPFAIL) != 0)
209 		lk->lk_exslpfail++;
210 	GIANT_SAVE();
211 	sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
212 	    SLEEPQ_INTERRUPTIBLE : 0), queue);
213 	if ((flags & LK_TIMELOCK) && timo) {
214 		sleepq_release(&lk->lock_object);
215 		sleepq_set_timeout(&lk->lock_object, timo);
216 		sleepq_lock(&lk->lock_object);
217 	}
218 	/*
219 	 * Decide which sleep primitive to use for the actual sleep.
220 	 */
221 	if ((flags & LK_TIMELOCK) && timo && catch)
222 		error = sleepq_timedwait_sig(&lk->lock_object, pri);
223 	else if ((flags & LK_TIMELOCK) && timo)
224 		error = sleepq_timedwait(&lk->lock_object, pri);
225 	else if (catch)
226 		error = sleepq_wait_sig(&lk->lock_object, pri);
227 	else
228 		sleepq_wait(&lk->lock_object, pri);
229 	GIANT_RESTORE();
230 	if ((flags & LK_SLEEPFAIL) && error == 0)
231 		error = ENOLCK;
232 
233 	return (error);
234 }
235 
236 static __inline int
237 wakeupshlk(struct lock *lk, const char *file, int line)
238 {
239 	uintptr_t v, x;
240 	u_int realexslp;
241 	int queue, wakeup_swapper;
242 
243 	WITNESS_UNLOCK(&lk->lock_object, 0, file, line);
244 	LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line);
245 
246 	wakeup_swapper = 0;
247 	for (;;) {
248 		x = lk->lk_lock;
249 
250 		/*
251 		 * If there is more than one shared lock held, just drop one
252 		 * and return.
253 		 */
254 		if (LK_SHARERS(x) > 1) {
255 			if (atomic_cmpset_rel_ptr(&lk->lk_lock, x,
256 			    x - LK_ONE_SHARER))
257 				break;
258 			continue;
259 		}
260 
261 		/*
262 		 * If there are no waiters on the exclusive queue, drop the
263 		 * lock quickly.
264 		 */
265 		if ((x & LK_ALL_WAITERS) == 0) {
266 			MPASS((x & ~LK_EXCLUSIVE_SPINNERS) ==
267 			    LK_SHARERS_LOCK(1));
268 			if (atomic_cmpset_rel_ptr(&lk->lk_lock, x, LK_UNLOCKED))
269 				break;
270 			continue;
271 		}
272 
273 		/*
274 		 * We should have a sharer with waiters, so enter the hard
275 		 * path in order to handle wakeups correctly.
276 		 */
277 		sleepq_lock(&lk->lock_object);
278 		x = lk->lk_lock & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
279 		v = LK_UNLOCKED;
280 
281 		/*
282 		 * If the lock has exclusive waiters, give them preference in
283 		 * order to avoid deadlock with shared runners-up.
284 		 * If interruptible sleeps left the exclusive queue empty,
285 		 * avoid starvation of the threads sleeping on the shared
286 		 * queue by giving them precedence and clearing the
287 		 * exclusive waiters bit anyway.
288 		 * Please note that the lk_exslpfail count may not reflect
289 		 * the real number of waiters with the LK_SLEEPFAIL flag,
290 		 * because it may be used in conjunction with interruptible
291 		 * sleeps, so lk_exslpfail should be treated as an upper
292 		 * bound, including the edge cases.
293 		 */
294 		realexslp = sleepq_sleepcnt(&lk->lock_object,
295 		    SQ_EXCLUSIVE_QUEUE);
296 		if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
297 			if (lk->lk_exslpfail < realexslp) {
298 				lk->lk_exslpfail = 0;
299 				queue = SQ_EXCLUSIVE_QUEUE;
300 				v |= (x & LK_SHARED_WAITERS);
301 			} else {
302 				lk->lk_exslpfail = 0;
303 				LOCK_LOG2(lk,
304 				    "%s: %p has only LK_SLEEPFAIL sleepers",
305 				    __func__, lk);
306 				LOCK_LOG2(lk,
307 			    "%s: %p waking up threads on the exclusive queue",
308 				    __func__, lk);
309 				wakeup_swapper =
310 				    sleepq_broadcast(&lk->lock_object,
311 				    SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
312 				queue = SQ_SHARED_QUEUE;
313 			}
314 
315 		} else {
316 
317 			/*
318 			 * Exclusive waiters sleeping with LK_SLEEPFAIL on
319 			 * and using interruptible sleeps/timeout may have
320 			 * left spurious lk_exslpfail counts on, so clean
321 			 * it up anyway.
322 			 */
323 			lk->lk_exslpfail = 0;
324 			queue = SQ_SHARED_QUEUE;
325 		}
326 
327 		if (!atomic_cmpset_rel_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x,
328 		    v)) {
329 			sleepq_release(&lk->lock_object);
330 			continue;
331 		}
332 		LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
333 		    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
334 		    "exclusive");
335 		wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK,
336 		    0, queue);
337 		sleepq_release(&lk->lock_object);
338 		break;
339 	}
340 
341 	lock_profile_release_lock(&lk->lock_object);
342 	TD_LOCKS_DEC(curthread);
343 	TD_SLOCKS_DEC(curthread);
344 	return (wakeup_swapper);
345 }
346 
347 static void
348 assert_lockmgr(const struct lock_object *lock, int what)
349 {
350 
351 	panic("lockmgr locks do not support assertions");
352 }
353 
354 static void
355 lock_lockmgr(struct lock_object *lock, uintptr_t how)
356 {
357 
358 	panic("lockmgr locks do not support sleep interlocking");
359 }
360 
361 static uintptr_t
362 unlock_lockmgr(struct lock_object *lock)
363 {
364 
365 	panic("lockmgr locks do not support sleep interlocking");
366 }
367 
368 #ifdef KDTRACE_HOOKS
369 static int
370 owner_lockmgr(const struct lock_object *lock, struct thread **owner)
371 {
372 
373 	panic("lockmgr locks do not support owner inquiring");
374 }
375 #endif
376 
377 void
378 lockinit(struct lock *lk, int pri, const char *wmesg, int timo, int flags)
379 {
380 	int iflags;
381 
382 	MPASS((flags & ~LK_INIT_MASK) == 0);
383 	ASSERT_ATOMIC_LOAD_PTR(lk->lk_lock,
384 	    ("%s: lockmgr not aligned for %s: %p", __func__, wmesg,
385 	    &lk->lk_lock));
386 
387 	iflags = LO_SLEEPABLE | LO_UPGRADABLE;
388 	if (flags & LK_CANRECURSE)
389 		iflags |= LO_RECURSABLE;
390 	if ((flags & LK_NODUP) == 0)
391 		iflags |= LO_DUPOK;
392 	if (flags & LK_NOPROFILE)
393 		iflags |= LO_NOPROFILE;
394 	if ((flags & LK_NOWITNESS) == 0)
395 		iflags |= LO_WITNESS;
396 	if (flags & LK_QUIET)
397 		iflags |= LO_QUIET;
398 	if (flags & LK_IS_VNODE)
399 		iflags |= LO_IS_VNODE;
400 	iflags |= flags & (LK_ADAPTIVE | LK_NOSHARE);
401 
402 	lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
403 	lk->lk_lock = LK_UNLOCKED;
404 	lk->lk_recurse = 0;
405 	lk->lk_exslpfail = 0;
406 	lk->lk_timo = timo;
407 	lk->lk_pri = pri;
408 	STACK_ZERO(lk);
409 }
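
/*
 * Example (sketch): a typical lifecycle for a lock set up by lockinit()
 * above.  The caller, the "examplelk" wait message and the PVFS priority
 * are hypothetical; lockmgr() is the wrapper macro from <sys/lockmgr.h>
 * around __lockmgr_args() below.
 *
 *	struct lock lk;
 *
 *	lockinit(&lk, PVFS, "examplelk", 0, 0);
 *	(void)lockmgr(&lk, LK_EXCLUSIVE, NULL);
 *	...
 *	(void)lockmgr(&lk, LK_RELEASE, NULL);
 *	lockdestroy(&lk);
 */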
410 
411 /*
412  * XXX: Gross hacks to manipulate external lock flags after
413  * initialization.  Used for certain vnode and buf locks.
414  */
415 void
416 lockallowshare(struct lock *lk)
417 {
418 
419 	lockmgr_assert(lk, KA_XLOCKED);
420 	lk->lock_object.lo_flags &= ~LK_NOSHARE;
421 }
422 
423 void
424 lockdisableshare(struct lock *lk)
425 {
426 
427 	lockmgr_assert(lk, KA_XLOCKED);
428 	lk->lock_object.lo_flags |= LK_NOSHARE;
429 }
430 
431 void
432 lockallowrecurse(struct lock *lk)
433 {
434 
435 	lockmgr_assert(lk, KA_XLOCKED);
436 	lk->lock_object.lo_flags |= LO_RECURSABLE;
437 }
438 
439 void
440 lockdisablerecurse(struct lock *lk)
441 {
442 
443 	lockmgr_assert(lk, KA_XLOCKED);
444 	lk->lock_object.lo_flags &= ~LO_RECURSABLE;
445 }
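
/*
 * Example (sketch): per the assertions above, these tuning helpers must be
 * called with the lock held exclusively.  A hypothetical caller:
 *
 *	(void)lockmgr(&lk, LK_EXCLUSIVE, NULL);
 *	lockallowrecurse(&lk);
 *	(void)lockmgr(&lk, LK_RELEASE, NULL);
 */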
446 
447 void
448 lockdestroy(struct lock *lk)
449 {
450 
451 	KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held"));
452 	KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed"));
453 	KASSERT(lk->lk_exslpfail == 0, ("lockmgr still exclusive waiters"));
454 	lock_destroy(&lk->lock_object);
455 }
456 
457 int
458 __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
459     const char *wmesg, int pri, int timo, const char *file, int line)
460 {
461 	GIANT_DECLARE;
462 	struct lock_class *class;
463 	const char *iwmesg;
464 	uintptr_t tid, v, x;
465 	u_int op, realexslp;
466 	int error, ipri, itimo, queue, wakeup_swapper;
467 #ifdef LOCK_PROFILING
468 	uint64_t waittime = 0;
469 	int contested = 0;
470 #endif
471 #ifdef ADAPTIVE_LOCKMGRS
472 	volatile struct thread *owner;
473 	u_int i, spintries = 0;
474 #endif
475 
476 	error = 0;
477 	tid = (uintptr_t)curthread;
478 	op = (flags & LK_TYPE_MASK);
479 	iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg;
480 	ipri = (pri == LK_PRIO_DEFAULT) ? lk->lk_pri : pri;
481 	itimo = (timo == LK_TIMO_DEFAULT) ? lk->lk_timo : timo;
482 
483 	MPASS((flags & ~LK_TOTAL_MASK) == 0);
484 	KASSERT((op & (op - 1)) == 0,
485 	    ("%s: Invalid requested operation @ %s:%d", __func__, file, line));
486 	KASSERT((flags & (LK_NOWAIT | LK_SLEEPFAIL)) == 0 ||
487 	    (op != LK_DOWNGRADE && op != LK_RELEASE),
488 	    ("%s: Invalid flags in regard of the operation desired @ %s:%d",
489 	    __func__, file, line));
490 	KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL,
491 	    ("%s: LK_INTERLOCK passed without valid interlock @ %s:%d",
492 	    __func__, file, line));
493 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
494 	    ("%s: idle thread %p on lockmgr %s @ %s:%d", __func__, curthread,
495 	    lk->lock_object.lo_name, file, line));
496 
497 	class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
498 	if (panicstr != NULL) {
499 		if (flags & LK_INTERLOCK)
500 			class->lc_unlock(ilk);
501 		return (0);
502 	}
503 
504 	if (lk->lock_object.lo_flags & LK_NOSHARE) {
505 		switch (op) {
506 		case LK_SHARED:
507 			op = LK_EXCLUSIVE;
508 			break;
509 		case LK_UPGRADE:
510 		case LK_TRYUPGRADE:
511 		case LK_DOWNGRADE:
512 			_lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED,
513 			    file, line);
514 			if (flags & LK_INTERLOCK)
515 				class->lc_unlock(ilk);
516 			return (0);
517 		}
518 	}
519 
520 	wakeup_swapper = 0;
521 	switch (op) {
522 	case LK_SHARED:
523 		if (LK_CAN_WITNESS(flags))
524 			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
525 			    file, line, flags & LK_INTERLOCK ? ilk : NULL);
526 		for (;;) {
527 			x = lk->lk_lock;
528 
529 			/*
530 			 * If no other thread has an exclusive lock, or
531 			 * no exclusive waiter is present, bump the count of
532 			 * sharers.  Since we have to preserve the state of
533 			 * waiters, if we fail to acquire the shared lock
534 			 * loop back and retry.
535 			 */
536 			if (LK_CAN_SHARE(x, flags)) {
537 				if (atomic_cmpset_acq_ptr(&lk->lk_lock, x,
538 				    x + LK_ONE_SHARER))
539 					break;
540 				continue;
541 			}
542 #ifdef HWPMC_HOOKS
543 			PMC_SOFT_CALL( , , lock, failed);
544 #endif
545 			lock_profile_obtain_lock_failed(&lk->lock_object,
546 			    &contested, &waittime);
547 
548 			/*
549 			 * If the lock is already held by curthread in
550 			 * exclusive way avoid a deadlock.
551 			 * exclusive mode, avoid a deadlock.
552 			if (LK_HOLDER(x) == tid) {
553 				LOCK_LOG2(lk,
554 				    "%s: %p already held in exclusive mode",
555 				    __func__, lk);
556 				error = EDEADLK;
557 				break;
558 			}
559 
560 			/*
561 			 * If the lock is expected to not sleep just give up
562 			 * and return.
563 			 */
564 			if (LK_TRYOP(flags)) {
565 				LOCK_LOG2(lk, "%s: %p fails the try operation",
566 				    __func__, lk);
567 				error = EBUSY;
568 				break;
569 			}
570 
571 #ifdef ADAPTIVE_LOCKMGRS
572 			/*
573 			 * If the owner is running on another CPU, spin until
574 			 * the owner stops running or the state of the lock
575 			 * changes.  We need to handle both possible states here
576 			 * because, for a failed acquisition, the lock can be
577 			 * either held in exclusive mode or shared mode
578 			 * (for the writer starvation avoidance technique).
579 			 */
580 			if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
581 			    LK_HOLDER(x) != LK_KERNPROC) {
582 				owner = (struct thread *)LK_HOLDER(x);
583 				if (LOCK_LOG_TEST(&lk->lock_object, 0))
584 					CTR3(KTR_LOCK,
585 					    "%s: spinning on %p held by %p",
586 					    __func__, lk, owner);
587 				KTR_STATE1(KTR_SCHED, "thread",
588 				    sched_tdname(curthread), "spinning",
589 				    "lockname:\"%s\"", lk->lock_object.lo_name);
590 
591 				/*
592 				 * If we are also holding an interlock, drop it
593 				 * in order to avoid a deadlock if the lockmgr
594 				 * owner is adaptively spinning on the
595 				 * interlock itself.
596 				 */
597 				if (flags & LK_INTERLOCK) {
598 					class->lc_unlock(ilk);
599 					flags &= ~LK_INTERLOCK;
600 				}
601 				GIANT_SAVE();
602 				while (LK_HOLDER(lk->lk_lock) ==
603 				    (uintptr_t)owner && TD_IS_RUNNING(owner))
604 					cpu_spinwait();
605 				KTR_STATE0(KTR_SCHED, "thread",
606 				    sched_tdname(curthread), "running");
607 				GIANT_RESTORE();
608 				continue;
609 			} else if (LK_CAN_ADAPT(lk, flags) &&
610 			    (x & LK_SHARE) != 0 && LK_SHARERS(x) &&
611 			    spintries < alk_retries) {
612 				KTR_STATE1(KTR_SCHED, "thread",
613 				    sched_tdname(curthread), "spinning",
614 				    "lockname:\"%s\"", lk->lock_object.lo_name);
615 				if (flags & LK_INTERLOCK) {
616 					class->lc_unlock(ilk);
617 					flags &= ~LK_INTERLOCK;
618 				}
619 				GIANT_SAVE();
620 				spintries++;
621 				for (i = 0; i < alk_loops; i++) {
622 					if (LOCK_LOG_TEST(&lk->lock_object, 0))
623 						CTR4(KTR_LOCK,
624 				    "%s: shared spinning on %p with %u and %u",
625 						    __func__, lk, spintries, i);
626 					x = lk->lk_lock;
627 					if ((x & LK_SHARE) == 0 ||
628 					    LK_CAN_SHARE(x, flags) != 0)
629 						break;
630 					cpu_spinwait();
631 				}
632 				KTR_STATE0(KTR_SCHED, "thread",
633 				    sched_tdname(curthread), "running");
634 				GIANT_RESTORE();
635 				if (i != alk_loops)
636 					continue;
637 			}
638 #endif
639 
640 			/*
641 			 * Acquire the sleepqueue chain lock because we
642 			 * probably will need to manipulate waiters flags.
643 			 */
644 			sleepq_lock(&lk->lock_object);
645 			x = lk->lk_lock;
646 
647 			/*
648 			 * if the lock can be acquired in shared mode, try
649 			 * again.
650 			 */
651 			if (LK_CAN_SHARE(x, flags)) {
652 				sleepq_release(&lk->lock_object);
653 				continue;
654 			}
655 
656 #ifdef ADAPTIVE_LOCKMGRS
657 			/*
658 			 * The current lock owner might have started executing
659 			 * on another CPU (or the lock could have changed
660 			 * owner) while we were waiting on the sleepqueue
661 			 * chain lock.  If so, drop the sleepqueue lock and try
662 			 * again.
663 			 */
664 			if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
665 			    LK_HOLDER(x) != LK_KERNPROC) {
666 				owner = (struct thread *)LK_HOLDER(x);
667 				if (TD_IS_RUNNING(owner)) {
668 					sleepq_release(&lk->lock_object);
669 					continue;
670 				}
671 			}
672 #endif
673 
674 			/*
675 			 * Try to set the LK_SHARED_WAITERS flag.  If we fail,
676 			 * loop back and retry.
677 			 */
678 			if ((x & LK_SHARED_WAITERS) == 0) {
679 				if (!atomic_cmpset_acq_ptr(&lk->lk_lock, x,
680 				    x | LK_SHARED_WAITERS)) {
681 					sleepq_release(&lk->lock_object);
682 					continue;
683 				}
684 				LOCK_LOG2(lk, "%s: %p set shared waiters flag",
685 				    __func__, lk);
686 			}
687 
688 			/*
689 			 * Since we have been unable to acquire the
690 			 * shared lock and the shared waiters flag is set,
691 			 * we will sleep.
692 			 */
693 			error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
694 			    SQ_SHARED_QUEUE);
695 			flags &= ~LK_INTERLOCK;
696 			if (error) {
697 				LOCK_LOG3(lk,
698 				    "%s: interrupted sleep for %p with %d",
699 				    __func__, lk, error);
700 				break;
701 			}
702 			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
703 			    __func__, lk);
704 		}
705 		if (error == 0) {
706 			lock_profile_obtain_lock_success(&lk->lock_object,
707 			    contested, waittime, file, line);
708 			LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, file,
709 			    line);
710 			WITNESS_LOCK(&lk->lock_object, LK_TRYWIT(flags), file,
711 			    line);
712 			TD_LOCKS_INC(curthread);
713 			TD_SLOCKS_INC(curthread);
714 			STACK_SAVE(lk);
715 		}
716 		break;
717 	case LK_UPGRADE:
718 	case LK_TRYUPGRADE:
719 		_lockmgr_assert(lk, KA_SLOCKED, file, line);
720 		v = lk->lk_lock;
721 		x = v & LK_ALL_WAITERS;
722 		v &= LK_EXCLUSIVE_SPINNERS;
723 
724 		/*
725 		 * Try to switch from one shared lock to an exclusive one.
726 		 * We need to preserve waiters flags during the operation.
727 		 */
728 		if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x | v,
729 		    tid | x)) {
730 			LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file,
731 			    line);
732 			WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE |
733 			    LK_TRYWIT(flags), file, line);
734 			TD_SLOCKS_DEC(curthread);
735 			break;
736 		}
737 
738 		/*
739 		 * In LK_TRYUPGRADE mode, do not drop the lock,
740 		 * returning EBUSY instead.
741 		 */
742 		if (op == LK_TRYUPGRADE) {
743 			LOCK_LOG2(lk, "%s: %p failed the nowait upgrade",
744 			    __func__, lk);
745 			error = EBUSY;
746 			break;
747 		}
748 
749 		/*
750 		 * We have been unable to succeed in upgrading, so just
751 		 * give up the shared lock.
752 		 */
753 		wakeup_swapper |= wakeupshlk(lk, file, line);
754 
755 		/* FALLTHROUGH */
756 	case LK_EXCLUSIVE:
757 		if (LK_CAN_WITNESS(flags))
758 			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
759 			    LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
760 			    ilk : NULL);
761 
762 		/*
763 		 * If curthread already holds the lock and this one is
764 		 * allowed to recurse, simply recurse on it.
765 		 */
766 		if (lockmgr_xlocked(lk)) {
767 			if ((flags & LK_CANRECURSE) == 0 &&
768 			    (lk->lock_object.lo_flags & LO_RECURSABLE) == 0) {
769 
770 				/*
771 				 * If this is a try operation, just give up
772 				 * and return EBUSY instead of panicking.
773 				 */
774 				if (LK_TRYOP(flags)) {
775 					LOCK_LOG2(lk,
776 					    "%s: %p fails the try operation",
777 					    __func__, lk);
778 					error = EBUSY;
779 					break;
780 				}
781 				if (flags & LK_INTERLOCK)
782 					class->lc_unlock(ilk);
783 		panic("%s: recursing on non recursive lockmgr %s @ %s:%d\n",
784 				    __func__, iwmesg, file, line);
785 			}
786 			lk->lk_recurse++;
787 			LOCK_LOG2(lk, "%s: %p recursing", __func__, lk);
788 			LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
789 			    lk->lk_recurse, file, line);
790 			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
791 			    LK_TRYWIT(flags), file, line);
792 			TD_LOCKS_INC(curthread);
793 			break;
794 		}
795 
796 		while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED,
797 		    tid)) {
798 #ifdef HWPMC_HOOKS
799 			PMC_SOFT_CALL( , , lock, failed);
800 #endif
801 			lock_profile_obtain_lock_failed(&lk->lock_object,
802 			    &contested, &waittime);
803 
804 			/*
805 			 * If the lock is expected to not sleep just give up
806 			 * and return.
807 			 */
808 			if (LK_TRYOP(flags)) {
809 				LOCK_LOG2(lk, "%s: %p fails the try operation",
810 				    __func__, lk);
811 				error = EBUSY;
812 				break;
813 			}
814 
815 #ifdef ADAPTIVE_LOCKMGRS
816 			/*
817 			 * If the owner is running on another CPU, spin until
818 			 * the owner stops running or the state of the lock
819 			 * changes.
820 			 */
821 			x = lk->lk_lock;
822 			if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
823 			    LK_HOLDER(x) != LK_KERNPROC) {
824 				owner = (struct thread *)LK_HOLDER(x);
825 				if (LOCK_LOG_TEST(&lk->lock_object, 0))
826 					CTR3(KTR_LOCK,
827 					    "%s: spinning on %p held by %p",
828 					    __func__, lk, owner);
829 				KTR_STATE1(KTR_SCHED, "thread",
830 				    sched_tdname(curthread), "spinning",
831 				    "lockname:\"%s\"", lk->lock_object.lo_name);
832 
833 				/*
834 				 * If we are also holding an interlock, drop it
835 				 * in order to avoid a deadlock if the lockmgr
836 				 * owner is adaptively spinning on the
837 				 * interlock itself.
838 				 */
839 				if (flags & LK_INTERLOCK) {
840 					class->lc_unlock(ilk);
841 					flags &= ~LK_INTERLOCK;
842 				}
843 				GIANT_SAVE();
844 				while (LK_HOLDER(lk->lk_lock) ==
845 				    (uintptr_t)owner && TD_IS_RUNNING(owner))
846 					cpu_spinwait();
847 				KTR_STATE0(KTR_SCHED, "thread",
848 				    sched_tdname(curthread), "running");
849 				GIANT_RESTORE();
850 				continue;
851 			} else if (LK_CAN_ADAPT(lk, flags) &&
852 			    (x & LK_SHARE) != 0 && LK_SHARERS(x) &&
853 			    spintries < alk_retries) {
854 				if ((x & LK_EXCLUSIVE_SPINNERS) == 0 &&
855 				    !atomic_cmpset_ptr(&lk->lk_lock, x,
856 				    x | LK_EXCLUSIVE_SPINNERS))
857 					continue;
858 				KTR_STATE1(KTR_SCHED, "thread",
859 				    sched_tdname(curthread), "spinning",
860 				    "lockname:\"%s\"", lk->lock_object.lo_name);
861 				if (flags & LK_INTERLOCK) {
862 					class->lc_unlock(ilk);
863 					flags &= ~LK_INTERLOCK;
864 				}
865 				GIANT_SAVE();
866 				spintries++;
867 				for (i = 0; i < alk_loops; i++) {
868 					if (LOCK_LOG_TEST(&lk->lock_object, 0))
869 						CTR4(KTR_LOCK,
870 				    "%s: shared spinning on %p with %u and %u",
871 						    __func__, lk, spintries, i);
872 					if ((lk->lk_lock &
873 					    LK_EXCLUSIVE_SPINNERS) == 0)
874 						break;
875 					cpu_spinwait();
876 				}
877 				KTR_STATE0(KTR_SCHED, "thread",
878 				    sched_tdname(curthread), "running");
879 				GIANT_RESTORE();
880 				if (i != alk_loops)
881 					continue;
882 			}
883 #endif
884 
885 			/*
886 			 * Acquire the sleepqueue chain lock because we
887 			 * probably will need to manipulate waiters flags.
888 			 */
889 			sleepq_lock(&lk->lock_object);
890 			x = lk->lk_lock;
891 
892 			/*
893 			 * if the lock has been released while we spun on
894 			 * the sleepqueue chain lock just try again.
895 			 */
896 			if (x == LK_UNLOCKED) {
897 				sleepq_release(&lk->lock_object);
898 				continue;
899 			}
900 
901 #ifdef ADAPTIVE_LOCKMGRS
902 			/*
903 			 * The current lock owner might have started executing
904 			 * on another CPU (or the lock could have changed
905 			 * owner) while we were waiting on the sleepqueue
906 			 * chain lock.  If so, drop the sleepqueue lock and try
907 			 * again.
908 			 */
909 			if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
910 			    LK_HOLDER(x) != LK_KERNPROC) {
911 				owner = (struct thread *)LK_HOLDER(x);
912 				if (TD_IS_RUNNING(owner)) {
913 					sleepq_release(&lk->lock_object);
914 					continue;
915 				}
916 			}
917 #endif
918 
919 			/*
920 			 * The lock can be in the state where there is a
921 			 * pending queue of waiters, but still no owner.
922 			 * This happens when the lock is contested and an
923 			 * owner is going to claim the lock.
924 			 * If curthread is the one successfully acquiring it
925 			 * claim lock ownership and return, preserving waiters
926 			 * flags.
927 			 */
928 			v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
929 			if ((x & ~v) == LK_UNLOCKED) {
930 				v &= ~LK_EXCLUSIVE_SPINNERS;
931 				if (atomic_cmpset_acq_ptr(&lk->lk_lock, x,
932 				    tid | v)) {
933 					sleepq_release(&lk->lock_object);
934 					LOCK_LOG2(lk,
935 					    "%s: %p claimed by a new writer",
936 					    __func__, lk);
937 					break;
938 				}
939 				sleepq_release(&lk->lock_object);
940 				continue;
941 			}
942 
943 			/*
944 			 * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
945 			 * fail, loop back and retry.
946 			 */
947 			if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
948 				if (!atomic_cmpset_ptr(&lk->lk_lock, x,
949 				    x | LK_EXCLUSIVE_WAITERS)) {
950 					sleepq_release(&lk->lock_object);
951 					continue;
952 				}
953 				LOCK_LOG2(lk, "%s: %p set excl waiters flag",
954 				    __func__, lk);
955 			}
956 
957 			/*
958 			 * Since we have been unable to acquire the
959 			 * exclusive lock and the exclusive waiters flag
960 			 * is set, we will sleep.
961 			 */
962 			error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
963 			    SQ_EXCLUSIVE_QUEUE);
964 			flags &= ~LK_INTERLOCK;
965 			if (error) {
966 				LOCK_LOG3(lk,
967 				    "%s: interrupted sleep for %p with %d",
968 				    __func__, lk, error);
969 				break;
970 			}
971 			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
972 			    __func__, lk);
973 		}
974 		if (error == 0) {
975 			lock_profile_obtain_lock_success(&lk->lock_object,
976 			    contested, waittime, file, line);
977 			LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
978 			    lk->lk_recurse, file, line);
979 			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
980 			    LK_TRYWIT(flags), file, line);
981 			TD_LOCKS_INC(curthread);
982 			STACK_SAVE(lk);
983 		}
984 		break;
985 	case LK_DOWNGRADE:
986 		_lockmgr_assert(lk, KA_XLOCKED, file, line);
987 		LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line);
988 		WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line);
989 
990 		/*
991 		 * Panic if the lock is recursed.
992 		 */
993 		if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
994 			if (flags & LK_INTERLOCK)
995 				class->lc_unlock(ilk);
996 			panic("%s: downgrade a recursed lockmgr %s @ %s:%d\n",
997 			    __func__, iwmesg, file, line);
998 		}
999 		TD_SLOCKS_INC(curthread);
1000 
1001 		/*
1002 		 * In order to preserve waiters flags, just spin.
1003 		 */
1004 		for (;;) {
1005 			x = lk->lk_lock;
1006 			MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1007 			x &= LK_ALL_WAITERS;
1008 			if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1009 			    LK_SHARERS_LOCK(1) | x))
1010 				break;
1011 			cpu_spinwait();
1012 		}
1013 		break;
1014 	case LK_RELEASE:
1015 		_lockmgr_assert(lk, KA_LOCKED, file, line);
1016 		x = lk->lk_lock;
1017 
1018 		if ((x & LK_SHARE) == 0) {
1019 
1020 			/*
1021 			 * As a first option, treat the lock as if it has no
1022 			 * waiters.
1023 			 * Fix up the tid variable if the lock has been disowned.
1024 			 */
1025 			if (LK_HOLDER(x) == LK_KERNPROC)
1026 				tid = LK_KERNPROC;
1027 			else {
1028 				WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE,
1029 				    file, line);
1030 				TD_LOCKS_DEC(curthread);
1031 			}
1032 			LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0,
1033 			    lk->lk_recurse, file, line);
1034 
1035 			/*
1036 			 * The lock is held in exclusive mode.
1037 			 * If the lock is recursed also, then unrecurse it.
1038 			 */
1039 			if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
1040 				LOCK_LOG2(lk, "%s: %p unrecursing", __func__,
1041 				    lk);
1042 				lk->lk_recurse--;
1043 				break;
1044 			}
1045 			if (tid != LK_KERNPROC)
1046 				lock_profile_release_lock(&lk->lock_object);
1047 
1048 			if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid,
1049 			    LK_UNLOCKED))
1050 				break;
1051 
1052 			sleepq_lock(&lk->lock_object);
1053 			x = lk->lk_lock;
1054 			v = LK_UNLOCKED;
1055 
1056 			/*
1057 			 * If the lock has exclusive waiters, give them
1058 			 * preference in order to avoid deadlock with
1059 			 * shared runners-up.
1060 			 * If interruptible sleeps left the exclusive queue
1061 			 * empty, avoid starvation of the threads sleeping
1062 			 * on the shared queue by giving them precedence
1063 			 * and clearing the exclusive waiters bit anyway.
1064 			 * Please note that the lk_exslpfail count may not
1065 			 * reflect the real number of waiters with the
1066 			 * LK_SLEEPFAIL flag, because it may be used in
1067 			 * conjunction with interruptible sleeps, so
1068 			 * lk_exslpfail should be treated as an upper
1069 			 * bound, including the edge cases.
1070 			 */
1071 			MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1072 			realexslp = sleepq_sleepcnt(&lk->lock_object,
1073 			    SQ_EXCLUSIVE_QUEUE);
1074 			if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
1075 				if (lk->lk_exslpfail < realexslp) {
1076 					lk->lk_exslpfail = 0;
1077 					queue = SQ_EXCLUSIVE_QUEUE;
1078 					v |= (x & LK_SHARED_WAITERS);
1079 				} else {
1080 					lk->lk_exslpfail = 0;
1081 					LOCK_LOG2(lk,
1082 					"%s: %p has only LK_SLEEPFAIL sleepers",
1083 					    __func__, lk);
1084 					LOCK_LOG2(lk,
1085 			"%s: %p waking up threads on the exclusive queue",
1086 					    __func__, lk);
1087 					wakeup_swapper =
1088 					    sleepq_broadcast(&lk->lock_object,
1089 					    SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
1090 					queue = SQ_SHARED_QUEUE;
1091 				}
1092 			} else {
1093 
1094 				/*
1095 				 * Exclusive waiters sleeping with LK_SLEEPFAIL
1096 				 * on and using interruptible sleeps/timeout
1097 				 * may have left spurious lk_exslpfail counts
1098 				 * on, so clean it up anyway.
1099 				 */
1100 				lk->lk_exslpfail = 0;
1101 				queue = SQ_SHARED_QUEUE;
1102 			}
1103 
1104 			LOCK_LOG3(lk,
1105 			    "%s: %p waking up threads on the %s queue",
1106 			    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
1107 			    "exclusive");
1108 			atomic_store_rel_ptr(&lk->lk_lock, v);
1109 			wakeup_swapper |= sleepq_broadcast(&lk->lock_object,
1110 			    SLEEPQ_LK, 0, queue);
1111 			sleepq_release(&lk->lock_object);
1112 			break;
1113 		} else
1114 			wakeup_swapper = wakeupshlk(lk, file, line);
1115 		break;
1116 	case LK_DRAIN:
1117 		if (LK_CAN_WITNESS(flags))
1118 			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
1119 			    LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
1120 			    ilk : NULL);
1121 
1122 		/*
1123 		 * Trying to drain a lock we already own will result in a
1124 		 * deadlock.
1125 		 */
1126 		if (lockmgr_xlocked(lk)) {
1127 			if (flags & LK_INTERLOCK)
1128 				class->lc_unlock(ilk);
1129 			panic("%s: draining %s with the lock held @ %s:%d\n",
1130 			    __func__, iwmesg, file, line);
1131 		}
1132 
1133 		while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
1134 #ifdef HWPMC_HOOKS
1135 			PMC_SOFT_CALL( , , lock, failed);
1136 #endif
1137 			lock_profile_obtain_lock_failed(&lk->lock_object,
1138 			    &contested, &waittime);
1139 
1140 			/*
1141 			 * If the lock is expected to not sleep just give up
1142 			 * and return.
1143 			 */
1144 			if (LK_TRYOP(flags)) {
1145 				LOCK_LOG2(lk, "%s: %p fails the try operation",
1146 				    __func__, lk);
1147 				error = EBUSY;
1148 				break;
1149 			}
1150 
1151 			/*
1152 			 * Acquire the sleepqueue chain lock because we
1153 			 * probably will need to manipulate waiters flags.
1154 			 */
1155 			sleepq_lock(&lk->lock_object);
1156 			x = lk->lk_lock;
1157 
1158 			/*
1159 			 * if the lock has been released while we spun on
1160 			 * the sleepqueue chain lock just try again.
1161 			 */
1162 			if (x == LK_UNLOCKED) {
1163 				sleepq_release(&lk->lock_object);
1164 				continue;
1165 			}
1166 
1167 			v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
1168 			if ((x & ~v) == LK_UNLOCKED) {
1169 				v = (x & ~LK_EXCLUSIVE_SPINNERS);
1170 
1171 				/*
1172 				 * If interruptible sleeps left the exclusive
1173 				 * queue empty, avoid starvation of the
1174 				 * threads sleeping on the shared queue by
1175 				 * giving them precedence and clearing the
1176 				 * exclusive waiters bit anyway.
1177 				 * Please note that the lk_exslpfail count may
1178 				 * not reflect the real number of waiters with
1179 				 * the LK_SLEEPFAIL flag, because it may be
1180 				 * used in conjunction with interruptible
1181 				 * sleeps, so lk_exslpfail should be treated
1182 				 * as an upper bound, including the edge
1183 				 * cases.
1184 				 */
1185 				if (v & LK_EXCLUSIVE_WAITERS) {
1186 					queue = SQ_EXCLUSIVE_QUEUE;
1187 					v &= ~LK_EXCLUSIVE_WAITERS;
1188 				} else {
1189 
1190 					/*
1191 					 * Exclusive waiters sleeping with
1192 					 * LK_SLEEPFAIL on and using
1193 					 * interruptible sleeps/timeout may
1194 					 * have left spurious lk_exslpfail
1195 					 * counts on, so clean it up anyway.
1196 					 */
1197 					MPASS(v & LK_SHARED_WAITERS);
1198 					lk->lk_exslpfail = 0;
1199 					queue = SQ_SHARED_QUEUE;
1200 					v &= ~LK_SHARED_WAITERS;
1201 				}
1202 				if (queue == SQ_EXCLUSIVE_QUEUE) {
1203 					realexslp =
1204 					    sleepq_sleepcnt(&lk->lock_object,
1205 					    SQ_EXCLUSIVE_QUEUE);
1206 					if (lk->lk_exslpfail >= realexslp) {
1207 						lk->lk_exslpfail = 0;
1208 						queue = SQ_SHARED_QUEUE;
1209 						v &= ~LK_SHARED_WAITERS;
1210 						if (realexslp != 0) {
1211 							LOCK_LOG2(lk,
1212 					"%s: %p has only LK_SLEEPFAIL sleepers",
1213 							    __func__, lk);
1214 							LOCK_LOG2(lk,
1215 			"%s: %p waking up threads on the exclusive queue",
1216 							    __func__, lk);
1217 							wakeup_swapper =
1218 							    sleepq_broadcast(
1219 							    &lk->lock_object,
1220 							    SLEEPQ_LK, 0,
1221 							    SQ_EXCLUSIVE_QUEUE);
1222 						}
1223 					} else
1224 						lk->lk_exslpfail = 0;
1225 				}
1226 				if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) {
1227 					sleepq_release(&lk->lock_object);
1228 					continue;
1229 				}
1230 				LOCK_LOG3(lk,
1231 				"%s: %p waking up all threads on the %s queue",
1232 				    __func__, lk, queue == SQ_SHARED_QUEUE ?
1233 				    "shared" : "exclusive");
1234 				wakeup_swapper |= sleepq_broadcast(
1235 				    &lk->lock_object, SLEEPQ_LK, 0, queue);
1236 
1237 				/*
1238 				 * If shared waiters have been woken up we need
1239 				 * to wait for one of them to acquire the lock
1240 				 * before setting the exclusive waiters flag in
1241 				 * order to avoid a deadlock.
1242 				 */
1243 				if (queue == SQ_SHARED_QUEUE) {
1244 					for (v = lk->lk_lock;
1245 					    (v & LK_SHARE) && !LK_SHARERS(v);
1246 					    v = lk->lk_lock)
1247 						cpu_spinwait();
1248 				}
1249 			}
1250 
1251 			/*
1252 			 * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
1253 			 * fail, loop back and retry.
1254 			 */
1255 			if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
1256 				if (!atomic_cmpset_ptr(&lk->lk_lock, x,
1257 				    x | LK_EXCLUSIVE_WAITERS)) {
1258 					sleepq_release(&lk->lock_object);
1259 					continue;
1260 				}
1261 				LOCK_LOG2(lk, "%s: %p set drain waiters flag",
1262 				    __func__, lk);
1263 			}
1264 
1265 			/*
1266 			 * Since we have been unable to acquire the
1267 			 * exclusive lock and the exclusive waiters flag
1268 			 * is set, we will sleep.
1269 			 */
1270 			if (flags & LK_INTERLOCK) {
1271 				class->lc_unlock(ilk);
1272 				flags &= ~LK_INTERLOCK;
1273 			}
1274 			GIANT_SAVE();
1275 			sleepq_add(&lk->lock_object, NULL, iwmesg, SLEEPQ_LK,
1276 			    SQ_EXCLUSIVE_QUEUE);
1277 			sleepq_wait(&lk->lock_object, ipri & PRIMASK);
1278 			GIANT_RESTORE();
1279 			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
1280 			    __func__, lk);
1281 		}
1282 
1283 		if (error == 0) {
1284 			lock_profile_obtain_lock_success(&lk->lock_object,
1285 			    contested, waittime, file, line);
1286 			LOCK_LOG_LOCK("DRAIN", &lk->lock_object, 0,
1287 			    lk->lk_recurse, file, line);
1288 			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
1289 			    LK_TRYWIT(flags), file, line);
1290 			TD_LOCKS_INC(curthread);
1291 			STACK_SAVE(lk);
1292 		}
1293 		break;
1294 	default:
1295 		if (flags & LK_INTERLOCK)
1296 			class->lc_unlock(ilk);
1297 		panic("%s: unknown lockmgr request 0x%x\n", __func__, op);
1298 	}
1299 
1300 	if (flags & LK_INTERLOCK)
1301 		class->lc_unlock(ilk);
1302 	if (wakeup_swapper)
1303 		kick_proc0();
1304 
1305 	return (error);
1306 }
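
/*
 * Example (sketch): __lockmgr_args() is normally reached through the
 * lockmgr() family of wrapper macros in <sys/lockmgr.h>.  A hypothetical
 * caller handing off an interlock, which the operation always drops when
 * LK_INTERLOCK is set, might look like:
 *
 *	mtx_lock(&ilk_mtx);
 *	error = lockmgr(&lk, LK_EXCLUSIVE | LK_INTERLOCK, &ilk_mtx);
 *
 * On return ilk_mtx has been released whether or not the lock was acquired.
 */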
1307 
1308 void
1309 _lockmgr_disown(struct lock *lk, const char *file, int line)
1310 {
1311 	uintptr_t tid, x;
1312 
1313 	if (SCHEDULER_STOPPED())
1314 		return;
1315 
1316 	tid = (uintptr_t)curthread;
1317 	_lockmgr_assert(lk, KA_XLOCKED, file, line);
1318 
1319 	/*
1320 	 * Panic if the lock is recursed.
1321 	 */
1322 	if (lockmgr_xlocked(lk) && lockmgr_recursed(lk))
1323 		panic("%s: disown a recursed lockmgr @ %s:%d\n",
1324 		    __func__,  file, line);
1325 
1326 	/*
1327 	 * If the owner is already LK_KERNPROC just skip the whole operation.
1328 	 */
1329 	if (LK_HOLDER(lk->lk_lock) != tid)
1330 		return;
1331 	lock_profile_release_lock(&lk->lock_object);
1332 	LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line);
1333 	WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
1334 	TD_LOCKS_DEC(curthread);
1335 	STACK_SAVE(lk);
1336 
1337 	/*
1338 	 * In order to preserve waiters flags, just spin.
1339 	 */
1340 	for (;;) {
1341 		x = lk->lk_lock;
1342 		MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1343 		x &= LK_ALL_WAITERS;
1344 		if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1345 		    LK_KERNPROC | x))
1346 			return;
1347 		cpu_spinwait();
1348 	}
1349 }
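
/*
 * Example (sketch): disowning hands an exclusively held lock over to
 * LK_KERNPROC so that a different context may release it later.  A
 * hypothetical hand-off:
 *
 *	(void)lockmgr(&lk, LK_EXCLUSIVE, NULL);
 *	lockmgr_disown(&lk);
 *
 * and later, possibly from another thread:
 *
 *	(void)lockmgr(&lk, LK_RELEASE, NULL);
 */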
1350 
1351 void
1352 lockmgr_printinfo(const struct lock *lk)
1353 {
1354 	struct thread *td;
1355 	uintptr_t x;
1356 
1357 	if (lk->lk_lock == LK_UNLOCKED)
1358 		printf("lock type %s: UNLOCKED\n", lk->lock_object.lo_name);
1359 	else if (lk->lk_lock & LK_SHARE)
1360 		printf("lock type %s: SHARED (count %ju)\n",
1361 		    lk->lock_object.lo_name,
1362 		    (uintmax_t)LK_SHARERS(lk->lk_lock));
1363 	else {
1364 		td = lockmgr_xholder(lk);
1365 		if (td == (struct thread *)LK_KERNPROC)
1366 			printf("lock type %s: EXCL by KERNPROC\n",
1367 			    lk->lock_object.lo_name);
1368 		else
1369 			printf("lock type %s: EXCL by thread %p "
1370 			    "(pid %d, %s, tid %d)\n", lk->lock_object.lo_name,
1371 			    td, td->td_proc->p_pid, td->td_proc->p_comm,
1372 			    td->td_tid);
1373 	}
1374 
1375 	x = lk->lk_lock;
1376 	if (x & LK_EXCLUSIVE_WAITERS)
1377 		printf(" with exclusive waiters pending\n");
1378 	if (x & LK_SHARED_WAITERS)
1379 		printf(" with shared waiters pending\n");
1380 	if (x & LK_EXCLUSIVE_SPINNERS)
1381 		printf(" with exclusive spinners pending\n");
1382 
1383 	STACK_PRINT(lk);
1384 }
1385 
1386 int
1387 lockstatus(const struct lock *lk)
1388 {
1389 	uintptr_t v, x;
1390 	int ret;
1391 
1392 	ret = LK_SHARED;
1393 	x = lk->lk_lock;
1394 	v = LK_HOLDER(x);
1395 
1396 	if ((x & LK_SHARE) == 0) {
1397 		if (v == (uintptr_t)curthread || v == LK_KERNPROC)
1398 			ret = LK_EXCLUSIVE;
1399 		else
1400 			ret = LK_EXCLOTHER;
1401 	} else if (x == LK_UNLOCKED)
1402 		ret = 0;
1403 
1404 	return (ret);
1405 }
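
/*
 * Example (sketch): the return value above maps to the lock state as
 * follows; for instance, the default VOP_ISLOCKED() implementation simply
 * forwards this value for the vnode lock.
 *
 *	lockstatus(&lk) == LK_EXCLUSIVE	  held by curthread (or LK_KERNPROC)
 *	lockstatus(&lk) == LK_EXCLOTHER	  held exclusively by another thread
 *	lockstatus(&lk) == LK_SHARED	  held in shared mode
 *	lockstatus(&lk) == 0		  unlocked
 */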
1406 
1407 #ifdef INVARIANT_SUPPORT
1408 
1409 FEATURE(invariant_support,
1410     "Support for modules compiled with INVARIANTS option");
1411 
1412 #ifndef INVARIANTS
1413 #undef	_lockmgr_assert
1414 #endif
1415 
1416 void
1417 _lockmgr_assert(const struct lock *lk, int what, const char *file, int line)
1418 {
1419 	int slocked = 0;
1420 
1421 	if (panicstr != NULL)
1422 		return;
1423 	switch (what) {
1424 	case KA_SLOCKED:
1425 	case KA_SLOCKED | KA_NOTRECURSED:
1426 	case KA_SLOCKED | KA_RECURSED:
1427 		slocked = 1;
1428 	case KA_LOCKED:
1429 	case KA_LOCKED | KA_NOTRECURSED:
1430 	case KA_LOCKED | KA_RECURSED:
1431 #ifdef WITNESS
1432 
1433 		/*
1434 		 * We cannot trust WITNESS if the lock is held in exclusive
1435 		 * mode and a call to lockmgr_disown() happened.
1436 		 * Work around this by skipping the check if the lock is held in
1437 		 * exclusive mode even for the KA_LOCKED case.
1438 		 */
1439 		if (slocked || (lk->lk_lock & LK_SHARE)) {
1440 			witness_assert(&lk->lock_object, what, file, line);
1441 			break;
1442 		}
1443 #endif
1444 		if (lk->lk_lock == LK_UNLOCKED ||
1445 		    ((lk->lk_lock & LK_SHARE) == 0 && (slocked ||
1446 		    (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk)))))
1447 			panic("Lock %s not %slocked @ %s:%d\n",
1448 			    lk->lock_object.lo_name, slocked ? "share" : "",
1449 			    file, line);
1450 
1451 		if ((lk->lk_lock & LK_SHARE) == 0) {
1452 			if (lockmgr_recursed(lk)) {
1453 				if (what & KA_NOTRECURSED)
1454 					panic("Lock %s recursed @ %s:%d\n",
1455 					    lk->lock_object.lo_name, file,
1456 					    line);
1457 			} else if (what & KA_RECURSED)
1458 				panic("Lock %s not recursed @ %s:%d\n",
1459 				    lk->lock_object.lo_name, file, line);
1460 		}
1461 		break;
1462 	case KA_XLOCKED:
1463 	case KA_XLOCKED | KA_NOTRECURSED:
1464 	case KA_XLOCKED | KA_RECURSED:
1465 		if (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk))
1466 			panic("Lock %s not exclusively locked @ %s:%d\n",
1467 			    lk->lock_object.lo_name, file, line);
1468 		if (lockmgr_recursed(lk)) {
1469 			if (what & KA_NOTRECURSED)
1470 				panic("Lock %s recursed @ %s:%d\n",
1471 				    lk->lock_object.lo_name, file, line);
1472 		} else if (what & KA_RECURSED)
1473 			panic("Lock %s not recursed @ %s:%d\n",
1474 			    lk->lock_object.lo_name, file, line);
1475 		break;
1476 	case KA_UNLOCKED:
1477 		if (lockmgr_xlocked(lk) || lockmgr_disowned(lk))
1478 			panic("Lock %s exclusively locked @ %s:%d\n",
1479 			    lk->lock_object.lo_name, file, line);
1480 		break;
1481 	default:
1482 		panic("Unknown lockmgr assertion: %d @ %s:%d\n", what, file,
1483 		    line);
1484 	}
1485 }
1486 #endif
1487 
1488 #ifdef DDB
1489 int
1490 lockmgr_chain(struct thread *td, struct thread **ownerp)
1491 {
1492 	struct lock *lk;
1493 
1494 	lk = td->td_wchan;
1495 
1496 	if (LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr)
1497 		return (0);
1498 	db_printf("blocked on lockmgr %s", lk->lock_object.lo_name);
1499 	if (lk->lk_lock & LK_SHARE)
1500 		db_printf("SHARED (count %ju)\n",
1501 		    (uintmax_t)LK_SHARERS(lk->lk_lock));
1502 	else
1503 		db_printf("EXCL\n");
1504 	*ownerp = lockmgr_xholder(lk);
1505 
1506 	return (1);
1507 }
1508 
1509 static void
1510 db_show_lockmgr(const struct lock_object *lock)
1511 {
1512 	struct thread *td;
1513 	const struct lock *lk;
1514 
1515 	lk = (const struct lock *)lock;
1516 
1517 	db_printf(" state: ");
1518 	if (lk->lk_lock == LK_UNLOCKED)
1519 		db_printf("UNLOCKED\n");
1520 	else if (lk->lk_lock & LK_SHARE)
1521 		db_printf("SLOCK: %ju\n", (uintmax_t)LK_SHARERS(lk->lk_lock));
1522 	else {
1523 		td = lockmgr_xholder(lk);
1524 		if (td == (struct thread *)LK_KERNPROC)
1525 			db_printf("XLOCK: LK_KERNPROC\n");
1526 		else
1527 			db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
1528 			    td->td_tid, td->td_proc->p_pid,
1529 			    td->td_proc->p_comm);
1530 		if (lockmgr_recursed(lk))
1531 			db_printf(" recursed: %d\n", lk->lk_recurse);
1532 	}
1533 	db_printf(" waiters: ");
1534 	switch (lk->lk_lock & LK_ALL_WAITERS) {
1535 	case LK_SHARED_WAITERS:
1536 		db_printf("shared\n");
1537 		break;
1538 	case LK_EXCLUSIVE_WAITERS:
1539 		db_printf("exclusive\n");
1540 		break;
1541 	case LK_ALL_WAITERS:
1542 		db_printf("shared and exclusive\n");
1543 		break;
1544 	default:
1545 		db_printf("none\n");
1546 	}
1547 	db_printf(" spinners: ");
1548 	if (lk->lk_lock & LK_EXCLUSIVE_SPINNERS)
1549 		db_printf("exclusive\n");
1550 	else
1551 		db_printf("none\n");
1552 }
1553 #endif
1554