xref: /freebsd/sys/kern/kern_lock.c (revision 5944f899a2519c6321bac3c17cc076418643a088)
1 /*-
2  * Copyright (c) 2008 Attilio Rao <attilio@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice(s), this list of conditions and the following disclaimer as
10  *    the first lines of this file unmodified other than the possible
11  *    addition of one or more copyright notices.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice(s), this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
26  * DAMAGE.
27  */
28 
29 #include "opt_adaptive_lockmgrs.h"
30 #include "opt_ddb.h"
31 #include "opt_hwpmc_hooks.h"
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 
36 #include <sys/param.h>
37 #include <sys/kdb.h>
38 #include <sys/ktr.h>
39 #include <sys/lock.h>
40 #include <sys/lock_profile.h>
41 #include <sys/lockmgr.h>
42 #include <sys/mutex.h>
43 #include <sys/proc.h>
44 #include <sys/sleepqueue.h>
45 #ifdef DEBUG_LOCKS
46 #include <sys/stack.h>
47 #endif
48 #include <sys/sysctl.h>
49 #include <sys/systm.h>
50 
51 #include <machine/cpu.h>
52 
53 #ifdef DDB
54 #include <ddb/ddb.h>
55 #endif
56 
57 #ifdef HWPMC_HOOKS
58 #include <sys/pmckern.h>
59 PMC_SOFT_DECLARE( , , lock, failed);
60 #endif
61 
/*
 * Compile-time layout checks: LK_ADAPTIVE and LK_NOSHARE must lie entirely
 * within LO_CLASSFLAGS (lockinit() hands them to lock_init() together with
 * the LO_* flags), and LK_UNLOCKED must carry no waiter or spinner bit.
 */
62 CTASSERT(((LK_ADAPTIVE | LK_NOSHARE) & LO_CLASSFLAGS) ==
63     (LK_ADAPTIVE | LK_NOSHARE));
64 CTASSERT(LK_UNLOCKED == (LK_UNLOCKED &
65     ~(LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS)));
66 
/* Sleepqueue indexes used for exclusive and shared waiters respectively. */
67 #define	SQ_EXCLUSIVE_QUEUE	0
68 #define	SQ_SHARED_QUEUE		1
69 
/* Without INVARIANTS the internal assertion hook expands to nothing. */
70 #ifndef INVARIANTS
71 #define	_lockmgr_assert(lk, what, file, line)
72 #endif
73 
/* Adjust the per-thread count of shared lockmgr locks (td_lk_slocks). */
74 #define	TD_SLOCKS_INC(td)	((td)->td_lk_slocks++)
75 #define	TD_SLOCKS_DEC(td)	((td)->td_lk_slocks--)
76 
/*
 * Stack-trace helpers operating on lk_stack; they expand to nothing unless
 * DEBUG_LOCKS is defined.
 */
77 #ifndef DEBUG_LOCKS
78 #define	STACK_PRINT(lk)
79 #define	STACK_SAVE(lk)
80 #define	STACK_ZERO(lk)
81 #else
82 #define	STACK_PRINT(lk)	stack_print_ddb(&(lk)->lk_stack)
83 #define	STACK_SAVE(lk)	stack_save(&(lk)->lk_stack)
84 #define	STACK_ZERO(lk)	stack_zero(&(lk)->lk_stack)
85 #endif
86 
/*
 * KTR logging shorthands: emit a KTR_LOCK record with two (LOCK_LOG2) or
 * three (LOCK_LOG3) arguments when LOCK_LOG_TEST() enables logging for the
 * lock.  Each expansion is wrapped in do { } while (0) so that it behaves
 * as a single statement and cannot capture an "else" written after the
 * invocation.
 */
#define	LOCK_LOG2(lk, string, arg1, arg2) do {				\
	if (LOCK_LOG_TEST(&(lk)->lock_object, 0))			\
		CTR2(KTR_LOCK, (string), (arg1), (arg2));		\
} while (0)
#define	LOCK_LOG3(lk, string, arg1, arg2, arg3) do {			\
	if (LOCK_LOG_TEST(&(lk)->lock_object, 0))			\
		CTR3(KTR_LOCK, (string), (arg1), (arg2), (arg3));	\
} while (0)
93 
/*
 * Helpers to drop Giant around a sleep or spin and take it back afterwards.
 * GIANT_DECLARE introduces the counter _i and the WITNESS save slot used by
 * the other two macros.  GIANT_SAVE() does nothing unless the caller owns
 * Giant; otherwise it saves the WITNESS state and unlocks Giant until it is
 * no longer owned, counting the unlocks in _i.  GIANT_RESTORE() locks Giant
 * again that many times and restores the WITNESS state.
 */
94 #define	GIANT_DECLARE							\
95 	int _i = 0;							\
96 	WITNESS_SAVE_DECL(Giant)
97 #define	GIANT_RESTORE() do {						\
98 	if (_i > 0) {							\
99 		while (_i--)						\
100 			mtx_lock(&Giant);				\
101 		WITNESS_RESTORE(&Giant.lock_object, Giant);		\
102 	}								\
103 } while (0)
104 #define	GIANT_SAVE() do {						\
105 	if (mtx_owned(&Giant)) {					\
106 		WITNESS_SAVE(&Giant.lock_object, Giant);		\
107 		while (mtx_owned(&Giant)) {				\
108 			_i++;						\
109 			mtx_unlock(&Giant);				\
110 		}							\
111 	}								\
112 } while (0)
113 
/*
 * LK_CAN_SHARE(x, flags): true when the lock word x is in shared mode
 * (LK_SHARE set) and one of the following holds: no exclusive waiter or
 * exclusive spinner bit is set; curthread already holds shared lockmgr
 * locks and the request does not carry LK_NODDLKTREAT; or curthread has
 * TDP_DEADLKTREAT set.  The flags argument is parenthesized so that a
 * compound expression can be passed safely.
 *
 * LK_TRYOP(x): non-zero when the request flags x contain LK_NOWAIT.
 */
#define	LK_CAN_SHARE(x, flags)						\
	(((x) & LK_SHARE) &&						\
	(((x) & (LK_EXCLUSIVE_WAITERS | LK_EXCLUSIVE_SPINNERS)) == 0 ||	\
	(curthread->td_lk_slocks != 0 &&				\
	!((flags) & LK_NODDLKTREAT)) ||					\
	(curthread->td_pflags & TDP_DEADLKTREAT)))
#define	LK_TRYOP(x)							\
	((x) & LK_NOWAIT)

/*
 * LK_CAN_WITNESS(x): the request may be order-checked by WITNESS, i.e.
 * LK_NOWITNESS is clear and it is not a try operation.
 * LK_TRYWIT(x): LOP_TRYLOCK for a try operation, 0 otherwise.
 */
#define	LK_CAN_WITNESS(x)						\
	(((x) & LK_NOWITNESS) == 0 && !LK_TRYOP(x))
#define	LK_TRYWIT(x)							\
	(LK_TRYOP(x) ? LOP_TRYLOCK : 0)

/*
 * LK_CAN_ADAPT(lk, f): the lock has LK_ADAPTIVE set in its lock_object
 * flags and the request flags f do not contain LK_SLEEPFAIL.
 */
#define	LK_CAN_ADAPT(lk, f)						\
	(((lk)->lock_object.lo_flags & LK_ADAPTIVE) != 0 &&		\
	((f) & LK_SLEEPFAIL) == 0)

/*
 * Compare the lock word, with every LK_FLAGMASK bit other than LK_SHARE
 * masked out, against LK_KERNPROC (disowned) or curthread (exclusively
 * held by the caller).
 */
#define	lockmgr_disowned(lk)						\
	(((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == LK_KERNPROC)

#define	lockmgr_xlocked(lk)						\
	(((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread)
136 
/*
 * Prototypes for the lock_class methods installed below.  The assert, lock,
 * unlock and owner methods defined in this file only panic().
 */
137 static void	assert_lockmgr(const struct lock_object *lock, int how);
138 #ifdef DDB
139 static void	db_show_lockmgr(const struct lock_object *lock);
140 #endif
141 static void	lock_lockmgr(struct lock_object *lock, uintptr_t how);
142 #ifdef KDTRACE_HOOKS
143 static int	owner_lockmgr(const struct lock_object *lock,
144 		    struct thread **owner);
145 #endif
146 static uintptr_t unlock_lockmgr(struct lock_object *lock);
147 
/*
 * Lock class descriptor for lockmgr locks; lockinit() passes it to
 * lock_init().  The DDB and KDTRACE_HOOKS methods are only present when the
 * corresponding option is compiled in.
 */
148 struct lock_class lock_class_lockmgr = {
149 	.lc_name = "lockmgr",
150 	.lc_flags = LC_RECURSABLE | LC_SLEEPABLE | LC_SLEEPLOCK | LC_UPGRADABLE,
151 	.lc_assert = assert_lockmgr,
152 #ifdef DDB
153 	.lc_ddb_show = db_show_lockmgr,
154 #endif
155 	.lc_lock = lock_lockmgr,
156 	.lc_unlock = unlock_lockmgr,
157 #ifdef KDTRACE_HOOKS
158 	.lc_owner = owner_lockmgr,
159 #endif
160 };
161 
/*
 * Adaptive spinning tunables, exported read-write as "retries" and "loops"
 * under the lockmgr node of the _debug sysctl tree.  In __lockmgr_args()
 * alk_retries bounds the number of spin rounds (spintries) against a lock
 * held in shared mode and alk_loops bounds the iterations of each round.
 */
162 #ifdef ADAPTIVE_LOCKMGRS
163 static u_int alk_retries = 10;
164 static u_int alk_loops = 10000;
165 static SYSCTL_NODE(_debug, OID_AUTO, lockmgr, CTLFLAG_RD, NULL,
166     "lockmgr debugging");
167 SYSCTL_UINT(_debug_lockmgr, OID_AUTO, retries, CTLFLAG_RW, &alk_retries, 0, "");
168 SYSCTL_UINT(_debug_lockmgr, OID_AUTO, loops, CTLFLAG_RW, &alk_loops, 0, "");
169 #endif
170 
/*
 * Forward declarations of the lock-free shared acquire/release helpers;
 * they are used by wakeupshlk() before their definitions appear.
 */
171 static bool __always_inline lockmgr_slock_try(struct lock *lk, uintptr_t *xp,
172     int flags);
173 static bool __always_inline lockmgr_sunlock_try(struct lock *lk, uintptr_t x);
174 
/*
 * Bookkeeping performed once a shared lock has been obtained: report the
 * acquisition to lock profiling (with the contested/waittime values given
 * by the caller), to the lock log and to WITNESS (as a try-lock when
 * LK_NOWAIT is set in flags), bump curthread's lock counters, including
 * td_lk_slocks, and, with DEBUG_LOCKS, save the current stack in the lock.
 */
175 static void
176 lockmgr_note_shared_acquire(struct lock *lk, int contested,
177     uint64_t waittime, const char *file, int line, int flags)
178 {
179 
180 	lock_profile_obtain_lock_success(&lk->lock_object, contested, waittime,
181 	    file, line);
182 	LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, file, line);
183 	WITNESS_LOCK(&lk->lock_object, LK_TRYWIT(flags), file, line);
184 	TD_LOCKS_INC(curthread);
185 	TD_SLOCKS_INC(curthread);
186 	STACK_SAVE(lk);
187 }
188 
/*
 * Bookkeeping for the release of a shared lock: report the release to lock
 * profiling, WITNESS and the lock log, then decrement the counters that
 * lockmgr_note_shared_acquire() incremented on curthread.
 */
189 static void
190 lockmgr_note_shared_release(struct lock *lk, const char *file, int line)
191 {
192 
193 	lock_profile_release_lock(&lk->lock_object);
194 	WITNESS_UNLOCK(&lk->lock_object, 0, file, line);
195 	LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line);
196 	TD_LOCKS_DEC(curthread);
197 	TD_SLOCKS_DEC(curthread);
198 }
199 
/*
 * Bookkeeping performed once an exclusive lock has been obtained: report
 * the acquisition to lock profiling, the lock log (together with the
 * current lk_recurse value) and WITNESS (LOP_EXCLUSIVE, plus LOP_TRYLOCK
 * when LK_NOWAIT is set in flags), bump curthread's lock counter and, with
 * DEBUG_LOCKS, save the current stack in the lock.
 */
200 static void
201 lockmgr_note_exclusive_acquire(struct lock *lk, int contested,
202     uint64_t waittime, const char *file, int line, int flags)
203 {
204 
205 	lock_profile_obtain_lock_success(&lk->lock_object, contested, waittime,
206 	    file, line);
207 	LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0, lk->lk_recurse, file, line);
208 	WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE | LK_TRYWIT(flags), file,
209 	    line);
210 	TD_LOCKS_INC(curthread);
211 	STACK_SAVE(lk);
212 }
213 
/*
 * Bookkeeping for the release of an exclusive lock: report the release to
 * lock profiling, the lock log (together with lk_recurse) and WITNESS, then
 * decrement curthread's lock counter.
 */
214 static void
215 lockmgr_note_exclusive_release(struct lock *lk, const char *file, int line)
216 {
217 
218 	lock_profile_release_lock(&lk->lock_object);
219 	LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0, lk->lk_recurse, file,
220 	    line);
221 	WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
222 	TD_LOCKS_DEC(curthread);
223 }
224 
/*
 * Bookkeeping for a successful shared-to-exclusive upgrade: log it, tell
 * WITNESS about the upgrade and drop the shared-lock count of curthread.
 * Only td_lk_slocks is decremented here; no TD_LOCKS_* macro is invoked.
 */
225 static void
226 lockmgr_note_exclusive_upgrade(struct lock *lk, const char *file, int line,
227     int flags)
228 {
229 
230 	LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file,
231 	    line);
232 	WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE |
233 	    LK_TRYWIT(flags), file, line);
234 	TD_SLOCKS_DEC(curthread);
235 }
236 
/*
 * Return the holder encoded in the lock word as extracted by LK_HOLDER(),
 * or NULL when the LK_SHARE bit is set.  The lock word is read once into a
 * local, so the answer is a snapshot.
 */
237 static __inline struct thread *
238 lockmgr_xholder(const struct lock *lk)
239 {
240 	uintptr_t x;
241 
242 	x = lk->lk_lock;
243 	return ((x & LK_SHARE) ? NULL : (struct thread *)LK_HOLDER(x));
244 }
245 
246 /*
247  * It assumes sleepq_lock held and returns with this one unheld.
248  * It also assumes the generic interlock is sane and previously checked.
249  * If LK_INTERLOCK is specified the interlock is not reacquired after the
250  * sleep.
251  */
/*
 * Put curthread to sleep on one of the lock's sleepqueues.
 *
 * lk:    lock to sleep on (its lock_object is the wait channel).
 * flags: request flags; LK_INTERLOCK, LK_SLEEPFAIL and LK_TIMELOCK are
 *        examined here.
 * ilk:   interlock, released before sleeping when LK_INTERLOCK is set.
 * wmesg: wait message handed to sleepq_add().
 * pri:   sleep priority; PCATCH selects an interruptible sleep and the
 *        remaining PRIMASK bits are passed to the sleepq wait routine.
 * timo:  timeout, armed only when LK_TIMELOCK is set and timo is non-zero.
 * queue: SQ_EXCLUSIVE_QUEUE or SQ_SHARED_QUEUE.
 *
 * Returns the error reported by the sleepq wait routine, 0 for a plain
 * sleepq_wait(), or ENOLCK when LK_SLEEPFAIL is set and the sleep itself
 * ended without error.
 */
252 static __inline int
253 sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk,
254     const char *wmesg, int pri, int timo, int queue)
255 {
256 	GIANT_DECLARE;
257 	struct lock_class *class;
258 	int catch, error;
259 
260 	class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
261 	catch = pri & PCATCH;
262 	pri &= PRIMASK;
263 	error = 0;
264 
265 	LOCK_LOG3(lk, "%s: %p blocking on the %s sleepqueue", __func__, lk,
266 	    (queue == SQ_EXCLUSIVE_QUEUE) ? "exclusive" : "shared");
267 
268 	if (flags & LK_INTERLOCK)
269 		class->lc_unlock(ilk);
	/*
	 * Count exclusive sleepers using LK_SLEEPFAIL; wakeupshlk() compares
	 * this counter with the real number of exclusive sleepers.
	 */
270 	if (queue == SQ_EXCLUSIVE_QUEUE && (flags & LK_SLEEPFAIL) != 0)
271 		lk->lk_exslpfail++;
272 	GIANT_SAVE();
273 	sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
274 	    SLEEPQ_INTERRUPTIBLE : 0), queue);
275 	if ((flags & LK_TIMELOCK) && timo)
276 		sleepq_set_timeout(&lk->lock_object, timo);
277 
278 	/*
279 	 * Decisional switch for real sleeping.
280 	 */
281 	if ((flags & LK_TIMELOCK) && timo && catch)
282 		error = sleepq_timedwait_sig(&lk->lock_object, pri);
283 	else if ((flags & LK_TIMELOCK) && timo)
284 		error = sleepq_timedwait(&lk->lock_object, pri);
285 	else if (catch)
286 		error = sleepq_wait_sig(&lk->lock_object, pri);
287 	else
288 		sleepq_wait(&lk->lock_object, pri);
289 	GIANT_RESTORE();
	/* With LK_SLEEPFAIL an otherwise successful sleep is reported as ENOLCK. */
290 	if ((flags & LK_SLEEPFAIL) && error == 0)
291 		error = ENOLCK;
292 
293 	return (error);
294 }
295 
/*
 * Release one shared reference on lk and wake up waiters when that requires
 * more than the lock-free path in lockmgr_sunlock_try().
 *
 * When the lock-free release fails, the sleepqueue chain lock is taken, the
 * waiter/spinner bits are sampled and the queue to wake up is chosen (see
 * the comment in the loop).  The lock word is then switched from "one
 * sharer plus the sampled bits" to the new value with a release cmpset; if
 * that fails the whole sequence is retried.
 *
 * Returns the OR of the sleepq_broadcast() results (0 when no broadcast was
 * done), which callers accumulate in their wakeup_swapper variable.
 */
296 static __inline int
297 wakeupshlk(struct lock *lk, const char *file, int line)
298 {
299 	uintptr_t v, x;
300 	u_int realexslp;
301 	int queue, wakeup_swapper;
302 
303 	wakeup_swapper = 0;
304 	for (;;) {
305 		x = lk->lk_lock;
306 		if (lockmgr_sunlock_try(lk, x))
307 			break;
308 
309 		/*
310 		 * We should have a sharer with waiters, so enter the hard
311 		 * path in order to handle wakeups correctly.
312 		 */
313 		sleepq_lock(&lk->lock_object);
314 		x = lk->lk_lock & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
315 		v = LK_UNLOCKED;
316 
317 		/*
318 		 * If the lock has exclusive waiters, give them preference in
319 		 * order to avoid deadlock with shared runners up.
320 		 * If interruptible sleeps left the exclusive queue empty
321 		 * avoid a starvation for the threads sleeping on the shared
322 		 * queue by giving them precedence and cleaning up the
323 		 * exclusive waiters bit anyway.
324 		 * Please note that lk_exslpfail count may be lying about
325 		 * the real number of waiters with the LK_SLEEPFAIL flag on
326 		 * because they may be used in conjunction with interruptible
327 		 * sleeps so lk_exslpfail might be considered an 'upper limit'
328 		 * bound, including the edge cases.
329 		 */
330 		realexslp = sleepq_sleepcnt(&lk->lock_object,
331 		    SQ_EXCLUSIVE_QUEUE);
332 		if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
333 			if (lk->lk_exslpfail < realexslp) {
				/*
				 * Wake the exclusive queue and keep the
				 * shared waiters bit in the new lock word.
				 */
334 				lk->lk_exslpfail = 0;
335 				queue = SQ_EXCLUSIVE_QUEUE;
336 				v |= (x & LK_SHARED_WAITERS);
337 			} else {
338 				lk->lk_exslpfail = 0;
339 				LOCK_LOG2(lk,
340 				    "%s: %p has only LK_SLEEPFAIL sleepers",
341 				    __func__, lk);
342 				LOCK_LOG2(lk,
343 			    "%s: %p waking up threads on the exclusive queue",
344 				    __func__, lk);
345 				wakeup_swapper =
346 				    sleepq_broadcast(&lk->lock_object,
347 				    SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
348 				queue = SQ_SHARED_QUEUE;
349 			}
350 
351 		} else {
352 
353 			/*
354 			 * Exclusive waiters sleeping with LK_SLEEPFAIL on
355 			 * and using interruptible sleeps/timeout may have
356 			 * left spurious lk_exslpfail counts on, so clean
357 			 * it up anyway.
358 			 */
359 			lk->lk_exslpfail = 0;
360 			queue = SQ_SHARED_QUEUE;
361 		}
362 
		/*
		 * Publish the new lock word; a failure means the word changed
		 * since it was sampled, so start over.
		 */
363 		if (!atomic_cmpset_rel_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x,
364 		    v)) {
365 			sleepq_release(&lk->lock_object);
366 			continue;
367 		}
368 		LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
369 		    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
370 		    "exclusive");
371 		wakeup_swapper |= sleepq_broadcast(&lk->lock_object, SLEEPQ_LK,
372 		    0, queue);
373 		sleepq_release(&lk->lock_object);
374 		break;
375 	}
376 
377 	lockmgr_note_shared_release(lk, file, line);
378 	return (wakeup_swapper);
379 }
380 
/*
 * lc_assert method of the lockmgr lock class.  Not supported: it always
 * panics and ignores both arguments.
 */
381 static void
382 assert_lockmgr(const struct lock_object *lock, int what)
383 {
384 
385 	panic("lockmgr locks do not support assertions");
386 }
387 
/*
 * lc_lock method of the lockmgr lock class.  Not supported: it always
 * panics and ignores both arguments.
 */
388 static void
389 lock_lockmgr(struct lock_object *lock, uintptr_t how)
390 {
391 
392 	panic("lockmgr locks do not support sleep interlocking");
393 }
394 
/*
 * lc_unlock method of the lockmgr lock class.  Not supported: it always
 * panics, so no value is ever returned.
 */
395 static uintptr_t
396 unlock_lockmgr(struct lock_object *lock)
397 {
398 
399 	panic("lockmgr locks do not support sleep interlocking");
400 }
401 
/*
 * lc_owner method of the lockmgr lock class, built only with
 * KDTRACE_HOOKS.  Not supported: it always panics and never writes *owner.
 */
402 #ifdef KDTRACE_HOOKS
403 static int
404 owner_lockmgr(const struct lock_object *lock, struct thread **owner)
405 {
406 
407 	panic("lockmgr locks do not support owner inquiring");
408 }
409 #endif
410 
/*
 * Initialize a lockmgr lock.
 *
 * lk:    lock to set up; it is left LK_UNLOCKED with lk_recurse and
 *        lk_exslpfail cleared.
 * pri:   stored in lk_pri; __lockmgr_args() uses it when called with
 *        LK_PRIO_DEFAULT.
 * wmesg: name handed to lock_init(); also used in the alignment assertion
 *        message.
 * timo:  stored in lk_timo; __lockmgr_args() uses it when called with
 *        LK_TIMO_DEFAULT.
 * flags: LK_* initialization flags (must be within LK_INIT_MASK).  They
 *        are translated to LO_* lock_object flags as coded below; note
 *        that LO_DUPOK and LO_WITNESS are set when LK_NODUP and
 *        LK_NOWITNESS respectively are absent.  LK_ADAPTIVE and LK_NOSHARE
 *        are carried over unchanged.
 */
411 void
412 lockinit(struct lock *lk, int pri, const char *wmesg, int timo, int flags)
413 {
414 	int iflags;
415 
416 	MPASS((flags & ~LK_INIT_MASK) == 0);
417 	ASSERT_ATOMIC_LOAD_PTR(lk->lk_lock,
418             ("%s: lockmgr not aligned for %s: %p", __func__, wmesg,
419             &lk->lk_lock));
420 
421 	iflags = LO_SLEEPABLE | LO_UPGRADABLE;
422 	if (flags & LK_CANRECURSE)
423 		iflags |= LO_RECURSABLE;
424 	if ((flags & LK_NODUP) == 0)
425 		iflags |= LO_DUPOK;
426 	if (flags & LK_NOPROFILE)
427 		iflags |= LO_NOPROFILE;
428 	if ((flags & LK_NOWITNESS) == 0)
429 		iflags |= LO_WITNESS;
430 	if (flags & LK_QUIET)
431 		iflags |= LO_QUIET;
432 	if (flags & LK_IS_VNODE)
433 		iflags |= LO_IS_VNODE;
434 	iflags |= flags & (LK_ADAPTIVE | LK_NOSHARE);
435 
436 	lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
437 	lk->lk_lock = LK_UNLOCKED;
438 	lk->lk_recurse = 0;
439 	lk->lk_exslpfail = 0;
440 	lk->lk_timo = timo;
441 	lk->lk_pri = pri;
442 	STACK_ZERO(lk);
443 }
444 
445 /*
446  * XXX: Gross hacks to manipulate external lock flags after
447  * initialization.  Used for certain vnode and buf locks.
448  */
/*
 * Allow shared acquisition of lk again by clearing LK_NOSHARE in its
 * lock_object flags.  The lock is asserted to be exclusively held (the
 * assertion is only compiled in with INVARIANTS).
 */
449 void
450 lockallowshare(struct lock *lk)
451 {
452 
453 	lockmgr_assert(lk, KA_XLOCKED);
454 	lk->lock_object.lo_flags &= ~LK_NOSHARE;
455 }
456 
/*
 * Forbid shared acquisition of lk by setting LK_NOSHARE in its lock_object
 * flags; __lockmgr_args() then turns LK_SHARED requests into LK_EXCLUSIVE
 * ones.  The lock is asserted to be exclusively held.
 */
457 void
458 lockdisableshare(struct lock *lk)
459 {
460 
461 	lockmgr_assert(lk, KA_XLOCKED);
462 	lk->lock_object.lo_flags |= LK_NOSHARE;
463 }
464 
/*
 * Permit exclusive recursion on lk by setting LO_RECURSABLE in its
 * lock_object flags.  The lock is asserted to be exclusively held.
 */
465 void
466 lockallowrecurse(struct lock *lk)
467 {
468 
469 	lockmgr_assert(lk, KA_XLOCKED);
470 	lk->lock_object.lo_flags |= LO_RECURSABLE;
471 }
472 
/*
 * Forbid exclusive recursion on lk by clearing LO_RECURSABLE in its
 * lock_object flags.  The lock is asserted to be exclusively held.
 */
473 void
474 lockdisablerecurse(struct lock *lk)
475 {
476 
477 	lockmgr_assert(lk, KA_XLOCKED);
478 	lk->lock_object.lo_flags &= ~LO_RECURSABLE;
479 }
480 
/*
 * Tear down a lockmgr lock.  The lock must be completely idle: the lock
 * word must equal LK_UNLOCKED (so no owner and no flag bits) and both
 * lk_recurse and lk_exslpfail must be zero; each condition is checked with
 * a KASSERT before lock_destroy() is called on the embedded lock_object.
 */
481 void
482 lockdestroy(struct lock *lk)
483 {
484 
485 	KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held"));
486 	KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed"));
487 	KASSERT(lk->lk_exslpfail == 0, ("lockmgr still exclusive waiters"));
488 	lock_destroy(&lk->lock_object);
489 }
490 
/*
 * Lock-free attempt to add one sharer to lk.
 *
 * lk:    lock to acquire in shared mode.
 * xp:    out parameter; receives the lock word that was last observed, so
 *        a caller that gets "false" can inspect why sharing was refused.
 * flags: request flags, consulted by LK_CAN_SHARE().
 *
 * Returns true when the sharer count was incremented (with acquire
 * semantics), false as soon as LK_CAN_SHARE() rejects the observed value.
 * There is no explicit reload inside the loop: it relies on
 * atomic_fcmpset_acq_ptr() storing the current lock word into *xp when the
 * compare fails.
 */
491 static bool __always_inline
492 lockmgr_slock_try(struct lock *lk, uintptr_t *xp, int flags)
493 {
494 
495 	/*
496 	 * If no other thread has an exclusive lock, or
497 	 * no exclusive waiter is present, bump the count of
498 	 * sharers.  Since we have to preserve the state of
499 	 * waiters, if we fail to acquire the shared lock
500 	 * loop back and retry.
501 	 */
502 	*xp = lk->lk_lock;
503 	while (LK_CAN_SHARE(*xp, flags)) {
504 		if (atomic_fcmpset_acq_ptr(&lk->lk_lock, xp,
505 		    *xp + LK_ONE_SHARER)) {
506 			return (true);
507 		}
508 	}
509 	return (false);
510 }
511 
/*
 * Lock-free attempt to drop one shared reference on lk.
 *
 * lk: lock held in shared mode.
 * x:  lock word previously read by the caller; it is a by-value copy that
 *     the failed atomic_fcmpset_rel_ptr() calls are relied upon to refresh
 *     before the loop re-evaluates it.
 *
 * Returns true when the reference was dropped here, either by decrementing
 * the sharer count or, for the last sharer with no waiters, by storing
 * LK_UNLOCKED.  Returns false when this is the last sharer and waiter bits
 * are set; the caller must then take the slow path that handles wakeups
 * (see wakeupshlk()).
 */
512 static bool __always_inline
513 lockmgr_sunlock_try(struct lock *lk, uintptr_t x)
514 {
515 
516 	for (;;) {
517 		/*
518 		 * If there is more than one shared lock held, just drop one
519 		 * and return.
520 		 */
521 		if (LK_SHARERS(x) > 1) {
522 			if (atomic_fcmpset_rel_ptr(&lk->lk_lock, &x,
523 			    x - LK_ONE_SHARER))
524 				return (true);
525 			continue;
526 		}
527 
528 		/*
529 		 * If there are not waiters on the exclusive queue, drop the
530 		 * lock quickly.
531 		 */
532 		if ((x & LK_ALL_WAITERS) == 0) {
533 			MPASS((x & ~LK_EXCLUSIVE_SPINNERS) ==
534 			    LK_SHARERS_LOCK(1));
535 			if (atomic_fcmpset_rel_ptr(&lk->lk_lock, &x,
536 			    LK_UNLOCKED))
537 				return (true);
538 			continue;
539 		}
540 		break;
541 	}
542 	return (false);
543 }
544 
/*
 * Fast path for lock acquisition and upgrade.
 *
 * Tries to satisfy LK_SHARED, LK_EXCLUSIVE, LK_UPGRADE and LK_TRYUPGRADE
 * requests with a single lock-free attempt.  Any other operation, and any
 * attempt that does not succeed, is forwarded to __lockmgr_args() with the
 * default wait message, priority and timeout.
 *
 * lk:    lock to operate on.
 * flags: LK_* request; the operation is flags & LK_TYPE_MASK.
 * ilk:   interlock, only used when LK_INTERLOCK is set in flags.
 * file, line: caller location for WITNESS, logging and profiling.
 *
 * Returns 0 when the fast path succeeded (the interlock, if any, has then
 * been released through its lock class), otherwise whatever
 * __lockmgr_args() returns.
 */
545 int
546 lockmgr_lock_fast_path(struct lock *lk, u_int flags, struct lock_object *ilk,
547     const char *file, int line)
548 {
549 	struct lock_class *class;
550 	uintptr_t x, v, tid;
551 	u_int op;
552 	bool locked;
553 
554 	op = flags & LK_TYPE_MASK;
555 	locked = false;
556 	switch (op) {
557 	case LK_SHARED:
558 		if (LK_CAN_WITNESS(flags))
559 			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
560 			    file, line, flags & LK_INTERLOCK ? ilk : NULL);
		/* LK_NOSHARE locks are left to __lockmgr_args(). */
561 		if (__predict_false(lk->lock_object.lo_flags & LK_NOSHARE))
562 			break;
563 		if (lockmgr_slock_try(lk, &x, flags)) {
564 			lockmgr_note_shared_acquire(lk, 0, 0,
565 			    file, line, flags);
566 			locked = true;
567 		}
568 		break;
569 	case LK_EXCLUSIVE:
570 		if (LK_CAN_WITNESS(flags))
571 			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
572 			    LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
573 			    ilk : NULL);
574 		tid = (uintptr_t)curthread;
		/* Only a completely unlocked lock word can be taken here. */
575 		if (lk->lk_lock == LK_UNLOCKED &&
576 		    atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
577 			lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
578 			    flags);
579 			locked = true;
580 		}
581 		break;
582 	case LK_UPGRADE:
583 	case LK_TRYUPGRADE:
584 		_lockmgr_assert(lk, KA_SLOCKED, file, line);
585 		tid = (uintptr_t)curthread;
		/*
		 * x holds the sampled waiters bits and v the sampled spinners
		 * bit.  The swap only succeeds if the lock word is exactly one
		 * sharer plus those bits; the new word keeps the waiters bits
		 * but not the spinners bit.
		 */
586 		v = lk->lk_lock;
587 		x = v & LK_ALL_WAITERS;
588 		v &= LK_EXCLUSIVE_SPINNERS;
589 		if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x | v,
590 		    tid | x)) {
591 			lockmgr_note_exclusive_upgrade(lk, file, line, flags);
592 			locked = true;
593 		}
594 		break;
595 	default:
596 		break;
597 	}
598 	if (__predict_true(locked)) {
599 		if (__predict_false(flags & LK_INTERLOCK)) {
600 			class = LOCK_CLASS(ilk);
601 			class->lc_unlock(ilk);
602 		}
603 		return (0);
604 	} else {
605 		return (__lockmgr_args(lk, flags, ilk, LK_WMESG_DEFAULT,
606 		    LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, file, line));
607 	}
608 }
609 
/*
 * Fast path for lock release.
 *
 * A shared hold is dropped through lockmgr_sunlock_try(); an exclusive,
 * non-recursed hold by curthread with no flag bits set is dropped with a
 * single release cmpset.  Everything else (waiters to wake up, recursion,
 * lock not owned in the expected way) is forwarded to __lockmgr_args() as
 * an LK_RELEASE request with default wait message, priority and timeout.
 *
 * lk:    lock to release; asserted to be held (KA_LOCKED).
 * flags: LK_* flags; LK_INTERLOCK requests the release of ilk.
 * ilk:   interlock, only used when LK_INTERLOCK is set in flags.
 *
 * The function takes no caller location, so the file/line used for the
 * assertion and the release bookkeeping are this function's own
 * __FILE__/__LINE__.
 *
 * Returns 0 when the fast path succeeded, otherwise whatever
 * __lockmgr_args() returns.
 */
610 int
611 lockmgr_unlock_fast_path(struct lock *lk, u_int flags, struct lock_object *ilk)
612 {
613 	struct lock_class *class;
614 	uintptr_t x, tid;
615 	bool unlocked;
616 	const char *file;
617 	int line;
618 
619 	file = __FILE__;
620 	line = __LINE__;
621 
622 	_lockmgr_assert(lk, KA_LOCKED, file, line);
623 	unlocked = false;
624 	x = lk->lk_lock;
	/*
	 * The whole comparison goes inside __predict_true() so that the
	 * branch hint applies to the boolean being tested rather than to the
	 * raw masked value.
	 */
625 	if (__predict_true((x & LK_SHARE) != 0)) {
626 		if (lockmgr_sunlock_try(lk, x)) {
627 			lockmgr_note_shared_release(lk, file, line);
628 			unlocked = true;
629 		}
630 	} else {
631 		tid = (uintptr_t)curthread;
		/*
		 * The cmpset expects the lock word to be exactly curthread,
		 * i.e. owned by the caller with no waiter or spinner bits.
		 */
632 		if (!lockmgr_recursed(lk) &&
633 		    atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED)) {
634 			lockmgr_note_exclusive_release(lk, file, line);
635 			unlocked = true;
636 		}
637 	}
638 	if (__predict_true(unlocked)) {
639 		if (__predict_false(flags & LK_INTERLOCK)) {
640 			class = LOCK_CLASS(ilk);
641 			class->lc_unlock(ilk);
642 		}
643 		return (0);
644 	} else {
645 		return (__lockmgr_args(lk, flags | LK_RELEASE, ilk, LK_WMESG_DEFAULT,
646 		    LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, LOCK_FILE, LOCK_LINE));
647 	}
648 }
649 
650 int
651 __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
652     const char *wmesg, int pri, int timo, const char *file, int line)
653 {
654 	GIANT_DECLARE;
655 	struct lock_class *class;
656 	const char *iwmesg;
657 	uintptr_t tid, v, x;
658 	u_int op, realexslp;
659 	int error, ipri, itimo, queue, wakeup_swapper;
660 #ifdef LOCK_PROFILING
661 	uint64_t waittime = 0;
662 	int contested = 0;
663 #endif
664 #ifdef ADAPTIVE_LOCKMGRS
665 	volatile struct thread *owner;
666 	u_int i, spintries = 0;
667 #endif
668 
669 	error = 0;
670 	tid = (uintptr_t)curthread;
671 	op = (flags & LK_TYPE_MASK);
672 	iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg;
673 	ipri = (pri == LK_PRIO_DEFAULT) ? lk->lk_pri : pri;
674 	itimo = (timo == LK_TIMO_DEFAULT) ? lk->lk_timo : timo;
675 
676 	MPASS((flags & ~LK_TOTAL_MASK) == 0);
677 	KASSERT((op & (op - 1)) == 0,
678 	    ("%s: Invalid requested operation @ %s:%d", __func__, file, line));
679 	KASSERT((flags & (LK_NOWAIT | LK_SLEEPFAIL)) == 0 ||
680 	    (op != LK_DOWNGRADE && op != LK_RELEASE),
681 	    ("%s: Invalid flags in regard of the operation desired @ %s:%d",
682 	    __func__, file, line));
683 	KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL,
684 	    ("%s: LK_INTERLOCK passed without valid interlock @ %s:%d",
685 	    __func__, file, line));
686 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
687 	    ("%s: idle thread %p on lockmgr %s @ %s:%d", __func__, curthread,
688 	    lk->lock_object.lo_name, file, line));
689 
690 	class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
691 	if (panicstr != NULL) {
692 		if (flags & LK_INTERLOCK)
693 			class->lc_unlock(ilk);
694 		return (0);
695 	}
696 
697 	if (lk->lock_object.lo_flags & LK_NOSHARE) {
698 		switch (op) {
699 		case LK_SHARED:
700 			op = LK_EXCLUSIVE;
701 			break;
702 		case LK_UPGRADE:
703 		case LK_TRYUPGRADE:
704 		case LK_DOWNGRADE:
705 			_lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED,
706 			    file, line);
707 			if (flags & LK_INTERLOCK)
708 				class->lc_unlock(ilk);
709 			return (0);
710 		}
711 	}
712 
713 	wakeup_swapper = 0;
714 	switch (op) {
715 	case LK_SHARED:
716 		if (LK_CAN_WITNESS(flags))
717 			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
718 			    file, line, flags & LK_INTERLOCK ? ilk : NULL);
719 		for (;;) {
720 			if (lockmgr_slock_try(lk, &x, flags))
721 				break;
722 #ifdef HWPMC_HOOKS
723 			PMC_SOFT_CALL( , , lock, failed);
724 #endif
725 			lock_profile_obtain_lock_failed(&lk->lock_object,
726 			    &contested, &waittime);
727 
728 			/*
729 			 * If the lock is already held by curthread in
730 			 * exclusive way avoid a deadlock.
731 			 */
732 			if (LK_HOLDER(x) == tid) {
733 				LOCK_LOG2(lk,
734 				    "%s: %p already held in exclusive mode",
735 				    __func__, lk);
736 				error = EDEADLK;
737 				break;
738 			}
739 
740 			/*
741 			 * If the lock is expected to not sleep just give up
742 			 * and return.
743 			 */
744 			if (LK_TRYOP(flags)) {
745 				LOCK_LOG2(lk, "%s: %p fails the try operation",
746 				    __func__, lk);
747 				error = EBUSY;
748 				break;
749 			}
750 
751 #ifdef ADAPTIVE_LOCKMGRS
752 			/*
753 			 * If the owner is running on another CPU, spin until
754 			 * the owner stops running or the state of the lock
755 			 * changes.  We need a double-state handle here
756 			 * because for a failed acquisition the lock can be
757 			 * either held in exclusive mode or shared mode
758 			 * (for the writer starvation avoidance technique).
759 			 */
760 			if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
761 			    LK_HOLDER(x) != LK_KERNPROC) {
762 				owner = (struct thread *)LK_HOLDER(x);
763 				if (LOCK_LOG_TEST(&lk->lock_object, 0))
764 					CTR3(KTR_LOCK,
765 					    "%s: spinning on %p held by %p",
766 					    __func__, lk, owner);
767 				KTR_STATE1(KTR_SCHED, "thread",
768 				    sched_tdname(td), "spinning",
769 				    "lockname:\"%s\"", lk->lock_object.lo_name);
770 
771 				/*
772 				 * If we are holding also an interlock drop it
773 				 * in order to avoid a deadlock if the lockmgr
774 				 * owner is adaptively spinning on the
775 				 * interlock itself.
776 				 */
777 				if (flags & LK_INTERLOCK) {
778 					class->lc_unlock(ilk);
779 					flags &= ~LK_INTERLOCK;
780 				}
781 				GIANT_SAVE();
782 				while (LK_HOLDER(lk->lk_lock) ==
783 				    (uintptr_t)owner && TD_IS_RUNNING(owner))
784 					cpu_spinwait();
785 				KTR_STATE0(KTR_SCHED, "thread",
786 				    sched_tdname(td), "running");
787 				GIANT_RESTORE();
788 				continue;
789 			} else if (LK_CAN_ADAPT(lk, flags) &&
790 			    (x & LK_SHARE) != 0 && LK_SHARERS(x) &&
791 			    spintries < alk_retries) {
792 				KTR_STATE1(KTR_SCHED, "thread",
793 				    sched_tdname(td), "spinning",
794 				    "lockname:\"%s\"", lk->lock_object.lo_name);
795 				if (flags & LK_INTERLOCK) {
796 					class->lc_unlock(ilk);
797 					flags &= ~LK_INTERLOCK;
798 				}
799 				GIANT_SAVE();
800 				spintries++;
801 				for (i = 0; i < alk_loops; i++) {
802 					if (LOCK_LOG_TEST(&lk->lock_object, 0))
803 						CTR4(KTR_LOCK,
804 				    "%s: shared spinning on %p with %u and %u",
805 						    __func__, lk, spintries, i);
806 					x = lk->lk_lock;
807 					if ((x & LK_SHARE) == 0 ||
808 					    LK_CAN_SHARE(x, flags) != 0)
809 						break;
810 					cpu_spinwait();
811 				}
812 				KTR_STATE0(KTR_SCHED, "thread",
813 				    sched_tdname(td), "running");
814 				GIANT_RESTORE();
815 				if (i != alk_loops)
816 					continue;
817 			}
818 #endif
819 
820 			/*
821 			 * Acquire the sleepqueue chain lock because we
822 			 * probabilly will need to manipulate waiters flags.
823 			 */
824 			sleepq_lock(&lk->lock_object);
825 			x = lk->lk_lock;
826 
827 			/*
828 			 * if the lock can be acquired in shared mode, try
829 			 * again.
830 			 */
831 			if (LK_CAN_SHARE(x, flags)) {
832 				sleepq_release(&lk->lock_object);
833 				continue;
834 			}
835 
836 #ifdef ADAPTIVE_LOCKMGRS
837 			/*
838 			 * The current lock owner might have started executing
839 			 * on another CPU (or the lock could have changed
840 			 * owner) while we were waiting on the turnstile
841 			 * chain lock.  If so, drop the turnstile lock and try
842 			 * again.
843 			 */
844 			if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
845 			    LK_HOLDER(x) != LK_KERNPROC) {
846 				owner = (struct thread *)LK_HOLDER(x);
847 				if (TD_IS_RUNNING(owner)) {
848 					sleepq_release(&lk->lock_object);
849 					continue;
850 				}
851 			}
852 #endif
853 
854 			/*
855 			 * Try to set the LK_SHARED_WAITERS flag.  If we fail,
856 			 * loop back and retry.
857 			 */
858 			if ((x & LK_SHARED_WAITERS) == 0) {
859 				if (!atomic_cmpset_acq_ptr(&lk->lk_lock, x,
860 				    x | LK_SHARED_WAITERS)) {
861 					sleepq_release(&lk->lock_object);
862 					continue;
863 				}
864 				LOCK_LOG2(lk, "%s: %p set shared waiters flag",
865 				    __func__, lk);
866 			}
867 
868 			/*
869 			 * As far as we have been unable to acquire the
870 			 * shared lock and the shared waiters flag is set,
871 			 * we will sleep.
872 			 */
873 			error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
874 			    SQ_SHARED_QUEUE);
875 			flags &= ~LK_INTERLOCK;
876 			if (error) {
877 				LOCK_LOG3(lk,
878 				    "%s: interrupted sleep for %p with %d",
879 				    __func__, lk, error);
880 				break;
881 			}
882 			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
883 			    __func__, lk);
884 		}
885 		if (error == 0) {
886 #ifdef LOCK_PROFILING
887 			lockmgr_note_shared_acquire(lk, contested, waittime,
888 			    file, line, flags);
889 #else
890 			lockmgr_note_shared_acquire(lk, 0, 0, file, line,
891 			    flags);
892 #endif
893 		}
894 		break;
895 	case LK_UPGRADE:
896 	case LK_TRYUPGRADE:
897 		_lockmgr_assert(lk, KA_SLOCKED, file, line);
898 		v = lk->lk_lock;
899 		x = v & LK_ALL_WAITERS;
900 		v &= LK_EXCLUSIVE_SPINNERS;
901 
902 		/*
903 		 * Try to switch from one shared lock to an exclusive one.
904 		 * We need to preserve waiters flags during the operation.
905 		 */
906 		if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x | v,
907 		    tid | x)) {
908 			LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file,
909 			    line);
910 			WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE |
911 			    LK_TRYWIT(flags), file, line);
912 			TD_SLOCKS_DEC(curthread);
913 			break;
914 		}
915 
916 		/*
917 		 * In LK_TRYUPGRADE mode, do not drop the lock,
918 		 * returning EBUSY instead.
919 		 */
920 		if (op == LK_TRYUPGRADE) {
921 			LOCK_LOG2(lk, "%s: %p failed the nowait upgrade",
922 			    __func__, lk);
923 			error = EBUSY;
924 			break;
925 		}
926 
927 		/*
928 		 * We have been unable to succeed in upgrading, so just
929 		 * give up the shared lock.
930 		 */
931 		wakeup_swapper |= wakeupshlk(lk, file, line);
932 
933 		/* FALLTHROUGH */
934 	case LK_EXCLUSIVE:
935 		if (LK_CAN_WITNESS(flags))
936 			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
937 			    LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
938 			    ilk : NULL);
939 
940 		/*
941 		 * If curthread already holds the lock and this one is
942 		 * allowed to recurse, simply recurse on it.
943 		 */
944 		if (lockmgr_xlocked(lk)) {
945 			if ((flags & LK_CANRECURSE) == 0 &&
946 			    (lk->lock_object.lo_flags & LO_RECURSABLE) == 0) {
947 
948 				/*
949 				 * If the lock is expected to not panic just
950 				 * give up and return.
951 				 */
952 				if (LK_TRYOP(flags)) {
953 					LOCK_LOG2(lk,
954 					    "%s: %p fails the try operation",
955 					    __func__, lk);
956 					error = EBUSY;
957 					break;
958 				}
959 				if (flags & LK_INTERLOCK)
960 					class->lc_unlock(ilk);
961 		panic("%s: recursing on non recursive lockmgr %s @ %s:%d\n",
962 				    __func__, iwmesg, file, line);
963 			}
964 			lk->lk_recurse++;
965 			LOCK_LOG2(lk, "%s: %p recursing", __func__, lk);
966 			LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
967 			    lk->lk_recurse, file, line);
968 			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
969 			    LK_TRYWIT(flags), file, line);
970 			TD_LOCKS_INC(curthread);
971 			break;
972 		}
973 
974 		for (;;) {
975 			if (lk->lk_lock == LK_UNLOCKED &&
976 			    atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
977 				break;
978 #ifdef HWPMC_HOOKS
979 			PMC_SOFT_CALL( , , lock, failed);
980 #endif
981 			lock_profile_obtain_lock_failed(&lk->lock_object,
982 			    &contested, &waittime);
983 
984 			/*
985 			 * If the lock is expected to not sleep just give up
986 			 * and return.
987 			 */
988 			if (LK_TRYOP(flags)) {
989 				LOCK_LOG2(lk, "%s: %p fails the try operation",
990 				    __func__, lk);
991 				error = EBUSY;
992 				break;
993 			}
994 
995 #ifdef ADAPTIVE_LOCKMGRS
996 			/*
997 			 * If the owner is running on another CPU, spin until
998 			 * the owner stops running or the state of the lock
999 			 * changes.
1000 			 */
1001 			x = lk->lk_lock;
1002 			if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
1003 			    LK_HOLDER(x) != LK_KERNPROC) {
1004 				owner = (struct thread *)LK_HOLDER(x);
1005 				if (LOCK_LOG_TEST(&lk->lock_object, 0))
1006 					CTR3(KTR_LOCK,
1007 					    "%s: spinning on %p held by %p",
1008 					    __func__, lk, owner);
1009 				KTR_STATE1(KTR_SCHED, "thread",
1010 				    sched_tdname(td), "spinning",
1011 				    "lockname:\"%s\"", lk->lock_object.lo_name);
1012 
1013 				/*
1014 				 * If we are holding also an interlock drop it
1015 				 * in order to avoid a deadlock if the lockmgr
1016 				 * owner is adaptively spinning on the
1017 				 * interlock itself.
1018 				 */
1019 				if (flags & LK_INTERLOCK) {
1020 					class->lc_unlock(ilk);
1021 					flags &= ~LK_INTERLOCK;
1022 				}
1023 				GIANT_SAVE();
1024 				while (LK_HOLDER(lk->lk_lock) ==
1025 				    (uintptr_t)owner && TD_IS_RUNNING(owner))
1026 					cpu_spinwait();
1027 				KTR_STATE0(KTR_SCHED, "thread",
1028 				    sched_tdname(td), "running");
1029 				GIANT_RESTORE();
1030 				continue;
1031 			} else if (LK_CAN_ADAPT(lk, flags) &&
1032 			    (x & LK_SHARE) != 0 && LK_SHARERS(x) &&
1033 			    spintries < alk_retries) {
1034 				if ((x & LK_EXCLUSIVE_SPINNERS) == 0 &&
1035 				    !atomic_cmpset_ptr(&lk->lk_lock, x,
1036 				    x | LK_EXCLUSIVE_SPINNERS))
1037 					continue;
1038 				KTR_STATE1(KTR_SCHED, "thread",
1039 				    sched_tdname(td), "spinning",
1040 				    "lockname:\"%s\"", lk->lock_object.lo_name);
1041 				if (flags & LK_INTERLOCK) {
1042 					class->lc_unlock(ilk);
1043 					flags &= ~LK_INTERLOCK;
1044 				}
1045 				GIANT_SAVE();
1046 				spintries++;
1047 				for (i = 0; i < alk_loops; i++) {
1048 					if (LOCK_LOG_TEST(&lk->lock_object, 0))
1049 						CTR4(KTR_LOCK,
1050 				    "%s: shared spinning on %p with %u and %u",
1051 						    __func__, lk, spintries, i);
1052 					if ((lk->lk_lock &
1053 					    LK_EXCLUSIVE_SPINNERS) == 0)
1054 						break;
1055 					cpu_spinwait();
1056 				}
1057 				KTR_STATE0(KTR_SCHED, "thread",
1058 				    sched_tdname(td), "running");
1059 				GIANT_RESTORE();
1060 				if (i != alk_loops)
1061 					continue;
1062 			}
1063 #endif
1064 
1065 			/*
1066 			 * Acquire the sleepqueue chain lock because we
1067 			 * probabilly will need to manipulate waiters flags.
1068 			 */
1069 			sleepq_lock(&lk->lock_object);
1070 			x = lk->lk_lock;
1071 
1072 			/*
1073 			 * if the lock has been released while we spun on
1074 			 * the sleepqueue chain lock just try again.
1075 			 */
1076 			if (x == LK_UNLOCKED) {
1077 				sleepq_release(&lk->lock_object);
1078 				continue;
1079 			}
1080 
1081 #ifdef ADAPTIVE_LOCKMGRS
1082 			/*
1083 			 * The current lock owner might have started executing
1084 			 * on another CPU (or the lock could have changed
1085 			 * owner) while we were waiting on the turnstile
1086 			 * chain lock.  If so, drop the turnstile lock and try
1087 			 * again.
1088 			 */
1089 			if (LK_CAN_ADAPT(lk, flags) && (x & LK_SHARE) == 0 &&
1090 			    LK_HOLDER(x) != LK_KERNPROC) {
1091 				owner = (struct thread *)LK_HOLDER(x);
1092 				if (TD_IS_RUNNING(owner)) {
1093 					sleepq_release(&lk->lock_object);
1094 					continue;
1095 				}
1096 			}
1097 #endif
1098 
1099 			/*
1100 			 * The lock can be in the state where there is a
1101 			 * pending queue of waiters, but still no owner.
1102 			 * This happens when the lock is contested and an
1103 			 * owner is going to claim the lock.
1104 			 * If curthread is the one successfully acquiring it
1105 			 * claim lock ownership and return, preserving waiters
1106 			 * flags.
1107 			 */
1108 			v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
1109 			if ((x & ~v) == LK_UNLOCKED) {
1110 				v &= ~LK_EXCLUSIVE_SPINNERS;
1111 				if (atomic_cmpset_acq_ptr(&lk->lk_lock, x,
1112 				    tid | v)) {
1113 					sleepq_release(&lk->lock_object);
1114 					LOCK_LOG2(lk,
1115 					    "%s: %p claimed by a new writer",
1116 					    __func__, lk);
1117 					break;
1118 				}
1119 				sleepq_release(&lk->lock_object);
1120 				continue;
1121 			}
1122 
1123 			/*
1124 			 * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
1125 			 * fail, loop back and retry.
1126 			 */
1127 			if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
1128 				if (!atomic_cmpset_ptr(&lk->lk_lock, x,
1129 				    x | LK_EXCLUSIVE_WAITERS)) {
1130 					sleepq_release(&lk->lock_object);
1131 					continue;
1132 				}
1133 				LOCK_LOG2(lk, "%s: %p set excl waiters flag",
1134 				    __func__, lk);
1135 			}
1136 
1137 			/*
1138 			 * As far as we have been unable to acquire the
1139 			 * exclusive lock and the exclusive waiters flag
1140 			 * is set, we will sleep.
1141 			 */
1142 			error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
1143 			    SQ_EXCLUSIVE_QUEUE);
1144 			flags &= ~LK_INTERLOCK;
1145 			if (error) {
1146 				LOCK_LOG3(lk,
1147 				    "%s: interrupted sleep for %p with %d",
1148 				    __func__, lk, error);
1149 				break;
1150 			}
1151 			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
1152 			    __func__, lk);
1153 		}
1154 		if (error == 0) {
1155 #ifdef LOCK_PROFILING
1156 			lockmgr_note_exclusive_acquire(lk, contested, waittime,
1157 			    file, line, flags);
1158 #else
1159 			lockmgr_note_exclusive_acquire(lk, 0, 0, file, line,
1160 			    flags);
1161 #endif
1162 		}
1163 		break;
1164 	case LK_DOWNGRADE:
1165 		_lockmgr_assert(lk, KA_XLOCKED, file, line);
1166 		LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line);
1167 		WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line);
1168 
1169 		/*
1170 		 * Panic if the lock is recursed.
1171 		 */
1172 		if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
1173 			if (flags & LK_INTERLOCK)
1174 				class->lc_unlock(ilk);
1175 			panic("%s: downgrade a recursed lockmgr %s @ %s:%d\n",
1176 			    __func__, iwmesg, file, line);
1177 		}
1178 		TD_SLOCKS_INC(curthread);
1179 
1180 		/*
1181 		 * In order to preserve waiters flags, just spin.
1182 		 */
1183 		for (;;) {
1184 			x = lk->lk_lock;
1185 			MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1186 			x &= LK_ALL_WAITERS;
1187 			if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1188 			    LK_SHARERS_LOCK(1) | x))
1189 				break;
1190 			cpu_spinwait();
1191 		}
1192 		break;
1193 	case LK_RELEASE:
1194 		_lockmgr_assert(lk, KA_LOCKED, file, line);
1195 		x = lk->lk_lock;
1196 
1197 		if ((x & LK_SHARE) == 0) {
1198 
1199 			/*
1200 			 * As first option, treact the lock as if it has not
1201 			 * any waiter.
1202 			 * Fix-up the tid var if the lock has been disowned.
1203 			 */
1204 			if (LK_HOLDER(x) == LK_KERNPROC)
1205 				tid = LK_KERNPROC;
1206 			else {
1207 				WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE,
1208 				    file, line);
1209 				TD_LOCKS_DEC(curthread);
1210 			}
1211 			LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0,
1212 			    lk->lk_recurse, file, line);
1213 
1214 			/*
1215 			 * The lock is held in exclusive mode.
1216 			 * If the lock is recursed also, then unrecurse it.
1217 			 */
1218 			if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
1219 				LOCK_LOG2(lk, "%s: %p unrecursing", __func__,
1220 				    lk);
1221 				lk->lk_recurse--;
1222 				break;
1223 			}
1224 			if (tid != LK_KERNPROC)
1225 				lock_profile_release_lock(&lk->lock_object);
1226 
1227 			if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid,
1228 			    LK_UNLOCKED))
1229 				break;
1230 
1231 			sleepq_lock(&lk->lock_object);
1232 			x = lk->lk_lock;
1233 			v = LK_UNLOCKED;
1234 
1235 			/*
1236 		 	 * If the lock has exclusive waiters, give them
1237 			 * preference in order to avoid deadlock with
1238 			 * shared runners up.
1239 			 * If interruptible sleeps left the exclusive queue
1240 			 * empty avoid a starvation for the threads sleeping
1241 			 * on the shared queue by giving them precedence
1242 			 * and cleaning up the exclusive waiters bit anyway.
1243 			 * Please note that lk_exslpfail count may be lying
1244 			 * about the real number of waiters with the
1245 			 * LK_SLEEPFAIL flag on because they may be used in
1246 			 * conjunction with interruptible sleeps so
1247 			 * lk_exslpfail might be considered an 'upper limit'
1248 			 * bound, including the edge cases.
1249 			 */
1250 			MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1251 			realexslp = sleepq_sleepcnt(&lk->lock_object,
1252 			    SQ_EXCLUSIVE_QUEUE);
1253 			if ((x & LK_EXCLUSIVE_WAITERS) != 0 && realexslp != 0) {
1254 				if (lk->lk_exslpfail < realexslp) {
1255 					lk->lk_exslpfail = 0;
1256 					queue = SQ_EXCLUSIVE_QUEUE;
1257 					v |= (x & LK_SHARED_WAITERS);
1258 				} else {
1259 					lk->lk_exslpfail = 0;
1260 					LOCK_LOG2(lk,
1261 					"%s: %p has only LK_SLEEPFAIL sleepers",
1262 					    __func__, lk);
1263 					LOCK_LOG2(lk,
1264 			"%s: %p waking up threads on the exclusive queue",
1265 					    __func__, lk);
1266 					wakeup_swapper =
1267 					    sleepq_broadcast(&lk->lock_object,
1268 					    SLEEPQ_LK, 0, SQ_EXCLUSIVE_QUEUE);
1269 					queue = SQ_SHARED_QUEUE;
1270 				}
1271 			} else {
1272 
1273 				/*
1274 				 * Exclusive waiters sleeping with LK_SLEEPFAIL
1275 				 * on and using interruptible sleeps/timeout
1276 				 * may have left spourious lk_exslpfail counts
1277 				 * on, so clean it up anyway.
1278 				 */
1279 				lk->lk_exslpfail = 0;
1280 				queue = SQ_SHARED_QUEUE;
1281 			}
1282 
1283 			LOCK_LOG3(lk,
1284 			    "%s: %p waking up threads on the %s queue",
1285 			    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
1286 			    "exclusive");
1287 			atomic_store_rel_ptr(&lk->lk_lock, v);
1288 			wakeup_swapper |= sleepq_broadcast(&lk->lock_object,
1289 			    SLEEPQ_LK, 0, queue);
1290 			sleepq_release(&lk->lock_object);
1291 			break;
1292 		} else
1293 			wakeup_swapper = wakeupshlk(lk, file, line);
1294 		break;
1295 	case LK_DRAIN:
1296 		if (LK_CAN_WITNESS(flags))
1297 			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
1298 			    LOP_EXCLUSIVE, file, line, flags & LK_INTERLOCK ?
1299 			    ilk : NULL);
1300 
1301 		/*
1302 		 * Trying to drain a lock we already own will result in a
1303 		 * deadlock.
1304 		 */
1305 		if (lockmgr_xlocked(lk)) {
1306 			if (flags & LK_INTERLOCK)
1307 				class->lc_unlock(ilk);
1308 			panic("%s: draining %s with the lock held @ %s:%d\n",
1309 			    __func__, iwmesg, file, line);
1310 		}
1311 
1312 		for (;;) {
1313 			if (lk->lk_lock == LK_UNLOCKED &&
1314 			    atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid))
1315 				break;
1316 
1317 #ifdef HWPMC_HOOKS
1318 			PMC_SOFT_CALL( , , lock, failed);
1319 #endif
1320 			lock_profile_obtain_lock_failed(&lk->lock_object,
1321 			    &contested, &waittime);
1322 
1323 			/*
1324 			 * If the lock is expected to not sleep just give up
1325 			 * and return.
1326 			 */
1327 			if (LK_TRYOP(flags)) {
1328 				LOCK_LOG2(lk, "%s: %p fails the try operation",
1329 				    __func__, lk);
1330 				error = EBUSY;
1331 				break;
1332 			}
1333 
1334 			/*
1335 			 * Acquire the sleepqueue chain lock because we
1336 			 * probabilly will need to manipulate waiters flags.
1337 			 */
1338 			sleepq_lock(&lk->lock_object);
1339 			x = lk->lk_lock;
1340 
1341 			/*
1342 			 * if the lock has been released while we spun on
1343 			 * the sleepqueue chain lock just try again.
1344 			 */
1345 			if (x == LK_UNLOCKED) {
1346 				sleepq_release(&lk->lock_object);
1347 				continue;
1348 			}
1349 
1350 			v = x & (LK_ALL_WAITERS | LK_EXCLUSIVE_SPINNERS);
1351 			if ((x & ~v) == LK_UNLOCKED) {
1352 				v = (x & ~LK_EXCLUSIVE_SPINNERS);
1353 
1354 				/*
1355 				 * If interruptible sleeps left the exclusive
1356 				 * queue empty avoid a starvation for the
1357 				 * threads sleeping on the shared queue by
1358 				 * giving them precedence and cleaning up the
1359 				 * exclusive waiters bit anyway.
1360 				 * Please note that lk_exslpfail count may be
1361 				 * lying about the real number of waiters with
1362 				 * the LK_SLEEPFAIL flag on because they may
1363 				 * be used in conjunction with interruptible
1364 				 * sleeps so lk_exslpfail might be considered
1365 				 * an 'upper limit' bound, including the edge
1366 				 * cases.
1367 				 */
1368 				if (v & LK_EXCLUSIVE_WAITERS) {
1369 					queue = SQ_EXCLUSIVE_QUEUE;
1370 					v &= ~LK_EXCLUSIVE_WAITERS;
1371 				} else {
1372 
1373 					/*
1374 					 * Exclusive waiters sleeping with
1375 					 * LK_SLEEPFAIL on and using
1376 					 * interruptible sleeps/timeout may
1377 					 * have left spourious lk_exslpfail
1378 					 * counts on, so clean it up anyway.
1379 					 */
1380 					MPASS(v & LK_SHARED_WAITERS);
1381 					lk->lk_exslpfail = 0;
1382 					queue = SQ_SHARED_QUEUE;
1383 					v &= ~LK_SHARED_WAITERS;
1384 				}
1385 				if (queue == SQ_EXCLUSIVE_QUEUE) {
1386 					realexslp =
1387 					    sleepq_sleepcnt(&lk->lock_object,
1388 					    SQ_EXCLUSIVE_QUEUE);
1389 					if (lk->lk_exslpfail >= realexslp) {
1390 						lk->lk_exslpfail = 0;
1391 						queue = SQ_SHARED_QUEUE;
1392 						v &= ~LK_SHARED_WAITERS;
1393 						if (realexslp != 0) {
1394 							LOCK_LOG2(lk,
1395 					"%s: %p has only LK_SLEEPFAIL sleepers",
1396 							    __func__, lk);
1397 							LOCK_LOG2(lk,
1398 			"%s: %p waking up threads on the exclusive queue",
1399 							    __func__, lk);
1400 							wakeup_swapper =
1401 							    sleepq_broadcast(
1402 							    &lk->lock_object,
1403 							    SLEEPQ_LK, 0,
1404 							    SQ_EXCLUSIVE_QUEUE);
1405 						}
1406 					} else
1407 						lk->lk_exslpfail = 0;
1408 				}
1409 				if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) {
1410 					sleepq_release(&lk->lock_object);
1411 					continue;
1412 				}
1413 				LOCK_LOG3(lk,
1414 				"%s: %p waking up all threads on the %s queue",
1415 				    __func__, lk, queue == SQ_SHARED_QUEUE ?
1416 				    "shared" : "exclusive");
1417 				wakeup_swapper |= sleepq_broadcast(
1418 				    &lk->lock_object, SLEEPQ_LK, 0, queue);
1419 
1420 				/*
1421 				 * If shared waiters have been woken up we need
1422 				 * to wait for one of them to acquire the lock
1423 				 * before to set the exclusive waiters in
1424 				 * order to avoid a deadlock.
1425 				 */
1426 				if (queue == SQ_SHARED_QUEUE) {
1427 					for (v = lk->lk_lock;
1428 					    (v & LK_SHARE) && !LK_SHARERS(v);
1429 					    v = lk->lk_lock)
1430 						cpu_spinwait();
1431 				}
1432 			}
1433 
1434 			/*
1435 			 * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
1436 			 * fail, loop back and retry.
1437 			 */
1438 			if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
1439 				if (!atomic_cmpset_ptr(&lk->lk_lock, x,
1440 				    x | LK_EXCLUSIVE_WAITERS)) {
1441 					sleepq_release(&lk->lock_object);
1442 					continue;
1443 				}
1444 				LOCK_LOG2(lk, "%s: %p set drain waiters flag",
1445 				    __func__, lk);
1446 			}
1447 
1448 			/*
1449 			 * As far as we have been unable to acquire the
1450 			 * exclusive lock and the exclusive waiters flag
1451 			 * is set, we will sleep.
1452 			 */
1453 			if (flags & LK_INTERLOCK) {
1454 				class->lc_unlock(ilk);
1455 				flags &= ~LK_INTERLOCK;
1456 			}
1457 			GIANT_SAVE();
1458 			sleepq_add(&lk->lock_object, NULL, iwmesg, SLEEPQ_LK,
1459 			    SQ_EXCLUSIVE_QUEUE);
1460 			sleepq_wait(&lk->lock_object, ipri & PRIMASK);
1461 			GIANT_RESTORE();
1462 			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
1463 			    __func__, lk);
1464 		}
1465 
1466 		if (error == 0) {
1467 			lock_profile_obtain_lock_success(&lk->lock_object,
1468 			    contested, waittime, file, line);
1469 			LOCK_LOG_LOCK("DRAIN", &lk->lock_object, 0,
1470 			    lk->lk_recurse, file, line);
1471 			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
1472 			    LK_TRYWIT(flags), file, line);
1473 			TD_LOCKS_INC(curthread);
1474 			STACK_SAVE(lk);
1475 		}
1476 		break;
1477 	default:
1478 		if (flags & LK_INTERLOCK)
1479 			class->lc_unlock(ilk);
1480 		panic("%s: unknown lockmgr request 0x%x\n", __func__, op);
1481 	}
1482 
1483 	if (flags & LK_INTERLOCK)
1484 		class->lc_unlock(ilk);
1485 	if (wakeup_swapper)
1486 		kick_proc0();
1487 
1488 	return (error);
1489 }
1490 
1491 void
1492 _lockmgr_disown(struct lock *lk, const char *file, int line)
1493 {
1494 	uintptr_t tid, x;
1495 
1496 	if (SCHEDULER_STOPPED())
1497 		return;
1498 
1499 	tid = (uintptr_t)curthread;
1500 	_lockmgr_assert(lk, KA_XLOCKED, file, line);
1501 
1502 	/*
1503 	 * Panic if the lock is recursed.
1504 	 */
1505 	if (lockmgr_xlocked(lk) && lockmgr_recursed(lk))
1506 		panic("%s: disown a recursed lockmgr @ %s:%d\n",
1507 		    __func__,  file, line);
1508 
1509 	/*
1510 	 * If the owner is already LK_KERNPROC just skip the whole operation.
1511 	 */
1512 	if (LK_HOLDER(lk->lk_lock) != tid)
1513 		return;
1514 	lock_profile_release_lock(&lk->lock_object);
1515 	LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line);
1516 	WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
1517 	TD_LOCKS_DEC(curthread);
1518 	STACK_SAVE(lk);
1519 
1520 	/*
1521 	 * In order to preserve waiters flags, just spin.
1522 	 */
1523 	for (;;) {
1524 		x = lk->lk_lock;
1525 		MPASS((x & LK_EXCLUSIVE_SPINNERS) == 0);
1526 		x &= LK_ALL_WAITERS;
1527 		if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
1528 		    LK_KERNPROC | x))
1529 			return;
1530 		cpu_spinwait();
1531 	}
1532 }
1533 
1534 void
1535 lockmgr_printinfo(const struct lock *lk)
1536 {
1537 	struct thread *td;
1538 	uintptr_t x;
1539 
1540 	if (lk->lk_lock == LK_UNLOCKED)
1541 		printf("lock type %s: UNLOCKED\n", lk->lock_object.lo_name);
1542 	else if (lk->lk_lock & LK_SHARE)
1543 		printf("lock type %s: SHARED (count %ju)\n",
1544 		    lk->lock_object.lo_name,
1545 		    (uintmax_t)LK_SHARERS(lk->lk_lock));
1546 	else {
1547 		td = lockmgr_xholder(lk);
1548 		if (td == (struct thread *)LK_KERNPROC)
1549 			printf("lock type %s: EXCL by KERNPROC\n",
1550 			    lk->lock_object.lo_name);
1551 		else
1552 			printf("lock type %s: EXCL by thread %p "
1553 			    "(pid %d, %s, tid %d)\n", lk->lock_object.lo_name,
1554 			    td, td->td_proc->p_pid, td->td_proc->p_comm,
1555 			    td->td_tid);
1556 	}
1557 
1558 	x = lk->lk_lock;
1559 	if (x & LK_EXCLUSIVE_WAITERS)
1560 		printf(" with exclusive waiters pending\n");
1561 	if (x & LK_SHARED_WAITERS)
1562 		printf(" with shared waiters pending\n");
1563 	if (x & LK_EXCLUSIVE_SPINNERS)
1564 		printf(" with exclusive spinners pending\n");
1565 
1566 	STACK_PRINT(lk);
1567 }
1568 
1569 int
1570 lockstatus(const struct lock *lk)
1571 {
1572 	uintptr_t v, x;
1573 	int ret;
1574 
1575 	ret = LK_SHARED;
1576 	x = lk->lk_lock;
1577 	v = LK_HOLDER(x);
1578 
1579 	if ((x & LK_SHARE) == 0) {
1580 		if (v == (uintptr_t)curthread || v == LK_KERNPROC)
1581 			ret = LK_EXCLUSIVE;
1582 		else
1583 			ret = LK_EXCLOTHER;
1584 	} else if (x == LK_UNLOCKED)
1585 		ret = 0;
1586 
1587 	return (ret);
1588 }
1589 
1590 #ifdef INVARIANT_SUPPORT
1591 
1592 FEATURE(invariant_support,
1593     "Support for modules compiled with INVARIANTS option");
1594 
1595 #ifndef INVARIANTS
1596 #undef	_lockmgr_assert
1597 #endif
1598 
/*
 * Check that lk is in the state described by 'what' and panic, reporting
 * the lock name and the caller's file/line, when it is not.
 *
 * 'what' is one of KA_SLOCKED, KA_LOCKED or KA_XLOCKED, each optionally
 * or'ed with KA_RECURSED or KA_NOTRECURSED, or KA_UNLOCKED.  Any other
 * value is itself a fatal error.  No check is performed once panicstr is
 * set.
 */
void
_lockmgr_assert(const struct lock *lk, int what, const char *file, int line)
{
	int slocked = 0;

	/* panicstr is set: do not pile further assertion panics on top. */
	if (panicstr != NULL)
		return;
	switch (what) {
	case KA_SLOCKED:
	case KA_SLOCKED | KA_NOTRECURSED:
	case KA_SLOCKED | KA_RECURSED:
		/* Remember that a shared hold was specifically requested. */
		slocked = 1;
		/* FALLTHROUGH */
	case KA_LOCKED:
	case KA_LOCKED | KA_NOTRECURSED:
	case KA_LOCKED | KA_RECURSED:
#ifdef WITNESS

		/*
		 * We cannot trust WITNESS if the lock is held in exclusive
		 * mode and a call to lockmgr_disown() happened.
		 * Workaround this skipping the check if the lock is held in
		 * exclusive mode even for the KA_LOCKED case.
		 */
		if (slocked || (lk->lk_lock & LK_SHARE)) {
			witness_assert(&lk->lock_object, what, file, line);
			break;
		}
#endif
		/*
		 * Fail when the lock word is LK_UNLOCKED, or when the lock
		 * is in exclusive mode and either a shared hold was required
		 * or both lockmgr_xlocked() and lockmgr_disowned() are false.
		 */
		if (lk->lk_lock == LK_UNLOCKED ||
		    ((lk->lk_lock & LK_SHARE) == 0 && (slocked ||
		    (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk)))))
			panic("Lock %s not %slocked @ %s:%d\n",
			    lk->lock_object.lo_name, slocked ? "share" : "",
			    file, line);

		/* Recursion constraints are only checked in exclusive mode. */
		if ((lk->lk_lock & LK_SHARE) == 0) {
			if (lockmgr_recursed(lk)) {
				if (what & KA_NOTRECURSED)
					panic("Lock %s recursed @ %s:%d\n",
					    lk->lock_object.lo_name, file,
					    line);
			} else if (what & KA_RECURSED)
				panic("Lock %s not recursed @ %s:%d\n",
				    lk->lock_object.lo_name, file, line);
		}
		break;
	case KA_XLOCKED:
	case KA_XLOCKED | KA_NOTRECURSED:
	case KA_XLOCKED | KA_RECURSED:
		/* A disowned lock is accepted as exclusively locked. */
		if (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk))
			panic("Lock %s not exclusively locked @ %s:%d\n",
			    lk->lock_object.lo_name, file, line);
		if (lockmgr_recursed(lk)) {
			if (what & KA_NOTRECURSED)
				panic("Lock %s recursed @ %s:%d\n",
				    lk->lock_object.lo_name, file, line);
		} else if (what & KA_RECURSED)
			panic("Lock %s not recursed @ %s:%d\n",
			    lk->lock_object.lo_name, file, line);
		break;
	case KA_UNLOCKED:
		/*
		 * Only the lockmgr_xlocked() and lockmgr_disowned() cases
		 * are rejected here; no shared-mode check is performed.
		 */
		if (lockmgr_xlocked(lk) || lockmgr_disowned(lk))
			panic("Lock %s exclusively locked @ %s:%d\n",
			    lk->lock_object.lo_name, file, line);
		break;
	default:
		panic("Unknown lockmgr assertion: %d @ %s:%d\n", what, file,
		    line);
	}
}
1669 #endif
1670 
1671 #ifdef DDB
1672 int
1673 lockmgr_chain(struct thread *td, struct thread **ownerp)
1674 {
1675 	struct lock *lk;
1676 
1677 	lk = td->td_wchan;
1678 
1679 	if (LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr)
1680 		return (0);
1681 	db_printf("blocked on lockmgr %s", lk->lock_object.lo_name);
1682 	if (lk->lk_lock & LK_SHARE)
1683 		db_printf("SHARED (count %ju)\n",
1684 		    (uintmax_t)LK_SHARERS(lk->lk_lock));
1685 	else
1686 		db_printf("EXCL\n");
1687 	*ownerp = lockmgr_xholder(lk);
1688 
1689 	return (1);
1690 }
1691 
1692 static void
1693 db_show_lockmgr(const struct lock_object *lock)
1694 {
1695 	struct thread *td;
1696 	const struct lock *lk;
1697 
1698 	lk = (const struct lock *)lock;
1699 
1700 	db_printf(" state: ");
1701 	if (lk->lk_lock == LK_UNLOCKED)
1702 		db_printf("UNLOCKED\n");
1703 	else if (lk->lk_lock & LK_SHARE)
1704 		db_printf("SLOCK: %ju\n", (uintmax_t)LK_SHARERS(lk->lk_lock));
1705 	else {
1706 		td = lockmgr_xholder(lk);
1707 		if (td == (struct thread *)LK_KERNPROC)
1708 			db_printf("XLOCK: LK_KERNPROC\n");
1709 		else
1710 			db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
1711 			    td->td_tid, td->td_proc->p_pid,
1712 			    td->td_proc->p_comm);
1713 		if (lockmgr_recursed(lk))
1714 			db_printf(" recursed: %d\n", lk->lk_recurse);
1715 	}
1716 	db_printf(" waiters: ");
1717 	switch (lk->lk_lock & LK_ALL_WAITERS) {
1718 	case LK_SHARED_WAITERS:
1719 		db_printf("shared\n");
1720 		break;
1721 	case LK_EXCLUSIVE_WAITERS:
1722 		db_printf("exclusive\n");
1723 		break;
1724 	case LK_ALL_WAITERS:
1725 		db_printf("shared and exclusive\n");
1726 		break;
1727 	default:
1728 		db_printf("none\n");
1729 	}
1730 	db_printf(" spinners: ");
1731 	if (lk->lk_lock & LK_EXCLUSIVE_SPINNERS)
1732 		db_printf("exclusive\n");
1733 	else
1734 		db_printf("none\n");
1735 }
1736 #endif
1737