xref: /freebsd/sys/kern/kern_lock.c (revision b3aaa0cc21c63d388230c7ef2a80abd631ff20d5)
1 /*-
2  * Copyright (c) 2008 Attilio Rao <attilio@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice(s), this list of conditions and the following disclaimer as
10  *    the first lines of this file unmodified other than the possible
11  *    addition of one or more copyright notices.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice(s), this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
26  * DAMAGE.
27  */
28 
29 #include "opt_ddb.h"
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/ktr.h>
36 #include <sys/lock.h>
37 #include <sys/lock_profile.h>
38 #include <sys/lockmgr.h>
39 #include <sys/mutex.h>
40 #include <sys/proc.h>
41 #include <sys/sleepqueue.h>
42 #ifdef DEBUG_LOCKS
43 #include <sys/stack.h>
44 #endif
45 #include <sys/systm.h>
46 
47 #include <machine/cpu.h>
48 
49 #ifdef DDB
50 #include <ddb/ddb.h>
51 #endif
52 
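/*
 * LK_CANRECURSE and LK_NOSHARE are stored directly in lo_flags by
 * lockinit(), so they must fit within the bits reserved for lock class
 * flags; the assertion below checks this at compile time.
 */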
53 CTASSERT(((LK_CANRECURSE | LK_NOSHARE) & LO_CLASSFLAGS) ==
54     (LK_CANRECURSE | LK_NOSHARE));
55 
56 #define	SQ_EXCLUSIVE_QUEUE	0
57 #define	SQ_SHARED_QUEUE		1
58 
59 #ifndef INVARIANTS
60 #define	_lockmgr_assert(lk, what, file, line)
61 #define	TD_LOCKS_INC(td)
62 #define	TD_LOCKS_DEC(td)
63 #else
64 #define	TD_LOCKS_INC(td)	((td)->td_locks++)
65 #define	TD_LOCKS_DEC(td)	((td)->td_locks--)
66 #endif
67 #define	TD_SLOCKS_INC(td)	((td)->td_lk_slocks++)
68 #define	TD_SLOCKS_DEC(td)	((td)->td_lk_slocks--)
69 
70 #ifndef DEBUG_LOCKS
71 #define	STACK_PRINT(lk)
72 #define	STACK_SAVE(lk)
73 #define	STACK_ZERO(lk)
74 #else
75 #define	STACK_PRINT(lk)	stack_print_ddb(&(lk)->lk_stack)
76 #define	STACK_SAVE(lk)	stack_save(&(lk)->lk_stack)
77 #define	STACK_ZERO(lk)	stack_zero(&(lk)->lk_stack)
78 #endif
79 
80 #define	LOCK_LOG2(lk, string, arg1, arg2)				\
81 	if (LOCK_LOG_TEST(&(lk)->lock_object, 0))			\
82 		CTR2(KTR_LOCK, (string), (arg1), (arg2))
83 #define	LOCK_LOG3(lk, string, arg1, arg2, arg3)				\
84 	if (LOCK_LOG_TEST(&(lk)->lock_object, 0))			\
85 		CTR3(KTR_LOCK, (string), (arg1), (arg2), (arg3))
86 
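/*
 * Giant handling around sleeps: GIANT_SAVE() records in _i how many times
 * curthread owns Giant and drops it completely before blocking, while
 * GIANT_RESTORE() reacquires it the same number of times afterwards, so a
 * lockmgr sleep never happens with Giant held.
 */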
87 #define	GIANT_DECLARE							\
88 	int _i = 0;							\
89 	WITNESS_SAVE_DECL(Giant)
90 #define	GIANT_RESTORE() do {						\
91 	if (_i > 0) {							\
92 		while (_i--)						\
93 			mtx_lock(&Giant);				\
94 		WITNESS_RESTORE(&Giant.lock_object, Giant);		\
95 	}								\
96 } while (0)
97 #define	GIANT_SAVE() do {						\
98 	if (mtx_owned(&Giant)) {					\
99 		WITNESS_SAVE(&Giant.lock_object, Giant);		\
100 		while (mtx_owned(&Giant)) {				\
101 			_i++;						\
102 			mtx_unlock(&Giant);				\
103 		}							\
104 	}								\
105 } while (0)
106 
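/*
 * A shared request may be granted as long as the lock is not held
 * exclusively (LK_SHARE set) and either no exclusive waiters are queued or
 * the requesting thread already holds shared lockmgr locks or has
 * TDP_DEADLKTREAT set; the last two exceptions avoid self-deadlock when
 * blocking behind exclusive waiters.
 */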
107 #define	LK_CAN_SHARE(x)							\
108 	(((x) & LK_SHARE) && (((x) & LK_EXCLUSIVE_WAITERS) == 0 ||	\
109 	curthread->td_lk_slocks || (curthread->td_pflags & TDP_DEADLKTREAT)))
110 #define	LK_TRYOP(x)							\
111 	((x) & LK_NOWAIT)
112 
113 #define	LK_CAN_WITNESS(x)						\
114 	(((x) & LK_NOWITNESS) == 0 && !LK_TRYOP(x))
115 #define	LK_TRYWIT(x)							\
116 	(LK_TRYOP(x) ? LOP_TRYLOCK : 0)
117 
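/*
 * lk_lock holds either the owning thread pointer (exclusive) or the
 * LK_SHARE bit plus a sharers count (shared), together with the waiters
 * flags; the macros below clear the flag bits other than LK_SHARE before
 * comparing the remaining value against LK_KERNPROC or curthread.
 */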
118 #define	lockmgr_disowned(lk)						\
119 	(((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == LK_KERNPROC)
120 
121 #define	lockmgr_xlocked(lk)						\
122 	(((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread)
123 
124 static void	 assert_lockmgr(struct lock_object *lock, int what);
125 #ifdef DDB
126 static void	 db_show_lockmgr(struct lock_object *lock);
127 #endif
128 static void	 lock_lockmgr(struct lock_object *lock, int how);
129 static int	 unlock_lockmgr(struct lock_object *lock);
130 
131 struct lock_class lock_class_lockmgr = {
132 	.lc_name = "lockmgr",
133 	.lc_flags = LC_RECURSABLE | LC_SLEEPABLE | LC_SLEEPLOCK | LC_UPGRADABLE,
134 	.lc_assert = assert_lockmgr,
135 #ifdef DDB
136 	.lc_ddb_show = db_show_lockmgr,
137 #endif
138 	.lc_lock = lock_lockmgr,
139 	.lc_unlock = unlock_lockmgr
140 };
141 
142 static __inline struct thread *
143 lockmgr_xholder(struct lock *lk)
144 {
145 	uintptr_t x;
146 
147 	x = lk->lk_lock;
148 	return ((x & LK_SHARE) ? NULL : (struct thread *)LK_HOLDER(x));
149 }
150 
151 /*
152  * This function assumes the sleepqueue chain lock is held and returns
153  * with it released.  It also assumes the generic interlock is valid and
154  * has already been checked by the caller.  If LK_INTERLOCK is specified,
155  * the interlock is not reacquired after the sleep.
156  */
157 static __inline int
158 sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk,
159     const char *wmesg, int pri, int timo, int queue)
160 {
161 	GIANT_DECLARE;
162 	struct lock_class *class;
163 	int catch, error;
164 
165 	class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
166 	catch = pri & PCATCH;
167 	pri &= PRIMASK;
168 	error = 0;
169 
170 	LOCK_LOG3(lk, "%s: %p blocking on the %s sleepqueue", __func__, lk,
171 	    (queue == SQ_EXCLUSIVE_QUEUE) ? "exclusive" : "shared");
172 
173 	if (flags & LK_INTERLOCK)
174 		class->lc_unlock(ilk);
175 	GIANT_SAVE();
176 	sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
177 	    SLEEPQ_INTERRUPTIBLE : 0), queue);
178 	if ((flags & LK_TIMELOCK) && timo)
179 		sleepq_set_timeout(&lk->lock_object, timo);
180 
181 	/*
182 	 * Decide which sleepqueue wait primitive to use.
183 	 */
184 	if ((flags & LK_TIMELOCK) && timo && catch)
185 		error = sleepq_timedwait_sig(&lk->lock_object, pri);
186 	else if ((flags & LK_TIMELOCK) && timo)
187 		error = sleepq_timedwait(&lk->lock_object, pri);
188 	else if (catch)
189 		error = sleepq_wait_sig(&lk->lock_object, pri);
190 	else
191 		sleepq_wait(&lk->lock_object, pri);
192 	GIANT_RESTORE();
193 	if ((flags & LK_SLEEPFAIL) && error == 0)
194 		error = ENOLCK;
195 
196 	return (error);
197 }
198 
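/*
 * Release one shared hold of the lock.  When the last sharer leaves and
 * waiters are queued, wake them up, preferring the exclusive queue.  The
 * return value is non-zero when sleepq_broadcast() reports that the swapper
 * needs to be kicked, which the caller does via kick_proc0().
 */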
199 static __inline int
200 wakeupshlk(struct lock *lk, const char *file, int line)
201 {
202 	uintptr_t v, x;
203 	int queue, wakeup_swapper;
204 
205 	TD_LOCKS_DEC(curthread);
206 	TD_SLOCKS_DEC(curthread);
207 	WITNESS_UNLOCK(&lk->lock_object, 0, file, line);
208 	LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line);
209 
210 	wakeup_swapper = 0;
211 	for (;;) {
212 		x = lk->lk_lock;
213 
214 		/*
215 		 * If there is more than one shared lock held, just drop one
216 		 * and return.
217 		 */
218 		if (LK_SHARERS(x) > 1) {
219 			if (atomic_cmpset_ptr(&lk->lk_lock, x,
220 			    x - LK_ONE_SHARER))
221 				break;
222 			continue;
223 		}
224 
225 		/*
226 		 * If there are no waiters on the exclusive queue, drop the
227 		 * lock quickly.
228 		 */
229 		if ((x & LK_ALL_WAITERS) == 0) {
230 			MPASS(x == LK_SHARERS_LOCK(1));
231 			if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1),
232 			    LK_UNLOCKED))
233 				break;
234 			continue;
235 		}
236 
237 		/*
238 		 * We should have a sharer with waiters, so enter the hard
239 		 * path in order to handle wakeups correctly.
240 		 */
241 		sleepq_lock(&lk->lock_object);
242 		x = lk->lk_lock & LK_ALL_WAITERS;
243 		v = LK_UNLOCKED;
244 
245 		/*
246 		 * If the lock has exclusive waiters, give them preference in
247 		 * order to avoid deadlock with shared runners-up.
248 		 */
249 		if (x & LK_EXCLUSIVE_WAITERS) {
250 			queue = SQ_EXCLUSIVE_QUEUE;
251 			v |= (x & LK_SHARED_WAITERS);
252 		} else {
253 			MPASS(x == LK_SHARED_WAITERS);
254 			queue = SQ_SHARED_QUEUE;
255 		}
256 
257 		if (!atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x,
258 		    v)) {
259 			sleepq_release(&lk->lock_object);
260 			continue;
261 		}
262 		LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
263 		    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
264 		    "exclusive");
265 		wakeup_swapper = sleepq_broadcast(&lk->lock_object, SLEEPQ_LK,
266 		    0, queue);
267 		sleepq_release(&lk->lock_object);
268 		break;
269 	}
270 
271 	lock_profile_release_lock(&lk->lock_object);
272 	return (wakeup_swapper);
273 }
274 
275 static void
276 assert_lockmgr(struct lock_object *lock, int what)
277 {
278 
279 	panic("lockmgr locks do not support assertions");
280 }
281 
282 static void
283 lock_lockmgr(struct lock_object *lock, int how)
284 {
285 
286 	panic("lockmgr locks do not support sleep interlocking");
287 }
288 
289 static int
290 unlock_lockmgr(struct lock_object *lock)
291 {
292 
293 	panic("lockmgr locks do not support sleep interlocking");
294 }
295 
296 void
297 lockinit(struct lock *lk, int pri, const char *wmesg, int timo, int flags)
298 {
299 	int iflags;
300 
301 	MPASS((flags & ~LK_INIT_MASK) == 0);
302 
303 	iflags = LO_RECURSABLE | LO_SLEEPABLE | LO_UPGRADABLE;
304 	if ((flags & LK_NODUP) == 0)
305 		iflags |= LO_DUPOK;
306 	if (flags & LK_NOPROFILE)
307 		iflags |= LO_NOPROFILE;
308 	if ((flags & LK_NOWITNESS) == 0)
309 		iflags |= LO_WITNESS;
310 	if (flags & LK_QUIET)
311 		iflags |= LO_QUIET;
312 	iflags |= flags & (LK_CANRECURSE | LK_NOSHARE);
313 
314 	lk->lk_lock = LK_UNLOCKED;
315 	lk->lk_recurse = 0;
316 	lk->lk_timo = timo;
317 	lk->lk_pri = pri;
318 	lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
319 	STACK_ZERO(lk);
320 }
321 
322 void
323 lockdestroy(struct lock *lk)
324 {
325 
326 	KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held"));
327 	KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed"));
328 	lock_destroy(&lk->lock_object);
329 }
330 
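/*
 * Typical lockmgr usage, shown only as a minimal illustrative sketch; it
 * assumes the lockmgr() convenience macro from <sys/lockmgr.h>, which
 * expands to __lockmgr_args() below with the default wmesg/pri/timo
 * arguments:
 *
 *	struct lock lk;
 *
 *	lockinit(&lk, PVFS, "examplelk", 0, 0);
 *	if (lockmgr(&lk, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) {
 *		... exclusive section ...
 *		lockmgr(&lk, LK_RELEASE, NULL);
 *	}
 *	lockdestroy(&lk);
 */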
331 int
332 __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
333     const char *wmesg, int pri, int timo, const char *file, int line)
334 {
335 	GIANT_DECLARE;
336 	uint64_t waittime;
337 	struct lock_class *class;
338 	const char *iwmesg;
339 	uintptr_t tid, v, x;
340 	u_int op;
341 	int contested, error, ipri, itimo, queue, wakeup_swapper;
342 
343 	contested = 0;
344 	error = 0;
345 	waittime = 0;
346 	tid = (uintptr_t)curthread;
347 	op = (flags & LK_TYPE_MASK);
348 	iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg;
349 	ipri = (pri == LK_PRIO_DEFAULT) ? lk->lk_pri : pri;
350 	itimo = (timo == LK_TIMO_DEFAULT) ? lk->lk_timo : timo;
351 
352 	MPASS((flags & ~LK_TOTAL_MASK) == 0);
353 	KASSERT((op & (op - 1)) == 0,
354 	    ("%s: Invalid requested operation @ %s:%d", __func__, file, line));
355 	KASSERT((flags & (LK_NOWAIT | LK_SLEEPFAIL)) == 0 ||
356 	    (op != LK_DOWNGRADE && op != LK_RELEASE),
357 	    ("%s: Invalid flags for the requested operation @ %s:%d",
358 	    __func__, file, line));
359 	KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL,
360 	    ("%s: LK_INTERLOCK passed without valid interlock @ %s:%d",
361 	    __func__, file, line));
362 
363 	class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
364 	if (panicstr != NULL) {
365 		if (flags & LK_INTERLOCK)
366 			class->lc_unlock(ilk);
367 		return (0);
368 	}
369 
370 	if (op == LK_SHARED && (lk->lock_object.lo_flags & LK_NOSHARE))
371 		op = LK_EXCLUSIVE;
372 
373 	wakeup_swapper = 0;
374 	switch (op) {
375 	case LK_SHARED:
376 		if (LK_CAN_WITNESS(flags))
377 			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
378 			    file, line, ilk);
379 		for (;;) {
380 			x = lk->lk_lock;
381 
382 			/*
383 			 * If no other thread has an exclusive lock, or
384 			 * no exclusive waiter is present, bump the count of
385 			 * sharers.  Since we have to preserve the state of
386 			 * waiters, if we fail to acquire the shared lock
387 			 * loop back and retry.
388 			 */
389 			if (LK_CAN_SHARE(x)) {
390 				if (atomic_cmpset_acq_ptr(&lk->lk_lock, x,
391 				    x + LK_ONE_SHARER))
392 					break;
393 				continue;
394 			}
395 			lock_profile_obtain_lock_failed(&lk->lock_object,
396 			    &contested, &waittime);
397 
398 			/*
399 			 * If the lock is already held by curthread in
400 			 * exclusive mode, avoid a deadlock.
401 			 */
402 			if (LK_HOLDER(x) == tid) {
403 				LOCK_LOG2(lk,
404 				    "%s: %p already held in exclusive mode",
405 				    __func__, lk);
406 				error = EDEADLK;
407 				break;
408 			}
409 
410 			/*
411 			 * If the operation is not allowed to sleep, just give up
412 			 * and return.
413 			 */
414 			if (LK_TRYOP(flags)) {
415 				LOCK_LOG2(lk, "%s: %p fails the try operation",
416 				    __func__, lk);
417 				error = EBUSY;
418 				break;
419 			}
420 
421 			/*
422 			 * Acquire the sleepqueue chain lock because we
423 			 * probably will need to manipulate the waiters flags.
424 			 */
425 			sleepq_lock(&lk->lock_object);
426 			x = lk->lk_lock;
427 
428 			/*
429 			 * If the lock can be acquired in shared mode, try
430 			 * again.
431 			 */
432 			if (LK_CAN_SHARE(x)) {
433 				sleepq_release(&lk->lock_object);
434 				continue;
435 			}
436 
437 			/*
438 			 * Try to set the LK_SHARED_WAITERS flag.  If we fail,
439 			 * loop back and retry.
440 			 */
441 			if ((x & LK_SHARED_WAITERS) == 0) {
442 				if (!atomic_cmpset_acq_ptr(&lk->lk_lock, x,
443 				    x | LK_SHARED_WAITERS)) {
444 					sleepq_release(&lk->lock_object);
445 					continue;
446 				}
447 				LOCK_LOG2(lk, "%s: %p set shared waiters flag",
448 				    __func__, lk);
449 			}
450 
451 			/*
452 			 * Since we have been unable to acquire the
453 			 * shared lock and the shared waiters flag is set,
454 			 * we will sleep.
455 			 */
456 			error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
457 			    SQ_SHARED_QUEUE);
458 			flags &= ~LK_INTERLOCK;
459 			if (error) {
460 				LOCK_LOG3(lk,
461 				    "%s: interrupted sleep for %p with %d",
462 				    __func__, lk, error);
463 				break;
464 			}
465 			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
466 			    __func__, lk);
467 		}
468 		if (error == 0) {
469 			lock_profile_obtain_lock_success(&lk->lock_object,
470 			    contested, waittime, file, line);
471 			LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, file,
472 			    line);
473 			WITNESS_LOCK(&lk->lock_object, LK_TRYWIT(flags), file,
474 			    line);
475 			TD_LOCKS_INC(curthread);
476 			TD_SLOCKS_INC(curthread);
477 			STACK_SAVE(lk);
478 		}
479 		break;
480 	case LK_UPGRADE:
481 		_lockmgr_assert(lk, KA_SLOCKED, file, line);
482 		x = lk->lk_lock & LK_ALL_WAITERS;
483 
484 		/*
485 		 * Try to switch from one shared lock to an exclusive one.
486 		 * We need to preserve waiters flags during the operation.
487 		 */
488 		if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x,
489 		    tid | x)) {
490 			LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file,
491 			    line);
492 			WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE |
493 			    LK_TRYWIT(flags), file, line);
494 			TD_SLOCKS_DEC(curthread);
495 			break;
496 		}
497 
498 		/*
499 		 * We have been unable to upgrade, so just
500 		 * give up the shared lock.
501 		 */
502 		wakeup_swapper |= wakeupshlk(lk, file, line);
503 
504 		/* FALLTHROUGH */
505 	case LK_EXCLUSIVE:
506 		if (LK_CAN_WITNESS(flags))
507 			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
508 			    LOP_EXCLUSIVE, file, line, ilk);
509 
510 		/*
511 		 * If curthread already holds the lock and this one is
512 		 * allowed to recurse, simply recurse on it.
513 		 */
514 		if (lockmgr_xlocked(lk)) {
515 			if ((flags & LK_CANRECURSE) == 0 &&
516 			    (lk->lock_object.lo_flags & LK_CANRECURSE) == 0) {
517 
518 				/*
519 				 * If this is a try operation, just give up and
520 				 * return rather than panicking.
521 				 */
522 				if (LK_TRYOP(flags)) {
523 					LOCK_LOG2(lk,
524 					    "%s: %p fails the try operation",
525 					    __func__, lk);
526 					error = EBUSY;
527 					break;
528 				}
529 				if (flags & LK_INTERLOCK)
530 					class->lc_unlock(ilk);
531 		panic("%s: recursing on non-recursive lockmgr %s @ %s:%d\n",
532 				    __func__, iwmesg, file, line);
533 			}
534 			lk->lk_recurse++;
535 			LOCK_LOG2(lk, "%s: %p recursing", __func__, lk);
536 			LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
537 			    lk->lk_recurse, file, line);
538 			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
539 			    LK_TRYWIT(flags), file, line);
540 			TD_LOCKS_INC(curthread);
541 			break;
542 		}
543 
544 		while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED,
545 		    tid)) {
546 			lock_profile_obtain_lock_failed(&lk->lock_object,
547 			    &contested, &waittime);
548 
549 			/*
550 			 * If the operation is not allowed to sleep, just give up
551 			 * and return.
552 			 */
553 			if (LK_TRYOP(flags)) {
554 				LOCK_LOG2(lk, "%s: %p fails the try operation",
555 				    __func__, lk);
556 				error = EBUSY;
557 				break;
558 			}
559 
560 			/*
561 			 * Acquire the sleepqueue chain lock because we
562 			 * probably will need to manipulate the waiters flags.
563 			 */
564 			sleepq_lock(&lk->lock_object);
565 			x = lk->lk_lock;
566 			v = x & LK_ALL_WAITERS;
567 
568 			/*
569 			 * If the lock has been released while we spun on
570 			 * the sleepqueue chain lock just try again.
571 			 */
572 			if (x == LK_UNLOCKED) {
573 				sleepq_release(&lk->lock_object);
574 				continue;
575 			}
576 
577 			/*
578 			 * The lock can be in the state where there is a
579 			 * pending queue of waiters, but still no owner.
580 			 * This happens when the lock is contested and an
581 			 * owner is going to claim the lock.
582 			 * If curthread is the one that successfully acquires
583 			 * it, claim lock ownership and return, preserving the
584 			 * waiters flags.
585 			 */
586 			if (x == (LK_UNLOCKED | v)) {
587 				if (atomic_cmpset_acq_ptr(&lk->lk_lock, x,
588 				    tid | v)) {
589 					sleepq_release(&lk->lock_object);
590 					LOCK_LOG2(lk,
591 					    "%s: %p claimed by a new writer",
592 					    __func__, lk);
593 					break;
594 				}
595 				sleepq_release(&lk->lock_object);
596 				continue;
597 			}
598 
599 			/*
600 			 * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
601 			 * fail, loop back and retry.
602 			 */
603 			if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
604 				if (!atomic_cmpset_ptr(&lk->lk_lock, x,
605 				    x | LK_EXCLUSIVE_WAITERS)) {
606 					sleepq_release(&lk->lock_object);
607 					continue;
608 				}
609 				LOCK_LOG2(lk, "%s: %p set excl waiters flag",
610 				    __func__, lk);
611 			}
612 
613 			/*
614 			 * Since we have been unable to acquire the
615 			 * exclusive lock and the exclusive waiters flag
616 			 * is set, we will sleep.
617 			 */
618 			error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
619 			    SQ_EXCLUSIVE_QUEUE);
620 			flags &= ~LK_INTERLOCK;
621 			if (error) {
622 				LOCK_LOG3(lk,
623 				    "%s: interrupted sleep for %p with %d",
624 				    __func__, lk, error);
625 				break;
626 			}
627 			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
628 			    __func__, lk);
629 		}
630 		if (error == 0) {
631 			lock_profile_obtain_lock_success(&lk->lock_object,
632 			    contested, waittime, file, line);
633 			LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
634 			    lk->lk_recurse, file, line);
635 			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
636 			    LK_TRYWIT(flags), file, line);
637 			TD_LOCKS_INC(curthread);
638 			STACK_SAVE(lk);
639 		}
640 		break;
641 	case LK_DOWNGRADE:
642 		_lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED, file, line);
643 		LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line);
644 		WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line);
645 		TD_SLOCKS_INC(curthread);
646 
647 		/*
648 		 * In order to preserve waiters flags, just spin.
649 		 */
650 		for (;;) {
651 			x = lk->lk_lock & LK_ALL_WAITERS;
652 			if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
653 			    LK_SHARERS_LOCK(1) | x))
654 				break;
655 			cpu_spinwait();
656 		}
657 		break;
658 	case LK_RELEASE:
659 		_lockmgr_assert(lk, KA_LOCKED, file, line);
660 		x = lk->lk_lock;
661 
662 		if ((x & LK_SHARE) == 0) {
663 
664 			/*
665 			 * As a first attempt, treat the lock as if it has
666 			 * no waiters.
667 			 * Fix up the tid variable if the lock has been disowned.
668 			 */
669 			if (LK_HOLDER(x) == LK_KERNPROC)
670 				tid = LK_KERNPROC;
671 			else {
672 				WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE,
673 				    file, line);
674 				TD_LOCKS_DEC(curthread);
675 			}
676 			LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0,
677 			    lk->lk_recurse, file, line);
678 
679 			/*
680 			 * The lock is held in exclusive mode.
681 			 * If the lock is recursed also, then unrecurse it.
682 			 */
683 			if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
684 				LOCK_LOG2(lk, "%s: %p unrecursing", __func__,
685 				    lk);
686 				lk->lk_recurse--;
687 				break;
688 			}
689 			lock_profile_release_lock(&lk->lock_object);
690 
691 			if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid,
692 			    LK_UNLOCKED))
693 				break;
694 
695 			sleepq_lock(&lk->lock_object);
696 			x = lk->lk_lock & LK_ALL_WAITERS;
697 			v = LK_UNLOCKED;
698 
699 			/*
700 			 * If the lock has exclusive waiters, give them
701 			 * preference in order to avoid deadlock with
702 			 * shared runners-up.
703 			 */
704 			if (x & LK_EXCLUSIVE_WAITERS) {
705 				queue = SQ_EXCLUSIVE_QUEUE;
706 				v |= (x & LK_SHARED_WAITERS);
707 			} else {
708 				MPASS(x == LK_SHARED_WAITERS);
709 				queue = SQ_SHARED_QUEUE;
710 			}
711 
712 			LOCK_LOG3(lk,
713 			    "%s: %p waking up threads on the %s queue",
714 			    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
715 			    "exclusive");
716 			atomic_store_rel_ptr(&lk->lk_lock, v);
717 			wakeup_swapper = sleepq_broadcast(&lk->lock_object,
718 			    SLEEPQ_LK, 0, queue);
719 			sleepq_release(&lk->lock_object);
720 			break;
721 		} else
722 			wakeup_swapper = wakeupshlk(lk, file, line);
723 		break;
724 	case LK_DRAIN:
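		/*
		 * Draining waits for every current holder and every queued
		 * waiter to go away and then leaves the lock held exclusively
		 * by the caller; it is typically used just before tearing
		 * down the object protected by the lock.
		 */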
725 		if (LK_CAN_WITNESS(flags))
726 			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
727 			    LOP_EXCLUSIVE, file, line, ilk);
728 
729 		/*
730 		 * Trying to drain a lock we already own will result in a
731 		 * deadlock.
732 		 */
733 		if (lockmgr_xlocked(lk)) {
734 			if (flags & LK_INTERLOCK)
735 				class->lc_unlock(ilk);
736 			panic("%s: draining %s with the lock held @ %s:%d\n",
737 			    __func__, iwmesg, file, line);
738 		}
739 
740 		while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
741 			lock_profile_obtain_lock_failed(&lk->lock_object,
742 			    &contested, &waittime);
743 
744 			/*
745 			 * If the operation is not allowed to sleep, just give up
746 			 * and return.
747 			 */
748 			if (LK_TRYOP(flags)) {
749 				LOCK_LOG2(lk, "%s: %p fails the try operation",
750 				    __func__, lk);
751 				error = EBUSY;
752 				break;
753 			}
754 
755 			/*
756 			 * Acquire the sleepqueue chain lock because we
757 			 * probably will need to manipulate the waiters flags.
758 			 */
759 			sleepq_lock(&lk->lock_object);
760 			x = lk->lk_lock;
761 			v = x & LK_ALL_WAITERS;
762 
763 			/*
764 			 * If the lock has been released while we spun on
765 			 * the sleepqueue chain lock just try again.
766 			 */
767 			if (x == LK_UNLOCKED) {
768 				sleepq_release(&lk->lock_object);
769 				continue;
770 			}
771 
772 			if (x == (LK_UNLOCKED | v)) {
773 				v = x;
774 				if (v & LK_EXCLUSIVE_WAITERS) {
775 					queue = SQ_EXCLUSIVE_QUEUE;
776 					v &= ~LK_EXCLUSIVE_WAITERS;
777 				} else {
778 					MPASS(v & LK_SHARED_WAITERS);
779 					queue = SQ_SHARED_QUEUE;
780 					v &= ~LK_SHARED_WAITERS;
781 				}
782 				if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) {
783 					sleepq_release(&lk->lock_object);
784 					continue;
785 				}
786 				LOCK_LOG3(lk,
787 				"%s: %p waking up all threads on the %s queue",
788 				    __func__, lk, queue == SQ_SHARED_QUEUE ?
789 				    "shared" : "exclusive");
790 				wakeup_swapper |= sleepq_broadcast(
791 				    &lk->lock_object, SLEEPQ_LK, 0, queue);
792 
793 				/*
794 				 * If shared waiters have been woken up, we need
795 				 * to wait for one of them to acquire the lock
796 				 * before setting the exclusive waiters flag,
797 				 * in order to avoid a deadlock.
798 				 */
799 				if (queue == SQ_SHARED_QUEUE) {
800 					for (v = lk->lk_lock;
801 					    (v & LK_SHARE) && !LK_SHARERS(v);
802 					    v = lk->lk_lock)
803 						cpu_spinwait();
804 				}
805 			}
806 
807 			/*
808 			 * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
809 			 * fail, loop back and retry.
810 			 */
811 			if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
812 				if (!atomic_cmpset_ptr(&lk->lk_lock, x,
813 				    x | LK_EXCLUSIVE_WAITERS)) {
814 					sleepq_release(&lk->lock_object);
815 					continue;
816 				}
817 				LOCK_LOG2(lk, "%s: %p set drain waiters flag",
818 				    __func__, lk);
819 			}
820 
821 			/*
822 			 * Since we have been unable to acquire the
823 			 * exclusive lock and the exclusive waiters flag
824 			 * is set, we will sleep.
825 			 */
826 			if (flags & LK_INTERLOCK) {
827 				class->lc_unlock(ilk);
828 				flags &= ~LK_INTERLOCK;
829 			}
830 			GIANT_SAVE();
831 			sleepq_add(&lk->lock_object, NULL, iwmesg, SLEEPQ_LK,
832 			    SQ_EXCLUSIVE_QUEUE);
833 			sleepq_wait(&lk->lock_object, ipri & PRIMASK);
834 			GIANT_RESTORE();
835 			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
836 			    __func__, lk);
837 		}
838 
839 		if (error == 0) {
840 			lock_profile_obtain_lock_success(&lk->lock_object,
841 			    contested, waittime, file, line);
842 			LOCK_LOG_LOCK("DRAIN", &lk->lock_object, 0,
843 			    lk->lk_recurse, file, line);
844 			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
845 			    LK_TRYWIT(flags), file, line);
846 			TD_LOCKS_INC(curthread);
847 			STACK_SAVE(lk);
848 		}
849 		break;
850 	default:
851 		if (flags & LK_INTERLOCK)
852 			class->lc_unlock(ilk);
853 		panic("%s: unknown lockmgr request 0x%x\n", __func__, op);
854 	}
855 
856 	if (flags & LK_INTERLOCK)
857 		class->lc_unlock(ilk);
858 	if (wakeup_swapper)
859 		kick_proc0();
860 
861 	return (error);
862 }
863 
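/*
 * Give up ownership of an exclusively held lock in favour of LK_KERNPROC,
 * so that no particular thread is charged with releasing it and a later
 * LK_RELEASE may come from any context.  A typical (assumed, not taken from
 * this file) example is handing a locked buffer to the kernel for
 * asynchronous I/O completion.
 */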
864 void
865 _lockmgr_disown(struct lock *lk, const char *file, int line)
866 {
867 	uintptr_t tid, x;
868 
869 	tid = (uintptr_t)curthread;
870 	_lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED, file, line);
871 
872 	/*
873 	 * If the owner is already LK_KERNPROC just skip the whole operation.
874 	 */
875 	if (LK_HOLDER(lk->lk_lock) != tid)
876 		return;
877 	LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line);
878 	WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
879 	TD_LOCKS_DEC(curthread);
880 
881 	/*
882 	 * In order to preserve waiters flags, just spin.
883 	 */
884 	for (;;) {
885 		x = lk->lk_lock & LK_ALL_WAITERS;
886 		if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
887 		    LK_KERNPROC | x))
888 			return;
889 		cpu_spinwait();
890 	}
891 }
892 
893 void
894 lockmgr_printinfo(struct lock *lk)
895 {
896 	struct thread *td;
897 	uintptr_t x;
898 
899 	if (lk->lk_lock == LK_UNLOCKED)
900 		printf("lock type %s: UNLOCKED\n", lk->lock_object.lo_name);
901 	else if (lk->lk_lock & LK_SHARE)
902 		printf("lock type %s: SHARED (count %ju)\n",
903 		    lk->lock_object.lo_name,
904 		    (uintmax_t)LK_SHARERS(lk->lk_lock));
905 	else {
906 		td = lockmgr_xholder(lk);
907 		printf("lock type %s: EXCL by thread %p (pid %d)\n",
908 		    lk->lock_object.lo_name, td, td->td_proc->p_pid);
909 	}
910 
911 	x = lk->lk_lock;
912 	if (x & LK_EXCLUSIVE_WAITERS)
913 		printf(" with exclusive waiters pending\n");
914 	if (x & LK_SHARED_WAITERS)
915 		printf(" with shared waiters pending\n");
916 
917 	STACK_PRINT(lk);
918 }
919 
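/*
 * Report how the lock is held relative to curthread: LK_EXCLUSIVE if
 * curthread (or LK_KERNPROC) owns it exclusively, LK_EXCLOTHER if another
 * thread does, LK_SHARED if it is share-locked and 0 if it is unlocked.
 */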
920 int
921 lockstatus(struct lock *lk)
922 {
923 	uintptr_t v, x;
924 	int ret;
925 
926 	ret = LK_SHARED;
927 	x = lk->lk_lock;
928 	v = LK_HOLDER(x);
929 
930 	if ((x & LK_SHARE) == 0) {
931 		if (v == (uintptr_t)curthread || v == LK_KERNPROC)
932 			ret = LK_EXCLUSIVE;
933 		else
934 			ret = LK_EXCLOTHER;
935 	} else if (x == LK_UNLOCKED)
936 		ret = 0;
937 
938 	return (ret);
939 }
940 
941 #ifdef INVARIANT_SUPPORT
942 #ifndef INVARIANTS
943 #undef	_lockmgr_assert
944 #endif
945 
946 void
947 _lockmgr_assert(struct lock *lk, int what, const char *file, int line)
948 {
949 	int slocked = 0;
950 
951 	if (panicstr != NULL)
952 		return;
953 	switch (what) {
954 	case KA_SLOCKED:
955 	case KA_SLOCKED | KA_NOTRECURSED:
956 	case KA_SLOCKED | KA_RECURSED:
957 		slocked = 1;
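		/* FALLTHROUGH */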
958 	case KA_LOCKED:
959 	case KA_LOCKED | KA_NOTRECURSED:
960 	case KA_LOCKED | KA_RECURSED:
961 #ifdef WITNESS
962 
963 		/*
964 		 * We cannot trust WITNESS if the lock is held in exclusive
965 		 * mode and a call to lockmgr_disown() happened.
966 		 * Work around this by skipping the check if the lock is held
967 		 * in exclusive mode, even for the KA_LOCKED case.
968 		 */
969 		if (slocked || (lk->lk_lock & LK_SHARE)) {
970 			witness_assert(&lk->lock_object, what, file, line);
971 			break;
972 		}
973 #endif
974 		if (lk->lk_lock == LK_UNLOCKED ||
975 		    ((lk->lk_lock & LK_SHARE) == 0 && (slocked ||
976 		    (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk)))))
977 			panic("Lock %s not %slocked @ %s:%d\n",
978 			    lk->lock_object.lo_name, slocked ? "share" : "",
979 			    file, line);
980 
981 		if ((lk->lk_lock & LK_SHARE) == 0) {
982 			if (lockmgr_recursed(lk)) {
983 				if (what & KA_NOTRECURSED)
984 					panic("Lock %s recursed @ %s:%d\n",
985 					    lk->lock_object.lo_name, file,
986 					    line);
987 			} else if (what & KA_RECURSED)
988 				panic("Lock %s not recursed @ %s:%d\n",
989 				    lk->lock_object.lo_name, file, line);
990 		}
991 		break;
992 	case KA_XLOCKED:
993 	case KA_XLOCKED | KA_NOTRECURSED:
994 	case KA_XLOCKED | KA_RECURSED:
995 		if (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk))
996 			panic("Lock %s not exclusively locked @ %s:%d\n",
997 			    lk->lock_object.lo_name, file, line);
998 		if (lockmgr_recursed(lk)) {
999 			if (what & KA_NOTRECURSED)
1000 				panic("Lock %s recursed @ %s:%d\n",
1001 				    lk->lock_object.lo_name, file, line);
1002 		} else if (what & KA_RECURSED)
1003 			panic("Lock %s not recursed @ %s:%d\n",
1004 			    lk->lock_object.lo_name, file, line);
1005 		break;
1006 	case KA_UNLOCKED:
1007 		if (lockmgr_xlocked(lk) || lockmgr_disowned(lk))
1008 			panic("Lock %s exclusively locked @ %s:%d\n",
1009 			    lk->lock_object.lo_name, file, line);
1010 		break;
1011 	default:
1012 		panic("Unknown lockmgr assertion: %d @ %s:%d\n", what, file,
1013 		    line);
1014 	}
1015 }
1016 #endif
1017 
1018 #ifdef DDB
1019 int
1020 lockmgr_chain(struct thread *td, struct thread **ownerp)
1021 {
1022 	struct lock *lk;
1023 
1024 	lk = td->td_wchan;
1025 
1026 	if (LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr)
1027 		return (0);
1028 	db_printf("blocked on lockmgr %s", lk->lock_object.lo_name);
1029 	if (lk->lk_lock & LK_SHARE)
1030 		db_printf("SHARED (count %ju)\n",
1031 		    (uintmax_t)LK_SHARERS(lk->lk_lock));
1032 	else
1033 		db_printf("EXCL\n");
1034 	*ownerp = lockmgr_xholder(lk);
1035 
1036 	return (1);
1037 }
1038 
1039 static void
1040 db_show_lockmgr(struct lock_object *lock)
1041 {
1042 	struct thread *td;
1043 	struct lock *lk;
1044 
1045 	lk = (struct lock *)lock;
1046 
1047 	db_printf(" state: ");
1048 	if (lk->lk_lock == LK_UNLOCKED)
1049 		db_printf("UNLOCKED\n");
1050 	else if (lk->lk_lock & LK_SHARE)
1051 		db_printf("SLOCK: %ju\n", (uintmax_t)LK_SHARERS(lk->lk_lock));
1052 	else {
1053 		td = lockmgr_xholder(lk);
1054 		if (td == (struct thread *)LK_KERNPROC)
1055 			db_printf("XLOCK: LK_KERNPROC\n");
1056 		else
1057 			db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
1058 			    td->td_tid, td->td_proc->p_pid,
1059 			    td->td_proc->p_comm);
1060 		if (lockmgr_recursed(lk))
1061 			db_printf(" recursed: %d\n", lk->lk_recurse);
1062 	}
1063 	db_printf(" waiters: ");
1064 	switch (lk->lk_lock & LK_ALL_WAITERS) {
1065 	case LK_SHARED_WAITERS:
1066 		db_printf("shared\n");
		break;
1067 	case LK_EXCLUSIVE_WAITERS:
1068 		db_printf("exclusive\n");
1069 		break;
1070 	case LK_ALL_WAITERS:
1071 		db_printf("shared and exclusive\n");
1072 		break;
1073 	default:
1074 		db_printf("none\n");
1075 	}
1076 }
1077 #endif
1078