xref: /freebsd/sys/kern/kern_lock.c (revision c0020399a650364d0134f79f3fa319f84064372d)
1 /*-
2  * Copyright (c) 2008 Attilio Rao <attilio@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice(s), this list of conditions and the following disclaimer as
10  *    the first lines of this file unmodified other than the possible
11  *    addition of one or more copyright notices.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice(s), this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
26  * DAMAGE.
27  */
28 
29 #include "opt_ddb.h"
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/ktr.h>
36 #include <sys/lock.h>
37 #include <sys/lock_profile.h>
38 #include <sys/lockmgr.h>
39 #include <sys/mutex.h>
40 #include <sys/proc.h>
41 #include <sys/sleepqueue.h>
42 #ifdef DEBUG_LOCKS
43 #include <sys/stack.h>
44 #endif
45 #include <sys/systm.h>
46 
47 #include <machine/cpu.h>
48 
49 #ifdef DDB
50 #include <ddb/ddb.h>
51 #endif
52 
53 CTASSERT(((LK_CANRECURSE | LK_NOSHARE) & LO_CLASSFLAGS) ==
54     (LK_CANRECURSE | LK_NOSHARE));
55 
56 #define	SQ_EXCLUSIVE_QUEUE	0
57 #define	SQ_SHARED_QUEUE		1
58 
59 #ifndef INVARIANTS
60 #define	_lockmgr_assert(lk, what, file, line)
61 #define	TD_LOCKS_INC(td)
62 #define	TD_LOCKS_DEC(td)
63 #else
64 #define	TD_LOCKS_INC(td)	((td)->td_locks++)
65 #define	TD_LOCKS_DEC(td)	((td)->td_locks--)
66 #endif
67 #define	TD_SLOCKS_INC(td)	((td)->td_lk_slocks++)
68 #define	TD_SLOCKS_DEC(td)	((td)->td_lk_slocks--)
69 
70 #ifndef DEBUG_LOCKS
71 #define	STACK_PRINT(lk)
72 #define	STACK_SAVE(lk)
73 #define	STACK_ZERO(lk)
74 #else
75 #define	STACK_PRINT(lk)	stack_print_ddb(&(lk)->lk_stack)
76 #define	STACK_SAVE(lk)	stack_save(&(lk)->lk_stack)
77 #define	STACK_ZERO(lk)	stack_zero(&(lk)->lk_stack)
78 #endif
79 
80 #define	LOCK_LOG2(lk, string, arg1, arg2)				\
81 	if (LOCK_LOG_TEST(&(lk)->lock_object, 0))			\
82 		CTR2(KTR_LOCK, (string), (arg1), (arg2))
83 #define	LOCK_LOG3(lk, string, arg1, arg2, arg3)				\
84 	if (LOCK_LOG_TEST(&(lk)->lock_object, 0))			\
85 		CTR3(KTR_LOCK, (string), (arg1), (arg2), (arg3))
86 
87 #define	GIANT_DECLARE							\
88 	int _i = 0;							\
89 	WITNESS_SAVE_DECL(Giant)
90 #define	GIANT_RESTORE() do {						\
91 	if (_i > 0) {							\
92 		while (_i--)						\
93 			mtx_lock(&Giant);				\
94 		WITNESS_RESTORE(&Giant.lock_object, Giant);		\
95 	}								\
96 } while (0)
97 #define	GIANT_SAVE() do {						\
98 	if (mtx_owned(&Giant)) {					\
99 		WITNESS_SAVE(&Giant.lock_object, Giant);		\
100 		while (mtx_owned(&Giant)) {				\
101 			_i++;						\
102 			mtx_unlock(&Giant);				\
103 		}							\
104 	}								\
105 } while (0)
106 
107 #define	LK_CAN_SHARE(x)							\
108 	(((x) & LK_SHARE) && (((x) & LK_EXCLUSIVE_WAITERS) == 0 ||	\
109 	curthread->td_lk_slocks || (curthread->td_pflags & TDP_DEADLKTREAT)))
110 #define	LK_TRYOP(x)							\
111 	((x) & LK_NOWAIT)
112 
113 #define	LK_CAN_WITNESS(x)						\
114 	(((x) & LK_NOWITNESS) == 0 && !LK_TRYOP(x))
115 #define	LK_TRYWIT(x)							\
116 	(LK_TRYOP(x) ? LOP_TRYLOCK : 0)
117 
118 #define	lockmgr_disowned(lk)						\
119 	(((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == LK_KERNPROC)
120 
121 #define	lockmgr_xlocked(lk)						\
122 	(((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread)
123 
124 static void	 assert_lockmgr(struct lock_object *lock, int how);
125 #ifdef DDB
126 static void	 db_show_lockmgr(struct lock_object *lock);
127 #endif
128 static void	 lock_lockmgr(struct lock_object *lock, int how);
129 static int	 unlock_lockmgr(struct lock_object *lock);
130 
131 struct lock_class lock_class_lockmgr = {
132 	.lc_name = "lockmgr",
133 	.lc_flags = LC_RECURSABLE | LC_SLEEPABLE | LC_SLEEPLOCK | LC_UPGRADABLE,
134 	.lc_assert = assert_lockmgr,
135 #ifdef DDB
136 	.lc_ddb_show = db_show_lockmgr,
137 #endif
138 	.lc_lock = lock_lockmgr,
139 	.lc_unlock = unlock_lockmgr
140 };
141 
142 static __inline struct thread *
143 lockmgr_xholder(struct lock *lk)
144 {
145 	uintptr_t x;
146 
147 	x = lk->lk_lock;
148 	return ((x & LK_SHARE) ? NULL : (struct thread *)LK_HOLDER(x));
149 }
150 
151 /*
152  * It assumes sleepq_lock held and returns with this one unheld.
153  * It also assumes the generic interlock is sane and previously checked.
154  * If LK_INTERLOCK is specified the interlock is not reacquired after the
155  * sleep.
156  */
157 static __inline int
158 sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk,
159     const char *wmesg, int pri, int timo, int queue)
160 {
161 	GIANT_DECLARE;
162 	struct lock_class *class;
163 	int catch, error;
164 
165 	class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
166 	catch = pri & PCATCH;
167 	pri &= PRIMASK;
168 	error = 0;
169 
170 	LOCK_LOG3(lk, "%s: %p blocking on the %s sleepqueue", __func__, lk,
171 	    (queue == SQ_EXCLUSIVE_QUEUE) ? "exclusive" : "shared");
172 
173 	if (flags & LK_INTERLOCK)
174 		class->lc_unlock(ilk);
175 	GIANT_SAVE();
176 	sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
177 	    SLEEPQ_INTERRUPTIBLE : 0), queue);
178 	if ((flags & LK_TIMELOCK) && timo)
179 		sleepq_set_timeout(&lk->lock_object, timo);
180 
181 	/*
182 	 * Decisional switch for real sleeping.
183 	 */
184 	if ((flags & LK_TIMELOCK) && timo && catch)
185 		error = sleepq_timedwait_sig(&lk->lock_object, pri);
186 	else if ((flags & LK_TIMELOCK) && timo)
187 		error = sleepq_timedwait(&lk->lock_object, pri);
188 	else if (catch)
189 		error = sleepq_wait_sig(&lk->lock_object, pri);
190 	else
191 		sleepq_wait(&lk->lock_object, pri);
192 	GIANT_RESTORE();
193 	if ((flags & LK_SLEEPFAIL) && error == 0)
194 		error = ENOLCK;
195 
196 	return (error);
197 }
198 
199 static __inline int
200 wakeupshlk(struct lock *lk, const char *file, int line)
201 {
202 	uintptr_t v, x;
203 	int queue, wakeup_swapper;
204 
205 	TD_LOCKS_DEC(curthread);
206 	TD_SLOCKS_DEC(curthread);
207 	WITNESS_UNLOCK(&lk->lock_object, 0, file, line);
208 	LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line);
209 
210 	wakeup_swapper = 0;
211 	for (;;) {
212 		x = lk->lk_lock;
213 
214 		/*
215 		 * If there is more than one shared lock held, just drop one
216 		 * and return.
217 		 */
218 		if (LK_SHARERS(x) > 1) {
219 			if (atomic_cmpset_ptr(&lk->lk_lock, x,
220 			    x - LK_ONE_SHARER))
221 				break;
222 			continue;
223 		}
224 
225 		/*
226 		 * If there are not waiters on the exclusive queue, drop the
227 		 * lock quickly.
228 		 */
229 		if ((x & LK_ALL_WAITERS) == 0) {
230 			MPASS(x == LK_SHARERS_LOCK(1));
231 			if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1),
232 			    LK_UNLOCKED))
233 				break;
234 			continue;
235 		}
236 
237 		/*
238 		 * We should have a sharer with waiters, so enter the hard
239 		 * path in order to handle wakeups correctly.
240 		 */
241 		sleepq_lock(&lk->lock_object);
242 		x = lk->lk_lock & LK_ALL_WAITERS;
243 		v = LK_UNLOCKED;
244 
245 		/*
246 		 * If the lock has exclusive waiters, give them preference in
247 		 * order to avoid deadlock with shared runners up.
248 		 */
249 		if (x & LK_EXCLUSIVE_WAITERS) {
250 			queue = SQ_EXCLUSIVE_QUEUE;
251 			v |= (x & LK_SHARED_WAITERS);
252 		} else {
253 			MPASS(x == LK_SHARED_WAITERS);
254 			queue = SQ_SHARED_QUEUE;
255 		}
256 
257 		if (!atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x,
258 		    v)) {
259 			sleepq_release(&lk->lock_object);
260 			continue;
261 		}
262 		LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
263 		    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
264 		    "exclusive");
265 		wakeup_swapper = sleepq_broadcast(&lk->lock_object, SLEEPQ_LK,
266 		    0, queue);
267 		sleepq_release(&lk->lock_object);
268 		break;
269 	}
270 
271 	lock_profile_release_lock(&lk->lock_object);
272 	return (wakeup_swapper);
273 }
274 
275 static void
276 assert_lockmgr(struct lock_object *lock, int what)
277 {
278 
279 	panic("lockmgr locks do not support assertions");
280 }
281 
282 static void
283 lock_lockmgr(struct lock_object *lock, int how)
284 {
285 
286 	panic("lockmgr locks do not support sleep interlocking");
287 }
288 
289 static int
290 unlock_lockmgr(struct lock_object *lock)
291 {
292 
293 	panic("lockmgr locks do not support sleep interlocking");
294 }
295 
296 void
297 lockinit(struct lock *lk, int pri, const char *wmesg, int timo, int flags)
298 {
299 	int iflags;
300 
301 	MPASS((flags & ~LK_INIT_MASK) == 0);
302 
303 	iflags = LO_RECURSABLE | LO_SLEEPABLE | LO_UPGRADABLE;
304 	if ((flags & LK_NODUP) == 0)
305 		iflags |= LO_DUPOK;
306 	if (flags & LK_NOPROFILE)
307 		iflags |= LO_NOPROFILE;
308 	if ((flags & LK_NOWITNESS) == 0)
309 		iflags |= LO_WITNESS;
310 	if (flags & LK_QUIET)
311 		iflags |= LO_QUIET;
312 	iflags |= flags & (LK_CANRECURSE | LK_NOSHARE);
313 
314 	lk->lk_lock = LK_UNLOCKED;
315 	lk->lk_recurse = 0;
316 	lk->lk_timo = timo;
317 	lk->lk_pri = pri;
318 	lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
319 	STACK_ZERO(lk);
320 }
321 
322 void
323 lockdestroy(struct lock *lk)
324 {
325 
326 	KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held"));
327 	KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed"));
328 	lock_destroy(&lk->lock_object);
329 }
330 
331 int
332 __lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
333     const char *wmesg, int pri, int timo, const char *file, int line)
334 {
335 	GIANT_DECLARE;
336 	struct lock_class *class;
337 	const char *iwmesg;
338 	uintptr_t tid, v, x;
339 	u_int op;
340 	int error, ipri, itimo, queue, wakeup_swapper;
341 #ifdef LOCK_PROFILING
342 	uint64_t waittime = 0;
343 	int contested = 0;
344 #endif
345 
346 	error = 0;
347 	tid = (uintptr_t)curthread;
348 	op = (flags & LK_TYPE_MASK);
349 	iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg;
350 	ipri = (pri == LK_PRIO_DEFAULT) ? lk->lk_pri : pri;
351 	itimo = (timo == LK_TIMO_DEFAULT) ? lk->lk_timo : timo;
352 
353 	MPASS((flags & ~LK_TOTAL_MASK) == 0);
354 	KASSERT((op & (op - 1)) == 0,
355 	    ("%s: Invalid requested operation @ %s:%d", __func__, file, line));
356 	KASSERT((flags & (LK_NOWAIT | LK_SLEEPFAIL)) == 0 ||
357 	    (op != LK_DOWNGRADE && op != LK_RELEASE),
358 	    ("%s: Invalid flags in regard of the operation desired @ %s:%d",
359 	    __func__, file, line));
360 	KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL,
361 	    ("%s: LK_INTERLOCK passed without valid interlock @ %s:%d",
362 	    __func__, file, line));
363 
364 	class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
365 	if (panicstr != NULL) {
366 		if (flags & LK_INTERLOCK)
367 			class->lc_unlock(ilk);
368 		return (0);
369 	}
370 
371 	if (op == LK_SHARED && (lk->lock_object.lo_flags & LK_NOSHARE))
372 		op = LK_EXCLUSIVE;
373 
374 	wakeup_swapper = 0;
375 	switch (op) {
376 	case LK_SHARED:
377 		if (LK_CAN_WITNESS(flags))
378 			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
379 			    file, line, ilk);
380 		for (;;) {
381 			x = lk->lk_lock;
382 
383 			/*
384 			 * If no other thread has an exclusive lock, or
385 			 * no exclusive waiter is present, bump the count of
386 			 * sharers.  Since we have to preserve the state of
387 			 * waiters, if we fail to acquire the shared lock
388 			 * loop back and retry.
389 			 */
390 			if (LK_CAN_SHARE(x)) {
391 				if (atomic_cmpset_acq_ptr(&lk->lk_lock, x,
392 				    x + LK_ONE_SHARER))
393 					break;
394 				continue;
395 			}
396 			lock_profile_obtain_lock_failed(&lk->lock_object,
397 			    &contested, &waittime);
398 
399 			/*
400 			 * If the lock is already held by curthread in
401 			 * exclusive way avoid a deadlock.
402 			 */
403 			if (LK_HOLDER(x) == tid) {
404 				LOCK_LOG2(lk,
405 				    "%s: %p already held in exclusive mode",
406 				    __func__, lk);
407 				error = EDEADLK;
408 				break;
409 			}
410 
411 			/*
412 			 * If the lock is expected to not sleep just give up
413 			 * and return.
414 			 */
415 			if (LK_TRYOP(flags)) {
416 				LOCK_LOG2(lk, "%s: %p fails the try operation",
417 				    __func__, lk);
418 				error = EBUSY;
419 				break;
420 			}
421 
422 			/*
423 			 * Acquire the sleepqueue chain lock because we
424 			 * probabilly will need to manipulate waiters flags.
425 			 */
426 			sleepq_lock(&lk->lock_object);
427 			x = lk->lk_lock;
428 
429 			/*
430 			 * if the lock can be acquired in shared mode, try
431 			 * again.
432 			 */
433 			if (LK_CAN_SHARE(x)) {
434 				sleepq_release(&lk->lock_object);
435 				continue;
436 			}
437 
438 			/*
439 			 * Try to set the LK_SHARED_WAITERS flag.  If we fail,
440 			 * loop back and retry.
441 			 */
442 			if ((x & LK_SHARED_WAITERS) == 0) {
443 				if (!atomic_cmpset_acq_ptr(&lk->lk_lock, x,
444 				    x | LK_SHARED_WAITERS)) {
445 					sleepq_release(&lk->lock_object);
446 					continue;
447 				}
448 				LOCK_LOG2(lk, "%s: %p set shared waiters flag",
449 				    __func__, lk);
450 			}
451 
452 			/*
453 			 * As far as we have been unable to acquire the
454 			 * shared lock and the shared waiters flag is set,
455 			 * we will sleep.
456 			 */
457 			error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
458 			    SQ_SHARED_QUEUE);
459 			flags &= ~LK_INTERLOCK;
460 			if (error) {
461 				LOCK_LOG3(lk,
462 				    "%s: interrupted sleep for %p with %d",
463 				    __func__, lk, error);
464 				break;
465 			}
466 			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
467 			    __func__, lk);
468 		}
469 		if (error == 0) {
470 			lock_profile_obtain_lock_success(&lk->lock_object,
471 			    contested, waittime, file, line);
472 			LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, file,
473 			    line);
474 			WITNESS_LOCK(&lk->lock_object, LK_TRYWIT(flags), file,
475 			    line);
476 			TD_LOCKS_INC(curthread);
477 			TD_SLOCKS_INC(curthread);
478 			STACK_SAVE(lk);
479 		}
480 		break;
481 	case LK_UPGRADE:
482 		_lockmgr_assert(lk, KA_SLOCKED, file, line);
483 		x = lk->lk_lock & LK_ALL_WAITERS;
484 
485 		/*
486 		 * Try to switch from one shared lock to an exclusive one.
487 		 * We need to preserve waiters flags during the operation.
488 		 */
489 		if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x,
490 		    tid | x)) {
491 			LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file,
492 			    line);
493 			WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE |
494 			    LK_TRYWIT(flags), file, line);
495 			TD_SLOCKS_DEC(curthread);
496 			break;
497 		}
498 
499 		/*
500 		 * We have been unable to succeed in upgrading, so just
501 		 * give up the shared lock.
502 		 */
503 		wakeup_swapper |= wakeupshlk(lk, file, line);
504 
505 		/* FALLTHROUGH */
506 	case LK_EXCLUSIVE:
507 		if (LK_CAN_WITNESS(flags))
508 			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
509 			    LOP_EXCLUSIVE, file, line, ilk);
510 
511 		/*
512 		 * If curthread already holds the lock and this one is
513 		 * allowed to recurse, simply recurse on it.
514 		 */
515 		if (lockmgr_xlocked(lk)) {
516 			if ((flags & LK_CANRECURSE) == 0 &&
517 			    (lk->lock_object.lo_flags & LK_CANRECURSE) == 0) {
518 
519 				/*
520 				 * If the lock is expected to not panic just
521 				 * give up and return.
522 				 */
523 				if (LK_TRYOP(flags)) {
524 					LOCK_LOG2(lk,
525 					    "%s: %p fails the try operation",
526 					    __func__, lk);
527 					error = EBUSY;
528 					break;
529 				}
530 				if (flags & LK_INTERLOCK)
531 					class->lc_unlock(ilk);
532 		panic("%s: recursing on non recursive lockmgr %s @ %s:%d\n",
533 				    __func__, iwmesg, file, line);
534 			}
535 			lk->lk_recurse++;
536 			LOCK_LOG2(lk, "%s: %p recursing", __func__, lk);
537 			LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
538 			    lk->lk_recurse, file, line);
539 			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
540 			    LK_TRYWIT(flags), file, line);
541 			TD_LOCKS_INC(curthread);
542 			break;
543 		}
544 
545 		while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED,
546 		    tid)) {
547 			lock_profile_obtain_lock_failed(&lk->lock_object,
548 			    &contested, &waittime);
549 
550 			/*
551 			 * If the lock is expected to not sleep just give up
552 			 * and return.
553 			 */
554 			if (LK_TRYOP(flags)) {
555 				LOCK_LOG2(lk, "%s: %p fails the try operation",
556 				    __func__, lk);
557 				error = EBUSY;
558 				break;
559 			}
560 
561 			/*
562 			 * Acquire the sleepqueue chain lock because we
563 			 * probabilly will need to manipulate waiters flags.
564 			 */
565 			sleepq_lock(&lk->lock_object);
566 			x = lk->lk_lock;
567 			v = x & LK_ALL_WAITERS;
568 
569 			/*
570 			 * if the lock has been released while we spun on
571 			 * the sleepqueue chain lock just try again.
572 			 */
573 			if (x == LK_UNLOCKED) {
574 				sleepq_release(&lk->lock_object);
575 				continue;
576 			}
577 
578 			/*
579 			 * The lock can be in the state where there is a
580 			 * pending queue of waiters, but still no owner.
581 			 * This happens when the lock is contested and an
582 			 * owner is going to claim the lock.
583 			 * If curthread is the one successfully acquiring it
584 			 * claim lock ownership and return, preserving waiters
585 			 * flags.
586 			 */
587 			if (x == (LK_UNLOCKED | v)) {
588 				if (atomic_cmpset_acq_ptr(&lk->lk_lock, x,
589 				    tid | v)) {
590 					sleepq_release(&lk->lock_object);
591 					LOCK_LOG2(lk,
592 					    "%s: %p claimed by a new writer",
593 					    __func__, lk);
594 					break;
595 				}
596 				sleepq_release(&lk->lock_object);
597 				continue;
598 			}
599 
600 			/*
601 			 * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
602 			 * fail, loop back and retry.
603 			 */
604 			if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
605 				if (!atomic_cmpset_ptr(&lk->lk_lock, x,
606 				    x | LK_EXCLUSIVE_WAITERS)) {
607 					sleepq_release(&lk->lock_object);
608 					continue;
609 				}
610 				LOCK_LOG2(lk, "%s: %p set excl waiters flag",
611 				    __func__, lk);
612 			}
613 
614 			/*
615 			 * As far as we have been unable to acquire the
616 			 * exclusive lock and the exclusive waiters flag
617 			 * is set, we will sleep.
618 			 */
619 			error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
620 			    SQ_EXCLUSIVE_QUEUE);
621 			flags &= ~LK_INTERLOCK;
622 			if (error) {
623 				LOCK_LOG3(lk,
624 				    "%s: interrupted sleep for %p with %d",
625 				    __func__, lk, error);
626 				break;
627 			}
628 			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
629 			    __func__, lk);
630 		}
631 		if (error == 0) {
632 			lock_profile_obtain_lock_success(&lk->lock_object,
633 			    contested, waittime, file, line);
634 			LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
635 			    lk->lk_recurse, file, line);
636 			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
637 			    LK_TRYWIT(flags), file, line);
638 			TD_LOCKS_INC(curthread);
639 			STACK_SAVE(lk);
640 		}
641 		break;
642 	case LK_DOWNGRADE:
643 		_lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED, file, line);
644 		LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line);
645 		WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line);
646 		TD_SLOCKS_INC(curthread);
647 
648 		/*
649 		 * In order to preserve waiters flags, just spin.
650 		 */
651 		for (;;) {
652 			x = lk->lk_lock & LK_ALL_WAITERS;
653 			if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
654 			    LK_SHARERS_LOCK(1) | x))
655 				break;
656 			cpu_spinwait();
657 		}
658 		break;
659 	case LK_RELEASE:
660 		_lockmgr_assert(lk, KA_LOCKED, file, line);
661 		x = lk->lk_lock;
662 
663 		if ((x & LK_SHARE) == 0) {
664 
665 			/*
666 			 * As first option, treact the lock as if it has not
667 			 * any waiter.
668 			 * Fix-up the tid var if the lock has been disowned.
669 			 */
670 			if (LK_HOLDER(x) == LK_KERNPROC)
671 				tid = LK_KERNPROC;
672 			else {
673 				WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE,
674 				    file, line);
675 				TD_LOCKS_DEC(curthread);
676 			}
677 			LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0,
678 			    lk->lk_recurse, file, line);
679 
680 			/*
681 			 * The lock is held in exclusive mode.
682 			 * If the lock is recursed also, then unrecurse it.
683 			 */
684 			if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
685 				LOCK_LOG2(lk, "%s: %p unrecursing", __func__,
686 				    lk);
687 				lk->lk_recurse--;
688 				break;
689 			}
690 			if (tid != LK_KERNPROC)
691 				lock_profile_release_lock(&lk->lock_object);
692 
693 			if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid,
694 			    LK_UNLOCKED))
695 				break;
696 
697 			sleepq_lock(&lk->lock_object);
698 			x = lk->lk_lock & LK_ALL_WAITERS;
699 			v = LK_UNLOCKED;
700 
701 			/*
702 		 	 * If the lock has exclusive waiters, give them
703 			 * preference in order to avoid deadlock with
704 			 * shared runners up.
705 			 */
706 			if (x & LK_EXCLUSIVE_WAITERS) {
707 				queue = SQ_EXCLUSIVE_QUEUE;
708 				v |= (x & LK_SHARED_WAITERS);
709 			} else {
710 				MPASS(x == LK_SHARED_WAITERS);
711 				queue = SQ_SHARED_QUEUE;
712 			}
713 
714 			LOCK_LOG3(lk,
715 			    "%s: %p waking up threads on the %s queue",
716 			    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
717 			    "exclusive");
718 			atomic_store_rel_ptr(&lk->lk_lock, v);
719 			wakeup_swapper = sleepq_broadcast(&lk->lock_object,
720 			    SLEEPQ_LK, 0, queue);
721 			sleepq_release(&lk->lock_object);
722 			break;
723 		} else
724 			wakeup_swapper = wakeupshlk(lk, file, line);
725 		break;
726 	case LK_DRAIN:
727 		if (LK_CAN_WITNESS(flags))
728 			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
729 			    LOP_EXCLUSIVE, file, line, ilk);
730 
731 		/*
732 		 * Trying to drain a lock we already own will result in a
733 		 * deadlock.
734 		 */
735 		if (lockmgr_xlocked(lk)) {
736 			if (flags & LK_INTERLOCK)
737 				class->lc_unlock(ilk);
738 			panic("%s: draining %s with the lock held @ %s:%d\n",
739 			    __func__, iwmesg, file, line);
740 		}
741 
742 		while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
743 			lock_profile_obtain_lock_failed(&lk->lock_object,
744 			    &contested, &waittime);
745 
746 			/*
747 			 * If the lock is expected to not sleep just give up
748 			 * and return.
749 			 */
750 			if (LK_TRYOP(flags)) {
751 				LOCK_LOG2(lk, "%s: %p fails the try operation",
752 				    __func__, lk);
753 				error = EBUSY;
754 				break;
755 			}
756 
757 			/*
758 			 * Acquire the sleepqueue chain lock because we
759 			 * probabilly will need to manipulate waiters flags.
760 			 */
761 			sleepq_lock(&lk->lock_object);
762 			x = lk->lk_lock;
763 			v = x & LK_ALL_WAITERS;
764 
765 			/*
766 			 * if the lock has been released while we spun on
767 			 * the sleepqueue chain lock just try again.
768 			 */
769 			if (x == LK_UNLOCKED) {
770 				sleepq_release(&lk->lock_object);
771 				continue;
772 			}
773 
774 			if (x == (LK_UNLOCKED | v)) {
775 				v = x;
776 				if (v & LK_EXCLUSIVE_WAITERS) {
777 					queue = SQ_EXCLUSIVE_QUEUE;
778 					v &= ~LK_EXCLUSIVE_WAITERS;
779 				} else {
780 					MPASS(v & LK_SHARED_WAITERS);
781 					queue = SQ_SHARED_QUEUE;
782 					v &= ~LK_SHARED_WAITERS;
783 				}
784 				if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) {
785 					sleepq_release(&lk->lock_object);
786 					continue;
787 				}
788 				LOCK_LOG3(lk,
789 				"%s: %p waking up all threads on the %s queue",
790 				    __func__, lk, queue == SQ_SHARED_QUEUE ?
791 				    "shared" : "exclusive");
792 				wakeup_swapper |= sleepq_broadcast(
793 				    &lk->lock_object, SLEEPQ_LK, 0, queue);
794 
795 				/*
796 				 * If shared waiters have been woken up we need
797 				 * to wait for one of them to acquire the lock
798 				 * before to set the exclusive waiters in
799 				 * order to avoid a deadlock.
800 				 */
801 				if (queue == SQ_SHARED_QUEUE) {
802 					for (v = lk->lk_lock;
803 					    (v & LK_SHARE) && !LK_SHARERS(v);
804 					    v = lk->lk_lock)
805 						cpu_spinwait();
806 				}
807 			}
808 
809 			/*
810 			 * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
811 			 * fail, loop back and retry.
812 			 */
813 			if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
814 				if (!atomic_cmpset_ptr(&lk->lk_lock, x,
815 				    x | LK_EXCLUSIVE_WAITERS)) {
816 					sleepq_release(&lk->lock_object);
817 					continue;
818 				}
819 				LOCK_LOG2(lk, "%s: %p set drain waiters flag",
820 				    __func__, lk);
821 			}
822 
823 			/*
824 			 * As far as we have been unable to acquire the
825 			 * exclusive lock and the exclusive waiters flag
826 			 * is set, we will sleep.
827 			 */
828 			if (flags & LK_INTERLOCK) {
829 				class->lc_unlock(ilk);
830 				flags &= ~LK_INTERLOCK;
831 			}
832 			GIANT_SAVE();
833 			sleepq_add(&lk->lock_object, NULL, iwmesg, SLEEPQ_LK,
834 			    SQ_EXCLUSIVE_QUEUE);
835 			sleepq_wait(&lk->lock_object, ipri & PRIMASK);
836 			GIANT_RESTORE();
837 			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
838 			    __func__, lk);
839 		}
840 
841 		if (error == 0) {
842 			lock_profile_obtain_lock_success(&lk->lock_object,
843 			    contested, waittime, file, line);
844 			LOCK_LOG_LOCK("DRAIN", &lk->lock_object, 0,
845 			    lk->lk_recurse, file, line);
846 			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
847 			    LK_TRYWIT(flags), file, line);
848 			TD_LOCKS_INC(curthread);
849 			STACK_SAVE(lk);
850 		}
851 		break;
852 	default:
853 		if (flags & LK_INTERLOCK)
854 			class->lc_unlock(ilk);
855 		panic("%s: unknown lockmgr request 0x%x\n", __func__, op);
856 	}
857 
858 	if (flags & LK_INTERLOCK)
859 		class->lc_unlock(ilk);
860 	if (wakeup_swapper)
861 		kick_proc0();
862 
863 	return (error);
864 }
865 
866 void
867 _lockmgr_disown(struct lock *lk, const char *file, int line)
868 {
869 	uintptr_t tid, x;
870 
871 	tid = (uintptr_t)curthread;
872 	_lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED, file, line);
873 
874 	/*
875 	 * If the owner is already LK_KERNPROC just skip the whole operation.
876 	 */
877 	if (LK_HOLDER(lk->lk_lock) != tid)
878 		return;
879 	lock_profile_release_lock(&lk->lock_object);
880 	LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line);
881 	WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
882 	TD_LOCKS_DEC(curthread);
883 
884 	/*
885 	 * In order to preserve waiters flags, just spin.
886 	 */
887 	for (;;) {
888 		x = lk->lk_lock & LK_ALL_WAITERS;
889 		if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
890 		    LK_KERNPROC | x))
891 			return;
892 		cpu_spinwait();
893 	}
894 }
895 
896 void
897 lockmgr_printinfo(struct lock *lk)
898 {
899 	struct thread *td;
900 	uintptr_t x;
901 
902 	if (lk->lk_lock == LK_UNLOCKED)
903 		printf("lock type %s: UNLOCKED\n", lk->lock_object.lo_name);
904 	else if (lk->lk_lock & LK_SHARE)
905 		printf("lock type %s: SHARED (count %ju)\n",
906 		    lk->lock_object.lo_name,
907 		    (uintmax_t)LK_SHARERS(lk->lk_lock));
908 	else {
909 		td = lockmgr_xholder(lk);
910 		printf("lock type %s: EXCL by thread %p (pid %d)\n",
911 		    lk->lock_object.lo_name, td, td->td_proc->p_pid);
912 	}
913 
914 	x = lk->lk_lock;
915 	if (x & LK_EXCLUSIVE_WAITERS)
916 		printf(" with exclusive waiters pending\n");
917 	if (x & LK_SHARED_WAITERS)
918 		printf(" with shared waiters pending\n");
919 
920 	STACK_PRINT(lk);
921 }
922 
923 int
924 lockstatus(struct lock *lk)
925 {
926 	uintptr_t v, x;
927 	int ret;
928 
929 	ret = LK_SHARED;
930 	x = lk->lk_lock;
931 	v = LK_HOLDER(x);
932 
933 	if ((x & LK_SHARE) == 0) {
934 		if (v == (uintptr_t)curthread || v == LK_KERNPROC)
935 			ret = LK_EXCLUSIVE;
936 		else
937 			ret = LK_EXCLOTHER;
938 	} else if (x == LK_UNLOCKED)
939 		ret = 0;
940 
941 	return (ret);
942 }
943 
944 #ifdef INVARIANT_SUPPORT
945 #ifndef INVARIANTS
946 #undef	_lockmgr_assert
947 #endif
948 
949 void
950 _lockmgr_assert(struct lock *lk, int what, const char *file, int line)
951 {
952 	int slocked = 0;
953 
954 	if (panicstr != NULL)
955 		return;
956 	switch (what) {
957 	case KA_SLOCKED:
958 	case KA_SLOCKED | KA_NOTRECURSED:
959 	case KA_SLOCKED | KA_RECURSED:
960 		slocked = 1;
961 	case KA_LOCKED:
962 	case KA_LOCKED | KA_NOTRECURSED:
963 	case KA_LOCKED | KA_RECURSED:
964 #ifdef WITNESS
965 
966 		/*
967 		 * We cannot trust WITNESS if the lock is held in exclusive
968 		 * mode and a call to lockmgr_disown() happened.
969 		 * Workaround this skipping the check if the lock is held in
970 		 * exclusive mode even for the KA_LOCKED case.
971 		 */
972 		if (slocked || (lk->lk_lock & LK_SHARE)) {
973 			witness_assert(&lk->lock_object, what, file, line);
974 			break;
975 		}
976 #endif
977 		if (lk->lk_lock == LK_UNLOCKED ||
978 		    ((lk->lk_lock & LK_SHARE) == 0 && (slocked ||
979 		    (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk)))))
980 			panic("Lock %s not %slocked @ %s:%d\n",
981 			    lk->lock_object.lo_name, slocked ? "share" : "",
982 			    file, line);
983 
984 		if ((lk->lk_lock & LK_SHARE) == 0) {
985 			if (lockmgr_recursed(lk)) {
986 				if (what & KA_NOTRECURSED)
987 					panic("Lock %s recursed @ %s:%d\n",
988 					    lk->lock_object.lo_name, file,
989 					    line);
990 			} else if (what & KA_RECURSED)
991 				panic("Lock %s not recursed @ %s:%d\n",
992 				    lk->lock_object.lo_name, file, line);
993 		}
994 		break;
995 	case KA_XLOCKED:
996 	case KA_XLOCKED | KA_NOTRECURSED:
997 	case KA_XLOCKED | KA_RECURSED:
998 		if (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk))
999 			panic("Lock %s not exclusively locked @ %s:%d\n",
1000 			    lk->lock_object.lo_name, file, line);
1001 		if (lockmgr_recursed(lk)) {
1002 			if (what & KA_NOTRECURSED)
1003 				panic("Lock %s recursed @ %s:%d\n",
1004 				    lk->lock_object.lo_name, file, line);
1005 		} else if (what & KA_RECURSED)
1006 			panic("Lock %s not recursed @ %s:%d\n",
1007 			    lk->lock_object.lo_name, file, line);
1008 		break;
1009 	case KA_UNLOCKED:
1010 		if (lockmgr_xlocked(lk) || lockmgr_disowned(lk))
1011 			panic("Lock %s exclusively locked @ %s:%d\n",
1012 			    lk->lock_object.lo_name, file, line);
1013 		break;
1014 	default:
1015 		panic("Unknown lockmgr assertion: %d @ %s:%d\n", what, file,
1016 		    line);
1017 	}
1018 }
1019 #endif
1020 
1021 #ifdef DDB
1022 int
1023 lockmgr_chain(struct thread *td, struct thread **ownerp)
1024 {
1025 	struct lock *lk;
1026 
1027 	lk = td->td_wchan;
1028 
1029 	if (LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr)
1030 		return (0);
1031 	db_printf("blocked on lockmgr %s", lk->lock_object.lo_name);
1032 	if (lk->lk_lock & LK_SHARE)
1033 		db_printf("SHARED (count %ju)\n",
1034 		    (uintmax_t)LK_SHARERS(lk->lk_lock));
1035 	else
1036 		db_printf("EXCL\n");
1037 	*ownerp = lockmgr_xholder(lk);
1038 
1039 	return (1);
1040 }
1041 
1042 static void
1043 db_show_lockmgr(struct lock_object *lock)
1044 {
1045 	struct thread *td;
1046 	struct lock *lk;
1047 
1048 	lk = (struct lock *)lock;
1049 
1050 	db_printf(" state: ");
1051 	if (lk->lk_lock == LK_UNLOCKED)
1052 		db_printf("UNLOCKED\n");
1053 	else if (lk->lk_lock & LK_SHARE)
1054 		db_printf("SLOCK: %ju\n", (uintmax_t)LK_SHARERS(lk->lk_lock));
1055 	else {
1056 		td = lockmgr_xholder(lk);
1057 		if (td == (struct thread *)LK_KERNPROC)
1058 			db_printf("XLOCK: LK_KERNPROC\n");
1059 		else
1060 			db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
1061 			    td->td_tid, td->td_proc->p_pid,
1062 			    td->td_proc->p_comm);
1063 		if (lockmgr_recursed(lk))
1064 			db_printf(" recursed: %d\n", lk->lk_recurse);
1065 	}
1066 	db_printf(" waiters: ");
1067 	switch (lk->lk_lock & LK_ALL_WAITERS) {
1068 	case LK_SHARED_WAITERS:
1069 		db_printf("shared\n");
1070 	case LK_EXCLUSIVE_WAITERS:
1071 		db_printf("exclusive\n");
1072 		break;
1073 	case LK_ALL_WAITERS:
1074 		db_printf("shared and exclusive\n");
1075 		break;
1076 	default:
1077 		db_printf("none\n");
1078 	}
1079 }
1080 #endif
1081