xref: /freebsd/sys/kern/kern_lock.c (revision 2be1a816b9ff69588e55be0a84cbe2a31efc0f2f)
/*-
 * Copyright (c) 2008 Attilio Rao <attilio@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

#include "opt_ddb.h"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/lock_profile.h>
#include <sys/lockmgr.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sleepqueue.h>
#ifdef DEBUG_LOCKS
#include <sys/stack.h>
#endif
#include <sys/systm.h>

#include <machine/cpu.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

CTASSERT(((LK_CANRECURSE | LK_NOSHARE) & LO_CLASSFLAGS) ==
    (LK_CANRECURSE | LK_NOSHARE));

#define	SQ_EXCLUSIVE_QUEUE	0
#define	SQ_SHARED_QUEUE		1

#ifndef INVARIANTS
#define	_lockmgr_assert(lk, what, file, line)
#define	TD_LOCKS_INC(td)
#define	TD_LOCKS_DEC(td)
#else
#define	TD_LOCKS_INC(td)	((td)->td_locks++)
#define	TD_LOCKS_DEC(td)	((td)->td_locks--)
#endif
#define	TD_SLOCKS_INC(td)	((td)->td_lk_slocks++)
#define	TD_SLOCKS_DEC(td)	((td)->td_lk_slocks--)

#ifndef DEBUG_LOCKS
#define	STACK_PRINT(lk)
#define	STACK_SAVE(lk)
#define	STACK_ZERO(lk)
#else
#define	STACK_PRINT(lk)	stack_print_ddb(&(lk)->lk_stack)
#define	STACK_SAVE(lk)	stack_save(&(lk)->lk_stack)
#define	STACK_ZERO(lk)	stack_zero(&(lk)->lk_stack)
#endif

#define	LOCK_LOG2(lk, string, arg1, arg2)				\
	if (LOCK_LOG_TEST(&(lk)->lock_object, 0))			\
		CTR2(KTR_LOCK, (string), (arg1), (arg2))
#define	LOCK_LOG3(lk, string, arg1, arg2, arg3)				\
	if (LOCK_LOG_TEST(&(lk)->lock_object, 0))			\
		CTR3(KTR_LOCK, (string), (arg1), (arg2), (arg3))

#define	GIANT_DECLARE							\
	int _i = 0;							\
	WITNESS_SAVE_DECL(Giant)
#define	GIANT_RESTORE() do {						\
	if (_i > 0) {							\
		while (_i--)						\
			mtx_lock(&Giant);				\
		WITNESS_RESTORE(&Giant.lock_object, Giant);		\
	}								\
} while (0)
#define	GIANT_SAVE() do {						\
	if (mtx_owned(&Giant)) {					\
		WITNESS_SAVE(&Giant.lock_object, Giant);		\
		while (mtx_owned(&Giant)) {				\
			_i++;						\
			mtx_unlock(&Giant);				\
		}							\
	}								\
} while (0)

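/*
 * A minimal pairing sketch for the two macros above, mirroring how
 * sleeplk() below uses them: any Giant recursion is fully dropped before
 * going to sleep and reacquired the same number of times afterwards.  The
 * function name, wait message and the sleep itself are illustrative only.
 */
#if 0
static void
giant_pairing_sketch(struct lock *lk, int pri)
{
	GIANT_DECLARE;

	sleepq_lock(&lk->lock_object);
	GIANT_SAVE();		/* Record and drop any Giant recursion. */
	sleepq_add(&lk->lock_object, NULL, "sketch", SLEEPQ_LK,
	    SQ_EXCLUSIVE_QUEUE);
	sleepq_wait(&lk->lock_object, pri & PRIMASK);
	GIANT_RESTORE();	/* Reacquire Giant as many times as dropped. */
}
#endif
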
#define	LK_CAN_SHARE(x)							\
	(((x) & LK_SHARE) && (((x) & LK_EXCLUSIVE_WAITERS) == 0 ||	\
	curthread->td_lk_slocks || (curthread->td_pflags & TDP_DEADLKTREAT)))
#define	LK_TRYOP(x)							\
	((x) & LK_NOWAIT)

#define	LK_CAN_WITNESS(x)						\
	(((x) & LK_NOWITNESS) == 0 && !LK_TRYOP(x))
#define	LK_TRYWIT(x)							\
	(LK_TRYOP(x) ? LOP_TRYLOCK : 0)

#define	lockmgr_disowned(lk)						\
	(((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == LK_KERNPROC)

#define	lockmgr_xlocked(lk)						\
	(((lk)->lk_lock & ~(LK_FLAGMASK & ~LK_SHARE)) == (uintptr_t)curthread)

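/*
 * A rough sketch of how the macros above read the lk_lock word, following
 * the LK_* encoding in <sys/lockmgr.h> (the exact bit layout lives there,
 * so this is descriptive rather than authoritative): with LK_SHARE set the
 * word carries a sharer count (LK_UNLOCKED being a count of zero), with
 * LK_SHARE clear it carries the owning thread pointer or LK_KERNPROC for a
 * disowned lock, and LK_SHARED_WAITERS/LK_EXCLUSIVE_WAITERS flag sleeping
 * waiters on the corresponding queues.
 */
#if 0
/* Classification sketch for a snapshot of the lock word. */
static const char *
lk_state_sketch(uintptr_t x)
{

	if (x & LK_SHARE)
		return (LK_SHARERS(x) ? "shared" : "unlocked");
	return (LK_HOLDER(x) == LK_KERNPROC ? "disowned" : "exclusive");
}
#endif
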
static void	 assert_lockmgr(struct lock_object *lock, int how);
#ifdef DDB
static void	 db_show_lockmgr(struct lock_object *lock);
#endif
static void	 lock_lockmgr(struct lock_object *lock, int how);
static int	 unlock_lockmgr(struct lock_object *lock);

struct lock_class lock_class_lockmgr = {
	.lc_name = "lockmgr",
	.lc_flags = LC_RECURSABLE | LC_SLEEPABLE | LC_SLEEPLOCK | LC_UPGRADABLE,
	.lc_assert = assert_lockmgr,
#ifdef DDB
	.lc_ddb_show = db_show_lockmgr,
#endif
	.lc_lock = lock_lockmgr,
	.lc_unlock = unlock_lockmgr
};

static __inline struct thread *
lockmgr_xholder(struct lock *lk)
{
	uintptr_t x;

	x = lk->lk_lock;
	return ((x & LK_SHARE) ? NULL : (struct thread *)LK_HOLDER(x));
}

/*
 * This function assumes the sleepqueue chain lock is held on entry and
 * returns with it released.  It also assumes the generic interlock is
 * valid and has already been checked by the caller.  If LK_INTERLOCK is
 * specified, the interlock is not reacquired after the sleep.
 */
static __inline int
sleeplk(struct lock *lk, u_int flags, struct lock_object *ilk,
    const char *wmesg, int pri, int timo, int queue)
{
	GIANT_DECLARE;
	struct lock_class *class;
	int catch, error;

	class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
	catch = (pri) ? (pri & PCATCH) : 0;
	pri &= PRIMASK;
	error = 0;

	LOCK_LOG3(lk, "%s: %p blocking on the %s sleepqueue", __func__, lk,
	    (queue == SQ_EXCLUSIVE_QUEUE) ? "exclusive" : "shared");

	if (flags & LK_INTERLOCK)
		class->lc_unlock(ilk);
	GIANT_SAVE();
	sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
	    SLEEPQ_INTERRUPTIBLE : 0), queue);
	if ((flags & LK_TIMELOCK) && timo)
		sleepq_set_timeout(&lk->lock_object, timo);

	/*
	 * Pick the sleep variant matching the timeout and catch settings.
	 */
	if ((flags & LK_TIMELOCK) && timo && catch)
		error = sleepq_timedwait_sig(&lk->lock_object, pri);
	else if ((flags & LK_TIMELOCK) && timo)
		error = sleepq_timedwait(&lk->lock_object, pri);
	else if (catch)
		error = sleepq_wait_sig(&lk->lock_object, pri);
	else
		sleepq_wait(&lk->lock_object, pri);
	GIANT_RESTORE();
	if ((flags & LK_SLEEPFAIL) && error == 0)
		error = ENOLCK;

	return (error);
}

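/*
 * All of the slow paths below follow the same protocol before calling
 * sleeplk(): lock the sleepqueue chain, re-read the lock word, advertise
 * the thread as a waiter with a CAS and only then sleep.  A condensed
 * sketch of that pattern for the exclusive queue; the function name and
 * the EAGAIN convention for a lost race are illustrative only.
 */
#if 0
static int
exclusive_wait_sketch(struct lock *lk, u_int flags, struct lock_object *ilk,
    const char *wmesg, int pri, int timo)
{
	uintptr_t x;

	sleepq_lock(&lk->lock_object);
	x = lk->lk_lock;
	if (x == LK_UNLOCKED) {
		/* Released while we spun on the chain lock; no sleep needed. */
		sleepq_release(&lk->lock_object);
		return (0);
	}
	if ((x & LK_EXCLUSIVE_WAITERS) == 0 &&
	    !atomic_cmpset_ptr(&lk->lk_lock, x, x | LK_EXCLUSIVE_WAITERS)) {
		/* Lost a race updating the word; let the caller retry. */
		sleepq_release(&lk->lock_object);
		return (EAGAIN);
	}
	return (sleeplk(lk, flags, ilk, wmesg, pri, timo, SQ_EXCLUSIVE_QUEUE));
}
#endif
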
static __inline void
wakeupshlk(struct lock *lk, const char *file, int line)
{
	uintptr_t v, x;
	int queue;

	TD_LOCKS_DEC(curthread);
	TD_SLOCKS_DEC(curthread);
	WITNESS_UNLOCK(&lk->lock_object, 0, file, line);
	LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, file, line);

	for (;;) {
		x = lk->lk_lock;

		/*
		 * If there is more than one shared lock held, just drop one
		 * and return.
		 */
		if (LK_SHARERS(x) > 1) {
			if (atomic_cmpset_ptr(&lk->lk_lock, x,
			    x - LK_ONE_SHARER))
				break;
			continue;
		}

		/*
		 * If there are no waiters, drop the lock quickly.
		 */
		if ((x & LK_ALL_WAITERS) == 0) {
			MPASS(x == LK_SHARERS_LOCK(1));
			if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1),
			    LK_UNLOCKED))
				break;
			continue;
		}

		/*
		 * We should have a sharer with waiters, so enter the hard
		 * path in order to handle wakeups correctly.
		 */
		sleepq_lock(&lk->lock_object);
		x = lk->lk_lock & LK_ALL_WAITERS;
		v = LK_UNLOCKED;

		/*
		 * If the lock has exclusive waiters, give them preference in
		 * order to avoid a deadlock with threads racing to acquire
		 * the lock in shared mode.
		 */
		if (x & LK_EXCLUSIVE_WAITERS) {
			queue = SQ_EXCLUSIVE_QUEUE;
			v |= (x & LK_SHARED_WAITERS);
		} else {
			MPASS(x == LK_SHARED_WAITERS);
			queue = SQ_SHARED_QUEUE;
		}

		if (!atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x,
		    v)) {
			sleepq_release(&lk->lock_object);
			continue;
		}
		LOCK_LOG3(lk, "%s: %p waking up threads on the %s queue",
		    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
		    "exclusive");
		sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, queue);
		sleepq_release(&lk->lock_object);
		break;
	}

	lock_profile_release_lock(&lk->lock_object);
}

static void
assert_lockmgr(struct lock_object *lock, int what)
{

	panic("lockmgr locks do not support assertions");
}

static void
lock_lockmgr(struct lock_object *lock, int how)
{

	panic("lockmgr locks do not support sleep interlocking");
}

static int
unlock_lockmgr(struct lock_object *lock)
{

	panic("lockmgr locks do not support sleep interlocking");
}

void
lockinit(struct lock *lk, int pri, const char *wmesg, int timo, int flags)
{
	int iflags;

	MPASS((flags & ~LK_INIT_MASK) == 0);

	iflags = LO_RECURSABLE | LO_SLEEPABLE | LO_UPGRADABLE;
	if ((flags & LK_NODUP) == 0)
		iflags |= LO_DUPOK;
	if (flags & LK_NOPROFILE)
		iflags |= LO_NOPROFILE;
	if ((flags & LK_NOWITNESS) == 0)
		iflags |= LO_WITNESS;
	if (flags & LK_QUIET)
		iflags |= LO_QUIET;
	iflags |= flags & (LK_CANRECURSE | LK_NOSHARE);

	lk->lk_lock = LK_UNLOCKED;
	lk->lk_recurse = 0;
	lk->lk_timo = timo;
	lk->lk_pri = pri;
	lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
	STACK_ZERO(lk);
}

void
lockdestroy(struct lock *lk)
{

	KASSERT(lk->lk_lock == LK_UNLOCKED, ("lockmgr still held"));
	KASSERT(lk->lk_recurse == 0, ("lockmgr still recursed"));
	lock_destroy(&lk->lock_object);
}

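/*
 * A minimal lifecycle sketch, assuming the usual lockmgr(9) wrapper macro
 * from <sys/lockmgr.h> with the (lk, flags, interlock) form; the lock name,
 * priority and flag choices are illustrative only.
 */
#if 0
static struct lock sketch_lk;

static void
lifecycle_sketch(void)
{

	lockinit(&sketch_lk, PVFS, "sktchlk", 0, 0);

	/* Exclusive acquire and release using the per-lock defaults. */
	(void)lockmgr(&sketch_lk, LK_EXCLUSIVE, NULL);
	(void)lockmgr(&sketch_lk, LK_RELEASE, NULL);

	lockdestroy(&sketch_lk);
}
#endif
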
int
__lockmgr_args(struct lock *lk, u_int flags, struct lock_object *ilk,
    const char *wmesg, int pri, int timo, const char *file, int line)
{
	GIANT_DECLARE;
	uint64_t waittime;
	struct lock_class *class;
	const char *iwmesg;
	uintptr_t tid, v, x;
	u_int op;
	int contested, error, ipri, itimo, queue;

	contested = 0;
	error = 0;
	waittime = 0;
	tid = (uintptr_t)curthread;
	op = (flags & LK_TYPE_MASK);
	iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg;
	ipri = (pri == LK_PRIO_DEFAULT) ? lk->lk_pri : pri;
	itimo = (timo == LK_TIMO_DEFAULT) ? lk->lk_timo : timo;

	MPASS((flags & ~LK_TOTAL_MASK) == 0);
	KASSERT((op & (op - 1)) == 0,
	    ("%s: Invalid requested operation @ %s:%d", __func__, file, line));
	KASSERT((flags & (LK_NOWAIT | LK_SLEEPFAIL)) == 0 ||
	    (op != LK_DOWNGRADE && op != LK_RELEASE),
	    ("%s: Invalid flags for the requested operation @ %s:%d",
	    __func__, file, line));
	KASSERT((flags & LK_INTERLOCK) == 0 || ilk != NULL,
	    ("%s: LK_INTERLOCK passed without valid interlock @ %s:%d",
	    __func__, file, line));

	class = (flags & LK_INTERLOCK) ? LOCK_CLASS(ilk) : NULL;
	if (panicstr != NULL) {
		if (flags & LK_INTERLOCK)
			class->lc_unlock(ilk);
		return (0);
	}

	if (op == LK_SHARED && (lk->lock_object.lo_flags & LK_NOSHARE))
		op = LK_EXCLUSIVE;

	switch (op) {
	case LK_SHARED:
		if (LK_CAN_WITNESS(flags))
			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER,
			    file, line);
		for (;;) {
			x = lk->lk_lock;

			/*
			 * If no other thread holds the lock exclusively and
			 * no exclusive waiter needs to be honored, bump the
			 * count of sharers.  Since we have to preserve the
			 * state of waiters, if we fail to acquire the shared
			 * lock loop back and retry.
			 */
			if (LK_CAN_SHARE(x)) {
				if (atomic_cmpset_acq_ptr(&lk->lk_lock, x,
				    x + LK_ONE_SHARER))
					break;
				continue;
			}
			lock_profile_obtain_lock_failed(&lk->lock_object,
			    &contested, &waittime);

			/*
			 * If the lock is already held by curthread in
			 * exclusive mode, avoid a deadlock.
			 */
			if (LK_HOLDER(x) == tid) {
				LOCK_LOG2(lk,
				    "%s: %p already held in exclusive mode",
				    __func__, lk);
				error = EDEADLK;
				break;
			}

			/*
			 * If the caller does not want to sleep, just give up
			 * and return.
			 */
			if (LK_TRYOP(flags)) {
				LOCK_LOG2(lk, "%s: %p fails the try operation",
				    __func__, lk);
				error = EBUSY;
				break;
			}

			/*
			 * Acquire the sleepqueue chain lock because we will
			 * probably need to manipulate the waiters flags.
			 */
			sleepq_lock(&lk->lock_object);
			x = lk->lk_lock;

			/*
			 * If the lock can now be acquired in shared mode,
			 * try again.
			 */
			if (LK_CAN_SHARE(x)) {
				sleepq_release(&lk->lock_object);
				continue;
			}

			/*
			 * Try to set the LK_SHARED_WAITERS flag.  If we fail,
			 * loop back and retry.
			 */
			if ((x & LK_SHARED_WAITERS) == 0) {
				if (!atomic_cmpset_acq_ptr(&lk->lk_lock, x,
				    x | LK_SHARED_WAITERS)) {
					sleepq_release(&lk->lock_object);
					continue;
				}
				LOCK_LOG2(lk, "%s: %p set shared waiters flag",
				    __func__, lk);
			}

			/*
			 * Since we have been unable to acquire the shared
			 * lock and the shared waiters flag is set, we will
			 * sleep.
			 */
			error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
			    SQ_SHARED_QUEUE);
			flags &= ~LK_INTERLOCK;
			if (error) {
				LOCK_LOG3(lk,
				    "%s: interrupted sleep for %p with %d",
				    __func__, lk, error);
				break;
			}
			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
			    __func__, lk);
		}
		if (error == 0) {
			lock_profile_obtain_lock_success(&lk->lock_object,
			    contested, waittime, file, line);
			LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, file,
			    line);
			WITNESS_LOCK(&lk->lock_object, LK_TRYWIT(flags), file,
			    line);
			TD_LOCKS_INC(curthread);
			TD_SLOCKS_INC(curthread);
			STACK_SAVE(lk);
		}
		break;
	case LK_UPGRADE:
		_lockmgr_assert(lk, KA_SLOCKED, file, line);
		x = lk->lk_lock & LK_ALL_WAITERS;

		/*
		 * Try to switch from one shared lock to an exclusive one.
		 * We need to preserve waiters flags during the operation.
		 */
		if (atomic_cmpset_ptr(&lk->lk_lock, LK_SHARERS_LOCK(1) | x,
		    tid | x)) {
			LOCK_LOG_LOCK("XUPGRADE", &lk->lock_object, 0, 0, file,
			    line);
			WITNESS_UPGRADE(&lk->lock_object, LOP_EXCLUSIVE |
			    LK_TRYWIT(flags), file, line);
			TD_SLOCKS_DEC(curthread);
			break;
		}

		/*
		 * The atomic upgrade failed, so give up the shared lock and
		 * fall through to a full exclusive acquisition.
		 */
		wakeupshlk(lk, file, line);

		/* FALLTHROUGH */
	case LK_EXCLUSIVE:
		if (LK_CAN_WITNESS(flags))
			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
			    LOP_EXCLUSIVE, file, line);

		/*
		 * If curthread already holds the lock and this one is
		 * allowed to recurse, simply recurse on it.
		 */
		if (lockmgr_xlocked(lk)) {
			if ((flags & LK_CANRECURSE) == 0 &&
			    (lk->lock_object.lo_flags & LK_CANRECURSE) == 0) {

				/*
				 * For a try operation, just return EBUSY
				 * instead of panicking.
				 */
				if (LK_TRYOP(flags)) {
					LOCK_LOG2(lk,
					    "%s: %p fails the try operation",
					    __func__, lk);
					error = EBUSY;
					break;
				}
				if (flags & LK_INTERLOCK)
					class->lc_unlock(ilk);
		panic("%s: recursing on non recursive lockmgr %s @ %s:%d\n",
				    __func__, iwmesg, file, line);
			}
			lk->lk_recurse++;
			LOCK_LOG2(lk, "%s: %p recursing", __func__, lk);
			LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
			    lk->lk_recurse, file, line);
			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
			    LK_TRYWIT(flags), file, line);
			TD_LOCKS_INC(curthread);
			break;
		}

		while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED,
		    tid)) {
			lock_profile_obtain_lock_failed(&lk->lock_object,
			    &contested, &waittime);

			/*
			 * If the caller does not want to sleep, just give up
			 * and return.
			 */
			if (LK_TRYOP(flags)) {
				LOCK_LOG2(lk, "%s: %p fails the try operation",
				    __func__, lk);
				error = EBUSY;
				break;
			}

			/*
			 * Acquire the sleepqueue chain lock because we will
			 * probably need to manipulate the waiters flags.
			 */
			sleepq_lock(&lk->lock_object);
			x = lk->lk_lock;
			v = x & LK_ALL_WAITERS;

			/*
			 * If the lock has been released while we spun on
			 * the sleepqueue chain lock, just try again.
			 */
			if (x == LK_UNLOCKED) {
				sleepq_release(&lk->lock_object);
				continue;
			}

			/*
			 * The lock can be in the state where there is a
			 * pending queue of waiters but no owner.  This
			 * happens when a contested lock has been released
			 * and a new owner has yet to claim it.  If curthread
			 * is the thread that manages to acquire it, claim
			 * lock ownership and return, preserving the waiters
			 * flags.
			 */
			if (x == (LK_UNLOCKED | v)) {
				if (atomic_cmpset_acq_ptr(&lk->lk_lock, x,
				    tid | v)) {
					sleepq_release(&lk->lock_object);
					LOCK_LOG2(lk,
					    "%s: %p claimed by a new writer",
					    __func__, lk);
					break;
				}
				sleepq_release(&lk->lock_object);
				continue;
			}

			/*
			 * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
			 * fail, loop back and retry.
			 */
			if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
				if (!atomic_cmpset_ptr(&lk->lk_lock, x,
				    x | LK_EXCLUSIVE_WAITERS)) {
					sleepq_release(&lk->lock_object);
					continue;
				}
				LOCK_LOG2(lk, "%s: %p set excl waiters flag",
				    __func__, lk);
			}

			/*
			 * Since we have been unable to acquire the exclusive
			 * lock and the exclusive waiters flag is set, we
			 * will sleep.
			 */
			error = sleeplk(lk, flags, ilk, iwmesg, ipri, itimo,
			    SQ_EXCLUSIVE_QUEUE);
			flags &= ~LK_INTERLOCK;
			if (error) {
				LOCK_LOG3(lk,
				    "%s: interrupted sleep for %p with %d",
				    __func__, lk, error);
				break;
			}
			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
			    __func__, lk);
		}
		if (error == 0) {
			lock_profile_obtain_lock_success(&lk->lock_object,
			    contested, waittime, file, line);
			LOCK_LOG_LOCK("XLOCK", &lk->lock_object, 0,
			    lk->lk_recurse, file, line);
			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
			    LK_TRYWIT(flags), file, line);
			TD_LOCKS_INC(curthread);
			STACK_SAVE(lk);
		}
		break;
	case LK_DOWNGRADE:
		_lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED, file, line);
		LOCK_LOG_LOCK("XDOWNGRADE", &lk->lock_object, 0, 0, file, line);
		WITNESS_DOWNGRADE(&lk->lock_object, 0, file, line);
		TD_SLOCKS_INC(curthread);

		/*
		 * In order to preserve waiters flags, just spin.
		 */
		for (;;) {
			x = lk->lk_lock & LK_ALL_WAITERS;
			if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
			    LK_SHARERS_LOCK(1) | x))
				break;
			cpu_spinwait();
		}
		break;
	case LK_RELEASE:
		_lockmgr_assert(lk, KA_LOCKED, file, line);
		x = lk->lk_lock;

		if ((x & LK_SHARE) == 0) {

			/*
			 * As a first attempt, treat the lock as if it had no
			 * waiters.
			 * Fix up the tid variable if the lock has been
			 * disowned.
			 */
			if (LK_HOLDER(x) == LK_KERNPROC)
				tid = LK_KERNPROC;
			else {
				WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE,
				    file, line);
				TD_LOCKS_DEC(curthread);
			}
			LOCK_LOG_LOCK("XUNLOCK", &lk->lock_object, 0,
			    lk->lk_recurse, file, line);

			/*
			 * The lock is held in exclusive mode.
			 * If the lock is also recursed, then unrecurse it.
			 */
			if (lockmgr_xlocked(lk) && lockmgr_recursed(lk)) {
				LOCK_LOG2(lk, "%s: %p unrecursing", __func__,
				    lk);
				lk->lk_recurse--;
				break;
			}
			lock_profile_release_lock(&lk->lock_object);

			if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid,
			    LK_UNLOCKED))
				break;

			sleepq_lock(&lk->lock_object);
			x = lk->lk_lock & LK_ALL_WAITERS;
			v = LK_UNLOCKED;

			/*
			 * If the lock has exclusive waiters, give them
			 * preference in order to avoid a deadlock with
			 * threads racing to acquire the lock in shared mode.
			 */
			if (x & LK_EXCLUSIVE_WAITERS) {
				queue = SQ_EXCLUSIVE_QUEUE;
				v |= (x & LK_SHARED_WAITERS);
			} else {
				MPASS(x == LK_SHARED_WAITERS);
				queue = SQ_SHARED_QUEUE;
			}

			LOCK_LOG3(lk,
			    "%s: %p waking up threads on the %s queue",
			    __func__, lk, queue == SQ_SHARED_QUEUE ? "shared" :
			    "exclusive");
			atomic_store_rel_ptr(&lk->lk_lock, v);
			sleepq_broadcast(&lk->lock_object, SLEEPQ_LK, 0, queue);
			sleepq_release(&lk->lock_object);
			break;
		} else
			wakeupshlk(lk, file, line);
		break;
	case LK_DRAIN:
		if (LK_CAN_WITNESS(flags))
			WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER |
			    LOP_EXCLUSIVE, file, line);

		/*
		 * Trying to drain a lock we already own will result in a
		 * deadlock.
		 */
		if (lockmgr_xlocked(lk)) {
			if (flags & LK_INTERLOCK)
				class->lc_unlock(ilk);
			panic("%s: draining %s with the lock held @ %s:%d\n",
			    __func__, iwmesg, file, line);
		}

		while (!atomic_cmpset_acq_ptr(&lk->lk_lock, LK_UNLOCKED, tid)) {
			lock_profile_obtain_lock_failed(&lk->lock_object,
			    &contested, &waittime);

			/*
			 * If the caller does not want to sleep, just give up
			 * and return.
			 */
			if (LK_TRYOP(flags)) {
				LOCK_LOG2(lk, "%s: %p fails the try operation",
				    __func__, lk);
				error = EBUSY;
				break;
			}

			/*
			 * Acquire the sleepqueue chain lock because we will
			 * probably need to manipulate the waiters flags.
			 */
			sleepq_lock(&lk->lock_object);
			x = lk->lk_lock;
			v = x & LK_ALL_WAITERS;

			/*
			 * If the lock has been released while we spun on
			 * the sleepqueue chain lock, just try again.
			 */
			if (x == LK_UNLOCKED) {
				sleepq_release(&lk->lock_object);
				continue;
			}

			if (x == (LK_UNLOCKED | v)) {
				v = x;
				if (v & LK_EXCLUSIVE_WAITERS) {
					queue = SQ_EXCLUSIVE_QUEUE;
					v &= ~LK_EXCLUSIVE_WAITERS;
				} else {
					MPASS(v & LK_SHARED_WAITERS);
					queue = SQ_SHARED_QUEUE;
					v &= ~LK_SHARED_WAITERS;
				}
				if (!atomic_cmpset_ptr(&lk->lk_lock, x, v)) {
					sleepq_release(&lk->lock_object);
					continue;
				}
				LOCK_LOG3(lk,
				"%s: %p waking up all threads on the %s queue",
				    __func__, lk, queue == SQ_SHARED_QUEUE ?
				    "shared" : "exclusive");
				sleepq_broadcast(&lk->lock_object, SLEEPQ_LK,
				    0, queue);

				/*
				 * If shared waiters have been woken up, we
				 * need to wait for one of them to acquire
				 * the lock before setting the exclusive
				 * waiters flag in order to avoid a deadlock.
				 */
				if (queue == SQ_SHARED_QUEUE) {
					for (v = lk->lk_lock;
					    (v & LK_SHARE) && !LK_SHARERS(v);
					    v = lk->lk_lock)
						cpu_spinwait();
				}
			}

			/*
			 * Try to set the LK_EXCLUSIVE_WAITERS flag.  If we
			 * fail, loop back and retry.
			 */
			if ((x & LK_EXCLUSIVE_WAITERS) == 0) {
				if (!atomic_cmpset_ptr(&lk->lk_lock, x,
				    x | LK_EXCLUSIVE_WAITERS)) {
					sleepq_release(&lk->lock_object);
					continue;
				}
				LOCK_LOG2(lk, "%s: %p set drain waiters flag",
				    __func__, lk);
			}

			/*
			 * Since we have been unable to acquire the exclusive
			 * lock and the exclusive waiters flag is set, we
			 * will sleep.
			 */
			if (flags & LK_INTERLOCK) {
				class->lc_unlock(ilk);
				flags &= ~LK_INTERLOCK;
			}
			GIANT_SAVE();
			sleepq_add(&lk->lock_object, NULL, iwmesg, SLEEPQ_LK,
			    SQ_EXCLUSIVE_QUEUE);
			sleepq_wait(&lk->lock_object, ipri & PRIMASK);
			GIANT_RESTORE();
			LOCK_LOG2(lk, "%s: %p resuming from the sleep queue",
			    __func__, lk);
		}

		if (error == 0) {
			lock_profile_obtain_lock_success(&lk->lock_object,
			    contested, waittime, file, line);
			LOCK_LOG_LOCK("DRAIN", &lk->lock_object, 0,
			    lk->lk_recurse, file, line);
			WITNESS_LOCK(&lk->lock_object, LOP_EXCLUSIVE |
			    LK_TRYWIT(flags), file, line);
			TD_LOCKS_INC(curthread);
			STACK_SAVE(lk);
		}
		break;
	default:
		if (flags & LK_INTERLOCK)
			class->lc_unlock(ilk);
		panic("%s: unknown lockmgr request 0x%x\n", __func__, op);
	}

	if (flags & LK_INTERLOCK)
		class->lc_unlock(ilk);

	return (error);
}

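/*
 * A usage sketch for the operations handled above.  It calls
 * __lockmgr_args() directly so that every default is spelled out; real
 * callers normally go through the lockmgr()/lockmgr_args() wrappers in
 * <sys/lockmgr.h>.  The interlock handling and function name are
 * illustrative only.
 */
#if 0
static int
shared_then_upgrade_sketch(struct lock *lk, struct mtx *interlock)
{
	int error;

	/* Shared acquire; LK_INTERLOCK drops the interlock for us. */
	mtx_lock(interlock);
	error = __lockmgr_args(lk, LK_SHARED | LK_INTERLOCK,
	    &interlock->lock_object, LK_WMESG_DEFAULT, LK_PRIO_DEFAULT,
	    LK_TIMO_DEFAULT, __FILE__, __LINE__);
	if (error != 0)
		return (error);

	/*
	 * Upgrade to exclusive.  If the atomic switch fails, the shared
	 * lock is dropped and reacquired exclusively, so any state read
	 * under the shared lock must be revalidated.
	 */
	error = __lockmgr_args(lk, LK_UPGRADE, NULL, LK_WMESG_DEFAULT,
	    LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, __FILE__, __LINE__);
	if (error == 0)
		error = __lockmgr_args(lk, LK_RELEASE, NULL,
		    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT,
		    __FILE__, __LINE__);
	return (error);
}
#endif
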
void
_lockmgr_disown(struct lock *lk, const char *file, int line)
{
	uintptr_t tid, x;

	tid = (uintptr_t)curthread;
	_lockmgr_assert(lk, KA_XLOCKED | KA_NOTRECURSED, file, line);

	/*
	 * If the owner is already LK_KERNPROC just skip the whole operation.
	 */
	if (LK_HOLDER(lk->lk_lock) != tid)
		return;
	LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line);
	WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line);
	TD_LOCKS_DEC(curthread);

	/*
	 * In order to preserve waiters flags, just spin.
	 */
	for (;;) {
		x = lk->lk_lock & LK_ALL_WAITERS;
		if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid | x,
		    LK_KERNPROC | x))
			return;
		cpu_spinwait();
	}
}

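/*
 * A disown sketch: the acquiring thread hands its exclusive hold over to
 * LK_KERNPROC so that whichever context completes the work may release the
 * lock.  The lockmgr() wrapper and the function split are assumptions for
 * illustration; the disown itself uses the function defined above.
 */
#if 0
static void
handoff_start_sketch(struct lock *lk)
{

	(void)lockmgr(lk, LK_EXCLUSIVE, NULL);
	_lockmgr_disown(lk, __FILE__, __LINE__);
	/* ... hand the object to another thread for completion ... */
}

static void
handoff_finish_sketch(struct lock *lk)
{

	/* Runs in a different thread; releases the disowned lock. */
	(void)lockmgr(lk, LK_RELEASE, NULL);
}
#endif
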
void
lockmgr_printinfo(struct lock *lk)
{
	struct thread *td;
	uintptr_t x;

	if (lk->lk_lock == LK_UNLOCKED)
		printf(" lock type %s: UNLOCKED\n", lk->lock_object.lo_name);
	else if (lk->lk_lock & LK_SHARE)
		printf(" lock type %s: SHARED (count %ju)\n",
		    lk->lock_object.lo_name,
		    (uintmax_t)LK_SHARERS(lk->lk_lock));
	else {
		td = lockmgr_xholder(lk);
		printf(" lock type %s: EXCL by thread %p (pid %d)\n",
		    lk->lock_object.lo_name, td, td->td_proc->p_pid);
	}

	x = lk->lk_lock;
	if (x & LK_EXCLUSIVE_WAITERS)
		printf(" with exclusive waiters pending\n");
	if (x & LK_SHARED_WAITERS)
		printf(" with shared waiters pending\n");

	STACK_PRINT(lk);
}

int
lockstatus(struct lock *lk)
{
	uintptr_t v, x;
	int ret;

	ret = LK_SHARED;
	x = lk->lk_lock;
	v = LK_HOLDER(x);

	if ((x & LK_SHARE) == 0) {
		if (v == (uintptr_t)curthread || v == LK_KERNPROC)
			ret = LK_EXCLUSIVE;
		else
			ret = LK_EXCLOTHER;
	} else if (x == LK_UNLOCKED)
		ret = 0;

	return (ret);
}

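/*
 * An ownership-check sketch built on lockstatus(); the function name and
 * the boolean convention are illustrative only.
 */
#if 0
static int
owned_exclusively_sketch(struct lock *lk)
{

	switch (lockstatus(lk)) {
	case LK_EXCLUSIVE:	/* Held by curthread or disowned. */
		return (1);
	case LK_EXCLOTHER:	/* Held exclusively by another thread. */
	case LK_SHARED:		/* Held in shared mode. */
	default:		/* 0: not held at all. */
		return (0);
	}
}
#endif
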
#ifdef INVARIANT_SUPPORT
#ifndef INVARIANTS
#undef	_lockmgr_assert
#endif

void
_lockmgr_assert(struct lock *lk, int what, const char *file, int line)
{
	int slocked = 0;

	if (panicstr != NULL)
		return;
	switch (what) {
	case KA_SLOCKED:
	case KA_SLOCKED | KA_NOTRECURSED:
	case KA_SLOCKED | KA_RECURSED:
		slocked = 1;
	case KA_LOCKED:
	case KA_LOCKED | KA_NOTRECURSED:
	case KA_LOCKED | KA_RECURSED:
#ifdef WITNESS

		/*
		 * We cannot trust WITNESS if the lock is held in exclusive
		 * mode and a call to lockmgr_disown() happened.
		 * Work around this by skipping the check whenever the lock
		 * is held in exclusive mode, even for the KA_LOCKED case.
		 */
		if (slocked || (lk->lk_lock & LK_SHARE)) {
			witness_assert(&lk->lock_object, what, file, line);
			break;
		}
#endif
		if (lk->lk_lock == LK_UNLOCKED ||
		    ((lk->lk_lock & LK_SHARE) == 0 && (slocked ||
		    (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk)))))
			panic("Lock %s not %slocked @ %s:%d\n",
			    lk->lock_object.lo_name, slocked ? "share" : "",
			    file, line);

		if ((lk->lk_lock & LK_SHARE) == 0) {
			if (lockmgr_recursed(lk)) {
				if (what & KA_NOTRECURSED)
					panic("Lock %s recursed @ %s:%d\n",
					    lk->lock_object.lo_name, file,
					    line);
			} else if (what & KA_RECURSED)
				panic("Lock %s not recursed @ %s:%d\n",
				    lk->lock_object.lo_name, file, line);
		}
		break;
	case KA_XLOCKED:
	case KA_XLOCKED | KA_NOTRECURSED:
	case KA_XLOCKED | KA_RECURSED:
		if (!lockmgr_xlocked(lk) && !lockmgr_disowned(lk))
			panic("Lock %s not exclusively locked @ %s:%d\n",
			    lk->lock_object.lo_name, file, line);
		if (lockmgr_recursed(lk)) {
			if (what & KA_NOTRECURSED)
				panic("Lock %s recursed @ %s:%d\n",
				    lk->lock_object.lo_name, file, line);
		} else if (what & KA_RECURSED)
			panic("Lock %s not recursed @ %s:%d\n",
			    lk->lock_object.lo_name, file, line);
		break;
	case KA_UNLOCKED:
		if (lockmgr_xlocked(lk) || lockmgr_disowned(lk))
			panic("Lock %s exclusively locked @ %s:%d\n",
			    lk->lock_object.lo_name, file, line);
		break;
	default:
		panic("Unknown lockmgr assertion: %d @ %s:%d\n", what, file,
		    line);
	}
}
#endif

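/*
 * An assertion-usage sketch: under INVARIANTS the call panics unless the
 * stated condition holds, otherwise it compiles away entirely.  The
 * surrounding function is illustrative only.
 */
#if 0
static void
requires_xlock_sketch(struct lock *lk)
{

	/* Panic (under INVARIANTS) unless curthread holds lk exclusively. */
	_lockmgr_assert(lk, KA_XLOCKED, __FILE__, __LINE__);
	/* ... modify state protected by lk ... */
}
#endif
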
#ifdef DDB
int
lockmgr_chain(struct thread *td, struct thread **ownerp)
{
	struct lock *lk;

	lk = td->td_wchan;

	if (LOCK_CLASS(&lk->lock_object) != &lock_class_lockmgr)
		return (0);
	db_printf("blocked on lockmgr %s", lk->lock_object.lo_name);
	if (lk->lk_lock & LK_SHARE)
		db_printf("SHARED (count %ju)\n",
		    (uintmax_t)LK_SHARERS(lk->lk_lock));
	else
		db_printf("EXCL\n");
	*ownerp = lockmgr_xholder(lk);

	return (1);
}

static void
db_show_lockmgr(struct lock_object *lock)
{
	struct thread *td;
	struct lock *lk;

	lk = (struct lock *)lock;

	db_printf(" state: ");
	if (lk->lk_lock == LK_UNLOCKED)
		db_printf("UNLOCKED\n");
	else if (lk->lk_lock & LK_SHARE)
		db_printf("SLOCK: %ju\n", (uintmax_t)LK_SHARERS(lk->lk_lock));
	else {
		td = lockmgr_xholder(lk);
		if (td == (struct thread *)LK_KERNPROC)
			db_printf("XLOCK: LK_KERNPROC\n");
		else
			db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
			    td->td_tid, td->td_proc->p_pid,
			    td->td_proc->p_comm);
		if (lockmgr_recursed(lk))
			db_printf(" recursed: %d\n", lk->lk_recurse);
	}
	db_printf(" waiters: ");
	switch (lk->lk_lock & LK_ALL_WAITERS) {
	case LK_SHARED_WAITERS:
		db_printf("shared\n");
		break;
	case LK_EXCLUSIVE_WAITERS:
		db_printf("exclusive\n");
		break;
	case LK_ALL_WAITERS:
		db_printf("shared and exclusive\n");
		break;
	default:
		db_printf("none\n");
	}
}
#endif