xref: /freebsd/sys/kern/kern_mutex.c (revision ee41f1b1cf5e3d4f586cb85b46123b416275862c)
1 /*-
2  * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  * 3. Berkeley Software Design Inc's name may not be used to endorse or
13  *    promote products derived from this software without specific prior
14  *    written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
29  *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
30  * $FreeBSD$
31  */
32 
33 /*
34  * Machine independent bits of mutex implementation and implementation of
35  * `witness' structure & related debugging routines.
36  */
37 
38 /*
39  *	Main Entry: witness
40  *	Pronunciation: 'wit-n&s
41  *	Function: noun
42  *	Etymology: Middle English witnesse, from Old English witnes knowledge,
43  *	    testimony, witness, from 2wit
44  *	Date: before 12th century
45  *	1 : attestation of a fact or event : TESTIMONY
46  *	2 : one that gives evidence; specifically : one who testifies in
47  *	    a cause or before a judicial tribunal
48  *	3 : one asked to be present at a transaction so as to be able to
49  *	    testify to its having taken place
50  *	4 : one who has personal knowledge of something
51  *	5 a : something serving as evidence or proof : SIGN
52  *	  b : public affirmation by word or example of usually
53  *	      religious faith or conviction <the heroic witness to divine
54  *	      life -- Pilot>
55  *	6 capitalized : a member of the Jehovah's Witnesses
56  */
57 
58 #include "opt_ddb.h"
59 #include "opt_witness.h"
60 
61 #include <sys/param.h>
62 #include <sys/bus.h>
63 #include <sys/kernel.h>
64 #include <sys/malloc.h>
65 #include <sys/proc.h>
66 #include <sys/sysctl.h>
67 #include <sys/systm.h>
68 #include <sys/vmmeter.h>
69 #include <sys/ktr.h>
70 
71 #include <machine/atomic.h>
72 #include <machine/bus.h>
73 #include <machine/clock.h>
74 #include <machine/cpu.h>
75 
76 #include <ddb/ddb.h>
77 
78 #include <vm/vm.h>
79 #include <vm/vm_extern.h>
80 
81 #include <sys/mutex.h>
82 
83 /*
84  * The WITNESS-enabled mutex debug structure.
85  */
86 #ifdef WITNESS
87 struct mtx_debug {
88 	struct witness	*mtxd_witness;
89 	LIST_ENTRY(mtx)	mtxd_held;
90 	const char	*mtxd_file;
91 	int		mtxd_line;
92 };
93 
94 #define mtx_held	mtx_debug->mtxd_held
95 #define	mtx_file	mtx_debug->mtxd_file
96 #define	mtx_line	mtx_debug->mtxd_line
97 #define	mtx_witness	mtx_debug->mtxd_witness
98 #endif	/* WITNESS */
99 
100 /*
101  * Internal utility macros.
102  */
103 #define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
104 
105 #define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
106 	: (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))
107 
108 #define RETIP(x)		*(((uintptr_t *)(&x)) - 1)
109 #define SET_PRIO(p, pri)	(p)->p_pri.pri_level = (pri)
110 
111 /*
112  * Early WITNESS-enabled declarations.
113  */
114 #ifdef WITNESS
115 
116 /*
117  * Internal WITNESS routines which must be prototyped early.
118  *
119  * XXX: When/if witness code is cleaned up, it would be wise to place all
120  *	witness prototyping early in this file.
121  */
122 static void witness_init(struct mtx *, int flag);
123 static void witness_destroy(struct mtx *);
124 static void witness_display(void(*)(const char *fmt, ...));
125 
126 MALLOC_DEFINE(M_WITNESS, "witness", "witness mtx_debug structure");
127 
128 /* All mutexes in system (used for debug/panic) */
129 static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0 };
130 
131 /*
132  * This global is set to 0 once it becomes safe to use the witness code.
133  */
134 static int witness_cold = 1;
135 
136 #else	/* WITNESS */
137 
138 /* XXX XXX XXX
139  * flag++ is sleazoid way of shuting up warning
140  */
141 #define witness_init(m, flag) flag++
142 #define witness_destroy(m)
143 #define witness_try_enter(m, t, f, l)
144 #endif	/* WITNESS */
145 
146 /*
147  * All mutex locks in system are kept on the all_mtx list.
148  */
149 static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, 0, "All mutexes queue head",
150 	TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
151 	{ NULL, NULL }, &all_mtx, &all_mtx,
152 #ifdef WITNESS
153 	&all_mtx_debug
154 #else
155 	NULL
156 #endif
157 	 };
158 
159 /*
160  * Global variables for book keeping.
161  */
162 static int	mtx_cur_cnt;
163 static int	mtx_max_cnt;
164 
165 /*
166  * Couple of strings for KTR_LOCK tracing in order to avoid duplicates.
167  */
168 char	STR_mtx_lock_slp[] = "GOT (sleep) %s [%p] r=%d at %s:%d";
169 char	STR_mtx_unlock_slp[] = "REL (sleep) %s [%p] r=%d at %s:%d";
170 char	STR_mtx_lock_spn[] = "GOT (spin) %s [%p] r=%d at %s:%d";
171 char	STR_mtx_unlock_spn[] = "REL (spin) %s [%p] r=%d at %s:%d";
172 
173 /*
174  * Prototypes for non-exported routines.
175  *
176  * NOTE: Prototypes for witness routines are placed at the bottom of the file.
177  */
178 static void	propagate_priority(struct proc *);
179 
180 static void
181 propagate_priority(struct proc *p)
182 {
183 	int pri = p->p_pri.pri_level;
184 	struct mtx *m = p->p_blocked;
185 
186 	mtx_assert(&sched_lock, MA_OWNED);
187 	for (;;) {
188 		struct proc *p1;
189 
190 		p = mtx_owner(m);
191 
192 		if (p == NULL) {
193 			/*
194 			 * This really isn't quite right. Really
195 			 * ought to bump priority of process that
196 			 * next acquires the mutex.
197 			 */
198 			MPASS(m->mtx_lock == MTX_CONTESTED);
199 			return;
200 		}
201 
202 		MPASS(p->p_magic == P_MAGIC);
203 		KASSERT(p->p_stat != SSLEEP, ("sleeping process owns a mutex"));
204 		if (p->p_pri.pri_level <= pri)
205 			return;
206 
207 		/*
208 		 * Bump this process' priority.
209 		 */
210 		SET_PRIO(p, pri);
211 
212 		/*
213 		 * If lock holder is actually running, just bump priority.
214 		 */
215 		if (p->p_oncpu != 0xff) {
216 			MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
217 			return;
218 		}
219 
220 		/*
221 		 * If on run queue move to new run queue, and
222 		 * quit.
223 		 */
224 		if (p->p_stat == SRUN) {
225 			MPASS(p->p_blocked == NULL);
226 			remrunqueue(p);
227 			setrunqueue(p);
228 			return;
229 		}
230 
231 		/*
232 		 * If we aren't blocked on a mutex, we should be.
233 		 */
234 		KASSERT(p->p_stat == SMTX, (
235 		    "process %d(%s):%d holds %s but isn't blocked on a mutex\n",
236 		    p->p_pid, p->p_comm, p->p_stat,
237 		    m->mtx_description));
238 
239 		/*
240 		 * Pick up the mutex that p is blocked on.
241 		 */
242 		m = p->p_blocked;
243 		MPASS(m != NULL);
244 
245 		/*
246 		 * Check if the proc needs to be moved up on
247 		 * the blocked chain
248 		 */
249 		if (p == TAILQ_FIRST(&m->mtx_blocked)) {
250 			continue;
251 		}
252 
253 		p1 = TAILQ_PREV(p, procqueue, p_procq);
254 		if (p1->p_pri.pri_level <= pri) {
255 			continue;
256 		}
257 
258 		/*
259 		 * Remove proc from blocked chain and determine where
260 		 * it should be moved up to.  Since we know that p1 has
261 		 * a lower priority than p, we know that at least one
262 		 * process in the chain has a lower priority and that
263 		 * p1 will thus not be NULL after the loop.
264 		 */
265 		TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
266 		TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
267 			MPASS(p1->p_magic == P_MAGIC);
268 			if (p1->p_pri.pri_level > pri)
269 				break;
270 		}
271 
272 		MPASS(p1 != NULL);
273 		TAILQ_INSERT_BEFORE(p1, p, p_procq);
274 		CTR4(KTR_LOCK,
275 		    "propagate_priority: p %p moved before %p on [%p] %s",
276 		    p, p1, m, m->mtx_description);
277 	}
278 }
279 
280 /*
281  * The important part of mtx_trylock{,_flags}()
282  * Tries to acquire lock `m.' We do NOT handle recursion here; we assume that
283  * if we're called, it's because we know we don't already own this lock.
284  */
285 int
286 _mtx_trylock(struct mtx *m, int opts, const char *file, int line)
287 {
288 	int rval;
289 
290 	MPASS(curproc != NULL);
291 
292 	/*
293 	 * _mtx_trylock does not accept MTX_NOSWITCH option.
294 	 */
295 	KASSERT((opts & MTX_NOSWITCH) == 0,
296 	    ("mtx_trylock() called with invalid option flag(s) %d", opts));
297 
298 	rval = _obtain_lock(m, curproc);
299 
300 #ifdef WITNESS
301 	if (rval && m->mtx_witness != NULL) {
302 		/*
303 		 * We do not handle recursion in _mtx_trylock; see the
304 		 * note at the top of the routine.
305 		 */
306 		KASSERT(!mtx_recursed(m),
307 		    ("mtx_trylock() called on a recursed mutex"));
308 		witness_try_enter(m, (opts | m->mtx_flags), file, line);
309 	}
310 #endif	/* WITNESS */
311 
312 	if ((opts & MTX_QUIET) == 0)
313 		CTR5(KTR_LOCK, "TRY_ENTER %s [%p] result=%d at %s:%d",
314 		    m->mtx_description, m, rval, file, line);
315 
316 	return rval;
317 }
318 
319 /*
320  * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
321  *
322  * We call this if the lock is either contested (i.e. we need to go to
323  * sleep waiting for it), or if we need to recurse on it.
324  */
325 void
326 _mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
327 {
328 	struct proc *p = curproc;
329 
330 	if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) {
331 		m->mtx_recurse++;
332 		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
333 		if ((opts & MTX_QUIET) == 0)
334 			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
335 		return;
336 	}
337 
338 	if ((opts & MTX_QUIET) == 0)
339 		CTR3(KTR_LOCK, "_mtx_lock_sleep: %p contested (lock=%p) [%p]",
340 		    m, (void *)m->mtx_lock, (void *)RETIP(m));
341 
342 	/*
343 	 * Save our priority. Even though p_nativepri is protected by
344 	 * sched_lock, we don't obtain it here as it can be expensive.
345 	 * Since this is the only place p_nativepri is set, and since two
346 	 * CPUs will not be executing the same process concurrently, we know
347 	 * that no other CPU is going to be messing with this. Also,
348 	 * p_nativepri is only read when we are blocked on a mutex, so that
349 	 * can't be happening right now either.
350 	 */
351 	p->p_pri.pri_native = p->p_pri.pri_level;
352 
353 	while (!_obtain_lock(m, p)) {
354 		uintptr_t v;
355 		struct proc *p1;
356 
357 		mtx_lock_spin(&sched_lock);
358 		/*
359 		 * Check if the lock has been released while spinning for
360 		 * the sched_lock.
361 		 */
362 		if ((v = m->mtx_lock) == MTX_UNOWNED) {
363 			mtx_unlock_spin(&sched_lock);
364 			continue;
365 		}
366 
367 		/*
368 		 * The mutex was marked contested on release. This means that
369 		 * there are processes blocked on it.
370 		 */
371 		if (v == MTX_CONTESTED) {
372 			p1 = TAILQ_FIRST(&m->mtx_blocked);
373 			MPASS(p1 != NULL);
374 			m->mtx_lock = (uintptr_t)p | MTX_CONTESTED;
375 
376 			if (p1->p_pri.pri_level < p->p_pri.pri_level)
377 				SET_PRIO(p, p1->p_pri.pri_level);
378 			mtx_unlock_spin(&sched_lock);
379 			return;
380 		}
381 
382 		/*
383 		 * If the mutex isn't already contested and a failure occurs
384 		 * setting the contested bit, the mutex was either released
385 		 * or the state of the MTX_RECURSED bit changed.
386 		 */
387 		if ((v & MTX_CONTESTED) == 0 &&
388 		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
389 			(void *)(v | MTX_CONTESTED))) {
390 			mtx_unlock_spin(&sched_lock);
391 			continue;
392 		}
393 
394 		/*
395 		 * We deffinately must sleep for this lock.
396 		 */
397 		mtx_assert(m, MA_NOTOWNED);
398 
399 #ifdef notyet
400 		/*
401 		 * If we're borrowing an interrupted thread's VM context, we
402 		 * must clean up before going to sleep.
403 		 */
404 		if (p->p_flag & (P_ITHD | P_SITHD)) {
405 			ithd_t *it = (ithd_t *)p;
406 
407 			if (it->it_interrupted) {
408 				if ((opts & MTX_QUIET) == 0)
409 					CTR2(KTR_LOCK,
410 				    "_mtx_lock_sleep: 0x%x interrupted 0x%x",
411 					    it, it->it_interrupted);
412 				intr_thd_fixup(it);
413 			}
414 		}
415 #endif
416 
417 		/*
418 		 * Put us on the list of threads blocked on this mutex.
419 		 */
420 		if (TAILQ_EMPTY(&m->mtx_blocked)) {
421 			p1 = (struct proc *)(m->mtx_lock & MTX_FLAGMASK);
422 			LIST_INSERT_HEAD(&p1->p_contested, m, mtx_contested);
423 			TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
424 		} else {
425 			TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
426 				if (p1->p_pri.pri_level > p->p_pri.pri_level)
427 					break;
428 			if (p1)
429 				TAILQ_INSERT_BEFORE(p1, p, p_procq);
430 			else
431 				TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
432 		}
433 
434 		/*
435 		 * Save who we're blocked on.
436 		 */
437 		p->p_blocked = m;
438 		p->p_mtxname = m->mtx_description;
439 		p->p_stat = SMTX;
440 		propagate_priority(p);
441 
442 		if ((opts & MTX_QUIET) == 0)
443 			CTR3(KTR_LOCK,
444 			    "_mtx_lock_sleep: p %p blocked on [%p] %s", p, m,
445 			    m->mtx_description);
446 
447 		mi_switch();
448 
449 		if ((opts & MTX_QUIET) == 0)
450 			CTR3(KTR_LOCK,
451 			  "_mtx_lock_sleep: p %p free from blocked on [%p] %s",
452 			  p, m, m->mtx_description);
453 
454 		mtx_unlock_spin(&sched_lock);
455 	}
456 
457 	return;
458 }
459 
460 /*
461  * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
462  *
463  * This is only called if we need to actually spin for the lock. Recursion
464  * is handled inline.
465  */
466 void
467 _mtx_lock_spin(struct mtx *m, int opts, u_int mtx_intr, const char *file,
468 	       int line)
469 {
470 	int i = 0;
471 
472 	if ((opts & MTX_QUIET) == 0)
473 		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);
474 
475 	for (;;) {
476 		if (_obtain_lock(m, curproc))
477 			break;
478 
479 		while (m->mtx_lock != MTX_UNOWNED) {
480 			if (i++ < 1000000)
481 				continue;
482 			if (i++ < 6000000)
483 				DELAY(1);
484 #ifdef DDB
485 			else if (!db_active)
486 #else
487 			else
488 #endif
489 			panic("spin lock %s held by %p for > 5 seconds",
490 			    m->mtx_description, (void *)m->mtx_lock);
491 		}
492 	}
493 
494 	m->mtx_saveintr = mtx_intr;
495 	if ((opts & MTX_QUIET) == 0)
496 		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);
497 
498 	return;
499 }
500 
501 /*
502  * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
503  *
504  * We are only called here if the lock is recursed or contested (i.e. we
505  * need to wake up a blocked thread).
506  */
507 void
508 _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
509 {
510 	struct proc *p, *p1;
511 	struct mtx *m1;
512 	int pri;
513 
514 	p = curproc;
515 	MPASS4(mtx_owned(m), "mtx_owned(mpp)", file, line);
516 
517 	if (mtx_recursed(m)) {
518 		if (--(m->mtx_recurse) == 0)
519 			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
520 		if ((opts & MTX_QUIET) == 0)
521 			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
522 		return;
523 	}
524 
525 	mtx_lock_spin(&sched_lock);
526 	if ((opts & MTX_QUIET) == 0)
527 		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);
528 
529 	p1 = TAILQ_FIRST(&m->mtx_blocked);
530 	MPASS(p->p_magic == P_MAGIC);
531 	MPASS(p1->p_magic == P_MAGIC);
532 
533 	TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
534 
535 	if (TAILQ_EMPTY(&m->mtx_blocked)) {
536 		LIST_REMOVE(m, mtx_contested);
537 		_release_lock_quick(m);
538 		if ((opts & MTX_QUIET) == 0)
539 			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
540 	} else
541 		atomic_store_rel_ptr(&m->mtx_lock, (void *)MTX_CONTESTED);
542 
543 	pri = PRI_MAX;
544 	LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
545 		int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_pri.pri_level;
546 		if (cp < pri)
547 			pri = cp;
548 	}
549 
550 	if (pri > p->p_pri.pri_native)
551 		pri = p->p_pri.pri_native;
552 	SET_PRIO(p, pri);
553 
554 	if ((opts & MTX_QUIET) == 0)
555 		CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
556 		    m, p1);
557 
558 	p1->p_blocked = NULL;
559 	p1->p_mtxname = NULL;
560 	p1->p_stat = SRUN;
561 	setrunqueue(p1);
562 
563 	if ((opts & MTX_NOSWITCH) == 0 && p1->p_pri.pri_level < pri) {
564 #ifdef notyet
565 		if (p->p_flag & (P_ITHD | P_SITHD)) {
566 			ithd_t *it = (ithd_t *)p;
567 
568 			if (it->it_interrupted) {
569 				if ((opts & MTX_QUIET) == 0)
570 					CTR2(KTR_LOCK,
571 				    "_mtx_unlock_sleep: 0x%x interrupted 0x%x",
572 					    it, it->it_interrupted);
573 				intr_thd_fixup(it);
574 			}
575 		}
576 #endif
577 		setrunqueue(p);
578 		if ((opts & MTX_QUIET) == 0)
579 			CTR2(KTR_LOCK,
580 			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
581 			    (void *)m->mtx_lock);
582 
583 		mi_switch();
584 		if ((opts & MTX_QUIET) == 0)
585 			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
586 			    m, (void *)m->mtx_lock);
587 	}
588 
589 	mtx_unlock_spin(&sched_lock);
590 
591 	return;
592 }
593 
594 /*
595  * All the unlocking of MTX_SPIN locks is done inline.
596  * See the _rel_spin_lock() macro for the details.
597  */
598 
599 /*
600  * The INVARIANTS-enabled mtx_assert()
601  */
602 #ifdef INVARIANTS
603 void
604 _mtx_assert(struct mtx *m, int what, const char *file, int line)
605 {
606 	switch ((what)) {
607 	case MA_OWNED:
608 	case MA_OWNED | MA_RECURSED:
609 	case MA_OWNED | MA_NOTRECURSED:
610 		if (!mtx_owned((m)))
611 			panic("mutex %s not owned at %s:%d",
612 			    (m)->mtx_description, file, line);
613 		if (mtx_recursed((m))) {
614 			if (((what) & MA_NOTRECURSED) != 0)
615 				panic("mutex %s recursed at %s:%d",
616 				    (m)->mtx_description, file, line);
617 		} else if (((what) & MA_RECURSED) != 0) {
618 			panic("mutex %s unrecursed at %s:%d",
619 			    (m)->mtx_description, file, line);
620 		}
621 		break;
622 	case MA_NOTOWNED:
623 		if (mtx_owned((m)))
624 			panic("mutex %s owned at %s:%d",
625 			    (m)->mtx_description, file, line);
626 		break;
627 	default:
628 		panic("unknown mtx_assert at %s:%d", file, line);
629 	}
630 }
631 #endif
632 
633 /*
634  * The MUTEX_DEBUG-enabled mtx_validate()
635  */
636 #define MV_DESTROY	0	/* validate before destory */
637 #define MV_INIT		1	/* validate before init */
638 
639 #ifdef MUTEX_DEBUG
640 
641 int mtx_validate __P((struct mtx *, int));
642 
643 int
644 mtx_validate(struct mtx *m, int when)
645 {
646 	struct mtx *mp;
647 	int i;
648 	int retval = 0;
649 
650 #ifdef WITNESS
651 	if (witness_cold)
652 		return 0;
653 #endif
654 	if (m == &all_mtx || cold)
655 		return 0;
656 
657 	mtx_lock(&all_mtx);
658 /*
659  * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
660  * we can re-enable the kernacc() checks.
661  */
662 #ifndef __alpha__
663 	MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t),
664 	    VM_PROT_READ) == 1);
665 #endif
666 	MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
667 	for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
668 #ifndef __alpha__
669 		if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t),
670 		    VM_PROT_READ) != 1) {
671 			panic("mtx_validate: mp=%p mp->mtx_next=%p",
672 			    mp, mp->mtx_next);
673 		}
674 #endif
675 		i++;
676 		if (i > mtx_cur_cnt) {
677 			panic("mtx_validate: too many in chain, known=%d\n",
678 			    mtx_cur_cnt);
679 		}
680 	}
681 	MPASS(i == mtx_cur_cnt);
682 	switch (when) {
683 	case MV_DESTROY:
684 		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
685 			if (mp == m)
686 				break;
687 		MPASS(mp == m);
688 		break;
689 	case MV_INIT:
690 		for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
691 		if (mp == m) {
692 			/*
693 			 * Not good. This mutex already exists.
694 			 */
695 			printf("re-initing existing mutex %s\n",
696 			    m->mtx_description);
697 			MPASS(m->mtx_lock == MTX_UNOWNED);
698 			retval = 1;
699 		}
700 	}
701 	mtx_unlock(&all_mtx);
702 	return (retval);
703 }
704 #endif
705 
706 /*
707  * Mutex initialization routine; initialize lock `m' of type contained in
708  * `opts' with options contained in `opts' and description `description.'
709  * Place on "all_mtx" queue.
710  */
711 void
712 mtx_init(struct mtx *m, const char *description, int opts)
713 {
714 
715 	if ((opts & MTX_QUIET) == 0)
716 		CTR2(KTR_LOCK, "mtx_init %p (%s)", m, description);
717 
718 #ifdef MUTEX_DEBUG
719 	/* Diagnostic and error correction */
720 	if (mtx_validate(m, MV_INIT))
721 		return;
722 #endif
723 
724 	bzero((void *)m, sizeof *m);
725 	TAILQ_INIT(&m->mtx_blocked);
726 
727 #ifdef WITNESS
728 	if (!witness_cold) {
729 		m->mtx_debug = malloc(sizeof(struct mtx_debug),
730 		    M_WITNESS, M_NOWAIT | M_ZERO);
731 		MPASS(m->mtx_debug != NULL);
732 	}
733 #endif
734 
735 	m->mtx_description = description;
736 	m->mtx_flags = opts;
737 	m->mtx_lock = MTX_UNOWNED;
738 
739 	/* Put on all mutex queue */
740 	mtx_lock(&all_mtx);
741 	m->mtx_next = &all_mtx;
742 	m->mtx_prev = all_mtx.mtx_prev;
743 	m->mtx_prev->mtx_next = m;
744 	all_mtx.mtx_prev = m;
745 	if (++mtx_cur_cnt > mtx_max_cnt)
746 		mtx_max_cnt = mtx_cur_cnt;
747 	mtx_unlock(&all_mtx);
748 
749 #ifdef WITNESS
750 	if (!witness_cold)
751 		witness_init(m, opts);
752 #endif
753 }
754 
755 /*
756  * Remove lock `m' from all_mtx queue.
757  */
758 void
759 mtx_destroy(struct mtx *m)
760 {
761 
762 #ifdef WITNESS
763 	KASSERT(!witness_cold, ("%s: Cannot destroy while still cold\n",
764 	    __FUNCTION__));
765 #endif
766 
767 	CTR2(KTR_LOCK, "mtx_destroy %p (%s)", m, m->mtx_description);
768 
769 #ifdef MUTEX_DEBUG
770 	if (m->mtx_next == NULL)
771 		panic("mtx_destroy: %p (%s) already destroyed",
772 		    m, m->mtx_description);
773 
774 	if (!mtx_owned(m)) {
775 		MPASS(m->mtx_lock == MTX_UNOWNED);
776 	} else {
777 		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
778 	}
779 
780 	/* diagnostic */
781 	mtx_validate(m, MV_DESTROY);
782 #endif
783 
784 #ifdef WITNESS
785 	if (m->mtx_witness)
786 		witness_destroy(m);
787 #endif /* WITNESS */
788 
789 	/* Remove from the all mutex queue */
790 	mtx_lock(&all_mtx);
791 	m->mtx_next->mtx_prev = m->mtx_prev;
792 	m->mtx_prev->mtx_next = m->mtx_next;
793 
794 #ifdef MUTEX_DEBUG
795 	m->mtx_next = m->mtx_prev = NULL;
796 #endif
797 
798 #ifdef WITNESS
799 	free(m->mtx_debug, M_WITNESS);
800 	m->mtx_debug = NULL;
801 #endif
802 
803 	mtx_cur_cnt--;
804 	mtx_unlock(&all_mtx);
805 }
806 
807 
808 /*
809  * The WITNESS-enabled diagnostic code.
810  */
811 #ifdef WITNESS
812 static void
813 witness_fixup(void *dummy __unused)
814 {
815 	struct mtx *mp;
816 
817 	/*
818 	 * We have to release Giant before initializing its witness
819 	 * structure so that WITNESS doesn't get confused.
820 	 */
821 	mtx_unlock(&Giant);
822 	mtx_assert(&Giant, MA_NOTOWNED);
823 
824 	mtx_lock(&all_mtx);
825 
826 	/* Iterate through all mutexes and finish up mutex initialization. */
827 	for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
828 
829 		mp->mtx_debug = malloc(sizeof(struct mtx_debug),
830 		    M_WITNESS, M_NOWAIT | M_ZERO);
831 		MPASS(mp->mtx_debug != NULL);
832 
833 		witness_init(mp, mp->mtx_flags);
834 	}
835 	mtx_unlock(&all_mtx);
836 
837 	/* Mark the witness code as being ready for use. */
838 	atomic_store_rel_int(&witness_cold, 0);
839 
840 	mtx_lock(&Giant);
841 }
842 SYSINIT(wtnsfxup, SI_SUB_MUTEX, SI_ORDER_FIRST, witness_fixup, NULL)
843 
844 #define WITNESS_COUNT 200
845 #define	WITNESS_NCHILDREN 2
846 
847 int witness_watch = 1;
848 
849 struct witness {
850 	struct witness	*w_next;
851 	const char	*w_description;
852 	const char	*w_file;
853 	int		 w_line;
854 	struct witness	*w_morechildren;
855 	u_char		 w_childcnt;
856 	u_char		 w_Giant_squawked:1;
857 	u_char		 w_other_squawked:1;
858 	u_char		 w_same_squawked:1;
859 	u_char		 w_spin:1;	/* MTX_SPIN type mutex. */
860 	u_int		 w_level;
861 	struct witness	*w_children[WITNESS_NCHILDREN];
862 };
863 
864 struct witness_blessed {
865 	char 	*b_lock1;
866 	char	*b_lock2;
867 };
868 
869 #ifdef DDB
870 /*
871  * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
872  * drop into kdebug() when:
873  *	- a lock heirarchy violation occurs
874  *	- locks are held when going to sleep.
875  */
876 int	witness_ddb;
877 #ifdef WITNESS_DDB
878 TUNABLE_INT_DECL("debug.witness_ddb", 1, witness_ddb);
879 #else
880 TUNABLE_INT_DECL("debug.witness_ddb", 0, witness_ddb);
881 #endif
882 SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
883 #endif /* DDB */
884 
885 int	witness_skipspin;
886 #ifdef WITNESS_SKIPSPIN
887 TUNABLE_INT_DECL("debug.witness_skipspin", 1, witness_skipspin);
888 #else
889 TUNABLE_INT_DECL("debug.witness_skipspin", 0, witness_skipspin);
890 #endif
891 SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0,
892     "");
893 
894 /*
895  * Witness-enabled globals
896  */
897 static struct mtx	w_mtx;
898 static struct witness	*w_free;
899 static struct witness	*w_all;
900 static int		 w_inited;
901 static int		 witness_dead;	/* fatal error, probably no memory */
902 
903 static struct witness	 w_data[WITNESS_COUNT];
904 
905 /*
906  * Internal witness routine prototypes
907  */
908 static struct witness *enroll(const char *description, int flag);
909 static int itismychild(struct witness *parent, struct witness *child);
910 static void removechild(struct witness *parent, struct witness *child);
911 static int isitmychild(struct witness *parent, struct witness *child);
912 static int isitmydescendant(struct witness *parent, struct witness *child);
913 static int dup_ok(struct witness *);
914 static int blessed(struct witness *, struct witness *);
915 static void
916     witness_displaydescendants(void(*)(const char *fmt, ...), struct witness *);
917 static void witness_leveldescendents(struct witness *parent, int level);
918 static void witness_levelall(void);
919 static struct witness * witness_get(void);
920 static void witness_free(struct witness *m);
921 
922 static char *ignore_list[] = {
923 	"witness lock",
924 	NULL
925 };
926 
927 static char *spin_order_list[] = {
928 #if defined(__i386__) && defined (SMP)
929 	"com",
930 #endif
931 	"sio",
932 #ifdef __i386__
933 	"cy",
934 #endif
935 	"sched lock",
936 #ifdef __i386__
937 	"clk",
938 #endif
939 	"callout",
940 	/*
941 	 * leaf locks
942 	 */
943 	"ithread table lock",
944 	"ithread list lock",
945 #ifdef SMP
946 #ifdef __i386__
947 	"ap boot",
948 	"imen",
949 #endif
950 	"smp rendezvous",
951 #endif
952 	NULL
953 };
954 
955 static char *order_list[] = {
956 	"Giant", "proctree", "allproc", "process lock", "uidinfo hash",
957 	    "uidinfo struct", NULL,
958 	NULL
959 };
960 
961 static char *dup_list[] = {
962 	NULL
963 };
964 
965 static char *sleep_list[] = {
966 	"Giant",
967 	NULL
968 };
969 
970 /*
971  * Pairs of locks which have been blessed
972  * Don't complain about order problems with blessed locks
973  */
974 static struct witness_blessed blessed_list[] = {
975 };
976 static int blessed_count =
977 	sizeof(blessed_list) / sizeof(struct witness_blessed);
978 
979 static void
980 witness_init(struct mtx *m, int flag)
981 {
982 	m->mtx_witness = enroll(m->mtx_description, flag);
983 }
984 
985 static void
986 witness_destroy(struct mtx *m)
987 {
988 	struct mtx *m1;
989 	struct proc *p;
990 	p = curproc;
991 	LIST_FOREACH(m1, &p->p_heldmtx, mtx_held) {
992 		if (m1 == m) {
993 			LIST_REMOVE(m, mtx_held);
994 			break;
995 		}
996 	}
997 	return;
998 
999 }
1000 
1001 static void
1002 witness_display(void(*prnt)(const char *fmt, ...))
1003 {
1004 	struct witness *w, *w1;
1005 	int level, found;
1006 
1007 	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1008 	witness_levelall();
1009 
1010 	/*
1011 	 * First, handle sleep mutexes which have been acquired at least
1012 	 * once.
1013 	 */
1014 	prnt("Sleep mutexes:\n");
1015 	for (w = w_all; w; w = w->w_next) {
1016 		if (w->w_file == NULL || w->w_spin)
1017 			continue;
1018 		for (w1 = w_all; w1; w1 = w1->w_next) {
1019 			if (isitmychild(w1, w))
1020 				break;
1021 		}
1022 		if (w1 != NULL)
1023 			continue;
1024 		/*
1025 		 * This lock has no anscestors, display its descendants.
1026 		 */
1027 		witness_displaydescendants(prnt, w);
1028 	}
1029 
1030 	/*
1031 	 * Now do spin mutexes which have been acquired at least once.
1032 	 */
1033 	prnt("\nSpin mutexes:\n");
1034 	level = 0;
1035 	while (level < sizeof(spin_order_list) / sizeof(char *)) {
1036 		found = 0;
1037 		for (w = w_all; w; w = w->w_next) {
1038 			if (w->w_file == NULL || !w->w_spin)
1039 				continue;
1040 			if (w->w_level == 1 << level) {
1041 				witness_displaydescendants(prnt, w);
1042 				level++;
1043 				found = 1;
1044 			}
1045 		}
1046 		if (found == 0)
1047 			level++;
1048 	}
1049 
1050 	/*
1051 	 * Finally, any mutexes which have not been acquired yet.
1052 	 */
1053 	prnt("\nMutexes which were never acquired:\n");
1054 	for (w = w_all; w; w = w->w_next) {
1055 		if (w->w_file != NULL)
1056 			continue;
1057 		prnt("%s\n", w->w_description);
1058 	}
1059 }
1060 
1061 void
1062 witness_enter(struct mtx *m, int flags, const char *file, int line)
1063 {
1064 	struct witness *w, *w1;
1065 	struct mtx *m1;
1066 	struct proc *p;
1067 	int i;
1068 #ifdef DDB
1069 	int go_into_ddb = 0;
1070 #endif /* DDB */
1071 
1072 	if (witness_cold || m->mtx_witness == NULL || panicstr)
1073 		return;
1074 	w = m->mtx_witness;
1075 	p = curproc;
1076 
1077 	if (flags & MTX_SPIN) {
1078 		if ((m->mtx_flags & MTX_SPIN) == 0)
1079 			panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @"
1080 			    " %s:%d", m->mtx_description, file, line);
1081 		if (mtx_recursed(m)) {
1082 			if ((m->mtx_flags & MTX_RECURSE) == 0)
1083 				panic("mutex_enter: recursion on non-recursive"
1084 				    " mutex %s @ %s:%d", m->mtx_description,
1085 				    file, line);
1086 			return;
1087 		}
1088 		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1089 		i = PCPU_GET(witness_spin_check);
1090 		if (i != 0 && w->w_level < i) {
1091 			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1092 			panic("mutex_enter(%s:%x, MTX_SPIN) out of order @"
1093 			    " %s:%d already holding %s:%x",
1094 			    m->mtx_description, w->w_level, file, line,
1095 			    spin_order_list[ffs(i)-1], i);
1096 		}
1097 		PCPU_SET(witness_spin_check, i | w->w_level);
1098 		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1099 		w->w_file = file;
1100 		w->w_line = line;
1101 		m->mtx_line = line;
1102 		m->mtx_file = file;
1103 		return;
1104 	}
1105 	if ((m->mtx_flags & MTX_SPIN) != 0)
1106 		panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1107 		    m->mtx_description, file, line);
1108 
1109 	if (mtx_recursed(m)) {
1110 		if ((m->mtx_flags & MTX_RECURSE) == 0)
1111 			panic("mutex_enter: recursion on non-recursive"
1112 			    " mutex %s @ %s:%d", m->mtx_description,
1113 			    file, line);
1114 		return;
1115 	}
1116 	if (witness_dead)
1117 		goto out;
1118 	if (cold)
1119 		goto out;
1120 
1121 	if (!mtx_legal2block())
1122 		panic("blockable mtx_lock() of %s when not legal @ %s:%d",
1123 			    m->mtx_description, file, line);
1124 	/*
1125 	 * Is this the first mutex acquired
1126 	 */
1127 	if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
1128 		goto out;
1129 
1130 	if ((w1 = m1->mtx_witness) == w) {
1131 		if (w->w_same_squawked || dup_ok(w))
1132 			goto out;
1133 		w->w_same_squawked = 1;
1134 		printf("acquring duplicate lock of same type: \"%s\"\n",
1135 			m->mtx_description);
1136 		printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
1137 		printf(" 2nd @ %s:%d\n", file, line);
1138 #ifdef DDB
1139 		go_into_ddb = 1;
1140 #endif /* DDB */
1141 		goto out;
1142 	}
1143 	MPASS(!mtx_owned(&w_mtx));
1144 	mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1145 	/*
1146 	 * If we have a known higher number just say ok
1147 	 */
1148 	if (witness_watch > 1 && w->w_level > w1->w_level) {
1149 		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1150 		goto out;
1151 	}
1152 	if (isitmydescendant(m1->mtx_witness, w)) {
1153 		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1154 		goto out;
1155 	}
1156 	for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
1157 
1158 		MPASS(i < 200);
1159 		w1 = m1->mtx_witness;
1160 		if (isitmydescendant(w, w1)) {
1161 			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1162 			if (blessed(w, w1))
1163 				goto out;
1164 			if (m1 == &Giant) {
1165 				if (w1->w_Giant_squawked)
1166 					goto out;
1167 				else
1168 					w1->w_Giant_squawked = 1;
1169 			} else {
1170 				if (w1->w_other_squawked)
1171 					goto out;
1172 				else
1173 					w1->w_other_squawked = 1;
1174 			}
1175 			printf("lock order reversal\n");
1176 			printf(" 1st %s last acquired @ %s:%d\n",
1177 			    w->w_description, w->w_file, w->w_line);
1178 			printf(" 2nd %p %s @ %s:%d\n",
1179 			    m1, w1->w_description, w1->w_file, w1->w_line);
1180 			printf(" 3rd %p %s @ %s:%d\n",
1181 			    m, w->w_description, file, line);
1182 #ifdef DDB
1183 			go_into_ddb = 1;
1184 #endif /* DDB */
1185 			goto out;
1186 		}
1187 	}
1188 	m1 = LIST_FIRST(&p->p_heldmtx);
1189 	if (!itismychild(m1->mtx_witness, w))
1190 		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1191 
1192 out:
1193 #ifdef DDB
1194 	if (witness_ddb && go_into_ddb)
1195 		Debugger("witness_enter");
1196 #endif /* DDB */
1197 	w->w_file = file;
1198 	w->w_line = line;
1199 	m->mtx_line = line;
1200 	m->mtx_file = file;
1201 
1202 	/*
1203 	 * If this pays off it likely means that a mutex being witnessed
1204 	 * is acquired in hardclock. Put it in the ignore list. It is
1205 	 * likely not the mutex this assert fails on.
1206 	 */
1207 	MPASS(m->mtx_held.le_prev == NULL);
1208 	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1209 }
1210 
1211 void
1212 witness_try_enter(struct mtx *m, int flags, const char *file, int line)
1213 {
1214 	struct proc *p;
1215 	struct witness *w = m->mtx_witness;
1216 
1217 	if (witness_cold)
1218 		return;
1219 	if (panicstr)
1220 		return;
1221 	if (flags & MTX_SPIN) {
1222 		if ((m->mtx_flags & MTX_SPIN) == 0)
1223 			panic("mutex_try_enter: "
1224 			    "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
1225 			    m->mtx_description, file, line);
1226 		if (mtx_recursed(m)) {
1227 			if ((m->mtx_flags & MTX_RECURSE) == 0)
1228 				panic("mutex_try_enter: recursion on"
1229 				    " non-recursive mutex %s @ %s:%d",
1230 				    m->mtx_description, file, line);
1231 			return;
1232 		}
1233 		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1234 		PCPU_SET(witness_spin_check,
1235 		    PCPU_GET(witness_spin_check) | w->w_level);
1236 		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1237 		w->w_file = file;
1238 		w->w_line = line;
1239 		m->mtx_line = line;
1240 		m->mtx_file = file;
1241 		return;
1242 	}
1243 
1244 	if ((m->mtx_flags & MTX_SPIN) != 0)
1245 		panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1246 		    m->mtx_description, file, line);
1247 
1248 	if (mtx_recursed(m)) {
1249 		if ((m->mtx_flags & MTX_RECURSE) == 0)
1250 			panic("mutex_try_enter: recursion on non-recursive"
1251 			    " mutex %s @ %s:%d", m->mtx_description, file,
1252 			    line);
1253 		return;
1254 	}
1255 	w->w_file = file;
1256 	w->w_line = line;
1257 	m->mtx_line = line;
1258 	m->mtx_file = file;
1259 	p = curproc;
1260 	MPASS(m->mtx_held.le_prev == NULL);
1261 	LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
1262 }
1263 
1264 void
1265 witness_exit(struct mtx *m, int flags, const char *file, int line)
1266 {
1267 	struct witness *w;
1268 
1269 	if (witness_cold || m->mtx_witness == NULL || panicstr)
1270 		return;
1271 	w = m->mtx_witness;
1272 
1273 	if (flags & MTX_SPIN) {
1274 		if ((m->mtx_flags & MTX_SPIN) == 0)
1275 			panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @"
1276 			    " %s:%d", m->mtx_description, file, line);
1277 		if (mtx_recursed(m)) {
1278 			if ((m->mtx_flags & MTX_RECURSE) == 0)
1279 				panic("mutex_exit: recursion on non-recursive"
1280 				    " mutex %s @ %s:%d", m->mtx_description,
1281 				    file, line);
1282 			return;
1283 		}
1284 		mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1285 		PCPU_SET(witness_spin_check,
1286 		    PCPU_GET(witness_spin_check) & ~w->w_level);
1287 		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1288 		return;
1289 	}
1290 	if ((m->mtx_flags & MTX_SPIN) != 0)
1291 		panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
1292 		    m->mtx_description, file, line);
1293 
1294 	if (mtx_recursed(m)) {
1295 		if ((m->mtx_flags & MTX_RECURSE) == 0)
1296 			panic("mutex_exit: recursion on non-recursive"
1297 			    " mutex %s @ %s:%d", m->mtx_description,
1298 			    file, line);
1299 		return;
1300 	}
1301 
1302 	if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
1303 		panic("switchable mtx_unlock() of %s when not legal @ %s:%d",
1304 			    m->mtx_description, file, line);
1305 	LIST_REMOVE(m, mtx_held);
1306 	m->mtx_held.le_prev = NULL;
1307 }
1308 
1309 int
1310 witness_sleep(int check_only, struct mtx *mtx, const char *file, int line)
1311 {
1312 	struct mtx *m;
1313 	struct proc *p;
1314 	char **sleep;
1315 	int n = 0;
1316 
1317 	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1318 	p = curproc;
1319 	LIST_FOREACH(m, &p->p_heldmtx, mtx_held) {
1320 		if (m == mtx)
1321 			continue;
1322 		for (sleep = sleep_list; *sleep!= NULL; sleep++)
1323 			if (strcmp(m->mtx_description, *sleep) == 0)
1324 				goto next;
1325 		if (n == 0)
1326 			printf("Whee!\n");
1327 		printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
1328 			file, line, check_only ? "could sleep" : "sleeping",
1329 			m->mtx_description,
1330 			m->mtx_witness->w_file, m->mtx_witness->w_line);
1331 		n++;
1332 	next:
1333 	}
1334 #ifdef DDB
1335 	if (witness_ddb && n)
1336 		Debugger("witness_sleep");
1337 #endif /* DDB */
1338 	return (n);
1339 }
1340 
1341 static struct witness *
1342 enroll(const char *description, int flag)
1343 {
1344 	int i;
1345 	struct witness *w, *w1;
1346 	char **ignore;
1347 	char **order;
1348 
1349 	if (!witness_watch)
1350 		return (NULL);
1351 	for (ignore = ignore_list; *ignore != NULL; ignore++)
1352 		if (strcmp(description, *ignore) == 0)
1353 			return (NULL);
1354 
1355 	if (w_inited == 0) {
1356 		mtx_init(&w_mtx, "witness lock", MTX_SPIN);
1357 		for (i = 0; i < WITNESS_COUNT; i++) {
1358 			w = &w_data[i];
1359 			witness_free(w);
1360 		}
1361 		w_inited = 1;
1362 		for (order = order_list; *order != NULL; order++) {
1363 			w = enroll(*order, MTX_DEF);
1364 			w->w_file = "order list";
1365 			for (order++; *order != NULL; order++) {
1366 				w1 = enroll(*order, MTX_DEF);
1367 				w1->w_file = "order list";
1368 				itismychild(w, w1);
1369 				w = w1;
1370     	    	    	}
1371 		}
1372 	}
1373 	if ((flag & MTX_SPIN) && witness_skipspin)
1374 		return (NULL);
1375 	mtx_lock_spin_flags(&w_mtx, MTX_QUIET);
1376 	for (w = w_all; w; w = w->w_next) {
1377 		if (strcmp(description, w->w_description) == 0) {
1378 			mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1379 			return (w);
1380 		}
1381 	}
1382 	if ((w = witness_get()) == NULL)
1383 		return (NULL);
1384 	w->w_next = w_all;
1385 	w_all = w;
1386 	w->w_description = description;
1387 	mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1388 	if (flag & MTX_SPIN) {
1389 		w->w_spin = 1;
1390 
1391 		i = 1;
1392 		for (order = spin_order_list; *order != NULL; order++) {
1393 			if (strcmp(description, *order) == 0)
1394 				break;
1395 			i <<= 1;
1396 		}
1397 		if (*order == NULL)
1398 			panic("spin lock %s not in order list", description);
1399 		w->w_level = i;
1400 	}
1401 
1402 	return (w);
1403 }
1404 
1405 static int
1406 itismychild(struct witness *parent, struct witness *child)
1407 {
1408 	static int recursed;
1409 
1410 	/*
1411 	 * Insert "child" after "parent"
1412 	 */
1413 	while (parent->w_morechildren)
1414 		parent = parent->w_morechildren;
1415 
1416 	if (parent->w_childcnt == WITNESS_NCHILDREN) {
1417 		if ((parent->w_morechildren = witness_get()) == NULL)
1418 			return (1);
1419 		parent = parent->w_morechildren;
1420 	}
1421 	MPASS(child != NULL);
1422 	parent->w_children[parent->w_childcnt++] = child;
1423 	/*
1424 	 * now prune whole tree
1425 	 */
1426 	if (recursed)
1427 		return (0);
1428 	recursed = 1;
1429 	for (child = w_all; child != NULL; child = child->w_next) {
1430 		for (parent = w_all; parent != NULL;
1431 		    parent = parent->w_next) {
1432 			if (!isitmychild(parent, child))
1433 				continue;
1434 			removechild(parent, child);
1435 			if (isitmydescendant(parent, child))
1436 				continue;
1437 			itismychild(parent, child);
1438 		}
1439 	}
1440 	recursed = 0;
1441 	witness_levelall();
1442 	return (0);
1443 }
1444 
1445 static void
1446 removechild(struct witness *parent, struct witness *child)
1447 {
1448 	struct witness *w, *w1;
1449 	int i;
1450 
1451 	for (w = parent; w != NULL; w = w->w_morechildren)
1452 		for (i = 0; i < w->w_childcnt; i++)
1453 			if (w->w_children[i] == child)
1454 				goto found;
1455 	return;
1456 found:
1457 	for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
1458 		continue;
1459 	w->w_children[i] = w1->w_children[--w1->w_childcnt];
1460 	MPASS(w->w_children[i] != NULL);
1461 
1462 	if (w1->w_childcnt != 0)
1463 		return;
1464 
1465 	if (w1 == parent)
1466 		return;
1467 	for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
1468 		continue;
1469 	w->w_morechildren = 0;
1470 	witness_free(w1);
1471 }
1472 
1473 static int
1474 isitmychild(struct witness *parent, struct witness *child)
1475 {
1476 	struct witness *w;
1477 	int i;
1478 
1479 	for (w = parent; w != NULL; w = w->w_morechildren) {
1480 		for (i = 0; i < w->w_childcnt; i++) {
1481 			if (w->w_children[i] == child)
1482 				return (1);
1483 		}
1484 	}
1485 	return (0);
1486 }
1487 
1488 static int
1489 isitmydescendant(struct witness *parent, struct witness *child)
1490 {
1491 	struct witness *w;
1492 	int i;
1493 	int j;
1494 
1495 	for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
1496 		MPASS(j < 1000);
1497 		for (i = 0; i < w->w_childcnt; i++) {
1498 			if (w->w_children[i] == child)
1499 				return (1);
1500 		}
1501 		for (i = 0; i < w->w_childcnt; i++) {
1502 			if (isitmydescendant(w->w_children[i], child))
1503 				return (1);
1504 		}
1505 	}
1506 	return (0);
1507 }
1508 
1509 void
1510 witness_levelall (void)
1511 {
1512 	struct witness *w, *w1;
1513 
1514 	for (w = w_all; w; w = w->w_next)
1515 		if (!(w->w_spin))
1516 			w->w_level = 0;
1517 	for (w = w_all; w; w = w->w_next) {
1518 		if (w->w_spin)
1519 			continue;
1520 		for (w1 = w_all; w1; w1 = w1->w_next) {
1521 			if (isitmychild(w1, w))
1522 				break;
1523 		}
1524 		if (w1 != NULL)
1525 			continue;
1526 		witness_leveldescendents(w, 0);
1527 	}
1528 }
1529 
1530 static void
1531 witness_leveldescendents(struct witness *parent, int level)
1532 {
1533 	int i;
1534 	struct witness *w;
1535 
1536 	if (parent->w_level < level)
1537 		parent->w_level = level;
1538 	level++;
1539 	for (w = parent; w != NULL; w = w->w_morechildren)
1540 		for (i = 0; i < w->w_childcnt; i++)
1541 			witness_leveldescendents(w->w_children[i], level);
1542 }
1543 
1544 static void
1545 witness_displaydescendants(void(*prnt)(const char *fmt, ...),
1546 			   struct witness *parent)
1547 {
1548 	struct witness *w;
1549 	int i;
1550 	int level;
1551 
1552 	level = parent->w_spin ? ffs(parent->w_level) : parent->w_level;
1553 
1554 	prnt("%d", level);
1555 	if (level < 10)
1556 		prnt(" ");
1557 	for (i = 0; i < level; i++)
1558 		prnt(" ");
1559 	prnt("%s", parent->w_description);
1560 	if (parent->w_file != NULL)
1561 		prnt(" -- last acquired @ %s:%d\n", parent->w_file,
1562 		    parent->w_line);
1563 
1564 	for (w = parent; w != NULL; w = w->w_morechildren)
1565 		for (i = 0; i < w->w_childcnt; i++)
1566 			    witness_displaydescendants(prnt, w->w_children[i]);
1567     }
1568 
1569 static int
1570 dup_ok(struct witness *w)
1571 {
1572 	char **dup;
1573 
1574 	for (dup = dup_list; *dup!= NULL; dup++)
1575 		if (strcmp(w->w_description, *dup) == 0)
1576 			return (1);
1577 	return (0);
1578 }
1579 
1580 static int
1581 blessed(struct witness *w1, struct witness *w2)
1582 {
1583 	int i;
1584 	struct witness_blessed *b;
1585 
1586 	for (i = 0; i < blessed_count; i++) {
1587 		b = &blessed_list[i];
1588 		if (strcmp(w1->w_description, b->b_lock1) == 0) {
1589 			if (strcmp(w2->w_description, b->b_lock2) == 0)
1590 				return (1);
1591 			continue;
1592 		}
1593 		if (strcmp(w1->w_description, b->b_lock2) == 0)
1594 			if (strcmp(w2->w_description, b->b_lock1) == 0)
1595 				return (1);
1596 	}
1597 	return (0);
1598 }
1599 
1600 static struct witness *
1601 witness_get()
1602 {
1603 	struct witness *w;
1604 
1605 	if ((w = w_free) == NULL) {
1606 		witness_dead = 1;
1607 		mtx_unlock_spin_flags(&w_mtx, MTX_QUIET);
1608 		printf("witness exhausted\n");
1609 		return (NULL);
1610 	}
1611 	w_free = w->w_next;
1612 	bzero(w, sizeof(*w));
1613 	return (w);
1614 }
1615 
1616 static void
1617 witness_free(struct witness *w)
1618 {
1619 	w->w_next = w_free;
1620 	w_free = w;
1621 }
1622 
1623 int
1624 witness_list(struct proc *p)
1625 {
1626 	struct mtx *m;
1627 	int nheld;
1628 
1629 	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1630 	nheld = 0;
1631 	LIST_FOREACH(m, &p->p_heldmtx, mtx_held) {
1632 		printf("\t\"%s\" (%p) locked at %s:%d\n",
1633 		    m->mtx_description, m,
1634 		    m->mtx_witness->w_file, m->mtx_witness->w_line);
1635 		nheld++;
1636 	}
1637 
1638 	return (nheld);
1639 }
1640 
1641 #ifdef DDB
1642 
1643 DB_SHOW_COMMAND(mutexes, db_witness_list)
1644 {
1645 
1646 	witness_list(curproc);
1647 }
1648 
1649 DB_SHOW_COMMAND(witness, db_witness_display)
1650 {
1651 
1652 	witness_display(db_printf);
1653 }
1654 #endif
1655 
1656 void
1657 witness_save(struct mtx *m, const char **filep, int *linep)
1658 {
1659 
1660 	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1661 	if (m->mtx_witness == NULL)
1662 		return;
1663 
1664 	*filep = m->mtx_witness->w_file;
1665 	*linep = m->mtx_witness->w_line;
1666 }
1667 
1668 void
1669 witness_restore(struct mtx *m, const char *file, int line)
1670 {
1671 
1672 	KASSERT(!witness_cold, ("%s: witness_cold\n", __FUNCTION__));
1673 	if (m->mtx_witness == NULL)
1674 		return;
1675 
1676 	m->mtx_witness->w_file = file;
1677 	m->mtx_witness->w_line = line;
1678 }
1679 
1680 #endif	/* WITNESS */
1681