xref: /freebsd/sys/kern/kern_mutex.c (revision 9fc7a59f2a9baa13d62a86a10d97652ca06caa5f)
1 /*-
2  * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  * 3. Berkeley Software Design Inc's name may not be used to endorse or
13  *    promote products derived from this software without specific prior
14  *    written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
29  *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
30  */
31 
32 /*
33  * Machine-independent bits of the mutex implementation.
34  */
35 
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38 
39 #include "opt_adaptive_mutexes.h"
40 #include "opt_ddb.h"
41 #include "opt_hwpmc_hooks.h"
42 #include "opt_sched.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/bus.h>
47 #include <sys/conf.h>
48 #include <sys/kdb.h>
49 #include <sys/kernel.h>
50 #include <sys/ktr.h>
51 #include <sys/lock.h>
52 #include <sys/malloc.h>
53 #include <sys/mutex.h>
54 #include <sys/proc.h>
55 #include <sys/resourcevar.h>
56 #include <sys/sched.h>
57 #include <sys/sbuf.h>
58 #include <sys/smp.h>
59 #include <sys/sysctl.h>
60 #include <sys/turnstile.h>
61 #include <sys/vmmeter.h>
62 #include <sys/lock_profile.h>
63 
64 #include <machine/atomic.h>
65 #include <machine/bus.h>
66 #include <machine/cpu.h>
67 
68 #include <ddb/ddb.h>
69 
70 #include <fs/devfs/devfs_int.h>
71 
72 #include <vm/vm.h>
73 #include <vm/vm_extern.h>
74 
75 #if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES)
76 #define	ADAPTIVE_MUTEXES
77 #endif
78 
79 #ifdef HWPMC_HOOKS
80 #include <sys/pmckern.h>
81 PMC_SOFT_DEFINE( , , lock, failed);
82 #endif
83 
84 /*
85  * Return the mutex address when the lock cookie address is provided.
86  * This functionality assumes that struct mtx has a member named mtx_lock.
87  */
88 #define	mtxlock2mtx(c)	(__containerof(c, struct mtx, mtx_lock))
89 
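/*
 * Illustrative sketch (the variable names here are hypothetical, not from
 * this file): the mtx_lock()/mtx_unlock() macro family passes only the
 * address of the lock word down to the functions below, and mtxlock2mtx()
 * recovers the enclosing mutex from that cookie:
 *
 *	struct mtx m;
 *	volatile uintptr_t *c = &m.mtx_lock;
 *	struct mtx *mp = mtxlock2mtx(c);	(mp now equals &m)
 */
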
90 /*
91  * Internal utility macros.
92  */
93 #define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
94 
95 #define	mtx_destroyed(m) ((m)->mtx_lock == MTX_DESTROYED)
96 
97 static void	assert_mtx(const struct lock_object *lock, int what);
98 #ifdef DDB
99 static void	db_show_mtx(const struct lock_object *lock);
100 #endif
101 static void	lock_mtx(struct lock_object *lock, uintptr_t how);
102 static void	lock_spin(struct lock_object *lock, uintptr_t how);
103 #ifdef KDTRACE_HOOKS
104 static int	owner_mtx(const struct lock_object *lock,
105 		    struct thread **owner);
106 #endif
107 static uintptr_t unlock_mtx(struct lock_object *lock);
108 static uintptr_t unlock_spin(struct lock_object *lock);
109 
110 /*
111  * Lock classes for sleep and spin mutexes.
112  */
113 struct lock_class lock_class_mtx_sleep = {
114 	.lc_name = "sleep mutex",
115 	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE,
116 	.lc_assert = assert_mtx,
117 #ifdef DDB
118 	.lc_ddb_show = db_show_mtx,
119 #endif
120 	.lc_lock = lock_mtx,
121 	.lc_unlock = unlock_mtx,
122 #ifdef KDTRACE_HOOKS
123 	.lc_owner = owner_mtx,
124 #endif
125 };
126 struct lock_class lock_class_mtx_spin = {
127 	.lc_name = "spin mutex",
128 	.lc_flags = LC_SPINLOCK | LC_RECURSABLE,
129 	.lc_assert = assert_mtx,
130 #ifdef DDB
131 	.lc_ddb_show = db_show_mtx,
132 #endif
133 	.lc_lock = lock_spin,
134 	.lc_unlock = unlock_spin,
135 #ifdef KDTRACE_HOOKS
136 	.lc_owner = owner_mtx,
137 #endif
138 };
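
/*
 * A rough sketch of how these classes are consumed by generic code such as
 * _sleep() (simplified and paraphrased, not copied from kern_synch.c):
 *
 *	struct lock_class *class = LOCK_CLASS(lock);
 *	uintptr_t state;
 *
 *	state = class->lc_unlock(lock);		(drop the interlock)
 *	...					(block or wait)
 *	class->lc_lock(lock, state);		(reacquire it afterwards)
 *
 * This is also why lock_spin()/unlock_spin() below simply panic: a spin
 * mutex may not be used as a sleep interlock except via msleep_spin(9).
 */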
139 
140 #ifdef ADAPTIVE_MUTEXES
141 static SYSCTL_NODE(_debug, OID_AUTO, mtx, CTLFLAG_RD, NULL, "mtx debugging");
142 
143 static struct lock_delay_config __read_mostly mtx_delay;
144 
145 SYSCTL_INT(_debug_mtx, OID_AUTO, delay_base, CTLFLAG_RW, &mtx_delay.base,
146     0, "");
147 SYSCTL_INT(_debug_mtx, OID_AUTO, delay_max, CTLFLAG_RW, &mtx_delay.max,
148     0, "");
149 
150 LOCK_DELAY_SYSINIT_DEFAULT(mtx_delay);
151 #endif
152 
153 static SYSCTL_NODE(_debug, OID_AUTO, mtx_spin, CTLFLAG_RD, NULL,
154     "mtx spin debugging");
155 
156 static struct lock_delay_config __read_mostly mtx_spin_delay;
157 
158 SYSCTL_INT(_debug_mtx_spin, OID_AUTO, delay_base, CTLFLAG_RW,
159     &mtx_spin_delay.base, 0, "");
160 SYSCTL_INT(_debug_mtx_spin, OID_AUTO, delay_max, CTLFLAG_RW,
161     &mtx_spin_delay.max, 0, "");
162 
163 LOCK_DELAY_SYSINIT_DEFAULT(mtx_spin_delay);
164 
165 /*
166  * System-wide mutexes
167  */
168 struct mtx blocked_lock;
169 struct mtx Giant;
170 
171 void
172 assert_mtx(const struct lock_object *lock, int what)
173 {
174 
175 	mtx_assert((const struct mtx *)lock, what);
176 }
177 
178 void
179 lock_mtx(struct lock_object *lock, uintptr_t how)
180 {
181 
182 	mtx_lock((struct mtx *)lock);
183 }
184 
185 void
186 lock_spin(struct lock_object *lock, uintptr_t how)
187 {
188 
189 	panic("spin locks can only use msleep_spin");
190 }
191 
192 uintptr_t
193 unlock_mtx(struct lock_object *lock)
194 {
195 	struct mtx *m;
196 
197 	m = (struct mtx *)lock;
198 	mtx_assert(m, MA_OWNED | MA_NOTRECURSED);
199 	mtx_unlock(m);
200 	return (0);
201 }
202 
203 uintptr_t
204 unlock_spin(struct lock_object *lock)
205 {
206 
207 	panic("spin locks can only use msleep_spin");
208 }
209 
210 #ifdef KDTRACE_HOOKS
211 int
212 owner_mtx(const struct lock_object *lock, struct thread **owner)
213 {
214 	const struct mtx *m;
215 	uintptr_t x;
216 
217 	m = (const struct mtx *)lock;
218 	x = m->mtx_lock;
219 	*owner = (struct thread *)(x & ~MTX_FLAGMASK);
220 	return (x != MTX_UNOWNED);
221 }
222 #endif
223 
224 /*
225  * Function versions of the inlined __mtx_* macros.  These are used by
226  * modules and can also be called from assembly language if needed.
227  */
228 void
229 __mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file, int line)
230 {
231 	struct mtx *m;
232 	uintptr_t tid, v;
233 
234 	if (SCHEDULER_STOPPED())
235 		return;
236 
237 	m = mtxlock2mtx(c);
238 
239 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
240 	    ("mtx_lock() by idle thread %p on sleep mutex %s @ %s:%d",
241 	    curthread, m->lock_object.lo_name, file, line));
242 	KASSERT(m->mtx_lock != MTX_DESTROYED,
243 	    ("mtx_lock() of destroyed mutex @ %s:%d", file, line));
244 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
245 	    ("mtx_lock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
246 	    file, line));
247 	WITNESS_CHECKORDER(&m->lock_object, (opts & ~MTX_RECURSE) |
248 	    LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL);
249 
250 	tid = (uintptr_t)curthread;
251 	v = MTX_UNOWNED;
252 	if (!_mtx_obtain_lock_fetch(m, &v, tid))
253 		_mtx_lock_sleep(m, v, tid, opts, file, line);
254 	else
255 		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire,
256 		    m, 0, 0, file, line);
257 	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
258 	    line);
259 	WITNESS_LOCK(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_EXCLUSIVE,
260 	    file, line);
261 	TD_LOCKS_INC(curthread);
262 }
263 
264 void
265 __mtx_unlock_flags(volatile uintptr_t *c, int opts, const char *file, int line)
266 {
267 	struct mtx *m;
268 
269 	if (SCHEDULER_STOPPED())
270 		return;
271 
272 	m = mtxlock2mtx(c);
273 
274 	KASSERT(m->mtx_lock != MTX_DESTROYED,
275 	    ("mtx_unlock() of destroyed mutex @ %s:%d", file, line));
276 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
277 	    ("mtx_unlock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
278 	    file, line));
279 	WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
280 	LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file,
281 	    line);
282 	mtx_assert(m, MA_OWNED);
283 
284 	__mtx_unlock_sleep(c, opts, file, line);
285 	TD_LOCKS_DEC(curthread);
286 }
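
/*
 * Typical consumer-side usage of the sleep mutex interfaces wrapped above
 * (the softc and field names are hypothetical, for illustration only):
 *
 *	struct foo_softc *sc = device_get_softc(dev);
 *
 *	mtx_lock(&sc->sc_mtx);
 *	sc->sc_opencnt++;
 *	mtx_unlock(&sc->sc_mtx);
 */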
287 
288 void
289 __mtx_lock_spin_flags(volatile uintptr_t *c, int opts, const char *file,
290     int line)
291 {
292 	struct mtx *m;
293 
294 	if (SCHEDULER_STOPPED())
295 		return;
296 
297 	m = mtxlock2mtx(c);
298 
299 	KASSERT(m->mtx_lock != MTX_DESTROYED,
300 	    ("mtx_lock_spin() of destroyed mutex @ %s:%d", file, line));
301 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
302 	    ("mtx_lock_spin() of sleep mutex %s @ %s:%d",
303 	    m->lock_object.lo_name, file, line));
304 	if (mtx_owned(m))
305 		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
306 		    (opts & MTX_RECURSE) != 0,
307 	    ("mtx_lock_spin: recursed on non-recursive mutex %s @ %s:%d\n",
308 		    m->lock_object.lo_name, file, line));
309 	opts &= ~MTX_RECURSE;
310 	WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE,
311 	    file, line, NULL);
312 	__mtx_lock_spin(m, curthread, opts, file, line);
313 	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
314 	    line);
315 	WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
316 }
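
/*
 * Spin mutexes disable interrupts on the local CPU for the duration of the
 * hold, so critical sections should stay short.  A hypothetical example
 * (names are illustrative only):
 *
 *	mtx_lock_spin(&sc->sc_intr_mtx);
 *	sc->sc_pending |= FOO_INTR_RX;
 *	mtx_unlock_spin(&sc->sc_intr_mtx);
 */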
317 
318 int
319 __mtx_trylock_spin_flags(volatile uintptr_t *c, int opts, const char *file,
320     int line)
321 {
322 	struct mtx *m;
323 
324 	if (SCHEDULER_STOPPED())
325 		return (1);
326 
327 	m = mtxlock2mtx(c);
328 
329 	KASSERT(m->mtx_lock != MTX_DESTROYED,
330 	    ("mtx_trylock_spin() of destroyed mutex @ %s:%d", file, line));
331 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
332 	    ("mtx_trylock_spin() of sleep mutex %s @ %s:%d",
333 	    m->lock_object.lo_name, file, line));
334 	KASSERT((opts & MTX_RECURSE) == 0,
335 	    ("mtx_trylock_spin: unsupp. opt MTX_RECURSE on mutex %s @ %s:%d\n",
336 	    m->lock_object.lo_name, file, line));
337 	if (__mtx_trylock_spin(m, curthread, opts, file, line)) {
338 		LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 1, file, line);
339 		WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
340 		return (1);
341 	}
342 	LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 0, file, line);
343 	return (0);
344 }
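
/*
 * mtx_trylock_spin() makes a single acquisition attempt and never spins.
 * A usage sketch with hypothetical names:
 *
 *	if (mtx_trylock_spin(&sc->sc_intr_mtx)) {
 *		sc->sc_drops++;
 *		mtx_unlock_spin(&sc->sc_intr_mtx);
 *	} else {
 *		(defer the work instead of spinning)
 *	}
 */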
345 
346 void
347 __mtx_unlock_spin_flags(volatile uintptr_t *c, int opts, const char *file,
348     int line)
349 {
350 	struct mtx *m;
351 
352 	if (SCHEDULER_STOPPED())
353 		return;
354 
355 	m = mtxlock2mtx(c);
356 
357 	KASSERT(m->mtx_lock != MTX_DESTROYED,
358 	    ("mtx_unlock_spin() of destroyed mutex @ %s:%d", file, line));
359 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
360 	    ("mtx_unlock_spin() of sleep mutex %s @ %s:%d",
361 	    m->lock_object.lo_name, file, line));
362 	WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
363 	LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file,
364 	    line);
365 	mtx_assert(m, MA_OWNED);
366 
367 	__mtx_unlock_spin(m);
368 }
369 
370 /*
371  * The important part of mtx_trylock{,_flags}().
372  * Tries to acquire lock `m.'  If the mutex is already owned, it is
373  * acquired recursively only if it is recursable or MTX_RECURSE was passed.
374  */
375 int
376 _mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file, int line)
377 {
378 	struct mtx *m;
379 #ifdef LOCK_PROFILING
380 	uint64_t waittime = 0;
381 	int contested = 0;
382 #endif
383 	int rval;
384 
385 	if (SCHEDULER_STOPPED())
386 		return (1);
387 
388 	m = mtxlock2mtx(c);
389 
390 	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
391 	    ("mtx_trylock() by idle thread %p on sleep mutex %s @ %s:%d",
392 	    curthread, m->lock_object.lo_name, file, line));
393 	KASSERT(m->mtx_lock != MTX_DESTROYED,
394 	    ("mtx_trylock() of destroyed mutex @ %s:%d", file, line));
395 	KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep,
396 	    ("mtx_trylock() of spin mutex %s @ %s:%d", m->lock_object.lo_name,
397 	    file, line));
398 
399 	if (mtx_owned(m) && ((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
400 	    (opts & MTX_RECURSE) != 0)) {
401 		m->mtx_recurse++;
402 		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
403 		rval = 1;
404 	} else
405 		rval = _mtx_obtain_lock(m, (uintptr_t)curthread);
406 	opts &= ~MTX_RECURSE;
407 
408 	LOCK_LOG_TRY("LOCK", &m->lock_object, opts, rval, file, line);
409 	if (rval) {
410 		WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
411 		    file, line);
412 		TD_LOCKS_INC(curthread);
413 		if (m->mtx_recurse == 0)
414 			LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire,
415 			    m, contested, waittime, file, line);
416 
417 	}
418 
419 	return (rval);
420 }
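
/*
 * One common use of mtx_trylock() is avoiding a lock-order reversal when
 * two locks must be taken in the "wrong" order (hypothetical lock names):
 *
 *	mtx_lock(&inner_mtx);
 *	if (!mtx_trylock(&outer_mtx)) {
 *		mtx_unlock(&inner_mtx);
 *		mtx_lock(&outer_mtx);
 *		mtx_lock(&inner_mtx);
 *	}
 */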
421 
422 /*
423  * __mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
424  *
425  * We call this if the lock is either contested (i.e. we need to go to
426  * sleep waiting for it), or if we need to recurse on it.
427  */
428 void
429 __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v, uintptr_t tid, int opts,
430     const char *file, int line)
431 {
432 	struct mtx *m;
433 	struct turnstile *ts;
434 #ifdef ADAPTIVE_MUTEXES
435 	volatile struct thread *owner;
436 #endif
437 #ifdef KTR
438 	int cont_logged = 0;
439 #endif
440 #ifdef LOCK_PROFILING
441 	int contested = 0;
442 	uint64_t waittime = 0;
443 #endif
444 #if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS)
445 	struct lock_delay_arg lda;
446 #endif
447 #ifdef KDTRACE_HOOKS
448 	u_int sleep_cnt = 0;
449 	int64_t sleep_time = 0;
450 	int64_t all_time = 0;
451 #endif
452 
453 	if (SCHEDULER_STOPPED())
454 		return;
455 
456 #if defined(ADAPTIVE_MUTEXES)
457 	lock_delay_arg_init(&lda, &mtx_delay);
458 #elif defined(KDTRACE_HOOKS)
459 	lock_delay_arg_init(&lda, NULL);
460 #endif
461 	m = mtxlock2mtx(c);
462 	if (__predict_false(v == MTX_UNOWNED))
463 		v = MTX_READ_VALUE(m);
464 
465 	if (__predict_false(lv_mtx_owner(v) == (struct thread *)tid)) {
466 		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
467 		    (opts & MTX_RECURSE) != 0,
468 	    ("_mtx_lock_sleep: recursed on non-recursive mutex %s @ %s:%d\n",
469 		    m->lock_object.lo_name, file, line));
470 		opts &= ~MTX_RECURSE;
471 		m->mtx_recurse++;
472 		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
473 		if (LOCK_LOG_TEST(&m->lock_object, opts))
474 			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
475 		return;
476 	}
477 	opts &= ~MTX_RECURSE;
478 
479 #ifdef HWPMC_HOOKS
480 	PMC_SOFT_CALL( , , lock, failed);
481 #endif
482 	lock_profile_obtain_lock_failed(&m->lock_object,
483 		    &contested, &waittime);
484 	if (LOCK_LOG_TEST(&m->lock_object, opts))
485 		CTR4(KTR_LOCK,
486 		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
487 		    m->lock_object.lo_name, (void *)m->mtx_lock, file, line);
488 #ifdef KDTRACE_HOOKS
489 	all_time -= lockstat_nsecs(&m->lock_object);
490 #endif
491 
492 	for (;;) {
493 		if (v == MTX_UNOWNED) {
494 			if (_mtx_obtain_lock_fetch(m, &v, tid))
495 				break;
496 			continue;
497 		}
498 #ifdef KDTRACE_HOOKS
499 		lda.spin_cnt++;
500 #endif
501 #ifdef ADAPTIVE_MUTEXES
502 		/*
503 		 * If the owner is running on another CPU, spin until the
504 		 * owner stops running or the state of the lock changes.
505 		 */
506 		owner = lv_mtx_owner(v);
507 		if (TD_IS_RUNNING(owner)) {
508 			if (LOCK_LOG_TEST(&m->lock_object, 0))
509 				CTR3(KTR_LOCK,
510 				    "%s: spinning on %p held by %p",
511 				    __func__, m, owner);
512 			KTR_STATE1(KTR_SCHED, "thread",
513 			    sched_tdname((struct thread *)tid),
514 			    "spinning", "lockname:\"%s\"",
515 			    m->lock_object.lo_name);
516 			do {
517 				lock_delay(&lda);
518 				v = MTX_READ_VALUE(m);
519 				owner = lv_mtx_owner(v);
520 			} while (v != MTX_UNOWNED && TD_IS_RUNNING(owner));
521 			KTR_STATE0(KTR_SCHED, "thread",
522 			    sched_tdname((struct thread *)tid),
523 			    "running");
524 			continue;
525 		}
526 #endif
527 
528 		ts = turnstile_trywait(&m->lock_object);
529 		v = MTX_READ_VALUE(m);
530 
531 		/*
532 		 * Check if the lock has been released while spinning for
533 		 * the turnstile chain lock.
534 		 */
535 		if (v == MTX_UNOWNED) {
536 			turnstile_cancel(ts);
537 			continue;
538 		}
539 
540 #ifdef ADAPTIVE_MUTEXES
541 		/*
542 		 * The current lock owner might have started executing
543 		 * on another CPU (or the lock could have changed
544 		 * owners) while we were waiting on the turnstile
545 		 * chain lock.  If so, drop the turnstile lock and try
546 		 * again.
547 		 */
548 		owner = lv_mtx_owner(v);
549 		if (TD_IS_RUNNING(owner)) {
550 			turnstile_cancel(ts);
551 			continue;
552 		}
553 #endif
554 
555 		/*
556 		 * If the mutex isn't already contested and a failure occurs
557 		 * setting the contested bit, the mutex was either released
558 		 * or the state of the MTX_RECURSED bit changed.
559 		 */
560 		if ((v & MTX_CONTESTED) == 0 &&
561 		    !atomic_cmpset_ptr(&m->mtx_lock, v, v | MTX_CONTESTED)) {
562 			turnstile_cancel(ts);
563 			v = MTX_READ_VALUE(m);
564 			continue;
565 		}
566 
567 		/*
568 		 * We definitely must sleep for this lock.
569 		 */
570 		mtx_assert(m, MA_NOTOWNED);
571 
572 #ifdef KTR
573 		if (!cont_logged) {
574 			CTR6(KTR_CONTENTION,
575 			    "contention: %p at %s:%d wants %s, taken by %s:%d",
576 			    (void *)tid, file, line, m->lock_object.lo_name,
577 			    WITNESS_FILE(&m->lock_object),
578 			    WITNESS_LINE(&m->lock_object));
579 			cont_logged = 1;
580 		}
581 #endif
582 
583 		/*
584 		 * Block on the turnstile.
585 		 */
586 #ifdef KDTRACE_HOOKS
587 		sleep_time -= lockstat_nsecs(&m->lock_object);
588 #endif
589 		turnstile_wait(ts, mtx_owner(m), TS_EXCLUSIVE_QUEUE);
590 #ifdef KDTRACE_HOOKS
591 		sleep_time += lockstat_nsecs(&m->lock_object);
592 		sleep_cnt++;
593 #endif
594 		v = MTX_READ_VALUE(m);
595 	}
596 #ifdef KDTRACE_HOOKS
597 	all_time += lockstat_nsecs(&m->lock_object);
598 #endif
599 #ifdef KTR
600 	if (cont_logged) {
601 		CTR4(KTR_CONTENTION,
602 		    "contention end: %s acquired by %p at %s:%d",
603 		    m->lock_object.lo_name, (void *)tid, file, line);
604 	}
605 #endif
606 	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, m, contested,
607 	    waittime, file, line);
608 #ifdef KDTRACE_HOOKS
609 	if (sleep_time)
610 		LOCKSTAT_RECORD1(adaptive__block, m, sleep_time);
611 
612 	/*
613 	 * Only record spin time if we actually spun rather than merely slept.
614 	 */
615 	if (lda.spin_cnt > sleep_cnt)
616 		LOCKSTAT_RECORD1(adaptive__spin, m, all_time - sleep_time);
617 #endif
618 }
619 
620 static void
621 _mtx_lock_spin_failed(struct mtx *m)
622 {
623 	struct thread *td;
624 
625 	td = mtx_owner(m);
626 
627 	/* If the mutex is unlocked, try again. */
628 	if (td == NULL)
629 		return;
630 
631 	printf("spin lock %p (%s) held by %p (tid %d) too long\n",
632 	    m, m->lock_object.lo_name, td, td->td_tid);
633 #ifdef WITNESS
634 	witness_display_spinlock(&m->lock_object, td, printf);
635 #endif
636 	panic("spin lock held too long");
637 }
638 
639 #ifdef SMP
640 /*
641  * _mtx_lock_spin_cookie: the tougher part of acquiring an MTX_SPIN lock.
642  *
643  * This is only called if we need to actually spin for the lock. Recursion
644  * is handled inline.
645  */
646 void
647 _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t v, uintptr_t tid,
648     int opts, const char *file, int line)
649 {
650 	struct mtx *m;
651 	struct lock_delay_arg lda;
652 #ifdef LOCK_PROFILING
653 	int contested = 0;
654 	uint64_t waittime = 0;
655 #endif
656 #ifdef KDTRACE_HOOKS
657 	int64_t spin_time = 0;
658 #endif
659 
660 	if (SCHEDULER_STOPPED())
661 		return;
662 
663 	lock_delay_arg_init(&lda, &mtx_spin_delay);
664 	m = mtxlock2mtx(c);
665 
666 	if (LOCK_LOG_TEST(&m->lock_object, opts))
667 		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);
668 	KTR_STATE1(KTR_SCHED, "thread", sched_tdname((struct thread *)tid),
669 	    "spinning", "lockname:\"%s\"", m->lock_object.lo_name);
670 
671 #ifdef HWPMC_HOOKS
672 	PMC_SOFT_CALL( , , lock, failed);
673 #endif
674 	lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime);
675 #ifdef KDTRACE_HOOKS
676 	spin_time -= lockstat_nsecs(&m->lock_object);
677 #endif
678 	for (;;) {
679 		if (v == MTX_UNOWNED) {
680 			if (_mtx_obtain_lock_fetch(m, &v, tid))
681 				break;
682 			continue;
683 		}
684 		/* Give interrupts a chance while we spin. */
685 		spinlock_exit();
686 		do {
687 			if (lda.spin_cnt < 10000000) {
688 				lock_delay(&lda);
689 			} else {
690 				lda.spin_cnt++;
691 				if (lda.spin_cnt < 60000000 || kdb_active ||
692 				    panicstr != NULL)
693 					DELAY(1);
694 				else
695 					_mtx_lock_spin_failed(m);
696 				cpu_spinwait();
697 			}
698 			v = MTX_READ_VALUE(m);
699 		} while (v != MTX_UNOWNED);
700 		spinlock_enter();
701 	}
702 #ifdef KDTRACE_HOOKS
703 	spin_time += lockstat_nsecs(&m->lock_object);
704 #endif
705 
706 	if (LOCK_LOG_TEST(&m->lock_object, opts))
707 		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);
708 	KTR_STATE0(KTR_SCHED, "thread", sched_tdname((struct thread *)tid),
709 	    "running");
710 
711 #ifdef KDTRACE_HOOKS
712 	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire, m,
713 	    contested, waittime, file, line);
714 	if (spin_time != 0)
715 		LOCKSTAT_RECORD1(spin__spin, m, spin_time);
716 #endif
717 }
718 #endif /* SMP */
719 
720 void
721 thread_lock_flags_(struct thread *td, int opts, const char *file, int line)
722 {
723 	struct mtx *m;
724 	uintptr_t tid, v;
725 	struct lock_delay_arg lda;
726 #ifdef LOCK_PROFILING
727 	int contested = 0;
728 	uint64_t waittime = 0;
729 #endif
730 #ifdef KDTRACE_HOOKS
731 	int64_t spin_time = 0;
732 #endif
733 
734 	tid = (uintptr_t)curthread;
735 
736 	if (SCHEDULER_STOPPED()) {
737 		/*
738 		 * Ensure that spinlock sections are balanced even when the
739 		 * scheduler is stopped, since we may otherwise inadvertently
740 		 * re-enable interrupts while dumping core.
741 		 */
742 		spinlock_enter();
743 		return;
744 	}
745 
746 	lock_delay_arg_init(&lda, &mtx_spin_delay);
747 
748 #ifdef KDTRACE_HOOKS
749 	spin_time -= lockstat_nsecs(&td->td_lock->lock_object);
750 #endif
751 	for (;;) {
752 retry:
753 		v = MTX_UNOWNED;
754 		spinlock_enter();
755 		m = td->td_lock;
756 		KASSERT(m->mtx_lock != MTX_DESTROYED,
757 		    ("thread_lock() of destroyed mutex @ %s:%d", file, line));
758 		KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin,
759 		    ("thread_lock() of sleep mutex %s @ %s:%d",
760 		    m->lock_object.lo_name, file, line));
761 		if (mtx_owned(m))
762 			KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0,
763 	    ("thread_lock: recursed on non-recursive mutex %s @ %s:%d\n",
764 			    m->lock_object.lo_name, file, line));
765 		WITNESS_CHECKORDER(&m->lock_object,
766 		    opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL);
767 		for (;;) {
768 			if (_mtx_obtain_lock_fetch(m, &v, tid))
769 				break;
770 			if (v == MTX_UNOWNED)
771 				continue;
772 			if (v == tid) {
773 				m->mtx_recurse++;
774 				break;
775 			}
776 #ifdef HWPMC_HOOKS
777 			PMC_SOFT_CALL( , , lock, failed);
778 #endif
779 			lock_profile_obtain_lock_failed(&m->lock_object,
780 			    &contested, &waittime);
781 			/* Give interrupts a chance while we spin. */
782 			spinlock_exit();
783 			do {
784 				if (lda.spin_cnt < 10000000) {
785 					lock_delay(&lda);
786 				} else {
787 					lda.spin_cnt++;
788 					if (lda.spin_cnt < 60000000 ||
789 					    kdb_active || panicstr != NULL)
790 						DELAY(1);
791 					else
792 						_mtx_lock_spin_failed(m);
793 					cpu_spinwait();
794 				}
795 				if (m != td->td_lock)
796 					goto retry;
797 				v = MTX_READ_VALUE(m);
798 			} while (v != MTX_UNOWNED);
799 			spinlock_enter();
800 		}
801 		if (m == td->td_lock)
802 			break;
803 		__mtx_unlock_spin(m);	/* does spinlock_exit() */
804 	}
805 #ifdef KDTRACE_HOOKS
806 	spin_time += lockstat_nsecs(&m->lock_object);
807 #endif
808 	if (m->mtx_recurse == 0)
809 		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire, m,
810 		    contested, waittime, file, line);
811 	LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file,
812 	    line);
813 	WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
814 #ifdef KDTRACE_HOOKS
815 	if (spin_time != 0)
816 		LOCKSTAT_RECORD1(thread__spin, m, spin_time);
817 #endif
818 }
819 
820 struct mtx *
821 thread_lock_block(struct thread *td)
822 {
823 	struct mtx *lock;
824 
825 	THREAD_LOCK_ASSERT(td, MA_OWNED);
826 	lock = td->td_lock;
827 	td->td_lock = &blocked_lock;
828 	mtx_unlock_spin(lock);
829 
830 	return (lock);
831 }
832 
833 void
834 thread_lock_unblock(struct thread *td, struct mtx *new)
835 {
836 	mtx_assert(new, MA_OWNED);
837 	MPASS(td->td_lock == &blocked_lock);
838 	atomic_store_rel_ptr((volatile void *)&td->td_lock, (uintptr_t)new);
839 }
840 
841 void
842 thread_lock_set(struct thread *td, struct mtx *new)
843 {
844 	struct mtx *lock;
845 
846 	mtx_assert(new, MA_OWNED);
847 	THREAD_LOCK_ASSERT(td, MA_OWNED);
848 	lock = td->td_lock;
849 	td->td_lock = new;
850 	mtx_unlock_spin(lock);
851 }
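
/*
 * thread_lock()/thread_unlock() wrap the code above.  td_lock points at
 * whichever spin mutex currently protects the thread (a run queue,
 * sleepqueue or turnstile chain lock), which is why the acquisition loop
 * above must re-check td_lock after spinning.  A minimal usage sketch:
 *
 *	thread_lock(td);
 *	td->td_flags |= TDF_NEEDRESCHED;
 *	thread_unlock(td);
 */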
852 
853 /*
854  * __mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
855  *
856  * We are only called here if the lock is recursed, contested (i.e. we
857  * need to wake up a blocked thread), or a lockstat probe is active.
858  */
859 void
860 __mtx_unlock_sleep(volatile uintptr_t *c, int opts, const char *file, int line)
861 {
862 	struct mtx *m;
863 	struct turnstile *ts;
864 	uintptr_t tid, v;
865 
866 	if (SCHEDULER_STOPPED())
867 		return;
868 
869 	tid = (uintptr_t)curthread;
870 	m = mtxlock2mtx(c);
871 	v = MTX_READ_VALUE(m);
872 
873 	if (v & MTX_RECURSED) {
874 		if (--(m->mtx_recurse) == 0)
875 			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
876 		if (LOCK_LOG_TEST(&m->lock_object, opts))
877 			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
878 		return;
879 	}
880 
881 	LOCKSTAT_PROFILE_RELEASE_LOCK(adaptive__release, m);
882 	if (v == tid && _mtx_release_lock(m, tid))
883 		return;
884 
885 	/*
886 	 * We have to lock the chain before the turnstile so this turnstile
887 	 * can be removed from the hash list if it is empty.
888 	 */
889 	turnstile_chain_lock(&m->lock_object);
890 	ts = turnstile_lookup(&m->lock_object);
891 	if (LOCK_LOG_TEST(&m->lock_object, opts))
892 		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);
893 	MPASS(ts != NULL);
894 	turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE);
895 	_mtx_release_lock_quick(m);
896 
897 	/*
898 	 * This turnstile is no longer associated with the mutex.  We can
899 	 * unlock the chain lock so a new turnstile may take its place.
900 	 */
901 	turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
902 	turnstile_chain_unlock(&m->lock_object);
903 }
904 
905 /*
906  * All the unlocking of MTX_SPIN locks is done inline.
907  * See the __mtx_unlock_spin() macro for the details.
908  */
909 
910 /*
911  * The backing function for the INVARIANTS-enabled mtx_assert()
912  */
913 #ifdef INVARIANT_SUPPORT
914 void
915 __mtx_assert(const volatile uintptr_t *c, int what, const char *file, int line)
916 {
917 	const struct mtx *m;
918 
919 	if (panicstr != NULL || dumping || SCHEDULER_STOPPED())
920 		return;
921 
922 	m = mtxlock2mtx(c);
923 
924 	switch (what) {
925 	case MA_OWNED:
926 	case MA_OWNED | MA_RECURSED:
927 	case MA_OWNED | MA_NOTRECURSED:
928 		if (!mtx_owned(m))
929 			panic("mutex %s not owned at %s:%d",
930 			    m->lock_object.lo_name, file, line);
931 		if (mtx_recursed(m)) {
932 			if ((what & MA_NOTRECURSED) != 0)
933 				panic("mutex %s recursed at %s:%d",
934 				    m->lock_object.lo_name, file, line);
935 		} else if ((what & MA_RECURSED) != 0) {
936 			panic("mutex %s unrecursed at %s:%d",
937 			    m->lock_object.lo_name, file, line);
938 		}
939 		break;
940 	case MA_NOTOWNED:
941 		if (mtx_owned(m))
942 			panic("mutex %s owned at %s:%d",
943 			    m->lock_object.lo_name, file, line);
944 		break;
945 	default:
946 		panic("unknown mtx_assert at %s:%d", file, line);
947 	}
948 }
949 #endif
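
/*
 * Callers normally use the mtx_assert() wrapper (effective only with
 * INVARIANTS) to document and enforce their locking protocol, e.g.
 * (hypothetical function and lock names):
 *
 *	static void
 *	foo_update(struct foo_softc *sc)
 *	{
 *
 *		mtx_assert(&sc->sc_mtx, MA_OWNED);
 *		...
 *	}
 */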
950 
951 /*
952  * General init routine used by the MTX_SYSINIT() macro.
953  */
954 void
955 mtx_sysinit(void *arg)
956 {
957 	struct mtx_args *margs = arg;
958 
959 	mtx_init((struct mtx *)margs->ma_mtx, margs->ma_desc, NULL,
960 	    margs->ma_opts);
961 }
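
/*
 * MTX_SYSINIT() arranges for mtx_sysinit() to run during boot; a sketch
 * with a hypothetical lock (the macro itself lives in sys/mutex.h):
 *
 *	static struct mtx foo_list_mtx;
 *	MTX_SYSINIT(foo_list, &foo_list_mtx, "foo list", MTX_DEF);
 */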
962 
963 /*
964  * Mutex initialization routine; initialize lock `m' named `name' with
965  * the options contained in `opts.'  The optional lock type `type' is
966  * used as a general lock category name for use with witness; if `type'
967  * is NULL, the lock name doubles as the witness type.
968  */
969 void
970 _mtx_init(volatile uintptr_t *c, const char *name, const char *type, int opts)
971 {
972 	struct mtx *m;
973 	struct lock_class *class;
974 	int flags;
975 
976 	m = mtxlock2mtx(c);
977 
978 	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
979 	    MTX_NOWITNESS | MTX_DUPOK | MTX_NOPROFILE | MTX_NEW)) == 0);
980 	ASSERT_ATOMIC_LOAD_PTR(m->mtx_lock,
981 	    ("%s: mtx_lock not aligned for %s: %p", __func__, name,
982 	    &m->mtx_lock));
983 
984 	/* Determine lock class and lock flags. */
985 	if (opts & MTX_SPIN)
986 		class = &lock_class_mtx_spin;
987 	else
988 		class = &lock_class_mtx_sleep;
989 	flags = 0;
990 	if (opts & MTX_QUIET)
991 		flags |= LO_QUIET;
992 	if (opts & MTX_RECURSE)
993 		flags |= LO_RECURSABLE;
994 	if ((opts & MTX_NOWITNESS) == 0)
995 		flags |= LO_WITNESS;
996 	if (opts & MTX_DUPOK)
997 		flags |= LO_DUPOK;
998 	if (opts & MTX_NOPROFILE)
999 		flags |= LO_NOPROFILE;
1000 	if (opts & MTX_NEW)
1001 		flags |= LO_NEW;
1002 
1003 	/* Initialize mutex. */
1004 	lock_init(&m->lock_object, class, name, type, flags);
1005 
1006 	m->mtx_lock = MTX_UNOWNED;
1007 	m->mtx_recurse = 0;
1008 }
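
/*
 * The public entry point is the mtx_init() macro; a driver would typically
 * initialize its softc lock at attach time (illustrative names only):
 *
 *	mtx_init(&sc->sc_mtx, device_get_nameunit(dev), NULL, MTX_DEF);
 */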
1009 
1010 /*
1011  * Destroy lock `m.'  We don't allow MTX_QUIET to be
1012  * passed in as a flag here because if the corresponding mtx_init() was
1013  * called with MTX_QUIET set, then it will already be set in the mutex's
1014  * flags.
1015  */
1016 void
1017 _mtx_destroy(volatile uintptr_t *c)
1018 {
1019 	struct mtx *m;
1020 
1021 	m = mtxlock2mtx(c);
1022 
1023 	if (!mtx_owned(m))
1024 		MPASS(mtx_unowned(m));
1025 	else {
1026 		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
1027 
1028 		/* Perform the non-mtx related part of mtx_unlock_spin(). */
1029 		if (LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin)
1030 			spinlock_exit();
1031 		else
1032 			TD_LOCKS_DEC(curthread);
1033 
1034 		lock_profile_release_lock(&m->lock_object);
1035 		/* Tell witness this isn't locked to make it happy. */
1036 		WITNESS_UNLOCK(&m->lock_object, LOP_EXCLUSIVE, __FILE__,
1037 		    __LINE__);
1038 	}
1039 
1040 	m->mtx_lock = MTX_DESTROYED;
1041 	lock_destroy(&m->lock_object);
1042 }
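
/*
 * The matching teardown is the mtx_destroy() macro; a detach routine would
 * pair it with the mtx_init() call sketched above (illustrative):
 *
 *	mtx_destroy(&sc->sc_mtx);
 */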
1043 
1044 /*
1045  * Initialize the mutex code and system mutexes.  This is called from the MD
1046  * startup code prior to mi_startup().  The per-CPU data space needs to be
1047  * set up before this is called.
1048  */
1049 void
1050 mutex_init(void)
1051 {
1052 
1053 	/* Setup turnstiles so that sleep mutexes work. */
1054 	init_turnstiles();
1055 
1056 	/*
1057 	 * Initialize mutexes.
1058 	 */
1059 	mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE);
1060 	mtx_init(&blocked_lock, "blocked lock", NULL, MTX_SPIN);
1061 	blocked_lock.mtx_lock = 0xdeadc0de;	/* Always blocked. */
1062 	mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
1063 	mtx_init(&proc0.p_slock, "process slock", NULL, MTX_SPIN);
1064 	mtx_init(&proc0.p_statmtx, "pstatl", NULL, MTX_SPIN);
1065 	mtx_init(&proc0.p_itimmtx, "pitiml", NULL, MTX_SPIN);
1066 	mtx_init(&proc0.p_profmtx, "pprofl", NULL, MTX_SPIN);
1067 	mtx_init(&devmtx, "cdev", NULL, MTX_DEF);
1068 	mtx_lock(&Giant);
1069 }
1070 
1071 #ifdef DDB
1072 void
1073 db_show_mtx(const struct lock_object *lock)
1074 {
1075 	struct thread *td;
1076 	const struct mtx *m;
1077 
1078 	m = (const struct mtx *)lock;
1079 
1080 	db_printf(" flags: {");
1081 	if (LOCK_CLASS(lock) == &lock_class_mtx_spin)
1082 		db_printf("SPIN");
1083 	else
1084 		db_printf("DEF");
1085 	if (m->lock_object.lo_flags & LO_RECURSABLE)
1086 		db_printf(", RECURSE");
1087 	if (m->lock_object.lo_flags & LO_DUPOK)
1088 		db_printf(", DUPOK");
1089 	db_printf("}\n");
1090 	db_printf(" state: {");
1091 	if (mtx_unowned(m))
1092 		db_printf("UNOWNED");
1093 	else if (mtx_destroyed(m))
1094 		db_printf("DESTROYED");
1095 	else {
1096 		db_printf("OWNED");
1097 		if (m->mtx_lock & MTX_CONTESTED)
1098 			db_printf(", CONTESTED");
1099 		if (m->mtx_lock & MTX_RECURSED)
1100 			db_printf(", RECURSED");
1101 	}
1102 	db_printf("}\n");
1103 	if (!mtx_unowned(m) && !mtx_destroyed(m)) {
1104 		td = mtx_owner(m);
1105 		db_printf(" owner: %p (tid %d, pid %d, \"%s\")\n", td,
1106 		    td->td_tid, td->td_proc->p_pid, td->td_name);
1107 		if (mtx_recursed(m))
1108 			db_printf(" recursed: %d\n", m->mtx_recurse);
1109 	}
1110 }
1111 #endif
1112