1 /*- 2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 3. Berkeley Software Design Inc's name may not be used to endorse or 13 * promote products derived from this software without specific prior 14 * written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $ 29 * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $ 30 * $FreeBSD$ 31 */ 32 33 /* 34 * Main Entry: witness 35 * Pronunciation: 'wit-n&s 36 * Function: noun 37 * Etymology: Middle English witnesse, from Old English witnes knowledge, 38 * testimony, witness, from 2wit 39 * Date: before 12th century 40 * 1 : attestation of a fact or event : TESTIMONY 41 * 2 : one that gives evidence; specifically : one who testifies in 42 * a cause or before a judicial tribunal 43 * 3 : one asked to be present at a transaction so as to be able to 44 * testify to its having taken place 45 * 4 : one who has personal knowledge of something 46 * 5 a : something serving as evidence or proof : SIGN 47 * b : public affirmation by word or example of usually 48 * religious faith or conviction <the heroic witness to divine 49 * life -- Pilot> 50 * 6 capitalized : a member of the Jehovah's Witnesses 51 */ 52 53 #include "opt_ddb.h" 54 #include "opt_witness.h" 55 56 /* 57 * Cause non-inlined mtx_*() to be compiled. 58 * Must be defined early because other system headers may include mutex.h. 59 */ 60 #define _KERN_MUTEX_C_ 61 62 #include <sys/param.h> 63 #include <sys/bus.h> 64 #include <sys/kernel.h> 65 #include <sys/malloc.h> 66 #include <sys/proc.h> 67 #include <sys/sysctl.h> 68 #include <sys/systm.h> 69 #include <sys/vmmeter.h> 70 #include <sys/ktr.h> 71 72 #include <machine/atomic.h> 73 #include <machine/bus.h> 74 #include <machine/clock.h> 75 #include <machine/cpu.h> 76 77 #include <ddb/ddb.h> 78 79 #include <vm/vm.h> 80 #include <vm/vm_extern.h> 81 82 #include <sys/mutex.h> 83 84 /* 85 * Machine independent bits of the mutex implementation 86 */ 87 /* All mutexes in system (used for debug/panic) */ 88 #ifdef MUTEX_DEBUG 89 static struct mtx_debug all_mtx_debug = { NULL, {NULL, NULL}, NULL, 0, 90 "All mutexes queue head" }; 91 static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, &all_mtx_debug, 92 TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked), 93 { NULL, NULL }, &all_mtx, &all_mtx }; 94 #else /* MUTEX_DEBUG */ 95 static struct mtx all_mtx = { MTX_UNOWNED, 0, 0, "All mutexes queue head", 96 TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked), 97 { NULL, NULL }, &all_mtx, &all_mtx }; 98 #endif /* MUTEX_DEBUG */ 99 100 static int mtx_cur_cnt; 101 static int mtx_max_cnt; 102 103 void _mtx_enter_giant_def(void); 104 void _mtx_exit_giant_def(void); 105 static void propagate_priority(struct proc *) __unused; 106 107 #define mtx_unowned(m) ((m)->mtx_lock == MTX_UNOWNED) 108 #define mtx_owner(m) (mtx_unowned(m) ? NULL \ 109 : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK)) 110 111 #define RETIP(x) *(((uintptr_t *)(&x)) - 1) 112 #define SET_PRIO(p, pri) (p)->p_priority = (pri) 113 114 /* 115 * XXX Temporary, for use from assembly language 116 */ 117 118 void 119 _mtx_enter_giant_def(void) 120 { 121 122 mtx_enter(&Giant, MTX_DEF); 123 } 124 125 void 126 _mtx_exit_giant_def(void) 127 { 128 129 mtx_exit(&Giant, MTX_DEF); 130 } 131 132 static void 133 propagate_priority(struct proc *p) 134 { 135 int pri = p->p_priority; 136 struct mtx *m = p->p_blocked; 137 138 for (;;) { 139 struct proc *p1; 140 141 p = mtx_owner(m); 142 143 if (p == NULL) { 144 /* 145 * This really isn't quite right. Really 146 * ought to bump priority of process that 147 * next acquires the mutex. 148 */ 149 MPASS(m->mtx_lock == MTX_CONTESTED); 150 return; 151 } 152 MPASS(p->p_magic == P_MAGIC); 153 if (p->p_priority <= pri) 154 return; 155 /* 156 * If lock holder is actually running, just bump priority. 157 */ 158 if (TAILQ_NEXT(p, p_procq) == NULL) { 159 MPASS(p->p_stat == SRUN || p->p_stat == SZOMB); 160 SET_PRIO(p, pri); 161 return; 162 } 163 /* 164 * If on run queue move to new run queue, and 165 * quit. 166 */ 167 if (p->p_stat == SRUN) { 168 MPASS(p->p_blocked == NULL); 169 remrunqueue(p); 170 SET_PRIO(p, pri); 171 setrunqueue(p); 172 return; 173 } 174 175 /* 176 * If we aren't blocked on a mutex, give up and quit. 177 */ 178 if (p->p_stat != SMTX) { 179 printf( 180 "XXX: process %d(%s):%d holds %s but isn't blocked on a mutex\n", 181 p->p_pid, p->p_comm, p->p_stat, m->mtx_description); 182 return; 183 } 184 185 /* 186 * Pick up the mutex that p is blocked on. 187 */ 188 m = p->p_blocked; 189 MPASS(m != NULL); 190 191 printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid, 192 p->p_comm, m->mtx_description); 193 /* 194 * Check if the proc needs to be moved up on 195 * the blocked chain 196 */ 197 if ((p1 = TAILQ_PREV(p, rq, p_procq)) == NULL || 198 p1->p_priority <= pri) { 199 if (p1) 200 printf( 201 "XXX: previous process %d(%s) has higher priority\n", 202 p->p_pid, p->p_comm); 203 else 204 printf("XXX: process at head of run queue\n"); 205 continue; 206 } 207 208 /* 209 * Remove proc from blocked chain 210 */ 211 TAILQ_REMOVE(&m->mtx_blocked, p, p_procq); 212 TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) { 213 MPASS(p1->p_magic == P_MAGIC); 214 if (p1->p_priority > pri) 215 break; 216 } 217 if (p1) 218 TAILQ_INSERT_BEFORE(p1, p, p_procq); 219 else 220 TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq); 221 CTR4(KTR_LOCK, 222 "propagate priority: p 0x%p moved before 0x%p on [0x%p] %s", 223 p, p1, m, m->mtx_description); 224 } 225 } 226 227 void 228 mtx_enter_hard(struct mtx *m, int type, int saveintr) 229 { 230 struct proc *p = CURPROC; 231 232 KASSERT(p != NULL, ("curproc is NULL in mutex")); 233 234 switch (type) { 235 case MTX_DEF: 236 if ((m->mtx_lock & MTX_FLAGMASK) == (uintptr_t)p) { 237 m->mtx_recurse++; 238 atomic_set_ptr(&m->mtx_lock, MTX_RECURSE); 239 CTR1(KTR_LOCK, "mtx_enter: 0x%p recurse", m); 240 return; 241 } 242 CTR3(KTR_LOCK, "mtx_enter: 0x%p contested (lock=%p) [0x%p]", 243 m, (void *)m->mtx_lock, (void *)RETIP(m)); 244 while (!_obtain_lock(m, p)) { 245 uintptr_t v; 246 struct proc *p1; 247 248 mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY); 249 /* 250 * check if the lock has been released while 251 * waiting for the schedlock. 252 */ 253 if ((v = m->mtx_lock) == MTX_UNOWNED) { 254 mtx_exit(&sched_lock, MTX_SPIN); 255 continue; 256 } 257 /* 258 * The mutex was marked contested on release. This 259 * means that there are processes blocked on it. 260 */ 261 if (v == MTX_CONTESTED) { 262 p1 = TAILQ_FIRST(&m->mtx_blocked); 263 KASSERT(p1 != NULL, ("contested mutex has no contesters")); 264 KASSERT(p != NULL, ("curproc is NULL for contested mutex")); 265 m->mtx_lock = (uintptr_t)p | MTX_CONTESTED; 266 if (p1->p_priority < p->p_priority) { 267 SET_PRIO(p, p1->p_priority); 268 } 269 mtx_exit(&sched_lock, MTX_SPIN); 270 return; 271 } 272 /* 273 * If the mutex isn't already contested and 274 * a failure occurs setting the contested bit the 275 * mutex was either release or the 276 * state of the RECURSION bit changed. 277 */ 278 if ((v & MTX_CONTESTED) == 0 && 279 !atomic_cmpset_ptr(&m->mtx_lock, (void *)v, 280 (void *)(v | MTX_CONTESTED))) { 281 mtx_exit(&sched_lock, MTX_SPIN); 282 continue; 283 } 284 285 /* We definitely have to sleep for this lock */ 286 mtx_assert(m, MA_NOTOWNED); 287 288 #ifdef notyet 289 /* 290 * If we're borrowing an interrupted thread's VM 291 * context must clean up before going to sleep. 292 */ 293 if (p->p_flag & (P_ITHD | P_SITHD)) { 294 ithd_t *it = (ithd_t *)p; 295 296 if (it->it_interrupted) { 297 CTR2(KTR_LOCK, 298 "mtx_enter: 0x%x interrupted 0x%x", 299 it, it->it_interrupted); 300 intr_thd_fixup(it); 301 } 302 } 303 #endif 304 305 /* Put us on the list of procs blocked on this mutex */ 306 if (TAILQ_EMPTY(&m->mtx_blocked)) { 307 p1 = (struct proc *)(m->mtx_lock & 308 MTX_FLAGMASK); 309 LIST_INSERT_HEAD(&p1->p_contested, m, 310 mtx_contested); 311 TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq); 312 } else { 313 TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) 314 if (p1->p_priority > p->p_priority) 315 break; 316 if (p1) 317 TAILQ_INSERT_BEFORE(p1, p, p_procq); 318 else 319 TAILQ_INSERT_TAIL(&m->mtx_blocked, p, 320 p_procq); 321 } 322 323 p->p_blocked = m; /* Who we're blocked on */ 324 p->p_stat = SMTX; 325 #if 0 326 propagate_priority(p); 327 #endif 328 CTR3(KTR_LOCK, "mtx_enter: p 0x%p blocked on [0x%p] %s", 329 p, m, m->mtx_description); 330 mi_switch(); 331 CTR3(KTR_LOCK, 332 "mtx_enter: p 0x%p free from blocked on [0x%p] %s", 333 p, m, m->mtx_description); 334 mtx_exit(&sched_lock, MTX_SPIN); 335 } 336 return; 337 case MTX_SPIN: 338 case MTX_SPIN | MTX_FIRST: 339 case MTX_SPIN | MTX_TOPHALF: 340 { 341 int i = 0; 342 343 if (m->mtx_lock == (uintptr_t)p) { 344 m->mtx_recurse++; 345 return; 346 } 347 CTR1(KTR_LOCK, "mtx_enter: %p spinning", m); 348 for (;;) { 349 if (_obtain_lock(m, p)) 350 break; 351 while (m->mtx_lock != MTX_UNOWNED) { 352 if (i++ < 1000000) 353 continue; 354 if (i++ < 6000000) 355 DELAY (1); 356 #ifdef DDB 357 else if (!db_active) 358 #else 359 else 360 #endif 361 panic( 362 "spin lock %s held by 0x%p for > 5 seconds", 363 m->mtx_description, 364 (void *)m->mtx_lock); 365 } 366 } 367 368 #ifdef MUTEX_DEBUG 369 if (type != MTX_SPIN) 370 m->mtx_saveintr = 0xbeefface; 371 else 372 #endif 373 m->mtx_saveintr = saveintr; 374 CTR1(KTR_LOCK, "mtx_enter: 0x%p spin done", m); 375 return; 376 } 377 } 378 } 379 380 void 381 mtx_exit_hard(struct mtx *m, int type) 382 { 383 struct proc *p, *p1; 384 struct mtx *m1; 385 int pri; 386 387 p = CURPROC; 388 switch (type) { 389 case MTX_DEF: 390 case MTX_DEF | MTX_NOSWITCH: 391 if (m->mtx_recurse != 0) { 392 if (--(m->mtx_recurse) == 0) 393 atomic_clear_ptr(&m->mtx_lock, MTX_RECURSE); 394 CTR1(KTR_LOCK, "mtx_exit: 0x%p unrecurse", m); 395 return; 396 } 397 mtx_enter(&sched_lock, MTX_SPIN); 398 CTR1(KTR_LOCK, "mtx_exit: 0x%p contested", m); 399 p1 = TAILQ_FIRST(&m->mtx_blocked); 400 MPASS(p->p_magic == P_MAGIC); 401 MPASS(p1->p_magic == P_MAGIC); 402 TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq); 403 if (TAILQ_EMPTY(&m->mtx_blocked)) { 404 LIST_REMOVE(m, mtx_contested); 405 _release_lock_quick(m); 406 CTR1(KTR_LOCK, "mtx_exit: 0x%p not held", m); 407 } else 408 m->mtx_lock = MTX_CONTESTED; 409 pri = MAXPRI; 410 LIST_FOREACH(m1, &p->p_contested, mtx_contested) { 411 int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority; 412 if (cp < pri) 413 pri = cp; 414 } 415 if (pri > p->p_nativepri) 416 pri = p->p_nativepri; 417 SET_PRIO(p, pri); 418 CTR2(KTR_LOCK, "mtx_exit: 0x%p contested setrunqueue 0x%p", 419 m, p1); 420 p1->p_blocked = NULL; 421 p1->p_stat = SRUN; 422 setrunqueue(p1); 423 if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) { 424 #ifdef notyet 425 if (p->p_flag & (P_ITHD | P_SITHD)) { 426 ithd_t *it = (ithd_t *)p; 427 428 if (it->it_interrupted) { 429 CTR2(KTR_LOCK, 430 "mtx_exit: 0x%x interruped 0x%x", 431 it, it->it_interrupted); 432 intr_thd_fixup(it); 433 } 434 } 435 #endif 436 setrunqueue(p); 437 CTR2(KTR_LOCK, "mtx_exit: 0x%p switching out lock=0x%p", 438 m, (void *)m->mtx_lock); 439 mi_switch(); 440 CTR2(KTR_LOCK, "mtx_exit: 0x%p resuming lock=0x%p", 441 m, (void *)m->mtx_lock); 442 } 443 mtx_exit(&sched_lock, MTX_SPIN); 444 break; 445 case MTX_SPIN: 446 case MTX_SPIN | MTX_FIRST: 447 if (m->mtx_recurse != 0) { 448 m->mtx_recurse--; 449 return; 450 } 451 MPASS(mtx_owned(m)); 452 _release_lock_quick(m); 453 if (type & MTX_FIRST) 454 enable_intr(); /* XXX is this kosher? */ 455 else { 456 MPASS(m->mtx_saveintr != 0xbeefface); 457 restore_intr(m->mtx_saveintr); 458 } 459 break; 460 case MTX_SPIN | MTX_TOPHALF: 461 if (m->mtx_recurse != 0) { 462 m->mtx_recurse--; 463 return; 464 } 465 MPASS(mtx_owned(m)); 466 _release_lock_quick(m); 467 break; 468 default: 469 panic("mtx_exit_hard: unsupported type 0x%x\n", type); 470 } 471 } 472 473 #define MV_DESTROY 0 /* validate before destory */ 474 #define MV_INIT 1 /* validate before init */ 475 476 #ifdef MUTEX_DEBUG 477 478 int mtx_validate __P((struct mtx *, int)); 479 480 int 481 mtx_validate(struct mtx *m, int when) 482 { 483 struct mtx *mp; 484 int i; 485 int retval = 0; 486 487 if (m == &all_mtx || cold) 488 return 0; 489 490 mtx_enter(&all_mtx, MTX_DEF); 491 /* 492 * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly 493 * we can re-enable the kernacc() checks. 494 */ 495 #ifndef __alpha__ 496 MPASS(kernacc((caddr_t)all_mtx.mtx_next, sizeof(uintptr_t), 497 VM_PROT_READ) == 1); 498 #endif 499 MPASS(all_mtx.mtx_next->mtx_prev == &all_mtx); 500 for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) { 501 #ifndef __alpha__ 502 if (kernacc((caddr_t)mp->mtx_next, sizeof(uintptr_t), 503 VM_PROT_READ) != 1) { 504 panic("mtx_validate: mp=%p mp->mtx_next=%p", 505 mp, mp->mtx_next); 506 } 507 #endif 508 i++; 509 if (i > mtx_cur_cnt) { 510 panic("mtx_validate: too many in chain, known=%d\n", 511 mtx_cur_cnt); 512 } 513 } 514 MPASS(i == mtx_cur_cnt); 515 switch (when) { 516 case MV_DESTROY: 517 for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) 518 if (mp == m) 519 break; 520 MPASS(mp == m); 521 break; 522 case MV_INIT: 523 for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) 524 if (mp == m) { 525 /* 526 * Not good. This mutex already exists. 527 */ 528 printf("re-initing existing mutex %s\n", 529 m->mtx_description); 530 MPASS(m->mtx_lock == MTX_UNOWNED); 531 retval = 1; 532 } 533 } 534 mtx_exit(&all_mtx, MTX_DEF); 535 return (retval); 536 } 537 #endif 538 539 void 540 mtx_init(struct mtx *m, const char *t, int flag) 541 { 542 #ifdef MUTEX_DEBUG 543 struct mtx_debug *debug; 544 #endif 545 546 CTR2(KTR_LOCK, "mtx_init 0x%p (%s)", m, t); 547 #ifdef MUTEX_DEBUG 548 if (mtx_validate(m, MV_INIT)) /* diagnostic and error correction */ 549 return; 550 if (flag & MTX_COLD) 551 debug = m->mtx_debug; 552 else 553 debug = NULL; 554 if (debug == NULL) { 555 #ifdef DIAGNOSTIC 556 if(cold && bootverbose) 557 printf("malloc'ing mtx_debug while cold for %s\n", t); 558 #endif 559 560 /* XXX - should not use DEVBUF */ 561 debug = malloc(sizeof(struct mtx_debug), M_DEVBUF, M_NOWAIT); 562 MPASS(debug != NULL); 563 bzero(debug, sizeof(struct mtx_debug)); 564 } 565 #endif 566 bzero((void *)m, sizeof *m); 567 TAILQ_INIT(&m->mtx_blocked); 568 #ifdef MUTEX_DEBUG 569 m->mtx_debug = debug; 570 #endif 571 m->mtx_description = t; 572 m->mtx_lock = MTX_UNOWNED; 573 /* Put on all mutex queue */ 574 mtx_enter(&all_mtx, MTX_DEF); 575 m->mtx_next = &all_mtx; 576 m->mtx_prev = all_mtx.mtx_prev; 577 m->mtx_prev->mtx_next = m; 578 all_mtx.mtx_prev = m; 579 if (++mtx_cur_cnt > mtx_max_cnt) 580 mtx_max_cnt = mtx_cur_cnt; 581 mtx_exit(&all_mtx, MTX_DEF); 582 witness_init(m, flag); 583 } 584 585 void 586 mtx_destroy(struct mtx *m) 587 { 588 589 CTR2(KTR_LOCK, "mtx_destroy 0x%p (%s)", m, m->mtx_description); 590 #ifdef MUTEX_DEBUG 591 if (m->mtx_next == NULL) 592 panic("mtx_destroy: %p (%s) already destroyed", 593 m, m->mtx_description); 594 595 if (!mtx_owned(m)) { 596 MPASS(m->mtx_lock == MTX_UNOWNED); 597 } else { 598 MPASS((m->mtx_lock & (MTX_RECURSE|MTX_CONTESTED)) == 0); 599 } 600 mtx_validate(m, MV_DESTROY); /* diagnostic */ 601 #endif 602 603 #ifdef WITNESS 604 if (m->mtx_witness) 605 witness_destroy(m); 606 #endif /* WITNESS */ 607 608 /* Remove from the all mutex queue */ 609 mtx_enter(&all_mtx, MTX_DEF); 610 m->mtx_next->mtx_prev = m->mtx_prev; 611 m->mtx_prev->mtx_next = m->mtx_next; 612 #ifdef MUTEX_DEBUG 613 m->mtx_next = m->mtx_prev = NULL; 614 free(m->mtx_debug, M_DEVBUF); 615 m->mtx_debug = NULL; 616 #endif 617 mtx_cur_cnt--; 618 mtx_exit(&all_mtx, MTX_DEF); 619 } 620 621 /* 622 * The non-inlined versions of the mtx_*() functions are always built (above), 623 * but the witness code depends on the MUTEX_DEBUG and WITNESS kernel options 624 * being specified. 625 */ 626 #if (defined(MUTEX_DEBUG) && defined(WITNESS)) 627 628 #define WITNESS_COUNT 200 629 #define WITNESS_NCHILDREN 2 630 631 int witness_watch = 1; 632 633 struct witness { 634 struct witness *w_next; 635 const char *w_description; 636 const char *w_file; 637 int w_line; 638 struct witness *w_morechildren; 639 u_char w_childcnt; 640 u_char w_Giant_squawked:1; 641 u_char w_other_squawked:1; 642 u_char w_same_squawked:1; 643 u_char w_sleep:1; 644 u_char w_spin:1; /* this is a spin mutex */ 645 u_int w_level; 646 struct witness *w_children[WITNESS_NCHILDREN]; 647 }; 648 649 struct witness_blessed { 650 char *b_lock1; 651 char *b_lock2; 652 }; 653 654 #ifdef DDB 655 /* 656 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to 657 * drop into kdebug() when: 658 * - a lock heirarchy violation occurs 659 * - locks are held when going to sleep. 660 */ 661 #ifdef WITNESS_DDB 662 int witness_ddb = 1; 663 #else 664 int witness_ddb = 0; 665 #endif 666 SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, ""); 667 #endif /* DDB */ 668 669 #ifdef WITNESS_SKIPSPIN 670 int witness_skipspin = 1; 671 #else 672 int witness_skipspin = 0; 673 #endif 674 SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RD, &witness_skipspin, 0, 675 ""); 676 677 MUTEX_DECLARE(static,w_mtx); 678 static struct witness *w_free; 679 static struct witness *w_all; 680 static int w_inited; 681 static int witness_dead; /* fatal error, probably no memory */ 682 683 static struct witness w_data[WITNESS_COUNT]; 684 685 static struct witness *enroll __P((const char *description, int flag)); 686 static int itismychild __P((struct witness *parent, struct witness *child)); 687 static void removechild __P((struct witness *parent, struct witness *child)); 688 static int isitmychild __P((struct witness *parent, struct witness *child)); 689 static int isitmydescendant __P((struct witness *parent, struct witness *child)); 690 static int dup_ok __P((struct witness *)); 691 static int blessed __P((struct witness *, struct witness *)); 692 static void witness_displaydescendants 693 __P((void(*)(const char *fmt, ...), struct witness *)); 694 static void witness_leveldescendents __P((struct witness *parent, int level)); 695 static void witness_levelall __P((void)); 696 static struct witness * witness_get __P((void)); 697 static void witness_free __P((struct witness *m)); 698 699 700 static char *ignore_list[] = { 701 "witness lock", 702 NULL 703 }; 704 705 static char *spin_order_list[] = { 706 "sched lock", 707 "sio", 708 #ifdef __i386__ 709 "clk", 710 #endif 711 "callout", 712 /* 713 * leaf locks 714 */ 715 NULL 716 }; 717 718 static char *order_list[] = { 719 "uidinfo hash", "uidinfo struct", NULL, 720 NULL 721 }; 722 723 static char *dup_list[] = { 724 NULL 725 }; 726 727 static char *sleep_list[] = { 728 "Giant", 729 NULL 730 }; 731 732 /* 733 * Pairs of locks which have been blessed 734 * Don't complain about order problems with blessed locks 735 */ 736 static struct witness_blessed blessed_list[] = { 737 }; 738 static int blessed_count = sizeof(blessed_list) / sizeof(struct witness_blessed); 739 740 void 741 witness_init(struct mtx *m, int flag) 742 { 743 m->mtx_witness = enroll(m->mtx_description, flag); 744 } 745 746 void 747 witness_destroy(struct mtx *m) 748 { 749 struct mtx *m1; 750 struct proc *p; 751 p = CURPROC; 752 for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL; 753 m1 = LIST_NEXT(m1, mtx_held)) { 754 if (m1 == m) { 755 LIST_REMOVE(m, mtx_held); 756 break; 757 } 758 } 759 return; 760 761 } 762 763 void 764 witness_enter(struct mtx *m, int flags, const char *file, int line) 765 { 766 struct witness *w, *w1; 767 struct mtx *m1; 768 struct proc *p; 769 int i; 770 #ifdef DDB 771 int go_into_ddb = 0; 772 #endif /* DDB */ 773 774 w = m->mtx_witness; 775 p = CURPROC; 776 777 if (flags & MTX_SPIN) { 778 if (!w->w_spin) 779 panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @" 780 " %s:%d", m->mtx_description, file, line); 781 if (m->mtx_recurse != 0) 782 return; 783 mtx_enter(&w_mtx, MTX_SPIN); 784 i = witness_spin_check; 785 if (i != 0 && w->w_level < i) { 786 mtx_exit(&w_mtx, MTX_SPIN); 787 panic("mutex_enter(%s:%x, MTX_SPIN) out of order @" 788 " %s:%d already holding %s:%x", 789 m->mtx_description, w->w_level, file, line, 790 spin_order_list[ffs(i)-1], i); 791 } 792 PCPU_SET(witness_spin_check, i | w->w_level); 793 mtx_exit(&w_mtx, MTX_SPIN); 794 return; 795 } 796 if (w->w_spin) 797 panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d", 798 m->mtx_description, file, line); 799 800 if (m->mtx_recurse != 0) 801 return; 802 if (witness_dead) 803 goto out; 804 if (cold || panicstr) 805 goto out; 806 807 if (!mtx_legal2block()) 808 panic("blockable mtx_enter() of %s when not legal @ %s:%d", 809 m->mtx_description, file, line); 810 /* 811 * Is this the first mutex acquired 812 */ 813 if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL) 814 goto out; 815 816 if ((w1 = m1->mtx_witness) == w) { 817 if (w->w_same_squawked || dup_ok(w)) 818 goto out; 819 w->w_same_squawked = 1; 820 printf("acquring duplicate lock of same type: \"%s\"\n", 821 m->mtx_description); 822 printf(" 1st @ %s:%d\n", w->w_file, w->w_line); 823 printf(" 2nd @ %s:%d\n", file, line); 824 #ifdef DDB 825 go_into_ddb = 1; 826 #endif /* DDB */ 827 goto out; 828 } 829 MPASS(!mtx_owned(&w_mtx)); 830 mtx_enter(&w_mtx, MTX_SPIN); 831 /* 832 * If we have a known higher number just say ok 833 */ 834 if (witness_watch > 1 && w->w_level > w1->w_level) { 835 mtx_exit(&w_mtx, MTX_SPIN); 836 goto out; 837 } 838 if (isitmydescendant(m1->mtx_witness, w)) { 839 mtx_exit(&w_mtx, MTX_SPIN); 840 goto out; 841 } 842 for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) { 843 844 MPASS(i < 200); 845 w1 = m1->mtx_witness; 846 if (isitmydescendant(w, w1)) { 847 mtx_exit(&w_mtx, MTX_SPIN); 848 if (blessed(w, w1)) 849 goto out; 850 if (m1 == &Giant) { 851 if (w1->w_Giant_squawked) 852 goto out; 853 else 854 w1->w_Giant_squawked = 1; 855 } else { 856 if (w1->w_other_squawked) 857 goto out; 858 else 859 w1->w_other_squawked = 1; 860 } 861 printf("lock order reversal\n"); 862 printf(" 1st %s last acquired @ %s:%d\n", 863 w->w_description, w->w_file, w->w_line); 864 printf(" 2nd %p %s @ %s:%d\n", 865 m1, w1->w_description, w1->w_file, w1->w_line); 866 printf(" 3rd %p %s @ %s:%d\n", 867 m, w->w_description, file, line); 868 #ifdef DDB 869 go_into_ddb = 1; 870 #endif /* DDB */ 871 goto out; 872 } 873 } 874 m1 = LIST_FIRST(&p->p_heldmtx); 875 if (!itismychild(m1->mtx_witness, w)) 876 mtx_exit(&w_mtx, MTX_SPIN); 877 878 out: 879 #ifdef DDB 880 if (witness_ddb && go_into_ddb) 881 Debugger("witness_enter"); 882 #endif /* DDB */ 883 w->w_file = file; 884 w->w_line = line; 885 m->mtx_line = line; 886 m->mtx_file = file; 887 888 /* 889 * If this pays off it likely means that a mutex being witnessed 890 * is acquired in hardclock. Put it in the ignore list. It is 891 * likely not the mutex this assert fails on. 892 */ 893 MPASS(m->mtx_held.le_prev == NULL); 894 LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held); 895 } 896 897 void 898 witness_exit(struct mtx *m, int flags, const char *file, int line) 899 { 900 struct witness *w; 901 902 w = m->mtx_witness; 903 904 if (flags & MTX_SPIN) { 905 if (!w->w_spin) 906 panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @" 907 " %s:%d", m->mtx_description, file, line); 908 if (m->mtx_recurse != 0) 909 return; 910 mtx_enter(&w_mtx, MTX_SPIN); 911 PCPU_SET(witness_spin_check, witness_spin_check & ~w->w_level); 912 mtx_exit(&w_mtx, MTX_SPIN); 913 return; 914 } 915 if (w->w_spin) 916 panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d", 917 m->mtx_description, file, line); 918 919 if (m->mtx_recurse != 0) 920 return; 921 922 if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold) 923 panic("switchable mtx_exit() of %s when not legal @ %s:%d", 924 m->mtx_description, file, line); 925 LIST_REMOVE(m, mtx_held); 926 m->mtx_held.le_prev = NULL; 927 } 928 929 void 930 witness_try_enter(struct mtx *m, int flags, const char *file, int line) 931 { 932 struct proc *p; 933 struct witness *w = m->mtx_witness; 934 935 if (flags & MTX_SPIN) { 936 if (!w->w_spin) 937 panic("mutex_try_enter: " 938 "MTX_SPIN on MTX_DEF mutex %s @ %s:%d", 939 m->mtx_description, file, line); 940 if (m->mtx_recurse != 0) 941 return; 942 mtx_enter(&w_mtx, MTX_SPIN); 943 PCPU_SET(witness_spin_check, witness_spin_check | w->w_level); 944 mtx_exit(&w_mtx, MTX_SPIN); 945 return; 946 } 947 948 if (w->w_spin) 949 panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d", 950 m->mtx_description, file, line); 951 952 if (m->mtx_recurse != 0) 953 return; 954 955 w->w_file = file; 956 w->w_line = line; 957 m->mtx_line = line; 958 m->mtx_file = file; 959 p = CURPROC; 960 MPASS(m->mtx_held.le_prev == NULL); 961 LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held); 962 } 963 964 void 965 witness_display(void(*prnt)(const char *fmt, ...)) 966 { 967 struct witness *w, *w1; 968 969 witness_levelall(); 970 971 for (w = w_all; w; w = w->w_next) { 972 if (w->w_file == NULL) 973 continue; 974 for (w1 = w_all; w1; w1 = w1->w_next) { 975 if (isitmychild(w1, w)) 976 break; 977 } 978 if (w1 != NULL) 979 continue; 980 /* 981 * This lock has no anscestors, display its descendants. 982 */ 983 witness_displaydescendants(prnt, w); 984 } 985 prnt("\nMutex which were never acquired\n"); 986 for (w = w_all; w; w = w->w_next) { 987 if (w->w_file != NULL) 988 continue; 989 prnt("%s\n", w->w_description); 990 } 991 } 992 993 int 994 witness_sleep(int check_only, struct mtx *mtx, const char *file, int line) 995 { 996 struct mtx *m; 997 struct proc *p; 998 char **sleep; 999 int n = 0; 1000 1001 p = CURPROC; 1002 for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL; 1003 m = LIST_NEXT(m, mtx_held)) { 1004 if (m == mtx) 1005 continue; 1006 for (sleep = sleep_list; *sleep!= NULL; sleep++) 1007 if (strcmp(m->mtx_description, *sleep) == 0) 1008 goto next; 1009 printf("%s:%d: %s with \"%s\" locked from %s:%d\n", 1010 file, line, check_only ? "could sleep" : "sleeping", 1011 m->mtx_description, 1012 m->mtx_witness->w_file, m->mtx_witness->w_line); 1013 n++; 1014 next: 1015 } 1016 #ifdef DDB 1017 if (witness_ddb && n) 1018 Debugger("witness_sleep"); 1019 #endif /* DDB */ 1020 return (n); 1021 } 1022 1023 static struct witness * 1024 enroll(const char *description, int flag) 1025 { 1026 int i; 1027 struct witness *w, *w1; 1028 char **ignore; 1029 char **order; 1030 1031 if (!witness_watch) 1032 return (NULL); 1033 for (ignore = ignore_list; *ignore != NULL; ignore++) 1034 if (strcmp(description, *ignore) == 0) 1035 return (NULL); 1036 1037 if (w_inited == 0) { 1038 mtx_init(&w_mtx, "witness lock", MTX_COLD | MTX_DEF); 1039 for (i = 0; i < WITNESS_COUNT; i++) { 1040 w = &w_data[i]; 1041 witness_free(w); 1042 } 1043 w_inited = 1; 1044 for (order = order_list; *order != NULL; order++) { 1045 w = enroll(*order, MTX_DEF); 1046 w->w_file = "order list"; 1047 for (order++; *order != NULL; order++) { 1048 w1 = enroll(*order, MTX_DEF); 1049 w1->w_file = "order list"; 1050 itismychild(w, w1); 1051 w = w1; 1052 } 1053 } 1054 } 1055 if ((flag & MTX_SPIN) && witness_skipspin) 1056 return (NULL); 1057 mtx_enter(&w_mtx, MTX_SPIN); 1058 for (w = w_all; w; w = w->w_next) { 1059 if (strcmp(description, w->w_description) == 0) { 1060 mtx_exit(&w_mtx, MTX_SPIN); 1061 return (w); 1062 } 1063 } 1064 if ((w = witness_get()) == NULL) 1065 return (NULL); 1066 w->w_next = w_all; 1067 w_all = w; 1068 w->w_description = description; 1069 mtx_exit(&w_mtx, MTX_SPIN); 1070 if (flag & MTX_SPIN) { 1071 w->w_spin = 1; 1072 1073 i = 1; 1074 for (order = spin_order_list; *order != NULL; order++) { 1075 if (strcmp(description, *order) == 0) 1076 break; 1077 i <<= 1; 1078 } 1079 if (*order == NULL) 1080 panic("spin lock %s not in order list", description); 1081 w->w_level = i; 1082 } 1083 return (w); 1084 } 1085 1086 static int 1087 itismychild(struct witness *parent, struct witness *child) 1088 { 1089 static int recursed; 1090 1091 /* 1092 * Insert "child" after "parent" 1093 */ 1094 while (parent->w_morechildren) 1095 parent = parent->w_morechildren; 1096 1097 if (parent->w_childcnt == WITNESS_NCHILDREN) { 1098 if ((parent->w_morechildren = witness_get()) == NULL) 1099 return (1); 1100 parent = parent->w_morechildren; 1101 } 1102 MPASS(child != NULL); 1103 parent->w_children[parent->w_childcnt++] = child; 1104 /* 1105 * now prune whole tree 1106 */ 1107 if (recursed) 1108 return (0); 1109 recursed = 1; 1110 for (child = w_all; child != NULL; child = child->w_next) { 1111 for (parent = w_all; parent != NULL; 1112 parent = parent->w_next) { 1113 if (!isitmychild(parent, child)) 1114 continue; 1115 removechild(parent, child); 1116 if (isitmydescendant(parent, child)) 1117 continue; 1118 itismychild(parent, child); 1119 } 1120 } 1121 recursed = 0; 1122 witness_levelall(); 1123 return (0); 1124 } 1125 1126 static void 1127 removechild(struct witness *parent, struct witness *child) 1128 { 1129 struct witness *w, *w1; 1130 int i; 1131 1132 for (w = parent; w != NULL; w = w->w_morechildren) 1133 for (i = 0; i < w->w_childcnt; i++) 1134 if (w->w_children[i] == child) 1135 goto found; 1136 return; 1137 found: 1138 for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren) 1139 continue; 1140 w->w_children[i] = w1->w_children[--w1->w_childcnt]; 1141 MPASS(w->w_children[i] != NULL); 1142 1143 if (w1->w_childcnt != 0) 1144 return; 1145 1146 if (w1 == parent) 1147 return; 1148 for (w = parent; w->w_morechildren != w1; w = w->w_morechildren) 1149 continue; 1150 w->w_morechildren = 0; 1151 witness_free(w1); 1152 } 1153 1154 static int 1155 isitmychild(struct witness *parent, struct witness *child) 1156 { 1157 struct witness *w; 1158 int i; 1159 1160 for (w = parent; w != NULL; w = w->w_morechildren) { 1161 for (i = 0; i < w->w_childcnt; i++) { 1162 if (w->w_children[i] == child) 1163 return (1); 1164 } 1165 } 1166 return (0); 1167 } 1168 1169 static int 1170 isitmydescendant(struct witness *parent, struct witness *child) 1171 { 1172 struct witness *w; 1173 int i; 1174 int j; 1175 1176 for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) { 1177 MPASS(j < 1000); 1178 for (i = 0; i < w->w_childcnt; i++) { 1179 if (w->w_children[i] == child) 1180 return (1); 1181 } 1182 for (i = 0; i < w->w_childcnt; i++) { 1183 if (isitmydescendant(w->w_children[i], child)) 1184 return (1); 1185 } 1186 } 1187 return (0); 1188 } 1189 1190 void 1191 witness_levelall (void) 1192 { 1193 struct witness *w, *w1; 1194 1195 for (w = w_all; w; w = w->w_next) 1196 if (!w->w_spin) 1197 w->w_level = 0; 1198 for (w = w_all; w; w = w->w_next) { 1199 if (w->w_spin) 1200 continue; 1201 for (w1 = w_all; w1; w1 = w1->w_next) { 1202 if (isitmychild(w1, w)) 1203 break; 1204 } 1205 if (w1 != NULL) 1206 continue; 1207 witness_leveldescendents(w, 0); 1208 } 1209 } 1210 1211 static void 1212 witness_leveldescendents(struct witness *parent, int level) 1213 { 1214 int i; 1215 struct witness *w; 1216 1217 if (parent->w_level < level) 1218 parent->w_level = level; 1219 level++; 1220 for (w = parent; w != NULL; w = w->w_morechildren) 1221 for (i = 0; i < w->w_childcnt; i++) 1222 witness_leveldescendents(w->w_children[i], level); 1223 } 1224 1225 static void 1226 witness_displaydescendants(void(*prnt)(const char *fmt, ...), 1227 struct witness *parent) 1228 { 1229 struct witness *w; 1230 int i; 1231 int level = parent->w_level; 1232 1233 prnt("%d", level); 1234 if (level < 10) 1235 prnt(" "); 1236 for (i = 0; i < level; i++) 1237 prnt(" "); 1238 prnt("%s", parent->w_description); 1239 if (parent->w_file != NULL) { 1240 prnt(" -- last acquired @ %s", parent->w_file); 1241 #ifndef W_USE_WHERE 1242 prnt(":%d", parent->w_line); 1243 #endif 1244 prnt("\n"); 1245 } 1246 1247 for (w = parent; w != NULL; w = w->w_morechildren) 1248 for (i = 0; i < w->w_childcnt; i++) 1249 witness_displaydescendants(prnt, w->w_children[i]); 1250 } 1251 1252 static int 1253 dup_ok(struct witness *w) 1254 { 1255 char **dup; 1256 1257 for (dup = dup_list; *dup!= NULL; dup++) 1258 if (strcmp(w->w_description, *dup) == 0) 1259 return (1); 1260 return (0); 1261 } 1262 1263 static int 1264 blessed(struct witness *w1, struct witness *w2) 1265 { 1266 int i; 1267 struct witness_blessed *b; 1268 1269 for (i = 0; i < blessed_count; i++) { 1270 b = &blessed_list[i]; 1271 if (strcmp(w1->w_description, b->b_lock1) == 0) { 1272 if (strcmp(w2->w_description, b->b_lock2) == 0) 1273 return (1); 1274 continue; 1275 } 1276 if (strcmp(w1->w_description, b->b_lock2) == 0) 1277 if (strcmp(w2->w_description, b->b_lock1) == 0) 1278 return (1); 1279 } 1280 return (0); 1281 } 1282 1283 static struct witness * 1284 witness_get() 1285 { 1286 struct witness *w; 1287 1288 if ((w = w_free) == NULL) { 1289 witness_dead = 1; 1290 mtx_exit(&w_mtx, MTX_SPIN); 1291 printf("witness exhausted\n"); 1292 return (NULL); 1293 } 1294 w_free = w->w_next; 1295 bzero(w, sizeof(*w)); 1296 return (w); 1297 } 1298 1299 static void 1300 witness_free(struct witness *w) 1301 { 1302 w->w_next = w_free; 1303 w_free = w; 1304 } 1305 1306 void 1307 witness_list(struct proc *p) 1308 { 1309 struct mtx *m; 1310 1311 for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL; 1312 m = LIST_NEXT(m, mtx_held)) { 1313 printf("\t\"%s\" (%p) locked at %s:%d\n", 1314 m->mtx_description, m, 1315 m->mtx_witness->w_file, m->mtx_witness->w_line); 1316 } 1317 } 1318 1319 void 1320 witness_save(struct mtx *m, const char **filep, int *linep) 1321 { 1322 *filep = m->mtx_witness->w_file; 1323 *linep = m->mtx_witness->w_line; 1324 } 1325 1326 void 1327 witness_restore(struct mtx *m, const char *file, int line) 1328 { 1329 m->mtx_witness->w_file = file; 1330 m->mtx_witness->w_line = line; 1331 } 1332 1333 #endif /* (defined(MUTEX_DEBUG) && defined(WITNESS)) */ 1334