1 /*- 2 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 3. Berkeley Software Design Inc's name may not be used to endorse or 13 * promote products derived from this software without specific prior 14 * written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $ 29 * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $ 30 */ 31 32 /* 33 * Implementation of the `witness' lock verifier. Originally implemented for 34 * mutexes in BSD/OS. Extended to handle generic lock objects and lock 35 * classes in FreeBSD. 
36 */ 37 38 /* 39 * Main Entry: witness 40 * Pronunciation: 'wit-n&s 41 * Function: noun 42 * Etymology: Middle English witnesse, from Old English witnes knowledge, 43 * testimony, witness, from 2wit 44 * Date: before 12th century 45 * 1 : attestation of a fact or event : TESTIMONY 46 * 2 : one that gives evidence; specifically : one who testifies in 47 * a cause or before a judicial tribunal 48 * 3 : one asked to be present at a transaction so as to be able to 49 * testify to its having taken place 50 * 4 : one who has personal knowledge of something 51 * 5 a : something serving as evidence or proof : SIGN 52 * b : public affirmation by word or example of usually 53 * religious faith or conviction <the heroic witness to divine 54 * life -- Pilot> 55 * 6 capitalized : a member of the Jehovah's Witnesses 56 */ 57 58 /* 59 * Special rules concerning Giant and lock orders: 60 * 61 * 1) Giant must be acquired before any other mutexes. Stated another way, 62 * no other mutex may be held when Giant is acquired. 63 * 64 * 2) Giant must be released when blocking on a sleepable lock. 65 * 66 * This rule is less obvious, but is a result of Giant providing the same 67 * semantics as spl(). Basically, when a thread sleeps, it must release 68 * Giant. When a thread blocks on a sleepable lock, it sleeps. Hence rule 69 * 2). 70 * 71 * 3) Giant may be acquired before or after sleepable locks. 72 * 73 * This rule is also not quite as obvious. Giant may be acquired after 74 * a sleepable lock because it is a non-sleepable lock and non-sleepable 75 * locks may always be acquired while holding a sleepable lock. The second 76 * case, Giant before a sleepable lock, follows from rule 2) above. Suppose 77 * you have two threads T1 and T2 and a sleepable lock X. Suppose that T1 78 * acquires X and blocks on Giant. Then suppose that T2 acquires Giant and 79 * blocks on X. When T2 blocks on X, T2 will release Giant allowing T1 to 80 * execute. 
Thus, acquiring Giant both before and after a sleepable lock 81 * will not result in a lock order reversal. 82 */ 83 84 #include <sys/cdefs.h> 85 __FBSDID("$FreeBSD$"); 86 87 #include "opt_ddb.h" 88 #include "opt_witness.h" 89 90 #include <sys/param.h> 91 #include <sys/bus.h> 92 #include <sys/kdb.h> 93 #include <sys/kernel.h> 94 #include <sys/ktr.h> 95 #include <sys/lock.h> 96 #include <sys/malloc.h> 97 #include <sys/mutex.h> 98 #include <sys/proc.h> 99 #include <sys/sysctl.h> 100 #include <sys/systm.h> 101 102 #include <ddb/ddb.h> 103 104 #include <machine/stdarg.h> 105 106 /* Define this to check for blessed mutexes */ 107 #undef BLESSING 108 109 #define WITNESS_COUNT 200 110 #define WITNESS_CHILDCOUNT (WITNESS_COUNT * 4) 111 /* 112 * XXX: This is somewhat bogus, as we assume here that at most 1024 threads 113 * will hold LOCK_NCHILDREN * 2 locks. We handle failure ok, and we should 114 * probably be safe for the most part, but it's still a SWAG. 115 */ 116 #define LOCK_CHILDCOUNT (MAXCPU + 1024) * 2 117 118 #define WITNESS_NCHILDREN 6 119 120 struct witness_child_list_entry; 121 122 struct witness { 123 const char *w_name; 124 struct lock_class *w_class; 125 STAILQ_ENTRY(witness) w_list; /* List of all witnesses. */ 126 STAILQ_ENTRY(witness) w_typelist; /* Witnesses of a type. */ 127 struct witness_child_list_entry *w_children; /* Great evilness... 
*/ 128 const char *w_file; 129 int w_line; 130 u_int w_level; 131 u_int w_refcount; 132 u_char w_Giant_squawked:1; 133 u_char w_other_squawked:1; 134 u_char w_same_squawked:1; 135 u_char w_displayed:1; 136 }; 137 138 struct witness_child_list_entry { 139 struct witness_child_list_entry *wcl_next; 140 struct witness *wcl_children[WITNESS_NCHILDREN]; 141 u_int wcl_count; 142 }; 143 144 STAILQ_HEAD(witness_list, witness); 145 146 #ifdef BLESSING 147 struct witness_blessed { 148 const char *b_lock1; 149 const char *b_lock2; 150 }; 151 #endif 152 153 struct witness_order_list_entry { 154 const char *w_name; 155 struct lock_class *w_class; 156 }; 157 158 #ifdef BLESSING 159 static int blessed(struct witness *, struct witness *); 160 #endif 161 static int depart(struct witness *w); 162 static struct witness *enroll(const char *description, 163 struct lock_class *lock_class); 164 static int insertchild(struct witness *parent, struct witness *child); 165 static int isitmychild(struct witness *parent, struct witness *child); 166 static int isitmydescendant(struct witness *parent, struct witness *child); 167 static int itismychild(struct witness *parent, struct witness *child); 168 static int rebalancetree(struct witness_list *list); 169 static void removechild(struct witness *parent, struct witness *child); 170 static int reparentchildren(struct witness *newparent, 171 struct witness *oldparent); 172 static int sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS); 173 static void witness_displaydescendants(void(*)(const char *fmt, ...), 174 struct witness *, int indent); 175 static const char *fixup_filename(const char *file); 176 static void witness_leveldescendents(struct witness *parent, int level); 177 static void witness_levelall(void); 178 static struct witness *witness_get(void); 179 static void witness_free(struct witness *m); 180 static struct witness_child_list_entry *witness_child_get(void); 181 static void witness_child_free(struct witness_child_list_entry *wcl); 182 
static struct	lock_list_entry *witness_lock_list_get(void);
static void	witness_lock_list_free(struct lock_list_entry *lle);
static struct	lock_instance *find_instance(struct lock_list_entry *lock_list,
		    struct lock_object *lock);
static void	witness_list_lock(struct lock_instance *instance);
#ifdef DDB
static void	witness_list(struct thread *td);
static void	witness_display_list(void(*prnt)(const char *fmt, ...),
		    struct witness_list *list);
static void	witness_display(void(*)(const char *fmt, ...));
#endif

MALLOC_DEFINE(M_WITNESS, "witness", "witness structure");

/*
 * If set to 0, witness is disabled.  If set to 1, witness performs full lock
 * order checking for all locks.  If set to 2 or higher, then witness skips
 * the full lock order check if the lock being acquired is at a higher level
 * (i.e. farther down in the tree) than the current lock.  This last mode is
 * somewhat experimental and not considered fully safe.  At runtime, this
 * value may be set to 0 to turn off witness.  witness is not allowed to be
 * turned on once it is turned off, however.
 */
static int witness_watch = 1;
TUNABLE_INT("debug.witness_watch", &witness_watch);
SYSCTL_PROC(_debug, OID_AUTO, witness_watch, CTLFLAG_RW | CTLTYPE_INT, NULL, 0,
    sysctl_debug_witness_watch, "I", "witness is watching lock operations");

#ifdef KDB
/*
 * When KDB is enabled and witness_kdb is set to 1, it will cause the system
 * to drop into the kernel debugger (kdb_enter()) when:
 *	- a lock hierarchy violation occurs
 *	- locks are held when going to sleep.
 */
#ifdef WITNESS_KDB
int	witness_kdb = 1;
#else
int	witness_kdb = 0;
#endif
TUNABLE_INT("debug.witness_kdb", &witness_kdb);
SYSCTL_INT(_debug, OID_AUTO, witness_kdb, CTLFLAG_RW, &witness_kdb, 0, "");

/*
 * When KDB is enabled and witness_trace is set to 1, it will cause the system
 * to print a stack trace when:
 *	- a lock hierarchy violation occurs
 *	- locks are held when going to sleep.
 */
int	witness_trace = 1;
TUNABLE_INT("debug.witness_trace", &witness_trace);
SYSCTL_INT(_debug, OID_AUTO, witness_trace, CTLFLAG_RW, &witness_trace, 0, "");
#endif /* KDB */

/*
 * Boot-time tunable, exported read-only at runtime (CTLFLAG_RDTUN).
 * NOTE(review): presumably makes enroll() ignore spin locks -- the consumer
 * is not visible in this part of the file; confirm there.
 */
#ifdef WITNESS_SKIPSPIN
int	witness_skipspin = 1;
#else
int	witness_skipspin = 0;
#endif
TUNABLE_INT("debug.witness_skipspin", &witness_skipspin);
SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RDTUN, &witness_skipspin, 0,
    "");

/* Spin mutex protecting the witness order data; set up in witness_initialize(). */
static struct mtx w_mtx;
static struct witness_list w_free = STAILQ_HEAD_INITIALIZER(w_free);
static struct witness_list w_all = STAILQ_HEAD_INITIALIZER(w_all);
static struct witness_list w_spin = STAILQ_HEAD_INITIALIZER(w_spin);
static struct witness_list w_sleep = STAILQ_HEAD_INITIALIZER(w_sleep);
static struct witness_child_list_entry *w_child_free = NULL;
static struct lock_list_entry *w_lock_list_free = NULL;

/*
 * Static backing store; witness_initialize() hands every element to the
 * matching *_free() routine before witness is marked ready.
 */
static struct witness w_data[WITNESS_COUNT];
static struct witness_child_list_entry w_childdata[WITNESS_CHILDCOUNT];
static struct lock_list_entry w_locklistdata[LOCK_CHILDCOUNT];

/*
 * Statically defined lock orders.  The table is a sequence of chains: within
 * a chain each entry is made a child of the entry before it, and a
 * { NULL, NULL } entry ends the chain.  Two consecutive { NULL, NULL }
 * entries end the whole table (see the loops in witness_initialize()).
 */
static struct witness_order_list_entry order_lists[] = {
	{ "proctree", &lock_class_sx },
	{ "allproc", &lock_class_sx },
	{ "Giant", &lock_class_mtx_sleep },
	{ "filedesc structure", &lock_class_mtx_sleep },
	{ "pipe mutex", &lock_class_mtx_sleep },
	{ "sigio lock", &lock_class_mtx_sleep },
	{ "process group", &lock_class_mtx_sleep },
	{ "process lock", &lock_class_mtx_sleep },
	{ "session", &lock_class_mtx_sleep },
	{ "uidinfo hash", &lock_class_mtx_sleep },
	{ "uidinfo struct", &lock_class_mtx_sleep },
	{ "allprison", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * Sockets
	 */
	{ "filedesc structure", &lock_class_mtx_sleep },
	{ "accept", &lock_class_mtx_sleep },
	{ "so_snd", &lock_class_mtx_sleep },
	{ "so_rcv", &lock_class_mtx_sleep },
	{ "sellck", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * Routing
	 */
	{ "so_rcv", &lock_class_mtx_sleep },
	{ "radix node head", &lock_class_mtx_sleep },
	{ "rtentry", &lock_class_mtx_sleep },
	{ "ifaddr", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * UNIX Domain Sockets
	 */
	{ "unp", &lock_class_mtx_sleep },
	{ "so_snd", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * UDP/IP
	 */
	{ "udp", &lock_class_mtx_sleep },
	{ "udpinp", &lock_class_mtx_sleep },
	{ "so_snd", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * TCP/IP
	 */
	{ "tcp", &lock_class_mtx_sleep },
	{ "tcpinp", &lock_class_mtx_sleep },
	{ "so_snd", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * SLIP
	 */
	{ "slip_mtx", &lock_class_mtx_sleep },
	{ "slip sc_mtx", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * netatalk
	 */
	{ "ddp_list_mtx", &lock_class_mtx_sleep },
	{ "ddp_mtx", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * spin locks
	 */
#ifdef SMP
	{ "ap boot", &lock_class_mtx_spin },
#endif
	{ "sio", &lock_class_mtx_spin },
#ifdef __i386__
	{ "cy", &lock_class_mtx_spin },
#endif
	{ "uart_hwmtx", &lock_class_mtx_spin },
	{ "sabtty", &lock_class_mtx_spin },
	{ "zstty", &lock_class_mtx_spin },
	{ "ng_node", &lock_class_mtx_spin },
	{ "ng_worklist", &lock_class_mtx_spin },
	{ "taskqueue_fast", &lock_class_mtx_spin },
	{ "intr table", &lock_class_mtx_spin },
	{ "ithread table lock", &lock_class_mtx_spin },
	{ "sleepq chain", &lock_class_mtx_spin },
	{ "sched lock", &lock_class_mtx_spin },
	{ "turnstile chain", &lock_class_mtx_spin },
	{ "td_contested", &lock_class_mtx_spin },
	{ "callout", &lock_class_mtx_spin },
	{ "entropy harvest", &lock_class_mtx_spin },
	{ "entropy harvest buffers", &lock_class_mtx_spin },
	/*
	 * leaf locks
	 */
	{ "allpmaps", &lock_class_mtx_spin },
	{ "vm page queue free mutex", &lock_class_mtx_spin },
	{ "icu", &lock_class_mtx_spin },
#ifdef SMP
	{ "smp rendezvous", &lock_class_mtx_spin },
#if defined(__i386__) || defined(__amd64__)
	{ "tlb", &lock_class_mtx_spin },
#endif
#ifdef __sparc64__
	{ "ipi", &lock_class_mtx_spin },
#endif
#endif
	{ "clk", &lock_class_mtx_spin },
	{ "mutex profiling lock", &lock_class_mtx_spin },
	{ "kse zombie lock", &lock_class_mtx_spin },
	{ "ALD Queue", &lock_class_mtx_spin },
#ifdef __ia64__
	{ "MCA spin lock", &lock_class_mtx_spin },
#endif
#if defined(__i386__) || defined(__amd64__)
	{ "pcicfg", &lock_class_mtx_spin },
#endif
	{ NULL, NULL },
	{ NULL, NULL }
};

#ifdef BLESSING
/*
 * Pairs of locks which have been blessed
 * Don't complain about order problems with blessed locks
 */
static struct witness_blessed blessed_list[] = {
};
static int blessed_count =
	sizeof(blessed_list) / sizeof(struct witness_blessed);
#endif

/*
 * List of all locks in the system.
*/
TAILQ_HEAD(, lock_object) all_locks = TAILQ_HEAD_INITIALIZER(all_locks);

/*
 * Mutex protecting all_locks and the lock counters.  It is initialized
 * statically (already flagged LO_INITIALIZED) and is linked onto all_locks
 * by hand in witness_initialize().
 */
static struct mtx all_mtx = {
	{ &lock_class_mtx_sleep,	/* mtx_object.lo_class */
	  "All locks list",		/* mtx_object.lo_name */
	  "All locks list",		/* mtx_object.lo_type */
	  LO_INITIALIZED,		/* mtx_object.lo_flags */
	  { NULL, NULL },		/* mtx_object.lo_list */
	  NULL },			/* mtx_object.lo_witness */
	MTX_UNOWNED, 0			/* mtx_lock, mtx_recurse */
};

/*
 * This global is set to 0 once it becomes safe to use the witness code.
 */
static int witness_cold = 1;

/*
 * Global variables for book keeping: the number of currently initialized
 * locks and the highest value that count has reached.
 */
static int lock_cur_cnt;
static int lock_max_cnt;

/*
 * The WITNESS-enabled diagnostic code.
 *
 * witness_initialize() is the SYSINIT hook that brings witness up: it seeds
 * the free lists from the static pools, enrolls the static lock orders,
 * attaches a witness to every lock that was initialized while witness was
 * still cold, and finally clears witness_cold.  Giant is dropped on entry
 * and re-taken on exit.
 */
static void
witness_initialize(void *dummy __unused)
{
	struct lock_object *lock;
	struct witness_order_list_entry *order;
	struct witness *w, *w1;
	int i;

	/*
	 * We have to release Giant before initializing its witness
	 * structure so that WITNESS doesn't get confused.
	 */
	mtx_unlock(&Giant);
	mtx_assert(&Giant, MA_NOTOWNED);

	CTR1(KTR_WITNESS, "%s: initializing witness", __func__);
	TAILQ_INSERT_HEAD(&all_locks, &all_mtx.mtx_object, lo_list);
	mtx_init(&w_mtx, "witness lock", NULL, MTX_SPIN | MTX_QUIET |
	    MTX_NOWITNESS);
	/* Populate the free lists from the static backing arrays. */
	for (i = 0; i < WITNESS_COUNT; i++)
		witness_free(&w_data[i]);
	for (i = 0; i < WITNESS_CHILDCOUNT; i++)
		witness_child_free(&w_childdata[i]);
	for (i = 0; i < LOCK_CHILDCOUNT; i++)
		witness_lock_list_free(&w_locklistdata[i]);

	/* First add in all the specified order lists. */
	for (order = order_lists; order->w_name != NULL; order++) {
		w = enroll(order->w_name, order->w_class);
		if (w == NULL)
			continue;
		w->w_file = "order list";
		/*
		 * Walk the rest of this chain, making each entry a child of
		 * the previous one.  The inner loop stops on the chain's
		 * NULL terminator; the outer loop's increment then steps
		 * past it.
		 */
		for (order++; order->w_name != NULL; order++) {
			w1 = enroll(order->w_name, order->w_class);
			if (w1 == NULL)
				continue;
			w1->w_file = "order list";
			if (!itismychild(w, w1))
				panic("Not enough memory for static orders!");
			w = w1;
		}
	}

	/* Iterate through all locks and add them to witness. */
	mtx_lock(&all_mtx);
	TAILQ_FOREACH(lock, &all_locks, lo_list) {
		if (lock->lo_flags & LO_WITNESS)
			lock->lo_witness = enroll(lock->lo_type,
			    lock->lo_class);
		else
			lock->lo_witness = NULL;
	}
	mtx_unlock(&all_mtx);

	/* Mark the witness code as being ready for use. */
	atomic_store_rel_int(&witness_cold, 0);

	mtx_lock(&Giant);
}
SYSINIT(witness_init, SI_SUB_WITNESS, SI_ORDER_FIRST, witness_initialize, NULL)

/*
 * Handler for the debug.witness_watch sysctl.  Reads return the current
 * value.  A write requires suser() and may only switch witness off (store
 * 0); writing the current value is a no-op and any other value yields EINVAL.
 */
static int
sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS)
{
	int error, value;

	value = witness_watch;
	error = sysctl_handle_int(oidp, &value, 0, req);
	/* Done if the copy failed or this was a read-only request. */
	if (error != 0 || req->newptr == NULL)
		return (error);
	error = suser(req->td);
	if (error != 0)
		return (error);
	if (value == witness_watch)
		return (0);
	if (value != 0)
		return (EINVAL);
	witness_watch = 0;
	return (0);
}

/*
 * Register a newly initialized lock: panic if it is already initialized or
 * if its LO_* flags ask for something its class does not allow, then link it
 * onto all_locks, update the lock counters and, when witness is up and
 * watching, enroll the lock's type.  Otherwise lo_witness is set to NULL.
 */
void
witness_init(struct lock_object *lock)
{
	struct lock_class *class;

	class = lock->lo_class;
	if (lock->lo_flags & LO_INITIALIZED)
		panic("%s: lock (%s) %s is already initialized", __func__,
		    class->lc_name, lock->lo_name);
	if ((lock->lo_flags & LO_RECURSABLE) != 0 &&
	    (class->lc_flags & LC_RECURSABLE) == 0)
		panic("%s: lock (%s) %s can not be recursable", __func__,
		    class->lc_name, lock->lo_name);
	if ((lock->lo_flags & LO_SLEEPABLE) != 0 &&
	    (class->lc_flags & LC_SLEEPABLE) == 0)
		panic("%s: lock (%s) %s can not be sleepable", __func__,
		    class->lc_name, lock->lo_name);
	if ((lock->lo_flags & LO_UPGRADABLE) != 0 &&
	    (class->lc_flags & LC_UPGRADABLE) == 0)
		panic("%s: lock (%s) %s can not be upgradable", __func__,
		    class->lc_name, lock->lo_name);

	mtx_lock(&all_mtx);
	TAILQ_INSERT_TAIL(&all_locks, lock, lo_list);
	lock->lo_flags |= LO_INITIALIZED;
	lock_cur_cnt++;
	if (lock_cur_cnt > lock_max_cnt)
		lock_max_cnt = lock_cur_cnt;
	mtx_unlock(&all_mtx);
	/*
	 * While witness is still cold the lock is only put on all_locks;
	 * witness_initialize() enrolls such locks later.
	 */
	if (!witness_cold && witness_watch != 0 && panicstr == NULL &&
	    (lock->lo_flags & LO_WITNESS) != 0)
		lock->lo_witness = enroll(lock->lo_type, class);
	else
		lock->lo_witness = NULL;
}

/*
 * Unregister a lock that is being destroyed: drop its reference on the
 * witness (calling depart() when the count reaches zero), unlink it from
 * all_locks and clear LO_INITIALIZED.  Panics if witness is still cold or
 * the lock was never initialized.
 */
void
witness_destroy(struct lock_object *lock)
{
	struct witness *w;

	if (witness_cold)
		panic("lock (%s) %s destroyed while witness_cold",
		    lock->lo_class->lc_name, lock->lo_name);
	if ((lock->lo_flags & LO_INITIALIZED) == 0)
		panic("%s: lock (%s) %s is not initialized", __func__,
		    lock->lo_class->lc_name, lock->lo_name);

	/* XXX: need to verify that no one holds the lock */
	w = lock->lo_witness;
	if (w != NULL) {
		mtx_lock_spin(&w_mtx);
		MPASS(w->w_refcount > 0);
		w->w_refcount--;

		/*
		 * Lock is already released if we have an allocation failure
		 * and depart() fails.
		 */
		if (w->w_refcount != 0 || depart(w))
			mtx_unlock_spin(&w_mtx);
	}

	mtx_lock(&all_mtx);
	lock_cur_cnt--;
	TAILQ_REMOVE(&all_locks, lock, lo_list);
	lock->lo_flags &= ~LO_INITIALIZED;
	mtx_unlock(&all_mtx);
}

#ifdef DDB
/*
 * Print, through prnt, the order tree below every witness on the given
 * per-type list that has been acquired at least once (w_file set) and sits
 * at level 0.
 */
static void
witness_display_list(void(*prnt)(const char *fmt, ...),
		     struct witness_list *list)
{
	struct witness *w;

	STAILQ_FOREACH(w, list, w_typelist) {
		if (w->w_file == NULL || w->w_level > 0)
			continue;
		/*
		 * This lock has no ancestors, display its descendants.
*/
		witness_displaydescendants(prnt, w, 0);
	}
}

/*
 * Dump the whole witness database through prnt: the sleep-lock trees, the
 * spin-lock trees, and then the names of witnesses that are still referenced
 * but were never acquired.
 */
static void
witness_display(void(*prnt)(const char *fmt, ...))
{
	struct witness *w;

	KASSERT(!witness_cold, ("%s: witness_cold", __func__));
	witness_levelall();

	/* Clear all the displayed flags. */
	STAILQ_FOREACH(w, &w_all, w_list) {
		w->w_displayed = 0;
	}

	/*
	 * First, handle sleep locks which have been acquired at least
	 * once.
	 */
	prnt("Sleep locks:\n");
	witness_display_list(prnt, &w_sleep);

	/*
	 * Now do spin locks which have been acquired at least once.
	 */
	prnt("\nSpin locks:\n");
	witness_display_list(prnt, &w_spin);

	/*
	 * Finally, any locks which have not been acquired yet.
	 */
	prnt("\nLocks which were never acquired:\n");
	STAILQ_FOREACH(w, &w_all, w_list) {
		/* Skip witnesses that were acquired or have no users left. */
		if (w->w_file != NULL || w->w_refcount == 0)
			continue;
		prnt("%s\n", w->w_name);
	}
}
#endif /* DDB */

/*
 * Trim useless garbage from filenames.
 *
 * Strips every leading "../" component and returns a pointer into the same
 * string; a NULL file is returned unchanged.
 */
static const char *
fixup_filename(const char *file)
{

	if (file == NULL)
		return (NULL);
	while (strncmp(file, "../", 3) == 0)
		file += 3;
	return (file);
}

/*
 * Explicitly declare that lock1 is acquired before lock2.
 *
 * Returns 0 on success or when witness is disabled or the system is
 * panicking, EINVAL if either lock is NULL or has no witness, EDOOFUS if the
 * opposite order is already known, and ENOMEM if the new order could not be
 * recorded.
 */
int
witness_defineorder(struct lock_object *lock1, struct lock_object *lock2)
{

	if (witness_watch == 0 || panicstr != NULL)
		return (0);

	/* Require locks that witness knows about. */
	if (lock1 == NULL || lock1->lo_witness == NULL || lock2 == NULL ||
	    lock2->lo_witness == NULL)
		return (EINVAL);

	MPASS(!mtx_owned(&w_mtx));
	mtx_lock_spin(&w_mtx);

	/*
	 * If we already have either an explicit or implied lock order that
	 * is the other way around, then return an error.
	 */
	if (isitmydescendant(lock2->lo_witness, lock1->lo_witness)) {
		mtx_unlock_spin(&w_mtx);
		return (EDOOFUS);
	}

	/* Try to add the new order.
*/ 657 CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__, 658 lock2->lo_type, lock1->lo_type); 659 if (!itismychild(lock1->lo_witness, lock2->lo_witness)) 660 return (ENOMEM); 661 mtx_unlock_spin(&w_mtx); 662 return (0); 663 } 664 665 void 666 witness_checkorder(struct lock_object *lock, int flags, const char *file, 667 int line) 668 { 669 struct lock_list_entry **lock_list, *lle; 670 struct lock_instance *lock1, *lock2; 671 struct lock_class *class; 672 struct witness *w, *w1; 673 struct thread *td; 674 int i, j; 675 676 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL || 677 panicstr != NULL) 678 return; 679 680 /* 681 * Try locks do not block if they fail to acquire the lock, thus 682 * there is no danger of deadlocks or of switching while holding a 683 * spin lock if we acquire a lock via a try operation. This 684 * function shouldn't even be called for try locks, so panic if 685 * that happens. 686 */ 687 if (flags & LOP_TRYLOCK) 688 panic("%s should not be called for try lock operations", 689 __func__); 690 691 w = lock->lo_witness; 692 class = lock->lo_class; 693 td = curthread; 694 file = fixup_filename(file); 695 696 if (class->lc_flags & LC_SLEEPLOCK) { 697 /* 698 * Since spin locks include a critical section, this check 699 * implicitly enforces a lock order of all sleep locks before 700 * all spin locks. 701 */ 702 if (td->td_critnest != 0) 703 panic("blockable sleep lock (%s) %s @ %s:%d", 704 class->lc_name, lock->lo_name, file, line); 705 706 /* 707 * If this is the first lock acquired then just return as 708 * no order checking is needed. 709 */ 710 if (td->td_sleeplocks == NULL) 711 return; 712 lock_list = &td->td_sleeplocks; 713 } else { 714 /* 715 * If this is the first lock, just return as no order 716 * checking is needed. We check this in both if clauses 717 * here as unifying the check would require us to use a 718 * critical section to ensure we don't migrate while doing 719 * the check. 
Note that if this is not the first lock, we 720 * are already in a critical section and are safe for the 721 * rest of the check. 722 */ 723 if (PCPU_GET(spinlocks) == NULL) 724 return; 725 lock_list = PCPU_PTR(spinlocks); 726 } 727 728 /* 729 * Check to see if we are recursing on a lock we already own. If 730 * so, make sure that we don't mismatch exclusive and shared lock 731 * acquires. 732 */ 733 lock1 = find_instance(*lock_list, lock); 734 if (lock1 != NULL) { 735 if ((lock1->li_flags & LI_EXCLUSIVE) != 0 && 736 (flags & LOP_EXCLUSIVE) == 0) { 737 printf("shared lock of (%s) %s @ %s:%d\n", 738 class->lc_name, lock->lo_name, file, line); 739 printf("while exclusively locked from %s:%d\n", 740 lock1->li_file, lock1->li_line); 741 panic("share->excl"); 742 } 743 if ((lock1->li_flags & LI_EXCLUSIVE) == 0 && 744 (flags & LOP_EXCLUSIVE) != 0) { 745 printf("exclusive lock of (%s) %s @ %s:%d\n", 746 class->lc_name, lock->lo_name, file, line); 747 printf("while share locked from %s:%d\n", 748 lock1->li_file, lock1->li_line); 749 panic("excl->share"); 750 } 751 return; 752 } 753 754 /* 755 * Try locks do not block if they fail to acquire the lock, thus 756 * there is no danger of deadlocks or of switching while holding a 757 * spin lock if we acquire a lock via a try operation. 758 */ 759 if (flags & LOP_TRYLOCK) 760 return; 761 762 /* 763 * Check for duplicate locks of the same type. Note that we only 764 * have to check for this on the last lock we just acquired. Any 765 * other cases will be caught as lock order violations. 
766 */ 767 lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1]; 768 w1 = lock1->li_lock->lo_witness; 769 if (w1 == w) { 770 if (w->w_same_squawked || (lock->lo_flags & LO_DUPOK)) 771 return; 772 w->w_same_squawked = 1; 773 printf("acquiring duplicate lock of same type: \"%s\"\n", 774 lock->lo_type); 775 printf(" 1st %s @ %s:%d\n", lock1->li_lock->lo_name, 776 lock1->li_file, lock1->li_line); 777 printf(" 2nd %s @ %s:%d\n", lock->lo_name, file, line); 778 #ifdef KDB 779 goto debugger; 780 #else 781 return; 782 #endif 783 } 784 MPASS(!mtx_owned(&w_mtx)); 785 mtx_lock_spin(&w_mtx); 786 /* 787 * If we have a known higher number just say ok 788 */ 789 if (witness_watch > 1 && w->w_level > w1->w_level) { 790 mtx_unlock_spin(&w_mtx); 791 return; 792 } 793 /* 794 * If we know that the the lock we are acquiring comes after 795 * the lock we most recently acquired in the lock order tree, 796 * then there is no need for any further checks. 797 */ 798 if (isitmydescendant(w1, w)) { 799 mtx_unlock_spin(&w_mtx); 800 return; 801 } 802 for (j = 0, lle = *lock_list; lle != NULL; lle = lle->ll_next) { 803 for (i = lle->ll_count - 1; i >= 0; i--, j++) { 804 805 MPASS(j < WITNESS_COUNT); 806 lock1 = &lle->ll_children[i]; 807 w1 = lock1->li_lock->lo_witness; 808 809 /* 810 * If this lock doesn't undergo witness checking, 811 * then skip it. 812 */ 813 if (w1 == NULL) { 814 KASSERT((lock1->li_lock->lo_flags & LO_WITNESS) == 0, 815 ("lock missing witness structure")); 816 continue; 817 } 818 /* 819 * If we are locking Giant and this is a sleepable 820 * lock, then skip it. 821 */ 822 if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0 && 823 lock == &Giant.mtx_object) 824 continue; 825 /* 826 * If we are locking a sleepable lock and this lock 827 * is Giant, then skip it. 
828 */ 829 if ((lock->lo_flags & LO_SLEEPABLE) != 0 && 830 lock1->li_lock == &Giant.mtx_object) 831 continue; 832 /* 833 * If we are locking a sleepable lock and this lock 834 * isn't sleepable, we want to treat it as a lock 835 * order violation to enfore a general lock order of 836 * sleepable locks before non-sleepable locks. 837 */ 838 if (!((lock->lo_flags & LO_SLEEPABLE) != 0 && 839 (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0)) 840 /* 841 * Check the lock order hierarchy for a reveresal. 842 */ 843 if (!isitmydescendant(w, w1)) 844 continue; 845 /* 846 * We have a lock order violation, check to see if it 847 * is allowed or has already been yelled about. 848 */ 849 mtx_unlock_spin(&w_mtx); 850 #ifdef BLESSING 851 /* 852 * If the lock order is blessed, just bail. We don't 853 * look for other lock order violations though, which 854 * may be a bug. 855 */ 856 if (blessed(w, w1)) 857 return; 858 #endif 859 if (lock1->li_lock == &Giant.mtx_object) { 860 if (w1->w_Giant_squawked) 861 return; 862 else 863 w1->w_Giant_squawked = 1; 864 } else { 865 if (w1->w_other_squawked) 866 return; 867 else 868 w1->w_other_squawked = 1; 869 } 870 /* 871 * Ok, yell about it. 872 */ 873 printf("lock order reversal\n"); 874 /* 875 * Try to locate an earlier lock with 876 * witness w in our list. 
877 */ 878 do { 879 lock2 = &lle->ll_children[i]; 880 MPASS(lock2->li_lock != NULL); 881 if (lock2->li_lock->lo_witness == w) 882 break; 883 if (i == 0 && lle->ll_next != NULL) { 884 lle = lle->ll_next; 885 i = lle->ll_count - 1; 886 MPASS(i >= 0 && i < LOCK_NCHILDREN); 887 } else 888 i--; 889 } while (i >= 0); 890 if (i < 0) { 891 printf(" 1st %p %s (%s) @ %s:%d\n", 892 lock1->li_lock, lock1->li_lock->lo_name, 893 lock1->li_lock->lo_type, lock1->li_file, 894 lock1->li_line); 895 printf(" 2nd %p %s (%s) @ %s:%d\n", lock, 896 lock->lo_name, lock->lo_type, file, line); 897 } else { 898 printf(" 1st %p %s (%s) @ %s:%d\n", 899 lock2->li_lock, lock2->li_lock->lo_name, 900 lock2->li_lock->lo_type, lock2->li_file, 901 lock2->li_line); 902 printf(" 2nd %p %s (%s) @ %s:%d\n", 903 lock1->li_lock, lock1->li_lock->lo_name, 904 lock1->li_lock->lo_type, lock1->li_file, 905 lock1->li_line); 906 printf(" 3rd %p %s (%s) @ %s:%d\n", lock, 907 lock->lo_name, lock->lo_type, file, line); 908 } 909 #ifdef KDB 910 goto debugger; 911 #else 912 return; 913 #endif 914 } 915 } 916 lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1]; 917 /* 918 * If requested, build a new lock order. However, don't build a new 919 * relationship between a sleepable lock and Giant if it is in the 920 * wrong direction. The correct lock order is that sleepable locks 921 * always come before Giant. 922 */ 923 if (flags & LOP_NEWORDER && 924 !(lock1->li_lock == &Giant.mtx_object && 925 (lock->lo_flags & LO_SLEEPABLE) != 0)) { 926 CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__, 927 lock->lo_type, lock1->li_lock->lo_type); 928 if (!itismychild(lock1->li_lock->lo_witness, w)) 929 /* Witness is dead. 
*/ 930 return; 931 } 932 mtx_unlock_spin(&w_mtx); 933 return; 934 935 #ifdef KDB 936 debugger: 937 if (witness_trace) 938 kdb_backtrace(); 939 if (witness_kdb) 940 kdb_enter(__func__); 941 #endif 942 } 943 944 void 945 witness_lock(struct lock_object *lock, int flags, const char *file, int line) 946 { 947 struct lock_list_entry **lock_list, *lle; 948 struct lock_instance *instance; 949 struct witness *w; 950 struct thread *td; 951 952 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL || 953 panicstr != NULL) 954 return; 955 w = lock->lo_witness; 956 td = curthread; 957 file = fixup_filename(file); 958 959 /* Determine lock list for this lock. */ 960 if (lock->lo_class->lc_flags & LC_SLEEPLOCK) 961 lock_list = &td->td_sleeplocks; 962 else 963 lock_list = PCPU_PTR(spinlocks); 964 965 /* Check to see if we are recursing on a lock we already own. */ 966 instance = find_instance(*lock_list, lock); 967 if (instance != NULL) { 968 instance->li_flags++; 969 CTR4(KTR_WITNESS, "%s: pid %d recursed on %s r=%d", __func__, 970 td->td_proc->p_pid, lock->lo_name, 971 instance->li_flags & LI_RECURSEMASK); 972 instance->li_file = file; 973 instance->li_line = line; 974 return; 975 } 976 977 /* Update per-witness last file and line acquire. */ 978 w->w_file = file; 979 w->w_line = line; 980 981 /* Find the next open lock instance in the list and fill it. 
*/
	lle = *lock_list;
	/* No list entry yet, or the head entry is full: push a fresh one. */
	if (lle == NULL || lle->ll_count == LOCK_NCHILDREN) {
		lle = witness_lock_list_get();
		/* Out of list entries: the acquire goes unrecorded. */
		if (lle == NULL)
			return;
		lle->ll_next = *lock_list;
		CTR3(KTR_WITNESS, "%s: pid %d added lle %p", __func__,
		    td->td_proc->p_pid, lle);
		*lock_list = lle;
	}
	instance = &lle->ll_children[lle->ll_count++];
	instance->li_lock = lock;
	instance->li_line = line;
	instance->li_file = file;
	if ((flags & LOP_EXCLUSIVE) != 0)
		instance->li_flags = LI_EXCLUSIVE;
	else
		instance->li_flags = 0;
	CTR4(KTR_WITNESS, "%s: pid %d added %s as lle[%d]", __func__,
	    td->td_proc->p_pid, lock->lo_name, lle->ll_count - 1);
}

/*
 * Record a shared -> exclusive upgrade of a lock the current thread holds.
 * Panics unless the lock is an upgradable sleep lock, the operation is a try
 * operation (LOP_TRYLOCK), and the thread currently holds the lock shared
 * and unrecursed.  On success the instance is marked LI_EXCLUSIVE.
 */
void
witness_upgrade(struct lock_object *lock, int flags, const char *file, int line)
{
	struct lock_instance *instance;
	struct lock_class *class;

	KASSERT(!witness_cold, ("%s: witness_cold", __func__));
	if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL)
		return;
	class = lock->lo_class;
	file = fixup_filename(file);
	if ((lock->lo_flags & LO_UPGRADABLE) == 0)
		panic("upgrade of non-upgradable lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	if ((flags & LOP_TRYLOCK) == 0)
		panic("non-try upgrade of lock (%s) %s @ %s:%d", class->lc_name,
		    lock->lo_name, file, line);
	if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0)
		panic("upgrade of non-sleep lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	instance = find_instance(curthread->td_sleeplocks, lock);
	if (instance == NULL)
		panic("upgrade of unlocked lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	if ((instance->li_flags & LI_EXCLUSIVE) != 0)
		panic("upgrade of exclusive lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	if ((instance->li_flags & LI_RECURSEMASK) != 0)
		panic("upgrade of recursed lock (%s) %s r=%d @ %s:%d",
		    class->lc_name, lock->lo_name,
		    instance->li_flags & LI_RECURSEMASK, file, line);
	instance->li_flags |= LI_EXCLUSIVE;
}

/*
 * Record an exclusive -> shared downgrade of a lock the current thread
 * holds.  Panics unless the lock is an upgradable sleep lock that the thread
 * currently holds exclusively and unrecursed.  On success LI_EXCLUSIVE is
 * cleared on the instance.
 */
void
witness_downgrade(struct lock_object *lock, int flags, const char *file,
    int line)
{
	struct lock_instance *instance;
	struct lock_class *class;

	KASSERT(!witness_cold, ("%s: witness_cold", __func__));
	if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL)
		return;
	class = lock->lo_class;
	file = fixup_filename(file);
	if ((lock->lo_flags & LO_UPGRADABLE) == 0)
		panic("downgrade of non-upgradable lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0)
		panic("downgrade of non-sleep lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	instance = find_instance(curthread->td_sleeplocks, lock);
	if (instance == NULL)
		panic("downgrade of unlocked lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	if ((instance->li_flags & LI_EXCLUSIVE) == 0)
		panic("downgrade of shared lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	if ((instance->li_flags & LI_RECURSEMASK) != 0)
		panic("downgrade of recursed lock (%s) %s r=%d @ %s:%d",
		    class->lc_name, lock->lo_name,
		    instance->li_flags & LI_RECURSEMASK, file, line);
	instance->li_flags &= ~LI_EXCLUSIVE;
}

void
witness_unlock(struct lock_object *lock, int flags, const char *file, int line)
{
	struct lock_list_entry **lock_list, *lle;
	struct lock_instance *instance;
	struct lock_class *class;
	struct thread *td;
	register_t s;
	int i, j;

	if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL ||
	    panicstr != NULL)
		return;
	td = curthread;
	class = lock->lo_class;
	file = fixup_filename(file);

	/* Find lock instance
associated with this lock. */ 1088 if (class->lc_flags & LC_SLEEPLOCK) 1089 lock_list = &td->td_sleeplocks; 1090 else 1091 lock_list = PCPU_PTR(spinlocks); 1092 for (; *lock_list != NULL; lock_list = &(*lock_list)->ll_next) 1093 for (i = 0; i < (*lock_list)->ll_count; i++) { 1094 instance = &(*lock_list)->ll_children[i]; 1095 if (instance->li_lock == lock) 1096 goto found; 1097 } 1098 panic("lock (%s) %s not locked @ %s:%d", class->lc_name, lock->lo_name, 1099 file, line); 1100 found: 1101 1102 /* First, check for shared/exclusive mismatches. */ 1103 if ((instance->li_flags & LI_EXCLUSIVE) != 0 && 1104 (flags & LOP_EXCLUSIVE) == 0) { 1105 printf("shared unlock of (%s) %s @ %s:%d\n", class->lc_name, 1106 lock->lo_name, file, line); 1107 printf("while exclusively locked from %s:%d\n", 1108 instance->li_file, instance->li_line); 1109 panic("excl->ushare"); 1110 } 1111 if ((instance->li_flags & LI_EXCLUSIVE) == 0 && 1112 (flags & LOP_EXCLUSIVE) != 0) { 1113 printf("exclusive unlock of (%s) %s @ %s:%d\n", class->lc_name, 1114 lock->lo_name, file, line); 1115 printf("while share locked from %s:%d\n", instance->li_file, 1116 instance->li_line); 1117 panic("share->uexcl"); 1118 } 1119 1120 /* If we are recursed, unrecurse. */ 1121 if ((instance->li_flags & LI_RECURSEMASK) > 0) { 1122 CTR4(KTR_WITNESS, "%s: pid %d unrecursed on %s r=%d", __func__, 1123 td->td_proc->p_pid, instance->li_lock->lo_name, 1124 instance->li_flags); 1125 instance->li_flags--; 1126 return; 1127 } 1128 1129 /* Otherwise, remove this item from the list. */ 1130 s = intr_disable(); 1131 CTR4(KTR_WITNESS, "%s: pid %d removed %s from lle[%d]", __func__, 1132 td->td_proc->p_pid, instance->li_lock->lo_name, 1133 (*lock_list)->ll_count - 1); 1134 for (j = i; j < (*lock_list)->ll_count - 1; j++) 1135 (*lock_list)->ll_children[j] = 1136 (*lock_list)->ll_children[j + 1]; 1137 (*lock_list)->ll_count--; 1138 intr_restore(s); 1139 1140 /* If this lock list entry is now empty, free it. 
*/ 1141 if ((*lock_list)->ll_count == 0) { 1142 lle = *lock_list; 1143 *lock_list = lle->ll_next; 1144 CTR3(KTR_WITNESS, "%s: pid %d removed lle %p", __func__, 1145 td->td_proc->p_pid, lle); 1146 witness_lock_list_free(lle); 1147 } 1148 } 1149 1150 /* 1151 * Warn if any locks other than 'lock' are held. Flags can be passed in to 1152 * exempt Giant and sleepable locks from the checks as well. If any 1153 * non-exempt locks are held, then a supplied message is printed to the 1154 * console along with a list of the offending locks. If indicated in the 1155 * flags then a failure results in a panic as well. 1156 */ 1157 int 1158 witness_warn(int flags, struct lock_object *lock, const char *fmt, ...) 1159 { 1160 struct lock_list_entry *lle; 1161 struct lock_instance *lock1; 1162 struct thread *td; 1163 va_list ap; 1164 int i, n; 1165 1166 if (witness_cold || witness_watch == 0 || panicstr != NULL) 1167 return (0); 1168 n = 0; 1169 td = curthread; 1170 for (lle = td->td_sleeplocks; lle != NULL; lle = lle->ll_next) 1171 for (i = lle->ll_count - 1; i >= 0; i--) { 1172 lock1 = &lle->ll_children[i]; 1173 if (lock1->li_lock == lock) 1174 continue; 1175 if (flags & WARN_GIANTOK && 1176 lock1->li_lock == &Giant.mtx_object) 1177 continue; 1178 if (flags & WARN_SLEEPOK && 1179 (lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0) 1180 continue; 1181 if (n == 0) { 1182 va_start(ap, fmt); 1183 vprintf(fmt, ap); 1184 va_end(ap); 1185 printf(" with the following"); 1186 if (flags & WARN_SLEEPOK) 1187 printf(" non-sleepable"); 1188 printf(" locks held:\n"); 1189 } 1190 n++; 1191 witness_list_lock(lock1); 1192 } 1193 if (PCPU_GET(spinlocks) != NULL) { 1194 /* 1195 * Since we already hold a spinlock preemption is 1196 * already blocked. 
1197 */ 1198 if (n == 0) { 1199 va_start(ap, fmt); 1200 vprintf(fmt, ap); 1201 va_end(ap); 1202 printf(" with the following"); 1203 if (flags & WARN_SLEEPOK) 1204 printf(" non-sleepable"); 1205 printf(" locks held:\n"); 1206 } 1207 n += witness_list_locks(PCPU_PTR(spinlocks)); 1208 } 1209 if (flags & WARN_PANIC && n) 1210 panic("witness_warn"); 1211 #ifdef KDB 1212 else if (witness_kdb && n) 1213 kdb_enter(__func__); 1214 else if (witness_trace && n) 1215 kdb_backtrace(); 1216 #endif 1217 return (n); 1218 } 1219 1220 const char * 1221 witness_file(struct lock_object *lock) 1222 { 1223 struct witness *w; 1224 1225 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL) 1226 return ("?"); 1227 w = lock->lo_witness; 1228 return (w->w_file); 1229 } 1230 1231 int 1232 witness_line(struct lock_object *lock) 1233 { 1234 struct witness *w; 1235 1236 if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL) 1237 return (0); 1238 w = lock->lo_witness; 1239 return (w->w_line); 1240 } 1241 1242 static struct witness * 1243 enroll(const char *description, struct lock_class *lock_class) 1244 { 1245 struct witness *w; 1246 1247 if (witness_watch == 0 || panicstr != NULL) 1248 return (NULL); 1249 if ((lock_class->lc_flags & LC_SPINLOCK) && witness_skipspin) 1250 return (NULL); 1251 mtx_lock_spin(&w_mtx); 1252 STAILQ_FOREACH(w, &w_all, w_list) { 1253 if (w->w_name == description || (w->w_refcount > 0 && 1254 strcmp(description, w->w_name) == 0)) { 1255 w->w_refcount++; 1256 mtx_unlock_spin(&w_mtx); 1257 if (lock_class != w->w_class) 1258 panic( 1259 "lock (%s) %s does not match earlier (%s) lock", 1260 description, lock_class->lc_name, 1261 w->w_class->lc_name); 1262 return (w); 1263 } 1264 } 1265 /* 1266 * This isn't quite right, as witness_cold is still 0 while we 1267 * enroll all the locks initialized before witness_initialize(). 
1268 */ 1269 if ((lock_class->lc_flags & LC_SPINLOCK) && !witness_cold) { 1270 mtx_unlock_spin(&w_mtx); 1271 panic("spin lock %s not in order list", description); 1272 } 1273 if ((w = witness_get()) == NULL) 1274 return (NULL); 1275 w->w_name = description; 1276 w->w_class = lock_class; 1277 w->w_refcount = 1; 1278 STAILQ_INSERT_HEAD(&w_all, w, w_list); 1279 if (lock_class->lc_flags & LC_SPINLOCK) 1280 STAILQ_INSERT_HEAD(&w_spin, w, w_typelist); 1281 else if (lock_class->lc_flags & LC_SLEEPLOCK) 1282 STAILQ_INSERT_HEAD(&w_sleep, w, w_typelist); 1283 else { 1284 mtx_unlock_spin(&w_mtx); 1285 panic("lock class %s is not sleep or spin", 1286 lock_class->lc_name); 1287 } 1288 mtx_unlock_spin(&w_mtx); 1289 return (w); 1290 } 1291 1292 /* Don't let the door bang you on the way out... */ 1293 static int 1294 depart(struct witness *w) 1295 { 1296 struct witness_child_list_entry *wcl, *nwcl; 1297 struct witness_list *list; 1298 struct witness *parent; 1299 1300 MPASS(w->w_refcount == 0); 1301 if (w->w_class->lc_flags & LC_SLEEPLOCK) 1302 list = &w_sleep; 1303 else 1304 list = &w_spin; 1305 /* 1306 * First, we run through the entire tree looking for any 1307 * witnesses that the outgoing witness is a child of. For 1308 * each parent that we find, we reparent all the direct 1309 * children of the outgoing witness to its parent. 1310 */ 1311 STAILQ_FOREACH(parent, list, w_typelist) { 1312 if (!isitmychild(parent, w)) 1313 continue; 1314 removechild(parent, w); 1315 if (!reparentchildren(parent, w)) 1316 return (0); 1317 } 1318 1319 /* 1320 * Now we go through and free up the child list of the 1321 * outgoing witness. 1322 */ 1323 for (wcl = w->w_children; wcl != NULL; wcl = nwcl) { 1324 nwcl = wcl->wcl_next; 1325 witness_child_free(wcl); 1326 } 1327 1328 /* 1329 * Detach from various lists and free. 1330 */ 1331 STAILQ_REMOVE(list, w, witness, w_typelist); 1332 STAILQ_REMOVE(&w_all, w, witness, w_list); 1333 witness_free(w); 1334 1335 /* Finally, fixup the tree. 
*/ 1336 return (rebalancetree(list)); 1337 } 1338 1339 /* 1340 * Prune an entire lock order tree. We look for cases where a lock 1341 * is now both a descendant and a direct child of a given lock. In 1342 * that case, we want to remove the direct child link from the tree. 1343 * 1344 * Returns false if insertchild() fails. 1345 */ 1346 static int 1347 rebalancetree(struct witness_list *list) 1348 { 1349 struct witness *child, *parent; 1350 1351 STAILQ_FOREACH(child, list, w_typelist) { 1352 STAILQ_FOREACH(parent, list, w_typelist) { 1353 if (!isitmychild(parent, child)) 1354 continue; 1355 removechild(parent, child); 1356 if (isitmydescendant(parent, child)) 1357 continue; 1358 if (!insertchild(parent, child)) 1359 return (0); 1360 } 1361 } 1362 witness_levelall(); 1363 return (1); 1364 } 1365 1366 /* 1367 * Add "child" as a direct child of "parent". Returns false if 1368 * we fail due to out of memory. 1369 */ 1370 static int 1371 insertchild(struct witness *parent, struct witness *child) 1372 { 1373 struct witness_child_list_entry **wcl; 1374 1375 MPASS(child != NULL && parent != NULL); 1376 1377 /* 1378 * Insert "child" after "parent" 1379 */ 1380 wcl = &parent->w_children; 1381 while (*wcl != NULL && (*wcl)->wcl_count == WITNESS_NCHILDREN) 1382 wcl = &(*wcl)->wcl_next; 1383 if (*wcl == NULL) { 1384 *wcl = witness_child_get(); 1385 if (*wcl == NULL) 1386 return (0); 1387 } 1388 (*wcl)->wcl_children[(*wcl)->wcl_count++] = child; 1389 1390 return (1); 1391 } 1392 1393 /* 1394 * Make all the direct descendants of oldparent be direct descendants 1395 * of newparent. 1396 */ 1397 static int 1398 reparentchildren(struct witness *newparent, struct witness *oldparent) 1399 { 1400 struct witness_child_list_entry *wcl; 1401 int i; 1402 1403 /* Avoid making a witness a child of itself. 
*/ 1404 MPASS(!isitmychild(oldparent, newparent)); 1405 1406 for (wcl = oldparent->w_children; wcl != NULL; wcl = wcl->wcl_next) 1407 for (i = 0; i < wcl->wcl_count; i++) 1408 if (!insertchild(newparent, wcl->wcl_children[i])) 1409 return (0); 1410 return (1); 1411 } 1412 1413 static int 1414 itismychild(struct witness *parent, struct witness *child) 1415 { 1416 struct witness_list *list; 1417 1418 MPASS(child != NULL && parent != NULL); 1419 if ((parent->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) != 1420 (child->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK))) 1421 panic( 1422 "%s: parent (%s) and child (%s) are not the same lock type", 1423 __func__, parent->w_class->lc_name, 1424 child->w_class->lc_name); 1425 1426 if (!insertchild(parent, child)) 1427 return (0); 1428 1429 if (parent->w_class->lc_flags & LC_SLEEPLOCK) 1430 list = &w_sleep; 1431 else 1432 list = &w_spin; 1433 return (rebalancetree(list)); 1434 } 1435 1436 static void 1437 removechild(struct witness *parent, struct witness *child) 1438 { 1439 struct witness_child_list_entry **wcl, *wcl1; 1440 int i; 1441 1442 for (wcl = &parent->w_children; *wcl != NULL; wcl = &(*wcl)->wcl_next) 1443 for (i = 0; i < (*wcl)->wcl_count; i++) 1444 if ((*wcl)->wcl_children[i] == child) 1445 goto found; 1446 return; 1447 found: 1448 (*wcl)->wcl_count--; 1449 if ((*wcl)->wcl_count > i) 1450 (*wcl)->wcl_children[i] = 1451 (*wcl)->wcl_children[(*wcl)->wcl_count]; 1452 MPASS((*wcl)->wcl_children[i] != NULL); 1453 if ((*wcl)->wcl_count != 0) 1454 return; 1455 wcl1 = *wcl; 1456 *wcl = wcl1->wcl_next; 1457 witness_child_free(wcl1); 1458 } 1459 1460 static int 1461 isitmychild(struct witness *parent, struct witness *child) 1462 { 1463 struct witness_child_list_entry *wcl; 1464 int i; 1465 1466 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) { 1467 for (i = 0; i < wcl->wcl_count; i++) { 1468 if (wcl->wcl_children[i] == child) 1469 return (1); 1470 } 1471 } 1472 return (0); 1473 } 1474 1475 static int 
1476 isitmydescendant(struct witness *parent, struct witness *child) 1477 { 1478 struct witness_child_list_entry *wcl; 1479 int i, j; 1480 1481 if (isitmychild(parent, child)) 1482 return (1); 1483 j = 0; 1484 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) { 1485 MPASS(j < 1000); 1486 for (i = 0; i < wcl->wcl_count; i++) { 1487 if (isitmydescendant(wcl->wcl_children[i], child)) 1488 return (1); 1489 } 1490 j++; 1491 } 1492 return (0); 1493 } 1494 1495 static void 1496 witness_levelall (void) 1497 { 1498 struct witness_list *list; 1499 struct witness *w, *w1; 1500 1501 /* 1502 * First clear all levels. 1503 */ 1504 STAILQ_FOREACH(w, &w_all, w_list) { 1505 w->w_level = 0; 1506 } 1507 1508 /* 1509 * Look for locks with no parent and level all their descendants. 1510 */ 1511 STAILQ_FOREACH(w, &w_all, w_list) { 1512 /* 1513 * This is just an optimization, technically we could get 1514 * away just walking the all list each time. 1515 */ 1516 if (w->w_class->lc_flags & LC_SLEEPLOCK) 1517 list = &w_sleep; 1518 else 1519 list = &w_spin; 1520 STAILQ_FOREACH(w1, list, w_typelist) { 1521 if (isitmychild(w1, w)) 1522 goto skip; 1523 } 1524 witness_leveldescendents(w, 0); 1525 skip: 1526 ; /* silence GCC 3.x */ 1527 } 1528 } 1529 1530 static void 1531 witness_leveldescendents(struct witness *parent, int level) 1532 { 1533 struct witness_child_list_entry *wcl; 1534 int i; 1535 1536 if (parent->w_level < level) 1537 parent->w_level = level; 1538 level++; 1539 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) 1540 for (i = 0; i < wcl->wcl_count; i++) 1541 witness_leveldescendents(wcl->wcl_children[i], level); 1542 } 1543 1544 static void 1545 witness_displaydescendants(void(*prnt)(const char *fmt, ...), 1546 struct witness *parent, int indent) 1547 { 1548 struct witness_child_list_entry *wcl; 1549 int i, level; 1550 1551 level = parent->w_level; 1552 prnt("%-2d", level); 1553 for (i = 0; i < indent; i++) 1554 prnt(" "); 1555 if (parent->w_refcount > 
0) 1556 prnt("%s", parent->w_name); 1557 else 1558 prnt("(dead)"); 1559 if (parent->w_displayed) { 1560 prnt(" -- (already displayed)\n"); 1561 return; 1562 } 1563 parent->w_displayed = 1; 1564 if (parent->w_refcount > 0) { 1565 if (parent->w_file != NULL) 1566 prnt(" -- last acquired @ %s:%d", parent->w_file, 1567 parent->w_line); 1568 } 1569 prnt("\n"); 1570 for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) 1571 for (i = 0; i < wcl->wcl_count; i++) 1572 witness_displaydescendants(prnt, 1573 wcl->wcl_children[i], indent + 1); 1574 } 1575 1576 #ifdef BLESSING 1577 static int 1578 blessed(struct witness *w1, struct witness *w2) 1579 { 1580 int i; 1581 struct witness_blessed *b; 1582 1583 for (i = 0; i < blessed_count; i++) { 1584 b = &blessed_list[i]; 1585 if (strcmp(w1->w_name, b->b_lock1) == 0) { 1586 if (strcmp(w2->w_name, b->b_lock2) == 0) 1587 return (1); 1588 continue; 1589 } 1590 if (strcmp(w1->w_name, b->b_lock2) == 0) 1591 if (strcmp(w2->w_name, b->b_lock1) == 0) 1592 return (1); 1593 } 1594 return (0); 1595 } 1596 #endif 1597 1598 static struct witness * 1599 witness_get(void) 1600 { 1601 struct witness *w; 1602 1603 if (witness_watch == 0) { 1604 mtx_unlock_spin(&w_mtx); 1605 return (NULL); 1606 } 1607 if (STAILQ_EMPTY(&w_free)) { 1608 witness_watch = 0; 1609 mtx_unlock_spin(&w_mtx); 1610 printf("%s: witness exhausted\n", __func__); 1611 return (NULL); 1612 } 1613 w = STAILQ_FIRST(&w_free); 1614 STAILQ_REMOVE_HEAD(&w_free, w_list); 1615 bzero(w, sizeof(*w)); 1616 return (w); 1617 } 1618 1619 static void 1620 witness_free(struct witness *w) 1621 { 1622 1623 STAILQ_INSERT_HEAD(&w_free, w, w_list); 1624 } 1625 1626 static struct witness_child_list_entry * 1627 witness_child_get(void) 1628 { 1629 struct witness_child_list_entry *wcl; 1630 1631 if (witness_watch == 0) { 1632 mtx_unlock_spin(&w_mtx); 1633 return (NULL); 1634 } 1635 wcl = w_child_free; 1636 if (wcl == NULL) { 1637 witness_watch = 0; 1638 mtx_unlock_spin(&w_mtx); 1639 printf("%s: 
witness exhausted\n", __func__); 1640 return (NULL); 1641 } 1642 w_child_free = wcl->wcl_next; 1643 bzero(wcl, sizeof(*wcl)); 1644 return (wcl); 1645 } 1646 1647 static void 1648 witness_child_free(struct witness_child_list_entry *wcl) 1649 { 1650 1651 wcl->wcl_next = w_child_free; 1652 w_child_free = wcl; 1653 } 1654 1655 static struct lock_list_entry * 1656 witness_lock_list_get(void) 1657 { 1658 struct lock_list_entry *lle; 1659 1660 if (witness_watch == 0) 1661 return (NULL); 1662 mtx_lock_spin(&w_mtx); 1663 lle = w_lock_list_free; 1664 if (lle == NULL) { 1665 witness_watch = 0; 1666 mtx_unlock_spin(&w_mtx); 1667 printf("%s: witness exhausted\n", __func__); 1668 return (NULL); 1669 } 1670 w_lock_list_free = lle->ll_next; 1671 mtx_unlock_spin(&w_mtx); 1672 bzero(lle, sizeof(*lle)); 1673 return (lle); 1674 } 1675 1676 static void 1677 witness_lock_list_free(struct lock_list_entry *lle) 1678 { 1679 1680 mtx_lock_spin(&w_mtx); 1681 lle->ll_next = w_lock_list_free; 1682 w_lock_list_free = lle; 1683 mtx_unlock_spin(&w_mtx); 1684 } 1685 1686 static struct lock_instance * 1687 find_instance(struct lock_list_entry *lock_list, struct lock_object *lock) 1688 { 1689 struct lock_list_entry *lle; 1690 struct lock_instance *instance; 1691 int i; 1692 1693 for (lle = lock_list; lle != NULL; lle = lle->ll_next) 1694 for (i = lle->ll_count - 1; i >= 0; i--) { 1695 instance = &lle->ll_children[i]; 1696 if (instance->li_lock == lock) 1697 return (instance); 1698 } 1699 return (NULL); 1700 } 1701 1702 static void 1703 witness_list_lock(struct lock_instance *instance) 1704 { 1705 struct lock_object *lock; 1706 1707 lock = instance->li_lock; 1708 printf("%s %s %s", (instance->li_flags & LI_EXCLUSIVE) != 0 ? 
1709 "exclusive" : "shared", lock->lo_class->lc_name, lock->lo_name); 1710 if (lock->lo_type != lock->lo_name) 1711 printf(" (%s)", lock->lo_type); 1712 printf(" r = %d (%p) locked @ %s:%d\n", 1713 instance->li_flags & LI_RECURSEMASK, lock, instance->li_file, 1714 instance->li_line); 1715 } 1716 1717 int 1718 witness_list_locks(struct lock_list_entry **lock_list) 1719 { 1720 struct lock_list_entry *lle; 1721 int i, nheld; 1722 1723 nheld = 0; 1724 for (lle = *lock_list; lle != NULL; lle = lle->ll_next) 1725 for (i = lle->ll_count - 1; i >= 0; i--) { 1726 witness_list_lock(&lle->ll_children[i]); 1727 nheld++; 1728 } 1729 return (nheld); 1730 } 1731 1732 /* 1733 * This is a bit risky at best. We call this function when we have timed 1734 * out acquiring a spin lock, and we assume that the other CPU is stuck 1735 * with this lock held. So, we go groveling around in the other CPU's 1736 * per-cpu data to try to find the lock instance for this spin lock to 1737 * see when it was last acquired. 
1738 */ 1739 void 1740 witness_display_spinlock(struct lock_object *lock, struct thread *owner) 1741 { 1742 struct lock_instance *instance; 1743 struct pcpu *pc; 1744 1745 if (owner->td_critnest == 0 || owner->td_oncpu == NOCPU) 1746 return; 1747 pc = pcpu_find(owner->td_oncpu); 1748 instance = find_instance(pc->pc_spinlocks, lock); 1749 if (instance != NULL) 1750 witness_list_lock(instance); 1751 } 1752 1753 void 1754 witness_save(struct lock_object *lock, const char **filep, int *linep) 1755 { 1756 struct lock_instance *instance; 1757 1758 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1759 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1760 return; 1761 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0) 1762 panic("%s: lock (%s) %s is not a sleep lock", __func__, 1763 lock->lo_class->lc_name, lock->lo_name); 1764 instance = find_instance(curthread->td_sleeplocks, lock); 1765 if (instance == NULL) 1766 panic("%s: lock (%s) %s not locked", __func__, 1767 lock->lo_class->lc_name, lock->lo_name); 1768 *filep = instance->li_file; 1769 *linep = instance->li_line; 1770 } 1771 1772 void 1773 witness_restore(struct lock_object *lock, const char *file, int line) 1774 { 1775 struct lock_instance *instance; 1776 1777 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1778 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1779 return; 1780 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0) 1781 panic("%s: lock (%s) %s is not a sleep lock", __func__, 1782 lock->lo_class->lc_name, lock->lo_name); 1783 instance = find_instance(curthread->td_sleeplocks, lock); 1784 if (instance == NULL) 1785 panic("%s: lock (%s) %s not locked", __func__, 1786 lock->lo_class->lc_name, lock->lo_name); 1787 lock->lo_witness->w_file = file; 1788 lock->lo_witness->w_line = line; 1789 instance->li_file = file; 1790 instance->li_line = line; 1791 } 1792 1793 void 1794 witness_assert(struct lock_object *lock, int flags, const char *file, int 
line) 1795 { 1796 #ifdef INVARIANT_SUPPORT 1797 struct lock_instance *instance; 1798 1799 if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) 1800 return; 1801 if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) != 0) 1802 instance = find_instance(curthread->td_sleeplocks, lock); 1803 else if ((lock->lo_class->lc_flags & LC_SPINLOCK) != 0) 1804 instance = find_instance(PCPU_GET(spinlocks), lock); 1805 else { 1806 panic("Lock (%s) %s is not sleep or spin!", 1807 lock->lo_class->lc_name, lock->lo_name); 1808 } 1809 file = fixup_filename(file); 1810 switch (flags) { 1811 case LA_UNLOCKED: 1812 if (instance != NULL) 1813 panic("Lock (%s) %s locked @ %s:%d.", 1814 lock->lo_class->lc_name, lock->lo_name, file, line); 1815 break; 1816 case LA_LOCKED: 1817 case LA_LOCKED | LA_RECURSED: 1818 case LA_LOCKED | LA_NOTRECURSED: 1819 case LA_SLOCKED: 1820 case LA_SLOCKED | LA_RECURSED: 1821 case LA_SLOCKED | LA_NOTRECURSED: 1822 case LA_XLOCKED: 1823 case LA_XLOCKED | LA_RECURSED: 1824 case LA_XLOCKED | LA_NOTRECURSED: 1825 if (instance == NULL) { 1826 panic("Lock (%s) %s not locked @ %s:%d.", 1827 lock->lo_class->lc_name, lock->lo_name, file, line); 1828 break; 1829 } 1830 if ((flags & LA_XLOCKED) != 0 && 1831 (instance->li_flags & LI_EXCLUSIVE) == 0) 1832 panic("Lock (%s) %s not exclusively locked @ %s:%d.", 1833 lock->lo_class->lc_name, lock->lo_name, file, line); 1834 if ((flags & LA_SLOCKED) != 0 && 1835 (instance->li_flags & LI_EXCLUSIVE) != 0) 1836 panic("Lock (%s) %s exclusively locked @ %s:%d.", 1837 lock->lo_class->lc_name, lock->lo_name, file, line); 1838 if ((flags & LA_RECURSED) != 0 && 1839 (instance->li_flags & LI_RECURSEMASK) == 0) 1840 panic("Lock (%s) %s not recursed @ %s:%d.", 1841 lock->lo_class->lc_name, lock->lo_name, file, line); 1842 if ((flags & LA_NOTRECURSED) != 0 && 1843 (instance->li_flags & LI_RECURSEMASK) != 0) 1844 panic("Lock (%s) %s recursed @ %s:%d.", 1845 lock->lo_class->lc_name, lock->lo_name, file, line); 1846 break; 1847 
default: 1848 panic("Invalid lock assertion at %s:%d.", file, line); 1849 1850 } 1851 #endif /* INVARIANT_SUPPORT */ 1852 } 1853 1854 #ifdef DDB 1855 static void 1856 witness_list(struct thread *td) 1857 { 1858 1859 KASSERT(!witness_cold, ("%s: witness_cold", __func__)); 1860 KASSERT(kdb_active, ("%s: not in the debugger", __func__)); 1861 1862 if (witness_watch == 0) 1863 return; 1864 1865 witness_list_locks(&td->td_sleeplocks); 1866 1867 /* 1868 * We only handle spinlocks if td == curthread. This is somewhat broken 1869 * if td is currently executing on some other CPU and holds spin locks 1870 * as we won't display those locks. If we had a MI way of getting 1871 * the per-cpu data for a given cpu then we could use 1872 * td->td_oncpu to get the list of spinlocks for this thread 1873 * and "fix" this. 1874 * 1875 * That still wouldn't really fix this unless we locked sched_lock 1876 * or stopped the other CPU to make sure it wasn't changing the list 1877 * out from under us. It is probably best to just not try to handle 1878 * threads on other CPU's for now. 1879 */ 1880 if (td == curthread && PCPU_GET(spinlocks) != NULL) 1881 witness_list_locks(PCPU_PTR(spinlocks)); 1882 } 1883 1884 DB_SHOW_COMMAND(locks, db_witness_list) 1885 { 1886 struct thread *td; 1887 pid_t pid; 1888 struct proc *p; 1889 1890 if (have_addr) { 1891 pid = (addr % 16) + ((addr >> 4) % 16) * 10 + 1892 ((addr >> 8) % 16) * 100 + ((addr >> 12) % 16) * 1000 + 1893 ((addr >> 16) % 16) * 10000; 1894 /* sx_slock(&allproc_lock); */ 1895 FOREACH_PROC_IN_SYSTEM(p) { 1896 if (p->p_pid == pid) 1897 break; 1898 } 1899 /* sx_sunlock(&allproc_lock); */ 1900 if (p == NULL) { 1901 db_printf("pid %d not found\n", pid); 1902 return; 1903 } 1904 FOREACH_THREAD_IN_PROC(p, td) { 1905 witness_list(td); 1906 } 1907 } else { 1908 td = curthread; 1909 witness_list(td); 1910 } 1911 } 1912 1913 DB_SHOW_COMMAND(witness, db_witness_display) 1914 { 1915 1916 witness_display(db_printf); 1917 } 1918 #endif 1919