/*-
 * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
 *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
 */

/*
 * Implementation of the `witness' lock verifier.  Originally implemented for
 * mutexes in BSD/OS.  Extended to handle generic lock objects and lock
 * classes in FreeBSD.
 */

/*
 * Main Entry: witness
 * Pronunciation: 'wit-n&s
 * Function: noun
 * Etymology: Middle English witnesse, from Old English witnes knowledge,
 *    testimony, witness, from 2wit
 * Date: before 12th century
 * 1 : attestation of a fact or event : TESTIMONY
 * 2 : one that gives evidence; specifically : one who testifies in
 *    a cause or before a judicial tribunal
 * 3 : one asked to be present at a transaction so as to be able to
 *    testify to its having taken place
 * 4 : one who has personal knowledge of something
 * 5 a : something serving as evidence or proof : SIGN
 *   b : public affirmation by word or example of usually
 *    religious faith or conviction <the heroic witness to divine
 *    life -- Pilot>
 * 6 capitalized : a member of the Jehovah's Witnesses
 */

/*
 * Special rules concerning Giant and lock orders:
 *
 * 1) Giant must be acquired before any other mutexes.  Stated another way,
 *    no other mutex may be held when Giant is acquired.
 *
 * 2) Giant must be released when blocking on a sleepable lock.
 *
 * This rule is less obvious, but is a result of Giant providing the same
 * semantics as spl().  Basically, when a thread sleeps, it must release
 * Giant.  When a thread blocks on a sleepable lock, it sleeps.  Hence rule
 * 2).
 *
 * 3) Giant may be acquired before or after sleepable locks.
 *
 * This rule is also not quite as obvious.  Giant may be acquired after
 * a sleepable lock because it is a non-sleepable lock and non-sleepable
 * locks may always be acquired while holding a sleepable lock.  The second
 * case, Giant before a sleepable lock, follows from rule 2) above.  Suppose
 * you have two threads T1 and T2 and a sleepable lock X.  Suppose that T1
 * acquires X and blocks on Giant.  Then suppose that T2 acquires Giant and
 * blocks on X.  When T2 blocks on X, T2 will release Giant allowing T1 to
 * execute.  Thus, acquiring Giant both before and after a sleepable lock
 * will not result in a lock order reversal.
 */
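/*
 * A small illustrative sketch of rule 1) (the mutex "foo" below is
 * hypothetical, not a real kernel lock):
 *
 *	mtx_lock(&Giant);
 *	mtx_lock(&foo);		OK: Giant first, then other mutexes
 *	mtx_unlock(&foo);
 *	mtx_unlock(&Giant);
 *
 *	mtx_lock(&foo);
 *	mtx_lock(&Giant);	violation: another mutex held at Giant acquire
 *
 * The second sequence is exactly the sort of reversal that the checking
 * code below reports.
 */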
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_witness.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <ddb/ddb.h>

#include <machine/stdarg.h>

/* Define this to check for blessed mutexes */
#undef BLESSING

#define	WITNESS_COUNT		200
#define	WITNESS_CHILDCOUNT	(WITNESS_COUNT * 4)
/*
 * XXX: This is somewhat bogus, as we assume here that at most 1024 threads
 * will hold LOCK_NCHILDREN * 2 locks.  We handle failure ok, and we should
 * probably be safe for the most part, but it's still a SWAG.
 */
#define	LOCK_CHILDCOUNT		(MAXCPU + 1024) * 2

#define	WITNESS_NCHILDREN	6

struct witness_child_list_entry;

struct witness {
	const	char *w_name;
	struct	lock_class *w_class;
	STAILQ_ENTRY(witness) w_list;		/* List of all witnesses. */
	STAILQ_ENTRY(witness) w_typelist;	/* Witnesses of a type. */
	struct	witness_child_list_entry *w_children;	/* Great evilness... */
	const	char *w_file;
	int	w_line;
	u_int	w_level;
	u_int	w_refcount;
	u_char	w_Giant_squawked:1;
	u_char	w_other_squawked:1;
	u_char	w_same_squawked:1;
	u_char	w_displayed:1;
};

struct witness_child_list_entry {
	struct	witness_child_list_entry *wcl_next;
	struct	witness *wcl_children[WITNESS_NCHILDREN];
	u_int	wcl_count;
};

STAILQ_HEAD(witness_list, witness);

#ifdef BLESSING
struct witness_blessed {
	const	char *b_lock1;
	const	char *b_lock2;
};
#endif

struct witness_order_list_entry {
	const	char *w_name;
	struct	lock_class *w_class;
};

#ifdef BLESSING
static int	blessed(struct witness *, struct witness *);
#endif
static int	depart(struct witness *w);
static struct	witness *enroll(const char *description,
				struct lock_class *lock_class);
static int	insertchild(struct witness *parent, struct witness *child);
static int	isitmychild(struct witness *parent, struct witness *child);
static int	isitmydescendant(struct witness *parent, struct witness *child);
static int	itismychild(struct witness *parent, struct witness *child);
static int	rebalancetree(struct witness_list *list);
static void	removechild(struct witness *parent, struct witness *child);
static int	reparentchildren(struct witness *newparent,
		    struct witness *oldparent);
static int	sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS);
static void	witness_displaydescendants(void(*)(const char *fmt, ...),
					   struct witness *, int indent);
static const char *fixup_filename(const char *file);
static void	witness_leveldescendents(struct witness *parent, int level);
static void	witness_levelall(void);
static struct	witness *witness_get(void);
static void	witness_free(struct witness *m);
static struct	witness_child_list_entry *witness_child_get(void);
static void	witness_child_free(struct witness_child_list_entry *wcl);
static struct	lock_list_entry *witness_lock_list_get(void);
static void	witness_lock_list_free(struct lock_list_entry *lle);
static struct	lock_instance *find_instance(struct lock_list_entry *lock_list,
					     struct lock_object *lock);
static void	witness_list_lock(struct lock_instance *instance);
#ifdef DDB
static void	witness_list(struct thread *td);
static void	witness_display_list(void(*prnt)(const char *fmt, ...),
				     struct witness_list *list);
static void	witness_display(void(*)(const char *fmt, ...));
#endif

MALLOC_DEFINE(M_WITNESS, "witness", "witness structure");

/*
 * If set to 0, witness is disabled.  If set to 1, witness performs full lock
 * order checking for all locks.  If set to 2 or higher, then witness skips
 * the full lock order check if the lock being acquired is at a higher level
 * (i.e. farther down in the tree) than the current lock.  This last mode is
 * somewhat experimental and not considered fully safe.  At runtime, this
 * value may be set to 0 to turn off witness.  witness is not allowed to be
 * turned on once it is turned off, however.
 */
static int witness_watch = 1;
TUNABLE_INT("debug.witness_watch", &witness_watch);
SYSCTL_PROC(_debug, OID_AUTO, witness_watch, CTLFLAG_RW | CTLTYPE_INT, NULL, 0,
    sysctl_debug_witness_watch, "I", "witness is watching lock operations");

#ifdef DDB
/*
 * When DDB is enabled and witness_ddb is set to 1, it will cause the system to
 * drop into kdebug() when:
 *	- a lock hierarchy violation occurs
 *	- locks are held when going to sleep.
 */
#ifdef WITNESS_DDB
int	witness_ddb = 1;
#else
int	witness_ddb = 0;
#endif
TUNABLE_INT("debug.witness_ddb", &witness_ddb);
SYSCTL_INT(_debug, OID_AUTO, witness_ddb, CTLFLAG_RW, &witness_ddb, 0, "");
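/*
 * As an illustration, these knobs can be set from the loader (values here
 * are examples only), e.g. in /boot/loader.conf:
 *
 *	debug.witness_ddb="1"
 *	debug.witness_trace="1"
 *
 * or toggled at runtime with sysctl(8):
 *
 *	sysctl debug.witness_ddb=1
 */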
/*
 * When DDB is enabled and witness_trace is set to 1, it will cause the system
 * to print a stack trace when:
 *	- a lock hierarchy violation occurs
 *	- locks are held when going to sleep.
 */
int	witness_trace = 1;
TUNABLE_INT("debug.witness_trace", &witness_trace);
SYSCTL_INT(_debug, OID_AUTO, witness_trace, CTLFLAG_RW, &witness_trace, 0, "");
#endif /* DDB */

#ifdef WITNESS_SKIPSPIN
int	witness_skipspin = 1;
#else
int	witness_skipspin = 0;
#endif
TUNABLE_INT("debug.witness_skipspin", &witness_skipspin);
SYSCTL_INT(_debug, OID_AUTO, witness_skipspin, CTLFLAG_RDTUN, &witness_skipspin, 0,
    "");

static struct mtx w_mtx;
static struct witness_list w_free = STAILQ_HEAD_INITIALIZER(w_free);
static struct witness_list w_all = STAILQ_HEAD_INITIALIZER(w_all);
static struct witness_list w_spin = STAILQ_HEAD_INITIALIZER(w_spin);
static struct witness_list w_sleep = STAILQ_HEAD_INITIALIZER(w_sleep);
static struct witness_child_list_entry *w_child_free = NULL;
static struct lock_list_entry *w_lock_list_free = NULL;

static struct witness w_data[WITNESS_COUNT];
static struct witness_child_list_entry w_childdata[WITNESS_CHILDCOUNT];
static struct lock_list_entry w_locklistdata[LOCK_CHILDCOUNT];

static struct witness_order_list_entry order_lists[] = {
	{ "proctree", &lock_class_sx },
	{ "allproc", &lock_class_sx },
	{ "Giant", &lock_class_mtx_sleep },
	{ "filedesc structure", &lock_class_mtx_sleep },
	{ "pipe mutex", &lock_class_mtx_sleep },
	{ "sigio lock", &lock_class_mtx_sleep },
	{ "process group", &lock_class_mtx_sleep },
	{ "process lock", &lock_class_mtx_sleep },
	{ "session", &lock_class_mtx_sleep },
	{ "uidinfo hash", &lock_class_mtx_sleep },
	{ "uidinfo struct", &lock_class_mtx_sleep },
	{ "allprison", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * Sockets
	 */
	{ "filedesc structure", &lock_class_mtx_sleep },
	{ "accept", &lock_class_mtx_sleep },
	{ "so_snd", &lock_class_mtx_sleep },
	{ "so_rcv", &lock_class_mtx_sleep },
	{ "sellck", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * Routing
	 */
	{ "so_rcv", &lock_class_mtx_sleep },
	{ "radix node head", &lock_class_mtx_sleep },
	{ "rtentry", &lock_class_mtx_sleep },
	{ "ifaddr", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * UNIX Domain Sockets
	 */
	{ "unp", &lock_class_mtx_sleep },
	{ "so_snd", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * UDP/IP
	 */
	{ "udp", &lock_class_mtx_sleep },
	{ "udpinp", &lock_class_mtx_sleep },
	{ "so_snd", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * TCP/IP
	 */
	{ "tcp", &lock_class_mtx_sleep },
	{ "tcpinp", &lock_class_mtx_sleep },
	{ "so_snd", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * SLIP
	 */
	{ "slip_mtx", &lock_class_mtx_sleep },
	{ "slip sc_mtx", &lock_class_mtx_sleep },
	{ NULL, NULL },
	/*
	 * spin locks
	 */
#ifdef SMP
	{ "ap boot", &lock_class_mtx_spin },
#endif
	{ "sio", &lock_class_mtx_spin },
#ifdef __i386__
	{ "cy", &lock_class_mtx_spin },
#endif
	{ "uart_hwmtx", &lock_class_mtx_spin },
	{ "sabtty", &lock_class_mtx_spin },
	{ "zstty", &lock_class_mtx_spin },
	{ "ng_node", &lock_class_mtx_spin },
	{ "ng_worklist", &lock_class_mtx_spin },
	{ "taskqueue_fast", &lock_class_mtx_spin },
	{ "intr table", &lock_class_mtx_spin },
	{ "ithread table lock", &lock_class_mtx_spin },
	{ "sleepq chain", &lock_class_mtx_spin },
	{ "sched lock", &lock_class_mtx_spin },
	{ "turnstile chain", &lock_class_mtx_spin },
	{ "td_contested", &lock_class_mtx_spin },
	{ "callout", &lock_class_mtx_spin },
	{ "entropy harvest", &lock_class_mtx_spin },
	{ "entropy harvest buffers", &lock_class_mtx_spin },
	/*
	 * leaf locks
	 */
	{ "allpmaps", &lock_class_mtx_spin },
	{ "vm page queue free mutex", &lock_class_mtx_spin },
	{ "icu", &lock_class_mtx_spin },
#ifdef SMP
	{ "smp rendezvous", &lock_class_mtx_spin },
#if defined(__i386__) || defined(__amd64__)
	{ "tlb", &lock_class_mtx_spin },
	{ "lazypmap", &lock_class_mtx_spin },
#endif
#ifdef __sparc64__
	{ "ipi", &lock_class_mtx_spin },
#endif
#endif
	{ "clk", &lock_class_mtx_spin },
	{ "mutex profiling lock", &lock_class_mtx_spin },
	{ "kse zombie lock", &lock_class_mtx_spin },
	{ "ALD Queue", &lock_class_mtx_spin },
#ifdef __ia64__
	{ "MCA spin lock", &lock_class_mtx_spin },
#endif
#if defined(__i386__) || defined(__amd64__)
	{ "pcicfg", &lock_class_mtx_spin },
#endif
	{ NULL, NULL },
	{ NULL, NULL }
};

#ifdef BLESSING
/*
 * Pairs of locks which have been blessed
 * Don't complain about order problems with blessed locks
 */
static struct witness_blessed blessed_list[] = {
};
static int blessed_count =
	sizeof(blessed_list) / sizeof(struct witness_blessed);
#endif
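/*
 * A hypothetical example of a blessed entry, were BLESSING enabled and
 * were "foo" and "bar" real lock type names, would read:
 *
 *	static struct witness_blessed blessed_list[] = {
 *		{ "foo", "bar" },
 *	};
 *
 * which tells blessed() below to stay quiet about "foo"/"bar" reversals.
 */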
/*
 * List of all locks in the system.
 */
TAILQ_HEAD(, lock_object) all_locks = TAILQ_HEAD_INITIALIZER(all_locks);

static struct mtx all_mtx = {
	{ &lock_class_mtx_sleep,	/* mtx_object.lo_class */
	  "All locks list",		/* mtx_object.lo_name */
	  "All locks list",		/* mtx_object.lo_type */
	  LO_INITIALIZED,		/* mtx_object.lo_flags */
	  { NULL, NULL },		/* mtx_object.lo_list */
	  NULL },			/* mtx_object.lo_witness */
	MTX_UNOWNED, 0			/* mtx_lock, mtx_recurse */
};

/*
 * This global is set to 0 once it becomes safe to use the witness code.
 */
static int witness_cold = 1;

/*
 * Global variables for book keeping.
 */
static int lock_cur_cnt;
static int lock_max_cnt;

/*
 * The WITNESS-enabled diagnostic code.
 */
static void
witness_initialize(void *dummy __unused)
{
	struct lock_object *lock;
	struct witness_order_list_entry *order;
	struct witness *w, *w1;
	int i;

	/*
	 * We have to release Giant before initializing its witness
	 * structure so that WITNESS doesn't get confused.
	 */
	mtx_unlock(&Giant);
	mtx_assert(&Giant, MA_NOTOWNED);

	CTR1(KTR_WITNESS, "%s: initializing witness", __func__);
	TAILQ_INSERT_HEAD(&all_locks, &all_mtx.mtx_object, lo_list);
	mtx_init(&w_mtx, "witness lock", NULL, MTX_SPIN | MTX_QUIET |
	    MTX_NOWITNESS);
	for (i = 0; i < WITNESS_COUNT; i++)
		witness_free(&w_data[i]);
	for (i = 0; i < WITNESS_CHILDCOUNT; i++)
		witness_child_free(&w_childdata[i]);
	for (i = 0; i < LOCK_CHILDCOUNT; i++)
		witness_lock_list_free(&w_locklistdata[i]);

	/* First add in all the specified order lists. */
	for (order = order_lists; order->w_name != NULL; order++) {
		w = enroll(order->w_name, order->w_class);
		if (w == NULL)
			continue;
		w->w_file = "order list";
		for (order++; order->w_name != NULL; order++) {
			w1 = enroll(order->w_name, order->w_class);
			if (w1 == NULL)
				continue;
			w1->w_file = "order list";
			if (!itismychild(w, w1))
				panic("Not enough memory for static orders!");
			w = w1;
		}
	}

	/* Iterate through all locks and add them to witness. */
	mtx_lock(&all_mtx);
	TAILQ_FOREACH(lock, &all_locks, lo_list) {
		if (lock->lo_flags & LO_WITNESS)
			lock->lo_witness = enroll(lock->lo_type,
			    lock->lo_class);
		else
			lock->lo_witness = NULL;
	}
	mtx_unlock(&all_mtx);

	/* Mark the witness code as being ready for use. */
	atomic_store_rel_int(&witness_cold, 0);

	mtx_lock(&Giant);
}
SYSINIT(witness_init, SI_SUB_WITNESS, SI_ORDER_FIRST, witness_initialize, NULL)

static int
sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS)
{
	int error, value;

	value = witness_watch;
	error = sysctl_handle_int(oidp, &value, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	error = suser(req->td);
	if (error != 0)
		return (error);
	if (value == witness_watch)
		return (0);
	if (value != 0)
		return (EINVAL);
	witness_watch = 0;
	return (0);
}

void
witness_init(struct lock_object *lock)
{
	struct lock_class *class;

	class = lock->lo_class;
	if (lock->lo_flags & LO_INITIALIZED)
		panic("%s: lock (%s) %s is already initialized", __func__,
		    class->lc_name, lock->lo_name);
	if ((lock->lo_flags & LO_RECURSABLE) != 0 &&
	    (class->lc_flags & LC_RECURSABLE) == 0)
		panic("%s: lock (%s) %s can not be recursable", __func__,
		    class->lc_name, lock->lo_name);
	if ((lock->lo_flags & LO_SLEEPABLE) != 0 &&
	    (class->lc_flags & LC_SLEEPABLE) == 0)
		panic("%s: lock (%s) %s can not be sleepable", __func__,
		    class->lc_name, lock->lo_name);
	if ((lock->lo_flags & LO_UPGRADABLE) != 0 &&
	    (class->lc_flags & LC_UPGRADABLE) == 0)
		panic("%s: lock (%s) %s can not be upgradable", __func__,
		    class->lc_name, lock->lo_name);

	mtx_lock(&all_mtx);
	TAILQ_INSERT_TAIL(&all_locks, lock, lo_list);
	lock->lo_flags |= LO_INITIALIZED;
	lock_cur_cnt++;
	if (lock_cur_cnt > lock_max_cnt)
		lock_max_cnt = lock_cur_cnt;
	mtx_unlock(&all_mtx);
	if (!witness_cold && witness_watch != 0 && panicstr == NULL &&
	    (lock->lo_flags & LO_WITNESS) != 0)
		lock->lo_witness = enroll(lock->lo_type, class);
	else
		lock->lo_witness = NULL;
}

void
witness_destroy(struct lock_object *lock)
{
	struct witness *w;

	if (witness_cold)
		panic("lock (%s) %s destroyed while witness_cold",
		    lock->lo_class->lc_name, lock->lo_name);
	if ((lock->lo_flags & LO_INITIALIZED) == 0)
		panic("%s: lock (%s) %s is not initialized", __func__,
		    lock->lo_class->lc_name, lock->lo_name);

	/* XXX: need to verify that no one holds the lock */
	w = lock->lo_witness;
	if (w != NULL) {
		mtx_lock_spin(&w_mtx);
		MPASS(w->w_refcount > 0);
		w->w_refcount--;

		/*
		 * Lock is already released if we have an allocation failure
		 * and depart() fails.
		 */
		if (w->w_refcount != 0 || depart(w))
			mtx_unlock_spin(&w_mtx);
	}

	mtx_lock(&all_mtx);
	lock_cur_cnt--;
	TAILQ_REMOVE(&all_locks, lock, lo_list);
	lock->lo_flags &= ~LO_INITIALIZED;
	mtx_unlock(&all_mtx);
}

#ifdef DDB
static void
witness_display_list(void(*prnt)(const char *fmt, ...),
		     struct witness_list *list)
{
	struct witness *w;

	STAILQ_FOREACH(w, list, w_typelist) {
		if (w->w_file == NULL || w->w_level > 0)
			continue;
		/*
		 * This lock has no ancestors, display its descendants.
		 */
		witness_displaydescendants(prnt, w, 0);
	}
}

static void
witness_display(void(*prnt)(const char *fmt, ...))
{
	struct witness *w;

	KASSERT(!witness_cold, ("%s: witness_cold", __func__));
	witness_levelall();

	/* Clear all the displayed flags. */
	STAILQ_FOREACH(w, &w_all, w_list) {
		w->w_displayed = 0;
	}

	/*
	 * First, handle sleep locks which have been acquired at least
	 * once.
	 */
	prnt("Sleep locks:\n");
	witness_display_list(prnt, &w_sleep);

	/*
	 * Now do spin locks which have been acquired at least once.
	 */
	prnt("\nSpin locks:\n");
	witness_display_list(prnt, &w_spin);

	/*
	 * Finally, any locks which have not been acquired yet.
	 */
	prnt("\nLocks which were never acquired:\n");
	STAILQ_FOREACH(w, &w_all, w_list) {
		if (w->w_file != NULL || w->w_refcount == 0)
			continue;
		prnt("%s\n", w->w_name);
	}
}
#endif /* DDB */

/* Trim useless garbage from filenames. */
static const char *
fixup_filename(const char *file)
{

	if (file == NULL)
		return (NULL);
	while (strncmp(file, "../", 3) == 0)
		file += 3;
	return (file);
}

int
witness_defineorder(struct lock_object *lock1, struct lock_object *lock2)
{

	if (witness_watch == 0 || panicstr != NULL)
		return (0);

	/* Require locks that witness knows about. */
	if (lock1 == NULL || lock1->lo_witness == NULL || lock2 == NULL ||
	    lock2->lo_witness == NULL)
		return (EINVAL);

	MPASS(!mtx_owned(&w_mtx));
	mtx_lock_spin(&w_mtx);

	/*
	 * If we already have either an explicit or implied lock order that
	 * is the other way around, then return an error.
	 */
	if (isitmydescendant(lock2->lo_witness, lock1->lo_witness)) {
		mtx_unlock_spin(&w_mtx);
		return (EDOOFUS);
	}

	/* Try to add the new order. */
	CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__,
	    lock2->lo_type, lock1->lo_type);
	if (!itismychild(lock1->lo_witness, lock2->lo_witness))
		return (ENOMEM);
	mtx_unlock_spin(&w_mtx);
	return (0);
}
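/*
 * An illustrative caller of witness_defineorder() (the mutexes "foo" and
 * "bar" are hypothetical) that pins down "foo" before "bar" at init time:
 *
 *	error = witness_defineorder(&foo.mtx_object, &bar.mtx_object);
 *	if (error != 0)
 *		printf("foo -> bar order rejected: %d\n", error);
 *
 * EDOOFUS indicates the reverse order is already known to witness, EINVAL
 * that one of the locks is unknown to witness, and ENOMEM that witness
 * has exhausted its static resources.
 */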
void
witness_checkorder(struct lock_object *lock, int flags, const char *file,
    int line)
{
	struct lock_list_entry **lock_list, *lle;
	struct lock_instance *lock1, *lock2;
	struct lock_class *class;
	struct witness *w, *w1;
	struct thread *td;
	int i, j;

	if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL ||
	    panicstr != NULL)
		return;

	/*
	 * Try locks do not block if they fail to acquire the lock, thus
	 * there is no danger of deadlocks or of switching while holding a
	 * spin lock if we acquire a lock via a try operation.  This
	 * function shouldn't even be called for try locks, so panic if
	 * that happens.
	 */
	if (flags & LOP_TRYLOCK)
		panic("%s should not be called for try lock operations",
		    __func__);

	w = lock->lo_witness;
	class = lock->lo_class;
	td = curthread;
	file = fixup_filename(file);

	if (class->lc_flags & LC_SLEEPLOCK) {
		/*
		 * Since spin locks include a critical section, this check
		 * implicitly enforces a lock order of all sleep locks before
		 * all spin locks.
		 */
		if (td->td_critnest != 0)
			panic("blockable sleep lock (%s) %s @ %s:%d",
			    class->lc_name, lock->lo_name, file, line);
		lock_list = &td->td_sleeplocks;
	} else
		lock_list = PCPU_PTR(spinlocks);

	/*
	 * Is this the first lock acquired?  If so, then no order checking
	 * is needed.
	 */
	if (*lock_list == NULL)
		return;

	/*
	 * Check to see if we are recursing on a lock we already own.  If
	 * so, make sure that we don't mismatch exclusive and shared lock
	 * acquires.
	 */
	lock1 = find_instance(*lock_list, lock);
	if (lock1 != NULL) {
		if ((lock1->li_flags & LI_EXCLUSIVE) != 0 &&
		    (flags & LOP_EXCLUSIVE) == 0) {
			printf("shared lock of (%s) %s @ %s:%d\n",
			    class->lc_name, lock->lo_name, file, line);
			printf("while exclusively locked from %s:%d\n",
			    lock1->li_file, lock1->li_line);
			panic("share->excl");
		}
		if ((lock1->li_flags & LI_EXCLUSIVE) == 0 &&
		    (flags & LOP_EXCLUSIVE) != 0) {
			printf("exclusive lock of (%s) %s @ %s:%d\n",
			    class->lc_name, lock->lo_name, file, line);
			printf("while share locked from %s:%d\n",
			    lock1->li_file, lock1->li_line);
			panic("excl->share");
		}
		return;
	}

	/*
	 * Try locks do not block if they fail to acquire the lock, thus
	 * there is no danger of deadlocks or of switching while holding a
	 * spin lock if we acquire a lock via a try operation.
	 */
	if (flags & LOP_TRYLOCK)
		return;

	/*
	 * Check for duplicate locks of the same type.  Note that we only
	 * have to check for this on the last lock we just acquired.  Any
	 * other cases will be caught as lock order violations.
	 */
	lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1];
	w1 = lock1->li_lock->lo_witness;
	if (w1 == w) {
		if (w->w_same_squawked || (lock->lo_flags & LO_DUPOK))
			return;
		w->w_same_squawked = 1;
		printf("acquiring duplicate lock of same type: \"%s\"\n",
		    lock->lo_type);
		printf(" 1st %s @ %s:%d\n", lock1->li_lock->lo_name,
		    lock1->li_file, lock1->li_line);
		printf(" 2nd %s @ %s:%d\n", lock->lo_name, file, line);
#ifdef DDB
		goto debugger;
#else
		return;
#endif
	}
	MPASS(!mtx_owned(&w_mtx));
	mtx_lock_spin(&w_mtx);
	/*
	 * If we have a known higher number just say ok
	 */
	if (witness_watch > 1 && w->w_level > w1->w_level) {
		mtx_unlock_spin(&w_mtx);
		return;
	}
	/*
	 * If we know that the lock we are acquiring comes after
	 * the lock we most recently acquired in the lock order tree,
	 * then there is no need for any further checks.
	 */
	if (isitmydescendant(w1, w)) {
		mtx_unlock_spin(&w_mtx);
		return;
	}
	for (j = 0, lle = *lock_list; lle != NULL; lle = lle->ll_next) {
		for (i = lle->ll_count - 1; i >= 0; i--, j++) {

			MPASS(j < WITNESS_COUNT);
			lock1 = &lle->ll_children[i];
			w1 = lock1->li_lock->lo_witness;

			/*
			 * If this lock doesn't undergo witness checking,
			 * then skip it.
			 */
			if (w1 == NULL) {
				KASSERT((lock1->li_lock->lo_flags & LO_WITNESS) == 0,
				    ("lock missing witness structure"));
				continue;
			}
			/*
			 * If we are locking Giant and this is a sleepable
			 * lock, then skip it.
			 */
			if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    lock == &Giant.mtx_object)
				continue;
			/*
			 * If we are locking a sleepable lock and this lock
			 * is Giant, then skip it.
			 */
			if ((lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    lock1->li_lock == &Giant.mtx_object)
				continue;
			/*
			 * If we are locking a sleepable lock and this lock
			 * isn't sleepable, we want to treat it as a lock
			 * order violation to enforce a general lock order of
			 * sleepable locks before non-sleepable locks.
			 */
			if (!((lock->lo_flags & LO_SLEEPABLE) != 0 &&
			    (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0))
				/*
				 * Check the lock order hierarchy for a
				 * reversal.
				 */
				if (!isitmydescendant(w, w1))
					continue;
			/*
			 * We have a lock order violation, check to see if it
			 * is allowed or has already been yelled about.
			 */
			mtx_unlock_spin(&w_mtx);
#ifdef BLESSING
			/*
			 * If the lock order is blessed, just bail.  We don't
			 * look for other lock order violations though, which
			 * may be a bug.
			 */
			if (blessed(w, w1))
				return;
#endif
			if (lock1->li_lock == &Giant.mtx_object) {
				if (w1->w_Giant_squawked)
					return;
				else
					w1->w_Giant_squawked = 1;
			} else {
				if (w1->w_other_squawked)
					return;
				else
					w1->w_other_squawked = 1;
			}
			/*
			 * Ok, yell about it.
			 */
			printf("lock order reversal\n");
			/*
			 * Try to locate an earlier lock with
			 * witness w in our list.
			 */
			do {
				lock2 = &lle->ll_children[i];
				MPASS(lock2->li_lock != NULL);
				if (lock2->li_lock->lo_witness == w)
					break;
				if (i == 0 && lle->ll_next != NULL) {
					lle = lle->ll_next;
					i = lle->ll_count - 1;
					MPASS(i >= 0 && i < LOCK_NCHILDREN);
				} else
					i--;
			} while (i >= 0);
			if (i < 0) {
				printf(" 1st %p %s (%s) @ %s:%d\n",
				    lock1->li_lock, lock1->li_lock->lo_name,
				    lock1->li_lock->lo_type, lock1->li_file,
				    lock1->li_line);
				printf(" 2nd %p %s (%s) @ %s:%d\n", lock,
				    lock->lo_name, lock->lo_type, file, line);
			} else {
				printf(" 1st %p %s (%s) @ %s:%d\n",
				    lock2->li_lock, lock2->li_lock->lo_name,
				    lock2->li_lock->lo_type, lock2->li_file,
				    lock2->li_line);
				printf(" 2nd %p %s (%s) @ %s:%d\n",
				    lock1->li_lock, lock1->li_lock->lo_name,
				    lock1->li_lock->lo_type, lock1->li_file,
				    lock1->li_line);
				printf(" 3rd %p %s (%s) @ %s:%d\n", lock,
				    lock->lo_name, lock->lo_type, file, line);
			}
#ifdef DDB
			goto debugger;
#else
			return;
#endif
		}
	}
	lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1];
	/*
	 * If requested, build a new lock order.  However, don't build a new
	 * relationship between a sleepable lock and Giant if it is in the
	 * wrong direction.  The correct lock order is that sleepable locks
	 * always come before Giant.
	 */
	if (flags & LOP_NEWORDER &&
	    !(lock1->li_lock == &Giant.mtx_object &&
	    (lock->lo_flags & LO_SLEEPABLE) != 0)) {
		CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__,
		    lock->lo_type, lock1->li_lock->lo_type);
		if (!itismychild(lock1->li_lock->lo_witness, w))
			/* Witness is dead. */
			return;
	}
	mtx_unlock_spin(&w_mtx);
	return;

#ifdef DDB
debugger:
	if (witness_trace)
		backtrace();
	if (witness_ddb)
		Debugger(__func__);
#endif
}

void
witness_lock(struct lock_object *lock, int flags, const char *file, int line)
{
	struct lock_list_entry **lock_list, *lle;
	struct lock_instance *instance;
	struct witness *w;
	struct thread *td;

	if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL ||
	    panicstr != NULL)
		return;
	w = lock->lo_witness;
	td = curthread;
	file = fixup_filename(file);

	/* Determine lock list for this lock. */
	if (lock->lo_class->lc_flags & LC_SLEEPLOCK)
		lock_list = &td->td_sleeplocks;
	else
		lock_list = PCPU_PTR(spinlocks);

	/* Check to see if we are recursing on a lock we already own. */
	instance = find_instance(*lock_list, lock);
	if (instance != NULL) {
		instance->li_flags++;
		CTR4(KTR_WITNESS, "%s: pid %d recursed on %s r=%d", __func__,
		    td->td_proc->p_pid, lock->lo_name,
		    instance->li_flags & LI_RECURSEMASK);
		instance->li_file = file;
		instance->li_line = line;
		return;
	}

	/* Update per-witness last file and line acquire. */
	w->w_file = file;
	w->w_line = line;

	/* Find the next open lock instance in the list and fill it. */
	lle = *lock_list;
	if (lle == NULL || lle->ll_count == LOCK_NCHILDREN) {
		lle = witness_lock_list_get();
		if (lle == NULL)
			return;
		lle->ll_next = *lock_list;
		CTR3(KTR_WITNESS, "%s: pid %d added lle %p", __func__,
		    td->td_proc->p_pid, lle);
		*lock_list = lle;
	}
	instance = &lle->ll_children[lle->ll_count++];
	instance->li_lock = lock;
	instance->li_line = line;
	instance->li_file = file;
	if ((flags & LOP_EXCLUSIVE) != 0)
		instance->li_flags = LI_EXCLUSIVE;
	else
		instance->li_flags = 0;
	CTR4(KTR_WITNESS, "%s: pid %d added %s as lle[%d]", __func__,
	    td->td_proc->p_pid, lock->lo_name, lle->ll_count - 1);
}

void
witness_upgrade(struct lock_object *lock, int flags, const char *file, int line)
{
	struct lock_instance *instance;
	struct lock_class *class;

	KASSERT(!witness_cold, ("%s: witness_cold", __func__));
	if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL)
		return;
	class = lock->lo_class;
	file = fixup_filename(file);
	if ((lock->lo_flags & LO_UPGRADABLE) == 0)
		panic("upgrade of non-upgradable lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	if ((flags & LOP_TRYLOCK) == 0)
		panic("non-try upgrade of lock (%s) %s @ %s:%d", class->lc_name,
		    lock->lo_name, file, line);
	if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0)
		panic("upgrade of non-sleep lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	instance = find_instance(curthread->td_sleeplocks, lock);
	if (instance == NULL)
		panic("upgrade of unlocked lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	if ((instance->li_flags & LI_EXCLUSIVE) != 0)
		panic("upgrade of exclusive lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	if ((instance->li_flags & LI_RECURSEMASK) != 0)
		panic("upgrade of recursed lock (%s) %s r=%d @ %s:%d",
		    class->lc_name, lock->lo_name,
		    instance->li_flags & LI_RECURSEMASK, file, line);
	instance->li_flags |= LI_EXCLUSIVE;
}

void
witness_downgrade(struct lock_object *lock, int flags, const char *file,
    int line)
{
	struct lock_instance *instance;
	struct lock_class *class;

	KASSERT(!witness_cold, ("%s: witness_cold", __func__));
	if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL)
		return;
	class = lock->lo_class;
	file = fixup_filename(file);
	if ((lock->lo_flags & LO_UPGRADABLE) == 0)
		panic("downgrade of non-upgradable lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0)
		panic("downgrade of non-sleep lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	instance = find_instance(curthread->td_sleeplocks, lock);
	if (instance == NULL)
		panic("downgrade of unlocked lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	if ((instance->li_flags & LI_EXCLUSIVE) == 0)
		panic("downgrade of shared lock (%s) %s @ %s:%d",
		    class->lc_name, lock->lo_name, file, line);
	if ((instance->li_flags & LI_RECURSEMASK) != 0)
		panic("downgrade of recursed lock (%s) %s r=%d @ %s:%d",
		    class->lc_name, lock->lo_name,
		    instance->li_flags & LI_RECURSEMASK, file, line);
	instance->li_flags &= ~LI_EXCLUSIVE;
}

void
witness_unlock(struct lock_object *lock, int flags, const char *file, int line)
{
	struct lock_list_entry **lock_list, *lle;
	struct lock_instance *instance;
	struct lock_class *class;
	struct thread *td;
	register_t s;
	int i, j;

	if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL ||
	    panicstr != NULL)
		return;
	td = curthread;
	class = lock->lo_class;
	file = fixup_filename(file);

	/* Find lock instance associated with this lock. */
	if (class->lc_flags & LC_SLEEPLOCK)
		lock_list = &td->td_sleeplocks;
	else
		lock_list = PCPU_PTR(spinlocks);
	for (; *lock_list != NULL; lock_list = &(*lock_list)->ll_next)
		for (i = 0; i < (*lock_list)->ll_count; i++) {
			instance = &(*lock_list)->ll_children[i];
			if (instance->li_lock == lock)
				goto found;
		}
	panic("lock (%s) %s not locked @ %s:%d", class->lc_name, lock->lo_name,
	    file, line);
found:

	/* First, check for shared/exclusive mismatches. */
	if ((instance->li_flags & LI_EXCLUSIVE) != 0 &&
	    (flags & LOP_EXCLUSIVE) == 0) {
		printf("shared unlock of (%s) %s @ %s:%d\n", class->lc_name,
		    lock->lo_name, file, line);
		printf("while exclusively locked from %s:%d\n",
		    instance->li_file, instance->li_line);
		panic("excl->ushare");
	}
	if ((instance->li_flags & LI_EXCLUSIVE) == 0 &&
	    (flags & LOP_EXCLUSIVE) != 0) {
		printf("exclusive unlock of (%s) %s @ %s:%d\n", class->lc_name,
		    lock->lo_name, file, line);
		printf("while share locked from %s:%d\n", instance->li_file,
		    instance->li_line);
		panic("share->uexcl");
	}

	/* If we are recursed, unrecurse. */
	if ((instance->li_flags & LI_RECURSEMASK) > 0) {
		CTR4(KTR_WITNESS, "%s: pid %d unrecursed on %s r=%d", __func__,
		    td->td_proc->p_pid, instance->li_lock->lo_name,
		    instance->li_flags);
		instance->li_flags--;
		return;
	}

	/* Otherwise, remove this item from the list. */
	s = intr_disable();
	CTR4(KTR_WITNESS, "%s: pid %d removed %s from lle[%d]", __func__,
	    td->td_proc->p_pid, instance->li_lock->lo_name,
	    (*lock_list)->ll_count - 1);
	for (j = i; j < (*lock_list)->ll_count - 1; j++)
		(*lock_list)->ll_children[j] =
		    (*lock_list)->ll_children[j + 1];
	(*lock_list)->ll_count--;
	intr_restore(s);

	/* If this lock list entry is now empty, free it. */
	if ((*lock_list)->ll_count == 0) {
		lle = *lock_list;
		*lock_list = lle->ll_next;
		CTR3(KTR_WITNESS, "%s: pid %d removed lle %p", __func__,
		    td->td_proc->p_pid, lle);
		witness_lock_list_free(lle);
	}
}

/*
 * Warn if any locks other than 'lock' are held.  Flags can be passed in to
 * exempt Giant and sleepable locks from the checks as well.  If any
 * non-exempt locks are held, then a supplied message is printed to the
 * console along with a list of the offending locks.  If indicated in the
 * flags then a failure results in a panic as well.
 */
int
witness_warn(int flags, struct lock_object *lock, const char *fmt, ...)
{
	struct lock_list_entry *lle;
	struct lock_instance *lock1;
	struct thread *td;
	va_list ap;
	int i, n;

	if (witness_cold || witness_watch == 0 || panicstr != NULL)
		return (0);
	n = 0;
	td = curthread;
	for (lle = td->td_sleeplocks; lle != NULL; lle = lle->ll_next)
		for (i = lle->ll_count - 1; i >= 0; i--) {
			lock1 = &lle->ll_children[i];
			if (lock1->li_lock == lock)
				continue;
			if (flags & WARN_GIANTOK &&
			    lock1->li_lock == &Giant.mtx_object)
				continue;
			if (flags & WARN_SLEEPOK &&
			    (lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0)
				continue;
			if (n == 0) {
				va_start(ap, fmt);
				vprintf(fmt, ap);
				va_end(ap);
				printf(" with the following");
				if (flags & WARN_SLEEPOK)
					printf(" non-sleepable");
				printf(" locks held:\n");
			}
			n++;
			witness_list_lock(lock1);
		}
	if (PCPU_GET(spinlocks) != NULL) {
		/*
		 * Since we already hold a spinlock preemption is
		 * already blocked.
		 */
		if (n == 0) {
			va_start(ap, fmt);
			vprintf(fmt, ap);
			va_end(ap);
			printf(" with the following");
			if (flags & WARN_SLEEPOK)
				printf(" non-sleepable");
			printf(" locks held:\n");
		}
		n += witness_list_locks(PCPU_PTR(spinlocks));
	}
	if (flags & WARN_PANIC && n)
		panic("witness_warn");
#ifdef DDB
	else if (witness_ddb && n)
		Debugger(__func__);
	else if (witness_trace && n)
		backtrace();
#endif
	return (n);
}
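/*
 * A typical witness_warn() call, sketched here for illustration, sits at
 * the head of a path that is about to sleep:
 *
 *	witness_warn(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 *	    "sleeping in %s", __func__);
 *
 * which complains about any held lock other than Giant and sleepable
 * locks; adding WARN_PANIC to the flags turns the complaint into a panic.
 */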
const char *
witness_file(struct lock_object *lock)
{
	struct witness *w;

	if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL)
		return ("?");
	w = lock->lo_witness;
	return (w->w_file);
}

int
witness_line(struct lock_object *lock)
{
	struct witness *w;

	if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL)
		return (0);
	w = lock->lo_witness;
	return (w->w_line);
}

static struct witness *
enroll(const char *description, struct lock_class *lock_class)
{
	struct witness *w;

	if (witness_watch == 0 || panicstr != NULL)
		return (NULL);
	if ((lock_class->lc_flags & LC_SPINLOCK) && witness_skipspin)
		return (NULL);
	mtx_lock_spin(&w_mtx);
	STAILQ_FOREACH(w, &w_all, w_list) {
		if (w->w_name == description || (w->w_refcount > 0 &&
		    strcmp(description, w->w_name) == 0)) {
			w->w_refcount++;
			mtx_unlock_spin(&w_mtx);
			if (lock_class != w->w_class)
				panic(
				"lock (%s) %s does not match earlier (%s) lock",
				    description, lock_class->lc_name,
				    w->w_class->lc_name);
			return (w);
		}
	}
	/*
	 * This isn't quite right, as witness_cold is still 0 while we
	 * enroll all the locks initialized before witness_initialize().
	 */
	if ((lock_class->lc_flags & LC_SPINLOCK) && !witness_cold) {
		mtx_unlock_spin(&w_mtx);
		panic("spin lock %s not in order list", description);
	}
	if ((w = witness_get()) == NULL)
		return (NULL);
	w->w_name = description;
	w->w_class = lock_class;
	w->w_refcount = 1;
	STAILQ_INSERT_HEAD(&w_all, w, w_list);
	if (lock_class->lc_flags & LC_SPINLOCK)
		STAILQ_INSERT_HEAD(&w_spin, w, w_typelist);
	else if (lock_class->lc_flags & LC_SLEEPLOCK)
		STAILQ_INSERT_HEAD(&w_sleep, w, w_typelist);
	else {
		mtx_unlock_spin(&w_mtx);
		panic("lock class %s is not sleep or spin",
		    lock_class->lc_name);
	}
	mtx_unlock_spin(&w_mtx);
	return (w);
}

/* Don't let the door bang you on the way out... */
static int
depart(struct witness *w)
{
	struct witness_child_list_entry *wcl, *nwcl;
	struct witness_list *list;
	struct witness *parent;

	MPASS(w->w_refcount == 0);
	if (w->w_class->lc_flags & LC_SLEEPLOCK)
		list = &w_sleep;
	else
		list = &w_spin;
	/*
	 * First, we run through the entire tree looking for any
	 * witnesses that the outgoing witness is a child of.  For
	 * each parent that we find, we reparent all the direct
	 * children of the outgoing witness to its parent.
	 */
	STAILQ_FOREACH(parent, list, w_typelist) {
		if (!isitmychild(parent, w))
			continue;
		removechild(parent, w);
		if (!reparentchildren(parent, w))
			return (0);
	}

	/*
	 * Now we go through and free up the child list of the
	 * outgoing witness.
	 */
	for (wcl = w->w_children; wcl != NULL; wcl = nwcl) {
		nwcl = wcl->wcl_next;
		witness_child_free(wcl);
	}

	/*
	 * Detach from various lists and free.
	 */
	STAILQ_REMOVE(list, w, witness, w_typelist);
	STAILQ_REMOVE(&w_all, w, witness, w_list);
	witness_free(w);

	/* Finally, fixup the tree. */
	return (rebalancetree(list));
}

/*
 * Prune an entire lock order tree.  We look for cases where a lock
 * is now both a descendant and a direct child of a given lock.  In
 * that case, we want to remove the direct child link from the tree.
 *
 * Returns false if insertchild() fails.
 */
static int
rebalancetree(struct witness_list *list)
{
	struct witness *child, *parent;

	STAILQ_FOREACH(child, list, w_typelist) {
		STAILQ_FOREACH(parent, list, w_typelist) {
			if (!isitmychild(parent, child))
				continue;
			removechild(parent, child);
			if (isitmydescendant(parent, child))
				continue;
			if (!insertchild(parent, child))
				return (0);
		}
	}
	witness_levelall();
	return (1);
}

/*
 * Add "child" as a direct child of "parent".  Returns false if
 * we fail due to out of memory.
 */
static int
insertchild(struct witness *parent, struct witness *child)
{
	struct witness_child_list_entry **wcl;

	MPASS(child != NULL && parent != NULL);

	/*
	 * Insert "child" after "parent"
	 */
	wcl = &parent->w_children;
	while (*wcl != NULL && (*wcl)->wcl_count == WITNESS_NCHILDREN)
		wcl = &(*wcl)->wcl_next;
	if (*wcl == NULL) {
		*wcl = witness_child_get();
		if (*wcl == NULL)
			return (0);
	}
	(*wcl)->wcl_children[(*wcl)->wcl_count++] = child;

	return (1);
}

/*
 * Make all the direct descendants of oldparent be direct descendants
 * of newparent.
 */
static int
reparentchildren(struct witness *newparent, struct witness *oldparent)
{
	struct witness_child_list_entry *wcl;
	int i;

	/* Avoid making a witness a child of itself. */
	MPASS(!isitmychild(oldparent, newparent));

	for (wcl = oldparent->w_children; wcl != NULL; wcl = wcl->wcl_next)
		for (i = 0; i < wcl->wcl_count; i++)
			if (!insertchild(newparent, wcl->wcl_children[i]))
				return (0);
	return (1);
}

static int
itismychild(struct witness *parent, struct witness *child)
{
	struct witness_list *list;

	MPASS(child != NULL && parent != NULL);
	if ((parent->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) !=
	    (child->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)))
		panic(
		"%s: parent (%s) and child (%s) are not the same lock type",
		    __func__, parent->w_class->lc_name,
		    child->w_class->lc_name);

	if (!insertchild(parent, child))
		return (0);

	if (parent->w_class->lc_flags & LC_SLEEPLOCK)
		list = &w_sleep;
	else
		list = &w_spin;
	return (rebalancetree(list));
}

static void
removechild(struct witness *parent, struct witness *child)
{
	struct witness_child_list_entry **wcl, *wcl1;
	int i;

	for (wcl = &parent->w_children; *wcl != NULL; wcl = &(*wcl)->wcl_next)
		for (i = 0; i < (*wcl)->wcl_count; i++)
			if ((*wcl)->wcl_children[i] == child)
				goto found;
	return;
found:
	(*wcl)->wcl_count--;
	if ((*wcl)->wcl_count > i)
		(*wcl)->wcl_children[i] =
		    (*wcl)->wcl_children[(*wcl)->wcl_count];
	MPASS((*wcl)->wcl_children[i] != NULL);
	if ((*wcl)->wcl_count != 0)
		return;
	wcl1 = *wcl;
	*wcl = wcl1->wcl_next;
	witness_child_free(wcl1);
}

static int
isitmychild(struct witness *parent, struct witness *child)
{
	struct witness_child_list_entry *wcl;
	int i;

	for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) {
		for (i = 0; i < wcl->wcl_count; i++) {
			if (wcl->wcl_children[i] == child)
				return (1);
		}
	}
	return (0);
}

static int
isitmydescendant(struct witness *parent, struct witness *child)
{
	struct witness_child_list_entry *wcl;
	int i, j;

	if (isitmychild(parent, child))
		return (1);
	j = 0;
	for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) {
		MPASS(j < 1000);
		for (i = 0; i < wcl->wcl_count; i++) {
			if (isitmydescendant(wcl->wcl_children[i], child))
				return (1);
		}
		j++;
	}
	return (0);
}
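/*
 * The leveling pass below computes, for every witness, the depth of the
 * longest known acquire-before chain leading to it.  For example, given
 * recorded orders a -> b and b -> c (names hypothetical), a gets level 0,
 * b level 1 and c level 2; a witness reachable from several roots keeps
 * the maximum such depth.  These levels drive the level column in the DDB
 * display and the fast-path check in witness_checkorder() when
 * witness_watch > 1.
 */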
static void
witness_levelall(void)
{
	struct witness_list *list;
	struct witness *w, *w1;

	/*
	 * First clear all levels.
	 */
	STAILQ_FOREACH(w, &w_all, w_list) {
		w->w_level = 0;
	}

	/*
	 * Look for locks with no parent and level all their descendants.
	 */
	STAILQ_FOREACH(w, &w_all, w_list) {
		/*
		 * This is just an optimization, technically we could get
		 * away just walking the all list each time.
		 */
		if (w->w_class->lc_flags & LC_SLEEPLOCK)
			list = &w_sleep;
		else
			list = &w_spin;
		STAILQ_FOREACH(w1, list, w_typelist) {
			if (isitmychild(w1, w))
				goto skip;
		}
		witness_leveldescendents(w, 0);
	skip:
		;	/* silence GCC 3.x */
	}
}

static void
witness_leveldescendents(struct witness *parent, int level)
{
	struct witness_child_list_entry *wcl;
	int i;

	if (parent->w_level < level)
		parent->w_level = level;
	level++;
	for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next)
		for (i = 0; i < wcl->wcl_count; i++)
			witness_leveldescendents(wcl->wcl_children[i], level);
}

static void
witness_displaydescendants(void(*prnt)(const char *fmt, ...),
			   struct witness *parent, int indent)
{
	struct witness_child_list_entry *wcl;
	int i, level;

	level = parent->w_level;
	prnt("%-2d", level);
	for (i = 0; i < indent; i++)
		prnt(" ");
	if (parent->w_refcount > 0)
		prnt("%s", parent->w_name);
	else
		prnt("(dead)");
	if (parent->w_displayed) {
		prnt(" -- (already displayed)\n");
		return;
	}
	parent->w_displayed = 1;
	if (parent->w_refcount > 0) {
		if (parent->w_file != NULL)
			prnt(" -- last acquired @ %s:%d", parent->w_file,
			    parent->w_line);
	}
	prnt("\n");
	for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next)
		for (i = 0; i < wcl->wcl_count; i++)
			witness_displaydescendants(prnt,
			    wcl->wcl_children[i], indent + 1);
}

#ifdef BLESSING
static int
blessed(struct witness *w1, struct witness *w2)
{
	int i;
	struct witness_blessed *b;

	for (i = 0; i < blessed_count; i++) {
		b = &blessed_list[i];
		if (strcmp(w1->w_name, b->b_lock1) == 0) {
			if (strcmp(w2->w_name, b->b_lock2) == 0)
				return (1);
			continue;
		}
		if (strcmp(w1->w_name, b->b_lock2) == 0)
			if (strcmp(w2->w_name, b->b_lock1) == 0)
				return (1);
	}
	return (0);
}
#endif

static struct witness *
witness_get(void)
{
	struct witness *w;

	if (witness_watch == 0) {
		mtx_unlock_spin(&w_mtx);
		return (NULL);
	}
	if (STAILQ_EMPTY(&w_free)) {
		witness_watch = 0;
		mtx_unlock_spin(&w_mtx);
		printf("%s: witness exhausted\n", __func__);
		return (NULL);
	}
	w = STAILQ_FIRST(&w_free);
	STAILQ_REMOVE_HEAD(&w_free, w_list);
	bzero(w, sizeof(*w));
	return (w);
}

static void
witness_free(struct witness *w)
{

	STAILQ_INSERT_HEAD(&w_free, w, w_list);
}

static struct witness_child_list_entry *
witness_child_get(void)
{
	struct witness_child_list_entry *wcl;

	if (witness_watch == 0) {
		mtx_unlock_spin(&w_mtx);
		return (NULL);
	}
	wcl = w_child_free;
	if (wcl == NULL) {
		witness_watch = 0;
		mtx_unlock_spin(&w_mtx);
		printf("%s: witness exhausted\n", __func__);
		return (NULL);
	}
	w_child_free = wcl->wcl_next;
	bzero(wcl, sizeof(*wcl));
	return (wcl);
}

static void
witness_child_free(struct witness_child_list_entry *wcl)
{

	wcl->wcl_next = w_child_free;
	w_child_free = wcl;
}

static struct lock_list_entry *
witness_lock_list_get(void)
{
	struct lock_list_entry *lle;

	if (witness_watch == 0)
		return (NULL);
	mtx_lock_spin(&w_mtx);
	lle = w_lock_list_free;
	if (lle == NULL) {
		witness_watch = 0;
		mtx_unlock_spin(&w_mtx);
		printf("%s: witness exhausted\n", __func__);
		return (NULL);
	}
	w_lock_list_free = lle->ll_next;
	mtx_unlock_spin(&w_mtx);
	bzero(lle, sizeof(*lle));
	return (lle);
}

static void
witness_lock_list_free(struct lock_list_entry *lle)
{

	mtx_lock_spin(&w_mtx);
	lle->ll_next = w_lock_list_free;
	w_lock_list_free = lle;
	mtx_unlock_spin(&w_mtx);
}

static struct lock_instance *
find_instance(struct lock_list_entry *lock_list, struct lock_object *lock)
{
	struct lock_list_entry *lle;
	struct lock_instance *instance;
	int i;

	for (lle = lock_list; lle != NULL; lle = lle->ll_next)
		for (i = lle->ll_count - 1; i >= 0; i--) {
			instance = &lle->ll_children[i];
			if (instance->li_lock == lock)
				return (instance);
		}
	return (NULL);
}

static void
witness_list_lock(struct lock_instance *instance)
{
	struct lock_object *lock;

	lock = instance->li_lock;
	printf("%s %s %s", (instance->li_flags & LI_EXCLUSIVE) != 0 ?
	    "exclusive" : "shared", lock->lo_class->lc_name, lock->lo_name);
	if (lock->lo_type != lock->lo_name)
		printf(" (%s)", lock->lo_type);
	printf(" r = %d (%p) locked @ %s:%d\n",
	    instance->li_flags & LI_RECURSEMASK, lock, instance->li_file,
	    instance->li_line);
}

int
witness_list_locks(struct lock_list_entry **lock_list)
{
	struct lock_list_entry *lle;
	int i, nheld;

	nheld = 0;
	for (lle = *lock_list; lle != NULL; lle = lle->ll_next)
		for (i = lle->ll_count - 1; i >= 0; i--) {
			witness_list_lock(&lle->ll_children[i]);
			nheld++;
		}
	return (nheld);
}

/*
 * This is a bit risky at best.  We call this function when we have timed
 * out acquiring a spin lock, and we assume that the other CPU is stuck
 * with this lock held.  So, we go groveling around in the other CPU's
 * per-cpu data to try to find the lock instance for this spin lock to
 * see when it was last acquired.
 */
void
witness_display_spinlock(struct lock_object *lock, struct thread *owner)
{
	struct lock_instance *instance;
	struct pcpu *pc;

	if (owner->td_critnest == 0 || owner->td_oncpu == NOCPU)
		return;
	pc = pcpu_find(owner->td_oncpu);
	instance = find_instance(pc->pc_spinlocks, lock);
	if (instance != NULL)
		witness_list_lock(instance);
}

void
witness_save(struct lock_object *lock, const char **filep, int *linep)
{
	struct lock_instance *instance;

	KASSERT(!witness_cold, ("%s: witness_cold", __func__));
	if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL)
		return;
	if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0)
		panic("%s: lock (%s) %s is not a sleep lock", __func__,
		    lock->lo_class->lc_name, lock->lo_name);
	instance = find_instance(curthread->td_sleeplocks, lock);
	if (instance == NULL)
		panic("%s: lock (%s) %s not locked", __func__,
		    lock->lo_class->lc_name, lock->lo_name);
	*filep = instance->li_file;
	*linep = instance->li_line;
}

void
witness_restore(struct lock_object *lock, const char *file, int line)
{
	struct lock_instance *instance;

	KASSERT(!witness_cold, ("%s: witness_cold", __func__));
	if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL)
		return;
	if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) == 0)
		panic("%s: lock (%s) %s is not a sleep lock", __func__,
		    lock->lo_class->lc_name, lock->lo_name);
	instance = find_instance(curthread->td_sleeplocks, lock);
	if (instance == NULL)
		panic("%s: lock (%s) %s not locked", __func__,
		    lock->lo_class->lc_name, lock->lo_name);
	lock->lo_witness->w_file = file;
	lock->lo_witness->w_line = line;
	instance->li_file = file;
	instance->li_line = line;
}
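/*
 * witness_save() and witness_restore() are meant to bracket code which
 * temporarily drops a sleep lock, so that the file and line of the
 * original acquisition survive the round trip; an illustrative sketch
 * (the mutex "foo" is hypothetical):
 *
 *	const char *file;
 *	int line;
 *
 *	witness_save(&foo.mtx_object, &file, &line);
 *	mtx_unlock(&foo);
 *	...
 *	mtx_lock(&foo);
 *	witness_restore(&foo.mtx_object, file, line);
 */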
void
witness_assert(struct lock_object *lock, int flags, const char *file, int line)
{
#ifdef INVARIANT_SUPPORT
	struct lock_instance *instance;

	if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL)
		return;
	if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) != 0)
		instance = find_instance(curthread->td_sleeplocks, lock);
	else if ((lock->lo_class->lc_flags & LC_SPINLOCK) != 0)
		instance = find_instance(PCPU_GET(spinlocks), lock);
	else {
		panic("Lock (%s) %s is not sleep or spin!",
		    lock->lo_class->lc_name, lock->lo_name);
	}
	file = fixup_filename(file);
	switch (flags) {
	case LA_UNLOCKED:
		if (instance != NULL)
			panic("Lock (%s) %s locked @ %s:%d.",
			    lock->lo_class->lc_name, lock->lo_name, file, line);
		break;
	case LA_LOCKED:
	case LA_LOCKED | LA_RECURSED:
	case LA_LOCKED | LA_NOTRECURSED:
	case LA_SLOCKED:
	case LA_SLOCKED | LA_RECURSED:
	case LA_SLOCKED | LA_NOTRECURSED:
	case LA_XLOCKED:
	case LA_XLOCKED | LA_RECURSED:
	case LA_XLOCKED | LA_NOTRECURSED:
		if (instance == NULL) {
			panic("Lock (%s) %s not locked @ %s:%d.",
			    lock->lo_class->lc_name, lock->lo_name, file, line);
			break;
		}
		if ((flags & LA_XLOCKED) != 0 &&
		    (instance->li_flags & LI_EXCLUSIVE) == 0)
			panic("Lock (%s) %s not exclusively locked @ %s:%d.",
			    lock->lo_class->lc_name, lock->lo_name, file, line);
		if ((flags & LA_SLOCKED) != 0 &&
		    (instance->li_flags & LI_EXCLUSIVE) != 0)
			panic("Lock (%s) %s exclusively locked @ %s:%d.",
			    lock->lo_class->lc_name, lock->lo_name, file, line);
		if ((flags & LA_RECURSED) != 0 &&
		    (instance->li_flags & LI_RECURSEMASK) == 0)
			panic("Lock (%s) %s not recursed @ %s:%d.",
			    lock->lo_class->lc_name, lock->lo_name, file, line);
		if ((flags & LA_NOTRECURSED) != 0 &&
		    (instance->li_flags & LI_RECURSEMASK) != 0)
			panic("Lock (%s) %s recursed @ %s:%d.",
			    lock->lo_class->lc_name, lock->lo_name, file, line);
		break;
	default:
		panic("Invalid lock assertion at %s:%d.", file, line);

	}
#endif	/* INVARIANT_SUPPORT */
}

#ifdef DDB
static void
witness_list(struct thread *td)
{

	KASSERT(!witness_cold, ("%s: witness_cold", __func__));
	KASSERT(db_active, ("%s: not in the debugger", __func__));

	if (witness_watch == 0)
		return;

	witness_list_locks(&td->td_sleeplocks);

	/*
	 * We only handle spinlocks if td == curthread.  This is somewhat broken
	 * if td is currently executing on some other CPU and holds spin locks
	 * as we won't display those locks.  If we had a MI way of getting
	 * the per-cpu data for a given cpu then we could use
	 * td->td_oncpu to get the list of spinlocks for this thread
	 * and "fix" this.
	 *
	 * That still wouldn't really fix this unless we locked sched_lock
	 * or stopped the other CPU to make sure it wasn't changing the list
	 * out from under us.  It is probably best to just not try to handle
	 * threads on other CPU's for now.
	 */
	if (td == curthread && PCPU_GET(spinlocks) != NULL)
		witness_list_locks(PCPU_PTR(spinlocks));
}

DB_SHOW_COMMAND(locks, db_witness_list)
{
	struct thread *td;
	pid_t pid;
	struct proc *p;

	if (have_addr) {
		pid = (addr % 16) + ((addr >> 4) % 16) * 10 +
		    ((addr >> 8) % 16) * 100 + ((addr >> 12) % 16) * 1000 +
		    ((addr >> 16) % 16) * 10000;
		/* sx_slock(&allproc_lock); */
		FOREACH_PROC_IN_SYSTEM(p) {
			if (p->p_pid == pid)
				break;
		}
		/* sx_sunlock(&allproc_lock); */
		if (p == NULL) {
			db_printf("pid %d not found\n", pid);
			return;
		}
		FOREACH_THREAD_IN_PROC(p, td) {
			witness_list(td);
		}
	} else {
		td = curthread;
		witness_list(td);
	}
}

DB_SHOW_COMMAND(witness, db_witness_display)
{

	witness_display(db_printf);
}
#endif
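/*
 * For reference, the DDB commands defined above are used as follows
 * (the pid is illustrative):
 *
 *	db> show locks		locks held by curthread
 *	db> show locks 42	locks held by the threads of pid 42
 *	db> show witness	dump the entire lock order tree
 *
 * Note that db_witness_list() decodes the pid from the command's address
 * argument one hex nibble per decimal digit.
 */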