1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 /* 31 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 32 */ 33 34 #include <sys/flock_impl.h> 35 #include <sys/vfs.h> 36 #include <sys/t_lock.h> /* for <sys/callb.h> */ 37 #include <sys/callb.h> 38 #include <sys/clconf.h> 39 #include <sys/cladm.h> 40 #include <sys/nbmlock.h> 41 #include <sys/cred.h> 42 #include <sys/policy.h> 43 44 /* 45 * The following four variables are for statistics purposes and they are 46 * not protected by locks. They may not be accurate but will at least be 47 * close to the actual value. 48 */ 49 50 int flk_lock_allocs; 51 int flk_lock_frees; 52 int edge_allocs; 53 int edge_frees; 54 int flk_proc_vertex_allocs; 55 int flk_proc_edge_allocs; 56 int flk_proc_vertex_frees; 57 int flk_proc_edge_frees; 58 59 static kmutex_t flock_lock; 60 61 #ifdef DEBUG 62 int check_debug = 0; 63 #define CHECK_ACTIVE_LOCKS(gp) if (check_debug) \ 64 check_active_locks(gp); 65 #define CHECK_SLEEPING_LOCKS(gp) if (check_debug) \ 66 check_sleeping_locks(gp); 67 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp) \ 68 if (check_debug) \ 69 check_owner_locks(gp, pid, sysid, vp); 70 #define CHECK_LOCK_TRANSITION(old_state, new_state) \ 71 { \ 72 if (check_lock_transition(old_state, new_state)) { \ 73 cmn_err(CE_PANIC, "Illegal lock transition \ 74 from %d to %d", old_state, new_state); \ 75 } \ 76 } 77 #else 78 79 #define CHECK_ACTIVE_LOCKS(gp) 80 #define CHECK_SLEEPING_LOCKS(gp) 81 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp) 82 #define CHECK_LOCK_TRANSITION(old_state, new_state) 83 84 #endif /* DEBUG */ 85 86 struct kmem_cache *flk_edge_cache; 87 88 graph_t *lock_graph[HASH_SIZE]; 89 proc_graph_t pgraph; 90 91 /* 92 * Clustering. 93 * 94 * NLM REGISTRY TYPE IMPLEMENTATION 95 * 96 * Assumptions: 97 * 1. Nodes in a cluster are numbered starting at 1; always non-negative 98 * integers; maximum node id is returned by clconf_maximum_nodeid(). 99 * 2. We use this node id to identify the node an NLM server runs on. 100 */ 101 102 /* 103 * NLM registry object keeps track of NLM servers via their 104 * nlmids (which are the node ids of the node in the cluster they run on) 105 * that have requested locks at this LLM with which this registry is 106 * associated. 107 * 108 * Representation of abstraction: 109 * rep = record[ states: array[nlm_state], 110 * lock: mutex] 111 * 112 * Representation invariants: 113 * 1. 
index i of rep.states is between 0 and n - 1, where n is the number
 *	of elements in the array, which happens to be the maximum number
 *	of nodes in the cluster configuration + 1.
 *   2. map nlmid to index i of rep.states
 *		0 -> 0
 *		1 -> 1
 *		2 -> 2
 *		...
 *		clconf_maximum_nodeid() -> n - 1
 *   3. This 1-1 mapping is quite convenient and it avoids errors resulting
 *	from forgetting to subtract 1 from the index.
 *   4. The reason we keep the 0th index is the following.  A legitimate
 *	cluster configuration includes making a UFS file system NFS
 *	exportable.  The code is structured so that if you're in a cluster
 *	you do one thing; otherwise, you do something else.  The problem
 *	is what to do if you think you're in a cluster with PXFS loaded,
 *	but you're using UFS not PXFS?  The upper two bytes of the sysid
 *	encode the node id of the node where the NLM server runs; these bytes
 *	are zero for UFS.  Since the nodeid is used to index into the
 *	registry, we can record the NLM server state information at index
 *	0 using the same mechanism used for PXFS file locks!
 */
static flk_nlm_status_t *nlm_reg_status = NULL;	/* state array 0..N-1 */
static kmutex_t nlm_reg_lock;			/* lock to protect array */
static uint_t nlm_status_size;			/* size of state array */

/*
 * Although we need a global lock dependency graph (and associated data
 * structures), we also need a per-zone notion of whether the lock manager is
 * running, and so whether to allow lock manager requests or not.
 *
 * Thus, on a per-zone basis we maintain a ``global'' variable
 * (flk_lockmgr_status), protected by flock_lock, and set when the lock
 * manager is determined to be changing state (starting or stopping).
 *
 * Each graph/zone pair also has a copy of this variable, which is protected by
 * the graph's mutex.
 *
 * The per-graph copies are used to synchronize lock requests with shutdown
 * requests.  The global copy is used to initialize the per-graph field when a
 * new graph is created.
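 *
 * As a minimal sketch of that initialization (the real code is in
 * flk_get_lock_graph() further down in this file; this only restates it and
 * elides the allocation and the lost-race cleanup):
 *
 *	mutex_enter(&flock_lock);
 *	if (lock_graph[index] == NULL) {
 *		lock_graph[index] = gp;
 *		if (flock_zone_key != ZONE_KEY_UNINITIALIZED) {
 *			fg = flk_get_globals();
 *			fg->lockmgr_status[index] = fg->flk_lockmgr_status;
 *		}
 *	}
 *	mutex_exit(&flock_lock);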
153 */ 154 struct flock_globals { 155 flk_lockmgr_status_t flk_lockmgr_status; 156 flk_lockmgr_status_t lockmgr_status[HASH_SIZE]; 157 }; 158 159 zone_key_t flock_zone_key; 160 161 static void create_flock(lock_descriptor_t *, flock64_t *); 162 static lock_descriptor_t *flk_get_lock(void); 163 static void flk_free_lock(lock_descriptor_t *lock); 164 static void flk_get_first_blocking_lock(lock_descriptor_t *request); 165 static int flk_process_request(lock_descriptor_t *); 166 static int flk_add_edge(lock_descriptor_t *, lock_descriptor_t *, int, int); 167 static edge_t *flk_get_edge(void); 168 static int flk_wait_execute_request(lock_descriptor_t *); 169 static int flk_relation(lock_descriptor_t *, lock_descriptor_t *); 170 static void flk_insert_active_lock(lock_descriptor_t *); 171 static void flk_delete_active_lock(lock_descriptor_t *, int); 172 static void flk_insert_sleeping_lock(lock_descriptor_t *); 173 static void flk_graph_uncolor(graph_t *); 174 static void flk_wakeup(lock_descriptor_t *, int); 175 static void flk_free_edge(edge_t *); 176 static void flk_recompute_dependencies(lock_descriptor_t *, 177 lock_descriptor_t **, int, int); 178 static int flk_find_barriers(lock_descriptor_t *); 179 static void flk_update_barriers(lock_descriptor_t *); 180 static int flk_color_reachables(lock_descriptor_t *); 181 static int flk_canceled(lock_descriptor_t *); 182 static void flk_delete_locks_by_sysid(lock_descriptor_t *); 183 static void report_blocker(lock_descriptor_t *, lock_descriptor_t *); 184 static void wait_for_lock(lock_descriptor_t *); 185 static void unlock_lockmgr_granted(struct flock_globals *); 186 static void wakeup_sleeping_lockmgr_locks(struct flock_globals *); 187 188 /* Clustering hooks */ 189 static void cl_flk_change_nlm_state_all_locks(int, flk_nlm_status_t); 190 static void cl_flk_wakeup_sleeping_nlm_locks(int); 191 static void cl_flk_unlock_nlm_granted(int); 192 193 #ifdef DEBUG 194 static int check_lock_transition(int, int); 195 static void check_sleeping_locks(graph_t *); 196 static void check_active_locks(graph_t *); 197 static int no_path(lock_descriptor_t *, lock_descriptor_t *); 198 static void path(lock_descriptor_t *, lock_descriptor_t *); 199 static void check_owner_locks(graph_t *, pid_t, int, vnode_t *); 200 static int level_one_path(lock_descriptor_t *, lock_descriptor_t *); 201 static int level_two_path(lock_descriptor_t *, lock_descriptor_t *, int); 202 #endif 203 204 /* proc_graph function definitions */ 205 static int flk_check_deadlock(lock_descriptor_t *); 206 static void flk_proc_graph_uncolor(void); 207 static proc_vertex_t *flk_get_proc_vertex(lock_descriptor_t *); 208 static proc_edge_t *flk_get_proc_edge(void); 209 static void flk_proc_release(proc_vertex_t *); 210 static void flk_free_proc_edge(proc_edge_t *); 211 static void flk_update_proc_graph(edge_t *, int); 212 213 /* Non-blocking mandatory locking */ 214 static int lock_blocks_io(nbl_op_t, u_offset_t, ssize_t, int, u_offset_t, 215 u_offset_t); 216 217 static struct flock_globals * 218 flk_get_globals(void) 219 { 220 /* 221 * The KLM module had better be loaded if we're attempting to handle 222 * lockmgr requests. 
223 */ 224 ASSERT(flock_zone_key != ZONE_KEY_UNINITIALIZED); 225 return (zone_getspecific(flock_zone_key, curproc->p_zone)); 226 } 227 228 static flk_lockmgr_status_t 229 flk_get_lockmgr_status(void) 230 { 231 struct flock_globals *fg; 232 233 ASSERT(MUTEX_HELD(&flock_lock)); 234 235 if (flock_zone_key == ZONE_KEY_UNINITIALIZED) { 236 /* 237 * KLM module not loaded; lock manager definitely not running. 238 */ 239 return (FLK_LOCKMGR_DOWN); 240 } 241 fg = flk_get_globals(); 242 return (fg->flk_lockmgr_status); 243 } 244 245 /* 246 * Routine called from fs_frlock in fs/fs_subr.c 247 */ 248 249 int 250 reclock(vnode_t *vp, flock64_t *lckdat, int cmd, int flag, u_offset_t offset, 251 flk_callback_t *flk_cbp) 252 { 253 lock_descriptor_t stack_lock_request; 254 lock_descriptor_t *lock_request; 255 int error = 0; 256 graph_t *gp; 257 int nlmid; 258 259 /* 260 * Check access permissions 261 */ 262 if ((cmd & SETFLCK) && 263 ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) || 264 (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0))) 265 return (EBADF); 266 267 /* 268 * for query and unlock we use the stack_lock_request 269 */ 270 271 if ((lckdat->l_type == F_UNLCK) || 272 !((cmd & INOFLCK) || (cmd & SETFLCK))) { 273 lock_request = &stack_lock_request; 274 (void) bzero((caddr_t)lock_request, 275 sizeof (lock_descriptor_t)); 276 277 /* 278 * following is added to make the assertions in 279 * flk_execute_request() to pass through 280 */ 281 282 lock_request->l_edge.edge_in_next = &lock_request->l_edge; 283 lock_request->l_edge.edge_in_prev = &lock_request->l_edge; 284 lock_request->l_edge.edge_adj_next = &lock_request->l_edge; 285 lock_request->l_edge.edge_adj_prev = &lock_request->l_edge; 286 lock_request->l_status = FLK_INITIAL_STATE; 287 } else { 288 lock_request = flk_get_lock(); 289 } 290 lock_request->l_state = 0; 291 lock_request->l_vnode = vp; 292 lock_request->l_zoneid = getzoneid(); 293 294 /* 295 * Convert the request range into the canonical start and end 296 * values. The NLM protocol supports locking over the entire 297 * 32-bit range, so there's no range checking for remote requests, 298 * but we still need to verify that local requests obey the rules. 299 */ 300 /* Clustering */ 301 if ((cmd & (RCMDLCK | PCMDLCK)) != 0) { 302 ASSERT(lckdat->l_whence == 0); 303 lock_request->l_start = lckdat->l_start; 304 lock_request->l_end = (lckdat->l_len == 0) ? MAX_U_OFFSET_T : 305 lckdat->l_start + (lckdat->l_len - 1); 306 } else { 307 /* check the validity of the lock range */ 308 error = flk_convert_lock_data(vp, lckdat, 309 &lock_request->l_start, &lock_request->l_end, 310 offset); 311 if (error) { 312 goto done; 313 } 314 error = flk_check_lock_data(lock_request->l_start, 315 lock_request->l_end, MAXEND); 316 if (error) { 317 goto done; 318 } 319 } 320 321 ASSERT(lock_request->l_end >= lock_request->l_start); 322 323 lock_request->l_type = lckdat->l_type; 324 if (cmd & INOFLCK) 325 lock_request->l_state |= IO_LOCK; 326 if (cmd & SLPFLCK) 327 lock_request->l_state |= WILLING_TO_SLEEP_LOCK; 328 if (cmd & RCMDLCK) 329 lock_request->l_state |= LOCKMGR_LOCK; 330 if (cmd & NBMLCK) 331 lock_request->l_state |= NBMAND_LOCK; 332 /* 333 * Clustering: set flag for PXFS locks 334 * We do not _only_ check for the PCMDLCK flag because PXFS locks could 335 * also be of type 'RCMDLCK'. 336 * We do not _only_ check the GETPXFSID() macro because local PXFS 337 * clients use a pxfsid of zero to permit deadlock detection in the LLM. 
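 *
 * For orientation, a rough sketch of the sysid decomposition this test
 * depends on.  The real GETNLMID() and GETPXFSID() macros live in the flock
 * headers; the bit layout below is only an assumption based on the
 * "upper two bytes" description earlier in this file, and GETPXFSID()
 * extracts an analogous PXFS identifier whose exact layout is not shown here:
 *
 *	sysid32 = (uint32_t)lckdat->l_sysid;
 *	nlmid   = sysid32 >> 16;	cluster node id of the NLM server
 *	sysid   = sysid32 & 0xffff;	local system id on that node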
338 */ 339 340 if ((cmd & PCMDLCK) || (GETPXFSID(lckdat->l_sysid) != 0)) { 341 lock_request->l_state |= PXFS_LOCK; 342 } 343 if (!((cmd & SETFLCK) || (cmd & INOFLCK))) { 344 if (lock_request->l_type == F_RDLCK || 345 lock_request->l_type == F_WRLCK) 346 lock_request->l_state |= QUERY_LOCK; 347 } 348 lock_request->l_flock = (*lckdat); 349 lock_request->l_callbacks = flk_cbp; 350 351 /* 352 * We are ready for processing the request 353 */ 354 if (IS_LOCKMGR(lock_request)) { 355 /* 356 * If the lock request is an NLM server request .... 357 */ 358 if (nlm_status_size == 0) { /* not booted as cluster */ 359 mutex_enter(&flock_lock); 360 /* 361 * Bail out if this is a lock manager request and the 362 * lock manager is not supposed to be running. 363 */ 364 if (flk_get_lockmgr_status() != FLK_LOCKMGR_UP) { 365 mutex_exit(&flock_lock); 366 error = ENOLCK; 367 goto done; 368 } 369 mutex_exit(&flock_lock); 370 } else { /* booted as a cluster */ 371 nlmid = GETNLMID(lock_request->l_flock.l_sysid); 372 ASSERT(nlmid <= nlm_status_size && nlmid >= 0); 373 374 mutex_enter(&nlm_reg_lock); 375 /* 376 * If the NLM registry does not know about this 377 * NLM server making the request, add its nlmid 378 * to the registry. 379 */ 380 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, 381 nlmid)) { 382 FLK_REGISTRY_ADD_NLMID(nlm_reg_status, nlmid); 383 } else if (!FLK_REGISTRY_IS_NLM_UP(nlm_reg_status, 384 nlmid)) { 385 /* 386 * If the NLM server is already known (has made 387 * previous lock requests) and its state is 388 * not NLM_UP (means that NLM server is 389 * shutting down), then bail out with an 390 * error to deny the lock request. 391 */ 392 mutex_exit(&nlm_reg_lock); 393 error = ENOLCK; 394 goto done; 395 } 396 mutex_exit(&nlm_reg_lock); 397 } 398 } 399 400 /* Now get the lock graph for a particular vnode */ 401 gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH); 402 403 /* 404 * We drop rwlock here otherwise this might end up causing a 405 * deadlock if this IOLOCK sleeps. (bugid # 1183392). 406 */ 407 408 if (IS_IO_LOCK(lock_request)) { 409 VOP_RWUNLOCK(vp, 410 (lock_request->l_type == F_RDLCK) ? 411 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL); 412 } 413 mutex_enter(&gp->gp_mutex); 414 415 lock_request->l_state |= REFERENCED_LOCK; 416 lock_request->l_graph = gp; 417 418 switch (lock_request->l_type) { 419 case F_RDLCK: 420 case F_WRLCK: 421 if (IS_QUERY_LOCK(lock_request)) { 422 flk_get_first_blocking_lock(lock_request); 423 (*lckdat) = lock_request->l_flock; 424 break; 425 } 426 427 /* process the request now */ 428 429 error = flk_process_request(lock_request); 430 break; 431 432 case F_UNLCK: 433 /* unlock request will not block so execute it immediately */ 434 435 if (IS_LOCKMGR(lock_request) && 436 flk_canceled(lock_request)) { 437 error = 0; 438 } else { 439 error = flk_execute_request(lock_request); 440 } 441 break; 442 443 case F_UNLKSYS: 444 /* 445 * Recovery mechanism to release lock manager locks when 446 * NFS client crashes and restart. NFS server will clear 447 * old locks and grant new locks. 
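 *
 * A minimal sketch of how a recovery path might drive this case; the cmd
 * and flag values are placeholders, the only requirements visible in the
 * code below being a non-zero l_sysid and NFS privileges (secpolicy_nfs()):
 *
 *	flock64_t fl;
 *
 *	bzero(&fl, sizeof (fl));
 *	fl.l_type = F_UNLKSYS;
 *	fl.l_sysid = sysid;		owner whose locks are discarded
 *	error = reclock(vp, &fl, SETFLCK, flag, 0, NULL);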
448 */ 449 450 if (lock_request->l_flock.l_sysid == 0) { 451 mutex_exit(&gp->gp_mutex); 452 return (EINVAL); 453 } 454 if (secpolicy_nfs(CRED()) != 0) { 455 mutex_exit(&gp->gp_mutex); 456 return (EPERM); 457 } 458 flk_delete_locks_by_sysid(lock_request); 459 lock_request->l_state &= ~REFERENCED_LOCK; 460 flk_set_state(lock_request, FLK_DEAD_STATE); 461 flk_free_lock(lock_request); 462 mutex_exit(&gp->gp_mutex); 463 return (0); 464 465 default: 466 error = EINVAL; 467 break; 468 } 469 470 /* Clustering: For blocked PXFS locks, return */ 471 if (error == PXFS_LOCK_BLOCKED) { 472 lock_request->l_state &= ~REFERENCED_LOCK; 473 mutex_exit(&gp->gp_mutex); 474 return (error); 475 } 476 477 /* 478 * Now that we have seen the status of locks in the system for 479 * this vnode we acquire the rwlock if it is an IO_LOCK. 480 */ 481 482 if (IS_IO_LOCK(lock_request)) { 483 (void) VOP_RWLOCK(vp, 484 (lock_request->l_type == F_RDLCK) ? 485 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL); 486 if (!error) { 487 lckdat->l_type = F_UNLCK; 488 489 /* 490 * This wake up is needed otherwise 491 * if IO_LOCK has slept the dependents on this 492 * will not be woken up at all. (bugid # 1185482). 493 */ 494 495 flk_wakeup(lock_request, 1); 496 flk_set_state(lock_request, FLK_DEAD_STATE); 497 flk_free_lock(lock_request); 498 } 499 /* 500 * else if error had occurred either flk_process_request() 501 * has returned EDEADLK in which case there will be no 502 * dependents for this lock or EINTR from flk_wait_execute_ 503 * request() in which case flk_cancel_sleeping_lock() 504 * would have been done. same is true with EBADF. 505 */ 506 } 507 508 if (lock_request == &stack_lock_request) { 509 flk_set_state(lock_request, FLK_DEAD_STATE); 510 } else { 511 lock_request->l_state &= ~REFERENCED_LOCK; 512 if ((error != 0) || IS_DELETED(lock_request)) { 513 flk_set_state(lock_request, FLK_DEAD_STATE); 514 flk_free_lock(lock_request); 515 } 516 } 517 518 mutex_exit(&gp->gp_mutex); 519 return (error); 520 521 done: 522 flk_set_state(lock_request, FLK_DEAD_STATE); 523 if (lock_request != &stack_lock_request) 524 flk_free_lock(lock_request); 525 return (error); 526 } 527 528 /* 529 * Invoke the callbacks in the given list. If before sleeping, invoke in 530 * list order. If after sleeping, invoke in reverse order. 531 * 532 * CPR (suspend/resume) support: if one of the callbacks returns a 533 * callb_cpr_t, return it. This will be used to make the thread CPR-safe 534 * while it is sleeping. There should be at most one callb_cpr_t for the 535 * thread. 536 * XXX This is unnecessarily complicated. The CPR information should just 537 * get passed in directly through VOP_FRLOCK and reclock, rather than 538 * sneaking it in via a callback. 
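 *
 * A sketch of the shape a callback is expected to have.  The function and
 * structure names here are hypothetical; the only contract is the signature
 * taken by flk_init_callback() below and the FLK_BEFORE_SLEEP /
 * FLK_AFTER_SLEEP convention:
 *
 *	static callb_cpr_t *
 *	my_frlock_callback(flk_cb_when_t when, void *arg)
 *	{
 *		my_frlock_state_t *sp = arg;
 *
 *		if (when == FLK_BEFORE_SLEEP)
 *			return (&sp->cpr_info);	used while the thread sleeps
 *		return (NULL);
 *	}
 *
 *	flk_callback_t flk_cb;
 *
 *	flk_init_callback(&flk_cb, my_frlock_callback, sp);
 *	error = reclock(vp, bfp, cmd, flag, offset, &flk_cb);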
539 */ 540 541 callb_cpr_t * 542 flk_invoke_callbacks(flk_callback_t *cblist, flk_cb_when_t when) 543 { 544 callb_cpr_t *cpr_callbackp = NULL; 545 callb_cpr_t *one_result; 546 flk_callback_t *cb; 547 548 if (cblist == NULL) 549 return (NULL); 550 551 if (when == FLK_BEFORE_SLEEP) { 552 cb = cblist; 553 do { 554 one_result = (*cb->cb_callback)(when, cb->cb_data); 555 if (one_result != NULL) { 556 ASSERT(cpr_callbackp == NULL); 557 cpr_callbackp = one_result; 558 } 559 cb = cb->cb_next; 560 } while (cb != cblist); 561 } else { 562 cb = cblist->cb_prev; 563 do { 564 one_result = (*cb->cb_callback)(when, cb->cb_data); 565 if (one_result != NULL) { 566 cpr_callbackp = one_result; 567 } 568 cb = cb->cb_prev; 569 } while (cb != cblist->cb_prev); 570 } 571 572 return (cpr_callbackp); 573 } 574 575 /* 576 * Initialize a flk_callback_t to hold the given callback. 577 */ 578 579 void 580 flk_init_callback(flk_callback_t *flk_cb, 581 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata) 582 { 583 flk_cb->cb_next = flk_cb; 584 flk_cb->cb_prev = flk_cb; 585 flk_cb->cb_callback = cb_fcn; 586 flk_cb->cb_data = cbdata; 587 } 588 589 /* 590 * Initialize an flk_callback_t and then link it into the head of an 591 * existing list (which may be NULL). 592 */ 593 594 void 595 flk_add_callback(flk_callback_t *newcb, 596 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), 597 void *cbdata, flk_callback_t *cblist) 598 { 599 flk_init_callback(newcb, cb_fcn, cbdata); 600 601 if (cblist == NULL) 602 return; 603 604 newcb->cb_prev = cblist->cb_prev; 605 newcb->cb_next = cblist; 606 cblist->cb_prev->cb_next = newcb; 607 cblist->cb_prev = newcb; 608 } 609 610 /* 611 * Remove the callback from a list. 612 */ 613 614 void 615 flk_del_callback(flk_callback_t *flk_cb) 616 { 617 flk_cb->cb_next->cb_prev = flk_cb->cb_prev; 618 flk_cb->cb_prev->cb_next = flk_cb->cb_next; 619 620 flk_cb->cb_prev = flk_cb; 621 flk_cb->cb_next = flk_cb; 622 } 623 624 /* 625 * Initialize the flk_edge_cache data structure and create the 626 * nlm_reg_status array. 627 */ 628 629 void 630 flk_init(void) 631 { 632 uint_t i; 633 634 flk_edge_cache = kmem_cache_create("flk_edges", 635 sizeof (struct edge), 0, NULL, NULL, NULL, NULL, NULL, 0); 636 if (flk_edge_cache == NULL) { 637 cmn_err(CE_PANIC, "Couldn't create flk_edge_cache\n"); 638 } 639 /* 640 * Create the NLM registry object. 641 */ 642 643 if (cluster_bootflags & CLUSTER_BOOTED) { 644 /* 645 * This routine tells you the maximum node id that will be used 646 * in the cluster. This number will be the size of the nlm 647 * registry status array. We add 1 because we will be using 648 * all entries indexed from 0 to maxnodeid; e.g., from 0 649 * to 64, for a total of 65 entries. 650 */ 651 nlm_status_size = clconf_maximum_nodeid() + 1; 652 } else { 653 nlm_status_size = 0; 654 } 655 656 if (nlm_status_size != 0) { /* booted as a cluster */ 657 nlm_reg_status = (flk_nlm_status_t *) 658 kmem_alloc(sizeof (flk_nlm_status_t) * nlm_status_size, 659 KM_SLEEP); 660 661 /* initialize all NLM states in array to NLM_UNKNOWN */ 662 for (i = 0; i < nlm_status_size; i++) { 663 nlm_reg_status[i] = FLK_NLM_UNKNOWN; 664 } 665 } 666 } 667 668 /* 669 * Zone constructor/destructor callbacks to be executed when a zone is 670 * created/destroyed. 
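 *
 * flock_zone_key itself is presumably created elsewhere (by the KLM module,
 * since the code above treats ZONE_KEY_UNINITIALIZED as "KLM not loaded");
 * the registration would look something like this sketch:
 *
 *	zone_key_create(&flock_zone_key, flk_zone_init, NULL, flk_zone_fini);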
671 */ 672 /* ARGSUSED */ 673 void * 674 flk_zone_init(zoneid_t zoneid) 675 { 676 struct flock_globals *fg; 677 uint_t i; 678 679 fg = kmem_alloc(sizeof (*fg), KM_SLEEP); 680 fg->flk_lockmgr_status = FLK_LOCKMGR_UP; 681 for (i = 0; i < HASH_SIZE; i++) 682 fg->lockmgr_status[i] = FLK_LOCKMGR_UP; 683 return (fg); 684 } 685 686 /* ARGSUSED */ 687 void 688 flk_zone_fini(zoneid_t zoneid, void *data) 689 { 690 struct flock_globals *fg = data; 691 692 kmem_free(fg, sizeof (*fg)); 693 } 694 695 /* 696 * Get a lock_descriptor structure with initialization of edge lists. 697 */ 698 699 static lock_descriptor_t * 700 flk_get_lock(void) 701 { 702 lock_descriptor_t *l; 703 704 l = kmem_zalloc(sizeof (lock_descriptor_t), KM_SLEEP); 705 706 cv_init(&l->l_cv, NULL, CV_DRIVER, NULL); 707 l->l_edge.edge_in_next = &l->l_edge; 708 l->l_edge.edge_in_prev = &l->l_edge; 709 l->l_edge.edge_adj_next = &l->l_edge; 710 l->l_edge.edge_adj_prev = &l->l_edge; 711 l->pvertex = -1; 712 l->l_status = FLK_INITIAL_STATE; 713 flk_lock_allocs++; 714 return (l); 715 } 716 717 /* 718 * Free a lock_descriptor structure. Just sets the DELETED_LOCK flag 719 * when some thread has a reference to it as in reclock(). 720 */ 721 722 void 723 flk_free_lock(lock_descriptor_t *lock) 724 { 725 ASSERT(IS_DEAD(lock)); 726 if (IS_REFERENCED(lock)) { 727 lock->l_state |= DELETED_LOCK; 728 return; 729 } 730 flk_lock_frees++; 731 kmem_free((void *)lock, sizeof (lock_descriptor_t)); 732 } 733 734 void 735 flk_set_state(lock_descriptor_t *lock, int new_state) 736 { 737 /* 738 * Locks in the sleeping list may be woken up in a number of ways, 739 * and more than once. If a sleeping lock is signaled awake more 740 * than once, then it may or may not change state depending on its 741 * current state. 742 * Also note that NLM locks that are sleeping could be moved to an 743 * interrupted state more than once if the unlock request is 744 * retransmitted by the NLM client - the second time around, this is 745 * just a nop. 746 * The ordering of being signaled awake is: 747 * INTERRUPTED_STATE > CANCELLED_STATE > GRANTED_STATE. 748 * The checks below implement this ordering. 749 */ 750 if (IS_INTERRUPTED(lock)) { 751 if ((new_state == FLK_CANCELLED_STATE) || 752 (new_state == FLK_GRANTED_STATE) || 753 (new_state == FLK_INTERRUPTED_STATE)) { 754 return; 755 } 756 } 757 if (IS_CANCELLED(lock)) { 758 if ((new_state == FLK_GRANTED_STATE) || 759 (new_state == FLK_CANCELLED_STATE)) { 760 return; 761 } 762 } 763 CHECK_LOCK_TRANSITION(lock->l_status, new_state); 764 if (IS_PXFS(lock)) { 765 cl_flk_state_transition_notify(lock, lock->l_status, new_state); 766 } 767 lock->l_status = new_state; 768 } 769 770 /* 771 * Routine that checks whether there are any blocking locks in the system. 772 * 773 * The policy followed is if a write lock is sleeping we don't allow read 774 * locks before this write lock even though there may not be any active 775 * locks corresponding to the read locks' region. 776 * 777 * flk_add_edge() function adds an edge between l1 and l2 iff there 778 * is no path between l1 and l2. This is done to have a "minimum 779 * storage representation" of the dependency graph. 780 * 781 * Another property of the graph is since only the new request throws 782 * edges to the existing locks in the graph, the graph is always topologically 783 * ordered. 
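 *
 * For reference, the conflict and coverage tests used below are conceptually
 * the usual POSIX record-locking rules; the real BLOCKS(), COVERS() and
 * SAME_OWNER() macros are defined in flock_impl.h, so this is only a
 * simplified restatement:
 *
 *	BLOCKS(l, r):	!SAME_OWNER(l, r)
 *			&& l->l_start <= r->l_end && r->l_start <= l->l_end
 *			&& (l->l_type == F_WRLCK || r->l_type == F_WRLCK)
 *	COVERS(l, r):	l->l_start <= r->l_start && l->l_end >= r->l_end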
784 */ 785 786 static int 787 flk_process_request(lock_descriptor_t *request) 788 { 789 graph_t *gp = request->l_graph; 790 lock_descriptor_t *lock; 791 int request_blocked_by_active = 0; 792 int request_blocked_by_granted = 0; 793 int request_blocked_by_sleeping = 0; 794 vnode_t *vp = request->l_vnode; 795 int error = 0; 796 int request_will_wait = 0; 797 int found_covering_lock = 0; 798 lock_descriptor_t *covered_by = NULL; 799 800 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 801 request_will_wait = IS_WILLING_TO_SLEEP(request); 802 803 /* 804 * check active locks 805 */ 806 807 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 808 809 810 if (lock) { 811 do { 812 if (BLOCKS(lock, request)) { 813 if (!request_will_wait) 814 return (EAGAIN); 815 request_blocked_by_active = 1; 816 break; 817 } 818 /* 819 * Grant lock if it is for the same owner holding active 820 * lock that covers the request. 821 */ 822 823 if (SAME_OWNER(lock, request) && 824 COVERS(lock, request) && 825 (request->l_type == F_RDLCK)) 826 return (flk_execute_request(request)); 827 lock = lock->l_next; 828 } while (lock->l_vnode == vp); 829 } 830 831 if (!request_blocked_by_active) { 832 lock_descriptor_t *lk[1]; 833 lock_descriptor_t *first_glock = NULL; 834 /* 835 * Shall we grant this?! NO!! 836 * What about those locks that were just granted and still 837 * in sleep queue. Those threads are woken up and so locks 838 * are almost active. 839 */ 840 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 841 if (lock) { 842 do { 843 if (BLOCKS(lock, request)) { 844 if (IS_GRANTED(lock)) { 845 request_blocked_by_granted = 1; 846 } else { 847 request_blocked_by_sleeping = 1; 848 } 849 } 850 851 lock = lock->l_next; 852 } while ((lock->l_vnode == vp)); 853 first_glock = lock->l_prev; 854 ASSERT(first_glock->l_vnode == vp); 855 } 856 857 if (request_blocked_by_granted) 858 goto block; 859 860 if (!request_blocked_by_sleeping) { 861 /* 862 * If the request isn't going to be blocked by a 863 * sleeping request, we know that it isn't going to 864 * be blocked; we can just execute the request -- 865 * without performing costly deadlock detection. 866 */ 867 ASSERT(!request_blocked_by_active); 868 return (flk_execute_request(request)); 869 } else if (request->l_type == F_RDLCK) { 870 /* 871 * If we have a sleeping writer in the requested 872 * lock's range, block. 873 */ 874 goto block; 875 } 876 877 lk[0] = request; 878 request->l_state |= RECOMPUTE_LOCK; 879 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 880 if (lock) { 881 do { 882 flk_recompute_dependencies(lock, lk, 1, 0); 883 lock = lock->l_next; 884 } while (lock->l_vnode == vp); 885 } 886 lock = first_glock; 887 if (lock) { 888 do { 889 if (IS_GRANTED(lock)) { 890 flk_recompute_dependencies(lock, lk, 1, 0); 891 } 892 lock = lock->l_prev; 893 } while ((lock->l_vnode == vp)); 894 } 895 request->l_state &= ~RECOMPUTE_LOCK; 896 if (!NO_DEPENDENTS(request) && flk_check_deadlock(request)) 897 return (EDEADLK); 898 return (flk_execute_request(request)); 899 } 900 901 block: 902 if (request_will_wait) 903 flk_graph_uncolor(gp); 904 905 /* check sleeping locks */ 906 907 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 908 909 /* 910 * If we find a sleeping write lock that is a superset of the 911 * region wanted by request we can be assured that by adding an 912 * edge to this write lock we have paths to all locks in the 913 * graph that blocks the request except in one case and that is why 914 * another check for SAME_OWNER in the loop below. 
The exception 915 * case is when this process that owns the sleeping write lock 'l1' 916 * has other locks l2, l3, l4 that are in the system and arrived 917 * before l1. l1 does not have path to these locks as they are from 918 * same process. We break when we find a second covering sleeping 919 * lock l5 owned by a process different from that owning l1, because 920 * there cannot be any of l2, l3, l4, etc., arrived before l5, and if 921 * it has l1 would have produced a deadlock already. 922 */ 923 924 if (lock) { 925 do { 926 if (BLOCKS(lock, request)) { 927 if (!request_will_wait) 928 return (EAGAIN); 929 if (COVERS(lock, request) && 930 lock->l_type == F_WRLCK) { 931 if (found_covering_lock && 932 !SAME_OWNER(lock, covered_by)) { 933 found_covering_lock++; 934 break; 935 } 936 found_covering_lock = 1; 937 covered_by = lock; 938 } 939 if (found_covering_lock && 940 !SAME_OWNER(lock, covered_by)) { 941 lock = lock->l_next; 942 continue; 943 } 944 if ((error = flk_add_edge(request, lock, 945 !found_covering_lock, 0))) 946 return (error); 947 } 948 lock = lock->l_next; 949 } while (lock->l_vnode == vp); 950 } 951 952 /* 953 * found_covering_lock == 2 iff at this point 'request' has paths 954 * to all locks that blocks 'request'. found_covering_lock == 1 iff at this 955 * point 'request' has paths to all locks that blocks 'request' whose owners 956 * are not same as the one that covers 'request' (covered_by above) and 957 * we can have locks whose owner is same as covered_by in the active list. 958 */ 959 960 if (request_blocked_by_active && found_covering_lock != 2) { 961 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 962 ASSERT(lock != NULL); 963 do { 964 if (BLOCKS(lock, request)) { 965 if (found_covering_lock && 966 !SAME_OWNER(lock, covered_by)) { 967 lock = lock->l_next; 968 continue; 969 } 970 if ((error = flk_add_edge(request, lock, 971 CHECK_CYCLE, 0))) 972 return (error); 973 } 974 lock = lock->l_next; 975 } while (lock->l_vnode == vp); 976 } 977 978 if (NOT_BLOCKED(request)) { 979 /* 980 * request not dependent on any other locks 981 * so execute this request 982 */ 983 return (flk_execute_request(request)); 984 } else { 985 /* 986 * check for deadlock 987 */ 988 if (flk_check_deadlock(request)) 989 return (EDEADLK); 990 /* 991 * this thread has to sleep 992 */ 993 return (flk_wait_execute_request(request)); 994 } 995 } 996 997 /* 998 * The actual execution of the request in the simple case is only to 999 * insert the 'request' in the list of active locks if it is not an 1000 * UNLOCK. 1001 * We have to consider the existing active locks' relation to 1002 * this 'request' if they are owned by same process. flk_relation() does 1003 * this job and sees to that the dependency graph information is maintained 1004 * properly. 
1005 */ 1006 1007 int 1008 flk_execute_request(lock_descriptor_t *request) 1009 { 1010 graph_t *gp = request->l_graph; 1011 vnode_t *vp = request->l_vnode; 1012 lock_descriptor_t *lock, *lock1; 1013 int done_searching = 0; 1014 1015 CHECK_SLEEPING_LOCKS(gp); 1016 CHECK_ACTIVE_LOCKS(gp); 1017 1018 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1019 1020 flk_set_state(request, FLK_START_STATE); 1021 1022 ASSERT(NOT_BLOCKED(request)); 1023 1024 /* IO_LOCK requests are only to check status */ 1025 1026 if (IS_IO_LOCK(request)) 1027 return (0); 1028 1029 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 1030 1031 if (lock == NULL && request->l_type == F_UNLCK) 1032 return (0); 1033 if (lock == NULL) { 1034 flk_insert_active_lock(request); 1035 return (0); 1036 } 1037 1038 do { 1039 lock1 = lock->l_next; 1040 if (SAME_OWNER(request, lock)) { 1041 done_searching = flk_relation(lock, request); 1042 } 1043 lock = lock1; 1044 } while (lock->l_vnode == vp && !done_searching); 1045 1046 /* 1047 * insert in active queue 1048 */ 1049 1050 if (request->l_type != F_UNLCK) 1051 flk_insert_active_lock(request); 1052 1053 return (0); 1054 } 1055 1056 /* 1057 * 'request' is blocked by some one therefore we put it into sleep queue. 1058 */ 1059 static int 1060 flk_wait_execute_request(lock_descriptor_t *request) 1061 { 1062 graph_t *gp = request->l_graph; 1063 callb_cpr_t *cprp; /* CPR info from callback */ 1064 struct flock_globals *fg; 1065 int index; 1066 1067 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1068 ASSERT(IS_WILLING_TO_SLEEP(request)); 1069 1070 flk_insert_sleeping_lock(request); 1071 1072 if (IS_LOCKMGR(request)) { 1073 index = HASH_INDEX(request->l_vnode); 1074 fg = flk_get_globals(); 1075 1076 if (nlm_status_size == 0) { /* not booted as a cluster */ 1077 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP) { 1078 flk_cancel_sleeping_lock(request, 1); 1079 return (ENOLCK); 1080 } 1081 } else { /* booted as a cluster */ 1082 /* 1083 * If the request is an NLM server lock request, 1084 * and the NLM state of the lock request is not 1085 * NLM_UP (because the NLM server is shutting 1086 * down), then cancel the sleeping lock and 1087 * return error ENOLCK that will encourage the 1088 * client to retransmit. 1089 */ 1090 if (!IS_NLM_UP(request)) { 1091 flk_cancel_sleeping_lock(request, 1); 1092 return (ENOLCK); 1093 } 1094 } 1095 } 1096 1097 /* Clustering: For blocking PXFS locks, return */ 1098 if (IS_PXFS(request)) { 1099 /* 1100 * PXFS locks sleep on the client side. 1101 * The callback argument is used to wake up the sleeper 1102 * when the lock is granted. 1103 * We return -1 (rather than an errno value) to indicate 1104 * the client side should sleep 1105 */ 1106 return (PXFS_LOCK_BLOCKED); 1107 } 1108 1109 if (request->l_callbacks != NULL) { 1110 /* 1111 * To make sure the shutdown code works correctly, either 1112 * the callback must happen after putting the lock on the 1113 * sleep list, or we must check the shutdown status after 1114 * returning from the callback (and before sleeping). At 1115 * least for now, we'll use the first option. If a 1116 * shutdown or signal or whatever happened while the graph 1117 * mutex was dropped, that will be detected by 1118 * wait_for_lock(). 
1119 */ 1120 mutex_exit(&gp->gp_mutex); 1121 1122 cprp = flk_invoke_callbacks(request->l_callbacks, 1123 FLK_BEFORE_SLEEP); 1124 1125 mutex_enter(&gp->gp_mutex); 1126 1127 if (cprp == NULL) { 1128 wait_for_lock(request); 1129 } else { 1130 mutex_enter(cprp->cc_lockp); 1131 CALLB_CPR_SAFE_BEGIN(cprp); 1132 mutex_exit(cprp->cc_lockp); 1133 wait_for_lock(request); 1134 mutex_enter(cprp->cc_lockp); 1135 CALLB_CPR_SAFE_END(cprp, cprp->cc_lockp); 1136 mutex_exit(cprp->cc_lockp); 1137 } 1138 1139 mutex_exit(&gp->gp_mutex); 1140 (void) flk_invoke_callbacks(request->l_callbacks, 1141 FLK_AFTER_SLEEP); 1142 mutex_enter(&gp->gp_mutex); 1143 } else { 1144 wait_for_lock(request); 1145 } 1146 1147 if (IS_LOCKMGR(request)) { 1148 /* 1149 * If the lock manager is shutting down, return an 1150 * error that will encourage the client to retransmit. 1151 */ 1152 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP && 1153 !IS_GRANTED(request)) { 1154 flk_cancel_sleeping_lock(request, 1); 1155 return (ENOLCK); 1156 } 1157 } 1158 1159 if (IS_INTERRUPTED(request)) { 1160 /* we got a signal, or act like we did */ 1161 flk_cancel_sleeping_lock(request, 1); 1162 return (EINTR); 1163 } 1164 1165 /* Cancelled if some other thread has closed the file */ 1166 1167 if (IS_CANCELLED(request)) { 1168 flk_cancel_sleeping_lock(request, 1); 1169 return (EBADF); 1170 } 1171 1172 request->l_state &= ~GRANTED_LOCK; 1173 REMOVE_SLEEP_QUEUE(request); 1174 return (flk_execute_request(request)); 1175 } 1176 1177 /* 1178 * This routine adds an edge between from and to because from depends 1179 * to. If asked to check for deadlock it checks whether there are any 1180 * reachable locks from "from_lock" that is owned by the same process 1181 * as "from_lock". 1182 * NOTE: It is the caller's responsibility to make sure that the color 1183 * of the graph is consistent between the calls to flk_add_edge as done 1184 * in flk_process_request. This routine does not color and check for 1185 * deadlock explicitly. 1186 */ 1187 1188 static int 1189 flk_add_edge(lock_descriptor_t *from_lock, lock_descriptor_t *to_lock, 1190 int check_cycle, int update_graph) 1191 { 1192 edge_t *edge; 1193 edge_t *ep; 1194 lock_descriptor_t *vertex; 1195 lock_descriptor_t *vertex_stack; 1196 1197 STACK_INIT(vertex_stack); 1198 1199 /* 1200 * if to vertex already has mark_color just return 1201 * don't add an edge as it is reachable from from vertex 1202 * before itself. 
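 *
 * Example: if the request R has already added edges R->A during this
 * (colored) pass, and A had an existing edge A->B, the cycle-check walk
 * colored B; a later candidate edge R->B is therefore redundant, because B
 * is already reachable from R, and is skipped by the test below.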
1203 */ 1204 1205 if (COLORED(to_lock)) 1206 return (0); 1207 1208 edge = flk_get_edge(); 1209 1210 /* 1211 * set the from and to vertex 1212 */ 1213 1214 edge->from_vertex = from_lock; 1215 edge->to_vertex = to_lock; 1216 1217 /* 1218 * put in adjacency list of from vertex 1219 */ 1220 1221 from_lock->l_edge.edge_adj_next->edge_adj_prev = edge; 1222 edge->edge_adj_next = from_lock->l_edge.edge_adj_next; 1223 edge->edge_adj_prev = &from_lock->l_edge; 1224 from_lock->l_edge.edge_adj_next = edge; 1225 1226 /* 1227 * put in in list of to vertex 1228 */ 1229 1230 to_lock->l_edge.edge_in_next->edge_in_prev = edge; 1231 edge->edge_in_next = to_lock->l_edge.edge_in_next; 1232 to_lock->l_edge.edge_in_next = edge; 1233 edge->edge_in_prev = &to_lock->l_edge; 1234 1235 1236 if (update_graph) { 1237 flk_update_proc_graph(edge, 0); 1238 return (0); 1239 } 1240 if (!check_cycle) { 1241 return (0); 1242 } 1243 1244 STACK_PUSH(vertex_stack, from_lock, l_stack); 1245 1246 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 1247 1248 STACK_POP(vertex_stack, l_stack); 1249 1250 for (ep = FIRST_ADJ(vertex); 1251 ep != HEAD(vertex); 1252 ep = NEXT_ADJ(ep)) { 1253 if (COLORED(ep->to_vertex)) 1254 continue; 1255 COLOR(ep->to_vertex); 1256 if (SAME_OWNER(ep->to_vertex, from_lock)) 1257 goto dead_lock; 1258 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack); 1259 } 1260 } 1261 return (0); 1262 1263 dead_lock: 1264 1265 /* 1266 * remove all edges 1267 */ 1268 1269 ep = FIRST_ADJ(from_lock); 1270 1271 while (ep != HEAD(from_lock)) { 1272 IN_LIST_REMOVE(ep); 1273 from_lock->l_sedge = NEXT_ADJ(ep); 1274 ADJ_LIST_REMOVE(ep); 1275 flk_free_edge(ep); 1276 ep = from_lock->l_sedge; 1277 } 1278 return (EDEADLK); 1279 } 1280 1281 /* 1282 * Get an edge structure for representing the dependency between two locks. 1283 */ 1284 1285 static edge_t * 1286 flk_get_edge() 1287 { 1288 edge_t *ep; 1289 1290 ASSERT(flk_edge_cache != NULL); 1291 1292 ep = kmem_cache_alloc(flk_edge_cache, KM_SLEEP); 1293 edge_allocs++; 1294 return (ep); 1295 } 1296 1297 /* 1298 * Free the edge structure. 1299 */ 1300 1301 static void 1302 flk_free_edge(edge_t *ep) 1303 { 1304 edge_frees++; 1305 kmem_cache_free(flk_edge_cache, (void *)ep); 1306 } 1307 1308 /* 1309 * Check the relationship of request with lock and perform the 1310 * recomputation of dependencies, break lock if required, and return 1311 * 1 if request cannot have any more relationship with the next 1312 * active locks. 1313 * The 'lock' and 'request' are compared and in case of overlap we 1314 * delete the 'lock' and form new locks to represent the non-overlapped 1315 * portion of original 'lock'. This function has side effects such as 1316 * 'lock' will be freed, new locks will be added to the active list. 
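 *
 * Worked example (same owner): an active read lock covers [100, 200] and the
 * new request is a write lock over [120, 150].  The effect is an upgrade
 * (not FLK_STAY_SAME), so the old lock is split into two new read locks
 * [100, 119] and [151, 200], the request keeps [120, 150], and the
 * topology[] array below holds { lock1, lock2, request } with nvertex == 3.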
1317 */ 1318 1319 static int 1320 flk_relation(lock_descriptor_t *lock, lock_descriptor_t *request) 1321 { 1322 int lock_effect; 1323 lock_descriptor_t *lock1, *lock2; 1324 lock_descriptor_t *topology[3]; 1325 int nvertex = 0; 1326 int i; 1327 edge_t *ep; 1328 graph_t *gp = (lock->l_graph); 1329 1330 1331 CHECK_SLEEPING_LOCKS(gp); 1332 CHECK_ACTIVE_LOCKS(gp); 1333 1334 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1335 1336 topology[0] = topology[1] = topology[2] = NULL; 1337 1338 if (request->l_type == F_UNLCK) 1339 lock_effect = FLK_UNLOCK; 1340 else if (request->l_type == F_RDLCK && 1341 lock->l_type == F_WRLCK) 1342 lock_effect = FLK_DOWNGRADE; 1343 else if (request->l_type == F_WRLCK && 1344 lock->l_type == F_RDLCK) 1345 lock_effect = FLK_UPGRADE; 1346 else 1347 lock_effect = FLK_STAY_SAME; 1348 1349 if (lock->l_end < request->l_start) { 1350 if (lock->l_end == request->l_start - 1 && 1351 lock_effect == FLK_STAY_SAME) { 1352 topology[0] = request; 1353 request->l_start = lock->l_start; 1354 nvertex = 1; 1355 goto recompute; 1356 } else { 1357 return (0); 1358 } 1359 } 1360 1361 if (lock->l_start > request->l_end) { 1362 if (request->l_end == lock->l_start - 1 && 1363 lock_effect == FLK_STAY_SAME) { 1364 topology[0] = request; 1365 request->l_end = lock->l_end; 1366 nvertex = 1; 1367 goto recompute; 1368 } else { 1369 return (1); 1370 } 1371 } 1372 1373 if (request->l_end < lock->l_end) { 1374 if (request->l_start > lock->l_start) { 1375 if (lock_effect == FLK_STAY_SAME) { 1376 request->l_start = lock->l_start; 1377 request->l_end = lock->l_end; 1378 topology[0] = request; 1379 nvertex = 1; 1380 } else { 1381 lock1 = flk_get_lock(); 1382 lock2 = flk_get_lock(); 1383 COPY(lock1, lock); 1384 COPY(lock2, lock); 1385 lock1->l_start = lock->l_start; 1386 lock1->l_end = request->l_start - 1; 1387 lock2->l_start = request->l_end + 1; 1388 lock2->l_end = lock->l_end; 1389 topology[0] = lock1; 1390 topology[1] = lock2; 1391 topology[2] = request; 1392 nvertex = 3; 1393 } 1394 } else if (request->l_start < lock->l_start) { 1395 if (lock_effect == FLK_STAY_SAME) { 1396 request->l_end = lock->l_end; 1397 topology[0] = request; 1398 nvertex = 1; 1399 } else { 1400 lock1 = flk_get_lock(); 1401 COPY(lock1, lock); 1402 lock1->l_start = request->l_end + 1; 1403 topology[0] = lock1; 1404 topology[1] = request; 1405 nvertex = 2; 1406 } 1407 } else { 1408 if (lock_effect == FLK_STAY_SAME) { 1409 request->l_start = lock->l_start; 1410 request->l_end = lock->l_end; 1411 topology[0] = request; 1412 nvertex = 1; 1413 } else { 1414 lock1 = flk_get_lock(); 1415 COPY(lock1, lock); 1416 lock1->l_start = request->l_end + 1; 1417 topology[0] = lock1; 1418 topology[1] = request; 1419 nvertex = 2; 1420 } 1421 } 1422 } else if (request->l_end > lock->l_end) { 1423 if (request->l_start > lock->l_start) { 1424 if (lock_effect == FLK_STAY_SAME) { 1425 request->l_start = lock->l_start; 1426 topology[0] = request; 1427 nvertex = 1; 1428 } else { 1429 lock1 = flk_get_lock(); 1430 COPY(lock1, lock); 1431 lock1->l_end = request->l_start - 1; 1432 topology[0] = lock1; 1433 topology[1] = request; 1434 nvertex = 2; 1435 } 1436 } else if (request->l_start < lock->l_start) { 1437 topology[0] = request; 1438 nvertex = 1; 1439 } else { 1440 topology[0] = request; 1441 nvertex = 1; 1442 } 1443 } else { 1444 if (request->l_start > lock->l_start) { 1445 if (lock_effect == FLK_STAY_SAME) { 1446 request->l_start = lock->l_start; 1447 topology[0] = request; 1448 nvertex = 1; 1449 } else { 1450 lock1 = flk_get_lock(); 1451 COPY(lock1, lock); 1452 
lock1->l_end = request->l_start - 1; 1453 topology[0] = lock1; 1454 topology[1] = request; 1455 nvertex = 2; 1456 } 1457 } else if (request->l_start < lock->l_start) { 1458 topology[0] = request; 1459 nvertex = 1; 1460 } else { 1461 if (lock_effect != FLK_UNLOCK) { 1462 topology[0] = request; 1463 nvertex = 1; 1464 } else { 1465 flk_delete_active_lock(lock, 0); 1466 flk_wakeup(lock, 1); 1467 flk_free_lock(lock); 1468 CHECK_SLEEPING_LOCKS(gp); 1469 CHECK_ACTIVE_LOCKS(gp); 1470 return (1); 1471 } 1472 } 1473 } 1474 1475 recompute: 1476 1477 /* 1478 * For unlock we don't send the 'request' to for recomputing 1479 * dependencies because no lock will add an edge to this. 1480 */ 1481 1482 if (lock_effect == FLK_UNLOCK) { 1483 topology[nvertex-1] = NULL; 1484 nvertex--; 1485 } 1486 for (i = 0; i < nvertex; i++) { 1487 topology[i]->l_state |= RECOMPUTE_LOCK; 1488 topology[i]->l_color = NO_COLOR; 1489 } 1490 1491 ASSERT(FIRST_ADJ(lock) == HEAD(lock)); 1492 1493 /* 1494 * we remove the adjacent edges for all vertices' to this vertex 1495 * 'lock'. 1496 */ 1497 1498 ep = FIRST_IN(lock); 1499 while (ep != HEAD(lock)) { 1500 ADJ_LIST_REMOVE(ep); 1501 ep = NEXT_IN(ep); 1502 } 1503 1504 flk_delete_active_lock(lock, 0); 1505 1506 /* We are ready for recomputing the dependencies now */ 1507 1508 flk_recompute_dependencies(lock, topology, nvertex, 1); 1509 1510 for (i = 0; i < nvertex; i++) { 1511 topology[i]->l_state &= ~RECOMPUTE_LOCK; 1512 topology[i]->l_color = NO_COLOR; 1513 } 1514 1515 1516 if (lock_effect == FLK_UNLOCK) { 1517 nvertex++; 1518 } 1519 for (i = 0; i < nvertex - 1; i++) { 1520 flk_insert_active_lock(topology[i]); 1521 } 1522 1523 1524 if (lock_effect == FLK_DOWNGRADE || lock_effect == FLK_UNLOCK) { 1525 flk_wakeup(lock, 0); 1526 } else { 1527 ep = FIRST_IN(lock); 1528 while (ep != HEAD(lock)) { 1529 lock->l_sedge = NEXT_IN(ep); 1530 IN_LIST_REMOVE(ep); 1531 flk_update_proc_graph(ep, 1); 1532 flk_free_edge(ep); 1533 ep = lock->l_sedge; 1534 } 1535 } 1536 flk_free_lock(lock); 1537 1538 CHECK_SLEEPING_LOCKS(gp); 1539 CHECK_ACTIVE_LOCKS(gp); 1540 return (0); 1541 } 1542 1543 /* 1544 * Insert a lock into the active queue. 1545 */ 1546 1547 static void 1548 flk_insert_active_lock(lock_descriptor_t *new_lock) 1549 { 1550 graph_t *gp = new_lock->l_graph; 1551 vnode_t *vp = new_lock->l_vnode; 1552 lock_descriptor_t *first_lock, *lock; 1553 1554 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1555 1556 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 1557 first_lock = lock; 1558 1559 if (first_lock != NULL) { 1560 for (; (lock->l_vnode == vp && 1561 lock->l_start < new_lock->l_start); lock = lock->l_next) 1562 ; 1563 } else { 1564 lock = ACTIVE_HEAD(gp); 1565 } 1566 1567 lock->l_prev->l_next = new_lock; 1568 new_lock->l_next = lock; 1569 new_lock->l_prev = lock->l_prev; 1570 lock->l_prev = new_lock; 1571 1572 if (first_lock == NULL || (new_lock->l_start <= first_lock->l_start)) { 1573 vp->v_filocks = (struct filock *)new_lock; 1574 } 1575 flk_set_state(new_lock, FLK_ACTIVE_STATE); 1576 new_lock->l_state |= ACTIVE_LOCK; 1577 1578 CHECK_ACTIVE_LOCKS(gp); 1579 CHECK_SLEEPING_LOCKS(gp); 1580 } 1581 1582 /* 1583 * Delete the active lock : Performs two functions depending on the 1584 * value of second parameter. One is to remove from the active lists 1585 * only and other is to both remove and free the lock. 
1586 */ 1587 1588 static void 1589 flk_delete_active_lock(lock_descriptor_t *lock, int free_lock) 1590 { 1591 vnode_t *vp = lock->l_vnode; 1592 graph_t *gp = lock->l_graph; 1593 1594 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1595 if (free_lock) 1596 ASSERT(NO_DEPENDENTS(lock)); 1597 ASSERT(NOT_BLOCKED(lock)); 1598 ASSERT(IS_ACTIVE(lock)); 1599 1600 ASSERT((vp->v_filocks != NULL)); 1601 1602 if (vp->v_filocks == (struct filock *)lock) { 1603 vp->v_filocks = (struct filock *) 1604 ((lock->l_next->l_vnode == vp) ? lock->l_next : 1605 NULL); 1606 } 1607 lock->l_next->l_prev = lock->l_prev; 1608 lock->l_prev->l_next = lock->l_next; 1609 lock->l_next = lock->l_prev = NULL; 1610 flk_set_state(lock, FLK_DEAD_STATE); 1611 lock->l_state &= ~ACTIVE_LOCK; 1612 1613 if (free_lock) 1614 flk_free_lock(lock); 1615 CHECK_ACTIVE_LOCKS(gp); 1616 CHECK_SLEEPING_LOCKS(gp); 1617 } 1618 1619 /* 1620 * Insert into the sleep queue. 1621 */ 1622 1623 static void 1624 flk_insert_sleeping_lock(lock_descriptor_t *request) 1625 { 1626 graph_t *gp = request->l_graph; 1627 vnode_t *vp = request->l_vnode; 1628 lock_descriptor_t *lock; 1629 1630 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1631 ASSERT(IS_INITIAL(request)); 1632 1633 for (lock = gp->sleeping_locks.l_next; (lock != &gp->sleeping_locks && 1634 lock->l_vnode < vp); lock = lock->l_next) 1635 ; 1636 1637 lock->l_prev->l_next = request; 1638 request->l_prev = lock->l_prev; 1639 lock->l_prev = request; 1640 request->l_next = lock; 1641 flk_set_state(request, FLK_SLEEPING_STATE); 1642 request->l_state |= SLEEPING_LOCK; 1643 } 1644 1645 /* 1646 * Cancelling a sleeping lock implies removing a vertex from the 1647 * dependency graph and therefore we should recompute the dependencies 1648 * of all vertices that have a path to this vertex, w.r.t. all 1649 * vertices reachable from this vertex. 1650 */ 1651 1652 void 1653 flk_cancel_sleeping_lock(lock_descriptor_t *request, int remove_from_queue) 1654 { 1655 graph_t *gp = request->l_graph; 1656 vnode_t *vp = request->l_vnode; 1657 lock_descriptor_t **topology = NULL; 1658 edge_t *ep; 1659 lock_descriptor_t *vertex, *lock; 1660 int nvertex = 0; 1661 int i; 1662 lock_descriptor_t *vertex_stack; 1663 1664 STACK_INIT(vertex_stack); 1665 1666 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1667 /* 1668 * count number of vertex pointers that has to be allocated 1669 * All vertices that are reachable from request. 1670 */ 1671 1672 STACK_PUSH(vertex_stack, request, l_stack); 1673 1674 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 1675 STACK_POP(vertex_stack, l_stack); 1676 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex); 1677 ep = NEXT_ADJ(ep)) { 1678 if (IS_RECOMPUTE(ep->to_vertex)) 1679 continue; 1680 ep->to_vertex->l_state |= RECOMPUTE_LOCK; 1681 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack); 1682 nvertex++; 1683 } 1684 } 1685 1686 /* 1687 * allocate memory for holding the vertex pointers 1688 */ 1689 1690 if (nvertex) { 1691 topology = kmem_zalloc(nvertex * sizeof (lock_descriptor_t *), 1692 KM_SLEEP); 1693 } 1694 1695 /* 1696 * one more pass to actually store the vertices in the 1697 * allocated array. 1698 * We first check sleeping locks and then active locks 1699 * so that topology array will be in a topological 1700 * order. 
1701 */ 1702 1703 nvertex = 0; 1704 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 1705 1706 if (lock) { 1707 do { 1708 if (IS_RECOMPUTE(lock)) { 1709 lock->l_index = nvertex; 1710 topology[nvertex++] = lock; 1711 } 1712 lock->l_color = NO_COLOR; 1713 lock = lock->l_next; 1714 } while (lock->l_vnode == vp); 1715 } 1716 1717 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 1718 1719 if (lock) { 1720 do { 1721 if (IS_RECOMPUTE(lock)) { 1722 lock->l_index = nvertex; 1723 topology[nvertex++] = lock; 1724 } 1725 lock->l_color = NO_COLOR; 1726 lock = lock->l_next; 1727 } while (lock->l_vnode == vp); 1728 } 1729 1730 /* 1731 * remove in and out edges of request 1732 * They are freed after updating proc_graph below. 1733 */ 1734 1735 for (ep = FIRST_IN(request); ep != HEAD(request); ep = NEXT_IN(ep)) { 1736 ADJ_LIST_REMOVE(ep); 1737 } 1738 1739 1740 if (remove_from_queue) 1741 REMOVE_SLEEP_QUEUE(request); 1742 1743 /* we are ready to recompute */ 1744 1745 flk_recompute_dependencies(request, topology, nvertex, 1); 1746 1747 ep = FIRST_ADJ(request); 1748 while (ep != HEAD(request)) { 1749 IN_LIST_REMOVE(ep); 1750 request->l_sedge = NEXT_ADJ(ep); 1751 ADJ_LIST_REMOVE(ep); 1752 flk_update_proc_graph(ep, 1); 1753 flk_free_edge(ep); 1754 ep = request->l_sedge; 1755 } 1756 1757 1758 /* 1759 * unset the RECOMPUTE flag in those vertices 1760 */ 1761 1762 for (i = 0; i < nvertex; i++) { 1763 topology[i]->l_state &= ~RECOMPUTE_LOCK; 1764 } 1765 1766 /* 1767 * free the topology 1768 */ 1769 if (nvertex) 1770 kmem_free((void *)topology, 1771 (nvertex * sizeof (lock_descriptor_t *))); 1772 /* 1773 * Possibility of some locks unblocked now 1774 */ 1775 1776 flk_wakeup(request, 0); 1777 1778 /* 1779 * we expect to have a correctly recomputed graph now. 1780 */ 1781 flk_set_state(request, FLK_DEAD_STATE); 1782 flk_free_lock(request); 1783 CHECK_SLEEPING_LOCKS(gp); 1784 CHECK_ACTIVE_LOCKS(gp); 1785 1786 } 1787 1788 /* 1789 * Uncoloring the graph is simply to increment the mark value of the graph 1790 * And only when wrap round takes place will we color all vertices in 1791 * the graph explicitly. 1792 */ 1793 1794 static void 1795 flk_graph_uncolor(graph_t *gp) 1796 { 1797 lock_descriptor_t *lock; 1798 1799 if (gp->mark == UINT_MAX) { 1800 gp->mark = 1; 1801 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp); 1802 lock = lock->l_next) 1803 lock->l_color = 0; 1804 1805 for (lock = SLEEPING_HEAD(gp)->l_next; lock != SLEEPING_HEAD(gp); 1806 lock = lock->l_next) 1807 lock->l_color = 0; 1808 } else { 1809 gp->mark++; 1810 } 1811 } 1812 1813 /* 1814 * Wake up locks that are blocked on the given lock. 1815 */ 1816 1817 static void 1818 flk_wakeup(lock_descriptor_t *lock, int adj_list_remove) 1819 { 1820 edge_t *ep; 1821 graph_t *gp = lock->l_graph; 1822 lock_descriptor_t *lck; 1823 1824 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1825 if (NO_DEPENDENTS(lock)) 1826 return; 1827 ep = FIRST_IN(lock); 1828 do { 1829 /* 1830 * delete the edge from the adjacency list 1831 * of from vertex. if no more adjacent edges 1832 * for this vertex wake this process. 
1833 */ 1834 lck = ep->from_vertex; 1835 if (adj_list_remove) 1836 ADJ_LIST_REMOVE(ep); 1837 flk_update_proc_graph(ep, 1); 1838 if (NOT_BLOCKED(lck)) { 1839 GRANT_WAKEUP(lck); 1840 } 1841 lock->l_sedge = NEXT_IN(ep); 1842 IN_LIST_REMOVE(ep); 1843 flk_free_edge(ep); 1844 ep = lock->l_sedge; 1845 } while (ep != HEAD(lock)); 1846 ASSERT(NO_DEPENDENTS(lock)); 1847 } 1848 1849 /* 1850 * The dependents of request, is checked for its dependency against the 1851 * locks in topology (called topology because the array is and should be in 1852 * topological order for this algorithm, if not in topological order the 1853 * inner loop below might add more edges than necessary. Topological ordering 1854 * of vertices satisfies the property that all edges will be from left to 1855 * right i.e., topology[i] can have an edge to topology[j], iff i<j) 1856 * If lock l1 in the dependent set of request is dependent (blocked by) 1857 * on lock l2 in topology but does not have a path to it, we add an edge 1858 * in the inner loop below. 1859 * 1860 * We don't want to add an edge between l1 and l2 if there exists 1861 * already a path from l1 to l2, so care has to be taken for those vertices 1862 * that have two paths to 'request'. These vertices are referred to here 1863 * as barrier locks. 1864 * 1865 * The barriers has to be found (those vertex that originally had two paths 1866 * to request) because otherwise we may end up adding edges unnecessarily 1867 * to vertices in topology, and thus barrier vertices can have an edge 1868 * to a vertex in topology as well a path to it. 1869 */ 1870 1871 static void 1872 flk_recompute_dependencies(lock_descriptor_t *request, 1873 lock_descriptor_t **topology, int nvertex, int update_graph) 1874 { 1875 lock_descriptor_t *vertex, *lock; 1876 graph_t *gp = request->l_graph; 1877 int i, count; 1878 int barrier_found = 0; 1879 edge_t *ep; 1880 lock_descriptor_t *vertex_stack; 1881 1882 STACK_INIT(vertex_stack); 1883 1884 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1885 if (nvertex == 0) 1886 return; 1887 flk_graph_uncolor(request->l_graph); 1888 barrier_found = flk_find_barriers(request); 1889 request->l_state |= RECOMPUTE_DONE; 1890 1891 STACK_PUSH(vertex_stack, request, l_stack); 1892 request->l_sedge = FIRST_IN(request); 1893 1894 1895 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 1896 if (vertex->l_state & RECOMPUTE_DONE) { 1897 count = 0; 1898 goto next_in_edge; 1899 } 1900 if (IS_BARRIER(vertex)) { 1901 /* decrement the barrier count */ 1902 if (vertex->l_index) { 1903 vertex->l_index--; 1904 /* this guy will be pushed again anyway ? */ 1905 STACK_POP(vertex_stack, l_stack); 1906 if (vertex->l_index == 0) { 1907 /* 1908 * barrier is over we can recompute 1909 * dependencies for this lock in the 1910 * next stack pop 1911 */ 1912 vertex->l_state &= ~BARRIER_LOCK; 1913 } 1914 continue; 1915 } 1916 } 1917 vertex->l_state |= RECOMPUTE_DONE; 1918 flk_graph_uncolor(gp); 1919 count = flk_color_reachables(vertex); 1920 for (i = 0; i < nvertex; i++) { 1921 lock = topology[i]; 1922 if (COLORED(lock)) 1923 continue; 1924 if (BLOCKS(lock, vertex)) { 1925 (void) flk_add_edge(vertex, lock, 1926 NO_CHECK_CYCLE, update_graph); 1927 COLOR(lock); 1928 count++; 1929 count += flk_color_reachables(lock); 1930 } 1931 1932 } 1933 1934 next_in_edge: 1935 if (count == nvertex || 1936 vertex->l_sedge == HEAD(vertex)) { 1937 /* prune the tree below this */ 1938 STACK_POP(vertex_stack, l_stack); 1939 vertex->l_state &= ~RECOMPUTE_DONE; 1940 /* update the barrier locks below this! 
*/ 1941 if (vertex->l_sedge != HEAD(vertex) && barrier_found) { 1942 flk_graph_uncolor(gp); 1943 flk_update_barriers(vertex); 1944 } 1945 continue; 1946 } 1947 1948 ep = vertex->l_sedge; 1949 lock = ep->from_vertex; 1950 STACK_PUSH(vertex_stack, lock, l_stack); 1951 lock->l_sedge = FIRST_IN(lock); 1952 vertex->l_sedge = NEXT_IN(ep); 1953 } 1954 1955 } 1956 1957 /* 1958 * Color all reachable vertices from vertex that belongs to topology (here 1959 * those that have RECOMPUTE_LOCK set in their state) and yet uncolored. 1960 * 1961 * Note: we need to use a different stack_link l_stack1 because this is 1962 * called from flk_recompute_dependencies() that already uses a stack with 1963 * l_stack as stack_link. 1964 */ 1965 1966 static int 1967 flk_color_reachables(lock_descriptor_t *vertex) 1968 { 1969 lock_descriptor_t *ver, *lock; 1970 int count; 1971 edge_t *ep; 1972 lock_descriptor_t *vertex_stack; 1973 1974 STACK_INIT(vertex_stack); 1975 1976 STACK_PUSH(vertex_stack, vertex, l_stack1); 1977 count = 0; 1978 while ((ver = STACK_TOP(vertex_stack)) != NULL) { 1979 1980 STACK_POP(vertex_stack, l_stack1); 1981 for (ep = FIRST_ADJ(ver); ep != HEAD(ver); 1982 ep = NEXT_ADJ(ep)) { 1983 lock = ep->to_vertex; 1984 if (COLORED(lock)) 1985 continue; 1986 COLOR(lock); 1987 if (IS_RECOMPUTE(lock)) 1988 count++; 1989 STACK_PUSH(vertex_stack, lock, l_stack1); 1990 } 1991 1992 } 1993 return (count); 1994 } 1995 1996 /* 1997 * Called from flk_recompute_dependencies() this routine decrements 1998 * the barrier count of barrier vertices that are reachable from lock. 1999 */ 2000 2001 static void 2002 flk_update_barriers(lock_descriptor_t *lock) 2003 { 2004 lock_descriptor_t *vertex, *lck; 2005 edge_t *ep; 2006 lock_descriptor_t *vertex_stack; 2007 2008 STACK_INIT(vertex_stack); 2009 2010 STACK_PUSH(vertex_stack, lock, l_stack1); 2011 2012 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 2013 STACK_POP(vertex_stack, l_stack1); 2014 for (ep = FIRST_IN(vertex); ep != HEAD(vertex); 2015 ep = NEXT_IN(ep)) { 2016 lck = ep->from_vertex; 2017 if (COLORED(lck)) { 2018 if (IS_BARRIER(lck)) { 2019 ASSERT(lck->l_index > 0); 2020 lck->l_index--; 2021 if (lck->l_index == 0) 2022 lck->l_state &= ~BARRIER_LOCK; 2023 } 2024 continue; 2025 } 2026 COLOR(lck); 2027 if (IS_BARRIER(lck)) { 2028 ASSERT(lck->l_index > 0); 2029 lck->l_index--; 2030 if (lck->l_index == 0) 2031 lck->l_state &= ~BARRIER_LOCK; 2032 } 2033 STACK_PUSH(vertex_stack, lck, l_stack1); 2034 } 2035 } 2036 } 2037 2038 /* 2039 * Finds all vertices that are reachable from 'lock' more than once and 2040 * mark them as barrier vertices and increment their barrier count. 2041 * The barrier count is one minus the total number of paths from lock 2042 * to that vertex. 
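 *
 * Example: if vertices a and b both depend on 'lock', and v depends on both
 * a and b, then v is reached twice during the walk over in-edges below; the
 * second visit finds v already colored, so v is marked BARRIER_LOCK and its
 * l_index (barrier count) ends up as 1, i.e. two paths minus one.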
2043 */ 2044 2045 static int 2046 flk_find_barriers(lock_descriptor_t *lock) 2047 { 2048 lock_descriptor_t *vertex, *lck; 2049 int found = 0; 2050 edge_t *ep; 2051 lock_descriptor_t *vertex_stack; 2052 2053 STACK_INIT(vertex_stack); 2054 2055 STACK_PUSH(vertex_stack, lock, l_stack1); 2056 2057 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 2058 STACK_POP(vertex_stack, l_stack1); 2059 for (ep = FIRST_IN(vertex); ep != HEAD(vertex); 2060 ep = NEXT_IN(ep)) { 2061 lck = ep->from_vertex; 2062 if (COLORED(lck)) { 2063 /* this is a barrier */ 2064 lck->l_state |= BARRIER_LOCK; 2065 /* index will have barrier count */ 2066 lck->l_index++; 2067 if (!found) 2068 found = 1; 2069 continue; 2070 } 2071 COLOR(lck); 2072 lck->l_index = 0; 2073 STACK_PUSH(vertex_stack, lck, l_stack1); 2074 } 2075 } 2076 return (found); 2077 } 2078 2079 /* 2080 * Finds the first lock that is mainly responsible for blocking this 2081 * request. If there is no such lock, request->l_flock.l_type is set to 2082 * F_UNLCK. Otherwise, request->l_flock is filled in with the particulars 2083 * of the blocking lock. 2084 * 2085 * Note: It is possible a request is blocked by a sleeping lock because 2086 * of the fairness policy used in flk_process_request() to construct the 2087 * dependencies. (see comments before flk_process_request()). 2088 */ 2089 2090 static void 2091 flk_get_first_blocking_lock(lock_descriptor_t *request) 2092 { 2093 graph_t *gp = request->l_graph; 2094 vnode_t *vp = request->l_vnode; 2095 lock_descriptor_t *lock, *blocker; 2096 2097 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 2098 blocker = NULL; 2099 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2100 2101 if (lock) { 2102 do { 2103 if (BLOCKS(lock, request)) { 2104 blocker = lock; 2105 break; 2106 } 2107 lock = lock->l_next; 2108 } while (lock->l_vnode == vp); 2109 } 2110 2111 if (blocker == NULL && request->l_flock.l_type == F_RDLCK) { 2112 /* 2113 * No active lock is blocking this request, but if a read 2114 * lock is requested, it may also get blocked by a waiting 2115 * writer. So search all sleeping locks and see if there is 2116 * a writer waiting. 2117 */ 2118 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2119 if (lock) { 2120 do { 2121 if (BLOCKS(lock, request)) { 2122 blocker = lock; 2123 break; 2124 } 2125 lock = lock->l_next; 2126 } while (lock->l_vnode == vp); 2127 } 2128 } 2129 2130 if (blocker) { 2131 report_blocker(blocker, request); 2132 } else 2133 request->l_flock.l_type = F_UNLCK; 2134 } 2135 2136 /* 2137 * Get the graph_t structure associated with a vnode. 2138 * If 'initialize' is non-zero, and the graph_t structure for this vnode has 2139 * not yet been initialized, then a new element is allocated and returned. 
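 *
 * A minimal usage sketch (illustrative only; the query routines below
 * follow this pattern, returning 0 when no graph exists because no locks
 * can exist for the vnode either):
 *
 *	gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
 *	if (gp == NULL)
 *		return (0);
 *	mutex_enter(&gp->gp_mutex);
 *	(walk ACTIVE_HEAD(gp) or SLEEPING_HEAD(gp) as needed)
 *	mutex_exit(&gp->gp_mutex);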
2140 */ 2141 graph_t * 2142 flk_get_lock_graph(vnode_t *vp, int initialize) 2143 { 2144 graph_t *gp; 2145 graph_t *gp_alloc = NULL; 2146 int index = HASH_INDEX(vp); 2147 2148 if (initialize == FLK_USE_GRAPH) { 2149 mutex_enter(&flock_lock); 2150 gp = lock_graph[index]; 2151 mutex_exit(&flock_lock); 2152 return (gp); 2153 } 2154 2155 ASSERT(initialize == FLK_INIT_GRAPH); 2156 2157 if (lock_graph[index] == NULL) { 2158 2159 gp_alloc = kmem_zalloc(sizeof (graph_t), KM_SLEEP); 2160 2161 /* Initialize the graph */ 2162 2163 gp_alloc->active_locks.l_next = 2164 gp_alloc->active_locks.l_prev = 2165 (lock_descriptor_t *)ACTIVE_HEAD(gp_alloc); 2166 gp_alloc->sleeping_locks.l_next = 2167 gp_alloc->sleeping_locks.l_prev = 2168 (lock_descriptor_t *)SLEEPING_HEAD(gp_alloc); 2169 gp_alloc->index = index; 2170 mutex_init(&gp_alloc->gp_mutex, NULL, MUTEX_DEFAULT, NULL); 2171 } 2172 2173 mutex_enter(&flock_lock); 2174 2175 gp = lock_graph[index]; 2176 2177 /* Recheck the value within flock_lock */ 2178 if (gp == NULL) { 2179 struct flock_globals *fg; 2180 2181 /* We must have previously allocated the graph_t structure */ 2182 ASSERT(gp_alloc != NULL); 2183 lock_graph[index] = gp = gp_alloc; 2184 /* 2185 * The lockmgr status is only needed if KLM is loaded. 2186 */ 2187 if (flock_zone_key != ZONE_KEY_UNINITIALIZED) { 2188 fg = flk_get_globals(); 2189 fg->lockmgr_status[index] = fg->flk_lockmgr_status; 2190 } 2191 } 2192 2193 mutex_exit(&flock_lock); 2194 2195 if ((gp_alloc != NULL) && (gp != gp_alloc)) { 2196 /* There was a race to allocate the graph_t and we lost */ 2197 mutex_destroy(&gp_alloc->gp_mutex); 2198 kmem_free(gp_alloc, sizeof (graph_t)); 2199 } 2200 2201 return (gp); 2202 } 2203 2204 /* 2205 * PSARC case 1997/292 2206 */ 2207 int 2208 cl_flk_has_remote_locks_for_nlmid(vnode_t *vp, int nlmid) 2209 { 2210 lock_descriptor_t *lock; 2211 int result = 0; 2212 graph_t *gp; 2213 int lock_nlmid; 2214 2215 /* 2216 * Check to see if node is booted as a cluster. If not, return. 2217 */ 2218 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 2219 return (0); 2220 } 2221 2222 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH); 2223 if (gp == NULL) { 2224 return (0); 2225 } 2226 2227 mutex_enter(&gp->gp_mutex); 2228 2229 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2230 2231 if (lock) { 2232 while (lock->l_vnode == vp) { 2233 /* get NLM id from sysid */ 2234 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 2235 2236 /* 2237 * If NLM server request _and_ nlmid of lock matches 2238 * nlmid of argument, then we've found a remote lock. 2239 */ 2240 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 2241 result = 1; 2242 goto done; 2243 } 2244 lock = lock->l_next; 2245 } 2246 } 2247 2248 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2249 2250 if (lock) { 2251 while (lock->l_vnode == vp) { 2252 /* get NLM id from sysid */ 2253 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 2254 2255 /* 2256 * If NLM server request _and_ nlmid of lock matches 2257 * nlmid of argument, then we've found a remote lock. 2258 */ 2259 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 2260 result = 1; 2261 goto done; 2262 } 2263 lock = lock->l_next; 2264 } 2265 } 2266 2267 done: 2268 mutex_exit(&gp->gp_mutex); 2269 return (result); 2270 } 2271 2272 /* 2273 * Determine whether there are any locks for the given vnode with a remote 2274 * sysid. Returns zero if not, non-zero if there are. 2275 * 2276 * Note that the return value from this function is potentially invalid 2277 * once it has been returned. 
The caller is responsible for providing its 2278 * own synchronization mechanism to ensure that the return value is useful 2279 * (e.g., see nfs_lockcompletion()). 2280 */ 2281 int 2282 flk_has_remote_locks(vnode_t *vp) 2283 { 2284 lock_descriptor_t *lock; 2285 int result = 0; 2286 graph_t *gp; 2287 2288 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH); 2289 if (gp == NULL) { 2290 return (0); 2291 } 2292 2293 mutex_enter(&gp->gp_mutex); 2294 2295 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2296 2297 if (lock) { 2298 while (lock->l_vnode == vp) { 2299 if (IS_REMOTE(lock)) { 2300 result = 1; 2301 goto done; 2302 } 2303 lock = lock->l_next; 2304 } 2305 } 2306 2307 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2308 2309 if (lock) { 2310 while (lock->l_vnode == vp) { 2311 if (IS_REMOTE(lock)) { 2312 result = 1; 2313 goto done; 2314 } 2315 lock = lock->l_next; 2316 } 2317 } 2318 2319 done: 2320 mutex_exit(&gp->gp_mutex); 2321 return (result); 2322 } 2323 2324 /* 2325 * Determine whether there are any locks for the given vnode with a remote 2326 * sysid matching given sysid. 2327 * Used by the new (open source) NFS Lock Manager (NLM) 2328 */ 2329 int 2330 flk_has_remote_locks_for_sysid(vnode_t *vp, int sysid) 2331 { 2332 lock_descriptor_t *lock; 2333 int result = 0; 2334 graph_t *gp; 2335 2336 if (sysid == 0) 2337 return (0); 2338 2339 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH); 2340 if (gp == NULL) { 2341 return (0); 2342 } 2343 2344 mutex_enter(&gp->gp_mutex); 2345 2346 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2347 2348 if (lock) { 2349 while (lock->l_vnode == vp) { 2350 if (lock->l_flock.l_sysid == sysid) { 2351 result = 1; 2352 goto done; 2353 } 2354 lock = lock->l_next; 2355 } 2356 } 2357 2358 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2359 2360 if (lock) { 2361 while (lock->l_vnode == vp) { 2362 if (lock->l_flock.l_sysid == sysid) { 2363 result = 1; 2364 goto done; 2365 } 2366 lock = lock->l_next; 2367 } 2368 } 2369 2370 done: 2371 mutex_exit(&gp->gp_mutex); 2372 return (result); 2373 } 2374 2375 /* 2376 * Determine if there are any locks owned by the given sysid. 2377 * Returns zero if not, non-zero if there are. Note that this return code 2378 * could be derived from flk_get_{sleeping,active}_locks, but this routine 2379 * avoids all the memory allocations of those routines. 2380 * 2381 * This routine has the same synchronization issues as 2382 * flk_has_remote_locks. 2383 */ 2384 2385 int 2386 flk_sysid_has_locks(int sysid, int lck_type) 2387 { 2388 int has_locks = 0; 2389 lock_descriptor_t *lock; 2390 graph_t *gp; 2391 int i; 2392 2393 for (i = 0; i < HASH_SIZE && !has_locks; i++) { 2394 mutex_enter(&flock_lock); 2395 gp = lock_graph[i]; 2396 mutex_exit(&flock_lock); 2397 if (gp == NULL) { 2398 continue; 2399 } 2400 2401 mutex_enter(&gp->gp_mutex); 2402 2403 if (lck_type & FLK_QUERY_ACTIVE) { 2404 for (lock = ACTIVE_HEAD(gp)->l_next; 2405 lock != ACTIVE_HEAD(gp) && !has_locks; 2406 lock = lock->l_next) { 2407 if (lock->l_flock.l_sysid == sysid) 2408 has_locks = 1; 2409 } 2410 } 2411 2412 if (lck_type & FLK_QUERY_SLEEPING) { 2413 for (lock = SLEEPING_HEAD(gp)->l_next; 2414 lock != SLEEPING_HEAD(gp) && !has_locks; 2415 lock = lock->l_next) { 2416 if (lock->l_flock.l_sysid == sysid) 2417 has_locks = 1; 2418 } 2419 } 2420 mutex_exit(&gp->gp_mutex); 2421 } 2422 2423 return (has_locks); 2424 } 2425 2426 2427 /* 2428 * PSARC case 1997/292 2429 * 2430 * Requires: "sysid" is a pair [nlmid, sysid]. 
The lower half is 16-bit 2431 * quantity, the real sysid generated by the NLM server; the upper half 2432 * identifies the node of the cluster where the NLM server ran. 2433 * This routine is only called by an NLM server running in a cluster. 2434 * Effects: Remove all locks held on behalf of the client identified 2435 * by "sysid." 2436 */ 2437 void 2438 cl_flk_remove_locks_by_sysid(int sysid) 2439 { 2440 graph_t *gp; 2441 int i; 2442 lock_descriptor_t *lock, *nlock; 2443 2444 /* 2445 * Check to see if node is booted as a cluster. If not, return. 2446 */ 2447 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 2448 return; 2449 } 2450 2451 ASSERT(sysid != 0); 2452 for (i = 0; i < HASH_SIZE; i++) { 2453 mutex_enter(&flock_lock); 2454 gp = lock_graph[i]; 2455 mutex_exit(&flock_lock); 2456 2457 if (gp == NULL) 2458 continue; 2459 2460 mutex_enter(&gp->gp_mutex); /* get mutex on lock graph */ 2461 2462 /* signal sleeping requests so that they bail out */ 2463 lock = SLEEPING_HEAD(gp)->l_next; 2464 while (lock != SLEEPING_HEAD(gp)) { 2465 nlock = lock->l_next; 2466 if (lock->l_flock.l_sysid == sysid) { 2467 INTERRUPT_WAKEUP(lock); 2468 } 2469 lock = nlock; 2470 } 2471 2472 /* delete active locks */ 2473 lock = ACTIVE_HEAD(gp)->l_next; 2474 while (lock != ACTIVE_HEAD(gp)) { 2475 nlock = lock->l_next; 2476 if (lock->l_flock.l_sysid == sysid) { 2477 flk_delete_active_lock(lock, 0); 2478 flk_wakeup(lock, 1); 2479 flk_free_lock(lock); 2480 } 2481 lock = nlock; 2482 } 2483 mutex_exit(&gp->gp_mutex); /* release mutex on lock graph */ 2484 } 2485 } 2486 2487 /* 2488 * Delete all locks in the system that belongs to the sysid of the request. 2489 */ 2490 2491 static void 2492 flk_delete_locks_by_sysid(lock_descriptor_t *request) 2493 { 2494 int sysid = request->l_flock.l_sysid; 2495 lock_descriptor_t *lock, *nlock; 2496 graph_t *gp; 2497 int i; 2498 2499 ASSERT(MUTEX_HELD(&request->l_graph->gp_mutex)); 2500 ASSERT(sysid != 0); 2501 2502 mutex_exit(&request->l_graph->gp_mutex); 2503 2504 for (i = 0; i < HASH_SIZE; i++) { 2505 mutex_enter(&flock_lock); 2506 gp = lock_graph[i]; 2507 mutex_exit(&flock_lock); 2508 2509 if (gp == NULL) 2510 continue; 2511 2512 mutex_enter(&gp->gp_mutex); 2513 2514 /* signal sleeping requests so that they bail out */ 2515 lock = SLEEPING_HEAD(gp)->l_next; 2516 while (lock != SLEEPING_HEAD(gp)) { 2517 nlock = lock->l_next; 2518 if (lock->l_flock.l_sysid == sysid) { 2519 INTERRUPT_WAKEUP(lock); 2520 } 2521 lock = nlock; 2522 } 2523 2524 /* delete active locks */ 2525 lock = ACTIVE_HEAD(gp)->l_next; 2526 while (lock != ACTIVE_HEAD(gp)) { 2527 nlock = lock->l_next; 2528 if (lock->l_flock.l_sysid == sysid) { 2529 flk_delete_active_lock(lock, 0); 2530 flk_wakeup(lock, 1); 2531 flk_free_lock(lock); 2532 } 2533 lock = nlock; 2534 } 2535 mutex_exit(&gp->gp_mutex); 2536 } 2537 2538 mutex_enter(&request->l_graph->gp_mutex); 2539 } 2540 2541 /* 2542 * Clustering: Deletes PXFS locks 2543 * Effects: Delete all locks on files in the given file system and with the 2544 * given PXFS id. 
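 * (Matching is done per lock: a lock matches when its vnode's v_vfsp equals
 * 'vfsp' and GETPXFSID(l_sysid) equals 'pxfsid'. Matching sleeping requests
 * are cancelled and matching active locks are deleted, with any waiters
 * woken, as the two loops below show.)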
2545 */ 2546 void 2547 cl_flk_delete_pxfs_locks(struct vfs *vfsp, int pxfsid) 2548 { 2549 lock_descriptor_t *lock, *nlock; 2550 graph_t *gp; 2551 int i; 2552 2553 for (i = 0; i < HASH_SIZE; i++) { 2554 mutex_enter(&flock_lock); 2555 gp = lock_graph[i]; 2556 mutex_exit(&flock_lock); 2557 2558 if (gp == NULL) 2559 continue; 2560 2561 mutex_enter(&gp->gp_mutex); 2562 2563 /* signal sleeping requests so that they bail out */ 2564 lock = SLEEPING_HEAD(gp)->l_next; 2565 while (lock != SLEEPING_HEAD(gp)) { 2566 nlock = lock->l_next; 2567 if (lock->l_vnode->v_vfsp == vfsp) { 2568 ASSERT(IS_PXFS(lock)); 2569 if (GETPXFSID(lock->l_flock.l_sysid) == 2570 pxfsid) { 2571 flk_set_state(lock, 2572 FLK_CANCELLED_STATE); 2573 flk_cancel_sleeping_lock(lock, 1); 2574 } 2575 } 2576 lock = nlock; 2577 } 2578 2579 /* delete active locks */ 2580 lock = ACTIVE_HEAD(gp)->l_next; 2581 while (lock != ACTIVE_HEAD(gp)) { 2582 nlock = lock->l_next; 2583 if (lock->l_vnode->v_vfsp == vfsp) { 2584 ASSERT(IS_PXFS(lock)); 2585 if (GETPXFSID(lock->l_flock.l_sysid) == 2586 pxfsid) { 2587 flk_delete_active_lock(lock, 0); 2588 flk_wakeup(lock, 1); 2589 flk_free_lock(lock); 2590 } 2591 } 2592 lock = nlock; 2593 } 2594 mutex_exit(&gp->gp_mutex); 2595 } 2596 } 2597 2598 /* 2599 * Search for a sleeping lock manager lock which matches exactly this lock 2600 * request; if one is found, fake a signal to cancel it. 2601 * 2602 * Return 1 if a matching lock was found, 0 otherwise. 2603 */ 2604 2605 static int 2606 flk_canceled(lock_descriptor_t *request) 2607 { 2608 lock_descriptor_t *lock, *nlock; 2609 graph_t *gp = request->l_graph; 2610 vnode_t *vp = request->l_vnode; 2611 2612 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 2613 ASSERT(IS_LOCKMGR(request)); 2614 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2615 2616 if (lock) { 2617 while (lock->l_vnode == vp) { 2618 nlock = lock->l_next; 2619 if (SAME_OWNER(lock, request) && 2620 lock->l_start == request->l_start && 2621 lock->l_end == request->l_end) { 2622 INTERRUPT_WAKEUP(lock); 2623 return (1); 2624 } 2625 lock = nlock; 2626 } 2627 } 2628 return (0); 2629 } 2630 2631 /* 2632 * Remove all the locks for the vnode belonging to the given pid and sysid. 
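 * A pid of IGN_PID acts as a wildcard: cleanlocks(vp, IGN_PID, sysid)
 * removes every lock on the vnode owned by the given sysid, regardless of
 * which process set it.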
2633 */ 2634 2635 void 2636 cleanlocks(vnode_t *vp, pid_t pid, int sysid) 2637 { 2638 graph_t *gp; 2639 lock_descriptor_t *lock, *nlock; 2640 lock_descriptor_t *link_stack; 2641 2642 STACK_INIT(link_stack); 2643 2644 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH); 2645 2646 if (gp == NULL) 2647 return; 2648 mutex_enter(&gp->gp_mutex); 2649 2650 CHECK_SLEEPING_LOCKS(gp); 2651 CHECK_ACTIVE_LOCKS(gp); 2652 2653 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2654 2655 if (lock) { 2656 do { 2657 nlock = lock->l_next; 2658 if ((lock->l_flock.l_pid == pid || 2659 pid == IGN_PID) && 2660 lock->l_flock.l_sysid == sysid) { 2661 CANCEL_WAKEUP(lock); 2662 } 2663 lock = nlock; 2664 } while (lock->l_vnode == vp); 2665 } 2666 2667 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2668 2669 if (lock) { 2670 do { 2671 nlock = lock->l_next; 2672 if ((lock->l_flock.l_pid == pid || 2673 pid == IGN_PID) && 2674 lock->l_flock.l_sysid == sysid) { 2675 flk_delete_active_lock(lock, 0); 2676 STACK_PUSH(link_stack, lock, l_stack); 2677 } 2678 lock = nlock; 2679 } while (lock->l_vnode == vp); 2680 } 2681 2682 while ((lock = STACK_TOP(link_stack)) != NULL) { 2683 STACK_POP(link_stack, l_stack); 2684 flk_wakeup(lock, 1); 2685 flk_free_lock(lock); 2686 } 2687 2688 CHECK_SLEEPING_LOCKS(gp); 2689 CHECK_ACTIVE_LOCKS(gp); 2690 CHECK_OWNER_LOCKS(gp, pid, sysid, vp); 2691 mutex_exit(&gp->gp_mutex); 2692 } 2693 2694 2695 /* 2696 * Called from 'fs' read and write routines for files that have mandatory 2697 * locking enabled. 2698 */ 2699 2700 int 2701 chklock(struct vnode *vp, int iomode, u_offset_t offset, ssize_t len, int fmode, 2702 caller_context_t *ct) 2703 { 2704 register int i; 2705 struct flock64 bf; 2706 int error = 0; 2707 2708 bf.l_type = (iomode & FWRITE) ? F_WRLCK : F_RDLCK; 2709 bf.l_whence = 0; 2710 bf.l_start = offset; 2711 bf.l_len = len; 2712 if (ct == NULL) { 2713 bf.l_pid = curproc->p_pid; 2714 bf.l_sysid = 0; 2715 } else { 2716 bf.l_pid = ct->cc_pid; 2717 bf.l_sysid = ct->cc_sysid; 2718 } 2719 i = (fmode & (FNDELAY|FNONBLOCK)) ? INOFLCK : INOFLCK|SLPFLCK; 2720 if ((i = reclock(vp, &bf, i, 0, offset, NULL)) != 0 || 2721 bf.l_type != F_UNLCK) 2722 error = i ? i : EAGAIN; 2723 return (error); 2724 } 2725 2726 /* 2727 * convoff - converts the given data (start, whence) to the 2728 * given whence. 2729 */ 2730 int 2731 convoff(struct vnode *vp, struct flock64 *lckdat, int whence, offset_t offset) 2732 { 2733 int error; 2734 struct vattr vattr; 2735 2736 if ((lckdat->l_whence == 2) || (whence == 2)) { 2737 vattr.va_mask = AT_SIZE; 2738 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) 2739 return (error); 2740 } 2741 2742 switch (lckdat->l_whence) { 2743 case 1: 2744 lckdat->l_start += offset; 2745 break; 2746 case 2: 2747 lckdat->l_start += vattr.va_size; 2748 /* FALLTHRU */ 2749 case 0: 2750 break; 2751 default: 2752 return (EINVAL); 2753 } 2754 2755 if (lckdat->l_start < 0) 2756 return (EINVAL); 2757 2758 switch (whence) { 2759 case 1: 2760 lckdat->l_start -= offset; 2761 break; 2762 case 2: 2763 lckdat->l_start -= vattr.va_size; 2764 /* FALLTHRU */ 2765 case 0: 2766 break; 2767 default: 2768 return (EINVAL); 2769 } 2770 2771 lckdat->l_whence = (short)whence; 2772 return (0); 2773 } 2774 2775 2776 /* proc_graph function definitions */ 2777 2778 /* 2779 * Function checks for deadlock due to the new 'lock'. If deadlock found 2780 * edges of this lock are freed and returned. 
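 * (In other words: the owner processes of the locks involved are the
 * vertices of the process graph; a depth-first search from this lock's
 * owner looks for a cycle. If one is found, every edge that was created
 * for 'lock' is torn down again and 1 is returned; otherwise 0.)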
2781 */ 2782 2783 static int 2784 flk_check_deadlock(lock_descriptor_t *lock) 2785 { 2786 proc_vertex_t *start_vertex, *pvertex; 2787 proc_vertex_t *dvertex; 2788 proc_edge_t *pep, *ppep; 2789 edge_t *ep, *nep; 2790 proc_vertex_t *process_stack; 2791 2792 STACK_INIT(process_stack); 2793 2794 mutex_enter(&flock_lock); 2795 start_vertex = flk_get_proc_vertex(lock); 2796 ASSERT(start_vertex != NULL); 2797 2798 /* construct the edges from this process to other processes */ 2799 2800 ep = FIRST_ADJ(lock); 2801 while (ep != HEAD(lock)) { 2802 proc_vertex_t *adj_proc; 2803 2804 adj_proc = flk_get_proc_vertex(ep->to_vertex); 2805 for (pep = start_vertex->edge; pep != NULL; pep = pep->next) { 2806 if (pep->to_proc == adj_proc) { 2807 ASSERT(pep->refcount); 2808 pep->refcount++; 2809 break; 2810 } 2811 } 2812 if (pep == NULL) { 2813 pep = flk_get_proc_edge(); 2814 pep->to_proc = adj_proc; 2815 pep->refcount = 1; 2816 adj_proc->incount++; 2817 pep->next = start_vertex->edge; 2818 start_vertex->edge = pep; 2819 } 2820 ep = NEXT_ADJ(ep); 2821 } 2822 2823 ep = FIRST_IN(lock); 2824 2825 while (ep != HEAD(lock)) { 2826 proc_vertex_t *in_proc; 2827 2828 in_proc = flk_get_proc_vertex(ep->from_vertex); 2829 2830 for (pep = in_proc->edge; pep != NULL; pep = pep->next) { 2831 if (pep->to_proc == start_vertex) { 2832 ASSERT(pep->refcount); 2833 pep->refcount++; 2834 break; 2835 } 2836 } 2837 if (pep == NULL) { 2838 pep = flk_get_proc_edge(); 2839 pep->to_proc = start_vertex; 2840 pep->refcount = 1; 2841 start_vertex->incount++; 2842 pep->next = in_proc->edge; 2843 in_proc->edge = pep; 2844 } 2845 ep = NEXT_IN(ep); 2846 } 2847 2848 if (start_vertex->incount == 0) { 2849 mutex_exit(&flock_lock); 2850 return (0); 2851 } 2852 2853 flk_proc_graph_uncolor(); 2854 2855 start_vertex->p_sedge = start_vertex->edge; 2856 2857 STACK_PUSH(process_stack, start_vertex, p_stack); 2858 2859 while ((pvertex = STACK_TOP(process_stack)) != NULL) { 2860 for (pep = pvertex->p_sedge; pep != NULL; pep = pep->next) { 2861 dvertex = pep->to_proc; 2862 if (!PROC_ARRIVED(dvertex)) { 2863 STACK_PUSH(process_stack, dvertex, p_stack); 2864 dvertex->p_sedge = dvertex->edge; 2865 PROC_ARRIVE(pvertex); 2866 pvertex->p_sedge = pep->next; 2867 break; 2868 } 2869 if (!PROC_DEPARTED(dvertex)) 2870 goto deadlock; 2871 } 2872 if (pep == NULL) { 2873 PROC_DEPART(pvertex); 2874 STACK_POP(process_stack, p_stack); 2875 } 2876 } 2877 mutex_exit(&flock_lock); 2878 return (0); 2879 2880 deadlock: 2881 2882 /* we remove all lock edges and proc edges */ 2883 2884 ep = FIRST_ADJ(lock); 2885 while (ep != HEAD(lock)) { 2886 proc_vertex_t *adj_proc; 2887 adj_proc = flk_get_proc_vertex(ep->to_vertex); 2888 nep = NEXT_ADJ(ep); 2889 IN_LIST_REMOVE(ep); 2890 ADJ_LIST_REMOVE(ep); 2891 flk_free_edge(ep); 2892 ppep = start_vertex->edge; 2893 for (pep = start_vertex->edge; pep != NULL; ppep = pep, 2894 pep = ppep->next) { 2895 if (pep->to_proc == adj_proc) { 2896 pep->refcount--; 2897 if (pep->refcount == 0) { 2898 if (pep == ppep) { 2899 start_vertex->edge = pep->next; 2900 } else { 2901 ppep->next = pep->next; 2902 } 2903 adj_proc->incount--; 2904 flk_proc_release(adj_proc); 2905 flk_free_proc_edge(pep); 2906 } 2907 break; 2908 } 2909 } 2910 ep = nep; 2911 } 2912 ep = FIRST_IN(lock); 2913 while (ep != HEAD(lock)) { 2914 proc_vertex_t *in_proc; 2915 in_proc = flk_get_proc_vertex(ep->from_vertex); 2916 nep = NEXT_IN(ep); 2917 IN_LIST_REMOVE(ep); 2918 ADJ_LIST_REMOVE(ep); 2919 flk_free_edge(ep); 2920 ppep = in_proc->edge; 2921 for (pep = in_proc->edge; pep != NULL; ppep = 
pep, 2922 pep = ppep->next) { 2923 if (pep->to_proc == start_vertex) { 2924 pep->refcount--; 2925 if (pep->refcount == 0) { 2926 if (pep == ppep) { 2927 in_proc->edge = pep->next; 2928 } else { 2929 ppep->next = pep->next; 2930 } 2931 start_vertex->incount--; 2932 flk_proc_release(in_proc); 2933 flk_free_proc_edge(pep); 2934 } 2935 break; 2936 } 2937 } 2938 ep = nep; 2939 } 2940 flk_proc_release(start_vertex); 2941 mutex_exit(&flock_lock); 2942 return (1); 2943 } 2944 2945 /* 2946 * Get a proc vertex. If lock's pvertex value gets a correct proc vertex 2947 * from the list we return that, otherwise we allocate one. If necessary, 2948 * we grow the list of vertices also. 2949 */ 2950 2951 static proc_vertex_t * 2952 flk_get_proc_vertex(lock_descriptor_t *lock) 2953 { 2954 int i; 2955 proc_vertex_t *pv; 2956 proc_vertex_t **palloc; 2957 2958 ASSERT(MUTEX_HELD(&flock_lock)); 2959 if (lock->pvertex != -1) { 2960 ASSERT(lock->pvertex >= 0); 2961 pv = pgraph.proc[lock->pvertex]; 2962 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) { 2963 return (pv); 2964 } 2965 } 2966 for (i = 0; i < pgraph.gcount; i++) { 2967 pv = pgraph.proc[i]; 2968 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) { 2969 lock->pvertex = pv->index = i; 2970 return (pv); 2971 } 2972 } 2973 pv = kmem_zalloc(sizeof (struct proc_vertex), KM_SLEEP); 2974 pv->pid = lock->l_flock.l_pid; 2975 pv->sysid = lock->l_flock.l_sysid; 2976 flk_proc_vertex_allocs++; 2977 if (pgraph.free != 0) { 2978 for (i = 0; i < pgraph.gcount; i++) { 2979 if (pgraph.proc[i] == NULL) { 2980 pgraph.proc[i] = pv; 2981 lock->pvertex = pv->index = i; 2982 pgraph.free--; 2983 return (pv); 2984 } 2985 } 2986 } 2987 palloc = kmem_zalloc((pgraph.gcount + PROC_CHUNK) * 2988 sizeof (proc_vertex_t *), KM_SLEEP); 2989 2990 if (pgraph.proc) { 2991 bcopy(pgraph.proc, palloc, 2992 pgraph.gcount * sizeof (proc_vertex_t *)); 2993 2994 kmem_free(pgraph.proc, 2995 pgraph.gcount * sizeof (proc_vertex_t *)); 2996 } 2997 pgraph.proc = palloc; 2998 pgraph.free += (PROC_CHUNK - 1); 2999 pv->index = lock->pvertex = pgraph.gcount; 3000 pgraph.gcount += PROC_CHUNK; 3001 pgraph.proc[pv->index] = pv; 3002 return (pv); 3003 } 3004 3005 /* 3006 * Allocate a proc edge. 3007 */ 3008 3009 static proc_edge_t * 3010 flk_get_proc_edge() 3011 { 3012 proc_edge_t *pep; 3013 3014 pep = kmem_zalloc(sizeof (proc_edge_t), KM_SLEEP); 3015 flk_proc_edge_allocs++; 3016 return (pep); 3017 } 3018 3019 /* 3020 * Free the proc edge. Called whenever its reference count goes to zero. 3021 */ 3022 3023 static void 3024 flk_free_proc_edge(proc_edge_t *pep) 3025 { 3026 ASSERT(pep->refcount == 0); 3027 kmem_free((void *)pep, sizeof (proc_edge_t)); 3028 flk_proc_edge_frees++; 3029 } 3030 3031 /* 3032 * Color the graph explicitly done only when the mark value hits max value. 
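 * (The vertices' arrival and departure marks (atime, dtime) are compared
 * against pgraph.mark by the PROC_ARRIVE/PROC_DEPART machinery, so bumping
 * the mark effectively uncolors every vertex at once; an explicit sweep that
 * zeroes those fields is needed only when the mark would wrap at UINT_MAX,
 * which is what the loop below does.)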
3033 */ 3034 3035 static void 3036 flk_proc_graph_uncolor() 3037 { 3038 int i; 3039 3040 if (pgraph.mark == UINT_MAX) { 3041 for (i = 0; i < pgraph.gcount; i++) 3042 if (pgraph.proc[i] != NULL) { 3043 pgraph.proc[i]->atime = 0; 3044 pgraph.proc[i]->dtime = 0; 3045 } 3046 pgraph.mark = 1; 3047 } else { 3048 pgraph.mark++; 3049 } 3050 } 3051 3052 /* 3053 * Release the proc vertex only if it has neither in edges nor out edges. 3054 */ 3055 3056 static void 3057 flk_proc_release(proc_vertex_t *proc) 3058 { 3059 ASSERT(MUTEX_HELD(&flock_lock)); 3060 if (proc->edge == NULL && proc->incount == 0) { 3061 pgraph.proc[proc->index] = NULL; 3062 pgraph.free++; 3063 kmem_free(proc, sizeof (proc_vertex_t)); 3064 flk_proc_vertex_frees++; 3065 } 3066 } 3067 3068 /* 3069 * Update the process graph to reflect a change in a lock graph. 3070 * Note: We should call this function only after we have a correctly 3071 * recomputed lock graph. Otherwise we might miss a deadlock detection; 3072 * e.g., in flk_relation() we call this function after 3073 * flk_recompute_dependencies(), otherwise a process that tries to lock a 3074 * vnode hashed into another graph might sleep forever. 3075 */ 3076 3077 static void 3078 flk_update_proc_graph(edge_t *ep, int delete) 3079 { 3080 proc_vertex_t *toproc, *fromproc; 3081 proc_edge_t *pep, *prevpep; 3082 3083 mutex_enter(&flock_lock); 3084 toproc = flk_get_proc_vertex(ep->to_vertex); 3085 fromproc = flk_get_proc_vertex(ep->from_vertex); 3086 3087 if (!delete) 3088 goto add; 3089 pep = prevpep = fromproc->edge; 3090 3091 ASSERT(pep != NULL); 3092 while (pep != NULL) { 3093 if (pep->to_proc == toproc) { 3094 ASSERT(pep->refcount > 0); 3095 pep->refcount--; 3096 if (pep->refcount == 0) { 3097 if (pep == prevpep) { 3098 fromproc->edge = pep->next; 3099 } else { 3100 prevpep->next = pep->next; 3101 } 3102 toproc->incount--; 3103 flk_proc_release(toproc); 3104 flk_free_proc_edge(pep); 3105 } 3106 break; 3107 } 3108 prevpep = pep; 3109 pep = pep->next; 3110 } 3111 flk_proc_release(fromproc); 3112 mutex_exit(&flock_lock); 3113 return; 3114 add: 3115 3116 pep = fromproc->edge; 3117 3118 while (pep != NULL) { 3119 if (pep->to_proc == toproc) { 3120 ASSERT(pep->refcount > 0); 3121 pep->refcount++; 3122 break; 3123 } 3124 pep = pep->next; 3125 } 3126 if (pep == NULL) { 3127 pep = flk_get_proc_edge(); 3128 pep->to_proc = toproc; 3129 pep->refcount = 1; 3130 toproc->incount++; 3131 pep->next = fromproc->edge; 3132 fromproc->edge = pep; 3133 } 3134 mutex_exit(&flock_lock); 3135 } 3136 3137 /* 3138 * Set the control status for lock manager requests. 3139 * 3140 */ 3141 3142 /* 3143 * PSARC case 1997/292 3144 * 3145 * Requires: "nlmid" must be >= 1 and <= clconf_maximum_nodeid(). 3146 * Effects: Set the state of the NLM server identified by "nlmid" 3147 * in the NLM registry to state "nlm_state." 3148 * Raises exception no_such_nlm if "nlmid" doesn't identify a known 3149 * NLM server to this LLM. 3150 * Note that when this routine is called with NLM_SHUTTING_DOWN there 3151 * may be lock requests that have gotten started but not finished. In 3152 * particular, there may be blocking requests that are in the callback code 3153 * before sleeping (so they're not holding the lock for the graph). If 3154 * such a thread reacquires the graph's lock (to go to sleep) after 3155 * the NLM state in the NLM registry is set to a non-up value, 3156 * it will notice the status and bail out. If the request gets 3157 * granted before the thread can check the NLM registry, let it 3158 * continue normally.
It will get flushed when we are called with NLM_DOWN. 3159 * 3160 * Modifies: nlm_reg_obj (global) 3161 * Arguments: 3162 * nlmid (IN): id uniquely identifying an NLM server 3163 * nlm_state (IN): NLM server state to change "nlmid" to 3164 */ 3165 void 3166 cl_flk_set_nlm_status(int nlmid, flk_nlm_status_t nlm_state) 3167 { 3168 /* 3169 * Check to see if node is booted as a cluster. If not, return. 3170 */ 3171 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 3172 return; 3173 } 3174 3175 /* 3176 * Check for development/debugging. It is possible to boot a node 3177 * in non-cluster mode, and then run a special script, currently 3178 * available only to developers, to bring up the node as part of a 3179 * cluster. The problem is that running such a script does not 3180 * result in the routine flk_init() being called and hence global array 3181 * nlm_reg_status is NULL. The NLM thinks it's in cluster mode, 3182 * but the LLM needs to do an additional check to see if the global 3183 * array has been created or not. If nlm_reg_status is NULL, then 3184 * return, else continue. 3185 */ 3186 if (nlm_reg_status == NULL) { 3187 return; 3188 } 3189 3190 ASSERT(nlmid <= nlm_status_size && nlmid >= 0); 3191 mutex_enter(&nlm_reg_lock); 3192 3193 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, nlmid)) { 3194 /* 3195 * If the NLM server "nlmid" is unknown in the NLM registry, 3196 * add it to the registry in the nlm shutting down state. 3197 */ 3198 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, 3199 FLK_NLM_SHUTTING_DOWN); 3200 } else { 3201 /* 3202 * Change the state of the NLM server identified by "nlmid" 3203 * in the NLM registry to the argument "nlm_state." 3204 */ 3205 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, 3206 nlm_state); 3207 } 3208 3209 /* 3210 * The reason we must register the NLM server that is shutting down 3211 * with an LLM that doesn't already know about it (never sent a lock 3212 * request) is to handle correctly a race between shutdown and a new 3213 * lock request. Suppose that a shutdown request from the NLM server 3214 * invokes this routine at the LLM, and a thread is spawned to 3215 * service the request. Now suppose a new lock request is in 3216 * progress and has already passed the first line of defense in 3217 * reclock(), which denies new locks requests from NLM servers 3218 * that are not in the NLM_UP state. After the current routine 3219 * is invoked for both phases of shutdown, the routine will return, 3220 * having done nothing, and the lock request will proceed and 3221 * probably be granted. The problem is that the shutdown was ignored 3222 * by the lock request because there was no record of that NLM server 3223 * shutting down. We will be in the peculiar position of thinking 3224 * that we've shutdown the NLM server and all locks at all LLMs have 3225 * been discarded, but in fact there's still one lock held. 3226 * The solution is to record the existence of NLM server and change 3227 * its state immediately to NLM_SHUTTING_DOWN. The lock request in 3228 * progress may proceed because the next phase NLM_DOWN will catch 3229 * this lock and discard it. 3230 */ 3231 mutex_exit(&nlm_reg_lock); 3232 3233 switch (nlm_state) { 3234 case FLK_NLM_UP: 3235 /* 3236 * Change the NLM state of all locks still held on behalf of 3237 * the NLM server identified by "nlmid" to NLM_UP. 
3238 */ 3239 cl_flk_change_nlm_state_all_locks(nlmid, FLK_NLM_UP); 3240 break; 3241 3242 case FLK_NLM_SHUTTING_DOWN: 3243 /* 3244 * Wake up all sleeping locks for the NLM server identified 3245 * by "nlmid." Note that eventually all woken threads will 3246 * have their lock requests cancelled and descriptors 3247 * removed from the sleeping lock list. Note that the NLM 3248 * server state associated with each lock descriptor is 3249 * changed to FLK_NLM_SHUTTING_DOWN. 3250 */ 3251 cl_flk_wakeup_sleeping_nlm_locks(nlmid); 3252 break; 3253 3254 case FLK_NLM_DOWN: 3255 /* 3256 * Discard all active, granted locks for this NLM server 3257 * identified by "nlmid." 3258 */ 3259 cl_flk_unlock_nlm_granted(nlmid); 3260 break; 3261 3262 default: 3263 panic("cl_set_nlm_status: bad status (%d)", nlm_state); 3264 } 3265 } 3266 3267 /* 3268 * Set the control status for lock manager requests. 3269 * 3270 * Note that when this routine is called with FLK_WAKEUP_SLEEPERS, there 3271 * may be locks requests that have gotten started but not finished. In 3272 * particular, there may be blocking requests that are in the callback code 3273 * before sleeping (so they're not holding the lock for the graph). If 3274 * such a thread reacquires the graph's lock (to go to sleep) after 3275 * flk_lockmgr_status is set to a non-up value, it will notice the status 3276 * and bail out. If the request gets granted before the thread can check 3277 * flk_lockmgr_status, let it continue normally. It will get flushed when 3278 * we are called with FLK_LOCKMGR_DOWN. 3279 */ 3280 3281 void 3282 flk_set_lockmgr_status(flk_lockmgr_status_t status) 3283 { 3284 int i; 3285 graph_t *gp; 3286 struct flock_globals *fg; 3287 3288 fg = flk_get_globals(); 3289 ASSERT(fg != NULL); 3290 3291 mutex_enter(&flock_lock); 3292 fg->flk_lockmgr_status = status; 3293 mutex_exit(&flock_lock); 3294 3295 /* 3296 * If the lock manager is coming back up, all that's needed is to 3297 * propagate this information to the graphs. If the lock manager 3298 * is going down, additional action is required, and each graph's 3299 * copy of the state is updated atomically with this other action. 3300 */ 3301 switch (status) { 3302 case FLK_LOCKMGR_UP: 3303 for (i = 0; i < HASH_SIZE; i++) { 3304 mutex_enter(&flock_lock); 3305 gp = lock_graph[i]; 3306 mutex_exit(&flock_lock); 3307 if (gp == NULL) 3308 continue; 3309 mutex_enter(&gp->gp_mutex); 3310 fg->lockmgr_status[i] = status; 3311 mutex_exit(&gp->gp_mutex); 3312 } 3313 break; 3314 case FLK_WAKEUP_SLEEPERS: 3315 wakeup_sleeping_lockmgr_locks(fg); 3316 break; 3317 case FLK_LOCKMGR_DOWN: 3318 unlock_lockmgr_granted(fg); 3319 break; 3320 default: 3321 panic("flk_set_lockmgr_status: bad status (%d)", status); 3322 break; 3323 } 3324 } 3325 3326 /* 3327 * This routine returns all the locks that are active or sleeping and are 3328 * associated with a particular set of identifiers. If lock_state != 0, then 3329 * only locks that match the lock_state are returned. If lock_state == 0, then 3330 * all locks are returned. If pid == NOPID, the pid is ignored. If 3331 * use_sysid is FALSE, then the sysid is ignored. If vp is NULL, then the 3332 * vnode pointer is ignored. 3333 * 3334 * A list containing the vnode pointer and an flock structure 3335 * describing the lock is returned. Each element in the list is 3336 * dynamically allocated and must be freed by the caller. The 3337 * last item in the list is denoted by a NULL value in the ll_next 3338 * field. 3339 * 3340 * The vnode pointers returned are held. 
The caller is responsible 3341 * for releasing these. Note that the returned list is only a snapshot of 3342 * the current lock information, and that it is a snapshot of a moving 3343 * target (only one graph is locked at a time). 3344 */ 3345 3346 locklist_t * 3347 get_lock_list(int list_type, int lock_state, int sysid, boolean_t use_sysid, 3348 pid_t pid, const vnode_t *vp, zoneid_t zoneid) 3349 { 3350 lock_descriptor_t *lock; 3351 lock_descriptor_t *graph_head; 3352 locklist_t listhead; 3353 locklist_t *llheadp; 3354 locklist_t *llp; 3355 locklist_t *lltp; 3356 graph_t *gp; 3357 int i; 3358 int first_index; /* graph index */ 3359 int num_indexes; /* graph index */ 3360 3361 ASSERT((list_type == FLK_ACTIVE_STATE) || 3362 (list_type == FLK_SLEEPING_STATE)); 3363 3364 /* 3365 * Get a pointer to something to use as a list head while building 3366 * the rest of the list. 3367 */ 3368 llheadp = &listhead; 3369 lltp = llheadp; 3370 llheadp->ll_next = (locklist_t *)NULL; 3371 3372 /* Figure out which graphs we want to look at. */ 3373 if (vp == NULL) { 3374 first_index = 0; 3375 num_indexes = HASH_SIZE; 3376 } else { 3377 first_index = HASH_INDEX(vp); 3378 num_indexes = 1; 3379 } 3380 3381 for (i = first_index; i < first_index + num_indexes; i++) { 3382 mutex_enter(&flock_lock); 3383 gp = lock_graph[i]; 3384 mutex_exit(&flock_lock); 3385 if (gp == NULL) { 3386 continue; 3387 } 3388 3389 mutex_enter(&gp->gp_mutex); 3390 graph_head = (list_type == FLK_ACTIVE_STATE) ? 3391 ACTIVE_HEAD(gp) : SLEEPING_HEAD(gp); 3392 for (lock = graph_head->l_next; 3393 lock != graph_head; 3394 lock = lock->l_next) { 3395 if (use_sysid && lock->l_flock.l_sysid != sysid) 3396 continue; 3397 if (pid != NOPID && lock->l_flock.l_pid != pid) 3398 continue; 3399 if (vp != NULL && lock->l_vnode != vp) 3400 continue; 3401 if (lock_state && !(lock_state & lock->l_state)) 3402 continue; 3403 if (zoneid != lock->l_zoneid && zoneid != ALL_ZONES) 3404 continue; 3405 /* 3406 * A matching lock was found. Allocate 3407 * space for a new locklist entry and fill 3408 * it in. 3409 */ 3410 llp = kmem_alloc(sizeof (locklist_t), KM_SLEEP); 3411 lltp->ll_next = llp; 3412 VN_HOLD(lock->l_vnode); 3413 llp->ll_vp = lock->l_vnode; 3414 create_flock(lock, &(llp->ll_flock)); 3415 llp->ll_next = (locklist_t *)NULL; 3416 lltp = llp; 3417 } 3418 mutex_exit(&gp->gp_mutex); 3419 } 3420 3421 llp = llheadp->ll_next; 3422 return (llp); 3423 } 3424 3425 /* 3426 * These two functions are simply interfaces to get_lock_list. They return 3427 * a list of sleeping or active locks for the given sysid and pid. See 3428 * get_lock_list for details. 3429 * 3430 * In either case we don't particularly care to specify the zone of interest; 3431 * the sysid-space is global across zones, so the sysid will map to exactly one 3432 * zone, and we'll return information for that zone. 3433 */ 3434 3435 locklist_t * 3436 flk_get_sleeping_locks(int sysid, pid_t pid) 3437 { 3438 return (get_lock_list(FLK_SLEEPING_STATE, 0, sysid, B_TRUE, pid, NULL, 3439 ALL_ZONES)); 3440 } 3441 3442 locklist_t * 3443 flk_get_active_locks(int sysid, pid_t pid) 3444 { 3445 return (get_lock_list(FLK_ACTIVE_STATE, 0, sysid, B_TRUE, pid, NULL, 3446 ALL_ZONES)); 3447 } 3448 3449 /* 3450 * Another interface to get_lock_list. This one returns all the active 3451 * locks for a given vnode. Again, see get_lock_list for details. 3452 * 3453 * We don't need to specify which zone's locks we're interested in. 
The matter 3454 * would only be interesting if the vnode belonged to NFS, and NFS vnodes can't 3455 * be used by multiple zones, so the list of locks will all be from the right 3456 * zone. 3457 */ 3458 3459 locklist_t * 3460 flk_active_locks_for_vp(const vnode_t *vp) 3461 { 3462 return (get_lock_list(FLK_ACTIVE_STATE, 0, 0, B_FALSE, NOPID, vp, 3463 ALL_ZONES)); 3464 } 3465 3466 /* 3467 * Another interface to get_lock_list. This one returns all the active 3468 * nbmand locks for a given vnode. Again, see get_lock_list for details. 3469 * 3470 * See the comment for flk_active_locks_for_vp() for why we don't care to 3471 * specify the particular zone of interest. 3472 */ 3473 locklist_t * 3474 flk_active_nbmand_locks_for_vp(const vnode_t *vp) 3475 { 3476 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE, 3477 NOPID, vp, ALL_ZONES)); 3478 } 3479 3480 /* 3481 * Another interface to get_lock_list. This one returns all the active 3482 * nbmand locks for a given pid. Again, see get_lock_list for details. 3483 * 3484 * The zone doesn't need to be specified here; the locks held by a 3485 * particular process will either be local (ie, non-NFS) or from the zone 3486 * the process is executing in. This is because other parts of the system 3487 * ensure that an NFS vnode can't be used in a zone other than that in 3488 * which it was opened. 3489 */ 3490 locklist_t * 3491 flk_active_nbmand_locks(pid_t pid) 3492 { 3493 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE, 3494 pid, NULL, ALL_ZONES)); 3495 } 3496 3497 /* 3498 * Free up all entries in the locklist. 3499 */ 3500 void 3501 flk_free_locklist(locklist_t *llp) 3502 { 3503 locklist_t *next_llp; 3504 3505 while (llp) { 3506 next_llp = llp->ll_next; 3507 VN_RELE(llp->ll_vp); 3508 kmem_free(llp, sizeof (*llp)); 3509 llp = next_llp; 3510 } 3511 } 3512 3513 static void 3514 cl_flk_change_nlm_state_all_locks(int nlmid, flk_nlm_status_t nlm_state) 3515 { 3516 /* 3517 * For each graph "lg" in the hash table lock_graph do 3518 * a. Get the list of sleeping locks 3519 * b. For each lock descriptor in the list do 3520 * i. If the requested lock is an NLM server request AND 3521 * the nlmid is the same as the routine argument then 3522 * change the lock descriptor's state field to 3523 * "nlm_state." 3524 * c. Get the list of active locks 3525 * d. For each lock descriptor in the list do 3526 * i. If the requested lock is an NLM server request AND 3527 * the nlmid is the same as the routine argument then 3528 * change the lock descriptor's state field to 3529 * "nlm_state." 3530 */ 3531 3532 int i; 3533 graph_t *gp; /* lock graph */ 3534 lock_descriptor_t *lock; /* lock */ 3535 lock_descriptor_t *nlock = NULL; /* next lock */ 3536 int lock_nlmid; 3537 3538 for (i = 0; i < HASH_SIZE; i++) { 3539 mutex_enter(&flock_lock); 3540 gp = lock_graph[i]; 3541 mutex_exit(&flock_lock); 3542 if (gp == NULL) { 3543 continue; 3544 } 3545 3546 /* Get list of sleeping locks in current lock graph. */ 3547 mutex_enter(&gp->gp_mutex); 3548 for (lock = SLEEPING_HEAD(gp)->l_next; 3549 lock != SLEEPING_HEAD(gp); 3550 lock = nlock) { 3551 nlock = lock->l_next; 3552 /* get NLM id */ 3553 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 3554 3555 /* 3556 * If NLM server request AND nlmid of lock matches 3557 * nlmid of argument, then set the NLM state of the 3558 * lock to "nlm_state." 3559 */ 3560 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 3561 SET_NLM_STATE(lock, nlm_state); 3562 } 3563 } 3564 3565 /* Get list of active locks in current lock graph. 
*/ 3566 for (lock = ACTIVE_HEAD(gp)->l_next; 3567 lock != ACTIVE_HEAD(gp); 3568 lock = nlock) { 3569 nlock = lock->l_next; 3570 /* get NLM id */ 3571 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 3572 3573 /* 3574 * If NLM server request AND nlmid of lock matches 3575 * nlmid of argument, then set the NLM state of the 3576 * lock to "nlm_state." 3577 */ 3578 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 3579 ASSERT(IS_ACTIVE(lock)); 3580 SET_NLM_STATE(lock, nlm_state); 3581 } 3582 } 3583 mutex_exit(&gp->gp_mutex); 3584 } 3585 } 3586 3587 /* 3588 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid(). 3589 * Effects: Find all sleeping lock manager requests _only_ for the NLM server 3590 * identified by "nlmid." Poke those lock requests. 3591 */ 3592 static void 3593 cl_flk_wakeup_sleeping_nlm_locks(int nlmid) 3594 { 3595 lock_descriptor_t *lock; 3596 lock_descriptor_t *nlock = NULL; /* next lock */ 3597 int i; 3598 graph_t *gp; 3599 int lock_nlmid; 3600 3601 for (i = 0; i < HASH_SIZE; i++) { 3602 mutex_enter(&flock_lock); 3603 gp = lock_graph[i]; 3604 mutex_exit(&flock_lock); 3605 if (gp == NULL) { 3606 continue; 3607 } 3608 3609 mutex_enter(&gp->gp_mutex); 3610 for (lock = SLEEPING_HEAD(gp)->l_next; 3611 lock != SLEEPING_HEAD(gp); 3612 lock = nlock) { 3613 nlock = lock->l_next; 3614 /* 3615 * If NLM server request _and_ nlmid of lock matches 3616 * nlmid of argument, then set the NLM state of the 3617 * lock to NLM_SHUTTING_DOWN, and wake up sleeping 3618 * request. 3619 */ 3620 if (IS_LOCKMGR(lock)) { 3621 /* get NLM id */ 3622 lock_nlmid = 3623 GETNLMID(lock->l_flock.l_sysid); 3624 if (nlmid == lock_nlmid) { 3625 SET_NLM_STATE(lock, 3626 FLK_NLM_SHUTTING_DOWN); 3627 INTERRUPT_WAKEUP(lock); 3628 } 3629 } 3630 } 3631 mutex_exit(&gp->gp_mutex); 3632 } 3633 } 3634 3635 /* 3636 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid() 3637 * Effects: Find all active (granted) lock manager locks _only_ for the 3638 * NLM server identified by "nlmid" and release them. 3639 */ 3640 static void 3641 cl_flk_unlock_nlm_granted(int nlmid) 3642 { 3643 lock_descriptor_t *lock; 3644 lock_descriptor_t *nlock = NULL; /* next lock */ 3645 int i; 3646 graph_t *gp; 3647 int lock_nlmid; 3648 3649 for (i = 0; i < HASH_SIZE; i++) { 3650 mutex_enter(&flock_lock); 3651 gp = lock_graph[i]; 3652 mutex_exit(&flock_lock); 3653 if (gp == NULL) { 3654 continue; 3655 } 3656 3657 mutex_enter(&gp->gp_mutex); 3658 for (lock = ACTIVE_HEAD(gp)->l_next; 3659 lock != ACTIVE_HEAD(gp); 3660 lock = nlock) { 3661 nlock = lock->l_next; 3662 ASSERT(IS_ACTIVE(lock)); 3663 3664 /* 3665 * If it's an NLM server request _and_ nlmid of 3666 * the lock matches nlmid of argument, then 3667 * remove the active lock the list, wakup blocked 3668 * threads, and free the storage for the lock. 3669 * Note that there's no need to mark the NLM state 3670 * of this lock to NLM_DOWN because the lock will 3671 * be deleted anyway and its storage freed. 3672 */ 3673 if (IS_LOCKMGR(lock)) { 3674 /* get NLM id */ 3675 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 3676 if (nlmid == lock_nlmid) { 3677 flk_delete_active_lock(lock, 0); 3678 flk_wakeup(lock, 1); 3679 flk_free_lock(lock); 3680 } 3681 } 3682 } 3683 mutex_exit(&gp->gp_mutex); 3684 } 3685 } 3686 3687 /* 3688 * Find all sleeping lock manager requests and poke them. 
3689 */ 3690 static void 3691 wakeup_sleeping_lockmgr_locks(struct flock_globals *fg) 3692 { 3693 lock_descriptor_t *lock; 3694 lock_descriptor_t *nlock = NULL; /* next lock */ 3695 int i; 3696 graph_t *gp; 3697 zoneid_t zoneid = getzoneid(); 3698 3699 for (i = 0; i < HASH_SIZE; i++) { 3700 mutex_enter(&flock_lock); 3701 gp = lock_graph[i]; 3702 mutex_exit(&flock_lock); 3703 if (gp == NULL) { 3704 continue; 3705 } 3706 3707 mutex_enter(&gp->gp_mutex); 3708 fg->lockmgr_status[i] = FLK_WAKEUP_SLEEPERS; 3709 for (lock = SLEEPING_HEAD(gp)->l_next; 3710 lock != SLEEPING_HEAD(gp); 3711 lock = nlock) { 3712 nlock = lock->l_next; 3713 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) { 3714 INTERRUPT_WAKEUP(lock); 3715 } 3716 } 3717 mutex_exit(&gp->gp_mutex); 3718 } 3719 } 3720 3721 3722 /* 3723 * Find all active (granted) lock manager locks and release them. 3724 */ 3725 static void 3726 unlock_lockmgr_granted(struct flock_globals *fg) 3727 { 3728 lock_descriptor_t *lock; 3729 lock_descriptor_t *nlock = NULL; /* next lock */ 3730 int i; 3731 graph_t *gp; 3732 zoneid_t zoneid = getzoneid(); 3733 3734 for (i = 0; i < HASH_SIZE; i++) { 3735 mutex_enter(&flock_lock); 3736 gp = lock_graph[i]; 3737 mutex_exit(&flock_lock); 3738 if (gp == NULL) { 3739 continue; 3740 } 3741 3742 mutex_enter(&gp->gp_mutex); 3743 fg->lockmgr_status[i] = FLK_LOCKMGR_DOWN; 3744 for (lock = ACTIVE_HEAD(gp)->l_next; 3745 lock != ACTIVE_HEAD(gp); 3746 lock = nlock) { 3747 nlock = lock->l_next; 3748 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) { 3749 ASSERT(IS_ACTIVE(lock)); 3750 flk_delete_active_lock(lock, 0); 3751 flk_wakeup(lock, 1); 3752 flk_free_lock(lock); 3753 } 3754 } 3755 mutex_exit(&gp->gp_mutex); 3756 } 3757 } 3758 3759 3760 /* 3761 * Wait until a lock is granted, cancelled, or interrupted. 3762 */ 3763 3764 static void 3765 wait_for_lock(lock_descriptor_t *request) 3766 { 3767 graph_t *gp = request->l_graph; 3768 3769 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 3770 3771 while (!(IS_GRANTED(request)) && !(IS_CANCELLED(request)) && 3772 !(IS_INTERRUPTED(request))) { 3773 if (!cv_wait_sig(&request->l_cv, &gp->gp_mutex)) { 3774 flk_set_state(request, FLK_INTERRUPTED_STATE); 3775 request->l_state |= INTERRUPTED_LOCK; 3776 } 3777 } 3778 } 3779 3780 /* 3781 * Create an flock structure from the existing lock information 3782 * 3783 * This routine is used to create flock structures for the lock manager 3784 * to use in a reclaim request. Since the lock was originated on this 3785 * host, it must be conforming to UNIX semantics, so no checking is 3786 * done to make sure it falls within the lower half of the 32-bit range. 3787 */ 3788 3789 static void 3790 create_flock(lock_descriptor_t *lp, flock64_t *flp) 3791 { 3792 ASSERT(lp->l_end == MAX_U_OFFSET_T || lp->l_end <= MAXEND); 3793 ASSERT(lp->l_end >= lp->l_start); 3794 3795 flp->l_type = lp->l_type; 3796 flp->l_whence = 0; 3797 flp->l_start = lp->l_start; 3798 flp->l_len = (lp->l_end == MAX_U_OFFSET_T) ? 0 : 3799 (lp->l_end - lp->l_start + 1); 3800 flp->l_sysid = lp->l_flock.l_sysid; 3801 flp->l_pid = lp->l_flock.l_pid; 3802 } 3803 3804 /* 3805 * Convert flock_t data describing a lock range into unsigned long starting 3806 * and ending points, which are put into lock_request. Returns 0 or an 3807 * errno value. 
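 * Worked examples (illustrative): with l_whence == 1 (SEEK_CUR),
 * l_start == -10 and l_len == 5 at file offset 100, the result is
 * *start == 90 and *end == 94. With l_whence == 0, l_start == 100 and a
 * negative length of -10, the result is *start == 91 and *end == 100.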
3808 */ 3809 3810 int 3811 flk_convert_lock_data(vnode_t *vp, flock64_t *flp, 3812 u_offset_t *start, u_offset_t *end, offset_t offset) 3813 { 3814 struct vattr vattr; 3815 int error; 3816 3817 /* 3818 * Determine the starting point of the request 3819 */ 3820 switch (flp->l_whence) { 3821 case 0: /* SEEK_SET */ 3822 *start = (u_offset_t)flp->l_start; 3823 break; 3824 case 1: /* SEEK_CUR */ 3825 *start = (u_offset_t)(flp->l_start + offset); 3826 break; 3827 case 2: /* SEEK_END */ 3828 vattr.va_mask = AT_SIZE; 3829 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) 3830 return (error); 3831 *start = (u_offset_t)(flp->l_start + vattr.va_size); 3832 break; 3833 default: 3834 return (EINVAL); 3835 } 3836 3837 /* 3838 * Determine the range covered by the request. 3839 */ 3840 if (flp->l_len == 0) 3841 *end = MAX_U_OFFSET_T; 3842 else if ((offset_t)flp->l_len > 0) { 3843 *end = (u_offset_t)(*start + (flp->l_len - 1)); 3844 } else { 3845 /* 3846 * Negative length; why do we even allow this ? 3847 * Because this allows easy specification of 3848 * the last n bytes of the file. 3849 */ 3850 *end = *start; 3851 *start += (u_offset_t)flp->l_len; 3852 (*start)++; 3853 } 3854 return (0); 3855 } 3856 3857 /* 3858 * Check the validity of lock data. This can used by the NFS 3859 * frlock routines to check data before contacting the server. The 3860 * server must support semantics that aren't as restrictive as 3861 * the UNIX API, so the NFS client is required to check. 3862 * The maximum is now passed in by the caller. 3863 */ 3864 3865 int 3866 flk_check_lock_data(u_offset_t start, u_offset_t end, offset_t max) 3867 { 3868 /* 3869 * The end (length) for local locking should never be greater 3870 * than MAXEND. However, the representation for 3871 * the entire file is MAX_U_OFFSET_T. 3872 */ 3873 if ((start > max) || 3874 ((end > max) && (end != MAX_U_OFFSET_T))) { 3875 return (EINVAL); 3876 } 3877 if (start > end) { 3878 return (EINVAL); 3879 } 3880 return (0); 3881 } 3882 3883 /* 3884 * Fill in request->l_flock with information about the lock blocking the 3885 * request. The complexity here is that lock manager requests are allowed 3886 * to see into the upper part of the 32-bit address range, whereas local 3887 * requests are only allowed to see signed values. 3888 * 3889 * What should be done when "blocker" is a lock manager lock that uses the 3890 * upper portion of the 32-bit range, but "request" is local? Since the 3891 * request has already been determined to have been blocked by the blocker, 3892 * at least some portion of "blocker" must be in the range of the request, 3893 * or the request extends to the end of file. For the first case, the 3894 * portion in the lower range is returned with the indication that it goes 3895 * "to EOF." For the second case, the last byte of the lower range is 3896 * returned with the indication that it goes "to EOF." 
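 * For example (illustrative): if a local request is blocked by a lock
 * manager lock whose l_start already lies above MAXEND, the blocker is
 * reported as l_start == MAXEND with l_len == 0, i.e. "to EOF," exactly as
 * the second branch below does.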
3897 */ 3898 3899 static void 3900 report_blocker(lock_descriptor_t *blocker, lock_descriptor_t *request) 3901 { 3902 flock64_t *flrp; /* l_flock portion of request */ 3903 3904 ASSERT(blocker != NULL); 3905 3906 flrp = &request->l_flock; 3907 flrp->l_whence = 0; 3908 flrp->l_type = blocker->l_type; 3909 flrp->l_pid = blocker->l_flock.l_pid; 3910 flrp->l_sysid = blocker->l_flock.l_sysid; 3911 3912 if (IS_LOCKMGR(request)) { 3913 flrp->l_start = blocker->l_start; 3914 if (blocker->l_end == MAX_U_OFFSET_T) 3915 flrp->l_len = 0; 3916 else 3917 flrp->l_len = blocker->l_end - blocker->l_start + 1; 3918 } else { 3919 if (blocker->l_start > MAXEND) { 3920 flrp->l_start = MAXEND; 3921 flrp->l_len = 0; 3922 } else { 3923 flrp->l_start = blocker->l_start; 3924 if (blocker->l_end == MAX_U_OFFSET_T) 3925 flrp->l_len = 0; 3926 else 3927 flrp->l_len = blocker->l_end - 3928 blocker->l_start + 1; 3929 } 3930 } 3931 } 3932 3933 /* 3934 * PSARC case 1997/292 3935 */ 3936 /* 3937 * This is the public routine exported by flock.h. 3938 */ 3939 void 3940 cl_flk_change_nlm_state_to_unknown(int nlmid) 3941 { 3942 /* 3943 * Check to see if node is booted as a cluster. If not, return. 3944 */ 3945 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 3946 return; 3947 } 3948 3949 /* 3950 * See comment in cl_flk_set_nlm_status(). 3951 */ 3952 if (nlm_reg_status == NULL) { 3953 return; 3954 } 3955 3956 /* 3957 * protect NLM registry state with a mutex. 3958 */ 3959 ASSERT(nlmid <= nlm_status_size && nlmid >= 0); 3960 mutex_enter(&nlm_reg_lock); 3961 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, FLK_NLM_UNKNOWN); 3962 mutex_exit(&nlm_reg_lock); 3963 } 3964 3965 /* 3966 * Return non-zero if the given I/O request conflicts with an active NBMAND 3967 * lock. 3968 * If svmand is non-zero, it means look at all active locks, not just NBMAND 3969 * locks. 3970 */ 3971 3972 int 3973 nbl_lock_conflict(vnode_t *vp, nbl_op_t op, u_offset_t offset, 3974 ssize_t length, int svmand, caller_context_t *ct) 3975 { 3976 int conflict = 0; 3977 graph_t *gp; 3978 lock_descriptor_t *lock; 3979 pid_t pid; 3980 int sysid; 3981 3982 if (ct == NULL) { 3983 pid = curproc->p_pid; 3984 sysid = 0; 3985 } else { 3986 pid = ct->cc_pid; 3987 sysid = ct->cc_sysid; 3988 } 3989 3990 mutex_enter(&flock_lock); 3991 gp = lock_graph[HASH_INDEX(vp)]; 3992 mutex_exit(&flock_lock); 3993 if (gp == NULL) 3994 return (0); 3995 3996 mutex_enter(&gp->gp_mutex); 3997 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 3998 3999 for (; lock && lock->l_vnode == vp; lock = lock->l_next) { 4000 if ((svmand || (lock->l_state & NBMAND_LOCK)) && 4001 (lock->l_flock.l_sysid != sysid || 4002 lock->l_flock.l_pid != pid) && 4003 lock_blocks_io(op, offset, length, 4004 lock->l_type, lock->l_start, lock->l_end)) { 4005 conflict = 1; 4006 break; 4007 } 4008 } 4009 mutex_exit(&gp->gp_mutex); 4010 4011 return (conflict); 4012 } 4013 4014 /* 4015 * Return non-zero if the given I/O request conflicts with the given lock. 
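 * Two read locks never conflict; otherwise there is a conflict exactly when
 * the I/O range [offset, offset + length) overlaps the lock's byte range
 * [lock_start, lock_end], which is what the two range checks below test.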
4016 */ 4017 4018 static int 4019 lock_blocks_io(nbl_op_t op, u_offset_t offset, ssize_t length, 4020 int lock_type, u_offset_t lock_start, u_offset_t lock_end) 4021 { 4022 ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE); 4023 ASSERT(lock_type == F_RDLCK || lock_type == F_WRLCK); 4024 4025 if (op == NBL_READ && lock_type == F_RDLCK) 4026 return (0); 4027 4028 if (offset <= lock_start && lock_start < offset + length) 4029 return (1); 4030 if (lock_start <= offset && offset <= lock_end) 4031 return (1); 4032 4033 return (0); 4034 } 4035 4036 #ifdef DEBUG 4037 static void 4038 check_active_locks(graph_t *gp) 4039 { 4040 lock_descriptor_t *lock, *lock1; 4041 edge_t *ep; 4042 4043 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp); 4044 lock = lock->l_next) { 4045 ASSERT(IS_ACTIVE(lock)); 4046 ASSERT(NOT_BLOCKED(lock)); 4047 ASSERT(!IS_BARRIER(lock)); 4048 4049 ep = FIRST_IN(lock); 4050 4051 while (ep != HEAD(lock)) { 4052 ASSERT(IS_SLEEPING(ep->from_vertex)); 4053 ASSERT(!NOT_BLOCKED(ep->from_vertex)); 4054 ep = NEXT_IN(ep); 4055 } 4056 4057 for (lock1 = lock->l_next; lock1 != ACTIVE_HEAD(gp); 4058 lock1 = lock1->l_next) { 4059 if (lock1->l_vnode == lock->l_vnode) { 4060 if (BLOCKS(lock1, lock)) { 4061 cmn_err(CE_PANIC, 4062 "active lock %p blocks %p", 4063 (void *)lock1, (void *)lock); 4064 } else if (BLOCKS(lock, lock1)) { 4065 cmn_err(CE_PANIC, 4066 "active lock %p blocks %p", 4067 (void *)lock, (void *)lock1); 4068 } 4069 } 4070 } 4071 } 4072 } 4073 4074 /* 4075 * Effect: This functions checks to see if the transition from 'old_state' to 4076 * 'new_state' is a valid one. It returns 0 if the transition is valid 4077 * and 1 if it is not. 4078 * For a map of valid transitions, see sys/flock_impl.h 4079 */ 4080 static int 4081 check_lock_transition(int old_state, int new_state) 4082 { 4083 switch (old_state) { 4084 case FLK_INITIAL_STATE: 4085 if ((new_state == FLK_START_STATE) || 4086 (new_state == FLK_SLEEPING_STATE) || 4087 (new_state == FLK_ACTIVE_STATE) || 4088 (new_state == FLK_DEAD_STATE)) { 4089 return (0); 4090 } else { 4091 return (1); 4092 } 4093 case FLK_START_STATE: 4094 if ((new_state == FLK_ACTIVE_STATE) || 4095 (new_state == FLK_DEAD_STATE)) { 4096 return (0); 4097 } else { 4098 return (1); 4099 } 4100 case FLK_ACTIVE_STATE: 4101 if (new_state == FLK_DEAD_STATE) { 4102 return (0); 4103 } else { 4104 return (1); 4105 } 4106 case FLK_SLEEPING_STATE: 4107 if ((new_state == FLK_GRANTED_STATE) || 4108 (new_state == FLK_INTERRUPTED_STATE) || 4109 (new_state == FLK_CANCELLED_STATE)) { 4110 return (0); 4111 } else { 4112 return (1); 4113 } 4114 case FLK_GRANTED_STATE: 4115 if ((new_state == FLK_START_STATE) || 4116 (new_state == FLK_INTERRUPTED_STATE) || 4117 (new_state == FLK_CANCELLED_STATE)) { 4118 return (0); 4119 } else { 4120 return (1); 4121 } 4122 case FLK_CANCELLED_STATE: 4123 if ((new_state == FLK_INTERRUPTED_STATE) || 4124 (new_state == FLK_DEAD_STATE)) { 4125 return (0); 4126 } else { 4127 return (1); 4128 } 4129 case FLK_INTERRUPTED_STATE: 4130 if (new_state == FLK_DEAD_STATE) { 4131 return (0); 4132 } else { 4133 return (1); 4134 } 4135 case FLK_DEAD_STATE: 4136 /* May be set more than once */ 4137 if (new_state == FLK_DEAD_STATE) { 4138 return (0); 4139 } else { 4140 return (1); 4141 } 4142 default: 4143 return (1); 4144 } 4145 } 4146 4147 static void 4148 check_sleeping_locks(graph_t *gp) 4149 { 4150 lock_descriptor_t *lock1, *lock2; 4151 edge_t *ep; 4152 for (lock1 = SLEEPING_HEAD(gp)->l_next; lock1 != SLEEPING_HEAD(gp); 4153 lock1 = 
lock1->l_next) { 4154 ASSERT(!IS_BARRIER(lock1)); 4155 for (lock2 = lock1->l_next; lock2 != SLEEPING_HEAD(gp); 4156 lock2 = lock2->l_next) { 4157 if (lock1->l_vnode == lock2->l_vnode) { 4158 if (BLOCKS(lock2, lock1)) { 4159 ASSERT(!IS_GRANTED(lock1)); 4160 ASSERT(!NOT_BLOCKED(lock1)); 4161 path(lock1, lock2); 4162 } 4163 } 4164 } 4165 4166 for (lock2 = ACTIVE_HEAD(gp)->l_next; lock2 != ACTIVE_HEAD(gp); 4167 lock2 = lock2->l_next) { 4168 ASSERT(!IS_BARRIER(lock1)); 4169 if (lock1->l_vnode == lock2->l_vnode) { 4170 if (BLOCKS(lock2, lock1)) { 4171 ASSERT(!IS_GRANTED(lock1)); 4172 ASSERT(!NOT_BLOCKED(lock1)); 4173 path(lock1, lock2); 4174 } 4175 } 4176 } 4177 ep = FIRST_ADJ(lock1); 4178 while (ep != HEAD(lock1)) { 4179 ASSERT(BLOCKS(ep->to_vertex, lock1)); 4180 ep = NEXT_ADJ(ep); 4181 } 4182 } 4183 } 4184 4185 static int 4186 level_two_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2, int no_path) 4187 { 4188 edge_t *ep; 4189 lock_descriptor_t *vertex; 4190 lock_descriptor_t *vertex_stack; 4191 4192 STACK_INIT(vertex_stack); 4193 4194 flk_graph_uncolor(lock1->l_graph); 4195 ep = FIRST_ADJ(lock1); 4196 ASSERT(ep != HEAD(lock1)); 4197 while (ep != HEAD(lock1)) { 4198 if (no_path) 4199 ASSERT(ep->to_vertex != lock2); 4200 STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack); 4201 COLOR(ep->to_vertex); 4202 ep = NEXT_ADJ(ep); 4203 } 4204 4205 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 4206 STACK_POP(vertex_stack, l_dstack); 4207 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex); 4208 ep = NEXT_ADJ(ep)) { 4209 if (COLORED(ep->to_vertex)) 4210 continue; 4211 COLOR(ep->to_vertex); 4212 if (ep->to_vertex == lock2) 4213 return (1); 4214 4215 STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack); 4216 } 4217 } 4218 return (0); 4219 } 4220 4221 static void 4222 check_owner_locks(graph_t *gp, pid_t pid, int sysid, vnode_t *vp) 4223 { 4224 lock_descriptor_t *lock; 4225 4226 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 4227 4228 if (lock) { 4229 while (lock != ACTIVE_HEAD(gp) && (lock->l_vnode == vp)) { 4230 if (lock->l_flock.l_pid == pid && 4231 lock->l_flock.l_sysid == sysid) 4232 cmn_err(CE_PANIC, 4233 "owner pid %d's lock %p in active queue", 4234 pid, (void *)lock); 4235 lock = lock->l_next; 4236 } 4237 } 4238 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 4239 4240 if (lock) { 4241 while (lock != SLEEPING_HEAD(gp) && (lock->l_vnode == vp)) { 4242 if (lock->l_flock.l_pid == pid && 4243 lock->l_flock.l_sysid == sysid) 4244 cmn_err(CE_PANIC, 4245 "owner pid %d's lock %p in sleep queue", 4246 pid, (void *)lock); 4247 lock = lock->l_next; 4248 } 4249 } 4250 } 4251 4252 static int 4253 level_one_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2) 4254 { 4255 edge_t *ep = FIRST_ADJ(lock1); 4256 4257 while (ep != HEAD(lock1)) { 4258 if (ep->to_vertex == lock2) 4259 return (1); 4260 else 4261 ep = NEXT_ADJ(ep); 4262 } 4263 return (0); 4264 } 4265 4266 static int 4267 no_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2) 4268 { 4269 return (!level_two_path(lock1, lock2, 1)); 4270 } 4271 4272 static void 4273 path(lock_descriptor_t *lock1, lock_descriptor_t *lock2) 4274 { 4275 if (level_one_path(lock1, lock2)) { 4276 if (level_two_path(lock1, lock2, 0) != 0) { 4277 cmn_err(CE_WARN, 4278 "one edge one path from lock1 %p lock2 %p", 4279 (void *)lock1, (void *)lock2); 4280 } 4281 } else if (no_path(lock1, lock2)) { 4282 cmn_err(CE_PANIC, 4283 "No path from lock1 %p to lock2 %p", 4284 (void *)lock1, (void *)lock2); 4285 } 4286 } 4287 #endif /* DEBUG */ 4288