1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 /* 31 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 32 */ 33 34 #include <sys/flock_impl.h> 35 #include <sys/vfs.h> 36 #include <sys/t_lock.h> /* for <sys/callb.h> */ 37 #include <sys/callb.h> 38 #include <sys/clconf.h> 39 #include <sys/cladm.h> 40 #include <sys/nbmlock.h> 41 #include <sys/cred.h> 42 #include <sys/policy.h> 43 44 /* 45 * The following four variables are for statistics purposes and they are 46 * not protected by locks. They may not be accurate but will at least be 47 * close to the actual value. 48 */ 49 50 int flk_lock_allocs; 51 int flk_lock_frees; 52 int edge_allocs; 53 int edge_frees; 54 int flk_proc_vertex_allocs; 55 int flk_proc_edge_allocs; 56 int flk_proc_vertex_frees; 57 int flk_proc_edge_frees; 58 59 static kmutex_t flock_lock; 60 61 #ifdef DEBUG 62 int check_debug = 0; 63 #define CHECK_ACTIVE_LOCKS(gp) if (check_debug) \ 64 check_active_locks(gp); 65 #define CHECK_SLEEPING_LOCKS(gp) if (check_debug) \ 66 check_sleeping_locks(gp); 67 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp) \ 68 if (check_debug) \ 69 check_owner_locks(gp, pid, sysid, vp); 70 #define CHECK_LOCK_TRANSITION(old_state, new_state) \ 71 { \ 72 if (check_lock_transition(old_state, new_state)) { \ 73 cmn_err(CE_PANIC, "Illegal lock transition \ 74 from %d to %d", old_state, new_state); \ 75 } \ 76 } 77 #else 78 79 #define CHECK_ACTIVE_LOCKS(gp) 80 #define CHECK_SLEEPING_LOCKS(gp) 81 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp) 82 #define CHECK_LOCK_TRANSITION(old_state, new_state) 83 84 #endif /* DEBUG */ 85 86 struct kmem_cache *flk_edge_cache; 87 88 graph_t *lock_graph[HASH_SIZE]; 89 proc_graph_t pgraph; 90 91 /* 92 * Clustering. 93 * 94 * NLM REGISTRY TYPE IMPLEMENTATION 95 * 96 * Assumptions: 97 * 1. Nodes in a cluster are numbered starting at 1; always non-negative 98 * integers; maximum node id is returned by clconf_maximum_nodeid(). 99 * 2. We use this node id to identify the node an NLM server runs on. 100 */ 101 102 /* 103 * NLM registry object keeps track of NLM servers via their 104 * nlmids (which are the node ids of the node in the cluster they run on) 105 * that have requested locks at this LLM with which this registry is 106 * associated. 107 * 108 * Representation of abstraction: 109 * rep = record[ states: array[nlm_state], 110 * lock: mutex] 111 * 112 * Representation invariants: 113 * 1. index i of rep.states is between 0 and n - 1 where n is number 114 * of elements in the array, which happen to be the maximum number 115 * of nodes in the cluster configuration + 1. 116 * 2. map nlmid to index i of rep.states 117 * 0 -> 0 118 * 1 -> 1 119 * 2 -> 2 120 * n-1 -> clconf_maximum_nodeid()+1 121 * 3. This 1-1 mapping is quite convenient and it avoids errors resulting 122 * from forgetting to subtract 1 from the index. 123 * 4. The reason we keep the 0th index is the following. A legitimate 124 * cluster configuration includes making a UFS file system NFS 125 * exportable. The code is structured so that if you're in a cluster 126 * you do one thing; otherwise, you do something else. The problem 127 * is what to do if you think you're in a cluster with PXFS loaded, 128 * but you're using UFS not PXFS? The upper two bytes of the sysid 129 * encode the node id of the node where NLM server runs; these bytes 130 * are zero for UFS. Since the nodeid is used to index into the 131 * registry, we can record the NLM server state information at index 132 * 0 using the same mechanism used for PXFS file locks! 133 */ 134 static flk_nlm_status_t *nlm_reg_status = NULL; /* state array 0..N-1 */ 135 static kmutex_t nlm_reg_lock; /* lock to protect arrary */ 136 static uint_t nlm_status_size; /* size of state array */ 137 138 /* 139 * Although we need a global lock dependency graph (and associated data 140 * structures), we also need a per-zone notion of whether the lock manager is 141 * running, and so whether to allow lock manager requests or not. 142 * 143 * Thus, on a per-zone basis we maintain a ``global'' variable 144 * (flk_lockmgr_status), protected by flock_lock, and set when the lock 145 * manager is determined to be changing state (starting or stopping). 146 * 147 * Each graph/zone pair also has a copy of this variable, which is protected by 148 * the graph's mutex. 149 * 150 * The per-graph copies are used to synchronize lock requests with shutdown 151 * requests. The global copy is used to initialize the per-graph field when a 152 * new graph is created. 153 */ 154 struct flock_globals { 155 flk_lockmgr_status_t flk_lockmgr_status; 156 flk_lockmgr_status_t lockmgr_status[HASH_SIZE]; 157 }; 158 159 zone_key_t flock_zone_key; 160 161 static void create_flock(lock_descriptor_t *, flock64_t *); 162 static lock_descriptor_t *flk_get_lock(void); 163 static void flk_free_lock(lock_descriptor_t *lock); 164 static void flk_get_first_blocking_lock(lock_descriptor_t *request); 165 static int flk_process_request(lock_descriptor_t *); 166 static int flk_add_edge(lock_descriptor_t *, lock_descriptor_t *, int, int); 167 static edge_t *flk_get_edge(void); 168 static int flk_wait_execute_request(lock_descriptor_t *); 169 static int flk_relation(lock_descriptor_t *, lock_descriptor_t *); 170 static void flk_insert_active_lock(lock_descriptor_t *); 171 static void flk_delete_active_lock(lock_descriptor_t *, int); 172 static void flk_insert_sleeping_lock(lock_descriptor_t *); 173 static void flk_graph_uncolor(graph_t *); 174 static void flk_wakeup(lock_descriptor_t *, int); 175 static void flk_free_edge(edge_t *); 176 static void flk_recompute_dependencies(lock_descriptor_t *, 177 lock_descriptor_t **, int, int); 178 static int flk_find_barriers(lock_descriptor_t *); 179 static void flk_update_barriers(lock_descriptor_t *); 180 static int flk_color_reachables(lock_descriptor_t *); 181 static int flk_canceled(lock_descriptor_t *); 182 static void flk_delete_locks_by_sysid(lock_descriptor_t *); 183 static void report_blocker(lock_descriptor_t *, lock_descriptor_t *); 184 static void wait_for_lock(lock_descriptor_t *); 185 static void unlock_lockmgr_granted(struct flock_globals *); 186 static void wakeup_sleeping_lockmgr_locks(struct flock_globals *); 187 188 /* Clustering hooks */ 189 static void cl_flk_change_nlm_state_all_locks(int, flk_nlm_status_t); 190 static void cl_flk_wakeup_sleeping_nlm_locks(int); 191 static void cl_flk_unlock_nlm_granted(int); 192 193 #ifdef DEBUG 194 static int check_lock_transition(int, int); 195 static void check_sleeping_locks(graph_t *); 196 static void check_active_locks(graph_t *); 197 static int no_path(lock_descriptor_t *, lock_descriptor_t *); 198 static void path(lock_descriptor_t *, lock_descriptor_t *); 199 static void check_owner_locks(graph_t *, pid_t, int, vnode_t *); 200 static int level_one_path(lock_descriptor_t *, lock_descriptor_t *); 201 static int level_two_path(lock_descriptor_t *, lock_descriptor_t *, int); 202 #endif 203 204 /* proc_graph function definitions */ 205 static int flk_check_deadlock(lock_descriptor_t *); 206 static void flk_proc_graph_uncolor(void); 207 static proc_vertex_t *flk_get_proc_vertex(lock_descriptor_t *); 208 static proc_edge_t *flk_get_proc_edge(void); 209 static void flk_proc_release(proc_vertex_t *); 210 static void flk_free_proc_edge(proc_edge_t *); 211 static void flk_update_proc_graph(edge_t *, int); 212 213 /* Non-blocking mandatory locking */ 214 static int lock_blocks_io(nbl_op_t, u_offset_t, ssize_t, int, u_offset_t, 215 u_offset_t); 216 217 static struct flock_globals * 218 flk_get_globals(void) 219 { 220 /* 221 * The KLM module had better be loaded if we're attempting to handle 222 * lockmgr requests. 223 */ 224 ASSERT(flock_zone_key != ZONE_KEY_UNINITIALIZED); 225 return (zone_getspecific(flock_zone_key, curproc->p_zone)); 226 } 227 228 static flk_lockmgr_status_t 229 flk_get_lockmgr_status(void) 230 { 231 struct flock_globals *fg; 232 233 ASSERT(MUTEX_HELD(&flock_lock)); 234 235 if (flock_zone_key == ZONE_KEY_UNINITIALIZED) { 236 /* 237 * KLM module not loaded; lock manager definitely not running. 238 */ 239 return (FLK_LOCKMGR_DOWN); 240 } 241 fg = flk_get_globals(); 242 return (fg->flk_lockmgr_status); 243 } 244 245 /* 246 * Routine called from fs_frlock in fs/fs_subr.c 247 */ 248 249 int 250 reclock(vnode_t *vp, 251 flock64_t *lckdat, 252 int cmd, 253 int flag, 254 u_offset_t offset, 255 flk_callback_t *flk_cbp) 256 { 257 lock_descriptor_t stack_lock_request; 258 lock_descriptor_t *lock_request; 259 int error = 0; 260 graph_t *gp; 261 int nlmid; 262 263 /* 264 * Check access permissions 265 */ 266 if ((cmd & SETFLCK) && 267 ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) || 268 (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0))) 269 return (EBADF); 270 271 /* 272 * for query and unlock we use the stack_lock_request 273 */ 274 275 if ((lckdat->l_type == F_UNLCK) || 276 !((cmd & INOFLCK) || (cmd & SETFLCK))) { 277 lock_request = &stack_lock_request; 278 (void) bzero((caddr_t)lock_request, 279 sizeof (lock_descriptor_t)); 280 281 /* 282 * following is added to make the assertions in 283 * flk_execute_request() to pass through 284 */ 285 286 lock_request->l_edge.edge_in_next = &lock_request->l_edge; 287 lock_request->l_edge.edge_in_prev = &lock_request->l_edge; 288 lock_request->l_edge.edge_adj_next = &lock_request->l_edge; 289 lock_request->l_edge.edge_adj_prev = &lock_request->l_edge; 290 lock_request->l_status = FLK_INITIAL_STATE; 291 } else { 292 lock_request = flk_get_lock(); 293 } 294 lock_request->l_state = 0; 295 lock_request->l_vnode = vp; 296 lock_request->l_zoneid = getzoneid(); 297 298 /* 299 * Convert the request range into the canonical start and end 300 * values. The NLM protocol supports locking over the entire 301 * 32-bit range, so there's no range checking for remote requests, 302 * but we still need to verify that local requests obey the rules. 303 */ 304 /* Clustering */ 305 if ((cmd & (RCMDLCK | PCMDLCK)) != 0) { 306 ASSERT(lckdat->l_whence == 0); 307 lock_request->l_start = lckdat->l_start; 308 lock_request->l_end = (lckdat->l_len == 0) ? MAX_U_OFFSET_T : 309 lckdat->l_start + (lckdat->l_len - 1); 310 } else { 311 /* check the validity of the lock range */ 312 error = flk_convert_lock_data(vp, lckdat, 313 &lock_request->l_start, &lock_request->l_end, 314 offset); 315 if (error) { 316 goto done; 317 } 318 error = flk_check_lock_data(lock_request->l_start, 319 lock_request->l_end, MAXEND); 320 if (error) { 321 goto done; 322 } 323 } 324 325 ASSERT(lock_request->l_end >= lock_request->l_start); 326 327 lock_request->l_type = lckdat->l_type; 328 if (cmd & INOFLCK) 329 lock_request->l_state |= IO_LOCK; 330 if (cmd & SLPFLCK) 331 lock_request->l_state |= WILLING_TO_SLEEP_LOCK; 332 if (cmd & RCMDLCK) 333 lock_request->l_state |= LOCKMGR_LOCK; 334 if (cmd & NBMLCK) 335 lock_request->l_state |= NBMAND_LOCK; 336 /* 337 * Clustering: set flag for PXFS locks 338 * We do not _only_ check for the PCMDLCK flag because PXFS locks could 339 * also be of type 'RCMDLCK'. 340 * We do not _only_ check the GETPXFSID() macro because local PXFS 341 * clients use a pxfsid of zero to permit deadlock detection in the LLM. 342 */ 343 344 if ((cmd & PCMDLCK) || (GETPXFSID(lckdat->l_sysid) != 0)) { 345 lock_request->l_state |= PXFS_LOCK; 346 } 347 if (!((cmd & SETFLCK) || (cmd & INOFLCK))) { 348 if (lock_request->l_type == F_RDLCK || 349 lock_request->l_type == F_WRLCK) 350 lock_request->l_state |= QUERY_LOCK; 351 } 352 lock_request->l_flock = (*lckdat); 353 lock_request->l_callbacks = flk_cbp; 354 355 /* 356 * We are ready for processing the request 357 */ 358 if (IS_LOCKMGR(lock_request)) { 359 /* 360 * If the lock request is an NLM server request .... 361 */ 362 if (nlm_status_size == 0) { /* not booted as cluster */ 363 mutex_enter(&flock_lock); 364 /* 365 * Bail out if this is a lock manager request and the 366 * lock manager is not supposed to be running. 367 */ 368 if (flk_get_lockmgr_status() != FLK_LOCKMGR_UP) { 369 mutex_exit(&flock_lock); 370 error = ENOLCK; 371 goto done; 372 } 373 mutex_exit(&flock_lock); 374 } else { /* booted as a cluster */ 375 nlmid = GETNLMID(lock_request->l_flock.l_sysid); 376 ASSERT(nlmid <= nlm_status_size && nlmid >= 0); 377 378 mutex_enter(&nlm_reg_lock); 379 /* 380 * If the NLM registry does not know about this 381 * NLM server making the request, add its nlmid 382 * to the registry. 383 */ 384 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, 385 nlmid)) { 386 FLK_REGISTRY_ADD_NLMID(nlm_reg_status, nlmid); 387 } else if (!FLK_REGISTRY_IS_NLM_UP(nlm_reg_status, 388 nlmid)) { 389 /* 390 * If the NLM server is already known (has made 391 * previous lock requests) and its state is 392 * not NLM_UP (means that NLM server is 393 * shutting down), then bail out with an 394 * error to deny the lock request. 395 */ 396 mutex_exit(&nlm_reg_lock); 397 error = ENOLCK; 398 goto done; 399 } 400 mutex_exit(&nlm_reg_lock); 401 } 402 } 403 404 /* Now get the lock graph for a particular vnode */ 405 gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH); 406 407 /* 408 * We drop rwlock here otherwise this might end up causing a 409 * deadlock if this IOLOCK sleeps. (bugid # 1183392). 410 */ 411 412 if (IS_IO_LOCK(lock_request)) { 413 VOP_RWUNLOCK(vp, 414 (lock_request->l_type == F_RDLCK) ? 415 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL); 416 } 417 mutex_enter(&gp->gp_mutex); 418 419 lock_request->l_state |= REFERENCED_LOCK; 420 lock_request->l_graph = gp; 421 422 switch (lock_request->l_type) { 423 case F_RDLCK: 424 case F_WRLCK: 425 if (IS_QUERY_LOCK(lock_request)) { 426 flk_get_first_blocking_lock(lock_request); 427 (*lckdat) = lock_request->l_flock; 428 break; 429 } 430 431 /* process the request now */ 432 433 error = flk_process_request(lock_request); 434 break; 435 436 case F_UNLCK: 437 /* unlock request will not block so execute it immediately */ 438 439 if (IS_LOCKMGR(lock_request) && 440 flk_canceled(lock_request)) { 441 error = 0; 442 } else { 443 error = flk_execute_request(lock_request); 444 } 445 break; 446 447 case F_UNLKSYS: 448 /* 449 * Recovery mechanism to release lock manager locks when 450 * NFS client crashes and restart. NFS server will clear 451 * old locks and grant new locks. 452 */ 453 454 if (lock_request->l_flock.l_sysid == 0) { 455 mutex_exit(&gp->gp_mutex); 456 return (EINVAL); 457 } 458 if (secpolicy_nfs(CRED()) != 0) { 459 mutex_exit(&gp->gp_mutex); 460 return (EPERM); 461 } 462 flk_delete_locks_by_sysid(lock_request); 463 lock_request->l_state &= ~REFERENCED_LOCK; 464 flk_set_state(lock_request, FLK_DEAD_STATE); 465 flk_free_lock(lock_request); 466 mutex_exit(&gp->gp_mutex); 467 return (0); 468 469 default: 470 error = EINVAL; 471 break; 472 } 473 474 /* Clustering: For blocked PXFS locks, return */ 475 if (error == PXFS_LOCK_BLOCKED) { 476 lock_request->l_state &= ~REFERENCED_LOCK; 477 mutex_exit(&gp->gp_mutex); 478 return (error); 479 } 480 481 /* 482 * Now that we have seen the status of locks in the system for 483 * this vnode we acquire the rwlock if it is an IO_LOCK. 484 */ 485 486 if (IS_IO_LOCK(lock_request)) { 487 (void) VOP_RWLOCK(vp, 488 (lock_request->l_type == F_RDLCK) ? 489 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL); 490 if (!error) { 491 lckdat->l_type = F_UNLCK; 492 493 /* 494 * This wake up is needed otherwise 495 * if IO_LOCK has slept the dependents on this 496 * will not be woken up at all. (bugid # 1185482). 497 */ 498 499 flk_wakeup(lock_request, 1); 500 flk_set_state(lock_request, FLK_DEAD_STATE); 501 flk_free_lock(lock_request); 502 } 503 /* 504 * else if error had occurred either flk_process_request() 505 * has returned EDEADLK in which case there will be no 506 * dependents for this lock or EINTR from flk_wait_execute_ 507 * request() in which case flk_cancel_sleeping_lock() 508 * would have been done. same is true with EBADF. 509 */ 510 } 511 512 if (lock_request == &stack_lock_request) { 513 flk_set_state(lock_request, FLK_DEAD_STATE); 514 } else { 515 lock_request->l_state &= ~REFERENCED_LOCK; 516 if ((error != 0) || IS_DELETED(lock_request)) { 517 flk_set_state(lock_request, FLK_DEAD_STATE); 518 flk_free_lock(lock_request); 519 } 520 } 521 522 mutex_exit(&gp->gp_mutex); 523 return (error); 524 525 done: 526 flk_set_state(lock_request, FLK_DEAD_STATE); 527 if (lock_request != &stack_lock_request) 528 flk_free_lock(lock_request); 529 return (error); 530 } 531 532 /* 533 * Invoke the callbacks in the given list. If before sleeping, invoke in 534 * list order. If after sleeping, invoke in reverse order. 535 * 536 * CPR (suspend/resume) support: if one of the callbacks returns a 537 * callb_cpr_t, return it. This will be used to make the thread CPR-safe 538 * while it is sleeping. There should be at most one callb_cpr_t for the 539 * thread. 540 * XXX This is unnecessarily complicated. The CPR information should just 541 * get passed in directly through VOP_FRLOCK and reclock, rather than 542 * sneaking it in via a callback. 543 */ 544 545 callb_cpr_t * 546 flk_invoke_callbacks(flk_callback_t *cblist, flk_cb_when_t when) 547 { 548 callb_cpr_t *cpr_callbackp = NULL; 549 callb_cpr_t *one_result; 550 flk_callback_t *cb; 551 552 if (cblist == NULL) 553 return (NULL); 554 555 if (when == FLK_BEFORE_SLEEP) { 556 cb = cblist; 557 do { 558 one_result = (*cb->cb_callback)(when, cb->cb_data); 559 if (one_result != NULL) { 560 ASSERT(cpr_callbackp == NULL); 561 cpr_callbackp = one_result; 562 } 563 cb = cb->cb_next; 564 } while (cb != cblist); 565 } else { 566 cb = cblist->cb_prev; 567 do { 568 one_result = (*cb->cb_callback)(when, cb->cb_data); 569 if (one_result != NULL) { 570 cpr_callbackp = one_result; 571 } 572 cb = cb->cb_prev; 573 } while (cb != cblist->cb_prev); 574 } 575 576 return (cpr_callbackp); 577 } 578 579 /* 580 * Initialize a flk_callback_t to hold the given callback. 581 */ 582 583 void 584 flk_init_callback(flk_callback_t *flk_cb, 585 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata) 586 { 587 flk_cb->cb_next = flk_cb; 588 flk_cb->cb_prev = flk_cb; 589 flk_cb->cb_callback = cb_fcn; 590 flk_cb->cb_data = cbdata; 591 } 592 593 /* 594 * Initialize an flk_callback_t and then link it into the head of an 595 * existing list (which may be NULL). 596 */ 597 598 void 599 flk_add_callback(flk_callback_t *newcb, 600 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), 601 void *cbdata, flk_callback_t *cblist) 602 { 603 flk_init_callback(newcb, cb_fcn, cbdata); 604 605 if (cblist == NULL) 606 return; 607 608 newcb->cb_prev = cblist->cb_prev; 609 newcb->cb_next = cblist; 610 cblist->cb_prev->cb_next = newcb; 611 cblist->cb_prev = newcb; 612 } 613 614 /* 615 * Initialize the flk_edge_cache data structure and create the 616 * nlm_reg_status array. 617 */ 618 619 void 620 flk_init(void) 621 { 622 uint_t i; 623 624 flk_edge_cache = kmem_cache_create("flk_edges", 625 sizeof (struct edge), 0, NULL, NULL, NULL, NULL, NULL, 0); 626 if (flk_edge_cache == NULL) { 627 cmn_err(CE_PANIC, "Couldn't create flk_edge_cache\n"); 628 } 629 /* 630 * Create the NLM registry object. 631 */ 632 633 if (cluster_bootflags & CLUSTER_BOOTED) { 634 /* 635 * This routine tells you the maximum node id that will be used 636 * in the cluster. This number will be the size of the nlm 637 * registry status array. We add 1 because we will be using 638 * all entries indexed from 0 to maxnodeid; e.g., from 0 639 * to 64, for a total of 65 entries. 640 */ 641 nlm_status_size = clconf_maximum_nodeid() + 1; 642 } else { 643 nlm_status_size = 0; 644 } 645 646 if (nlm_status_size != 0) { /* booted as a cluster */ 647 nlm_reg_status = (flk_nlm_status_t *) 648 kmem_alloc(sizeof (flk_nlm_status_t) * nlm_status_size, 649 KM_SLEEP); 650 651 /* initialize all NLM states in array to NLM_UNKNOWN */ 652 for (i = 0; i < nlm_status_size; i++) { 653 nlm_reg_status[i] = FLK_NLM_UNKNOWN; 654 } 655 } 656 } 657 658 /* 659 * Zone constructor/destructor callbacks to be executed when a zone is 660 * created/destroyed. 661 */ 662 /* ARGSUSED */ 663 void * 664 flk_zone_init(zoneid_t zoneid) 665 { 666 struct flock_globals *fg; 667 uint_t i; 668 669 fg = kmem_alloc(sizeof (*fg), KM_SLEEP); 670 fg->flk_lockmgr_status = FLK_LOCKMGR_UP; 671 for (i = 0; i < HASH_SIZE; i++) 672 fg->lockmgr_status[i] = FLK_LOCKMGR_UP; 673 return (fg); 674 } 675 676 /* ARGSUSED */ 677 void 678 flk_zone_fini(zoneid_t zoneid, void *data) 679 { 680 struct flock_globals *fg = data; 681 682 kmem_free(fg, sizeof (*fg)); 683 } 684 685 /* 686 * Get a lock_descriptor structure with initialization of edge lists. 687 */ 688 689 static lock_descriptor_t * 690 flk_get_lock(void) 691 { 692 lock_descriptor_t *l; 693 694 l = kmem_zalloc(sizeof (lock_descriptor_t), KM_SLEEP); 695 696 cv_init(&l->l_cv, NULL, CV_DRIVER, NULL); 697 l->l_edge.edge_in_next = &l->l_edge; 698 l->l_edge.edge_in_prev = &l->l_edge; 699 l->l_edge.edge_adj_next = &l->l_edge; 700 l->l_edge.edge_adj_prev = &l->l_edge; 701 l->pvertex = -1; 702 l->l_status = FLK_INITIAL_STATE; 703 flk_lock_allocs++; 704 return (l); 705 } 706 707 /* 708 * Free a lock_descriptor structure. Just sets the DELETED_LOCK flag 709 * when some thread has a reference to it as in reclock(). 710 */ 711 712 void 713 flk_free_lock(lock_descriptor_t *lock) 714 { 715 ASSERT(IS_DEAD(lock)); 716 if (IS_REFERENCED(lock)) { 717 lock->l_state |= DELETED_LOCK; 718 return; 719 } 720 flk_lock_frees++; 721 kmem_free((void *)lock, sizeof (lock_descriptor_t)); 722 } 723 724 void 725 flk_set_state(lock_descriptor_t *lock, int new_state) 726 { 727 /* 728 * Locks in the sleeping list may be woken up in a number of ways, 729 * and more than once. If a sleeping lock is signaled awake more 730 * than once, then it may or may not change state depending on its 731 * current state. 732 * Also note that NLM locks that are sleeping could be moved to an 733 * interrupted state more than once if the unlock request is 734 * retransmitted by the NLM client - the second time around, this is 735 * just a nop. 736 * The ordering of being signaled awake is: 737 * INTERRUPTED_STATE > CANCELLED_STATE > GRANTED_STATE. 738 * The checks below implement this ordering. 739 */ 740 if (IS_INTERRUPTED(lock)) { 741 if ((new_state == FLK_CANCELLED_STATE) || 742 (new_state == FLK_GRANTED_STATE) || 743 (new_state == FLK_INTERRUPTED_STATE)) { 744 return; 745 } 746 } 747 if (IS_CANCELLED(lock)) { 748 if ((new_state == FLK_GRANTED_STATE) || 749 (new_state == FLK_CANCELLED_STATE)) { 750 return; 751 } 752 } 753 CHECK_LOCK_TRANSITION(lock->l_status, new_state); 754 if (IS_PXFS(lock)) { 755 cl_flk_state_transition_notify(lock, lock->l_status, new_state); 756 } 757 lock->l_status = new_state; 758 } 759 760 /* 761 * Routine that checks whether there are any blocking locks in the system. 762 * 763 * The policy followed is if a write lock is sleeping we don't allow read 764 * locks before this write lock even though there may not be any active 765 * locks corresponding to the read locks' region. 766 * 767 * flk_add_edge() function adds an edge between l1 and l2 iff there 768 * is no path between l1 and l2. This is done to have a "minimum 769 * storage representation" of the dependency graph. 770 * 771 * Another property of the graph is since only the new request throws 772 * edges to the existing locks in the graph, the graph is always topologically 773 * ordered. 774 */ 775 776 static int 777 flk_process_request(lock_descriptor_t *request) 778 { 779 graph_t *gp = request->l_graph; 780 lock_descriptor_t *lock; 781 int request_blocked_by_active = 0; 782 int request_blocked_by_granted = 0; 783 int request_blocked_by_sleeping = 0; 784 vnode_t *vp = request->l_vnode; 785 int error = 0; 786 int request_will_wait = 0; 787 int found_covering_lock = 0; 788 lock_descriptor_t *covered_by = NULL; 789 790 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 791 request_will_wait = IS_WILLING_TO_SLEEP(request); 792 793 /* 794 * check active locks 795 */ 796 797 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 798 799 800 if (lock) { 801 do { 802 if (BLOCKS(lock, request)) { 803 if (!request_will_wait) 804 return (EAGAIN); 805 request_blocked_by_active = 1; 806 break; 807 } 808 /* 809 * Grant lock if it is for the same owner holding active 810 * lock that covers the request. 811 */ 812 813 if (SAME_OWNER(lock, request) && 814 COVERS(lock, request) && 815 (request->l_type == F_RDLCK)) 816 return (flk_execute_request(request)); 817 lock = lock->l_next; 818 } while (lock->l_vnode == vp); 819 } 820 821 if (!request_blocked_by_active) { 822 lock_descriptor_t *lk[1]; 823 lock_descriptor_t *first_glock = NULL; 824 /* 825 * Shall we grant this?! NO!! 826 * What about those locks that were just granted and still 827 * in sleep queue. Those threads are woken up and so locks 828 * are almost active. 829 */ 830 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 831 if (lock) { 832 do { 833 if (BLOCKS(lock, request)) { 834 if (IS_GRANTED(lock)) { 835 request_blocked_by_granted = 1; 836 } else { 837 request_blocked_by_sleeping = 1; 838 } 839 } 840 841 lock = lock->l_next; 842 } while ((lock->l_vnode == vp)); 843 first_glock = lock->l_prev; 844 ASSERT(first_glock->l_vnode == vp); 845 } 846 847 if (request_blocked_by_granted) 848 goto block; 849 850 if (!request_blocked_by_sleeping) { 851 /* 852 * If the request isn't going to be blocked by a 853 * sleeping request, we know that it isn't going to 854 * be blocked; we can just execute the request -- 855 * without performing costly deadlock detection. 856 */ 857 ASSERT(!request_blocked_by_active); 858 return (flk_execute_request(request)); 859 } else if (request->l_type == F_RDLCK) { 860 /* 861 * If we have a sleeping writer in the requested 862 * lock's range, block. 863 */ 864 goto block; 865 } 866 867 lk[0] = request; 868 request->l_state |= RECOMPUTE_LOCK; 869 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 870 if (lock) { 871 do { 872 flk_recompute_dependencies(lock, lk, 1, 0); 873 lock = lock->l_next; 874 } while (lock->l_vnode == vp); 875 } 876 lock = first_glock; 877 if (lock) { 878 do { 879 if (IS_GRANTED(lock)) { 880 flk_recompute_dependencies(lock, lk, 1, 0); 881 } 882 lock = lock->l_prev; 883 } while ((lock->l_vnode == vp)); 884 } 885 request->l_state &= ~RECOMPUTE_LOCK; 886 if (!NO_DEPENDENTS(request) && flk_check_deadlock(request)) 887 return (EDEADLK); 888 return (flk_execute_request(request)); 889 } 890 891 block: 892 if (request_will_wait) 893 flk_graph_uncolor(gp); 894 895 /* check sleeping locks */ 896 897 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 898 899 /* 900 * If we find a sleeping write lock that is a superset of the 901 * region wanted by request we can be assured that by adding an 902 * edge to this write lock we have paths to all locks in the 903 * graph that blocks the request except in one case and that is why 904 * another check for SAME_OWNER in the loop below. The exception 905 * case is when this process that owns the sleeping write lock 'l1' 906 * has other locks l2, l3, l4 that are in the system and arrived 907 * before l1. l1 does not have path to these locks as they are from 908 * same process. We break when we find a second covering sleeping 909 * lock l5 owned by a process different from that owning l1, because 910 * there cannot be any of l2, l3, l4, etc., arrived before l5, and if 911 * it has l1 would have produced a deadlock already. 912 */ 913 914 if (lock) { 915 do { 916 if (BLOCKS(lock, request)) { 917 if (!request_will_wait) 918 return (EAGAIN); 919 if (COVERS(lock, request) && 920 lock->l_type == F_WRLCK) { 921 if (found_covering_lock && 922 !SAME_OWNER(lock, covered_by)) { 923 found_covering_lock++; 924 break; 925 } 926 found_covering_lock = 1; 927 covered_by = lock; 928 } 929 if (found_covering_lock && 930 !SAME_OWNER(lock, covered_by)) { 931 lock = lock->l_next; 932 continue; 933 } 934 if ((error = flk_add_edge(request, lock, 935 !found_covering_lock, 0))) 936 return (error); 937 } 938 lock = lock->l_next; 939 } while (lock->l_vnode == vp); 940 } 941 942 /* 943 * found_covering_lock == 2 iff at this point 'request' has paths 944 * to all locks that blocks 'request'. found_covering_lock == 1 iff at this 945 * point 'request' has paths to all locks that blocks 'request' whose owners 946 * are not same as the one that covers 'request' (covered_by above) and 947 * we can have locks whose owner is same as covered_by in the active list. 948 */ 949 950 if (request_blocked_by_active && found_covering_lock != 2) { 951 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 952 ASSERT(lock != NULL); 953 do { 954 if (BLOCKS(lock, request)) { 955 if (found_covering_lock && 956 !SAME_OWNER(lock, covered_by)) { 957 lock = lock->l_next; 958 continue; 959 } 960 if ((error = flk_add_edge(request, lock, 961 CHECK_CYCLE, 0))) 962 return (error); 963 } 964 lock = lock->l_next; 965 } while (lock->l_vnode == vp); 966 } 967 968 if (NOT_BLOCKED(request)) { 969 /* 970 * request not dependent on any other locks 971 * so execute this request 972 */ 973 return (flk_execute_request(request)); 974 } else { 975 /* 976 * check for deadlock 977 */ 978 if (flk_check_deadlock(request)) 979 return (EDEADLK); 980 /* 981 * this thread has to sleep 982 */ 983 return (flk_wait_execute_request(request)); 984 } 985 } 986 987 /* 988 * The actual execution of the request in the simple case is only to 989 * insert the 'request' in the list of active locks if it is not an 990 * UNLOCK. 991 * We have to consider the existing active locks' relation to 992 * this 'request' if they are owned by same process. flk_relation() does 993 * this job and sees to that the dependency graph information is maintained 994 * properly. 995 */ 996 997 int 998 flk_execute_request(lock_descriptor_t *request) 999 { 1000 graph_t *gp = request->l_graph; 1001 vnode_t *vp = request->l_vnode; 1002 lock_descriptor_t *lock, *lock1; 1003 int done_searching = 0; 1004 1005 CHECK_SLEEPING_LOCKS(gp); 1006 CHECK_ACTIVE_LOCKS(gp); 1007 1008 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1009 1010 flk_set_state(request, FLK_START_STATE); 1011 1012 ASSERT(NOT_BLOCKED(request)); 1013 1014 /* IO_LOCK requests are only to check status */ 1015 1016 if (IS_IO_LOCK(request)) 1017 return (0); 1018 1019 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 1020 1021 if (lock == NULL && request->l_type == F_UNLCK) 1022 return (0); 1023 if (lock == NULL) { 1024 flk_insert_active_lock(request); 1025 return (0); 1026 } 1027 1028 do { 1029 lock1 = lock->l_next; 1030 if (SAME_OWNER(request, lock)) { 1031 done_searching = flk_relation(lock, request); 1032 } 1033 lock = lock1; 1034 } while (lock->l_vnode == vp && !done_searching); 1035 1036 /* 1037 * insert in active queue 1038 */ 1039 1040 if (request->l_type != F_UNLCK) 1041 flk_insert_active_lock(request); 1042 1043 return (0); 1044 } 1045 1046 /* 1047 * 'request' is blocked by some one therefore we put it into sleep queue. 1048 */ 1049 static int 1050 flk_wait_execute_request(lock_descriptor_t *request) 1051 { 1052 graph_t *gp = request->l_graph; 1053 callb_cpr_t *cprp; /* CPR info from callback */ 1054 struct flock_globals *fg; 1055 int index; 1056 1057 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1058 ASSERT(IS_WILLING_TO_SLEEP(request)); 1059 1060 flk_insert_sleeping_lock(request); 1061 1062 if (IS_LOCKMGR(request)) { 1063 index = HASH_INDEX(request->l_vnode); 1064 fg = flk_get_globals(); 1065 1066 if (nlm_status_size == 0) { /* not booted as a cluster */ 1067 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP) { 1068 flk_cancel_sleeping_lock(request, 1); 1069 return (ENOLCK); 1070 } 1071 } else { /* booted as a cluster */ 1072 /* 1073 * If the request is an NLM server lock request, 1074 * and the NLM state of the lock request is not 1075 * NLM_UP (because the NLM server is shutting 1076 * down), then cancel the sleeping lock and 1077 * return error ENOLCK that will encourage the 1078 * client to retransmit. 1079 */ 1080 if (!IS_NLM_UP(request)) { 1081 flk_cancel_sleeping_lock(request, 1); 1082 return (ENOLCK); 1083 } 1084 } 1085 } 1086 1087 /* Clustering: For blocking PXFS locks, return */ 1088 if (IS_PXFS(request)) { 1089 /* 1090 * PXFS locks sleep on the client side. 1091 * The callback argument is used to wake up the sleeper 1092 * when the lock is granted. 1093 * We return -1 (rather than an errno value) to indicate 1094 * the client side should sleep 1095 */ 1096 return (PXFS_LOCK_BLOCKED); 1097 } 1098 1099 if (request->l_callbacks != NULL) { 1100 /* 1101 * To make sure the shutdown code works correctly, either 1102 * the callback must happen after putting the lock on the 1103 * sleep list, or we must check the shutdown status after 1104 * returning from the callback (and before sleeping). At 1105 * least for now, we'll use the first option. If a 1106 * shutdown or signal or whatever happened while the graph 1107 * mutex was dropped, that will be detected by 1108 * wait_for_lock(). 1109 */ 1110 mutex_exit(&gp->gp_mutex); 1111 1112 cprp = flk_invoke_callbacks(request->l_callbacks, 1113 FLK_BEFORE_SLEEP); 1114 1115 mutex_enter(&gp->gp_mutex); 1116 1117 if (cprp == NULL) { 1118 wait_for_lock(request); 1119 } else { 1120 mutex_enter(cprp->cc_lockp); 1121 CALLB_CPR_SAFE_BEGIN(cprp); 1122 mutex_exit(cprp->cc_lockp); 1123 wait_for_lock(request); 1124 mutex_enter(cprp->cc_lockp); 1125 CALLB_CPR_SAFE_END(cprp, cprp->cc_lockp); 1126 mutex_exit(cprp->cc_lockp); 1127 } 1128 1129 mutex_exit(&gp->gp_mutex); 1130 (void) flk_invoke_callbacks(request->l_callbacks, 1131 FLK_AFTER_SLEEP); 1132 mutex_enter(&gp->gp_mutex); 1133 } else { 1134 wait_for_lock(request); 1135 } 1136 1137 if (IS_LOCKMGR(request)) { 1138 /* 1139 * If the lock manager is shutting down, return an 1140 * error that will encourage the client to retransmit. 1141 */ 1142 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP && 1143 !IS_GRANTED(request)) { 1144 flk_cancel_sleeping_lock(request, 1); 1145 return (ENOLCK); 1146 } 1147 } 1148 1149 if (IS_INTERRUPTED(request)) { 1150 /* we got a signal, or act like we did */ 1151 flk_cancel_sleeping_lock(request, 1); 1152 return (EINTR); 1153 } 1154 1155 /* Cancelled if some other thread has closed the file */ 1156 1157 if (IS_CANCELLED(request)) { 1158 flk_cancel_sleeping_lock(request, 1); 1159 return (EBADF); 1160 } 1161 1162 request->l_state &= ~GRANTED_LOCK; 1163 REMOVE_SLEEP_QUEUE(request); 1164 return (flk_execute_request(request)); 1165 } 1166 1167 /* 1168 * This routine adds an edge between from and to because from depends 1169 * to. If asked to check for deadlock it checks whether there are any 1170 * reachable locks from "from_lock" that is owned by the same process 1171 * as "from_lock". 1172 * NOTE: It is the caller's responsibility to make sure that the color 1173 * of the graph is consistent between the calls to flk_add_edge as done 1174 * in flk_process_request. This routine does not color and check for 1175 * deadlock explicitly. 1176 */ 1177 1178 static int 1179 flk_add_edge(lock_descriptor_t *from_lock, lock_descriptor_t *to_lock, 1180 int check_cycle, int update_graph) 1181 { 1182 edge_t *edge; 1183 edge_t *ep; 1184 lock_descriptor_t *vertex; 1185 lock_descriptor_t *vertex_stack; 1186 1187 STACK_INIT(vertex_stack); 1188 1189 /* 1190 * if to vertex already has mark_color just return 1191 * don't add an edge as it is reachable from from vertex 1192 * before itself. 1193 */ 1194 1195 if (COLORED(to_lock)) 1196 return (0); 1197 1198 edge = flk_get_edge(); 1199 1200 /* 1201 * set the from and to vertex 1202 */ 1203 1204 edge->from_vertex = from_lock; 1205 edge->to_vertex = to_lock; 1206 1207 /* 1208 * put in adjacency list of from vertex 1209 */ 1210 1211 from_lock->l_edge.edge_adj_next->edge_adj_prev = edge; 1212 edge->edge_adj_next = from_lock->l_edge.edge_adj_next; 1213 edge->edge_adj_prev = &from_lock->l_edge; 1214 from_lock->l_edge.edge_adj_next = edge; 1215 1216 /* 1217 * put in in list of to vertex 1218 */ 1219 1220 to_lock->l_edge.edge_in_next->edge_in_prev = edge; 1221 edge->edge_in_next = to_lock->l_edge.edge_in_next; 1222 to_lock->l_edge.edge_in_next = edge; 1223 edge->edge_in_prev = &to_lock->l_edge; 1224 1225 1226 if (update_graph) { 1227 flk_update_proc_graph(edge, 0); 1228 return (0); 1229 } 1230 if (!check_cycle) { 1231 return (0); 1232 } 1233 1234 STACK_PUSH(vertex_stack, from_lock, l_stack); 1235 1236 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 1237 1238 STACK_POP(vertex_stack, l_stack); 1239 1240 for (ep = FIRST_ADJ(vertex); 1241 ep != HEAD(vertex); 1242 ep = NEXT_ADJ(ep)) { 1243 if (COLORED(ep->to_vertex)) 1244 continue; 1245 COLOR(ep->to_vertex); 1246 if (SAME_OWNER(ep->to_vertex, from_lock)) 1247 goto dead_lock; 1248 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack); 1249 } 1250 } 1251 return (0); 1252 1253 dead_lock: 1254 1255 /* 1256 * remove all edges 1257 */ 1258 1259 ep = FIRST_ADJ(from_lock); 1260 1261 while (ep != HEAD(from_lock)) { 1262 IN_LIST_REMOVE(ep); 1263 from_lock->l_sedge = NEXT_ADJ(ep); 1264 ADJ_LIST_REMOVE(ep); 1265 flk_free_edge(ep); 1266 ep = from_lock->l_sedge; 1267 } 1268 return (EDEADLK); 1269 } 1270 1271 /* 1272 * Get an edge structure for representing the dependency between two locks. 1273 */ 1274 1275 static edge_t * 1276 flk_get_edge() 1277 { 1278 edge_t *ep; 1279 1280 ASSERT(flk_edge_cache != NULL); 1281 1282 ep = kmem_cache_alloc(flk_edge_cache, KM_SLEEP); 1283 edge_allocs++; 1284 return (ep); 1285 } 1286 1287 /* 1288 * Free the edge structure. 1289 */ 1290 1291 static void 1292 flk_free_edge(edge_t *ep) 1293 { 1294 edge_frees++; 1295 kmem_cache_free(flk_edge_cache, (void *)ep); 1296 } 1297 1298 /* 1299 * Check the relationship of request with lock and perform the 1300 * recomputation of dependencies, break lock if required, and return 1301 * 1 if request cannot have any more relationship with the next 1302 * active locks. 1303 * The 'lock' and 'request' are compared and in case of overlap we 1304 * delete the 'lock' and form new locks to represent the non-overlapped 1305 * portion of original 'lock'. This function has side effects such as 1306 * 'lock' will be freed, new locks will be added to the active list. 1307 */ 1308 1309 static int 1310 flk_relation(lock_descriptor_t *lock, lock_descriptor_t *request) 1311 { 1312 int lock_effect; 1313 lock_descriptor_t *lock1, *lock2; 1314 lock_descriptor_t *topology[3]; 1315 int nvertex = 0; 1316 int i; 1317 edge_t *ep; 1318 graph_t *gp = (lock->l_graph); 1319 1320 1321 CHECK_SLEEPING_LOCKS(gp); 1322 CHECK_ACTIVE_LOCKS(gp); 1323 1324 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1325 1326 topology[0] = topology[1] = topology[2] = NULL; 1327 1328 if (request->l_type == F_UNLCK) 1329 lock_effect = FLK_UNLOCK; 1330 else if (request->l_type == F_RDLCK && 1331 lock->l_type == F_WRLCK) 1332 lock_effect = FLK_DOWNGRADE; 1333 else if (request->l_type == F_WRLCK && 1334 lock->l_type == F_RDLCK) 1335 lock_effect = FLK_UPGRADE; 1336 else 1337 lock_effect = FLK_STAY_SAME; 1338 1339 if (lock->l_end < request->l_start) { 1340 if (lock->l_end == request->l_start - 1 && 1341 lock_effect == FLK_STAY_SAME) { 1342 topology[0] = request; 1343 request->l_start = lock->l_start; 1344 nvertex = 1; 1345 goto recompute; 1346 } else { 1347 return (0); 1348 } 1349 } 1350 1351 if (lock->l_start > request->l_end) { 1352 if (request->l_end == lock->l_start - 1 && 1353 lock_effect == FLK_STAY_SAME) { 1354 topology[0] = request; 1355 request->l_end = lock->l_end; 1356 nvertex = 1; 1357 goto recompute; 1358 } else { 1359 return (1); 1360 } 1361 } 1362 1363 if (request->l_end < lock->l_end) { 1364 if (request->l_start > lock->l_start) { 1365 if (lock_effect == FLK_STAY_SAME) { 1366 request->l_start = lock->l_start; 1367 request->l_end = lock->l_end; 1368 topology[0] = request; 1369 nvertex = 1; 1370 } else { 1371 lock1 = flk_get_lock(); 1372 lock2 = flk_get_lock(); 1373 COPY(lock1, lock); 1374 COPY(lock2, lock); 1375 lock1->l_start = lock->l_start; 1376 lock1->l_end = request->l_start - 1; 1377 lock2->l_start = request->l_end + 1; 1378 lock2->l_end = lock->l_end; 1379 topology[0] = lock1; 1380 topology[1] = lock2; 1381 topology[2] = request; 1382 nvertex = 3; 1383 } 1384 } else if (request->l_start < lock->l_start) { 1385 if (lock_effect == FLK_STAY_SAME) { 1386 request->l_end = lock->l_end; 1387 topology[0] = request; 1388 nvertex = 1; 1389 } else { 1390 lock1 = flk_get_lock(); 1391 COPY(lock1, lock); 1392 lock1->l_start = request->l_end + 1; 1393 topology[0] = lock1; 1394 topology[1] = request; 1395 nvertex = 2; 1396 } 1397 } else { 1398 if (lock_effect == FLK_STAY_SAME) { 1399 request->l_start = lock->l_start; 1400 request->l_end = lock->l_end; 1401 topology[0] = request; 1402 nvertex = 1; 1403 } else { 1404 lock1 = flk_get_lock(); 1405 COPY(lock1, lock); 1406 lock1->l_start = request->l_end + 1; 1407 topology[0] = lock1; 1408 topology[1] = request; 1409 nvertex = 2; 1410 } 1411 } 1412 } else if (request->l_end > lock->l_end) { 1413 if (request->l_start > lock->l_start) { 1414 if (lock_effect == FLK_STAY_SAME) { 1415 request->l_start = lock->l_start; 1416 topology[0] = request; 1417 nvertex = 1; 1418 } else { 1419 lock1 = flk_get_lock(); 1420 COPY(lock1, lock); 1421 lock1->l_end = request->l_start - 1; 1422 topology[0] = lock1; 1423 topology[1] = request; 1424 nvertex = 2; 1425 } 1426 } else if (request->l_start < lock->l_start) { 1427 topology[0] = request; 1428 nvertex = 1; 1429 } else { 1430 topology[0] = request; 1431 nvertex = 1; 1432 } 1433 } else { 1434 if (request->l_start > lock->l_start) { 1435 if (lock_effect == FLK_STAY_SAME) { 1436 request->l_start = lock->l_start; 1437 topology[0] = request; 1438 nvertex = 1; 1439 } else { 1440 lock1 = flk_get_lock(); 1441 COPY(lock1, lock); 1442 lock1->l_end = request->l_start - 1; 1443 topology[0] = lock1; 1444 topology[1] = request; 1445 nvertex = 2; 1446 } 1447 } else if (request->l_start < lock->l_start) { 1448 topology[0] = request; 1449 nvertex = 1; 1450 } else { 1451 if (lock_effect != FLK_UNLOCK) { 1452 topology[0] = request; 1453 nvertex = 1; 1454 } else { 1455 flk_delete_active_lock(lock, 0); 1456 flk_wakeup(lock, 1); 1457 flk_free_lock(lock); 1458 CHECK_SLEEPING_LOCKS(gp); 1459 CHECK_ACTIVE_LOCKS(gp); 1460 return (1); 1461 } 1462 } 1463 } 1464 1465 recompute: 1466 1467 /* 1468 * For unlock we don't send the 'request' to for recomputing 1469 * dependencies because no lock will add an edge to this. 1470 */ 1471 1472 if (lock_effect == FLK_UNLOCK) { 1473 topology[nvertex-1] = NULL; 1474 nvertex--; 1475 } 1476 for (i = 0; i < nvertex; i++) { 1477 topology[i]->l_state |= RECOMPUTE_LOCK; 1478 topology[i]->l_color = NO_COLOR; 1479 } 1480 1481 ASSERT(FIRST_ADJ(lock) == HEAD(lock)); 1482 1483 /* 1484 * we remove the adjacent edges for all vertices' to this vertex 1485 * 'lock'. 1486 */ 1487 1488 ep = FIRST_IN(lock); 1489 while (ep != HEAD(lock)) { 1490 ADJ_LIST_REMOVE(ep); 1491 ep = NEXT_IN(ep); 1492 } 1493 1494 flk_delete_active_lock(lock, 0); 1495 1496 /* We are ready for recomputing the dependencies now */ 1497 1498 flk_recompute_dependencies(lock, topology, nvertex, 1); 1499 1500 for (i = 0; i < nvertex; i++) { 1501 topology[i]->l_state &= ~RECOMPUTE_LOCK; 1502 topology[i]->l_color = NO_COLOR; 1503 } 1504 1505 1506 if (lock_effect == FLK_UNLOCK) { 1507 nvertex++; 1508 } 1509 for (i = 0; i < nvertex - 1; i++) { 1510 flk_insert_active_lock(topology[i]); 1511 } 1512 1513 1514 if (lock_effect == FLK_DOWNGRADE || lock_effect == FLK_UNLOCK) { 1515 flk_wakeup(lock, 0); 1516 } else { 1517 ep = FIRST_IN(lock); 1518 while (ep != HEAD(lock)) { 1519 lock->l_sedge = NEXT_IN(ep); 1520 IN_LIST_REMOVE(ep); 1521 flk_update_proc_graph(ep, 1); 1522 flk_free_edge(ep); 1523 ep = lock->l_sedge; 1524 } 1525 } 1526 flk_free_lock(lock); 1527 1528 CHECK_SLEEPING_LOCKS(gp); 1529 CHECK_ACTIVE_LOCKS(gp); 1530 return (0); 1531 } 1532 1533 /* 1534 * Insert a lock into the active queue. 1535 */ 1536 1537 static void 1538 flk_insert_active_lock(lock_descriptor_t *new_lock) 1539 { 1540 graph_t *gp = new_lock->l_graph; 1541 vnode_t *vp = new_lock->l_vnode; 1542 lock_descriptor_t *first_lock, *lock; 1543 1544 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1545 1546 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 1547 first_lock = lock; 1548 1549 if (first_lock != NULL) { 1550 for (; (lock->l_vnode == vp && 1551 lock->l_start < new_lock->l_start); lock = lock->l_next) 1552 ; 1553 } else { 1554 lock = ACTIVE_HEAD(gp); 1555 } 1556 1557 lock->l_prev->l_next = new_lock; 1558 new_lock->l_next = lock; 1559 new_lock->l_prev = lock->l_prev; 1560 lock->l_prev = new_lock; 1561 1562 if (first_lock == NULL || (new_lock->l_start <= first_lock->l_start)) { 1563 vp->v_filocks = (struct filock *)new_lock; 1564 } 1565 flk_set_state(new_lock, FLK_ACTIVE_STATE); 1566 new_lock->l_state |= ACTIVE_LOCK; 1567 1568 CHECK_ACTIVE_LOCKS(gp); 1569 CHECK_SLEEPING_LOCKS(gp); 1570 } 1571 1572 /* 1573 * Delete the active lock : Performs two functions depending on the 1574 * value of second parameter. One is to remove from the active lists 1575 * only and other is to both remove and free the lock. 1576 */ 1577 1578 static void 1579 flk_delete_active_lock(lock_descriptor_t *lock, int free_lock) 1580 { 1581 vnode_t *vp = lock->l_vnode; 1582 graph_t *gp = lock->l_graph; 1583 1584 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1585 if (free_lock) 1586 ASSERT(NO_DEPENDENTS(lock)); 1587 ASSERT(NOT_BLOCKED(lock)); 1588 ASSERT(IS_ACTIVE(lock)); 1589 1590 ASSERT((vp->v_filocks != NULL)); 1591 1592 if (vp->v_filocks == (struct filock *)lock) { 1593 vp->v_filocks = (struct filock *) 1594 ((lock->l_next->l_vnode == vp) ? lock->l_next : 1595 NULL); 1596 } 1597 lock->l_next->l_prev = lock->l_prev; 1598 lock->l_prev->l_next = lock->l_next; 1599 lock->l_next = lock->l_prev = NULL; 1600 flk_set_state(lock, FLK_DEAD_STATE); 1601 lock->l_state &= ~ACTIVE_LOCK; 1602 1603 if (free_lock) 1604 flk_free_lock(lock); 1605 CHECK_ACTIVE_LOCKS(gp); 1606 CHECK_SLEEPING_LOCKS(gp); 1607 } 1608 1609 /* 1610 * Insert into the sleep queue. 1611 */ 1612 1613 static void 1614 flk_insert_sleeping_lock(lock_descriptor_t *request) 1615 { 1616 graph_t *gp = request->l_graph; 1617 vnode_t *vp = request->l_vnode; 1618 lock_descriptor_t *lock; 1619 1620 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1621 ASSERT(IS_INITIAL(request)); 1622 1623 for (lock = gp->sleeping_locks.l_next; (lock != &gp->sleeping_locks && 1624 lock->l_vnode < vp); lock = lock->l_next) 1625 ; 1626 1627 lock->l_prev->l_next = request; 1628 request->l_prev = lock->l_prev; 1629 lock->l_prev = request; 1630 request->l_next = lock; 1631 flk_set_state(request, FLK_SLEEPING_STATE); 1632 request->l_state |= SLEEPING_LOCK; 1633 } 1634 1635 /* 1636 * Cancelling a sleeping lock implies removing a vertex from the 1637 * dependency graph and therefore we should recompute the dependencies 1638 * of all vertices that have a path to this vertex, w.r.t. all 1639 * vertices reachable from this vertex. 1640 */ 1641 1642 void 1643 flk_cancel_sleeping_lock(lock_descriptor_t *request, int remove_from_queue) 1644 { 1645 graph_t *gp = request->l_graph; 1646 vnode_t *vp = request->l_vnode; 1647 lock_descriptor_t **topology = NULL; 1648 edge_t *ep; 1649 lock_descriptor_t *vertex, *lock; 1650 int nvertex = 0; 1651 int i; 1652 lock_descriptor_t *vertex_stack; 1653 1654 STACK_INIT(vertex_stack); 1655 1656 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1657 /* 1658 * count number of vertex pointers that has to be allocated 1659 * All vertices that are reachable from request. 1660 */ 1661 1662 STACK_PUSH(vertex_stack, request, l_stack); 1663 1664 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 1665 STACK_POP(vertex_stack, l_stack); 1666 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex); 1667 ep = NEXT_ADJ(ep)) { 1668 if (IS_RECOMPUTE(ep->to_vertex)) 1669 continue; 1670 ep->to_vertex->l_state |= RECOMPUTE_LOCK; 1671 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack); 1672 nvertex++; 1673 } 1674 } 1675 1676 /* 1677 * allocate memory for holding the vertex pointers 1678 */ 1679 1680 if (nvertex) { 1681 topology = kmem_zalloc(nvertex * sizeof (lock_descriptor_t *), 1682 KM_SLEEP); 1683 } 1684 1685 /* 1686 * one more pass to actually store the vertices in the 1687 * allocated array. 1688 * We first check sleeping locks and then active locks 1689 * so that topology array will be in a topological 1690 * order. 1691 */ 1692 1693 nvertex = 0; 1694 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 1695 1696 if (lock) { 1697 do { 1698 if (IS_RECOMPUTE(lock)) { 1699 lock->l_index = nvertex; 1700 topology[nvertex++] = lock; 1701 } 1702 lock->l_color = NO_COLOR; 1703 lock = lock->l_next; 1704 } while (lock->l_vnode == vp); 1705 } 1706 1707 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 1708 1709 if (lock) { 1710 do { 1711 if (IS_RECOMPUTE(lock)) { 1712 lock->l_index = nvertex; 1713 topology[nvertex++] = lock; 1714 } 1715 lock->l_color = NO_COLOR; 1716 lock = lock->l_next; 1717 } while (lock->l_vnode == vp); 1718 } 1719 1720 /* 1721 * remove in and out edges of request 1722 * They are freed after updating proc_graph below. 1723 */ 1724 1725 for (ep = FIRST_IN(request); ep != HEAD(request); ep = NEXT_IN(ep)) { 1726 ADJ_LIST_REMOVE(ep); 1727 } 1728 1729 1730 if (remove_from_queue) 1731 REMOVE_SLEEP_QUEUE(request); 1732 1733 /* we are ready to recompute */ 1734 1735 flk_recompute_dependencies(request, topology, nvertex, 1); 1736 1737 ep = FIRST_ADJ(request); 1738 while (ep != HEAD(request)) { 1739 IN_LIST_REMOVE(ep); 1740 request->l_sedge = NEXT_ADJ(ep); 1741 ADJ_LIST_REMOVE(ep); 1742 flk_update_proc_graph(ep, 1); 1743 flk_free_edge(ep); 1744 ep = request->l_sedge; 1745 } 1746 1747 1748 /* 1749 * unset the RECOMPUTE flag in those vertices 1750 */ 1751 1752 for (i = 0; i < nvertex; i++) { 1753 topology[i]->l_state &= ~RECOMPUTE_LOCK; 1754 } 1755 1756 /* 1757 * free the topology 1758 */ 1759 if (nvertex) 1760 kmem_free((void *)topology, 1761 (nvertex * sizeof (lock_descriptor_t *))); 1762 /* 1763 * Possibility of some locks unblocked now 1764 */ 1765 1766 flk_wakeup(request, 0); 1767 1768 /* 1769 * we expect to have a correctly recomputed graph now. 1770 */ 1771 flk_set_state(request, FLK_DEAD_STATE); 1772 flk_free_lock(request); 1773 CHECK_SLEEPING_LOCKS(gp); 1774 CHECK_ACTIVE_LOCKS(gp); 1775 1776 } 1777 1778 /* 1779 * Uncoloring the graph is simply to increment the mark value of the graph 1780 * And only when wrap round takes place will we color all vertices in 1781 * the graph explicitly. 1782 */ 1783 1784 static void 1785 flk_graph_uncolor(graph_t *gp) 1786 { 1787 lock_descriptor_t *lock; 1788 1789 if (gp->mark == UINT_MAX) { 1790 gp->mark = 1; 1791 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp); 1792 lock = lock->l_next) 1793 lock->l_color = 0; 1794 1795 for (lock = SLEEPING_HEAD(gp)->l_next; lock != SLEEPING_HEAD(gp); 1796 lock = lock->l_next) 1797 lock->l_color = 0; 1798 } else { 1799 gp->mark++; 1800 } 1801 } 1802 1803 /* 1804 * Wake up locks that are blocked on the given lock. 1805 */ 1806 1807 static void 1808 flk_wakeup(lock_descriptor_t *lock, int adj_list_remove) 1809 { 1810 edge_t *ep; 1811 graph_t *gp = lock->l_graph; 1812 lock_descriptor_t *lck; 1813 1814 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1815 if (NO_DEPENDENTS(lock)) 1816 return; 1817 ep = FIRST_IN(lock); 1818 do { 1819 /* 1820 * delete the edge from the adjacency list 1821 * of from vertex. if no more adjacent edges 1822 * for this vertex wake this process. 1823 */ 1824 lck = ep->from_vertex; 1825 if (adj_list_remove) 1826 ADJ_LIST_REMOVE(ep); 1827 flk_update_proc_graph(ep, 1); 1828 if (NOT_BLOCKED(lck)) { 1829 GRANT_WAKEUP(lck); 1830 } 1831 lock->l_sedge = NEXT_IN(ep); 1832 IN_LIST_REMOVE(ep); 1833 flk_free_edge(ep); 1834 ep = lock->l_sedge; 1835 } while (ep != HEAD(lock)); 1836 ASSERT(NO_DEPENDENTS(lock)); 1837 } 1838 1839 /* 1840 * The dependents of request, is checked for its dependency against the 1841 * locks in topology (called topology because the array is and should be in 1842 * topological order for this algorithm, if not in topological order the 1843 * inner loop below might add more edges than necessary. Topological ordering 1844 * of vertices satisfies the property that all edges will be from left to 1845 * right i.e., topology[i] can have an edge to topology[j], iff i<j) 1846 * If lock l1 in the dependent set of request is dependent (blocked by) 1847 * on lock l2 in topology but does not have a path to it, we add an edge 1848 * in the inner loop below. 1849 * 1850 * We don't want to add an edge between l1 and l2 if there exists 1851 * already a path from l1 to l2, so care has to be taken for those vertices 1852 * that have two paths to 'request'. These vertices are referred to here 1853 * as barrier locks. 1854 * 1855 * The barriers has to be found (those vertex that originally had two paths 1856 * to request) because otherwise we may end up adding edges unnecessarily 1857 * to vertices in topology, and thus barrier vertices can have an edge 1858 * to a vertex in topology as well a path to it. 1859 */ 1860 1861 static void 1862 flk_recompute_dependencies(lock_descriptor_t *request, 1863 lock_descriptor_t **topology, 1864 int nvertex, int update_graph) 1865 { 1866 lock_descriptor_t *vertex, *lock; 1867 graph_t *gp = request->l_graph; 1868 int i, count; 1869 int barrier_found = 0; 1870 edge_t *ep; 1871 lock_descriptor_t *vertex_stack; 1872 1873 STACK_INIT(vertex_stack); 1874 1875 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1876 if (nvertex == 0) 1877 return; 1878 flk_graph_uncolor(request->l_graph); 1879 barrier_found = flk_find_barriers(request); 1880 request->l_state |= RECOMPUTE_DONE; 1881 1882 STACK_PUSH(vertex_stack, request, l_stack); 1883 request->l_sedge = FIRST_IN(request); 1884 1885 1886 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 1887 if (vertex->l_state & RECOMPUTE_DONE) { 1888 count = 0; 1889 goto next_in_edge; 1890 } 1891 if (IS_BARRIER(vertex)) { 1892 /* decrement the barrier count */ 1893 if (vertex->l_index) { 1894 vertex->l_index--; 1895 /* this guy will be pushed again anyway ? */ 1896 STACK_POP(vertex_stack, l_stack); 1897 if (vertex->l_index == 0) { 1898 /* 1899 * barrier is over we can recompute 1900 * dependencies for this lock in the 1901 * next stack pop 1902 */ 1903 vertex->l_state &= ~BARRIER_LOCK; 1904 } 1905 continue; 1906 } 1907 } 1908 vertex->l_state |= RECOMPUTE_DONE; 1909 flk_graph_uncolor(gp); 1910 count = flk_color_reachables(vertex); 1911 for (i = 0; i < nvertex; i++) { 1912 lock = topology[i]; 1913 if (COLORED(lock)) 1914 continue; 1915 if (BLOCKS(lock, vertex)) { 1916 (void) flk_add_edge(vertex, lock, 1917 NO_CHECK_CYCLE, update_graph); 1918 COLOR(lock); 1919 count++; 1920 count += flk_color_reachables(lock); 1921 } 1922 1923 } 1924 1925 next_in_edge: 1926 if (count == nvertex || 1927 vertex->l_sedge == HEAD(vertex)) { 1928 /* prune the tree below this */ 1929 STACK_POP(vertex_stack, l_stack); 1930 vertex->l_state &= ~RECOMPUTE_DONE; 1931 /* update the barrier locks below this! */ 1932 if (vertex->l_sedge != HEAD(vertex) && barrier_found) { 1933 flk_graph_uncolor(gp); 1934 flk_update_barriers(vertex); 1935 } 1936 continue; 1937 } 1938 1939 ep = vertex->l_sedge; 1940 lock = ep->from_vertex; 1941 STACK_PUSH(vertex_stack, lock, l_stack); 1942 lock->l_sedge = FIRST_IN(lock); 1943 vertex->l_sedge = NEXT_IN(ep); 1944 } 1945 1946 } 1947 1948 /* 1949 * Color all reachable vertices from vertex that belongs to topology (here 1950 * those that have RECOMPUTE_LOCK set in their state) and yet uncolored. 1951 * 1952 * Note: we need to use a different stack_link l_stack1 because this is 1953 * called from flk_recompute_dependencies() that already uses a stack with 1954 * l_stack as stack_link. 1955 */ 1956 1957 static int 1958 flk_color_reachables(lock_descriptor_t *vertex) 1959 { 1960 lock_descriptor_t *ver, *lock; 1961 int count; 1962 edge_t *ep; 1963 lock_descriptor_t *vertex_stack; 1964 1965 STACK_INIT(vertex_stack); 1966 1967 STACK_PUSH(vertex_stack, vertex, l_stack1); 1968 count = 0; 1969 while ((ver = STACK_TOP(vertex_stack)) != NULL) { 1970 1971 STACK_POP(vertex_stack, l_stack1); 1972 for (ep = FIRST_ADJ(ver); ep != HEAD(ver); 1973 ep = NEXT_ADJ(ep)) { 1974 lock = ep->to_vertex; 1975 if (COLORED(lock)) 1976 continue; 1977 COLOR(lock); 1978 if (IS_RECOMPUTE(lock)) 1979 count++; 1980 STACK_PUSH(vertex_stack, lock, l_stack1); 1981 } 1982 1983 } 1984 return (count); 1985 } 1986 1987 /* 1988 * Called from flk_recompute_dependencies() this routine decrements 1989 * the barrier count of barrier vertices that are reachable from lock. 1990 */ 1991 1992 static void 1993 flk_update_barriers(lock_descriptor_t *lock) 1994 { 1995 lock_descriptor_t *vertex, *lck; 1996 edge_t *ep; 1997 lock_descriptor_t *vertex_stack; 1998 1999 STACK_INIT(vertex_stack); 2000 2001 STACK_PUSH(vertex_stack, lock, l_stack1); 2002 2003 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 2004 STACK_POP(vertex_stack, l_stack1); 2005 for (ep = FIRST_IN(vertex); ep != HEAD(vertex); 2006 ep = NEXT_IN(ep)) { 2007 lck = ep->from_vertex; 2008 if (COLORED(lck)) { 2009 if (IS_BARRIER(lck)) { 2010 ASSERT(lck->l_index > 0); 2011 lck->l_index--; 2012 if (lck->l_index == 0) 2013 lck->l_state &= ~BARRIER_LOCK; 2014 } 2015 continue; 2016 } 2017 COLOR(lck); 2018 if (IS_BARRIER(lck)) { 2019 ASSERT(lck->l_index > 0); 2020 lck->l_index--; 2021 if (lck->l_index == 0) 2022 lck->l_state &= ~BARRIER_LOCK; 2023 } 2024 STACK_PUSH(vertex_stack, lck, l_stack1); 2025 } 2026 } 2027 } 2028 2029 /* 2030 * Finds all vertices that are reachable from 'lock' more than once and 2031 * mark them as barrier vertices and increment their barrier count. 2032 * The barrier count is one minus the total number of paths from lock 2033 * to that vertex. 2034 */ 2035 2036 static int 2037 flk_find_barriers(lock_descriptor_t *lock) 2038 { 2039 lock_descriptor_t *vertex, *lck; 2040 int found = 0; 2041 edge_t *ep; 2042 lock_descriptor_t *vertex_stack; 2043 2044 STACK_INIT(vertex_stack); 2045 2046 STACK_PUSH(vertex_stack, lock, l_stack1); 2047 2048 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 2049 STACK_POP(vertex_stack, l_stack1); 2050 for (ep = FIRST_IN(vertex); ep != HEAD(vertex); 2051 ep = NEXT_IN(ep)) { 2052 lck = ep->from_vertex; 2053 if (COLORED(lck)) { 2054 /* this is a barrier */ 2055 lck->l_state |= BARRIER_LOCK; 2056 /* index will have barrier count */ 2057 lck->l_index++; 2058 if (!found) 2059 found = 1; 2060 continue; 2061 } 2062 COLOR(lck); 2063 lck->l_index = 0; 2064 STACK_PUSH(vertex_stack, lck, l_stack1); 2065 } 2066 } 2067 return (found); 2068 } 2069 2070 /* 2071 * Finds the first lock that is mainly responsible for blocking this 2072 * request. If there is no such lock, request->l_flock.l_type is set to 2073 * F_UNLCK. Otherwise, request->l_flock is filled in with the particulars 2074 * of the blocking lock. 2075 * 2076 * Note: It is possible a request is blocked by a sleeping lock because 2077 * of the fairness policy used in flk_process_request() to construct the 2078 * dependencies. (see comments before flk_process_request()). 2079 */ 2080 2081 static void 2082 flk_get_first_blocking_lock(lock_descriptor_t *request) 2083 { 2084 graph_t *gp = request->l_graph; 2085 vnode_t *vp = request->l_vnode; 2086 lock_descriptor_t *lock, *blocker; 2087 2088 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 2089 blocker = NULL; 2090 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2091 2092 if (lock) { 2093 do { 2094 if (BLOCKS(lock, request)) { 2095 blocker = lock; 2096 break; 2097 } 2098 lock = lock->l_next; 2099 } while (lock->l_vnode == vp); 2100 } 2101 2102 if (blocker == NULL && request->l_flock.l_type == F_RDLCK) { 2103 /* 2104 * No active lock is blocking this request, but if a read 2105 * lock is requested, it may also get blocked by a waiting 2106 * writer. So search all sleeping locks and see if there is 2107 * a writer waiting. 2108 */ 2109 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2110 if (lock) { 2111 do { 2112 if (BLOCKS(lock, request)) { 2113 blocker = lock; 2114 break; 2115 } 2116 lock = lock->l_next; 2117 } while (lock->l_vnode == vp); 2118 } 2119 } 2120 2121 if (blocker) { 2122 report_blocker(blocker, request); 2123 } else 2124 request->l_flock.l_type = F_UNLCK; 2125 } 2126 2127 /* 2128 * Get the graph_t structure associated with a vnode. 2129 * If 'initialize' is non-zero, and the graph_t structure for this vnode has 2130 * not yet been initialized, then a new element is allocated and returned. 2131 */ 2132 graph_t * 2133 flk_get_lock_graph(vnode_t *vp, int initialize) 2134 { 2135 graph_t *gp; 2136 graph_t *gp_alloc = NULL; 2137 int index = HASH_INDEX(vp); 2138 2139 if (initialize == FLK_USE_GRAPH) { 2140 mutex_enter(&flock_lock); 2141 gp = lock_graph[index]; 2142 mutex_exit(&flock_lock); 2143 return (gp); 2144 } 2145 2146 ASSERT(initialize == FLK_INIT_GRAPH); 2147 2148 if (lock_graph[index] == NULL) { 2149 2150 gp_alloc = kmem_zalloc(sizeof (graph_t), KM_SLEEP); 2151 2152 /* Initialize the graph */ 2153 2154 gp_alloc->active_locks.l_next = 2155 gp_alloc->active_locks.l_prev = 2156 (lock_descriptor_t *)ACTIVE_HEAD(gp_alloc); 2157 gp_alloc->sleeping_locks.l_next = 2158 gp_alloc->sleeping_locks.l_prev = 2159 (lock_descriptor_t *)SLEEPING_HEAD(gp_alloc); 2160 gp_alloc->index = index; 2161 mutex_init(&gp_alloc->gp_mutex, NULL, MUTEX_DEFAULT, NULL); 2162 } 2163 2164 mutex_enter(&flock_lock); 2165 2166 gp = lock_graph[index]; 2167 2168 /* Recheck the value within flock_lock */ 2169 if (gp == NULL) { 2170 struct flock_globals *fg; 2171 2172 /* We must have previously allocated the graph_t structure */ 2173 ASSERT(gp_alloc != NULL); 2174 lock_graph[index] = gp = gp_alloc; 2175 /* 2176 * The lockmgr status is only needed if KLM is loaded. 2177 */ 2178 if (flock_zone_key != ZONE_KEY_UNINITIALIZED) { 2179 fg = flk_get_globals(); 2180 fg->lockmgr_status[index] = fg->flk_lockmgr_status; 2181 } 2182 } 2183 2184 mutex_exit(&flock_lock); 2185 2186 if ((gp_alloc != NULL) && (gp != gp_alloc)) { 2187 /* There was a race to allocate the graph_t and we lost */ 2188 mutex_destroy(&gp_alloc->gp_mutex); 2189 kmem_free(gp_alloc, sizeof (graph_t)); 2190 } 2191 2192 return (gp); 2193 } 2194 2195 /* 2196 * PSARC case 1997/292 2197 */ 2198 int 2199 cl_flk_has_remote_locks_for_nlmid(vnode_t *vp, int nlmid) 2200 { 2201 lock_descriptor_t *lock; 2202 int result = 0; 2203 graph_t *gp; 2204 int lock_nlmid; 2205 2206 /* 2207 * Check to see if node is booted as a cluster. If not, return. 2208 */ 2209 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 2210 return (0); 2211 } 2212 2213 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH); 2214 if (gp == NULL) { 2215 return (0); 2216 } 2217 2218 mutex_enter(&gp->gp_mutex); 2219 2220 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2221 2222 if (lock) { 2223 while (lock->l_vnode == vp) { 2224 /* get NLM id from sysid */ 2225 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 2226 2227 /* 2228 * If NLM server request _and_ nlmid of lock matches 2229 * nlmid of argument, then we've found a remote lock. 2230 */ 2231 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 2232 result = 1; 2233 goto done; 2234 } 2235 lock = lock->l_next; 2236 } 2237 } 2238 2239 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2240 2241 if (lock) { 2242 while (lock->l_vnode == vp) { 2243 /* get NLM id from sysid */ 2244 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 2245 2246 /* 2247 * If NLM server request _and_ nlmid of lock matches 2248 * nlmid of argument, then we've found a remote lock. 2249 */ 2250 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 2251 result = 1; 2252 goto done; 2253 } 2254 lock = lock->l_next; 2255 } 2256 } 2257 2258 done: 2259 mutex_exit(&gp->gp_mutex); 2260 return (result); 2261 } 2262 2263 /* 2264 * Determine whether there are any locks for the given vnode with a remote 2265 * sysid. Returns zero if not, non-zero if there are. 2266 * 2267 * Note that the return value from this function is potentially invalid 2268 * once it has been returned. The caller is responsible for providing its 2269 * own synchronization mechanism to ensure that the return value is useful 2270 * (e.g., see nfs_lockcompletion()). 2271 */ 2272 int 2273 flk_has_remote_locks(vnode_t *vp) 2274 { 2275 lock_descriptor_t *lock; 2276 int result = 0; 2277 graph_t *gp; 2278 2279 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH); 2280 if (gp == NULL) { 2281 return (0); 2282 } 2283 2284 mutex_enter(&gp->gp_mutex); 2285 2286 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2287 2288 if (lock) { 2289 while (lock->l_vnode == vp) { 2290 if (IS_REMOTE(lock)) { 2291 result = 1; 2292 goto done; 2293 } 2294 lock = lock->l_next; 2295 } 2296 } 2297 2298 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2299 2300 if (lock) { 2301 while (lock->l_vnode == vp) { 2302 if (IS_REMOTE(lock)) { 2303 result = 1; 2304 goto done; 2305 } 2306 lock = lock->l_next; 2307 } 2308 } 2309 2310 done: 2311 mutex_exit(&gp->gp_mutex); 2312 return (result); 2313 } 2314 2315 /* 2316 * Determine whether there are any locks for the given vnode with a remote 2317 * sysid matching given sysid. 2318 * Used by the new (open source) NFS Lock Manager (NLM) 2319 */ 2320 int 2321 flk_has_remote_locks_for_sysid(vnode_t *vp, int sysid) 2322 { 2323 lock_descriptor_t *lock; 2324 int result = 0; 2325 graph_t *gp; 2326 2327 if (sysid == 0) 2328 return (0); 2329 2330 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH); 2331 if (gp == NULL) { 2332 return (0); 2333 } 2334 2335 mutex_enter(&gp->gp_mutex); 2336 2337 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2338 2339 if (lock) { 2340 while (lock->l_vnode == vp) { 2341 if (lock->l_flock.l_sysid == sysid) { 2342 result = 1; 2343 goto done; 2344 } 2345 lock = lock->l_next; 2346 } 2347 } 2348 2349 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2350 2351 if (lock) { 2352 while (lock->l_vnode == vp) { 2353 if (lock->l_flock.l_sysid == sysid) { 2354 result = 1; 2355 goto done; 2356 } 2357 lock = lock->l_next; 2358 } 2359 } 2360 2361 done: 2362 mutex_exit(&gp->gp_mutex); 2363 return (result); 2364 } 2365 2366 /* 2367 * Determine if there are any locks owned by the given sysid. 2368 * Returns zero if not, non-zero if there are. Note that this return code 2369 * could be derived from flk_get_{sleeping,active}_locks, but this routine 2370 * avoids all the memory allocations of those routines. 2371 * 2372 * This routine has the same synchronization issues as 2373 * flk_has_remote_locks. 2374 */ 2375 2376 int 2377 flk_sysid_has_locks(int sysid, int lck_type) 2378 { 2379 int has_locks = 0; 2380 lock_descriptor_t *lock; 2381 graph_t *gp; 2382 int i; 2383 2384 for (i = 0; i < HASH_SIZE && !has_locks; i++) { 2385 mutex_enter(&flock_lock); 2386 gp = lock_graph[i]; 2387 mutex_exit(&flock_lock); 2388 if (gp == NULL) { 2389 continue; 2390 } 2391 2392 mutex_enter(&gp->gp_mutex); 2393 2394 if (lck_type & FLK_QUERY_ACTIVE) { 2395 for (lock = ACTIVE_HEAD(gp)->l_next; 2396 lock != ACTIVE_HEAD(gp) && !has_locks; 2397 lock = lock->l_next) { 2398 if (lock->l_flock.l_sysid == sysid) 2399 has_locks = 1; 2400 } 2401 } 2402 2403 if (lck_type & FLK_QUERY_SLEEPING) { 2404 for (lock = SLEEPING_HEAD(gp)->l_next; 2405 lock != SLEEPING_HEAD(gp) && !has_locks; 2406 lock = lock->l_next) { 2407 if (lock->l_flock.l_sysid == sysid) 2408 has_locks = 1; 2409 } 2410 } 2411 mutex_exit(&gp->gp_mutex); 2412 } 2413 2414 return (has_locks); 2415 } 2416 2417 2418 /* 2419 * PSARC case 1997/292 2420 * 2421 * Requires: "sysid" is a pair [nlmid, sysid]. The lower half is 16-bit 2422 * quantity, the real sysid generated by the NLM server; the upper half 2423 * identifies the node of the cluster where the NLM server ran. 2424 * This routine is only called by an NLM server running in a cluster. 2425 * Effects: Remove all locks held on behalf of the client identified 2426 * by "sysid." 2427 */ 2428 void 2429 cl_flk_remove_locks_by_sysid(int sysid) 2430 { 2431 graph_t *gp; 2432 int i; 2433 lock_descriptor_t *lock, *nlock; 2434 2435 /* 2436 * Check to see if node is booted as a cluster. If not, return. 2437 */ 2438 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 2439 return; 2440 } 2441 2442 ASSERT(sysid != 0); 2443 for (i = 0; i < HASH_SIZE; i++) { 2444 mutex_enter(&flock_lock); 2445 gp = lock_graph[i]; 2446 mutex_exit(&flock_lock); 2447 2448 if (gp == NULL) 2449 continue; 2450 2451 mutex_enter(&gp->gp_mutex); /* get mutex on lock graph */ 2452 2453 /* signal sleeping requests so that they bail out */ 2454 lock = SLEEPING_HEAD(gp)->l_next; 2455 while (lock != SLEEPING_HEAD(gp)) { 2456 nlock = lock->l_next; 2457 if (lock->l_flock.l_sysid == sysid) { 2458 INTERRUPT_WAKEUP(lock); 2459 } 2460 lock = nlock; 2461 } 2462 2463 /* delete active locks */ 2464 lock = ACTIVE_HEAD(gp)->l_next; 2465 while (lock != ACTIVE_HEAD(gp)) { 2466 nlock = lock->l_next; 2467 if (lock->l_flock.l_sysid == sysid) { 2468 flk_delete_active_lock(lock, 0); 2469 flk_wakeup(lock, 1); 2470 flk_free_lock(lock); 2471 } 2472 lock = nlock; 2473 } 2474 mutex_exit(&gp->gp_mutex); /* release mutex on lock graph */ 2475 } 2476 } 2477 2478 /* 2479 * Delete all locks in the system that belongs to the sysid of the request. 2480 */ 2481 2482 static void 2483 flk_delete_locks_by_sysid(lock_descriptor_t *request) 2484 { 2485 int sysid = request->l_flock.l_sysid; 2486 lock_descriptor_t *lock, *nlock; 2487 graph_t *gp; 2488 int i; 2489 2490 ASSERT(MUTEX_HELD(&request->l_graph->gp_mutex)); 2491 ASSERT(sysid != 0); 2492 2493 mutex_exit(&request->l_graph->gp_mutex); 2494 2495 for (i = 0; i < HASH_SIZE; i++) { 2496 mutex_enter(&flock_lock); 2497 gp = lock_graph[i]; 2498 mutex_exit(&flock_lock); 2499 2500 if (gp == NULL) 2501 continue; 2502 2503 mutex_enter(&gp->gp_mutex); 2504 2505 /* signal sleeping requests so that they bail out */ 2506 lock = SLEEPING_HEAD(gp)->l_next; 2507 while (lock != SLEEPING_HEAD(gp)) { 2508 nlock = lock->l_next; 2509 if (lock->l_flock.l_sysid == sysid) { 2510 INTERRUPT_WAKEUP(lock); 2511 } 2512 lock = nlock; 2513 } 2514 2515 /* delete active locks */ 2516 lock = ACTIVE_HEAD(gp)->l_next; 2517 while (lock != ACTIVE_HEAD(gp)) { 2518 nlock = lock->l_next; 2519 if (lock->l_flock.l_sysid == sysid) { 2520 flk_delete_active_lock(lock, 0); 2521 flk_wakeup(lock, 1); 2522 flk_free_lock(lock); 2523 } 2524 lock = nlock; 2525 } 2526 mutex_exit(&gp->gp_mutex); 2527 } 2528 2529 mutex_enter(&request->l_graph->gp_mutex); 2530 } 2531 2532 /* 2533 * Clustering: Deletes PXFS locks 2534 * Effects: Delete all locks on files in the given file system and with the 2535 * given PXFS id. 2536 */ 2537 void 2538 cl_flk_delete_pxfs_locks(struct vfs *vfsp, int pxfsid) 2539 { 2540 lock_descriptor_t *lock, *nlock; 2541 graph_t *gp; 2542 int i; 2543 2544 for (i = 0; i < HASH_SIZE; i++) { 2545 mutex_enter(&flock_lock); 2546 gp = lock_graph[i]; 2547 mutex_exit(&flock_lock); 2548 2549 if (gp == NULL) 2550 continue; 2551 2552 mutex_enter(&gp->gp_mutex); 2553 2554 /* signal sleeping requests so that they bail out */ 2555 lock = SLEEPING_HEAD(gp)->l_next; 2556 while (lock != SLEEPING_HEAD(gp)) { 2557 nlock = lock->l_next; 2558 if (lock->l_vnode->v_vfsp == vfsp) { 2559 ASSERT(IS_PXFS(lock)); 2560 if (GETPXFSID(lock->l_flock.l_sysid) == 2561 pxfsid) { 2562 flk_set_state(lock, 2563 FLK_CANCELLED_STATE); 2564 flk_cancel_sleeping_lock(lock, 1); 2565 } 2566 } 2567 lock = nlock; 2568 } 2569 2570 /* delete active locks */ 2571 lock = ACTIVE_HEAD(gp)->l_next; 2572 while (lock != ACTIVE_HEAD(gp)) { 2573 nlock = lock->l_next; 2574 if (lock->l_vnode->v_vfsp == vfsp) { 2575 ASSERT(IS_PXFS(lock)); 2576 if (GETPXFSID(lock->l_flock.l_sysid) == 2577 pxfsid) { 2578 flk_delete_active_lock(lock, 0); 2579 flk_wakeup(lock, 1); 2580 flk_free_lock(lock); 2581 } 2582 } 2583 lock = nlock; 2584 } 2585 mutex_exit(&gp->gp_mutex); 2586 } 2587 } 2588 2589 /* 2590 * Search for a sleeping lock manager lock which matches exactly this lock 2591 * request; if one is found, fake a signal to cancel it. 2592 * 2593 * Return 1 if a matching lock was found, 0 otherwise. 2594 */ 2595 2596 static int 2597 flk_canceled(lock_descriptor_t *request) 2598 { 2599 lock_descriptor_t *lock, *nlock; 2600 graph_t *gp = request->l_graph; 2601 vnode_t *vp = request->l_vnode; 2602 2603 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 2604 ASSERT(IS_LOCKMGR(request)); 2605 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2606 2607 if (lock) { 2608 while (lock->l_vnode == vp) { 2609 nlock = lock->l_next; 2610 if (SAME_OWNER(lock, request) && 2611 lock->l_start == request->l_start && 2612 lock->l_end == request->l_end) { 2613 INTERRUPT_WAKEUP(lock); 2614 return (1); 2615 } 2616 lock = nlock; 2617 } 2618 } 2619 return (0); 2620 } 2621 2622 /* 2623 * Remove all the locks for the vnode belonging to the given pid and sysid. 2624 */ 2625 2626 void 2627 cleanlocks(vnode_t *vp, pid_t pid, int sysid) 2628 { 2629 graph_t *gp; 2630 lock_descriptor_t *lock, *nlock; 2631 lock_descriptor_t *link_stack; 2632 2633 STACK_INIT(link_stack); 2634 2635 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH); 2636 2637 if (gp == NULL) 2638 return; 2639 mutex_enter(&gp->gp_mutex); 2640 2641 CHECK_SLEEPING_LOCKS(gp); 2642 CHECK_ACTIVE_LOCKS(gp); 2643 2644 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2645 2646 if (lock) { 2647 do { 2648 nlock = lock->l_next; 2649 if ((lock->l_flock.l_pid == pid || 2650 pid == IGN_PID) && 2651 lock->l_flock.l_sysid == sysid) { 2652 CANCEL_WAKEUP(lock); 2653 } 2654 lock = nlock; 2655 } while (lock->l_vnode == vp); 2656 } 2657 2658 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2659 2660 if (lock) { 2661 do { 2662 nlock = lock->l_next; 2663 if ((lock->l_flock.l_pid == pid || 2664 pid == IGN_PID) && 2665 lock->l_flock.l_sysid == sysid) { 2666 flk_delete_active_lock(lock, 0); 2667 STACK_PUSH(link_stack, lock, l_stack); 2668 } 2669 lock = nlock; 2670 } while (lock->l_vnode == vp); 2671 } 2672 2673 while ((lock = STACK_TOP(link_stack)) != NULL) { 2674 STACK_POP(link_stack, l_stack); 2675 flk_wakeup(lock, 1); 2676 flk_free_lock(lock); 2677 } 2678 2679 CHECK_SLEEPING_LOCKS(gp); 2680 CHECK_ACTIVE_LOCKS(gp); 2681 CHECK_OWNER_LOCKS(gp, pid, sysid, vp); 2682 mutex_exit(&gp->gp_mutex); 2683 } 2684 2685 2686 /* 2687 * Called from 'fs' read and write routines for files that have mandatory 2688 * locking enabled. 2689 */ 2690 2691 int 2692 chklock( 2693 struct vnode *vp, 2694 int iomode, 2695 u_offset_t offset, 2696 ssize_t len, 2697 int fmode, 2698 caller_context_t *ct) 2699 { 2700 register int i; 2701 struct flock64 bf; 2702 int error = 0; 2703 2704 bf.l_type = (iomode & FWRITE) ? F_WRLCK : F_RDLCK; 2705 bf.l_whence = 0; 2706 bf.l_start = offset; 2707 bf.l_len = len; 2708 if (ct == NULL) { 2709 bf.l_pid = curproc->p_pid; 2710 bf.l_sysid = 0; 2711 } else { 2712 bf.l_pid = ct->cc_pid; 2713 bf.l_sysid = ct->cc_sysid; 2714 } 2715 i = (fmode & (FNDELAY|FNONBLOCK)) ? INOFLCK : INOFLCK|SLPFLCK; 2716 if ((i = reclock(vp, &bf, i, 0, offset, NULL)) != 0 || 2717 bf.l_type != F_UNLCK) 2718 error = i ? i : EAGAIN; 2719 return (error); 2720 } 2721 2722 /* 2723 * convoff - converts the given data (start, whence) to the 2724 * given whence. 2725 */ 2726 int 2727 convoff(vp, lckdat, whence, offset) 2728 struct vnode *vp; 2729 struct flock64 *lckdat; 2730 int whence; 2731 offset_t offset; 2732 { 2733 int error; 2734 struct vattr vattr; 2735 2736 if ((lckdat->l_whence == 2) || (whence == 2)) { 2737 vattr.va_mask = AT_SIZE; 2738 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) 2739 return (error); 2740 } 2741 2742 switch (lckdat->l_whence) { 2743 case 1: 2744 lckdat->l_start += offset; 2745 break; 2746 case 2: 2747 lckdat->l_start += vattr.va_size; 2748 /* FALLTHRU */ 2749 case 0: 2750 break; 2751 default: 2752 return (EINVAL); 2753 } 2754 2755 if (lckdat->l_start < 0) 2756 return (EINVAL); 2757 2758 switch (whence) { 2759 case 1: 2760 lckdat->l_start -= offset; 2761 break; 2762 case 2: 2763 lckdat->l_start -= vattr.va_size; 2764 /* FALLTHRU */ 2765 case 0: 2766 break; 2767 default: 2768 return (EINVAL); 2769 } 2770 2771 lckdat->l_whence = (short)whence; 2772 return (0); 2773 } 2774 2775 2776 /* proc_graph function definitions */ 2777 2778 /* 2779 * Function checks for deadlock due to the new 'lock'. If deadlock found 2780 * edges of this lock are freed and returned. 2781 */ 2782 2783 static int 2784 flk_check_deadlock(lock_descriptor_t *lock) 2785 { 2786 proc_vertex_t *start_vertex, *pvertex; 2787 proc_vertex_t *dvertex; 2788 proc_edge_t *pep, *ppep; 2789 edge_t *ep, *nep; 2790 proc_vertex_t *process_stack; 2791 2792 STACK_INIT(process_stack); 2793 2794 mutex_enter(&flock_lock); 2795 start_vertex = flk_get_proc_vertex(lock); 2796 ASSERT(start_vertex != NULL); 2797 2798 /* construct the edges from this process to other processes */ 2799 2800 ep = FIRST_ADJ(lock); 2801 while (ep != HEAD(lock)) { 2802 proc_vertex_t *adj_proc; 2803 2804 adj_proc = flk_get_proc_vertex(ep->to_vertex); 2805 for (pep = start_vertex->edge; pep != NULL; pep = pep->next) { 2806 if (pep->to_proc == adj_proc) { 2807 ASSERT(pep->refcount); 2808 pep->refcount++; 2809 break; 2810 } 2811 } 2812 if (pep == NULL) { 2813 pep = flk_get_proc_edge(); 2814 pep->to_proc = adj_proc; 2815 pep->refcount = 1; 2816 adj_proc->incount++; 2817 pep->next = start_vertex->edge; 2818 start_vertex->edge = pep; 2819 } 2820 ep = NEXT_ADJ(ep); 2821 } 2822 2823 ep = FIRST_IN(lock); 2824 2825 while (ep != HEAD(lock)) { 2826 proc_vertex_t *in_proc; 2827 2828 in_proc = flk_get_proc_vertex(ep->from_vertex); 2829 2830 for (pep = in_proc->edge; pep != NULL; pep = pep->next) { 2831 if (pep->to_proc == start_vertex) { 2832 ASSERT(pep->refcount); 2833 pep->refcount++; 2834 break; 2835 } 2836 } 2837 if (pep == NULL) { 2838 pep = flk_get_proc_edge(); 2839 pep->to_proc = start_vertex; 2840 pep->refcount = 1; 2841 start_vertex->incount++; 2842 pep->next = in_proc->edge; 2843 in_proc->edge = pep; 2844 } 2845 ep = NEXT_IN(ep); 2846 } 2847 2848 if (start_vertex->incount == 0) { 2849 mutex_exit(&flock_lock); 2850 return (0); 2851 } 2852 2853 flk_proc_graph_uncolor(); 2854 2855 start_vertex->p_sedge = start_vertex->edge; 2856 2857 STACK_PUSH(process_stack, start_vertex, p_stack); 2858 2859 while ((pvertex = STACK_TOP(process_stack)) != NULL) { 2860 for (pep = pvertex->p_sedge; pep != NULL; pep = pep->next) { 2861 dvertex = pep->to_proc; 2862 if (!PROC_ARRIVED(dvertex)) { 2863 STACK_PUSH(process_stack, dvertex, p_stack); 2864 dvertex->p_sedge = dvertex->edge; 2865 PROC_ARRIVE(pvertex); 2866 pvertex->p_sedge = pep->next; 2867 break; 2868 } 2869 if (!PROC_DEPARTED(dvertex)) 2870 goto deadlock; 2871 } 2872 if (pep == NULL) { 2873 PROC_DEPART(pvertex); 2874 STACK_POP(process_stack, p_stack); 2875 } 2876 } 2877 mutex_exit(&flock_lock); 2878 return (0); 2879 2880 deadlock: 2881 2882 /* we remove all lock edges and proc edges */ 2883 2884 ep = FIRST_ADJ(lock); 2885 while (ep != HEAD(lock)) { 2886 proc_vertex_t *adj_proc; 2887 adj_proc = flk_get_proc_vertex(ep->to_vertex); 2888 nep = NEXT_ADJ(ep); 2889 IN_LIST_REMOVE(ep); 2890 ADJ_LIST_REMOVE(ep); 2891 flk_free_edge(ep); 2892 ppep = start_vertex->edge; 2893 for (pep = start_vertex->edge; pep != NULL; ppep = pep, 2894 pep = ppep->next) { 2895 if (pep->to_proc == adj_proc) { 2896 pep->refcount--; 2897 if (pep->refcount == 0) { 2898 if (pep == ppep) { 2899 start_vertex->edge = pep->next; 2900 } else { 2901 ppep->next = pep->next; 2902 } 2903 adj_proc->incount--; 2904 flk_proc_release(adj_proc); 2905 flk_free_proc_edge(pep); 2906 } 2907 break; 2908 } 2909 } 2910 ep = nep; 2911 } 2912 ep = FIRST_IN(lock); 2913 while (ep != HEAD(lock)) { 2914 proc_vertex_t *in_proc; 2915 in_proc = flk_get_proc_vertex(ep->from_vertex); 2916 nep = NEXT_IN(ep); 2917 IN_LIST_REMOVE(ep); 2918 ADJ_LIST_REMOVE(ep); 2919 flk_free_edge(ep); 2920 ppep = in_proc->edge; 2921 for (pep = in_proc->edge; pep != NULL; ppep = pep, 2922 pep = ppep->next) { 2923 if (pep->to_proc == start_vertex) { 2924 pep->refcount--; 2925 if (pep->refcount == 0) { 2926 if (pep == ppep) { 2927 in_proc->edge = pep->next; 2928 } else { 2929 ppep->next = pep->next; 2930 } 2931 start_vertex->incount--; 2932 flk_proc_release(in_proc); 2933 flk_free_proc_edge(pep); 2934 } 2935 break; 2936 } 2937 } 2938 ep = nep; 2939 } 2940 flk_proc_release(start_vertex); 2941 mutex_exit(&flock_lock); 2942 return (1); 2943 } 2944 2945 /* 2946 * Get a proc vertex. If lock's pvertex value gets a correct proc vertex 2947 * from the list we return that, otherwise we allocate one. If necessary, 2948 * we grow the list of vertices also. 2949 */ 2950 2951 static proc_vertex_t * 2952 flk_get_proc_vertex(lock_descriptor_t *lock) 2953 { 2954 int i; 2955 proc_vertex_t *pv; 2956 proc_vertex_t **palloc; 2957 2958 ASSERT(MUTEX_HELD(&flock_lock)); 2959 if (lock->pvertex != -1) { 2960 ASSERT(lock->pvertex >= 0); 2961 pv = pgraph.proc[lock->pvertex]; 2962 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) { 2963 return (pv); 2964 } 2965 } 2966 for (i = 0; i < pgraph.gcount; i++) { 2967 pv = pgraph.proc[i]; 2968 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) { 2969 lock->pvertex = pv->index = i; 2970 return (pv); 2971 } 2972 } 2973 pv = kmem_zalloc(sizeof (struct proc_vertex), KM_SLEEP); 2974 pv->pid = lock->l_flock.l_pid; 2975 pv->sysid = lock->l_flock.l_sysid; 2976 flk_proc_vertex_allocs++; 2977 if (pgraph.free != 0) { 2978 for (i = 0; i < pgraph.gcount; i++) { 2979 if (pgraph.proc[i] == NULL) { 2980 pgraph.proc[i] = pv; 2981 lock->pvertex = pv->index = i; 2982 pgraph.free--; 2983 return (pv); 2984 } 2985 } 2986 } 2987 palloc = kmem_zalloc((pgraph.gcount + PROC_CHUNK) * 2988 sizeof (proc_vertex_t *), KM_SLEEP); 2989 2990 if (pgraph.proc) { 2991 bcopy(pgraph.proc, palloc, 2992 pgraph.gcount * sizeof (proc_vertex_t *)); 2993 2994 kmem_free(pgraph.proc, 2995 pgraph.gcount * sizeof (proc_vertex_t *)); 2996 } 2997 pgraph.proc = palloc; 2998 pgraph.free += (PROC_CHUNK - 1); 2999 pv->index = lock->pvertex = pgraph.gcount; 3000 pgraph.gcount += PROC_CHUNK; 3001 pgraph.proc[pv->index] = pv; 3002 return (pv); 3003 } 3004 3005 /* 3006 * Allocate a proc edge. 3007 */ 3008 3009 static proc_edge_t * 3010 flk_get_proc_edge() 3011 { 3012 proc_edge_t *pep; 3013 3014 pep = kmem_zalloc(sizeof (proc_edge_t), KM_SLEEP); 3015 flk_proc_edge_allocs++; 3016 return (pep); 3017 } 3018 3019 /* 3020 * Free the proc edge. Called whenever its reference count goes to zero. 3021 */ 3022 3023 static void 3024 flk_free_proc_edge(proc_edge_t *pep) 3025 { 3026 ASSERT(pep->refcount == 0); 3027 kmem_free((void *)pep, sizeof (proc_edge_t)); 3028 flk_proc_edge_frees++; 3029 } 3030 3031 /* 3032 * Color the graph explicitly done only when the mark value hits max value. 3033 */ 3034 3035 static void 3036 flk_proc_graph_uncolor() 3037 { 3038 int i; 3039 3040 if (pgraph.mark == UINT_MAX) { 3041 for (i = 0; i < pgraph.gcount; i++) 3042 if (pgraph.proc[i] != NULL) { 3043 pgraph.proc[i]->atime = 0; 3044 pgraph.proc[i]->dtime = 0; 3045 } 3046 pgraph.mark = 1; 3047 } else { 3048 pgraph.mark++; 3049 } 3050 } 3051 3052 /* 3053 * Release the proc vertex iff both there are no in edges and out edges 3054 */ 3055 3056 static void 3057 flk_proc_release(proc_vertex_t *proc) 3058 { 3059 ASSERT(MUTEX_HELD(&flock_lock)); 3060 if (proc->edge == NULL && proc->incount == 0) { 3061 pgraph.proc[proc->index] = NULL; 3062 pgraph.free++; 3063 kmem_free(proc, sizeof (proc_vertex_t)); 3064 flk_proc_vertex_frees++; 3065 } 3066 } 3067 3068 /* 3069 * Updates process graph to reflect change in a lock_graph. 3070 * Note: We should call this function only after we have a correctly 3071 * recomputed lock graph. Otherwise we might miss a deadlock detection. 3072 * eg: in function flk_relation() we call this function after flk_recompute_ 3073 * dependencies() otherwise if a process tries to lock a vnode hashed 3074 * into another graph it might sleep for ever. 3075 */ 3076 3077 static void 3078 flk_update_proc_graph(edge_t *ep, int delete) 3079 { 3080 proc_vertex_t *toproc, *fromproc; 3081 proc_edge_t *pep, *prevpep; 3082 3083 mutex_enter(&flock_lock); 3084 toproc = flk_get_proc_vertex(ep->to_vertex); 3085 fromproc = flk_get_proc_vertex(ep->from_vertex); 3086 3087 if (!delete) 3088 goto add; 3089 pep = prevpep = fromproc->edge; 3090 3091 ASSERT(pep != NULL); 3092 while (pep != NULL) { 3093 if (pep->to_proc == toproc) { 3094 ASSERT(pep->refcount > 0); 3095 pep->refcount--; 3096 if (pep->refcount == 0) { 3097 if (pep == prevpep) { 3098 fromproc->edge = pep->next; 3099 } else { 3100 prevpep->next = pep->next; 3101 } 3102 toproc->incount--; 3103 flk_proc_release(toproc); 3104 flk_free_proc_edge(pep); 3105 } 3106 break; 3107 } 3108 prevpep = pep; 3109 pep = pep->next; 3110 } 3111 flk_proc_release(fromproc); 3112 mutex_exit(&flock_lock); 3113 return; 3114 add: 3115 3116 pep = fromproc->edge; 3117 3118 while (pep != NULL) { 3119 if (pep->to_proc == toproc) { 3120 ASSERT(pep->refcount > 0); 3121 pep->refcount++; 3122 break; 3123 } 3124 pep = pep->next; 3125 } 3126 if (pep == NULL) { 3127 pep = flk_get_proc_edge(); 3128 pep->to_proc = toproc; 3129 pep->refcount = 1; 3130 toproc->incount++; 3131 pep->next = fromproc->edge; 3132 fromproc->edge = pep; 3133 } 3134 mutex_exit(&flock_lock); 3135 } 3136 3137 /* 3138 * Set the control status for lock manager requests. 3139 * 3140 */ 3141 3142 /* 3143 * PSARC case 1997/292 3144 * 3145 * Requires: "nlmid" must be >= 1 and <= clconf_maximum_nodeid(). 3146 * Effects: Set the state of the NLM server identified by "nlmid" 3147 * in the NLM registry to state "nlm_state." 3148 * Raises exception no_such_nlm if "nlmid" doesn't identify a known 3149 * NLM server to this LLM. 3150 * Note that when this routine is called with NLM_SHUTTING_DOWN there 3151 * may be locks requests that have gotten started but not finished. In 3152 * particular, there may be blocking requests that are in the callback code 3153 * before sleeping (so they're not holding the lock for the graph). If 3154 * such a thread reacquires the graph's lock (to go to sleep) after 3155 * NLM state in the NLM registry is set to a non-up value, 3156 * it will notice the status and bail out. If the request gets 3157 * granted before the thread can check the NLM registry, let it 3158 * continue normally. It will get flushed when we are called with NLM_DOWN. 3159 * 3160 * Modifies: nlm_reg_obj (global) 3161 * Arguments: 3162 * nlmid (IN): id uniquely identifying an NLM server 3163 * nlm_state (IN): NLM server state to change "nlmid" to 3164 */ 3165 void 3166 cl_flk_set_nlm_status(int nlmid, flk_nlm_status_t nlm_state) 3167 { 3168 /* 3169 * Check to see if node is booted as a cluster. If not, return. 3170 */ 3171 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 3172 return; 3173 } 3174 3175 /* 3176 * Check for development/debugging. It is possible to boot a node 3177 * in non-cluster mode, and then run a special script, currently 3178 * available only to developers, to bring up the node as part of a 3179 * cluster. The problem is that running such a script does not 3180 * result in the routine flk_init() being called and hence global array 3181 * nlm_reg_status is NULL. The NLM thinks it's in cluster mode, 3182 * but the LLM needs to do an additional check to see if the global 3183 * array has been created or not. If nlm_reg_status is NULL, then 3184 * return, else continue. 3185 */ 3186 if (nlm_reg_status == NULL) { 3187 return; 3188 } 3189 3190 ASSERT(nlmid <= nlm_status_size && nlmid >= 0); 3191 mutex_enter(&nlm_reg_lock); 3192 3193 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, nlmid)) { 3194 /* 3195 * If the NLM server "nlmid" is unknown in the NLM registry, 3196 * add it to the registry in the nlm shutting down state. 3197 */ 3198 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, 3199 FLK_NLM_SHUTTING_DOWN); 3200 } else { 3201 /* 3202 * Change the state of the NLM server identified by "nlmid" 3203 * in the NLM registry to the argument "nlm_state." 3204 */ 3205 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, 3206 nlm_state); 3207 } 3208 3209 /* 3210 * The reason we must register the NLM server that is shutting down 3211 * with an LLM that doesn't already know about it (never sent a lock 3212 * request) is to handle correctly a race between shutdown and a new 3213 * lock request. Suppose that a shutdown request from the NLM server 3214 * invokes this routine at the LLM, and a thread is spawned to 3215 * service the request. Now suppose a new lock request is in 3216 * progress and has already passed the first line of defense in 3217 * reclock(), which denies new locks requests from NLM servers 3218 * that are not in the NLM_UP state. After the current routine 3219 * is invoked for both phases of shutdown, the routine will return, 3220 * having done nothing, and the lock request will proceed and 3221 * probably be granted. The problem is that the shutdown was ignored 3222 * by the lock request because there was no record of that NLM server 3223 * shutting down. We will be in the peculiar position of thinking 3224 * that we've shutdown the NLM server and all locks at all LLMs have 3225 * been discarded, but in fact there's still one lock held. 3226 * The solution is to record the existence of NLM server and change 3227 * its state immediately to NLM_SHUTTING_DOWN. The lock request in 3228 * progress may proceed because the next phase NLM_DOWN will catch 3229 * this lock and discard it. 3230 */ 3231 mutex_exit(&nlm_reg_lock); 3232 3233 switch (nlm_state) { 3234 case FLK_NLM_UP: 3235 /* 3236 * Change the NLM state of all locks still held on behalf of 3237 * the NLM server identified by "nlmid" to NLM_UP. 3238 */ 3239 cl_flk_change_nlm_state_all_locks(nlmid, FLK_NLM_UP); 3240 break; 3241 3242 case FLK_NLM_SHUTTING_DOWN: 3243 /* 3244 * Wake up all sleeping locks for the NLM server identified 3245 * by "nlmid." Note that eventually all woken threads will 3246 * have their lock requests cancelled and descriptors 3247 * removed from the sleeping lock list. Note that the NLM 3248 * server state associated with each lock descriptor is 3249 * changed to FLK_NLM_SHUTTING_DOWN. 3250 */ 3251 cl_flk_wakeup_sleeping_nlm_locks(nlmid); 3252 break; 3253 3254 case FLK_NLM_DOWN: 3255 /* 3256 * Discard all active, granted locks for this NLM server 3257 * identified by "nlmid." 3258 */ 3259 cl_flk_unlock_nlm_granted(nlmid); 3260 break; 3261 3262 default: 3263 panic("cl_set_nlm_status: bad status (%d)", nlm_state); 3264 } 3265 } 3266 3267 /* 3268 * Set the control status for lock manager requests. 3269 * 3270 * Note that when this routine is called with FLK_WAKEUP_SLEEPERS, there 3271 * may be locks requests that have gotten started but not finished. In 3272 * particular, there may be blocking requests that are in the callback code 3273 * before sleeping (so they're not holding the lock for the graph). If 3274 * such a thread reacquires the graph's lock (to go to sleep) after 3275 * flk_lockmgr_status is set to a non-up value, it will notice the status 3276 * and bail out. If the request gets granted before the thread can check 3277 * flk_lockmgr_status, let it continue normally. It will get flushed when 3278 * we are called with FLK_LOCKMGR_DOWN. 3279 */ 3280 3281 void 3282 flk_set_lockmgr_status(flk_lockmgr_status_t status) 3283 { 3284 int i; 3285 graph_t *gp; 3286 struct flock_globals *fg; 3287 3288 fg = flk_get_globals(); 3289 ASSERT(fg != NULL); 3290 3291 mutex_enter(&flock_lock); 3292 fg->flk_lockmgr_status = status; 3293 mutex_exit(&flock_lock); 3294 3295 /* 3296 * If the lock manager is coming back up, all that's needed is to 3297 * propagate this information to the graphs. If the lock manager 3298 * is going down, additional action is required, and each graph's 3299 * copy of the state is updated atomically with this other action. 3300 */ 3301 switch (status) { 3302 case FLK_LOCKMGR_UP: 3303 for (i = 0; i < HASH_SIZE; i++) { 3304 mutex_enter(&flock_lock); 3305 gp = lock_graph[i]; 3306 mutex_exit(&flock_lock); 3307 if (gp == NULL) 3308 continue; 3309 mutex_enter(&gp->gp_mutex); 3310 fg->lockmgr_status[i] = status; 3311 mutex_exit(&gp->gp_mutex); 3312 } 3313 break; 3314 case FLK_WAKEUP_SLEEPERS: 3315 wakeup_sleeping_lockmgr_locks(fg); 3316 break; 3317 case FLK_LOCKMGR_DOWN: 3318 unlock_lockmgr_granted(fg); 3319 break; 3320 default: 3321 panic("flk_set_lockmgr_status: bad status (%d)", status); 3322 break; 3323 } 3324 } 3325 3326 /* 3327 * This routine returns all the locks that are active or sleeping and are 3328 * associated with a particular set of identifiers. If lock_state != 0, then 3329 * only locks that match the lock_state are returned. If lock_state == 0, then 3330 * all locks are returned. If pid == NOPID, the pid is ignored. If 3331 * use_sysid is FALSE, then the sysid is ignored. If vp is NULL, then the 3332 * vnode pointer is ignored. 3333 * 3334 * A list containing the vnode pointer and an flock structure 3335 * describing the lock is returned. Each element in the list is 3336 * dynamically allocated and must be freed by the caller. The 3337 * last item in the list is denoted by a NULL value in the ll_next 3338 * field. 3339 * 3340 * The vnode pointers returned are held. The caller is responsible 3341 * for releasing these. Note that the returned list is only a snapshot of 3342 * the current lock information, and that it is a snapshot of a moving 3343 * target (only one graph is locked at a time). 3344 */ 3345 3346 locklist_t * 3347 get_lock_list(int list_type, int lock_state, int sysid, boolean_t use_sysid, 3348 pid_t pid, const vnode_t *vp, zoneid_t zoneid) 3349 { 3350 lock_descriptor_t *lock; 3351 lock_descriptor_t *graph_head; 3352 locklist_t listhead; 3353 locklist_t *llheadp; 3354 locklist_t *llp; 3355 locklist_t *lltp; 3356 graph_t *gp; 3357 int i; 3358 int first_index; /* graph index */ 3359 int num_indexes; /* graph index */ 3360 3361 ASSERT((list_type == FLK_ACTIVE_STATE) || 3362 (list_type == FLK_SLEEPING_STATE)); 3363 3364 /* 3365 * Get a pointer to something to use as a list head while building 3366 * the rest of the list. 3367 */ 3368 llheadp = &listhead; 3369 lltp = llheadp; 3370 llheadp->ll_next = (locklist_t *)NULL; 3371 3372 /* Figure out which graphs we want to look at. */ 3373 if (vp == NULL) { 3374 first_index = 0; 3375 num_indexes = HASH_SIZE; 3376 } else { 3377 first_index = HASH_INDEX(vp); 3378 num_indexes = 1; 3379 } 3380 3381 for (i = first_index; i < first_index + num_indexes; i++) { 3382 mutex_enter(&flock_lock); 3383 gp = lock_graph[i]; 3384 mutex_exit(&flock_lock); 3385 if (gp == NULL) { 3386 continue; 3387 } 3388 3389 mutex_enter(&gp->gp_mutex); 3390 graph_head = (list_type == FLK_ACTIVE_STATE) ? 3391 ACTIVE_HEAD(gp) : SLEEPING_HEAD(gp); 3392 for (lock = graph_head->l_next; 3393 lock != graph_head; 3394 lock = lock->l_next) { 3395 if (use_sysid && lock->l_flock.l_sysid != sysid) 3396 continue; 3397 if (pid != NOPID && lock->l_flock.l_pid != pid) 3398 continue; 3399 if (vp != NULL && lock->l_vnode != vp) 3400 continue; 3401 if (lock_state && !(lock_state & lock->l_state)) 3402 continue; 3403 if (zoneid != lock->l_zoneid && zoneid != ALL_ZONES) 3404 continue; 3405 /* 3406 * A matching lock was found. Allocate 3407 * space for a new locklist entry and fill 3408 * it in. 3409 */ 3410 llp = kmem_alloc(sizeof (locklist_t), KM_SLEEP); 3411 lltp->ll_next = llp; 3412 VN_HOLD(lock->l_vnode); 3413 llp->ll_vp = lock->l_vnode; 3414 create_flock(lock, &(llp->ll_flock)); 3415 llp->ll_next = (locklist_t *)NULL; 3416 lltp = llp; 3417 } 3418 mutex_exit(&gp->gp_mutex); 3419 } 3420 3421 llp = llheadp->ll_next; 3422 return (llp); 3423 } 3424 3425 /* 3426 * These two functions are simply interfaces to get_lock_list. They return 3427 * a list of sleeping or active locks for the given sysid and pid. See 3428 * get_lock_list for details. 3429 * 3430 * In either case we don't particularly care to specify the zone of interest; 3431 * the sysid-space is global across zones, so the sysid will map to exactly one 3432 * zone, and we'll return information for that zone. 3433 */ 3434 3435 locklist_t * 3436 flk_get_sleeping_locks(int sysid, pid_t pid) 3437 { 3438 return (get_lock_list(FLK_SLEEPING_STATE, 0, sysid, B_TRUE, pid, NULL, 3439 ALL_ZONES)); 3440 } 3441 3442 locklist_t * 3443 flk_get_active_locks(int sysid, pid_t pid) 3444 { 3445 return (get_lock_list(FLK_ACTIVE_STATE, 0, sysid, B_TRUE, pid, NULL, 3446 ALL_ZONES)); 3447 } 3448 3449 /* 3450 * Another interface to get_lock_list. This one returns all the active 3451 * locks for a given vnode. Again, see get_lock_list for details. 3452 * 3453 * We don't need to specify which zone's locks we're interested in. The matter 3454 * would only be interesting if the vnode belonged to NFS, and NFS vnodes can't 3455 * be used by multiple zones, so the list of locks will all be from the right 3456 * zone. 3457 */ 3458 3459 locklist_t * 3460 flk_active_locks_for_vp(const vnode_t *vp) 3461 { 3462 return (get_lock_list(FLK_ACTIVE_STATE, 0, 0, B_FALSE, NOPID, vp, 3463 ALL_ZONES)); 3464 } 3465 3466 /* 3467 * Another interface to get_lock_list. This one returns all the active 3468 * nbmand locks for a given vnode. Again, see get_lock_list for details. 3469 * 3470 * See the comment for flk_active_locks_for_vp() for why we don't care to 3471 * specify the particular zone of interest. 3472 */ 3473 locklist_t * 3474 flk_active_nbmand_locks_for_vp(const vnode_t *vp) 3475 { 3476 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE, 3477 NOPID, vp, ALL_ZONES)); 3478 } 3479 3480 /* 3481 * Another interface to get_lock_list. This one returns all the active 3482 * nbmand locks for a given pid. Again, see get_lock_list for details. 3483 * 3484 * The zone doesn't need to be specified here; the locks held by a 3485 * particular process will either be local (ie, non-NFS) or from the zone 3486 * the process is executing in. This is because other parts of the system 3487 * ensure that an NFS vnode can't be used in a zone other than that in 3488 * which it was opened. 3489 */ 3490 locklist_t * 3491 flk_active_nbmand_locks(pid_t pid) 3492 { 3493 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE, 3494 pid, NULL, ALL_ZONES)); 3495 } 3496 3497 /* 3498 * Free up all entries in the locklist. 3499 */ 3500 void 3501 flk_free_locklist(locklist_t *llp) 3502 { 3503 locklist_t *next_llp; 3504 3505 while (llp) { 3506 next_llp = llp->ll_next; 3507 VN_RELE(llp->ll_vp); 3508 kmem_free(llp, sizeof (*llp)); 3509 llp = next_llp; 3510 } 3511 } 3512 3513 static void 3514 cl_flk_change_nlm_state_all_locks(int nlmid, flk_nlm_status_t nlm_state) 3515 { 3516 /* 3517 * For each graph "lg" in the hash table lock_graph do 3518 * a. Get the list of sleeping locks 3519 * b. For each lock descriptor in the list do 3520 * i. If the requested lock is an NLM server request AND 3521 * the nlmid is the same as the routine argument then 3522 * change the lock descriptor's state field to 3523 * "nlm_state." 3524 * c. Get the list of active locks 3525 * d. For each lock descriptor in the list do 3526 * i. If the requested lock is an NLM server request AND 3527 * the nlmid is the same as the routine argument then 3528 * change the lock descriptor's state field to 3529 * "nlm_state." 3530 */ 3531 3532 int i; 3533 graph_t *gp; /* lock graph */ 3534 lock_descriptor_t *lock; /* lock */ 3535 lock_descriptor_t *nlock = NULL; /* next lock */ 3536 int lock_nlmid; 3537 3538 for (i = 0; i < HASH_SIZE; i++) { 3539 mutex_enter(&flock_lock); 3540 gp = lock_graph[i]; 3541 mutex_exit(&flock_lock); 3542 if (gp == NULL) { 3543 continue; 3544 } 3545 3546 /* Get list of sleeping locks in current lock graph. */ 3547 mutex_enter(&gp->gp_mutex); 3548 for (lock = SLEEPING_HEAD(gp)->l_next; 3549 lock != SLEEPING_HEAD(gp); 3550 lock = nlock) { 3551 nlock = lock->l_next; 3552 /* get NLM id */ 3553 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 3554 3555 /* 3556 * If NLM server request AND nlmid of lock matches 3557 * nlmid of argument, then set the NLM state of the 3558 * lock to "nlm_state." 3559 */ 3560 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 3561 SET_NLM_STATE(lock, nlm_state); 3562 } 3563 } 3564 3565 /* Get list of active locks in current lock graph. */ 3566 for (lock = ACTIVE_HEAD(gp)->l_next; 3567 lock != ACTIVE_HEAD(gp); 3568 lock = nlock) { 3569 nlock = lock->l_next; 3570 /* get NLM id */ 3571 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 3572 3573 /* 3574 * If NLM server request AND nlmid of lock matches 3575 * nlmid of argument, then set the NLM state of the 3576 * lock to "nlm_state." 3577 */ 3578 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 3579 ASSERT(IS_ACTIVE(lock)); 3580 SET_NLM_STATE(lock, nlm_state); 3581 } 3582 } 3583 mutex_exit(&gp->gp_mutex); 3584 } 3585 } 3586 3587 /* 3588 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid(). 3589 * Effects: Find all sleeping lock manager requests _only_ for the NLM server 3590 * identified by "nlmid." Poke those lock requests. 3591 */ 3592 static void 3593 cl_flk_wakeup_sleeping_nlm_locks(int nlmid) 3594 { 3595 lock_descriptor_t *lock; 3596 lock_descriptor_t *nlock = NULL; /* next lock */ 3597 int i; 3598 graph_t *gp; 3599 int lock_nlmid; 3600 3601 for (i = 0; i < HASH_SIZE; i++) { 3602 mutex_enter(&flock_lock); 3603 gp = lock_graph[i]; 3604 mutex_exit(&flock_lock); 3605 if (gp == NULL) { 3606 continue; 3607 } 3608 3609 mutex_enter(&gp->gp_mutex); 3610 for (lock = SLEEPING_HEAD(gp)->l_next; 3611 lock != SLEEPING_HEAD(gp); 3612 lock = nlock) { 3613 nlock = lock->l_next; 3614 /* 3615 * If NLM server request _and_ nlmid of lock matches 3616 * nlmid of argument, then set the NLM state of the 3617 * lock to NLM_SHUTTING_DOWN, and wake up sleeping 3618 * request. 3619 */ 3620 if (IS_LOCKMGR(lock)) { 3621 /* get NLM id */ 3622 lock_nlmid = 3623 GETNLMID(lock->l_flock.l_sysid); 3624 if (nlmid == lock_nlmid) { 3625 SET_NLM_STATE(lock, 3626 FLK_NLM_SHUTTING_DOWN); 3627 INTERRUPT_WAKEUP(lock); 3628 } 3629 } 3630 } 3631 mutex_exit(&gp->gp_mutex); 3632 } 3633 } 3634 3635 /* 3636 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid() 3637 * Effects: Find all active (granted) lock manager locks _only_ for the 3638 * NLM server identified by "nlmid" and release them. 3639 */ 3640 static void 3641 cl_flk_unlock_nlm_granted(int nlmid) 3642 { 3643 lock_descriptor_t *lock; 3644 lock_descriptor_t *nlock = NULL; /* next lock */ 3645 int i; 3646 graph_t *gp; 3647 int lock_nlmid; 3648 3649 for (i = 0; i < HASH_SIZE; i++) { 3650 mutex_enter(&flock_lock); 3651 gp = lock_graph[i]; 3652 mutex_exit(&flock_lock); 3653 if (gp == NULL) { 3654 continue; 3655 } 3656 3657 mutex_enter(&gp->gp_mutex); 3658 for (lock = ACTIVE_HEAD(gp)->l_next; 3659 lock != ACTIVE_HEAD(gp); 3660 lock = nlock) { 3661 nlock = lock->l_next; 3662 ASSERT(IS_ACTIVE(lock)); 3663 3664 /* 3665 * If it's an NLM server request _and_ nlmid of 3666 * the lock matches nlmid of argument, then 3667 * remove the active lock the list, wakup blocked 3668 * threads, and free the storage for the lock. 3669 * Note that there's no need to mark the NLM state 3670 * of this lock to NLM_DOWN because the lock will 3671 * be deleted anyway and its storage freed. 3672 */ 3673 if (IS_LOCKMGR(lock)) { 3674 /* get NLM id */ 3675 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 3676 if (nlmid == lock_nlmid) { 3677 flk_delete_active_lock(lock, 0); 3678 flk_wakeup(lock, 1); 3679 flk_free_lock(lock); 3680 } 3681 } 3682 } 3683 mutex_exit(&gp->gp_mutex); 3684 } 3685 } 3686 3687 /* 3688 * Find all sleeping lock manager requests and poke them. 3689 */ 3690 static void 3691 wakeup_sleeping_lockmgr_locks(struct flock_globals *fg) 3692 { 3693 lock_descriptor_t *lock; 3694 lock_descriptor_t *nlock = NULL; /* next lock */ 3695 int i; 3696 graph_t *gp; 3697 zoneid_t zoneid = getzoneid(); 3698 3699 for (i = 0; i < HASH_SIZE; i++) { 3700 mutex_enter(&flock_lock); 3701 gp = lock_graph[i]; 3702 mutex_exit(&flock_lock); 3703 if (gp == NULL) { 3704 continue; 3705 } 3706 3707 mutex_enter(&gp->gp_mutex); 3708 fg->lockmgr_status[i] = FLK_WAKEUP_SLEEPERS; 3709 for (lock = SLEEPING_HEAD(gp)->l_next; 3710 lock != SLEEPING_HEAD(gp); 3711 lock = nlock) { 3712 nlock = lock->l_next; 3713 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) { 3714 INTERRUPT_WAKEUP(lock); 3715 } 3716 } 3717 mutex_exit(&gp->gp_mutex); 3718 } 3719 } 3720 3721 3722 /* 3723 * Find all active (granted) lock manager locks and release them. 3724 */ 3725 static void 3726 unlock_lockmgr_granted(struct flock_globals *fg) 3727 { 3728 lock_descriptor_t *lock; 3729 lock_descriptor_t *nlock = NULL; /* next lock */ 3730 int i; 3731 graph_t *gp; 3732 zoneid_t zoneid = getzoneid(); 3733 3734 for (i = 0; i < HASH_SIZE; i++) { 3735 mutex_enter(&flock_lock); 3736 gp = lock_graph[i]; 3737 mutex_exit(&flock_lock); 3738 if (gp == NULL) { 3739 continue; 3740 } 3741 3742 mutex_enter(&gp->gp_mutex); 3743 fg->lockmgr_status[i] = FLK_LOCKMGR_DOWN; 3744 for (lock = ACTIVE_HEAD(gp)->l_next; 3745 lock != ACTIVE_HEAD(gp); 3746 lock = nlock) { 3747 nlock = lock->l_next; 3748 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) { 3749 ASSERT(IS_ACTIVE(lock)); 3750 flk_delete_active_lock(lock, 0); 3751 flk_wakeup(lock, 1); 3752 flk_free_lock(lock); 3753 } 3754 } 3755 mutex_exit(&gp->gp_mutex); 3756 } 3757 } 3758 3759 3760 /* 3761 * Wait until a lock is granted, cancelled, or interrupted. 3762 */ 3763 3764 static void 3765 wait_for_lock(lock_descriptor_t *request) 3766 { 3767 graph_t *gp = request->l_graph; 3768 3769 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 3770 3771 while (!(IS_GRANTED(request)) && !(IS_CANCELLED(request)) && 3772 !(IS_INTERRUPTED(request))) { 3773 if (!cv_wait_sig(&request->l_cv, &gp->gp_mutex)) { 3774 flk_set_state(request, FLK_INTERRUPTED_STATE); 3775 request->l_state |= INTERRUPTED_LOCK; 3776 } 3777 } 3778 } 3779 3780 /* 3781 * Create an flock structure from the existing lock information 3782 * 3783 * This routine is used to create flock structures for the lock manager 3784 * to use in a reclaim request. Since the lock was originated on this 3785 * host, it must be conforming to UNIX semantics, so no checking is 3786 * done to make sure it falls within the lower half of the 32-bit range. 3787 */ 3788 3789 static void 3790 create_flock(lock_descriptor_t *lp, flock64_t *flp) 3791 { 3792 ASSERT(lp->l_end == MAX_U_OFFSET_T || lp->l_end <= MAXEND); 3793 ASSERT(lp->l_end >= lp->l_start); 3794 3795 flp->l_type = lp->l_type; 3796 flp->l_whence = 0; 3797 flp->l_start = lp->l_start; 3798 flp->l_len = (lp->l_end == MAX_U_OFFSET_T) ? 0 : 3799 (lp->l_end - lp->l_start + 1); 3800 flp->l_sysid = lp->l_flock.l_sysid; 3801 flp->l_pid = lp->l_flock.l_pid; 3802 } 3803 3804 /* 3805 * Convert flock_t data describing a lock range into unsigned long starting 3806 * and ending points, which are put into lock_request. Returns 0 or an 3807 * errno value. 3808 * Large Files: max is passed by the caller and we return EOVERFLOW 3809 * as defined by LFS API. 3810 */ 3811 3812 int 3813 flk_convert_lock_data(vnode_t *vp, flock64_t *flp, 3814 u_offset_t *start, u_offset_t *end, offset_t offset) 3815 { 3816 struct vattr vattr; 3817 int error; 3818 3819 /* 3820 * Determine the starting point of the request 3821 */ 3822 switch (flp->l_whence) { 3823 case 0: /* SEEK_SET */ 3824 *start = (u_offset_t)flp->l_start; 3825 break; 3826 case 1: /* SEEK_CUR */ 3827 *start = (u_offset_t)(flp->l_start + offset); 3828 break; 3829 case 2: /* SEEK_END */ 3830 vattr.va_mask = AT_SIZE; 3831 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) 3832 return (error); 3833 *start = (u_offset_t)(flp->l_start + vattr.va_size); 3834 break; 3835 default: 3836 return (EINVAL); 3837 } 3838 3839 /* 3840 * Determine the range covered by the request. 3841 */ 3842 if (flp->l_len == 0) 3843 *end = MAX_U_OFFSET_T; 3844 else if ((offset_t)flp->l_len > 0) { 3845 *end = (u_offset_t)(*start + (flp->l_len - 1)); 3846 } else { 3847 /* 3848 * Negative length; why do we even allow this ? 3849 * Because this allows easy specification of 3850 * the last n bytes of the file. 3851 */ 3852 *end = *start; 3853 *start += (u_offset_t)flp->l_len; 3854 (*start)++; 3855 } 3856 return (0); 3857 } 3858 3859 /* 3860 * Check the validity of lock data. This can used by the NFS 3861 * frlock routines to check data before contacting the server. The 3862 * server must support semantics that aren't as restrictive as 3863 * the UNIX API, so the NFS client is required to check. 3864 * The maximum is now passed in by the caller. 3865 */ 3866 3867 int 3868 flk_check_lock_data(u_offset_t start, u_offset_t end, offset_t max) 3869 { 3870 /* 3871 * The end (length) for local locking should never be greater 3872 * than MAXEND. However, the representation for 3873 * the entire file is MAX_U_OFFSET_T. 3874 */ 3875 if ((start > max) || 3876 ((end > max) && (end != MAX_U_OFFSET_T))) { 3877 return (EINVAL); 3878 } 3879 if (start > end) { 3880 return (EINVAL); 3881 } 3882 return (0); 3883 } 3884 3885 /* 3886 * Fill in request->l_flock with information about the lock blocking the 3887 * request. The complexity here is that lock manager requests are allowed 3888 * to see into the upper part of the 32-bit address range, whereas local 3889 * requests are only allowed to see signed values. 3890 * 3891 * What should be done when "blocker" is a lock manager lock that uses the 3892 * upper portion of the 32-bit range, but "request" is local? Since the 3893 * request has already been determined to have been blocked by the blocker, 3894 * at least some portion of "blocker" must be in the range of the request, 3895 * or the request extends to the end of file. For the first case, the 3896 * portion in the lower range is returned with the indication that it goes 3897 * "to EOF." For the second case, the last byte of the lower range is 3898 * returned with the indication that it goes "to EOF." 3899 */ 3900 3901 static void 3902 report_blocker(lock_descriptor_t *blocker, lock_descriptor_t *request) 3903 { 3904 flock64_t *flrp; /* l_flock portion of request */ 3905 3906 ASSERT(blocker != NULL); 3907 3908 flrp = &request->l_flock; 3909 flrp->l_whence = 0; 3910 flrp->l_type = blocker->l_type; 3911 flrp->l_pid = blocker->l_flock.l_pid; 3912 flrp->l_sysid = blocker->l_flock.l_sysid; 3913 3914 if (IS_LOCKMGR(request)) { 3915 flrp->l_start = blocker->l_start; 3916 if (blocker->l_end == MAX_U_OFFSET_T) 3917 flrp->l_len = 0; 3918 else 3919 flrp->l_len = blocker->l_end - blocker->l_start + 1; 3920 } else { 3921 if (blocker->l_start > MAXEND) { 3922 flrp->l_start = MAXEND; 3923 flrp->l_len = 0; 3924 } else { 3925 flrp->l_start = blocker->l_start; 3926 if (blocker->l_end == MAX_U_OFFSET_T) 3927 flrp->l_len = 0; 3928 else 3929 flrp->l_len = blocker->l_end - 3930 blocker->l_start + 1; 3931 } 3932 } 3933 } 3934 3935 /* 3936 * PSARC case 1997/292 3937 */ 3938 /* 3939 * This is the public routine exported by flock.h. 3940 */ 3941 void 3942 cl_flk_change_nlm_state_to_unknown(int nlmid) 3943 { 3944 /* 3945 * Check to see if node is booted as a cluster. If not, return. 3946 */ 3947 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 3948 return; 3949 } 3950 3951 /* 3952 * See comment in cl_flk_set_nlm_status(). 3953 */ 3954 if (nlm_reg_status == NULL) { 3955 return; 3956 } 3957 3958 /* 3959 * protect NLM registry state with a mutex. 3960 */ 3961 ASSERT(nlmid <= nlm_status_size && nlmid >= 0); 3962 mutex_enter(&nlm_reg_lock); 3963 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, FLK_NLM_UNKNOWN); 3964 mutex_exit(&nlm_reg_lock); 3965 } 3966 3967 /* 3968 * Return non-zero if the given I/O request conflicts with an active NBMAND 3969 * lock. 3970 * If svmand is non-zero, it means look at all active locks, not just NBMAND 3971 * locks. 3972 */ 3973 3974 int 3975 nbl_lock_conflict(vnode_t *vp, nbl_op_t op, u_offset_t offset, 3976 ssize_t length, int svmand, caller_context_t *ct) 3977 { 3978 int conflict = 0; 3979 graph_t *gp; 3980 lock_descriptor_t *lock; 3981 pid_t pid; 3982 int sysid; 3983 3984 if (ct == NULL) { 3985 pid = curproc->p_pid; 3986 sysid = 0; 3987 } else { 3988 pid = ct->cc_pid; 3989 sysid = ct->cc_sysid; 3990 } 3991 3992 mutex_enter(&flock_lock); 3993 gp = lock_graph[HASH_INDEX(vp)]; 3994 mutex_exit(&flock_lock); 3995 if (gp == NULL) 3996 return (0); 3997 3998 mutex_enter(&gp->gp_mutex); 3999 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 4000 4001 for (; lock && lock->l_vnode == vp; lock = lock->l_next) { 4002 if ((svmand || (lock->l_state & NBMAND_LOCK)) && 4003 (lock->l_flock.l_sysid != sysid || 4004 lock->l_flock.l_pid != pid) && 4005 lock_blocks_io(op, offset, length, 4006 lock->l_type, lock->l_start, lock->l_end)) { 4007 conflict = 1; 4008 break; 4009 } 4010 } 4011 mutex_exit(&gp->gp_mutex); 4012 4013 return (conflict); 4014 } 4015 4016 /* 4017 * Return non-zero if the given I/O request conflicts with the given lock. 4018 */ 4019 4020 static int 4021 lock_blocks_io(nbl_op_t op, u_offset_t offset, ssize_t length, 4022 int lock_type, u_offset_t lock_start, u_offset_t lock_end) 4023 { 4024 ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE); 4025 ASSERT(lock_type == F_RDLCK || lock_type == F_WRLCK); 4026 4027 if (op == NBL_READ && lock_type == F_RDLCK) 4028 return (0); 4029 4030 if (offset <= lock_start && lock_start < offset + length) 4031 return (1); 4032 if (lock_start <= offset && offset <= lock_end) 4033 return (1); 4034 4035 return (0); 4036 } 4037 4038 #ifdef DEBUG 4039 static void 4040 check_active_locks(graph_t *gp) 4041 { 4042 lock_descriptor_t *lock, *lock1; 4043 edge_t *ep; 4044 4045 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp); 4046 lock = lock->l_next) { 4047 ASSERT(IS_ACTIVE(lock)); 4048 ASSERT(NOT_BLOCKED(lock)); 4049 ASSERT(!IS_BARRIER(lock)); 4050 4051 ep = FIRST_IN(lock); 4052 4053 while (ep != HEAD(lock)) { 4054 ASSERT(IS_SLEEPING(ep->from_vertex)); 4055 ASSERT(!NOT_BLOCKED(ep->from_vertex)); 4056 ep = NEXT_IN(ep); 4057 } 4058 4059 for (lock1 = lock->l_next; lock1 != ACTIVE_HEAD(gp); 4060 lock1 = lock1->l_next) { 4061 if (lock1->l_vnode == lock->l_vnode) { 4062 if (BLOCKS(lock1, lock)) { 4063 cmn_err(CE_PANIC, 4064 "active lock %p blocks %p", 4065 (void *)lock1, (void *)lock); 4066 } else if (BLOCKS(lock, lock1)) { 4067 cmn_err(CE_PANIC, 4068 "active lock %p blocks %p", 4069 (void *)lock, (void *)lock1); 4070 } 4071 } 4072 } 4073 } 4074 } 4075 4076 /* 4077 * Effect: This functions checks to see if the transition from 'old_state' to 4078 * 'new_state' is a valid one. It returns 0 if the transition is valid 4079 * and 1 if it is not. 4080 * For a map of valid transitions, see sys/flock_impl.h 4081 */ 4082 static int 4083 check_lock_transition(int old_state, int new_state) 4084 { 4085 switch (old_state) { 4086 case FLK_INITIAL_STATE: 4087 if ((new_state == FLK_START_STATE) || 4088 (new_state == FLK_SLEEPING_STATE) || 4089 (new_state == FLK_ACTIVE_STATE) || 4090 (new_state == FLK_DEAD_STATE)) { 4091 return (0); 4092 } else { 4093 return (1); 4094 } 4095 case FLK_START_STATE: 4096 if ((new_state == FLK_ACTIVE_STATE) || 4097 (new_state == FLK_DEAD_STATE)) { 4098 return (0); 4099 } else { 4100 return (1); 4101 } 4102 case FLK_ACTIVE_STATE: 4103 if (new_state == FLK_DEAD_STATE) { 4104 return (0); 4105 } else { 4106 return (1); 4107 } 4108 case FLK_SLEEPING_STATE: 4109 if ((new_state == FLK_GRANTED_STATE) || 4110 (new_state == FLK_INTERRUPTED_STATE) || 4111 (new_state == FLK_CANCELLED_STATE)) { 4112 return (0); 4113 } else { 4114 return (1); 4115 } 4116 case FLK_GRANTED_STATE: 4117 if ((new_state == FLK_START_STATE) || 4118 (new_state == FLK_INTERRUPTED_STATE) || 4119 (new_state == FLK_CANCELLED_STATE)) { 4120 return (0); 4121 } else { 4122 return (1); 4123 } 4124 case FLK_CANCELLED_STATE: 4125 if ((new_state == FLK_INTERRUPTED_STATE) || 4126 (new_state == FLK_DEAD_STATE)) { 4127 return (0); 4128 } else { 4129 return (1); 4130 } 4131 case FLK_INTERRUPTED_STATE: 4132 if (new_state == FLK_DEAD_STATE) { 4133 return (0); 4134 } else { 4135 return (1); 4136 } 4137 case FLK_DEAD_STATE: 4138 /* May be set more than once */ 4139 if (new_state == FLK_DEAD_STATE) { 4140 return (0); 4141 } else { 4142 return (1); 4143 } 4144 default: 4145 return (1); 4146 } 4147 } 4148 4149 static void 4150 check_sleeping_locks(graph_t *gp) 4151 { 4152 lock_descriptor_t *lock1, *lock2; 4153 edge_t *ep; 4154 for (lock1 = SLEEPING_HEAD(gp)->l_next; lock1 != SLEEPING_HEAD(gp); 4155 lock1 = lock1->l_next) { 4156 ASSERT(!IS_BARRIER(lock1)); 4157 for (lock2 = lock1->l_next; lock2 != SLEEPING_HEAD(gp); 4158 lock2 = lock2->l_next) { 4159 if (lock1->l_vnode == lock2->l_vnode) { 4160 if (BLOCKS(lock2, lock1)) { 4161 ASSERT(!IS_GRANTED(lock1)); 4162 ASSERT(!NOT_BLOCKED(lock1)); 4163 path(lock1, lock2); 4164 } 4165 } 4166 } 4167 4168 for (lock2 = ACTIVE_HEAD(gp)->l_next; lock2 != ACTIVE_HEAD(gp); 4169 lock2 = lock2->l_next) { 4170 ASSERT(!IS_BARRIER(lock1)); 4171 if (lock1->l_vnode == lock2->l_vnode) { 4172 if (BLOCKS(lock2, lock1)) { 4173 ASSERT(!IS_GRANTED(lock1)); 4174 ASSERT(!NOT_BLOCKED(lock1)); 4175 path(lock1, lock2); 4176 } 4177 } 4178 } 4179 ep = FIRST_ADJ(lock1); 4180 while (ep != HEAD(lock1)) { 4181 ASSERT(BLOCKS(ep->to_vertex, lock1)); 4182 ep = NEXT_ADJ(ep); 4183 } 4184 } 4185 } 4186 4187 static int 4188 level_two_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2, int no_path) 4189 { 4190 edge_t *ep; 4191 lock_descriptor_t *vertex; 4192 lock_descriptor_t *vertex_stack; 4193 4194 STACK_INIT(vertex_stack); 4195 4196 flk_graph_uncolor(lock1->l_graph); 4197 ep = FIRST_ADJ(lock1); 4198 ASSERT(ep != HEAD(lock1)); 4199 while (ep != HEAD(lock1)) { 4200 if (no_path) 4201 ASSERT(ep->to_vertex != lock2); 4202 STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack); 4203 COLOR(ep->to_vertex); 4204 ep = NEXT_ADJ(ep); 4205 } 4206 4207 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 4208 STACK_POP(vertex_stack, l_dstack); 4209 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex); 4210 ep = NEXT_ADJ(ep)) { 4211 if (COLORED(ep->to_vertex)) 4212 continue; 4213 COLOR(ep->to_vertex); 4214 if (ep->to_vertex == lock2) 4215 return (1); 4216 4217 STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack); 4218 } 4219 } 4220 return (0); 4221 } 4222 4223 static void 4224 check_owner_locks(graph_t *gp, pid_t pid, int sysid, vnode_t *vp) 4225 { 4226 lock_descriptor_t *lock; 4227 4228 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 4229 4230 if (lock) { 4231 while (lock != ACTIVE_HEAD(gp) && (lock->l_vnode == vp)) { 4232 if (lock->l_flock.l_pid == pid && 4233 lock->l_flock.l_sysid == sysid) 4234 cmn_err(CE_PANIC, 4235 "owner pid %d's lock %p in active queue", 4236 pid, (void *)lock); 4237 lock = lock->l_next; 4238 } 4239 } 4240 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 4241 4242 if (lock) { 4243 while (lock != SLEEPING_HEAD(gp) && (lock->l_vnode == vp)) { 4244 if (lock->l_flock.l_pid == pid && 4245 lock->l_flock.l_sysid == sysid) 4246 cmn_err(CE_PANIC, 4247 "owner pid %d's lock %p in sleep queue", 4248 pid, (void *)lock); 4249 lock = lock->l_next; 4250 } 4251 } 4252 } 4253 4254 static int 4255 level_one_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2) 4256 { 4257 edge_t *ep = FIRST_ADJ(lock1); 4258 4259 while (ep != HEAD(lock1)) { 4260 if (ep->to_vertex == lock2) 4261 return (1); 4262 else 4263 ep = NEXT_ADJ(ep); 4264 } 4265 return (0); 4266 } 4267 4268 static int 4269 no_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2) 4270 { 4271 return (!level_two_path(lock1, lock2, 1)); 4272 } 4273 4274 static void 4275 path(lock_descriptor_t *lock1, lock_descriptor_t *lock2) 4276 { 4277 if (level_one_path(lock1, lock2)) { 4278 if (level_two_path(lock1, lock2, 0) != 0) { 4279 cmn_err(CE_WARN, 4280 "one edge one path from lock1 %p lock2 %p", 4281 (void *)lock1, (void *)lock2); 4282 } 4283 } else if (no_path(lock1, lock2)) { 4284 cmn_err(CE_PANIC, 4285 "No path from lock1 %p to lock2 %p", 4286 (void *)lock1, (void *)lock2); 4287 } 4288 } 4289 #endif /* DEBUG */ 4290