1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #include <sys/flock_impl.h> 31 #include <sys/vfs.h> 32 #include <sys/t_lock.h> /* for <sys/callb.h> */ 33 #include <sys/callb.h> 34 #include <sys/clconf.h> 35 #include <sys/cladm.h> 36 #include <sys/nbmlock.h> 37 #include <sys/cred.h> 38 #include <sys/policy.h> 39 40 /* 41 * The following four variables are for statistics purposes and they are 42 * not protected by locks. They may not be accurate but will at least be 43 * close to the actual value. 44 */ 45 46 int flk_lock_allocs; 47 int flk_lock_frees; 48 int edge_allocs; 49 int edge_frees; 50 int flk_proc_vertex_allocs; 51 int flk_proc_edge_allocs; 52 int flk_proc_vertex_frees; 53 int flk_proc_edge_frees; 54 55 static kmutex_t flock_lock; 56 57 #ifdef DEBUG 58 int check_debug = 0; 59 #define CHECK_ACTIVE_LOCKS(gp) if (check_debug) \ 60 check_active_locks(gp); 61 #define CHECK_SLEEPING_LOCKS(gp) if (check_debug) \ 62 check_sleeping_locks(gp); 63 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp) \ 64 if (check_debug) \ 65 check_owner_locks(gp, pid, sysid, vp); 66 #define CHECK_LOCK_TRANSITION(old_state, new_state) \ 67 { \ 68 if (check_lock_transition(old_state, new_state)) { \ 69 cmn_err(CE_PANIC, "Illegal lock transition \ 70 from %d to %d", old_state, new_state); \ 71 } \ 72 } 73 #else 74 75 #define CHECK_ACTIVE_LOCKS(gp) 76 #define CHECK_SLEEPING_LOCKS(gp) 77 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp) 78 #define CHECK_LOCK_TRANSITION(old_state, new_state) 79 80 #endif /* DEBUG */ 81 82 struct kmem_cache *flk_edge_cache; 83 84 graph_t *lock_graph[HASH_SIZE]; 85 proc_graph_t pgraph; 86 87 /* 88 * Clustering. 89 * 90 * NLM REGISTRY TYPE IMPLEMENTATION 91 * 92 * Assumptions: 93 * 1. Nodes in a cluster are numbered starting at 1; always non-negative 94 * integers; maximum node id is returned by clconf_maximum_nodeid(). 95 * 2. We use this node id to identify the node an NLM server runs on. 96 */ 97 98 /* 99 * NLM registry object keeps track of NLM servers via their 100 * nlmids (which are the node ids of the node in the cluster they run on) 101 * that have requested locks at this LLM with which this registry is 102 * associated. 103 * 104 * Representation of abstraction: 105 * rep = record[ states: array[nlm_state], 106 * lock: mutex] 107 * 108 * Representation invariants: 109 * 1. index i of rep.states is between 0 and n - 1 where n is number 110 * of elements in the array, which happen to be the maximum number 111 * of nodes in the cluster configuration + 1. 112 * 2. 
map nlmid to index i of rep.states 113 * 0 -> 0 114 * 1 -> 1 115 * 2 -> 2 116 * n-1 -> clconf_maximum_nodeid()+1 117 * 3. This 1-1 mapping is quite convenient and it avoids errors resulting 118 * from forgetting to subtract 1 from the index. 119 * 4. The reason we keep the 0th index is the following. A legitimate 120 * cluster configuration includes making a UFS file system NFS 121 * exportable. The code is structured so that if you're in a cluster 122 * you do one thing; otherwise, you do something else. The problem 123 * is what to do if you think you're in a cluster with PXFS loaded, 124 * but you're using UFS not PXFS? The upper two bytes of the sysid 125 * encode the node id of the node where NLM server runs; these bytes 126 * are zero for UFS. Since the nodeid is used to index into the 127 * registry, we can record the NLM server state information at index 128 * 0 using the same mechanism used for PXFS file locks! 129 */ 130 static flk_nlm_status_t *nlm_reg_status = NULL; /* state array 0..N-1 */ 131 static kmutex_t nlm_reg_lock; /* lock to protect arrary */ 132 static uint_t nlm_status_size; /* size of state array */ 133 134 /* 135 * Although we need a global lock dependency graph (and associated data 136 * structures), we also need a per-zone notion of whether the lock manager is 137 * running, and so whether to allow lock manager requests or not. 138 * 139 * Thus, on a per-zone basis we maintain a ``global'' variable 140 * (flk_lockmgr_status), protected by flock_lock, and set when the lock 141 * manager is determined to be changing state (starting or stopping). 142 * 143 * Each graph/zone pair also has a copy of this variable, which is protected by 144 * the graph's mutex. 145 * 146 * The per-graph copies are used to synchronize lock requests with shutdown 147 * requests. The global copy is used to initialize the per-graph field when a 148 * new graph is created. 
149 */ 150 struct flock_globals { 151 flk_lockmgr_status_t flk_lockmgr_status; 152 flk_lockmgr_status_t lockmgr_status[HASH_SIZE]; 153 }; 154 155 zone_key_t flock_zone_key; 156 157 static void create_flock(lock_descriptor_t *, flock64_t *); 158 static lock_descriptor_t *flk_get_lock(void); 159 static void flk_free_lock(lock_descriptor_t *lock); 160 static void flk_get_first_blocking_lock(lock_descriptor_t *request); 161 static int flk_process_request(lock_descriptor_t *); 162 static int flk_add_edge(lock_descriptor_t *, lock_descriptor_t *, int, int); 163 static edge_t *flk_get_edge(void); 164 static int flk_wait_execute_request(lock_descriptor_t *); 165 static int flk_relation(lock_descriptor_t *, lock_descriptor_t *); 166 static void flk_insert_active_lock(lock_descriptor_t *); 167 static void flk_delete_active_lock(lock_descriptor_t *, int); 168 static void flk_insert_sleeping_lock(lock_descriptor_t *); 169 static void flk_graph_uncolor(graph_t *); 170 static void flk_wakeup(lock_descriptor_t *, int); 171 static void flk_free_edge(edge_t *); 172 static void flk_recompute_dependencies(lock_descriptor_t *, 173 lock_descriptor_t **, int, int); 174 static int flk_find_barriers(lock_descriptor_t *); 175 static void flk_update_barriers(lock_descriptor_t *); 176 static int flk_color_reachables(lock_descriptor_t *); 177 static int flk_canceled(lock_descriptor_t *); 178 static void flk_delete_locks_by_sysid(lock_descriptor_t *); 179 static void report_blocker(lock_descriptor_t *, lock_descriptor_t *); 180 static void wait_for_lock(lock_descriptor_t *); 181 static void unlock_lockmgr_granted(struct flock_globals *); 182 static void wakeup_sleeping_lockmgr_locks(struct flock_globals *); 183 184 /* Clustering hooks */ 185 static void cl_flk_change_nlm_state_all_locks(int, flk_nlm_status_t); 186 static void cl_flk_wakeup_sleeping_nlm_locks(int); 187 static void cl_flk_unlock_nlm_granted(int); 188 189 #ifdef DEBUG 190 static int check_lock_transition(int, int); 191 static void check_sleeping_locks(graph_t *); 192 static void check_active_locks(graph_t *); 193 static int no_path(lock_descriptor_t *, lock_descriptor_t *); 194 static void path(lock_descriptor_t *, lock_descriptor_t *); 195 static void check_owner_locks(graph_t *, pid_t, int, vnode_t *); 196 static int level_one_path(lock_descriptor_t *, lock_descriptor_t *); 197 static int level_two_path(lock_descriptor_t *, lock_descriptor_t *, int); 198 #endif 199 200 /* proc_graph function definitions */ 201 static int flk_check_deadlock(lock_descriptor_t *); 202 static void flk_proc_graph_uncolor(void); 203 static proc_vertex_t *flk_get_proc_vertex(lock_descriptor_t *); 204 static proc_edge_t *flk_get_proc_edge(void); 205 static void flk_proc_release(proc_vertex_t *); 206 static void flk_free_proc_edge(proc_edge_t *); 207 static void flk_update_proc_graph(edge_t *, int); 208 209 /* Non-blocking mandatory locking */ 210 static int lock_blocks_io(nbl_op_t, u_offset_t, ssize_t, int, u_offset_t, 211 u_offset_t); 212 213 static struct flock_globals * 214 flk_get_globals(void) 215 { 216 /* 217 * The KLM module had better be loaded if we're attempting to handle 218 * lockmgr requests. 
219 */ 220 ASSERT(flock_zone_key != ZONE_KEY_UNINITIALIZED); 221 return (zone_getspecific(flock_zone_key, curproc->p_zone)); 222 } 223 224 static flk_lockmgr_status_t 225 flk_get_lockmgr_status(void) 226 { 227 struct flock_globals *fg; 228 229 ASSERT(MUTEX_HELD(&flock_lock)); 230 231 if (flock_zone_key == ZONE_KEY_UNINITIALIZED) { 232 /* 233 * KLM module not loaded; lock manager definitely not running. 234 */ 235 return (FLK_LOCKMGR_DOWN); 236 } 237 fg = flk_get_globals(); 238 return (fg->flk_lockmgr_status); 239 } 240 241 /* 242 * Routine called from fs_frlock in fs/fs_subr.c 243 */ 244 245 int 246 reclock(vnode_t *vp, 247 flock64_t *lckdat, 248 int cmd, 249 int flag, 250 u_offset_t offset, 251 flk_callback_t *flk_cbp) 252 { 253 lock_descriptor_t stack_lock_request; 254 lock_descriptor_t *lock_request; 255 int error = 0; 256 graph_t *gp; 257 int nlmid; 258 259 /* 260 * Check access permissions 261 */ 262 if ((cmd & SETFLCK) && 263 ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) || 264 (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0))) 265 return (EBADF); 266 267 /* 268 * for query and unlock we use the stack_lock_request 269 */ 270 271 if ((lckdat->l_type == F_UNLCK) || 272 !((cmd & INOFLCK) || (cmd & SETFLCK))) { 273 lock_request = &stack_lock_request; 274 (void) bzero((caddr_t)lock_request, 275 sizeof (lock_descriptor_t)); 276 277 /* 278 * following is added to make the assertions in 279 * flk_execute_request() to pass through 280 */ 281 282 lock_request->l_edge.edge_in_next = &lock_request->l_edge; 283 lock_request->l_edge.edge_in_prev = &lock_request->l_edge; 284 lock_request->l_edge.edge_adj_next = &lock_request->l_edge; 285 lock_request->l_edge.edge_adj_prev = &lock_request->l_edge; 286 lock_request->l_status = FLK_INITIAL_STATE; 287 } else { 288 lock_request = flk_get_lock(); 289 } 290 lock_request->l_state = 0; 291 lock_request->l_vnode = vp; 292 lock_request->l_zoneid = getzoneid(); 293 294 /* 295 * Convert the request range into the canonical start and end 296 * values. The NLM protocol supports locking over the entire 297 * 32-bit range, so there's no range checking for remote requests, 298 * but we still need to verify that local requests obey the rules. 299 */ 300 /* Clustering */ 301 if ((cmd & (RCMDLCK | PCMDLCK)) != 0) { 302 ASSERT(lckdat->l_whence == 0); 303 lock_request->l_start = lckdat->l_start; 304 lock_request->l_end = (lckdat->l_len == 0) ? MAX_U_OFFSET_T : 305 lckdat->l_start + (lckdat->l_len - 1); 306 } else { 307 /* check the validity of the lock range */ 308 error = flk_convert_lock_data(vp, lckdat, 309 &lock_request->l_start, &lock_request->l_end, 310 offset); 311 if (error) { 312 goto done; 313 } 314 error = flk_check_lock_data(lock_request->l_start, 315 lock_request->l_end, MAXEND); 316 if (error) { 317 goto done; 318 } 319 } 320 321 ASSERT(lock_request->l_end >= lock_request->l_start); 322 323 lock_request->l_type = lckdat->l_type; 324 if (cmd & INOFLCK) 325 lock_request->l_state |= IO_LOCK; 326 if (cmd & SLPFLCK) 327 lock_request->l_state |= WILLING_TO_SLEEP_LOCK; 328 if (cmd & RCMDLCK) 329 lock_request->l_state |= LOCKMGR_LOCK; 330 if (cmd & NBMLCK) 331 lock_request->l_state |= NBMAND_LOCK; 332 /* 333 * Clustering: set flag for PXFS locks 334 * We do not _only_ check for the PCMDLCK flag because PXFS locks could 335 * also be of type 'RCMDLCK'. 336 * We do not _only_ check the GETPXFSID() macro because local PXFS 337 * clients use a pxfsid of zero to permit deadlock detection in the LLM. 
338 */ 339 340 if ((cmd & PCMDLCK) || (GETPXFSID(lckdat->l_sysid) != 0)) { 341 lock_request->l_state |= PXFS_LOCK; 342 } 343 if (!((cmd & SETFLCK) || (cmd & INOFLCK))) { 344 if (lock_request->l_type == F_RDLCK || 345 lock_request->l_type == F_WRLCK) 346 lock_request->l_state |= QUERY_LOCK; 347 } 348 lock_request->l_flock = (*lckdat); 349 lock_request->l_callbacks = flk_cbp; 350 351 /* 352 * We are ready for processing the request 353 */ 354 if (IS_LOCKMGR(lock_request)) { 355 /* 356 * If the lock request is an NLM server request .... 357 */ 358 if (nlm_status_size == 0) { /* not booted as cluster */ 359 mutex_enter(&flock_lock); 360 /* 361 * Bail out if this is a lock manager request and the 362 * lock manager is not supposed to be running. 363 */ 364 if (flk_get_lockmgr_status() != FLK_LOCKMGR_UP) { 365 mutex_exit(&flock_lock); 366 error = ENOLCK; 367 goto done; 368 } 369 mutex_exit(&flock_lock); 370 } else { /* booted as a cluster */ 371 nlmid = GETNLMID(lock_request->l_flock.l_sysid); 372 ASSERT(nlmid <= nlm_status_size && nlmid >= 0); 373 374 mutex_enter(&nlm_reg_lock); 375 /* 376 * If the NLM registry does not know about this 377 * NLM server making the request, add its nlmid 378 * to the registry. 379 */ 380 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, 381 nlmid)) { 382 FLK_REGISTRY_ADD_NLMID(nlm_reg_status, nlmid); 383 } else if (!FLK_REGISTRY_IS_NLM_UP(nlm_reg_status, 384 nlmid)) { 385 /* 386 * If the NLM server is already known (has made 387 * previous lock requests) and its state is 388 * not NLM_UP (means that NLM server is 389 * shutting down), then bail out with an 390 * error to deny the lock request. 391 */ 392 mutex_exit(&nlm_reg_lock); 393 error = ENOLCK; 394 goto done; 395 } 396 mutex_exit(&nlm_reg_lock); 397 } 398 } 399 400 /* Now get the lock graph for a particular vnode */ 401 gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH); 402 403 /* 404 * We drop rwlock here otherwise this might end up causing a 405 * deadlock if this IOLOCK sleeps. (bugid # 1183392). 406 */ 407 408 if (IS_IO_LOCK(lock_request)) { 409 VOP_RWUNLOCK(vp, 410 (lock_request->l_type == F_RDLCK) ? 411 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL); 412 } 413 mutex_enter(&gp->gp_mutex); 414 415 lock_request->l_state |= REFERENCED_LOCK; 416 lock_request->l_graph = gp; 417 418 switch (lock_request->l_type) { 419 case F_RDLCK: 420 case F_WRLCK: 421 if (IS_QUERY_LOCK(lock_request)) { 422 flk_get_first_blocking_lock(lock_request); 423 (*lckdat) = lock_request->l_flock; 424 break; 425 } 426 427 /* process the request now */ 428 429 error = flk_process_request(lock_request); 430 break; 431 432 case F_UNLCK: 433 /* unlock request will not block so execute it immediately */ 434 435 if (IS_LOCKMGR(lock_request) && 436 flk_canceled(lock_request)) { 437 error = 0; 438 } else { 439 error = flk_execute_request(lock_request); 440 } 441 break; 442 443 case F_UNLKSYS: 444 /* 445 * Recovery mechanism to release lock manager locks when 446 * NFS client crashes and restart. NFS server will clear 447 * old locks and grant new locks. 
448 */ 449 450 if (lock_request->l_flock.l_sysid == 0) { 451 mutex_exit(&gp->gp_mutex); 452 return (EINVAL); 453 } 454 if (secpolicy_nfs(CRED()) != 0) { 455 mutex_exit(&gp->gp_mutex); 456 return (EPERM); 457 } 458 flk_delete_locks_by_sysid(lock_request); 459 lock_request->l_state &= ~REFERENCED_LOCK; 460 flk_set_state(lock_request, FLK_DEAD_STATE); 461 flk_free_lock(lock_request); 462 mutex_exit(&gp->gp_mutex); 463 return (0); 464 465 default: 466 error = EINVAL; 467 break; 468 } 469 470 /* Clustering: For blocked PXFS locks, return */ 471 if (error == PXFS_LOCK_BLOCKED) { 472 lock_request->l_state &= ~REFERENCED_LOCK; 473 mutex_exit(&gp->gp_mutex); 474 return (error); 475 } 476 477 /* 478 * Now that we have seen the status of locks in the system for 479 * this vnode we acquire the rwlock if it is an IO_LOCK. 480 */ 481 482 if (IS_IO_LOCK(lock_request)) { 483 (void) VOP_RWLOCK(vp, 484 (lock_request->l_type == F_RDLCK) ? 485 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL); 486 if (!error) { 487 lckdat->l_type = F_UNLCK; 488 489 /* 490 * This wake up is needed otherwise 491 * if IO_LOCK has slept the dependents on this 492 * will not be woken up at all. (bugid # 1185482). 493 */ 494 495 flk_wakeup(lock_request, 1); 496 flk_set_state(lock_request, FLK_DEAD_STATE); 497 flk_free_lock(lock_request); 498 } 499 /* 500 * else if error had occurred either flk_process_request() 501 * has returned EDEADLK in which case there will be no 502 * dependents for this lock or EINTR from flk_wait_execute_ 503 * request() in which case flk_cancel_sleeping_lock() 504 * would have been done. same is true with EBADF. 505 */ 506 } 507 508 if (lock_request == &stack_lock_request) { 509 flk_set_state(lock_request, FLK_DEAD_STATE); 510 } else { 511 lock_request->l_state &= ~REFERENCED_LOCK; 512 if ((error != 0) || IS_DELETED(lock_request)) { 513 flk_set_state(lock_request, FLK_DEAD_STATE); 514 flk_free_lock(lock_request); 515 } 516 } 517 518 mutex_exit(&gp->gp_mutex); 519 return (error); 520 521 done: 522 flk_set_state(lock_request, FLK_DEAD_STATE); 523 if (lock_request != &stack_lock_request) 524 flk_free_lock(lock_request); 525 return (error); 526 } 527 528 /* 529 * Invoke the callbacks in the given list. If before sleeping, invoke in 530 * list order. If after sleeping, invoke in reverse order. 531 * 532 * CPR (suspend/resume) support: if one of the callbacks returns a 533 * callb_cpr_t, return it. This will be used to make the thread CPR-safe 534 * while it is sleeping. There should be at most one callb_cpr_t for the 535 * thread. 536 * XXX This is unnecessarily complicated. The CPR information should just 537 * get passed in directly through VOP_FRLOCK and reclock, rather than 538 * sneaking it in via a callback. 
539 */ 540 541 callb_cpr_t * 542 flk_invoke_callbacks(flk_callback_t *cblist, flk_cb_when_t when) 543 { 544 callb_cpr_t *cpr_callbackp = NULL; 545 callb_cpr_t *one_result; 546 flk_callback_t *cb; 547 548 if (cblist == NULL) 549 return (NULL); 550 551 if (when == FLK_BEFORE_SLEEP) { 552 cb = cblist; 553 do { 554 one_result = (*cb->cb_callback)(when, cb->cb_data); 555 if (one_result != NULL) { 556 ASSERT(cpr_callbackp == NULL); 557 cpr_callbackp = one_result; 558 } 559 cb = cb->cb_next; 560 } while (cb != cblist); 561 } else { 562 cb = cblist->cb_prev; 563 do { 564 one_result = (*cb->cb_callback)(when, cb->cb_data); 565 if (one_result != NULL) { 566 cpr_callbackp = one_result; 567 } 568 cb = cb->cb_prev; 569 } while (cb != cblist->cb_prev); 570 } 571 572 return (cpr_callbackp); 573 } 574 575 /* 576 * Initialize a flk_callback_t to hold the given callback. 577 */ 578 579 void 580 flk_init_callback(flk_callback_t *flk_cb, 581 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata) 582 { 583 flk_cb->cb_next = flk_cb; 584 flk_cb->cb_prev = flk_cb; 585 flk_cb->cb_callback = cb_fcn; 586 flk_cb->cb_data = cbdata; 587 } 588 589 /* 590 * Initialize an flk_callback_t and then link it into the head of an 591 * existing list (which may be NULL). 592 */ 593 594 void 595 flk_add_callback(flk_callback_t *newcb, 596 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), 597 void *cbdata, flk_callback_t *cblist) 598 { 599 flk_init_callback(newcb, cb_fcn, cbdata); 600 601 if (cblist == NULL) 602 return; 603 604 newcb->cb_prev = cblist->cb_prev; 605 newcb->cb_next = cblist; 606 cblist->cb_prev->cb_next = newcb; 607 cblist->cb_prev = newcb; 608 } 609 610 /* 611 * Initialize the flk_edge_cache data structure and create the 612 * nlm_reg_status array. 613 */ 614 615 void 616 flk_init(void) 617 { 618 uint_t i; 619 620 flk_edge_cache = kmem_cache_create("flk_edges", 621 sizeof (struct edge), 0, NULL, NULL, NULL, NULL, NULL, 0); 622 if (flk_edge_cache == NULL) { 623 cmn_err(CE_PANIC, "Couldn't create flk_edge_cache\n"); 624 } 625 /* 626 * Create the NLM registry object. 627 */ 628 629 if (cluster_bootflags & CLUSTER_BOOTED) { 630 /* 631 * This routine tells you the maximum node id that will be used 632 * in the cluster. This number will be the size of the nlm 633 * registry status array. We add 1 because we will be using 634 * all entries indexed from 0 to maxnodeid; e.g., from 0 635 * to 64, for a total of 65 entries. 636 */ 637 nlm_status_size = clconf_maximum_nodeid() + 1; 638 } else { 639 nlm_status_size = 0; 640 } 641 642 if (nlm_status_size != 0) { /* booted as a cluster */ 643 nlm_reg_status = (flk_nlm_status_t *) 644 kmem_alloc(sizeof (flk_nlm_status_t) * nlm_status_size, 645 KM_SLEEP); 646 647 /* initialize all NLM states in array to NLM_UNKNOWN */ 648 for (i = 0; i < nlm_status_size; i++) { 649 nlm_reg_status[i] = FLK_NLM_UNKNOWN; 650 } 651 } 652 } 653 654 /* 655 * Zone constructor/destructor callbacks to be executed when a zone is 656 * created/destroyed. 
657 */ 658 /* ARGSUSED */ 659 void * 660 flk_zone_init(zoneid_t zoneid) 661 { 662 struct flock_globals *fg; 663 uint_t i; 664 665 fg = kmem_alloc(sizeof (*fg), KM_SLEEP); 666 fg->flk_lockmgr_status = FLK_LOCKMGR_UP; 667 for (i = 0; i < HASH_SIZE; i++) 668 fg->lockmgr_status[i] = FLK_LOCKMGR_UP; 669 return (fg); 670 } 671 672 /* ARGSUSED */ 673 void 674 flk_zone_fini(zoneid_t zoneid, void *data) 675 { 676 struct flock_globals *fg = data; 677 678 kmem_free(fg, sizeof (*fg)); 679 } 680 681 /* 682 * Get a lock_descriptor structure with initialization of edge lists. 683 */ 684 685 static lock_descriptor_t * 686 flk_get_lock(void) 687 { 688 lock_descriptor_t *l; 689 690 l = kmem_zalloc(sizeof (lock_descriptor_t), KM_SLEEP); 691 692 cv_init(&l->l_cv, NULL, CV_DRIVER, NULL); 693 l->l_edge.edge_in_next = &l->l_edge; 694 l->l_edge.edge_in_prev = &l->l_edge; 695 l->l_edge.edge_adj_next = &l->l_edge; 696 l->l_edge.edge_adj_prev = &l->l_edge; 697 l->pvertex = -1; 698 l->l_status = FLK_INITIAL_STATE; 699 flk_lock_allocs++; 700 return (l); 701 } 702 703 /* 704 * Free a lock_descriptor structure. Just sets the DELETED_LOCK flag 705 * when some thread has a reference to it as in reclock(). 706 */ 707 708 void 709 flk_free_lock(lock_descriptor_t *lock) 710 { 711 ASSERT(IS_DEAD(lock)); 712 if (IS_REFERENCED(lock)) { 713 lock->l_state |= DELETED_LOCK; 714 return; 715 } 716 flk_lock_frees++; 717 kmem_free((void *)lock, sizeof (lock_descriptor_t)); 718 } 719 720 void 721 flk_set_state(lock_descriptor_t *lock, int new_state) 722 { 723 /* 724 * Locks in the sleeping list may be woken up in a number of ways, 725 * and more than once. If a sleeping lock is signaled awake more 726 * than once, then it may or may not change state depending on its 727 * current state. 728 * Also note that NLM locks that are sleeping could be moved to an 729 * interrupted state more than once if the unlock request is 730 * retransmitted by the NLM client - the second time around, this is 731 * just a nop. 732 * The ordering of being signaled awake is: 733 * INTERRUPTED_STATE > CANCELLED_STATE > GRANTED_STATE. 734 * The checks below implement this ordering. 735 */ 736 if (IS_INTERRUPTED(lock)) { 737 if ((new_state == FLK_CANCELLED_STATE) || 738 (new_state == FLK_GRANTED_STATE) || 739 (new_state == FLK_INTERRUPTED_STATE)) { 740 return; 741 } 742 } 743 if (IS_CANCELLED(lock)) { 744 if ((new_state == FLK_GRANTED_STATE) || 745 (new_state == FLK_CANCELLED_STATE)) { 746 return; 747 } 748 } 749 CHECK_LOCK_TRANSITION(lock->l_status, new_state); 750 if (IS_PXFS(lock)) { 751 cl_flk_state_transition_notify(lock, lock->l_status, new_state); 752 } 753 lock->l_status = new_state; 754 } 755 756 /* 757 * Routine that checks whether there are any blocking locks in the system. 758 * 759 * The policy followed is if a write lock is sleeping we don't allow read 760 * locks before this write lock even though there may not be any active 761 * locks corresponding to the read locks' region. 762 * 763 * flk_add_edge() function adds an edge between l1 and l2 iff there 764 * is no path between l1 and l2. This is done to have a "minimum 765 * storage representation" of the dependency graph. 766 * 767 * Another property of the graph is since only the new request throws 768 * edges to the existing locks in the graph, the graph is always topologically 769 * ordered. 
770 */ 771 772 static int 773 flk_process_request(lock_descriptor_t *request) 774 { 775 graph_t *gp = request->l_graph; 776 lock_descriptor_t *lock; 777 int request_blocked_by_active = 0; 778 int request_blocked_by_granted = 0; 779 int request_blocked_by_sleeping = 0; 780 vnode_t *vp = request->l_vnode; 781 int error = 0; 782 int request_will_wait = 0; 783 int found_covering_lock = 0; 784 lock_descriptor_t *covered_by = NULL; 785 786 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 787 request_will_wait = IS_WILLING_TO_SLEEP(request); 788 789 /* 790 * check active locks 791 */ 792 793 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 794 795 796 if (lock) { 797 do { 798 if (BLOCKS(lock, request)) { 799 if (!request_will_wait) 800 return (EAGAIN); 801 request_blocked_by_active = 1; 802 break; 803 } 804 /* 805 * Grant lock if it is for the same owner holding active 806 * lock that covers the request. 807 */ 808 809 if (SAME_OWNER(lock, request) && 810 COVERS(lock, request) && 811 (request->l_type == F_RDLCK)) 812 return (flk_execute_request(request)); 813 lock = lock->l_next; 814 } while (lock->l_vnode == vp); 815 } 816 817 if (!request_blocked_by_active) { 818 lock_descriptor_t *lk[1]; 819 lock_descriptor_t *first_glock = NULL; 820 /* 821 * Shall we grant this?! NO!! 822 * What about those locks that were just granted and still 823 * in sleep queue. Those threads are woken up and so locks 824 * are almost active. 825 */ 826 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 827 if (lock) { 828 do { 829 if (BLOCKS(lock, request)) { 830 if (IS_GRANTED(lock)) { 831 request_blocked_by_granted = 1; 832 } else { 833 request_blocked_by_sleeping = 1; 834 } 835 } 836 837 lock = lock->l_next; 838 } while ((lock->l_vnode == vp)); 839 first_glock = lock->l_prev; 840 ASSERT(first_glock->l_vnode == vp); 841 } 842 843 if (request_blocked_by_granted) 844 goto block; 845 846 if (!request_blocked_by_sleeping) { 847 /* 848 * If the request isn't going to be blocked by a 849 * sleeping request, we know that it isn't going to 850 * be blocked; we can just execute the request -- 851 * without performing costly deadlock detection. 852 */ 853 ASSERT(!request_blocked_by_active); 854 return (flk_execute_request(request)); 855 } else if (request->l_type == F_RDLCK) { 856 /* 857 * If we have a sleeping writer in the requested 858 * lock's range, block. 859 */ 860 goto block; 861 } 862 863 lk[0] = request; 864 request->l_state |= RECOMPUTE_LOCK; 865 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 866 if (lock) { 867 do { 868 flk_recompute_dependencies(lock, lk, 1, 0); 869 lock = lock->l_next; 870 } while (lock->l_vnode == vp); 871 } 872 lock = first_glock; 873 if (lock) { 874 do { 875 if (IS_GRANTED(lock)) { 876 flk_recompute_dependencies(lock, lk, 1, 0); 877 } 878 lock = lock->l_prev; 879 } while ((lock->l_vnode == vp)); 880 } 881 request->l_state &= ~RECOMPUTE_LOCK; 882 if (!NO_DEPENDENTS(request) && flk_check_deadlock(request)) 883 return (EDEADLK); 884 return (flk_execute_request(request)); 885 } 886 887 block: 888 if (request_will_wait) 889 flk_graph_uncolor(gp); 890 891 /* check sleeping locks */ 892 893 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 894 895 /* 896 * If we find a sleeping write lock that is a superset of the 897 * region wanted by request we can be assured that by adding an 898 * edge to this write lock we have paths to all locks in the 899 * graph that blocks the request except in one case and that is why 900 * another check for SAME_OWNER in the loop below. 
The exception 901 * case is when this process that owns the sleeping write lock 'l1' 902 * has other locks l2, l3, l4 that are in the system and arrived 903 * before l1. l1 does not have path to these locks as they are from 904 * same process. We break when we find a second covering sleeping 905 * lock l5 owned by a process different from that owning l1, because 906 * there cannot be any of l2, l3, l4, etc., arrived before l5, and if 907 * it has l1 would have produced a deadlock already. 908 */ 909 910 if (lock) { 911 do { 912 if (BLOCKS(lock, request)) { 913 if (!request_will_wait) 914 return (EAGAIN); 915 if (COVERS(lock, request) && 916 lock->l_type == F_WRLCK) { 917 if (found_covering_lock && 918 !SAME_OWNER(lock, covered_by)) { 919 found_covering_lock++; 920 break; 921 } 922 found_covering_lock = 1; 923 covered_by = lock; 924 } 925 if (found_covering_lock && 926 !SAME_OWNER(lock, covered_by)) { 927 lock = lock->l_next; 928 continue; 929 } 930 if ((error = flk_add_edge(request, lock, 931 !found_covering_lock, 0))) 932 return (error); 933 } 934 lock = lock->l_next; 935 } while (lock->l_vnode == vp); 936 } 937 938 /* 939 * found_covering_lock == 2 iff at this point 'request' has paths 940 * to all locks that blocks 'request'. found_covering_lock == 1 iff at this 941 * point 'request' has paths to all locks that blocks 'request' whose owners 942 * are not same as the one that covers 'request' (covered_by above) and 943 * we can have locks whose owner is same as covered_by in the active list. 944 */ 945 946 if (request_blocked_by_active && found_covering_lock != 2) { 947 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 948 ASSERT(lock != NULL); 949 do { 950 if (BLOCKS(lock, request)) { 951 if (found_covering_lock && 952 !SAME_OWNER(lock, covered_by)) { 953 lock = lock->l_next; 954 continue; 955 } 956 if ((error = flk_add_edge(request, lock, 957 CHECK_CYCLE, 0))) 958 return (error); 959 } 960 lock = lock->l_next; 961 } while (lock->l_vnode == vp); 962 } 963 964 if (NOT_BLOCKED(request)) { 965 /* 966 * request not dependent on any other locks 967 * so execute this request 968 */ 969 return (flk_execute_request(request)); 970 } else { 971 /* 972 * check for deadlock 973 */ 974 if (flk_check_deadlock(request)) 975 return (EDEADLK); 976 /* 977 * this thread has to sleep 978 */ 979 return (flk_wait_execute_request(request)); 980 } 981 } 982 983 /* 984 * The actual execution of the request in the simple case is only to 985 * insert the 'request' in the list of active locks if it is not an 986 * UNLOCK. 987 * We have to consider the existing active locks' relation to 988 * this 'request' if they are owned by same process. flk_relation() does 989 * this job and sees to that the dependency graph information is maintained 990 * properly. 
991 */ 992 993 int 994 flk_execute_request(lock_descriptor_t *request) 995 { 996 graph_t *gp = request->l_graph; 997 vnode_t *vp = request->l_vnode; 998 lock_descriptor_t *lock, *lock1; 999 int done_searching = 0; 1000 1001 CHECK_SLEEPING_LOCKS(gp); 1002 CHECK_ACTIVE_LOCKS(gp); 1003 1004 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1005 1006 flk_set_state(request, FLK_START_STATE); 1007 1008 ASSERT(NOT_BLOCKED(request)); 1009 1010 /* IO_LOCK requests are only to check status */ 1011 1012 if (IS_IO_LOCK(request)) 1013 return (0); 1014 1015 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 1016 1017 if (lock == NULL && request->l_type == F_UNLCK) 1018 return (0); 1019 if (lock == NULL) { 1020 flk_insert_active_lock(request); 1021 return (0); 1022 } 1023 1024 do { 1025 lock1 = lock->l_next; 1026 if (SAME_OWNER(request, lock)) { 1027 done_searching = flk_relation(lock, request); 1028 } 1029 lock = lock1; 1030 } while (lock->l_vnode == vp && !done_searching); 1031 1032 /* 1033 * insert in active queue 1034 */ 1035 1036 if (request->l_type != F_UNLCK) 1037 flk_insert_active_lock(request); 1038 1039 return (0); 1040 } 1041 1042 /* 1043 * 'request' is blocked by some one therefore we put it into sleep queue. 1044 */ 1045 static int 1046 flk_wait_execute_request(lock_descriptor_t *request) 1047 { 1048 graph_t *gp = request->l_graph; 1049 callb_cpr_t *cprp; /* CPR info from callback */ 1050 struct flock_globals *fg; 1051 int index; 1052 1053 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1054 ASSERT(IS_WILLING_TO_SLEEP(request)); 1055 1056 flk_insert_sleeping_lock(request); 1057 1058 if (IS_LOCKMGR(request)) { 1059 index = HASH_INDEX(request->l_vnode); 1060 fg = flk_get_globals(); 1061 1062 if (nlm_status_size == 0) { /* not booted as a cluster */ 1063 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP) { 1064 flk_cancel_sleeping_lock(request, 1); 1065 return (ENOLCK); 1066 } 1067 } else { /* booted as a cluster */ 1068 /* 1069 * If the request is an NLM server lock request, 1070 * and the NLM state of the lock request is not 1071 * NLM_UP (because the NLM server is shutting 1072 * down), then cancel the sleeping lock and 1073 * return error ENOLCK that will encourage the 1074 * client to retransmit. 1075 */ 1076 if (!IS_NLM_UP(request)) { 1077 flk_cancel_sleeping_lock(request, 1); 1078 return (ENOLCK); 1079 } 1080 } 1081 } 1082 1083 /* Clustering: For blocking PXFS locks, return */ 1084 if (IS_PXFS(request)) { 1085 /* 1086 * PXFS locks sleep on the client side. 1087 * The callback argument is used to wake up the sleeper 1088 * when the lock is granted. 1089 * We return -1 (rather than an errno value) to indicate 1090 * the client side should sleep 1091 */ 1092 return (PXFS_LOCK_BLOCKED); 1093 } 1094 1095 if (request->l_callbacks != NULL) { 1096 /* 1097 * To make sure the shutdown code works correctly, either 1098 * the callback must happen after putting the lock on the 1099 * sleep list, or we must check the shutdown status after 1100 * returning from the callback (and before sleeping). At 1101 * least for now, we'll use the first option. If a 1102 * shutdown or signal or whatever happened while the graph 1103 * mutex was dropped, that will be detected by 1104 * wait_for_lock(). 
1105 */ 1106 mutex_exit(&gp->gp_mutex); 1107 1108 cprp = flk_invoke_callbacks(request->l_callbacks, 1109 FLK_BEFORE_SLEEP); 1110 1111 mutex_enter(&gp->gp_mutex); 1112 1113 if (cprp == NULL) { 1114 wait_for_lock(request); 1115 } else { 1116 mutex_enter(cprp->cc_lockp); 1117 CALLB_CPR_SAFE_BEGIN(cprp); 1118 mutex_exit(cprp->cc_lockp); 1119 wait_for_lock(request); 1120 mutex_enter(cprp->cc_lockp); 1121 CALLB_CPR_SAFE_END(cprp, cprp->cc_lockp); 1122 mutex_exit(cprp->cc_lockp); 1123 } 1124 1125 mutex_exit(&gp->gp_mutex); 1126 (void) flk_invoke_callbacks(request->l_callbacks, 1127 FLK_AFTER_SLEEP); 1128 mutex_enter(&gp->gp_mutex); 1129 } else { 1130 wait_for_lock(request); 1131 } 1132 1133 if (IS_LOCKMGR(request)) { 1134 /* 1135 * If the lock manager is shutting down, return an 1136 * error that will encourage the client to retransmit. 1137 */ 1138 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP && 1139 !IS_GRANTED(request)) { 1140 flk_cancel_sleeping_lock(request, 1); 1141 return (ENOLCK); 1142 } 1143 } 1144 1145 if (IS_INTERRUPTED(request)) { 1146 /* we got a signal, or act like we did */ 1147 flk_cancel_sleeping_lock(request, 1); 1148 return (EINTR); 1149 } 1150 1151 /* Cancelled if some other thread has closed the file */ 1152 1153 if (IS_CANCELLED(request)) { 1154 flk_cancel_sleeping_lock(request, 1); 1155 return (EBADF); 1156 } 1157 1158 request->l_state &= ~GRANTED_LOCK; 1159 REMOVE_SLEEP_QUEUE(request); 1160 return (flk_execute_request(request)); 1161 } 1162 1163 /* 1164 * This routine adds an edge between from and to because from depends 1165 * to. If asked to check for deadlock it checks whether there are any 1166 * reachable locks from "from_lock" that is owned by the same process 1167 * as "from_lock". 1168 * NOTE: It is the caller's responsibility to make sure that the color 1169 * of the graph is consistent between the calls to flk_add_edge as done 1170 * in flk_process_request. This routine does not color and check for 1171 * deadlock explicitly. 1172 */ 1173 1174 static int 1175 flk_add_edge(lock_descriptor_t *from_lock, lock_descriptor_t *to_lock, 1176 int check_cycle, int update_graph) 1177 { 1178 edge_t *edge; 1179 edge_t *ep; 1180 lock_descriptor_t *vertex; 1181 lock_descriptor_t *vertex_stack; 1182 1183 STACK_INIT(vertex_stack); 1184 1185 /* 1186 * if to vertex already has mark_color just return 1187 * don't add an edge as it is reachable from from vertex 1188 * before itself. 
1189 */ 1190 1191 if (COLORED(to_lock)) 1192 return (0); 1193 1194 edge = flk_get_edge(); 1195 1196 /* 1197 * set the from and to vertex 1198 */ 1199 1200 edge->from_vertex = from_lock; 1201 edge->to_vertex = to_lock; 1202 1203 /* 1204 * put in adjacency list of from vertex 1205 */ 1206 1207 from_lock->l_edge.edge_adj_next->edge_adj_prev = edge; 1208 edge->edge_adj_next = from_lock->l_edge.edge_adj_next; 1209 edge->edge_adj_prev = &from_lock->l_edge; 1210 from_lock->l_edge.edge_adj_next = edge; 1211 1212 /* 1213 * put in in list of to vertex 1214 */ 1215 1216 to_lock->l_edge.edge_in_next->edge_in_prev = edge; 1217 edge->edge_in_next = to_lock->l_edge.edge_in_next; 1218 to_lock->l_edge.edge_in_next = edge; 1219 edge->edge_in_prev = &to_lock->l_edge; 1220 1221 1222 if (update_graph) { 1223 flk_update_proc_graph(edge, 0); 1224 return (0); 1225 } 1226 if (!check_cycle) { 1227 return (0); 1228 } 1229 1230 STACK_PUSH(vertex_stack, from_lock, l_stack); 1231 1232 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 1233 1234 STACK_POP(vertex_stack, l_stack); 1235 1236 for (ep = FIRST_ADJ(vertex); 1237 ep != HEAD(vertex); 1238 ep = NEXT_ADJ(ep)) { 1239 if (COLORED(ep->to_vertex)) 1240 continue; 1241 COLOR(ep->to_vertex); 1242 if (SAME_OWNER(ep->to_vertex, from_lock)) 1243 goto dead_lock; 1244 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack); 1245 } 1246 } 1247 return (0); 1248 1249 dead_lock: 1250 1251 /* 1252 * remove all edges 1253 */ 1254 1255 ep = FIRST_ADJ(from_lock); 1256 1257 while (ep != HEAD(from_lock)) { 1258 IN_LIST_REMOVE(ep); 1259 from_lock->l_sedge = NEXT_ADJ(ep); 1260 ADJ_LIST_REMOVE(ep); 1261 flk_free_edge(ep); 1262 ep = from_lock->l_sedge; 1263 } 1264 return (EDEADLK); 1265 } 1266 1267 /* 1268 * Get an edge structure for representing the dependency between two locks. 1269 */ 1270 1271 static edge_t * 1272 flk_get_edge() 1273 { 1274 edge_t *ep; 1275 1276 ASSERT(flk_edge_cache != NULL); 1277 1278 ep = kmem_cache_alloc(flk_edge_cache, KM_SLEEP); 1279 edge_allocs++; 1280 return (ep); 1281 } 1282 1283 /* 1284 * Free the edge structure. 1285 */ 1286 1287 static void 1288 flk_free_edge(edge_t *ep) 1289 { 1290 edge_frees++; 1291 kmem_cache_free(flk_edge_cache, (void *)ep); 1292 } 1293 1294 /* 1295 * Check the relationship of request with lock and perform the 1296 * recomputation of dependencies, break lock if required, and return 1297 * 1 if request cannot have any more relationship with the next 1298 * active locks. 1299 * The 'lock' and 'request' are compared and in case of overlap we 1300 * delete the 'lock' and form new locks to represent the non-overlapped 1301 * portion of original 'lock'. This function has side effects such as 1302 * 'lock' will be freed, new locks will be added to the active list. 
1303 */ 1304 1305 static int 1306 flk_relation(lock_descriptor_t *lock, lock_descriptor_t *request) 1307 { 1308 int lock_effect; 1309 lock_descriptor_t *lock1, *lock2; 1310 lock_descriptor_t *topology[3]; 1311 int nvertex = 0; 1312 int i; 1313 edge_t *ep; 1314 graph_t *gp = (lock->l_graph); 1315 1316 1317 CHECK_SLEEPING_LOCKS(gp); 1318 CHECK_ACTIVE_LOCKS(gp); 1319 1320 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1321 1322 topology[0] = topology[1] = topology[2] = NULL; 1323 1324 if (request->l_type == F_UNLCK) 1325 lock_effect = FLK_UNLOCK; 1326 else if (request->l_type == F_RDLCK && 1327 lock->l_type == F_WRLCK) 1328 lock_effect = FLK_DOWNGRADE; 1329 else if (request->l_type == F_WRLCK && 1330 lock->l_type == F_RDLCK) 1331 lock_effect = FLK_UPGRADE; 1332 else 1333 lock_effect = FLK_STAY_SAME; 1334 1335 if (lock->l_end < request->l_start) { 1336 if (lock->l_end == request->l_start - 1 && 1337 lock_effect == FLK_STAY_SAME) { 1338 topology[0] = request; 1339 request->l_start = lock->l_start; 1340 nvertex = 1; 1341 goto recompute; 1342 } else { 1343 return (0); 1344 } 1345 } 1346 1347 if (lock->l_start > request->l_end) { 1348 if (request->l_end == lock->l_start - 1 && 1349 lock_effect == FLK_STAY_SAME) { 1350 topology[0] = request; 1351 request->l_end = lock->l_end; 1352 nvertex = 1; 1353 goto recompute; 1354 } else { 1355 return (1); 1356 } 1357 } 1358 1359 if (request->l_end < lock->l_end) { 1360 if (request->l_start > lock->l_start) { 1361 if (lock_effect == FLK_STAY_SAME) { 1362 request->l_start = lock->l_start; 1363 request->l_end = lock->l_end; 1364 topology[0] = request; 1365 nvertex = 1; 1366 } else { 1367 lock1 = flk_get_lock(); 1368 lock2 = flk_get_lock(); 1369 COPY(lock1, lock); 1370 COPY(lock2, lock); 1371 lock1->l_start = lock->l_start; 1372 lock1->l_end = request->l_start - 1; 1373 lock2->l_start = request->l_end + 1; 1374 lock2->l_end = lock->l_end; 1375 topology[0] = lock1; 1376 topology[1] = lock2; 1377 topology[2] = request; 1378 nvertex = 3; 1379 } 1380 } else if (request->l_start < lock->l_start) { 1381 if (lock_effect == FLK_STAY_SAME) { 1382 request->l_end = lock->l_end; 1383 topology[0] = request; 1384 nvertex = 1; 1385 } else { 1386 lock1 = flk_get_lock(); 1387 COPY(lock1, lock); 1388 lock1->l_start = request->l_end + 1; 1389 topology[0] = lock1; 1390 topology[1] = request; 1391 nvertex = 2; 1392 } 1393 } else { 1394 if (lock_effect == FLK_STAY_SAME) { 1395 request->l_start = lock->l_start; 1396 request->l_end = lock->l_end; 1397 topology[0] = request; 1398 nvertex = 1; 1399 } else { 1400 lock1 = flk_get_lock(); 1401 COPY(lock1, lock); 1402 lock1->l_start = request->l_end + 1; 1403 topology[0] = lock1; 1404 topology[1] = request; 1405 nvertex = 2; 1406 } 1407 } 1408 } else if (request->l_end > lock->l_end) { 1409 if (request->l_start > lock->l_start) { 1410 if (lock_effect == FLK_STAY_SAME) { 1411 request->l_start = lock->l_start; 1412 topology[0] = request; 1413 nvertex = 1; 1414 } else { 1415 lock1 = flk_get_lock(); 1416 COPY(lock1, lock); 1417 lock1->l_end = request->l_start - 1; 1418 topology[0] = lock1; 1419 topology[1] = request; 1420 nvertex = 2; 1421 } 1422 } else if (request->l_start < lock->l_start) { 1423 topology[0] = request; 1424 nvertex = 1; 1425 } else { 1426 topology[0] = request; 1427 nvertex = 1; 1428 } 1429 } else { 1430 if (request->l_start > lock->l_start) { 1431 if (lock_effect == FLK_STAY_SAME) { 1432 request->l_start = lock->l_start; 1433 topology[0] = request; 1434 nvertex = 1; 1435 } else { 1436 lock1 = flk_get_lock(); 1437 COPY(lock1, lock); 1438 
lock1->l_end = request->l_start - 1; 1439 topology[0] = lock1; 1440 topology[1] = request; 1441 nvertex = 2; 1442 } 1443 } else if (request->l_start < lock->l_start) { 1444 topology[0] = request; 1445 nvertex = 1; 1446 } else { 1447 if (lock_effect != FLK_UNLOCK) { 1448 topology[0] = request; 1449 nvertex = 1; 1450 } else { 1451 flk_delete_active_lock(lock, 0); 1452 flk_wakeup(lock, 1); 1453 flk_free_lock(lock); 1454 CHECK_SLEEPING_LOCKS(gp); 1455 CHECK_ACTIVE_LOCKS(gp); 1456 return (1); 1457 } 1458 } 1459 } 1460 1461 recompute: 1462 1463 /* 1464 * For unlock we don't send the 'request' to for recomputing 1465 * dependencies because no lock will add an edge to this. 1466 */ 1467 1468 if (lock_effect == FLK_UNLOCK) { 1469 topology[nvertex-1] = NULL; 1470 nvertex--; 1471 } 1472 for (i = 0; i < nvertex; i++) { 1473 topology[i]->l_state |= RECOMPUTE_LOCK; 1474 topology[i]->l_color = NO_COLOR; 1475 } 1476 1477 ASSERT(FIRST_ADJ(lock) == HEAD(lock)); 1478 1479 /* 1480 * we remove the adjacent edges for all vertices' to this vertex 1481 * 'lock'. 1482 */ 1483 1484 ep = FIRST_IN(lock); 1485 while (ep != HEAD(lock)) { 1486 ADJ_LIST_REMOVE(ep); 1487 ep = NEXT_IN(ep); 1488 } 1489 1490 flk_delete_active_lock(lock, 0); 1491 1492 /* We are ready for recomputing the dependencies now */ 1493 1494 flk_recompute_dependencies(lock, topology, nvertex, 1); 1495 1496 for (i = 0; i < nvertex; i++) { 1497 topology[i]->l_state &= ~RECOMPUTE_LOCK; 1498 topology[i]->l_color = NO_COLOR; 1499 } 1500 1501 1502 if (lock_effect == FLK_UNLOCK) { 1503 nvertex++; 1504 } 1505 for (i = 0; i < nvertex - 1; i++) { 1506 flk_insert_active_lock(topology[i]); 1507 } 1508 1509 1510 if (lock_effect == FLK_DOWNGRADE || lock_effect == FLK_UNLOCK) { 1511 flk_wakeup(lock, 0); 1512 } else { 1513 ep = FIRST_IN(lock); 1514 while (ep != HEAD(lock)) { 1515 lock->l_sedge = NEXT_IN(ep); 1516 IN_LIST_REMOVE(ep); 1517 flk_update_proc_graph(ep, 1); 1518 flk_free_edge(ep); 1519 ep = lock->l_sedge; 1520 } 1521 } 1522 flk_free_lock(lock); 1523 1524 CHECK_SLEEPING_LOCKS(gp); 1525 CHECK_ACTIVE_LOCKS(gp); 1526 return (0); 1527 } 1528 1529 /* 1530 * Insert a lock into the active queue. 1531 */ 1532 1533 static void 1534 flk_insert_active_lock(lock_descriptor_t *new_lock) 1535 { 1536 graph_t *gp = new_lock->l_graph; 1537 vnode_t *vp = new_lock->l_vnode; 1538 lock_descriptor_t *first_lock, *lock; 1539 1540 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1541 1542 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 1543 first_lock = lock; 1544 1545 if (first_lock != NULL) { 1546 for (; (lock->l_vnode == vp && 1547 lock->l_start < new_lock->l_start); lock = lock->l_next) 1548 ; 1549 } else { 1550 lock = ACTIVE_HEAD(gp); 1551 } 1552 1553 lock->l_prev->l_next = new_lock; 1554 new_lock->l_next = lock; 1555 new_lock->l_prev = lock->l_prev; 1556 lock->l_prev = new_lock; 1557 1558 if (first_lock == NULL || (new_lock->l_start <= first_lock->l_start)) { 1559 vp->v_filocks = (struct filock *)new_lock; 1560 } 1561 flk_set_state(new_lock, FLK_ACTIVE_STATE); 1562 new_lock->l_state |= ACTIVE_LOCK; 1563 1564 CHECK_ACTIVE_LOCKS(gp); 1565 CHECK_SLEEPING_LOCKS(gp); 1566 } 1567 1568 /* 1569 * Delete the active lock : Performs two functions depending on the 1570 * value of second parameter. One is to remove from the active lists 1571 * only and other is to both remove and free the lock. 
1572 */ 1573 1574 static void 1575 flk_delete_active_lock(lock_descriptor_t *lock, int free_lock) 1576 { 1577 vnode_t *vp = lock->l_vnode; 1578 graph_t *gp = lock->l_graph; 1579 1580 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1581 if (free_lock) 1582 ASSERT(NO_DEPENDENTS(lock)); 1583 ASSERT(NOT_BLOCKED(lock)); 1584 ASSERT(IS_ACTIVE(lock)); 1585 1586 ASSERT((vp->v_filocks != NULL)); 1587 1588 if (vp->v_filocks == (struct filock *)lock) { 1589 vp->v_filocks = (struct filock *) 1590 ((lock->l_next->l_vnode == vp) ? lock->l_next : 1591 NULL); 1592 } 1593 lock->l_next->l_prev = lock->l_prev; 1594 lock->l_prev->l_next = lock->l_next; 1595 lock->l_next = lock->l_prev = NULL; 1596 flk_set_state(lock, FLK_DEAD_STATE); 1597 lock->l_state &= ~ACTIVE_LOCK; 1598 1599 if (free_lock) 1600 flk_free_lock(lock); 1601 CHECK_ACTIVE_LOCKS(gp); 1602 CHECK_SLEEPING_LOCKS(gp); 1603 } 1604 1605 /* 1606 * Insert into the sleep queue. 1607 */ 1608 1609 static void 1610 flk_insert_sleeping_lock(lock_descriptor_t *request) 1611 { 1612 graph_t *gp = request->l_graph; 1613 vnode_t *vp = request->l_vnode; 1614 lock_descriptor_t *lock; 1615 1616 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1617 ASSERT(IS_INITIAL(request)); 1618 1619 for (lock = gp->sleeping_locks.l_next; (lock != &gp->sleeping_locks && 1620 lock->l_vnode < vp); lock = lock->l_next) 1621 ; 1622 1623 lock->l_prev->l_next = request; 1624 request->l_prev = lock->l_prev; 1625 lock->l_prev = request; 1626 request->l_next = lock; 1627 flk_set_state(request, FLK_SLEEPING_STATE); 1628 request->l_state |= SLEEPING_LOCK; 1629 } 1630 1631 /* 1632 * Cancelling a sleeping lock implies removing a vertex from the 1633 * dependency graph and therefore we should recompute the dependencies 1634 * of all vertices that have a path to this vertex, w.r.t. all 1635 * vertices reachable from this vertex. 1636 */ 1637 1638 void 1639 flk_cancel_sleeping_lock(lock_descriptor_t *request, int remove_from_queue) 1640 { 1641 graph_t *gp = request->l_graph; 1642 vnode_t *vp = request->l_vnode; 1643 lock_descriptor_t **topology = NULL; 1644 edge_t *ep; 1645 lock_descriptor_t *vertex, *lock; 1646 int nvertex = 0; 1647 int i; 1648 lock_descriptor_t *vertex_stack; 1649 1650 STACK_INIT(vertex_stack); 1651 1652 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1653 /* 1654 * count number of vertex pointers that has to be allocated 1655 * All vertices that are reachable from request. 1656 */ 1657 1658 STACK_PUSH(vertex_stack, request, l_stack); 1659 1660 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 1661 STACK_POP(vertex_stack, l_stack); 1662 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex); 1663 ep = NEXT_ADJ(ep)) { 1664 if (IS_RECOMPUTE(ep->to_vertex)) 1665 continue; 1666 ep->to_vertex->l_state |= RECOMPUTE_LOCK; 1667 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack); 1668 nvertex++; 1669 } 1670 } 1671 1672 /* 1673 * allocate memory for holding the vertex pointers 1674 */ 1675 1676 if (nvertex) { 1677 topology = kmem_zalloc(nvertex * sizeof (lock_descriptor_t *), 1678 KM_SLEEP); 1679 } 1680 1681 /* 1682 * one more pass to actually store the vertices in the 1683 * allocated array. 1684 * We first check sleeping locks and then active locks 1685 * so that topology array will be in a topological 1686 * order. 
1687 */ 1688 1689 nvertex = 0; 1690 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 1691 1692 if (lock) { 1693 do { 1694 if (IS_RECOMPUTE(lock)) { 1695 lock->l_index = nvertex; 1696 topology[nvertex++] = lock; 1697 } 1698 lock->l_color = NO_COLOR; 1699 lock = lock->l_next; 1700 } while (lock->l_vnode == vp); 1701 } 1702 1703 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 1704 1705 if (lock) { 1706 do { 1707 if (IS_RECOMPUTE(lock)) { 1708 lock->l_index = nvertex; 1709 topology[nvertex++] = lock; 1710 } 1711 lock->l_color = NO_COLOR; 1712 lock = lock->l_next; 1713 } while (lock->l_vnode == vp); 1714 } 1715 1716 /* 1717 * remove in and out edges of request 1718 * They are freed after updating proc_graph below. 1719 */ 1720 1721 for (ep = FIRST_IN(request); ep != HEAD(request); ep = NEXT_IN(ep)) { 1722 ADJ_LIST_REMOVE(ep); 1723 } 1724 1725 1726 if (remove_from_queue) 1727 REMOVE_SLEEP_QUEUE(request); 1728 1729 /* we are ready to recompute */ 1730 1731 flk_recompute_dependencies(request, topology, nvertex, 1); 1732 1733 ep = FIRST_ADJ(request); 1734 while (ep != HEAD(request)) { 1735 IN_LIST_REMOVE(ep); 1736 request->l_sedge = NEXT_ADJ(ep); 1737 ADJ_LIST_REMOVE(ep); 1738 flk_update_proc_graph(ep, 1); 1739 flk_free_edge(ep); 1740 ep = request->l_sedge; 1741 } 1742 1743 1744 /* 1745 * unset the RECOMPUTE flag in those vertices 1746 */ 1747 1748 for (i = 0; i < nvertex; i++) { 1749 topology[i]->l_state &= ~RECOMPUTE_LOCK; 1750 } 1751 1752 /* 1753 * free the topology 1754 */ 1755 if (nvertex) 1756 kmem_free((void *)topology, 1757 (nvertex * sizeof (lock_descriptor_t *))); 1758 /* 1759 * Possibility of some locks unblocked now 1760 */ 1761 1762 flk_wakeup(request, 0); 1763 1764 /* 1765 * we expect to have a correctly recomputed graph now. 1766 */ 1767 flk_set_state(request, FLK_DEAD_STATE); 1768 flk_free_lock(request); 1769 CHECK_SLEEPING_LOCKS(gp); 1770 CHECK_ACTIVE_LOCKS(gp); 1771 1772 } 1773 1774 /* 1775 * Uncoloring the graph is simply to increment the mark value of the graph 1776 * And only when wrap round takes place will we color all vertices in 1777 * the graph explicitly. 1778 */ 1779 1780 static void 1781 flk_graph_uncolor(graph_t *gp) 1782 { 1783 lock_descriptor_t *lock; 1784 1785 if (gp->mark == UINT_MAX) { 1786 gp->mark = 1; 1787 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp); 1788 lock = lock->l_next) 1789 lock->l_color = 0; 1790 1791 for (lock = SLEEPING_HEAD(gp)->l_next; lock != SLEEPING_HEAD(gp); 1792 lock = lock->l_next) 1793 lock->l_color = 0; 1794 } else { 1795 gp->mark++; 1796 } 1797 } 1798 1799 /* 1800 * Wake up locks that are blocked on the given lock. 1801 */ 1802 1803 static void 1804 flk_wakeup(lock_descriptor_t *lock, int adj_list_remove) 1805 { 1806 edge_t *ep; 1807 graph_t *gp = lock->l_graph; 1808 lock_descriptor_t *lck; 1809 1810 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1811 if (NO_DEPENDENTS(lock)) 1812 return; 1813 ep = FIRST_IN(lock); 1814 do { 1815 /* 1816 * delete the edge from the adjacency list 1817 * of from vertex. if no more adjacent edges 1818 * for this vertex wake this process. 
1819 */ 1820 lck = ep->from_vertex; 1821 if (adj_list_remove) 1822 ADJ_LIST_REMOVE(ep); 1823 flk_update_proc_graph(ep, 1); 1824 if (NOT_BLOCKED(lck)) { 1825 GRANT_WAKEUP(lck); 1826 } 1827 lock->l_sedge = NEXT_IN(ep); 1828 IN_LIST_REMOVE(ep); 1829 flk_free_edge(ep); 1830 ep = lock->l_sedge; 1831 } while (ep != HEAD(lock)); 1832 ASSERT(NO_DEPENDENTS(lock)); 1833 } 1834 1835 /* 1836 * The dependents of request, is checked for its dependency against the 1837 * locks in topology (called topology because the array is and should be in 1838 * topological order for this algorithm, if not in topological order the 1839 * inner loop below might add more edges than necessary. Topological ordering 1840 * of vertices satisfies the property that all edges will be from left to 1841 * right i.e., topology[i] can have an edge to topology[j], iff i<j) 1842 * If lock l1 in the dependent set of request is dependent (blocked by) 1843 * on lock l2 in topology but does not have a path to it, we add an edge 1844 * in the inner loop below. 1845 * 1846 * We don't want to add an edge between l1 and l2 if there exists 1847 * already a path from l1 to l2, so care has to be taken for those vertices 1848 * that have two paths to 'request'. These vertices are referred to here 1849 * as barrier locks. 1850 * 1851 * The barriers has to be found (those vertex that originally had two paths 1852 * to request) because otherwise we may end up adding edges unnecessarily 1853 * to vertices in topology, and thus barrier vertices can have an edge 1854 * to a vertex in topology as well a path to it. 1855 */ 1856 1857 static void 1858 flk_recompute_dependencies(lock_descriptor_t *request, 1859 lock_descriptor_t **topology, 1860 int nvertex, int update_graph) 1861 { 1862 lock_descriptor_t *vertex, *lock; 1863 graph_t *gp = request->l_graph; 1864 int i, count; 1865 int barrier_found = 0; 1866 edge_t *ep; 1867 lock_descriptor_t *vertex_stack; 1868 1869 STACK_INIT(vertex_stack); 1870 1871 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1872 if (nvertex == 0) 1873 return; 1874 flk_graph_uncolor(request->l_graph); 1875 barrier_found = flk_find_barriers(request); 1876 request->l_state |= RECOMPUTE_DONE; 1877 1878 STACK_PUSH(vertex_stack, request, l_stack); 1879 request->l_sedge = FIRST_IN(request); 1880 1881 1882 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 1883 if (vertex->l_state & RECOMPUTE_DONE) { 1884 count = 0; 1885 goto next_in_edge; 1886 } 1887 if (IS_BARRIER(vertex)) { 1888 /* decrement the barrier count */ 1889 if (vertex->l_index) { 1890 vertex->l_index--; 1891 /* this guy will be pushed again anyway ? */ 1892 STACK_POP(vertex_stack, l_stack); 1893 if (vertex->l_index == 0) { 1894 /* 1895 * barrier is over we can recompute 1896 * dependencies for this lock in the 1897 * next stack pop 1898 */ 1899 vertex->l_state &= ~BARRIER_LOCK; 1900 } 1901 continue; 1902 } 1903 } 1904 vertex->l_state |= RECOMPUTE_DONE; 1905 flk_graph_uncolor(gp); 1906 count = flk_color_reachables(vertex); 1907 for (i = 0; i < nvertex; i++) { 1908 lock = topology[i]; 1909 if (COLORED(lock)) 1910 continue; 1911 if (BLOCKS(lock, vertex)) { 1912 (void) flk_add_edge(vertex, lock, 1913 NO_CHECK_CYCLE, update_graph); 1914 COLOR(lock); 1915 count++; 1916 count += flk_color_reachables(lock); 1917 } 1918 1919 } 1920 1921 next_in_edge: 1922 if (count == nvertex || 1923 vertex->l_sedge == HEAD(vertex)) { 1924 /* prune the tree below this */ 1925 STACK_POP(vertex_stack, l_stack); 1926 vertex->l_state &= ~RECOMPUTE_DONE; 1927 /* update the barrier locks below this! 
*/ 1928 if (vertex->l_sedge != HEAD(vertex) && barrier_found) { 1929 flk_graph_uncolor(gp); 1930 flk_update_barriers(vertex); 1931 } 1932 continue; 1933 } 1934 1935 ep = vertex->l_sedge; 1936 lock = ep->from_vertex; 1937 STACK_PUSH(vertex_stack, lock, l_stack); 1938 lock->l_sedge = FIRST_IN(lock); 1939 vertex->l_sedge = NEXT_IN(ep); 1940 } 1941 1942 } 1943 1944 /* 1945 * Color all reachable vertices from vertex that belongs to topology (here 1946 * those that have RECOMPUTE_LOCK set in their state) and yet uncolored. 1947 * 1948 * Note: we need to use a different stack_link l_stack1 because this is 1949 * called from flk_recompute_dependencies() that already uses a stack with 1950 * l_stack as stack_link. 1951 */ 1952 1953 static int 1954 flk_color_reachables(lock_descriptor_t *vertex) 1955 { 1956 lock_descriptor_t *ver, *lock; 1957 int count; 1958 edge_t *ep; 1959 lock_descriptor_t *vertex_stack; 1960 1961 STACK_INIT(vertex_stack); 1962 1963 STACK_PUSH(vertex_stack, vertex, l_stack1); 1964 count = 0; 1965 while ((ver = STACK_TOP(vertex_stack)) != NULL) { 1966 1967 STACK_POP(vertex_stack, l_stack1); 1968 for (ep = FIRST_ADJ(ver); ep != HEAD(ver); 1969 ep = NEXT_ADJ(ep)) { 1970 lock = ep->to_vertex; 1971 if (COLORED(lock)) 1972 continue; 1973 COLOR(lock); 1974 if (IS_RECOMPUTE(lock)) 1975 count++; 1976 STACK_PUSH(vertex_stack, lock, l_stack1); 1977 } 1978 1979 } 1980 return (count); 1981 } 1982 1983 /* 1984 * Called from flk_recompute_dependencies() this routine decrements 1985 * the barrier count of barrier vertices that are reachable from lock. 1986 */ 1987 1988 static void 1989 flk_update_barriers(lock_descriptor_t *lock) 1990 { 1991 lock_descriptor_t *vertex, *lck; 1992 edge_t *ep; 1993 lock_descriptor_t *vertex_stack; 1994 1995 STACK_INIT(vertex_stack); 1996 1997 STACK_PUSH(vertex_stack, lock, l_stack1); 1998 1999 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 2000 STACK_POP(vertex_stack, l_stack1); 2001 for (ep = FIRST_IN(vertex); ep != HEAD(vertex); 2002 ep = NEXT_IN(ep)) { 2003 lck = ep->from_vertex; 2004 if (COLORED(lck)) { 2005 if (IS_BARRIER(lck)) { 2006 ASSERT(lck->l_index > 0); 2007 lck->l_index--; 2008 if (lck->l_index == 0) 2009 lck->l_state &= ~BARRIER_LOCK; 2010 } 2011 continue; 2012 } 2013 COLOR(lck); 2014 if (IS_BARRIER(lck)) { 2015 ASSERT(lck->l_index > 0); 2016 lck->l_index--; 2017 if (lck->l_index == 0) 2018 lck->l_state &= ~BARRIER_LOCK; 2019 } 2020 STACK_PUSH(vertex_stack, lck, l_stack1); 2021 } 2022 } 2023 } 2024 2025 /* 2026 * Finds all vertices that are reachable from 'lock' more than once and 2027 * mark them as barrier vertices and increment their barrier count. 2028 * The barrier count is one minus the total number of paths from lock 2029 * to that vertex. 
2030 */ 2031 2032 static int 2033 flk_find_barriers(lock_descriptor_t *lock) 2034 { 2035 lock_descriptor_t *vertex, *lck; 2036 int found = 0; 2037 edge_t *ep; 2038 lock_descriptor_t *vertex_stack; 2039 2040 STACK_INIT(vertex_stack); 2041 2042 STACK_PUSH(vertex_stack, lock, l_stack1); 2043 2044 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 2045 STACK_POP(vertex_stack, l_stack1); 2046 for (ep = FIRST_IN(vertex); ep != HEAD(vertex); 2047 ep = NEXT_IN(ep)) { 2048 lck = ep->from_vertex; 2049 if (COLORED(lck)) { 2050 /* this is a barrier */ 2051 lck->l_state |= BARRIER_LOCK; 2052 /* index will have barrier count */ 2053 lck->l_index++; 2054 if (!found) 2055 found = 1; 2056 continue; 2057 } 2058 COLOR(lck); 2059 lck->l_index = 0; 2060 STACK_PUSH(vertex_stack, lck, l_stack1); 2061 } 2062 } 2063 return (found); 2064 } 2065 2066 /* 2067 * Finds the first lock that is mainly responsible for blocking this 2068 * request. If there is no such lock, request->l_flock.l_type is set to 2069 * F_UNLCK. Otherwise, request->l_flock is filled in with the particulars 2070 * of the blocking lock. 2071 * 2072 * Note: It is possible a request is blocked by a sleeping lock because 2073 * of the fairness policy used in flk_process_request() to construct the 2074 * dependencies. (see comments before flk_process_request()). 2075 */ 2076 2077 static void 2078 flk_get_first_blocking_lock(lock_descriptor_t *request) 2079 { 2080 graph_t *gp = request->l_graph; 2081 vnode_t *vp = request->l_vnode; 2082 lock_descriptor_t *lock, *blocker; 2083 2084 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 2085 blocker = NULL; 2086 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2087 2088 if (lock) { 2089 do { 2090 if (BLOCKS(lock, request)) { 2091 blocker = lock; 2092 break; 2093 } 2094 lock = lock->l_next; 2095 } while (lock->l_vnode == vp); 2096 } 2097 2098 if (blocker) { 2099 report_blocker(blocker, request); 2100 } else 2101 request->l_flock.l_type = F_UNLCK; 2102 } 2103 2104 /* 2105 * Get the graph_t structure associated with a vnode. 2106 * If 'initialize' is non-zero, and the graph_t structure for this vnode has 2107 * not yet been initialized, then a new element is allocated and returned. 2108 */ 2109 graph_t * 2110 flk_get_lock_graph(vnode_t *vp, int initialize) 2111 { 2112 graph_t *gp; 2113 graph_t *gp_alloc = NULL; 2114 int index = HASH_INDEX(vp); 2115 2116 if (initialize == FLK_USE_GRAPH) { 2117 mutex_enter(&flock_lock); 2118 gp = lock_graph[index]; 2119 mutex_exit(&flock_lock); 2120 return (gp); 2121 } 2122 2123 ASSERT(initialize == FLK_INIT_GRAPH); 2124 2125 if (lock_graph[index] == NULL) { 2126 2127 gp_alloc = kmem_zalloc(sizeof (graph_t), KM_SLEEP); 2128 2129 /* Initialize the graph */ 2130 2131 gp_alloc->active_locks.l_next = 2132 gp_alloc->active_locks.l_prev = 2133 (lock_descriptor_t *)ACTIVE_HEAD(gp_alloc); 2134 gp_alloc->sleeping_locks.l_next = 2135 gp_alloc->sleeping_locks.l_prev = 2136 (lock_descriptor_t *)SLEEPING_HEAD(gp_alloc); 2137 gp_alloc->index = index; 2138 mutex_init(&gp_alloc->gp_mutex, NULL, MUTEX_DEFAULT, NULL); 2139 } 2140 2141 mutex_enter(&flock_lock); 2142 2143 gp = lock_graph[index]; 2144 2145 /* Recheck the value within flock_lock */ 2146 if (gp == NULL) { 2147 struct flock_globals *fg; 2148 2149 /* We must have previously allocated the graph_t structure */ 2150 ASSERT(gp_alloc != NULL); 2151 lock_graph[index] = gp = gp_alloc; 2152 /* 2153 * The lockmgr status is only needed if KLM is loaded. 
2154 */ 2155 if (flock_zone_key != ZONE_KEY_UNINITIALIZED) { 2156 fg = flk_get_globals(); 2157 fg->lockmgr_status[index] = fg->flk_lockmgr_status; 2158 } 2159 } 2160 2161 mutex_exit(&flock_lock); 2162 2163 if ((gp_alloc != NULL) && (gp != gp_alloc)) { 2164 /* There was a race to allocate the graph_t and we lost */ 2165 mutex_destroy(&gp_alloc->gp_mutex); 2166 kmem_free(gp_alloc, sizeof (graph_t)); 2167 } 2168 2169 return (gp); 2170 } 2171 2172 /* 2173 * PSARC case 1997/292 2174 */ 2175 int 2176 cl_flk_has_remote_locks_for_nlmid(vnode_t *vp, int nlmid) 2177 { 2178 lock_descriptor_t *lock; 2179 int result = 0; 2180 graph_t *gp; 2181 int lock_nlmid; 2182 2183 /* 2184 * Check to see if node is booted as a cluster. If not, return. 2185 */ 2186 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 2187 return (0); 2188 } 2189 2190 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH); 2191 if (gp == NULL) { 2192 return (0); 2193 } 2194 2195 mutex_enter(&gp->gp_mutex); 2196 2197 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2198 2199 if (lock) { 2200 while (lock->l_vnode == vp) { 2201 /* get NLM id from sysid */ 2202 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 2203 2204 /* 2205 * If NLM server request _and_ nlmid of lock matches 2206 * nlmid of argument, then we've found a remote lock. 2207 */ 2208 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 2209 result = 1; 2210 goto done; 2211 } 2212 lock = lock->l_next; 2213 } 2214 } 2215 2216 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2217 2218 if (lock) { 2219 while (lock->l_vnode == vp) { 2220 /* get NLM id from sysid */ 2221 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 2222 2223 /* 2224 * If NLM server request _and_ nlmid of lock matches 2225 * nlmid of argument, then we've found a remote lock. 2226 */ 2227 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 2228 result = 1; 2229 goto done; 2230 } 2231 lock = lock->l_next; 2232 } 2233 } 2234 2235 done: 2236 mutex_exit(&gp->gp_mutex); 2237 return (result); 2238 } 2239 2240 /* 2241 * Determine whether there are any locks for the given vnode with a remote 2242 * sysid. Returns zero if not, non-zero if there are. 2243 * 2244 * Note that the return value from this function is potentially invalid 2245 * once it has been returned. The caller is responsible for providing its 2246 * own synchronization mechanism to ensure that the return value is useful 2247 * (e.g., see nfs_lockcompletion()). 2248 */ 2249 int 2250 flk_has_remote_locks(vnode_t *vp) 2251 { 2252 lock_descriptor_t *lock; 2253 int result = 0; 2254 graph_t *gp; 2255 2256 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH); 2257 if (gp == NULL) { 2258 return (0); 2259 } 2260 2261 mutex_enter(&gp->gp_mutex); 2262 2263 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2264 2265 if (lock) { 2266 while (lock->l_vnode == vp) { 2267 if (IS_REMOTE(lock)) { 2268 result = 1; 2269 goto done; 2270 } 2271 lock = lock->l_next; 2272 } 2273 } 2274 2275 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2276 2277 if (lock) { 2278 while (lock->l_vnode == vp) { 2279 if (IS_REMOTE(lock)) { 2280 result = 1; 2281 goto done; 2282 } 2283 lock = lock->l_next; 2284 } 2285 } 2286 2287 done: 2288 mutex_exit(&gp->gp_mutex); 2289 return (result); 2290 } 2291 2292 /* 2293 * Determine if there are any locks owned by the given sysid. 2294 * Returns zero if not, non-zero if there are. Note that this return code 2295 * could be derived from flk_get_{sleeping,active}_locks, but this routine 2296 * avoids all the memory allocations of those routines. 
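 *
 * Illustrative use (hypothetical caller):
 *
 *	if (flk_sysid_has_locks(sysid,
 *	    FLK_QUERY_ACTIVE | FLK_QUERY_SLEEPING)) {
 *		... the client still owns, or is waiting for, locks ...
 *	}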
2297 * 2298 * This routine has the same synchronization issues as 2299 * flk_has_remote_locks. 2300 */ 2301 2302 int 2303 flk_sysid_has_locks(int sysid, int lck_type) 2304 { 2305 int has_locks = 0; 2306 lock_descriptor_t *lock; 2307 graph_t *gp; 2308 int i; 2309 2310 for (i = 0; i < HASH_SIZE && !has_locks; i++) { 2311 mutex_enter(&flock_lock); 2312 gp = lock_graph[i]; 2313 mutex_exit(&flock_lock); 2314 if (gp == NULL) { 2315 continue; 2316 } 2317 2318 mutex_enter(&gp->gp_mutex); 2319 2320 if (lck_type & FLK_QUERY_ACTIVE) { 2321 for (lock = ACTIVE_HEAD(gp)->l_next; 2322 lock != ACTIVE_HEAD(gp) && !has_locks; 2323 lock = lock->l_next) { 2324 if (lock->l_flock.l_sysid == sysid) 2325 has_locks = 1; 2326 } 2327 } 2328 2329 if (lck_type & FLK_QUERY_SLEEPING) { 2330 for (lock = SLEEPING_HEAD(gp)->l_next; 2331 lock != SLEEPING_HEAD(gp) && !has_locks; 2332 lock = lock->l_next) { 2333 if (lock->l_flock.l_sysid == sysid) 2334 has_locks = 1; 2335 } 2336 } 2337 mutex_exit(&gp->gp_mutex); 2338 } 2339 2340 return (has_locks); 2341 } 2342 2343 2344 /* 2345 * PSARC case 1997/292 2346 * 2347 * Requires: "sysid" is a pair [nlmid, sysid]. The lower half is 16-bit 2348 * quantity, the real sysid generated by the NLM server; the upper half 2349 * identifies the node of the cluster where the NLM server ran. 2350 * This routine is only called by an NLM server running in a cluster. 2351 * Effects: Remove all locks held on behalf of the client identified 2352 * by "sysid." 2353 */ 2354 void 2355 cl_flk_remove_locks_by_sysid(int sysid) 2356 { 2357 graph_t *gp; 2358 int i; 2359 lock_descriptor_t *lock, *nlock; 2360 2361 /* 2362 * Check to see if node is booted as a cluster. If not, return. 2363 */ 2364 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 2365 return; 2366 } 2367 2368 ASSERT(sysid != 0); 2369 for (i = 0; i < HASH_SIZE; i++) { 2370 mutex_enter(&flock_lock); 2371 gp = lock_graph[i]; 2372 mutex_exit(&flock_lock); 2373 2374 if (gp == NULL) 2375 continue; 2376 2377 mutex_enter(&gp->gp_mutex); /* get mutex on lock graph */ 2378 2379 /* signal sleeping requests so that they bail out */ 2380 lock = SLEEPING_HEAD(gp)->l_next; 2381 while (lock != SLEEPING_HEAD(gp)) { 2382 nlock = lock->l_next; 2383 if (lock->l_flock.l_sysid == sysid) { 2384 INTERRUPT_WAKEUP(lock); 2385 } 2386 lock = nlock; 2387 } 2388 2389 /* delete active locks */ 2390 lock = ACTIVE_HEAD(gp)->l_next; 2391 while (lock != ACTIVE_HEAD(gp)) { 2392 nlock = lock->l_next; 2393 if (lock->l_flock.l_sysid == sysid) { 2394 flk_delete_active_lock(lock, 0); 2395 flk_wakeup(lock, 1); 2396 flk_free_lock(lock); 2397 } 2398 lock = nlock; 2399 } 2400 mutex_exit(&gp->gp_mutex); /* release mutex on lock graph */ 2401 } 2402 } 2403 2404 /* 2405 * Delete all locks in the system that belongs to the sysid of the request. 
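 * Note that the mutex of the request's own graph is dropped while the
 * other graphs in lock_graph[] are walked, and is reacquired before
 * returning.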
2406 */ 2407 2408 static void 2409 flk_delete_locks_by_sysid(lock_descriptor_t *request) 2410 { 2411 int sysid = request->l_flock.l_sysid; 2412 lock_descriptor_t *lock, *nlock; 2413 graph_t *gp; 2414 int i; 2415 2416 ASSERT(MUTEX_HELD(&request->l_graph->gp_mutex)); 2417 ASSERT(sysid != 0); 2418 2419 mutex_exit(&request->l_graph->gp_mutex); 2420 2421 for (i = 0; i < HASH_SIZE; i++) { 2422 mutex_enter(&flock_lock); 2423 gp = lock_graph[i]; 2424 mutex_exit(&flock_lock); 2425 2426 if (gp == NULL) 2427 continue; 2428 2429 mutex_enter(&gp->gp_mutex); 2430 2431 /* signal sleeping requests so that they bail out */ 2432 lock = SLEEPING_HEAD(gp)->l_next; 2433 while (lock != SLEEPING_HEAD(gp)) { 2434 nlock = lock->l_next; 2435 if (lock->l_flock.l_sysid == sysid) { 2436 INTERRUPT_WAKEUP(lock); 2437 } 2438 lock = nlock; 2439 } 2440 2441 /* delete active locks */ 2442 lock = ACTIVE_HEAD(gp)->l_next; 2443 while (lock != ACTIVE_HEAD(gp)) { 2444 nlock = lock->l_next; 2445 if (lock->l_flock.l_sysid == sysid) { 2446 flk_delete_active_lock(lock, 0); 2447 flk_wakeup(lock, 1); 2448 flk_free_lock(lock); 2449 } 2450 lock = nlock; 2451 } 2452 mutex_exit(&gp->gp_mutex); 2453 } 2454 2455 mutex_enter(&request->l_graph->gp_mutex); 2456 } 2457 2458 /* 2459 * Clustering: Deletes PXFS locks 2460 * Effects: Delete all locks on files in the given file system and with the 2461 * given PXFS id. 2462 */ 2463 void 2464 cl_flk_delete_pxfs_locks(struct vfs *vfsp, int pxfsid) 2465 { 2466 lock_descriptor_t *lock, *nlock; 2467 graph_t *gp; 2468 int i; 2469 2470 for (i = 0; i < HASH_SIZE; i++) { 2471 mutex_enter(&flock_lock); 2472 gp = lock_graph[i]; 2473 mutex_exit(&flock_lock); 2474 2475 if (gp == NULL) 2476 continue; 2477 2478 mutex_enter(&gp->gp_mutex); 2479 2480 /* signal sleeping requests so that they bail out */ 2481 lock = SLEEPING_HEAD(gp)->l_next; 2482 while (lock != SLEEPING_HEAD(gp)) { 2483 nlock = lock->l_next; 2484 if (lock->l_vnode->v_vfsp == vfsp) { 2485 ASSERT(IS_PXFS(lock)); 2486 if (GETPXFSID(lock->l_flock.l_sysid) == 2487 pxfsid) { 2488 flk_set_state(lock, 2489 FLK_CANCELLED_STATE); 2490 flk_cancel_sleeping_lock(lock, 1); 2491 } 2492 } 2493 lock = nlock; 2494 } 2495 2496 /* delete active locks */ 2497 lock = ACTIVE_HEAD(gp)->l_next; 2498 while (lock != ACTIVE_HEAD(gp)) { 2499 nlock = lock->l_next; 2500 if (lock->l_vnode->v_vfsp == vfsp) { 2501 ASSERT(IS_PXFS(lock)); 2502 if (GETPXFSID(lock->l_flock.l_sysid) == 2503 pxfsid) { 2504 flk_delete_active_lock(lock, 0); 2505 flk_wakeup(lock, 1); 2506 flk_free_lock(lock); 2507 } 2508 } 2509 lock = nlock; 2510 } 2511 mutex_exit(&gp->gp_mutex); 2512 } 2513 } 2514 2515 /* 2516 * Search for a sleeping lock manager lock which matches exactly this lock 2517 * request; if one is found, fake a signal to cancel it. 2518 * 2519 * Return 1 if a matching lock was found, 0 otherwise. 
2520 */ 2521 2522 static int 2523 flk_canceled(lock_descriptor_t *request) 2524 { 2525 lock_descriptor_t *lock, *nlock; 2526 graph_t *gp = request->l_graph; 2527 vnode_t *vp = request->l_vnode; 2528 2529 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 2530 ASSERT(IS_LOCKMGR(request)); 2531 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2532 2533 if (lock) { 2534 while (lock->l_vnode == vp) { 2535 nlock = lock->l_next; 2536 if (SAME_OWNER(lock, request) && 2537 lock->l_start == request->l_start && 2538 lock->l_end == request->l_end) { 2539 INTERRUPT_WAKEUP(lock); 2540 return (1); 2541 } 2542 lock = nlock; 2543 } 2544 } 2545 return (0); 2546 } 2547 2548 /* 2549 * Remove all the locks for the vnode belonging to the given pid and sysid. 2550 */ 2551 2552 void 2553 cleanlocks(vnode_t *vp, pid_t pid, int sysid) 2554 { 2555 graph_t *gp; 2556 lock_descriptor_t *lock, *nlock; 2557 lock_descriptor_t *link_stack; 2558 2559 STACK_INIT(link_stack); 2560 2561 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH); 2562 2563 if (gp == NULL) 2564 return; 2565 mutex_enter(&gp->gp_mutex); 2566 2567 CHECK_SLEEPING_LOCKS(gp); 2568 CHECK_ACTIVE_LOCKS(gp); 2569 2570 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2571 2572 if (lock) { 2573 do { 2574 nlock = lock->l_next; 2575 if ((lock->l_flock.l_pid == pid || 2576 pid == IGN_PID) && 2577 lock->l_flock.l_sysid == sysid) { 2578 CANCEL_WAKEUP(lock); 2579 } 2580 lock = nlock; 2581 } while (lock->l_vnode == vp); 2582 } 2583 2584 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2585 2586 if (lock) { 2587 do { 2588 nlock = lock->l_next; 2589 if ((lock->l_flock.l_pid == pid || 2590 pid == IGN_PID) && 2591 lock->l_flock.l_sysid == sysid) { 2592 flk_delete_active_lock(lock, 0); 2593 STACK_PUSH(link_stack, lock, l_stack); 2594 } 2595 lock = nlock; 2596 } while (lock->l_vnode == vp); 2597 } 2598 2599 while ((lock = STACK_TOP(link_stack)) != NULL) { 2600 STACK_POP(link_stack, l_stack); 2601 flk_wakeup(lock, 1); 2602 flk_free_lock(lock); 2603 } 2604 2605 CHECK_SLEEPING_LOCKS(gp); 2606 CHECK_ACTIVE_LOCKS(gp); 2607 CHECK_OWNER_LOCKS(gp, pid, sysid, vp); 2608 mutex_exit(&gp->gp_mutex); 2609 } 2610 2611 2612 /* 2613 * Called from 'fs' read and write routines for files that have mandatory 2614 * locking enabled. 2615 */ 2616 2617 int 2618 chklock( 2619 struct vnode *vp, 2620 int iomode, 2621 u_offset_t offset, 2622 ssize_t len, 2623 int fmode, 2624 caller_context_t *ct) 2625 { 2626 register int i; 2627 struct flock64 bf; 2628 int error = 0; 2629 2630 bf.l_type = (iomode & FWRITE) ? F_WRLCK : F_RDLCK; 2631 bf.l_whence = 0; 2632 bf.l_start = offset; 2633 bf.l_len = len; 2634 if (ct == NULL) { 2635 bf.l_pid = curproc->p_pid; 2636 bf.l_sysid = 0; 2637 } else { 2638 bf.l_pid = ct->cc_pid; 2639 bf.l_sysid = ct->cc_sysid; 2640 } 2641 i = (fmode & (FNDELAY|FNONBLOCK)) ? INOFLCK : INOFLCK|SLPFLCK; 2642 if ((i = reclock(vp, &bf, i, 0, offset, NULL)) != 0 || 2643 bf.l_type != F_UNLCK) 2644 error = i ? i : EAGAIN; 2645 return (error); 2646 } 2647 2648 /* 2649 * convoff - converts the given data (start, whence) to the 2650 * given whence. 
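 * For example (illustrative): with lckdat->l_whence == 1 (SEEK_CUR),
 * lckdat->l_start == 100 and a current file offset of 400, converting to
 * whence == 0 (SEEK_SET) yields l_start == 500 and l_whence == 0.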
2651 */ 2652 int 2653 convoff(vp, lckdat, whence, offset) 2654 struct vnode *vp; 2655 struct flock64 *lckdat; 2656 int whence; 2657 offset_t offset; 2658 { 2659 int error; 2660 struct vattr vattr; 2661 2662 if ((lckdat->l_whence == 2) || (whence == 2)) { 2663 vattr.va_mask = AT_SIZE; 2664 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) 2665 return (error); 2666 } 2667 2668 switch (lckdat->l_whence) { 2669 case 1: 2670 lckdat->l_start += offset; 2671 break; 2672 case 2: 2673 lckdat->l_start += vattr.va_size; 2674 /* FALLTHRU */ 2675 case 0: 2676 break; 2677 default: 2678 return (EINVAL); 2679 } 2680 2681 if (lckdat->l_start < 0) 2682 return (EINVAL); 2683 2684 switch (whence) { 2685 case 1: 2686 lckdat->l_start -= offset; 2687 break; 2688 case 2: 2689 lckdat->l_start -= vattr.va_size; 2690 /* FALLTHRU */ 2691 case 0: 2692 break; 2693 default: 2694 return (EINVAL); 2695 } 2696 2697 lckdat->l_whence = (short)whence; 2698 return (0); 2699 } 2700 2701 2702 /* proc_graph function definitions */ 2703 2704 /* 2705 * Function checks for deadlock due to the new 'lock'. If deadlock found 2706 * edges of this lock are freed and returned. 2707 */ 2708 2709 static int 2710 flk_check_deadlock(lock_descriptor_t *lock) 2711 { 2712 proc_vertex_t *start_vertex, *pvertex; 2713 proc_vertex_t *dvertex; 2714 proc_edge_t *pep, *ppep; 2715 edge_t *ep, *nep; 2716 proc_vertex_t *process_stack; 2717 2718 STACK_INIT(process_stack); 2719 2720 mutex_enter(&flock_lock); 2721 start_vertex = flk_get_proc_vertex(lock); 2722 ASSERT(start_vertex != NULL); 2723 2724 /* construct the edges from this process to other processes */ 2725 2726 ep = FIRST_ADJ(lock); 2727 while (ep != HEAD(lock)) { 2728 proc_vertex_t *adj_proc; 2729 2730 adj_proc = flk_get_proc_vertex(ep->to_vertex); 2731 for (pep = start_vertex->edge; pep != NULL; pep = pep->next) { 2732 if (pep->to_proc == adj_proc) { 2733 ASSERT(pep->refcount); 2734 pep->refcount++; 2735 break; 2736 } 2737 } 2738 if (pep == NULL) { 2739 pep = flk_get_proc_edge(); 2740 pep->to_proc = adj_proc; 2741 pep->refcount = 1; 2742 adj_proc->incount++; 2743 pep->next = start_vertex->edge; 2744 start_vertex->edge = pep; 2745 } 2746 ep = NEXT_ADJ(ep); 2747 } 2748 2749 ep = FIRST_IN(lock); 2750 2751 while (ep != HEAD(lock)) { 2752 proc_vertex_t *in_proc; 2753 2754 in_proc = flk_get_proc_vertex(ep->from_vertex); 2755 2756 for (pep = in_proc->edge; pep != NULL; pep = pep->next) { 2757 if (pep->to_proc == start_vertex) { 2758 ASSERT(pep->refcount); 2759 pep->refcount++; 2760 break; 2761 } 2762 } 2763 if (pep == NULL) { 2764 pep = flk_get_proc_edge(); 2765 pep->to_proc = start_vertex; 2766 pep->refcount = 1; 2767 start_vertex->incount++; 2768 pep->next = in_proc->edge; 2769 in_proc->edge = pep; 2770 } 2771 ep = NEXT_IN(ep); 2772 } 2773 2774 if (start_vertex->incount == 0) { 2775 mutex_exit(&flock_lock); 2776 return (0); 2777 } 2778 2779 flk_proc_graph_uncolor(); 2780 2781 start_vertex->p_sedge = start_vertex->edge; 2782 2783 STACK_PUSH(process_stack, start_vertex, p_stack); 2784 2785 while ((pvertex = STACK_TOP(process_stack)) != NULL) { 2786 for (pep = pvertex->p_sedge; pep != NULL; pep = pep->next) { 2787 dvertex = pep->to_proc; 2788 if (!PROC_ARRIVED(dvertex)) { 2789 STACK_PUSH(process_stack, dvertex, p_stack); 2790 dvertex->p_sedge = dvertex->edge; 2791 PROC_ARRIVE(pvertex); 2792 pvertex->p_sedge = pep->next; 2793 break; 2794 } 2795 if (!PROC_DEPARTED(dvertex)) 2796 goto deadlock; 2797 } 2798 if (pep == NULL) { 2799 PROC_DEPART(pvertex); 2800 STACK_POP(process_stack, p_stack); 2801 } 2802 
} 2803 mutex_exit(&flock_lock); 2804 return (0); 2805 2806 deadlock: 2807 2808 /* we remove all lock edges and proc edges */ 2809 2810 ep = FIRST_ADJ(lock); 2811 while (ep != HEAD(lock)) { 2812 proc_vertex_t *adj_proc; 2813 adj_proc = flk_get_proc_vertex(ep->to_vertex); 2814 nep = NEXT_ADJ(ep); 2815 IN_LIST_REMOVE(ep); 2816 ADJ_LIST_REMOVE(ep); 2817 flk_free_edge(ep); 2818 ppep = start_vertex->edge; 2819 for (pep = start_vertex->edge; pep != NULL; ppep = pep, 2820 pep = ppep->next) { 2821 if (pep->to_proc == adj_proc) { 2822 pep->refcount--; 2823 if (pep->refcount == 0) { 2824 if (pep == ppep) { 2825 start_vertex->edge = pep->next; 2826 } else { 2827 ppep->next = pep->next; 2828 } 2829 adj_proc->incount--; 2830 flk_proc_release(adj_proc); 2831 flk_free_proc_edge(pep); 2832 } 2833 break; 2834 } 2835 } 2836 ep = nep; 2837 } 2838 ep = FIRST_IN(lock); 2839 while (ep != HEAD(lock)) { 2840 proc_vertex_t *in_proc; 2841 in_proc = flk_get_proc_vertex(ep->from_vertex); 2842 nep = NEXT_IN(ep); 2843 IN_LIST_REMOVE(ep); 2844 ADJ_LIST_REMOVE(ep); 2845 flk_free_edge(ep); 2846 ppep = in_proc->edge; 2847 for (pep = in_proc->edge; pep != NULL; ppep = pep, 2848 pep = ppep->next) { 2849 if (pep->to_proc == start_vertex) { 2850 pep->refcount--; 2851 if (pep->refcount == 0) { 2852 if (pep == ppep) { 2853 in_proc->edge = pep->next; 2854 } else { 2855 ppep->next = pep->next; 2856 } 2857 start_vertex->incount--; 2858 flk_proc_release(in_proc); 2859 flk_free_proc_edge(pep); 2860 } 2861 break; 2862 } 2863 } 2864 ep = nep; 2865 } 2866 flk_proc_release(start_vertex); 2867 mutex_exit(&flock_lock); 2868 return (1); 2869 } 2870 2871 /* 2872 * Get a proc vertex. If lock's pvertex value gets a correct proc vertex 2873 * from the list we return that, otherwise we allocate one. If necessary, 2874 * we grow the list of vertices also. 2875 */ 2876 2877 static proc_vertex_t * 2878 flk_get_proc_vertex(lock_descriptor_t *lock) 2879 { 2880 int i; 2881 proc_vertex_t *pv; 2882 proc_vertex_t **palloc; 2883 2884 ASSERT(MUTEX_HELD(&flock_lock)); 2885 if (lock->pvertex != -1) { 2886 ASSERT(lock->pvertex >= 0); 2887 pv = pgraph.proc[lock->pvertex]; 2888 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) { 2889 return (pv); 2890 } 2891 } 2892 for (i = 0; i < pgraph.gcount; i++) { 2893 pv = pgraph.proc[i]; 2894 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) { 2895 lock->pvertex = pv->index = i; 2896 return (pv); 2897 } 2898 } 2899 pv = kmem_zalloc(sizeof (struct proc_vertex), KM_SLEEP); 2900 pv->pid = lock->l_flock.l_pid; 2901 pv->sysid = lock->l_flock.l_sysid; 2902 flk_proc_vertex_allocs++; 2903 if (pgraph.free != 0) { 2904 for (i = 0; i < pgraph.gcount; i++) { 2905 if (pgraph.proc[i] == NULL) { 2906 pgraph.proc[i] = pv; 2907 lock->pvertex = pv->index = i; 2908 pgraph.free--; 2909 return (pv); 2910 } 2911 } 2912 } 2913 palloc = kmem_zalloc((pgraph.gcount + PROC_CHUNK) * 2914 sizeof (proc_vertex_t *), KM_SLEEP); 2915 2916 if (pgraph.proc) { 2917 bcopy(pgraph.proc, palloc, 2918 pgraph.gcount * sizeof (proc_vertex_t *)); 2919 2920 kmem_free(pgraph.proc, 2921 pgraph.gcount * sizeof (proc_vertex_t *)); 2922 } 2923 pgraph.proc = palloc; 2924 pgraph.free += (PROC_CHUNK - 1); 2925 pv->index = lock->pvertex = pgraph.gcount; 2926 pgraph.gcount += PROC_CHUNK; 2927 pgraph.proc[pv->index] = pv; 2928 return (pv); 2929 } 2930 2931 /* 2932 * Allocate a proc edge. 
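 * The edge is reference counted by its users and is released via
 * flk_free_proc_edge() once its refcount drops back to zero.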
2933 */ 2934 2935 static proc_edge_t * 2936 flk_get_proc_edge() 2937 { 2938 proc_edge_t *pep; 2939 2940 pep = kmem_zalloc(sizeof (proc_edge_t), KM_SLEEP); 2941 flk_proc_edge_allocs++; 2942 return (pep); 2943 } 2944 2945 /* 2946 * Free the proc edge. Called whenever its reference count goes to zero. 2947 */ 2948 2949 static void 2950 flk_free_proc_edge(proc_edge_t *pep) 2951 { 2952 ASSERT(pep->refcount == 0); 2953 kmem_free((void *)pep, sizeof (proc_edge_t)); 2954 flk_proc_edge_frees++; 2955 } 2956 2957 /* 2958 * Color the graph explicitly done only when the mark value hits max value. 2959 */ 2960 2961 static void 2962 flk_proc_graph_uncolor() 2963 { 2964 int i; 2965 2966 if (pgraph.mark == UINT_MAX) { 2967 for (i = 0; i < pgraph.gcount; i++) 2968 if (pgraph.proc[i] != NULL) { 2969 pgraph.proc[i]->atime = 0; 2970 pgraph.proc[i]->dtime = 0; 2971 } 2972 pgraph.mark = 1; 2973 } else { 2974 pgraph.mark++; 2975 } 2976 } 2977 2978 /* 2979 * Release the proc vertex iff both there are no in edges and out edges 2980 */ 2981 2982 static void 2983 flk_proc_release(proc_vertex_t *proc) 2984 { 2985 ASSERT(MUTEX_HELD(&flock_lock)); 2986 if (proc->edge == NULL && proc->incount == 0) { 2987 pgraph.proc[proc->index] = NULL; 2988 pgraph.free++; 2989 kmem_free(proc, sizeof (proc_vertex_t)); 2990 flk_proc_vertex_frees++; 2991 } 2992 } 2993 2994 /* 2995 * Updates process graph to reflect change in a lock_graph. 2996 * Note: We should call this function only after we have a correctly 2997 * recomputed lock graph. Otherwise we might miss a deadlock detection. 2998 * eg: in function flk_relation() we call this function after flk_recompute_ 2999 * dependencies() otherwise if a process tries to lock a vnode hashed 3000 * into another graph it might sleep for ever. 3001 */ 3002 3003 static void 3004 flk_update_proc_graph(edge_t *ep, int delete) 3005 { 3006 proc_vertex_t *toproc, *fromproc; 3007 proc_edge_t *pep, *prevpep; 3008 3009 mutex_enter(&flock_lock); 3010 toproc = flk_get_proc_vertex(ep->to_vertex); 3011 fromproc = flk_get_proc_vertex(ep->from_vertex); 3012 3013 if (!delete) 3014 goto add; 3015 pep = prevpep = fromproc->edge; 3016 3017 ASSERT(pep != NULL); 3018 while (pep != NULL) { 3019 if (pep->to_proc == toproc) { 3020 ASSERT(pep->refcount > 0); 3021 pep->refcount--; 3022 if (pep->refcount == 0) { 3023 if (pep == prevpep) { 3024 fromproc->edge = pep->next; 3025 } else { 3026 prevpep->next = pep->next; 3027 } 3028 toproc->incount--; 3029 flk_proc_release(toproc); 3030 flk_free_proc_edge(pep); 3031 } 3032 break; 3033 } 3034 prevpep = pep; 3035 pep = pep->next; 3036 } 3037 flk_proc_release(fromproc); 3038 mutex_exit(&flock_lock); 3039 return; 3040 add: 3041 3042 pep = fromproc->edge; 3043 3044 while (pep != NULL) { 3045 if (pep->to_proc == toproc) { 3046 ASSERT(pep->refcount > 0); 3047 pep->refcount++; 3048 break; 3049 } 3050 pep = pep->next; 3051 } 3052 if (pep == NULL) { 3053 pep = flk_get_proc_edge(); 3054 pep->to_proc = toproc; 3055 pep->refcount = 1; 3056 toproc->incount++; 3057 pep->next = fromproc->edge; 3058 fromproc->edge = pep; 3059 } 3060 mutex_exit(&flock_lock); 3061 } 3062 3063 /* 3064 * Set the control status for lock manager requests. 3065 * 3066 */ 3067 3068 /* 3069 * PSARC case 1997/292 3070 * 3071 * Requires: "nlmid" must be >= 1 and <= clconf_maximum_nodeid(). 3072 * Effects: Set the state of the NLM server identified by "nlmid" 3073 * in the NLM registry to state "nlm_state." 3074 * Raises exception no_such_nlm if "nlmid" doesn't identify a known 3075 * NLM server to this LLM. 
3076 * Note that when this routine is called with NLM_SHUTTING_DOWN there 3077 * may be locks requests that have gotten started but not finished. In 3078 * particular, there may be blocking requests that are in the callback code 3079 * before sleeping (so they're not holding the lock for the graph). If 3080 * such a thread reacquires the graph's lock (to go to sleep) after 3081 * NLM state in the NLM registry is set to a non-up value, 3082 * it will notice the status and bail out. If the request gets 3083 * granted before the thread can check the NLM registry, let it 3084 * continue normally. It will get flushed when we are called with NLM_DOWN. 3085 * 3086 * Modifies: nlm_reg_obj (global) 3087 * Arguments: 3088 * nlmid (IN): id uniquely identifying an NLM server 3089 * nlm_state (IN): NLM server state to change "nlmid" to 3090 */ 3091 void 3092 cl_flk_set_nlm_status(int nlmid, flk_nlm_status_t nlm_state) 3093 { 3094 /* 3095 * Check to see if node is booted as a cluster. If not, return. 3096 */ 3097 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 3098 return; 3099 } 3100 3101 /* 3102 * Check for development/debugging. It is possible to boot a node 3103 * in non-cluster mode, and then run a special script, currently 3104 * available only to developers, to bring up the node as part of a 3105 * cluster. The problem is that running such a script does not 3106 * result in the routine flk_init() being called and hence global array 3107 * nlm_reg_status is NULL. The NLM thinks it's in cluster mode, 3108 * but the LLM needs to do an additional check to see if the global 3109 * array has been created or not. If nlm_reg_status is NULL, then 3110 * return, else continue. 3111 */ 3112 if (nlm_reg_status == NULL) { 3113 return; 3114 } 3115 3116 ASSERT(nlmid <= nlm_status_size && nlmid >= 0); 3117 mutex_enter(&nlm_reg_lock); 3118 3119 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, nlmid)) { 3120 /* 3121 * If the NLM server "nlmid" is unknown in the NLM registry, 3122 * add it to the registry in the nlm shutting down state. 3123 */ 3124 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, 3125 FLK_NLM_SHUTTING_DOWN); 3126 } else { 3127 /* 3128 * Change the state of the NLM server identified by "nlmid" 3129 * in the NLM registry to the argument "nlm_state." 3130 */ 3131 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, 3132 nlm_state); 3133 } 3134 3135 /* 3136 * The reason we must register the NLM server that is shutting down 3137 * with an LLM that doesn't already know about it (never sent a lock 3138 * request) is to handle correctly a race between shutdown and a new 3139 * lock request. Suppose that a shutdown request from the NLM server 3140 * invokes this routine at the LLM, and a thread is spawned to 3141 * service the request. Now suppose a new lock request is in 3142 * progress and has already passed the first line of defense in 3143 * reclock(), which denies new locks requests from NLM servers 3144 * that are not in the NLM_UP state. After the current routine 3145 * is invoked for both phases of shutdown, the routine will return, 3146 * having done nothing, and the lock request will proceed and 3147 * probably be granted. The problem is that the shutdown was ignored 3148 * by the lock request because there was no record of that NLM server 3149 * shutting down. We will be in the peculiar position of thinking 3150 * that we've shutdown the NLM server and all locks at all LLMs have 3151 * been discarded, but in fact there's still one lock held. 
3152 * The solution is to record the existence of NLM server and change 3153 * its state immediately to NLM_SHUTTING_DOWN. The lock request in 3154 * progress may proceed because the next phase NLM_DOWN will catch 3155 * this lock and discard it. 3156 */ 3157 mutex_exit(&nlm_reg_lock); 3158 3159 switch (nlm_state) { 3160 case FLK_NLM_UP: 3161 /* 3162 * Change the NLM state of all locks still held on behalf of 3163 * the NLM server identified by "nlmid" to NLM_UP. 3164 */ 3165 cl_flk_change_nlm_state_all_locks(nlmid, FLK_NLM_UP); 3166 break; 3167 3168 case FLK_NLM_SHUTTING_DOWN: 3169 /* 3170 * Wake up all sleeping locks for the NLM server identified 3171 * by "nlmid." Note that eventually all woken threads will 3172 * have their lock requests cancelled and descriptors 3173 * removed from the sleeping lock list. Note that the NLM 3174 * server state associated with each lock descriptor is 3175 * changed to FLK_NLM_SHUTTING_DOWN. 3176 */ 3177 cl_flk_wakeup_sleeping_nlm_locks(nlmid); 3178 break; 3179 3180 case FLK_NLM_DOWN: 3181 /* 3182 * Discard all active, granted locks for this NLM server 3183 * identified by "nlmid." 3184 */ 3185 cl_flk_unlock_nlm_granted(nlmid); 3186 break; 3187 3188 default: 3189 panic("cl_set_nlm_status: bad status (%d)", nlm_state); 3190 } 3191 } 3192 3193 /* 3194 * Set the control status for lock manager requests. 3195 * 3196 * Note that when this routine is called with FLK_WAKEUP_SLEEPERS, there 3197 * may be locks requests that have gotten started but not finished. In 3198 * particular, there may be blocking requests that are in the callback code 3199 * before sleeping (so they're not holding the lock for the graph). If 3200 * such a thread reacquires the graph's lock (to go to sleep) after 3201 * flk_lockmgr_status is set to a non-up value, it will notice the status 3202 * and bail out. If the request gets granted before the thread can check 3203 * flk_lockmgr_status, let it continue normally. It will get flushed when 3204 * we are called with FLK_LOCKMGR_DOWN. 3205 */ 3206 3207 void 3208 flk_set_lockmgr_status(flk_lockmgr_status_t status) 3209 { 3210 int i; 3211 graph_t *gp; 3212 struct flock_globals *fg; 3213 3214 fg = flk_get_globals(); 3215 ASSERT(fg != NULL); 3216 3217 mutex_enter(&flock_lock); 3218 fg->flk_lockmgr_status = status; 3219 mutex_exit(&flock_lock); 3220 3221 /* 3222 * If the lock manager is coming back up, all that's needed is to 3223 * propagate this information to the graphs. If the lock manager 3224 * is going down, additional action is required, and each graph's 3225 * copy of the state is updated atomically with this other action. 3226 */ 3227 switch (status) { 3228 case FLK_LOCKMGR_UP: 3229 for (i = 0; i < HASH_SIZE; i++) { 3230 mutex_enter(&flock_lock); 3231 gp = lock_graph[i]; 3232 mutex_exit(&flock_lock); 3233 if (gp == NULL) 3234 continue; 3235 mutex_enter(&gp->gp_mutex); 3236 fg->lockmgr_status[i] = status; 3237 mutex_exit(&gp->gp_mutex); 3238 } 3239 break; 3240 case FLK_WAKEUP_SLEEPERS: 3241 wakeup_sleeping_lockmgr_locks(fg); 3242 break; 3243 case FLK_LOCKMGR_DOWN: 3244 unlock_lockmgr_granted(fg); 3245 break; 3246 default: 3247 panic("flk_set_lockmgr_status: bad status (%d)", status); 3248 break; 3249 } 3250 } 3251 3252 /* 3253 * This routine returns all the locks that are active or sleeping and are 3254 * associated with a particular set of identifiers. If lock_state != 0, then 3255 * only locks that match the lock_state are returned. If lock_state == 0, then 3256 * all locks are returned. If pid == NOPID, the pid is ignored. 
If 3257 * use_sysid is FALSE, then the sysid is ignored. If vp is NULL, then the 3258 * vnode pointer is ignored. 3259 * 3260 * A list containing the vnode pointer and an flock structure 3261 * describing the lock is returned. Each element in the list is 3262 * dynamically allocated and must be freed by the caller. The 3263 * last item in the list is denoted by a NULL value in the ll_next 3264 * field. 3265 * 3266 * The vnode pointers returned are held. The caller is responsible 3267 * for releasing these. Note that the returned list is only a snapshot of 3268 * the current lock information, and that it is a snapshot of a moving 3269 * target (only one graph is locked at a time). 3270 */ 3271 3272 locklist_t * 3273 get_lock_list(int list_type, int lock_state, int sysid, boolean_t use_sysid, 3274 pid_t pid, const vnode_t *vp, zoneid_t zoneid) 3275 { 3276 lock_descriptor_t *lock; 3277 lock_descriptor_t *graph_head; 3278 locklist_t listhead; 3279 locklist_t *llheadp; 3280 locklist_t *llp; 3281 locklist_t *lltp; 3282 graph_t *gp; 3283 int i; 3284 int first_index; /* graph index */ 3285 int num_indexes; /* graph index */ 3286 3287 ASSERT((list_type == FLK_ACTIVE_STATE) || 3288 (list_type == FLK_SLEEPING_STATE)); 3289 3290 /* 3291 * Get a pointer to something to use as a list head while building 3292 * the rest of the list. 3293 */ 3294 llheadp = &listhead; 3295 lltp = llheadp; 3296 llheadp->ll_next = (locklist_t *)NULL; 3297 3298 /* Figure out which graphs we want to look at. */ 3299 if (vp == NULL) { 3300 first_index = 0; 3301 num_indexes = HASH_SIZE; 3302 } else { 3303 first_index = HASH_INDEX(vp); 3304 num_indexes = 1; 3305 } 3306 3307 for (i = first_index; i < first_index + num_indexes; i++) { 3308 mutex_enter(&flock_lock); 3309 gp = lock_graph[i]; 3310 mutex_exit(&flock_lock); 3311 if (gp == NULL) { 3312 continue; 3313 } 3314 3315 mutex_enter(&gp->gp_mutex); 3316 graph_head = (list_type == FLK_ACTIVE_STATE) ? 3317 ACTIVE_HEAD(gp) : SLEEPING_HEAD(gp); 3318 for (lock = graph_head->l_next; 3319 lock != graph_head; 3320 lock = lock->l_next) { 3321 if (use_sysid && lock->l_flock.l_sysid != sysid) 3322 continue; 3323 if (pid != NOPID && lock->l_flock.l_pid != pid) 3324 continue; 3325 if (vp != NULL && lock->l_vnode != vp) 3326 continue; 3327 if (lock_state && !(lock_state & lock->l_state)) 3328 continue; 3329 if (zoneid != lock->l_zoneid && zoneid != ALL_ZONES) 3330 continue; 3331 /* 3332 * A matching lock was found. Allocate 3333 * space for a new locklist entry and fill 3334 * it in. 3335 */ 3336 llp = kmem_alloc(sizeof (locklist_t), KM_SLEEP); 3337 lltp->ll_next = llp; 3338 VN_HOLD(lock->l_vnode); 3339 llp->ll_vp = lock->l_vnode; 3340 create_flock(lock, &(llp->ll_flock)); 3341 llp->ll_next = (locklist_t *)NULL; 3342 lltp = llp; 3343 } 3344 mutex_exit(&gp->gp_mutex); 3345 } 3346 3347 llp = llheadp->ll_next; 3348 return (llp); 3349 } 3350 3351 /* 3352 * These two functions are simply interfaces to get_lock_list. They return 3353 * a list of sleeping or active locks for the given sysid and pid. See 3354 * get_lock_list for details. 3355 * 3356 * In either case we don't particularly care to specify the zone of interest; 3357 * the sysid-space is global across zones, so the sysid will map to exactly one 3358 * zone, and we'll return information for that zone. 
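 *
 * Illustrative use (hypothetical caller):
 *
 *	locklist_t *llp, *lp;
 *
 *	llp = flk_get_active_locks(sysid, NOPID);
 *	for (lp = llp; lp != NULL; lp = lp->ll_next) {
 *		... examine lp->ll_vp and lp->ll_flock ...
 *	}
 *	flk_free_locklist(llp);		(drops the vnode holds)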
3359 */ 3360 3361 locklist_t * 3362 flk_get_sleeping_locks(int sysid, pid_t pid) 3363 { 3364 return (get_lock_list(FLK_SLEEPING_STATE, 0, sysid, B_TRUE, pid, NULL, 3365 ALL_ZONES)); 3366 } 3367 3368 locklist_t * 3369 flk_get_active_locks(int sysid, pid_t pid) 3370 { 3371 return (get_lock_list(FLK_ACTIVE_STATE, 0, sysid, B_TRUE, pid, NULL, 3372 ALL_ZONES)); 3373 } 3374 3375 /* 3376 * Another interface to get_lock_list. This one returns all the active 3377 * locks for a given vnode. Again, see get_lock_list for details. 3378 * 3379 * We don't need to specify which zone's locks we're interested in. The matter 3380 * would only be interesting if the vnode belonged to NFS, and NFS vnodes can't 3381 * be used by multiple zones, so the list of locks will all be from the right 3382 * zone. 3383 */ 3384 3385 locklist_t * 3386 flk_active_locks_for_vp(const vnode_t *vp) 3387 { 3388 return (get_lock_list(FLK_ACTIVE_STATE, 0, 0, B_FALSE, NOPID, vp, 3389 ALL_ZONES)); 3390 } 3391 3392 /* 3393 * Another interface to get_lock_list. This one returns all the active 3394 * nbmand locks for a given vnode. Again, see get_lock_list for details. 3395 * 3396 * See the comment for flk_active_locks_for_vp() for why we don't care to 3397 * specify the particular zone of interest. 3398 */ 3399 locklist_t * 3400 flk_active_nbmand_locks_for_vp(const vnode_t *vp) 3401 { 3402 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE, 3403 NOPID, vp, ALL_ZONES)); 3404 } 3405 3406 /* 3407 * Another interface to get_lock_list. This one returns all the active 3408 * nbmand locks for a given pid. Again, see get_lock_list for details. 3409 * 3410 * The zone doesn't need to be specified here; the locks held by a 3411 * particular process will either be local (ie, non-NFS) or from the zone 3412 * the process is executing in. This is because other parts of the system 3413 * ensure that an NFS vnode can't be used in a zone other than that in 3414 * which it was opened. 3415 */ 3416 locklist_t * 3417 flk_active_nbmand_locks(pid_t pid) 3418 { 3419 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE, 3420 pid, NULL, ALL_ZONES)); 3421 } 3422 3423 /* 3424 * Free up all entries in the locklist. 3425 */ 3426 void 3427 flk_free_locklist(locklist_t *llp) 3428 { 3429 locklist_t *next_llp; 3430 3431 while (llp) { 3432 next_llp = llp->ll_next; 3433 VN_RELE(llp->ll_vp); 3434 kmem_free(llp, sizeof (*llp)); 3435 llp = next_llp; 3436 } 3437 } 3438 3439 static void 3440 cl_flk_change_nlm_state_all_locks(int nlmid, flk_nlm_status_t nlm_state) 3441 { 3442 /* 3443 * For each graph "lg" in the hash table lock_graph do 3444 * a. Get the list of sleeping locks 3445 * b. For each lock descriptor in the list do 3446 * i. If the requested lock is an NLM server request AND 3447 * the nlmid is the same as the routine argument then 3448 * change the lock descriptor's state field to 3449 * "nlm_state." 3450 * c. Get the list of active locks 3451 * d. For each lock descriptor in the list do 3452 * i. If the requested lock is an NLM server request AND 3453 * the nlmid is the same as the routine argument then 3454 * change the lock descriptor's state field to 3455 * "nlm_state." 
3456 */ 3457 3458 int i; 3459 graph_t *gp; /* lock graph */ 3460 lock_descriptor_t *lock; /* lock */ 3461 lock_descriptor_t *nlock = NULL; /* next lock */ 3462 int lock_nlmid; 3463 3464 for (i = 0; i < HASH_SIZE; i++) { 3465 mutex_enter(&flock_lock); 3466 gp = lock_graph[i]; 3467 mutex_exit(&flock_lock); 3468 if (gp == NULL) { 3469 continue; 3470 } 3471 3472 /* Get list of sleeping locks in current lock graph. */ 3473 mutex_enter(&gp->gp_mutex); 3474 for (lock = SLEEPING_HEAD(gp)->l_next; 3475 lock != SLEEPING_HEAD(gp); 3476 lock = nlock) { 3477 nlock = lock->l_next; 3478 /* get NLM id */ 3479 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 3480 3481 /* 3482 * If NLM server request AND nlmid of lock matches 3483 * nlmid of argument, then set the NLM state of the 3484 * lock to "nlm_state." 3485 */ 3486 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 3487 SET_NLM_STATE(lock, nlm_state); 3488 } 3489 } 3490 3491 /* Get list of active locks in current lock graph. */ 3492 for (lock = ACTIVE_HEAD(gp)->l_next; 3493 lock != ACTIVE_HEAD(gp); 3494 lock = nlock) { 3495 nlock = lock->l_next; 3496 /* get NLM id */ 3497 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 3498 3499 /* 3500 * If NLM server request AND nlmid of lock matches 3501 * nlmid of argument, then set the NLM state of the 3502 * lock to "nlm_state." 3503 */ 3504 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 3505 ASSERT(IS_ACTIVE(lock)); 3506 SET_NLM_STATE(lock, nlm_state); 3507 } 3508 } 3509 mutex_exit(&gp->gp_mutex); 3510 } 3511 } 3512 3513 /* 3514 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid(). 3515 * Effects: Find all sleeping lock manager requests _only_ for the NLM server 3516 * identified by "nlmid." Poke those lock requests. 3517 */ 3518 static void 3519 cl_flk_wakeup_sleeping_nlm_locks(int nlmid) 3520 { 3521 lock_descriptor_t *lock; 3522 lock_descriptor_t *nlock = NULL; /* next lock */ 3523 int i; 3524 graph_t *gp; 3525 int lock_nlmid; 3526 3527 for (i = 0; i < HASH_SIZE; i++) { 3528 mutex_enter(&flock_lock); 3529 gp = lock_graph[i]; 3530 mutex_exit(&flock_lock); 3531 if (gp == NULL) { 3532 continue; 3533 } 3534 3535 mutex_enter(&gp->gp_mutex); 3536 for (lock = SLEEPING_HEAD(gp)->l_next; 3537 lock != SLEEPING_HEAD(gp); 3538 lock = nlock) { 3539 nlock = lock->l_next; 3540 /* 3541 * If NLM server request _and_ nlmid of lock matches 3542 * nlmid of argument, then set the NLM state of the 3543 * lock to NLM_SHUTTING_DOWN, and wake up sleeping 3544 * request. 3545 */ 3546 if (IS_LOCKMGR(lock)) { 3547 /* get NLM id */ 3548 lock_nlmid = 3549 GETNLMID(lock->l_flock.l_sysid); 3550 if (nlmid == lock_nlmid) { 3551 SET_NLM_STATE(lock, 3552 FLK_NLM_SHUTTING_DOWN); 3553 INTERRUPT_WAKEUP(lock); 3554 } 3555 } 3556 } 3557 mutex_exit(&gp->gp_mutex); 3558 } 3559 } 3560 3561 /* 3562 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid() 3563 * Effects: Find all active (granted) lock manager locks _only_ for the 3564 * NLM server identified by "nlmid" and release them. 
3565 */ 3566 static void 3567 cl_flk_unlock_nlm_granted(int nlmid) 3568 { 3569 lock_descriptor_t *lock; 3570 lock_descriptor_t *nlock = NULL; /* next lock */ 3571 int i; 3572 graph_t *gp; 3573 int lock_nlmid; 3574 3575 for (i = 0; i < HASH_SIZE; i++) { 3576 mutex_enter(&flock_lock); 3577 gp = lock_graph[i]; 3578 mutex_exit(&flock_lock); 3579 if (gp == NULL) { 3580 continue; 3581 } 3582 3583 mutex_enter(&gp->gp_mutex); 3584 for (lock = ACTIVE_HEAD(gp)->l_next; 3585 lock != ACTIVE_HEAD(gp); 3586 lock = nlock) { 3587 nlock = lock->l_next; 3588 ASSERT(IS_ACTIVE(lock)); 3589 3590 /* 3591 * If it's an NLM server request _and_ nlmid of 3592 * the lock matches nlmid of argument, then 3593 * remove the active lock the list, wakup blocked 3594 * threads, and free the storage for the lock. 3595 * Note that there's no need to mark the NLM state 3596 * of this lock to NLM_DOWN because the lock will 3597 * be deleted anyway and its storage freed. 3598 */ 3599 if (IS_LOCKMGR(lock)) { 3600 /* get NLM id */ 3601 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 3602 if (nlmid == lock_nlmid) { 3603 flk_delete_active_lock(lock, 0); 3604 flk_wakeup(lock, 1); 3605 flk_free_lock(lock); 3606 } 3607 } 3608 } 3609 mutex_exit(&gp->gp_mutex); 3610 } 3611 } 3612 3613 /* 3614 * Find all sleeping lock manager requests and poke them. 3615 */ 3616 static void 3617 wakeup_sleeping_lockmgr_locks(struct flock_globals *fg) 3618 { 3619 lock_descriptor_t *lock; 3620 lock_descriptor_t *nlock = NULL; /* next lock */ 3621 int i; 3622 graph_t *gp; 3623 zoneid_t zoneid = getzoneid(); 3624 3625 for (i = 0; i < HASH_SIZE; i++) { 3626 mutex_enter(&flock_lock); 3627 gp = lock_graph[i]; 3628 mutex_exit(&flock_lock); 3629 if (gp == NULL) { 3630 continue; 3631 } 3632 3633 mutex_enter(&gp->gp_mutex); 3634 fg->lockmgr_status[i] = FLK_WAKEUP_SLEEPERS; 3635 for (lock = SLEEPING_HEAD(gp)->l_next; 3636 lock != SLEEPING_HEAD(gp); 3637 lock = nlock) { 3638 nlock = lock->l_next; 3639 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) { 3640 INTERRUPT_WAKEUP(lock); 3641 } 3642 } 3643 mutex_exit(&gp->gp_mutex); 3644 } 3645 } 3646 3647 3648 /* 3649 * Find all active (granted) lock manager locks and release them. 3650 */ 3651 static void 3652 unlock_lockmgr_granted(struct flock_globals *fg) 3653 { 3654 lock_descriptor_t *lock; 3655 lock_descriptor_t *nlock = NULL; /* next lock */ 3656 int i; 3657 graph_t *gp; 3658 zoneid_t zoneid = getzoneid(); 3659 3660 for (i = 0; i < HASH_SIZE; i++) { 3661 mutex_enter(&flock_lock); 3662 gp = lock_graph[i]; 3663 mutex_exit(&flock_lock); 3664 if (gp == NULL) { 3665 continue; 3666 } 3667 3668 mutex_enter(&gp->gp_mutex); 3669 fg->lockmgr_status[i] = FLK_LOCKMGR_DOWN; 3670 for (lock = ACTIVE_HEAD(gp)->l_next; 3671 lock != ACTIVE_HEAD(gp); 3672 lock = nlock) { 3673 nlock = lock->l_next; 3674 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) { 3675 ASSERT(IS_ACTIVE(lock)); 3676 flk_delete_active_lock(lock, 0); 3677 flk_wakeup(lock, 1); 3678 flk_free_lock(lock); 3679 } 3680 } 3681 mutex_exit(&gp->gp_mutex); 3682 } 3683 } 3684 3685 3686 /* 3687 * Wait until a lock is granted, cancelled, or interrupted. 
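 * If cv_wait_sig() returns zero (a signal was received), the request is
 * marked INTERRUPTED and moved to FLK_INTERRUPTED_STATE, which terminates
 * the wait loop.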
3688 */ 3689 3690 static void 3691 wait_for_lock(lock_descriptor_t *request) 3692 { 3693 graph_t *gp = request->l_graph; 3694 3695 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 3696 3697 while (!(IS_GRANTED(request)) && !(IS_CANCELLED(request)) && 3698 !(IS_INTERRUPTED(request))) { 3699 if (!cv_wait_sig(&request->l_cv, &gp->gp_mutex)) { 3700 flk_set_state(request, FLK_INTERRUPTED_STATE); 3701 request->l_state |= INTERRUPTED_LOCK; 3702 } 3703 } 3704 } 3705 3706 /* 3707 * Create an flock structure from the existing lock information 3708 * 3709 * This routine is used to create flock structures for the lock manager 3710 * to use in a reclaim request. Since the lock was originated on this 3711 * host, it must be conforming to UNIX semantics, so no checking is 3712 * done to make sure it falls within the lower half of the 32-bit range. 3713 */ 3714 3715 static void 3716 create_flock(lock_descriptor_t *lp, flock64_t *flp) 3717 { 3718 ASSERT(lp->l_end == MAX_U_OFFSET_T || lp->l_end <= MAXEND); 3719 ASSERT(lp->l_end >= lp->l_start); 3720 3721 flp->l_type = lp->l_type; 3722 flp->l_whence = 0; 3723 flp->l_start = lp->l_start; 3724 flp->l_len = (lp->l_end == MAX_U_OFFSET_T) ? 0 : 3725 (lp->l_end - lp->l_start + 1); 3726 flp->l_sysid = lp->l_flock.l_sysid; 3727 flp->l_pid = lp->l_flock.l_pid; 3728 } 3729 3730 /* 3731 * Convert flock_t data describing a lock range into unsigned long starting 3732 * and ending points, which are put into lock_request. Returns 0 or an 3733 * errno value. 3734 * Large Files: max is passed by the caller and we return EOVERFLOW 3735 * as defined by LFS API. 3736 */ 3737 3738 int 3739 flk_convert_lock_data(vnode_t *vp, flock64_t *flp, 3740 u_offset_t *start, u_offset_t *end, offset_t offset) 3741 { 3742 struct vattr vattr; 3743 int error; 3744 3745 /* 3746 * Determine the starting point of the request 3747 */ 3748 switch (flp->l_whence) { 3749 case 0: /* SEEK_SET */ 3750 *start = (u_offset_t)flp->l_start; 3751 break; 3752 case 1: /* SEEK_CUR */ 3753 *start = (u_offset_t)(flp->l_start + offset); 3754 break; 3755 case 2: /* SEEK_END */ 3756 vattr.va_mask = AT_SIZE; 3757 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) 3758 return (error); 3759 *start = (u_offset_t)(flp->l_start + vattr.va_size); 3760 break; 3761 default: 3762 return (EINVAL); 3763 } 3764 3765 /* 3766 * Determine the range covered by the request. 3767 */ 3768 if (flp->l_len == 0) 3769 *end = MAX_U_OFFSET_T; 3770 else if ((offset_t)flp->l_len > 0) { 3771 *end = (u_offset_t)(*start + (flp->l_len - 1)); 3772 } else { 3773 /* 3774 * Negative length; why do we even allow this ? 3775 * Because this allows easy specification of 3776 * the last n bytes of the file. 3777 */ 3778 *end = *start; 3779 *start += (u_offset_t)flp->l_len; 3780 (*start)++; 3781 } 3782 return (0); 3783 } 3784 3785 /* 3786 * Check the validity of lock data. This can used by the NFS 3787 * frlock routines to check data before contacting the server. The 3788 * server must support semantics that aren't as restrictive as 3789 * the UNIX API, so the NFS client is required to check. 3790 * The maximum is now passed in by the caller. 3791 */ 3792 3793 int 3794 flk_check_lock_data(u_offset_t start, u_offset_t end, offset_t max) 3795 { 3796 /* 3797 * The end (length) for local locking should never be greater 3798 * than MAXEND. However, the representation for 3799 * the entire file is MAX_U_OFFSET_T. 
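	 * For example, a whole-file lock arrives with end == MAX_U_OFFSET_T;
	 * it is accepted below even though it exceeds 'max', whereas any
	 * other end beyond 'max' is rejected with EINVAL.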
3800 */ 3801 if ((start > max) || 3802 ((end > max) && (end != MAX_U_OFFSET_T))) { 3803 return (EINVAL); 3804 } 3805 if (start > end) { 3806 return (EINVAL); 3807 } 3808 return (0); 3809 } 3810 3811 /* 3812 * Fill in request->l_flock with information about the lock blocking the 3813 * request. The complexity here is that lock manager requests are allowed 3814 * to see into the upper part of the 32-bit address range, whereas local 3815 * requests are only allowed to see signed values. 3816 * 3817 * What should be done when "blocker" is a lock manager lock that uses the 3818 * upper portion of the 32-bit range, but "request" is local? Since the 3819 * request has already been determined to have been blocked by the blocker, 3820 * at least some portion of "blocker" must be in the range of the request, 3821 * or the request extends to the end of file. For the first case, the 3822 * portion in the lower range is returned with the indication that it goes 3823 * "to EOF." For the second case, the last byte of the lower range is 3824 * returned with the indication that it goes "to EOF." 3825 */ 3826 3827 static void 3828 report_blocker(lock_descriptor_t *blocker, lock_descriptor_t *request) 3829 { 3830 flock64_t *flrp; /* l_flock portion of request */ 3831 3832 ASSERT(blocker != NULL); 3833 3834 flrp = &request->l_flock; 3835 flrp->l_whence = 0; 3836 flrp->l_type = blocker->l_type; 3837 flrp->l_pid = blocker->l_flock.l_pid; 3838 flrp->l_sysid = blocker->l_flock.l_sysid; 3839 3840 if (IS_LOCKMGR(request)) { 3841 flrp->l_start = blocker->l_start; 3842 if (blocker->l_end == MAX_U_OFFSET_T) 3843 flrp->l_len = 0; 3844 else 3845 flrp->l_len = blocker->l_end - blocker->l_start + 1; 3846 } else { 3847 if (blocker->l_start > MAXEND) { 3848 flrp->l_start = MAXEND; 3849 flrp->l_len = 0; 3850 } else { 3851 flrp->l_start = blocker->l_start; 3852 if (blocker->l_end == MAX_U_OFFSET_T) 3853 flrp->l_len = 0; 3854 else 3855 flrp->l_len = blocker->l_end - 3856 blocker->l_start + 1; 3857 } 3858 } 3859 } 3860 3861 /* 3862 * PSARC case 1997/292 3863 */ 3864 /* 3865 * This is the public routine exported by flock.h. 3866 */ 3867 void 3868 cl_flk_change_nlm_state_to_unknown(int nlmid) 3869 { 3870 /* 3871 * Check to see if node is booted as a cluster. If not, return. 3872 */ 3873 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 3874 return; 3875 } 3876 3877 /* 3878 * See comment in cl_flk_set_nlm_status(). 3879 */ 3880 if (nlm_reg_status == NULL) { 3881 return; 3882 } 3883 3884 /* 3885 * protect NLM registry state with a mutex. 3886 */ 3887 ASSERT(nlmid <= nlm_status_size && nlmid >= 0); 3888 mutex_enter(&nlm_reg_lock); 3889 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, FLK_NLM_UNKNOWN); 3890 mutex_exit(&nlm_reg_lock); 3891 } 3892 3893 /* 3894 * Return non-zero if the given I/O request conflicts with an active NBMAND 3895 * lock. 3896 * If svmand is non-zero, it means look at all active locks, not just NBMAND 3897 * locks. 
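 *
 * A hypothetical caller in a file system's write path might bracket the
 * check roughly as follows (illustrative sketch only):
 *
 *	if (nbl_need_check(vp)) {
 *		nbl_start_crit(vp, RW_READER);
 *		if (nbl_lock_conflict(vp, NBL_WRITE, uiop->uio_loffset,
 *		    uiop->uio_resid, svmand, ct))
 *			error = EACCES;
 *		nbl_end_crit(vp);
 *	}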
3898 */ 3899 3900 int 3901 nbl_lock_conflict(vnode_t *vp, nbl_op_t op, u_offset_t offset, 3902 ssize_t length, int svmand, caller_context_t *ct) 3903 { 3904 int conflict = 0; 3905 graph_t *gp; 3906 lock_descriptor_t *lock; 3907 pid_t pid; 3908 int sysid; 3909 3910 if (ct == NULL) { 3911 pid = curproc->p_pid; 3912 sysid = 0; 3913 } else { 3914 pid = ct->cc_pid; 3915 sysid = ct->cc_sysid; 3916 } 3917 3918 mutex_enter(&flock_lock); 3919 gp = lock_graph[HASH_INDEX(vp)]; 3920 mutex_exit(&flock_lock); 3921 if (gp == NULL) 3922 return (0); 3923 3924 mutex_enter(&gp->gp_mutex); 3925 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 3926 3927 for (; lock && lock->l_vnode == vp; lock = lock->l_next) { 3928 if ((svmand || (lock->l_state & NBMAND_LOCK)) && 3929 (lock->l_flock.l_sysid != sysid || 3930 lock->l_flock.l_pid != pid) && 3931 lock_blocks_io(op, offset, length, 3932 lock->l_type, lock->l_start, lock->l_end)) { 3933 conflict = 1; 3934 break; 3935 } 3936 } 3937 mutex_exit(&gp->gp_mutex); 3938 3939 return (conflict); 3940 } 3941 3942 /* 3943 * Return non-zero if the given I/O request conflicts with the given lock. 3944 */ 3945 3946 static int 3947 lock_blocks_io(nbl_op_t op, u_offset_t offset, ssize_t length, 3948 int lock_type, u_offset_t lock_start, u_offset_t lock_end) 3949 { 3950 ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE); 3951 ASSERT(lock_type == F_RDLCK || lock_type == F_WRLCK); 3952 3953 if (op == NBL_READ && lock_type == F_RDLCK) 3954 return (0); 3955 3956 if (offset <= lock_start && lock_start < offset + length) 3957 return (1); 3958 if (lock_start <= offset && offset <= lock_end) 3959 return (1); 3960 3961 return (0); 3962 } 3963 3964 #ifdef DEBUG 3965 static void 3966 check_active_locks(graph_t *gp) 3967 { 3968 lock_descriptor_t *lock, *lock1; 3969 edge_t *ep; 3970 3971 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp); 3972 lock = lock->l_next) { 3973 ASSERT(IS_ACTIVE(lock)); 3974 ASSERT(NOT_BLOCKED(lock)); 3975 ASSERT(!IS_BARRIER(lock)); 3976 3977 ep = FIRST_IN(lock); 3978 3979 while (ep != HEAD(lock)) { 3980 ASSERT(IS_SLEEPING(ep->from_vertex)); 3981 ASSERT(!NOT_BLOCKED(ep->from_vertex)); 3982 ep = NEXT_IN(ep); 3983 } 3984 3985 for (lock1 = lock->l_next; lock1 != ACTIVE_HEAD(gp); 3986 lock1 = lock1->l_next) { 3987 if (lock1->l_vnode == lock->l_vnode) { 3988 if (BLOCKS(lock1, lock)) { 3989 cmn_err(CE_PANIC, 3990 "active lock %p blocks %p", 3991 (void *)lock1, (void *)lock); 3992 } else if (BLOCKS(lock, lock1)) { 3993 cmn_err(CE_PANIC, 3994 "active lock %p blocks %p", 3995 (void *)lock, (void *)lock1); 3996 } 3997 } 3998 } 3999 } 4000 } 4001 4002 /* 4003 * Effect: This functions checks to see if the transition from 'old_state' to 4004 * 'new_state' is a valid one. It returns 0 if the transition is valid 4005 * and 1 if it is not. 
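 * For example, FLK_SLEEPING_STATE may legally move to FLK_GRANTED_STATE,
 * FLK_INTERRUPTED_STATE or FLK_CANCELLED_STATE, but never directly to
 * FLK_ACTIVE_STATE.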
4006 * For a map of valid transitions, see sys/flock_impl.h 4007 */ 4008 static int 4009 check_lock_transition(int old_state, int new_state) 4010 { 4011 switch (old_state) { 4012 case FLK_INITIAL_STATE: 4013 if ((new_state == FLK_START_STATE) || 4014 (new_state == FLK_SLEEPING_STATE) || 4015 (new_state == FLK_ACTIVE_STATE) || 4016 (new_state == FLK_DEAD_STATE)) { 4017 return (0); 4018 } else { 4019 return (1); 4020 } 4021 case FLK_START_STATE: 4022 if ((new_state == FLK_ACTIVE_STATE) || 4023 (new_state == FLK_DEAD_STATE)) { 4024 return (0); 4025 } else { 4026 return (1); 4027 } 4028 case FLK_ACTIVE_STATE: 4029 if (new_state == FLK_DEAD_STATE) { 4030 return (0); 4031 } else { 4032 return (1); 4033 } 4034 case FLK_SLEEPING_STATE: 4035 if ((new_state == FLK_GRANTED_STATE) || 4036 (new_state == FLK_INTERRUPTED_STATE) || 4037 (new_state == FLK_CANCELLED_STATE)) { 4038 return (0); 4039 } else { 4040 return (1); 4041 } 4042 case FLK_GRANTED_STATE: 4043 if ((new_state == FLK_START_STATE) || 4044 (new_state == FLK_INTERRUPTED_STATE) || 4045 (new_state == FLK_CANCELLED_STATE)) { 4046 return (0); 4047 } else { 4048 return (1); 4049 } 4050 case FLK_CANCELLED_STATE: 4051 if ((new_state == FLK_INTERRUPTED_STATE) || 4052 (new_state == FLK_DEAD_STATE)) { 4053 return (0); 4054 } else { 4055 return (1); 4056 } 4057 case FLK_INTERRUPTED_STATE: 4058 if (new_state == FLK_DEAD_STATE) { 4059 return (0); 4060 } else { 4061 return (1); 4062 } 4063 case FLK_DEAD_STATE: 4064 /* May be set more than once */ 4065 if (new_state == FLK_DEAD_STATE) { 4066 return (0); 4067 } else { 4068 return (1); 4069 } 4070 default: 4071 return (1); 4072 } 4073 } 4074 4075 static void 4076 check_sleeping_locks(graph_t *gp) 4077 { 4078 lock_descriptor_t *lock1, *lock2; 4079 edge_t *ep; 4080 for (lock1 = SLEEPING_HEAD(gp)->l_next; lock1 != SLEEPING_HEAD(gp); 4081 lock1 = lock1->l_next) { 4082 ASSERT(!IS_BARRIER(lock1)); 4083 for (lock2 = lock1->l_next; lock2 != SLEEPING_HEAD(gp); 4084 lock2 = lock2->l_next) { 4085 if (lock1->l_vnode == lock2->l_vnode) { 4086 if (BLOCKS(lock2, lock1)) { 4087 ASSERT(!IS_GRANTED(lock1)); 4088 ASSERT(!NOT_BLOCKED(lock1)); 4089 path(lock1, lock2); 4090 } 4091 } 4092 } 4093 4094 for (lock2 = ACTIVE_HEAD(gp)->l_next; lock2 != ACTIVE_HEAD(gp); 4095 lock2 = lock2->l_next) { 4096 ASSERT(!IS_BARRIER(lock1)); 4097 if (lock1->l_vnode == lock2->l_vnode) { 4098 if (BLOCKS(lock2, lock1)) { 4099 ASSERT(!IS_GRANTED(lock1)); 4100 ASSERT(!NOT_BLOCKED(lock1)); 4101 path(lock1, lock2); 4102 } 4103 } 4104 } 4105 ep = FIRST_ADJ(lock1); 4106 while (ep != HEAD(lock1)) { 4107 ASSERT(BLOCKS(ep->to_vertex, lock1)); 4108 ep = NEXT_ADJ(ep); 4109 } 4110 } 4111 } 4112 4113 static int 4114 level_two_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2, int no_path) 4115 { 4116 edge_t *ep; 4117 lock_descriptor_t *vertex; 4118 lock_descriptor_t *vertex_stack; 4119 4120 STACK_INIT(vertex_stack); 4121 4122 flk_graph_uncolor(lock1->l_graph); 4123 ep = FIRST_ADJ(lock1); 4124 ASSERT(ep != HEAD(lock1)); 4125 while (ep != HEAD(lock1)) { 4126 if (no_path) 4127 ASSERT(ep->to_vertex != lock2); 4128 STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack); 4129 COLOR(ep->to_vertex); 4130 ep = NEXT_ADJ(ep); 4131 } 4132 4133 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 4134 STACK_POP(vertex_stack, l_dstack); 4135 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex); 4136 ep = NEXT_ADJ(ep)) { 4137 if (COLORED(ep->to_vertex)) 4138 continue; 4139 COLOR(ep->to_vertex); 4140 if (ep->to_vertex == lock2) 4141 return (1); 4142 4143 STACK_PUSH(vertex_stack, 
ep->to_vertex, l_dstack); 4144 } 4145 } 4146 return (0); 4147 } 4148 4149 static void 4150 check_owner_locks(graph_t *gp, pid_t pid, int sysid, vnode_t *vp) 4151 { 4152 lock_descriptor_t *lock; 4153 4154 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 4155 4156 if (lock) { 4157 while (lock != ACTIVE_HEAD(gp) && (lock->l_vnode == vp)) { 4158 if (lock->l_flock.l_pid == pid && 4159 lock->l_flock.l_sysid == sysid) 4160 cmn_err(CE_PANIC, 4161 "owner pid %d's lock %p in active queue", 4162 pid, (void *)lock); 4163 lock = lock->l_next; 4164 } 4165 } 4166 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 4167 4168 if (lock) { 4169 while (lock != SLEEPING_HEAD(gp) && (lock->l_vnode == vp)) { 4170 if (lock->l_flock.l_pid == pid && 4171 lock->l_flock.l_sysid == sysid) 4172 cmn_err(CE_PANIC, 4173 "owner pid %d's lock %p in sleep queue", 4174 pid, (void *)lock); 4175 lock = lock->l_next; 4176 } 4177 } 4178 } 4179 4180 static int 4181 level_one_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2) 4182 { 4183 edge_t *ep = FIRST_ADJ(lock1); 4184 4185 while (ep != HEAD(lock1)) { 4186 if (ep->to_vertex == lock2) 4187 return (1); 4188 else 4189 ep = NEXT_ADJ(ep); 4190 } 4191 return (0); 4192 } 4193 4194 static int 4195 no_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2) 4196 { 4197 return (!level_two_path(lock1, lock2, 1)); 4198 } 4199 4200 static void 4201 path(lock_descriptor_t *lock1, lock_descriptor_t *lock2) 4202 { 4203 if (level_one_path(lock1, lock2)) { 4204 if (level_two_path(lock1, lock2, 0) != 0) { 4205 cmn_err(CE_WARN, 4206 "one edge one path from lock1 %p lock2 %p", 4207 (void *)lock1, (void *)lock2); 4208 } 4209 } else if (no_path(lock1, lock2)) { 4210 cmn_err(CE_PANIC, 4211 "No path from lock1 %p to lock2 %p", 4212 (void *)lock1, (void *)lock2); 4213 } 4214 } 4215 #endif /* DEBUG */ 4216
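
/*
 * Illustrative example (hypothetical caller, not part of this module):
 * a file system's read routine typically consults chklock() before doing
 * I/O on a vnode with mandatory locking enabled, roughly as follows:
 *
 *	if (MANDLOCK(vp, mode) &&
 *	    (error = chklock(vp, FREAD, uiop->uio_loffset, uiop->uio_resid,
 *	    uiop->uio_fmode, ct)) != 0)
 *		return (error);
 *
 * chklock() builds a query flock64 for the I/O range and calls reclock();
 * EAGAIN is returned if a conflicting lock is found.
 */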