1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* ONC_PLUS EXTRACT START */ 23 24 /* 25 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 26 * Use is subject to license terms. 27 */ 28 29 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 30 /* All Rights Reserved */ 31 32 #pragma ident "%Z%%M% %I% %E% SMI" 33 34 #include <sys/flock_impl.h> 35 #include <sys/vfs.h> 36 #include <sys/t_lock.h> /* for <sys/callb.h> */ 37 #include <sys/callb.h> 38 #include <sys/clconf.h> 39 #include <sys/cladm.h> 40 #include <sys/nbmlock.h> 41 #include <sys/cred.h> 42 #include <sys/policy.h> 43 44 /* 45 * The following four variables are for statistics purposes and they are 46 * not protected by locks. They may not be accurate but will at least be 47 * close to the actual value. 48 */ 49 50 int flk_lock_allocs; 51 int flk_lock_frees; 52 int edge_allocs; 53 int edge_frees; 54 int flk_proc_vertex_allocs; 55 int flk_proc_edge_allocs; 56 int flk_proc_vertex_frees; 57 int flk_proc_edge_frees; 58 59 static kmutex_t flock_lock; 60 61 #ifdef DEBUG 62 int check_debug = 0; 63 #define CHECK_ACTIVE_LOCKS(gp) if (check_debug) \ 64 check_active_locks(gp); 65 #define CHECK_SLEEPING_LOCKS(gp) if (check_debug) \ 66 check_sleeping_locks(gp); 67 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp) \ 68 if (check_debug) \ 69 check_owner_locks(gp, pid, sysid, vp); 70 #define CHECK_LOCK_TRANSITION(old_state, new_state) \ 71 { \ 72 if (check_lock_transition(old_state, new_state)) { \ 73 cmn_err(CE_PANIC, "Illegal lock transition \ 74 from %d to %d", old_state, new_state); \ 75 } \ 76 } 77 #else 78 79 #define CHECK_ACTIVE_LOCKS(gp) 80 #define CHECK_SLEEPING_LOCKS(gp) 81 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp) 82 #define CHECK_LOCK_TRANSITION(old_state, new_state) 83 84 #endif /* DEBUG */ 85 86 struct kmem_cache *flk_edge_cache; 87 88 graph_t *lock_graph[HASH_SIZE]; 89 proc_graph_t pgraph; 90 91 /* 92 * Clustering. 93 * 94 * NLM REGISTRY TYPE IMPLEMENTATION 95 * 96 * Assumptions: 97 * 1. Nodes in a cluster are numbered starting at 1; always non-negative 98 * integers; maximum node id is returned by clconf_maximum_nodeid(). 99 * 2. We use this node id to identify the node an NLM server runs on. 100 */ 101 102 /* 103 * NLM registry object keeps track of NLM servers via their 104 * nlmids (which are the node ids of the node in the cluster they run on) 105 * that have requested locks at this LLM with which this registry is 106 * associated. 107 * 108 * Representation of abstraction: 109 * rep = record[ states: array[nlm_state], 110 * lock: mutex] 111 * 112 * Representation invariants: 113 * 1. 
index i of rep.states is between 0 and n - 1, where n is the number
 *	of elements in the array, which happens to be the maximum number
 *	of nodes in the cluster configuration + 1.
 *   2. map nlmid to index i of rep.states
 *		0 -> 0
 *		1 -> 1
 *		2 -> 2
 *		...
 *		n - 1 -> clconf_maximum_nodeid()
 *   3. This 1-1 mapping is quite convenient and it avoids errors resulting
 *	from forgetting to subtract 1 from the index.
 *   4. The reason we keep the 0th index is the following.  A legitimate
 *	cluster configuration includes making a UFS file system NFS
 *	exportable.  The code is structured so that if you're in a cluster
 *	you do one thing; otherwise, you do something else.  The problem
 *	is what to do if you think you're in a cluster with PXFS loaded,
 *	but you're using UFS, not PXFS?  The upper two bytes of the sysid
 *	encode the node id of the node where the NLM server runs; these
 *	bytes are zero for UFS.  Since the nodeid is used to index into the
 *	registry, we can record the NLM server state information at index
 *	0 using the same mechanism used for PXFS file locks!
 */
static flk_nlm_status_t *nlm_reg_status = NULL;	/* state array 0..N-1 */
static kmutex_t nlm_reg_lock;			/* lock to protect array */
static uint_t nlm_status_size;			/* size of state array */

/*
 * Although we need a global lock dependency graph (and associated data
 * structures), we also need a per-zone notion of whether the lock manager is
 * running, and so whether to allow lock manager requests or not.
 *
 * Thus, on a per-zone basis we maintain a ``global'' variable
 * (flk_lockmgr_status), protected by flock_lock, and set when the lock
 * manager is determined to be changing state (starting or stopping).
 *
 * Each graph/zone pair also has a copy of this variable, which is protected
 * by the graph's mutex.
 *
 * The per-graph copies are used to synchronize lock requests with shutdown
 * requests.  The global copy is used to initialize the per-graph field when
 * a new graph is created.
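 *
 * A condensed sketch of how the global copy seeds a per-graph copy (see
 * flk_get_lock_graph() below for the actual code):
 *
 *	fg = flk_get_globals();
 *	fg->lockmgr_status[HASH_INDEX(vp)] = fg->flk_lockmgr_status;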
 */
struct flock_globals {
	flk_lockmgr_status_t flk_lockmgr_status;
	flk_lockmgr_status_t lockmgr_status[HASH_SIZE];
};

zone_key_t flock_zone_key;

static void create_flock(lock_descriptor_t *, flock64_t *);
static lock_descriptor_t *flk_get_lock(void);
static void flk_free_lock(lock_descriptor_t *lock);
static void flk_get_first_blocking_lock(lock_descriptor_t *request);
static int flk_process_request(lock_descriptor_t *);
static int flk_add_edge(lock_descriptor_t *, lock_descriptor_t *, int, int);
static edge_t *flk_get_edge(void);
static int flk_wait_execute_request(lock_descriptor_t *);
static int flk_relation(lock_descriptor_t *, lock_descriptor_t *);
static void flk_insert_active_lock(lock_descriptor_t *);
static void flk_delete_active_lock(lock_descriptor_t *, int);
static void flk_insert_sleeping_lock(lock_descriptor_t *);
static void flk_graph_uncolor(graph_t *);
static void flk_wakeup(lock_descriptor_t *, int);
static void flk_free_edge(edge_t *);
static void flk_recompute_dependencies(lock_descriptor_t *,
			lock_descriptor_t **, int, int);
static int flk_find_barriers(lock_descriptor_t *);
static void flk_update_barriers(lock_descriptor_t *);
static int flk_color_reachables(lock_descriptor_t *);
static int flk_canceled(lock_descriptor_t *);
static void flk_delete_locks_by_sysid(lock_descriptor_t *);
static void report_blocker(lock_descriptor_t *, lock_descriptor_t *);
static void wait_for_lock(lock_descriptor_t *);
static void unlock_lockmgr_granted(struct flock_globals *);
static void wakeup_sleeping_lockmgr_locks(struct flock_globals *);

/* Clustering hooks */
static void cl_flk_change_nlm_state_all_locks(int, flk_nlm_status_t);
static void cl_flk_wakeup_sleeping_nlm_locks(int);
static void cl_flk_unlock_nlm_granted(int);

#ifdef DEBUG
static int check_lock_transition(int, int);
static void check_sleeping_locks(graph_t *);
static void check_active_locks(graph_t *);
static int no_path(lock_descriptor_t *, lock_descriptor_t *);
static void path(lock_descriptor_t *, lock_descriptor_t *);
static void check_owner_locks(graph_t *, pid_t, int, vnode_t *);
static int level_one_path(lock_descriptor_t *, lock_descriptor_t *);
static int level_two_path(lock_descriptor_t *, lock_descriptor_t *, int);
#endif

/* proc_graph function definitions */
static int flk_check_deadlock(lock_descriptor_t *);
static void flk_proc_graph_uncolor(void);
static proc_vertex_t *flk_get_proc_vertex(lock_descriptor_t *);
static proc_edge_t *flk_get_proc_edge(void);
static void flk_proc_release(proc_vertex_t *);
static void flk_free_proc_edge(proc_edge_t *);
static void flk_update_proc_graph(edge_t *, int);

/* Non-blocking mandatory locking */
static int lock_blocks_io(nbl_op_t, u_offset_t, ssize_t, int, u_offset_t,
			u_offset_t);

static struct flock_globals *
flk_get_globals(void)
{
	/*
	 * The KLM module had better be loaded if we're attempting to handle
	 * lockmgr requests.
223 */ 224 ASSERT(flock_zone_key != ZONE_KEY_UNINITIALIZED); 225 return (zone_getspecific(flock_zone_key, curproc->p_zone)); 226 } 227 228 static flk_lockmgr_status_t 229 flk_get_lockmgr_status(void) 230 { 231 struct flock_globals *fg; 232 233 ASSERT(MUTEX_HELD(&flock_lock)); 234 235 if (flock_zone_key == ZONE_KEY_UNINITIALIZED) { 236 /* 237 * KLM module not loaded; lock manager definitely not running. 238 */ 239 return (FLK_LOCKMGR_DOWN); 240 } 241 fg = flk_get_globals(); 242 return (fg->flk_lockmgr_status); 243 } 244 245 /* 246 * Routine called from fs_frlock in fs/fs_subr.c 247 */ 248 249 int 250 reclock(vnode_t *vp, 251 flock64_t *lckdat, 252 int cmd, 253 int flag, 254 u_offset_t offset, 255 flk_callback_t *flk_cbp) 256 { 257 lock_descriptor_t stack_lock_request; 258 lock_descriptor_t *lock_request; 259 int error = 0; 260 graph_t *gp; 261 int nlmid; 262 263 /* 264 * Check access permissions 265 */ 266 if ((cmd & SETFLCK) && 267 ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) || 268 (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0))) 269 return (EBADF); 270 271 /* 272 * for query and unlock we use the stack_lock_request 273 */ 274 275 if ((lckdat->l_type == F_UNLCK) || 276 !((cmd & INOFLCK) || (cmd & SETFLCK))) { 277 lock_request = &stack_lock_request; 278 (void) bzero((caddr_t)lock_request, 279 sizeof (lock_descriptor_t)); 280 281 /* 282 * following is added to make the assertions in 283 * flk_execute_request() to pass through 284 */ 285 286 lock_request->l_edge.edge_in_next = &lock_request->l_edge; 287 lock_request->l_edge.edge_in_prev = &lock_request->l_edge; 288 lock_request->l_edge.edge_adj_next = &lock_request->l_edge; 289 lock_request->l_edge.edge_adj_prev = &lock_request->l_edge; 290 lock_request->l_status = FLK_INITIAL_STATE; 291 } else { 292 lock_request = flk_get_lock(); 293 } 294 lock_request->l_state = 0; 295 lock_request->l_vnode = vp; 296 lock_request->l_zoneid = getzoneid(); 297 298 /* 299 * Convert the request range into the canonical start and end 300 * values. The NLM protocol supports locking over the entire 301 * 32-bit range, so there's no range checking for remote requests, 302 * but we still need to verify that local requests obey the rules. 303 */ 304 /* Clustering */ 305 if ((cmd & (RCMDLCK | PCMDLCK)) != 0) { 306 ASSERT(lckdat->l_whence == 0); 307 lock_request->l_start = lckdat->l_start; 308 lock_request->l_end = (lckdat->l_len == 0) ? MAX_U_OFFSET_T : 309 lckdat->l_start + (lckdat->l_len - 1); 310 } else { 311 /* check the validity of the lock range */ 312 error = flk_convert_lock_data(vp, lckdat, 313 &lock_request->l_start, &lock_request->l_end, 314 offset); 315 if (error) { 316 goto done; 317 } 318 error = flk_check_lock_data(lock_request->l_start, 319 lock_request->l_end, MAXEND); 320 if (error) { 321 goto done; 322 } 323 } 324 325 ASSERT(lock_request->l_end >= lock_request->l_start); 326 327 lock_request->l_type = lckdat->l_type; 328 if (cmd & INOFLCK) 329 lock_request->l_state |= IO_LOCK; 330 if (cmd & SLPFLCK) 331 lock_request->l_state |= WILLING_TO_SLEEP_LOCK; 332 if (cmd & RCMDLCK) 333 lock_request->l_state |= LOCKMGR_LOCK; 334 if (cmd & NBMLCK) 335 lock_request->l_state |= NBMAND_LOCK; 336 /* 337 * Clustering: set flag for PXFS locks 338 * We do not _only_ check for the PCMDLCK flag because PXFS locks could 339 * also be of type 'RCMDLCK'. 340 * We do not _only_ check the GETPXFSID() macro because local PXFS 341 * clients use a pxfsid of zero to permit deadlock detection in the LLM. 
342 */ 343 344 if ((cmd & PCMDLCK) || (GETPXFSID(lckdat->l_sysid) != 0)) { 345 lock_request->l_state |= PXFS_LOCK; 346 } 347 if (!((cmd & SETFLCK) || (cmd & INOFLCK))) { 348 if (lock_request->l_type == F_RDLCK || 349 lock_request->l_type == F_WRLCK) 350 lock_request->l_state |= QUERY_LOCK; 351 } 352 lock_request->l_flock = (*lckdat); 353 lock_request->l_callbacks = flk_cbp; 354 355 /* 356 * We are ready for processing the request 357 */ 358 if (IS_LOCKMGR(lock_request)) { 359 /* 360 * If the lock request is an NLM server request .... 361 */ 362 if (nlm_status_size == 0) { /* not booted as cluster */ 363 mutex_enter(&flock_lock); 364 /* 365 * Bail out if this is a lock manager request and the 366 * lock manager is not supposed to be running. 367 */ 368 if (flk_get_lockmgr_status() != FLK_LOCKMGR_UP) { 369 mutex_exit(&flock_lock); 370 error = ENOLCK; 371 goto done; 372 } 373 mutex_exit(&flock_lock); 374 } else { /* booted as a cluster */ 375 nlmid = GETNLMID(lock_request->l_flock.l_sysid); 376 ASSERT(nlmid <= nlm_status_size && nlmid >= 0); 377 378 mutex_enter(&nlm_reg_lock); 379 /* 380 * If the NLM registry does not know about this 381 * NLM server making the request, add its nlmid 382 * to the registry. 383 */ 384 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, 385 nlmid)) { 386 FLK_REGISTRY_ADD_NLMID(nlm_reg_status, nlmid); 387 } else if (!FLK_REGISTRY_IS_NLM_UP(nlm_reg_status, 388 nlmid)) { 389 /* 390 * If the NLM server is already known (has made 391 * previous lock requests) and its state is 392 * not NLM_UP (means that NLM server is 393 * shutting down), then bail out with an 394 * error to deny the lock request. 395 */ 396 mutex_exit(&nlm_reg_lock); 397 error = ENOLCK; 398 goto done; 399 } 400 mutex_exit(&nlm_reg_lock); 401 } 402 } 403 404 /* Now get the lock graph for a particular vnode */ 405 gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH); 406 407 /* 408 * We drop rwlock here otherwise this might end up causing a 409 * deadlock if this IOLOCK sleeps. (bugid # 1183392). 410 */ 411 412 if (IS_IO_LOCK(lock_request)) { 413 VOP_RWUNLOCK(vp, 414 (lock_request->l_type == F_RDLCK) ? 415 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL); 416 } 417 mutex_enter(&gp->gp_mutex); 418 419 lock_request->l_state |= REFERENCED_LOCK; 420 lock_request->l_graph = gp; 421 422 switch (lock_request->l_type) { 423 case F_RDLCK: 424 case F_WRLCK: 425 if (IS_QUERY_LOCK(lock_request)) { 426 flk_get_first_blocking_lock(lock_request); 427 (*lckdat) = lock_request->l_flock; 428 break; 429 } 430 431 /* process the request now */ 432 433 error = flk_process_request(lock_request); 434 break; 435 436 case F_UNLCK: 437 /* unlock request will not block so execute it immediately */ 438 439 if (IS_LOCKMGR(lock_request) && 440 flk_canceled(lock_request)) { 441 error = 0; 442 } else { 443 error = flk_execute_request(lock_request); 444 } 445 break; 446 447 case F_UNLKSYS: 448 /* 449 * Recovery mechanism to release lock manager locks when 450 * NFS client crashes and restart. NFS server will clear 451 * old locks and grant new locks. 
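	 *
	 * Illustrative sketch (caller side, not code from this file): the
	 * lock manager discards a rebooted client's old locks by sending a
	 * request whose flock64 carries
	 *
	 *	l_type  = F_UNLKSYS;
	 *	l_sysid = sysid of the rebooted client;
	 *
	 * The checks below reject a zero sysid and callers lacking NFS
	 * privilege before calling flk_delete_locks_by_sysid().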
452 */ 453 454 if (lock_request->l_flock.l_sysid == 0) { 455 mutex_exit(&gp->gp_mutex); 456 return (EINVAL); 457 } 458 if (secpolicy_nfs(CRED()) != 0) { 459 mutex_exit(&gp->gp_mutex); 460 return (EPERM); 461 } 462 flk_delete_locks_by_sysid(lock_request); 463 lock_request->l_state &= ~REFERENCED_LOCK; 464 flk_set_state(lock_request, FLK_DEAD_STATE); 465 flk_free_lock(lock_request); 466 mutex_exit(&gp->gp_mutex); 467 return (0); 468 469 default: 470 error = EINVAL; 471 break; 472 } 473 474 /* Clustering: For blocked PXFS locks, return */ 475 if (error == PXFS_LOCK_BLOCKED) { 476 lock_request->l_state &= ~REFERENCED_LOCK; 477 mutex_exit(&gp->gp_mutex); 478 return (error); 479 } 480 481 /* 482 * Now that we have seen the status of locks in the system for 483 * this vnode we acquire the rwlock if it is an IO_LOCK. 484 */ 485 486 if (IS_IO_LOCK(lock_request)) { 487 (void) VOP_RWLOCK(vp, 488 (lock_request->l_type == F_RDLCK) ? 489 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL); 490 if (!error) { 491 lckdat->l_type = F_UNLCK; 492 493 /* 494 * This wake up is needed otherwise 495 * if IO_LOCK has slept the dependents on this 496 * will not be woken up at all. (bugid # 1185482). 497 */ 498 499 flk_wakeup(lock_request, 1); 500 flk_set_state(lock_request, FLK_DEAD_STATE); 501 flk_free_lock(lock_request); 502 } 503 /* 504 * else if error had occurred either flk_process_request() 505 * has returned EDEADLK in which case there will be no 506 * dependents for this lock or EINTR from flk_wait_execute_ 507 * request() in which case flk_cancel_sleeping_lock() 508 * would have been done. same is true with EBADF. 509 */ 510 } 511 512 if (lock_request == &stack_lock_request) { 513 flk_set_state(lock_request, FLK_DEAD_STATE); 514 } else { 515 lock_request->l_state &= ~REFERENCED_LOCK; 516 if ((error != 0) || IS_DELETED(lock_request)) { 517 flk_set_state(lock_request, FLK_DEAD_STATE); 518 flk_free_lock(lock_request); 519 } 520 } 521 522 mutex_exit(&gp->gp_mutex); 523 return (error); 524 525 done: 526 flk_set_state(lock_request, FLK_DEAD_STATE); 527 if (lock_request != &stack_lock_request) 528 flk_free_lock(lock_request); 529 return (error); 530 } 531 532 /* 533 * Invoke the callbacks in the given list. If before sleeping, invoke in 534 * list order. If after sleeping, invoke in reverse order. 535 * 536 * CPR (suspend/resume) support: if one of the callbacks returns a 537 * callb_cpr_t, return it. This will be used to make the thread CPR-safe 538 * while it is sleeping. There should be at most one callb_cpr_t for the 539 * thread. 540 * XXX This is unnecessarily complicated. The CPR information should just 541 * get passed in directly through VOP_FRLOCK and reclock, rather than 542 * sneaking it in via a callback. 
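 *
 * A hedged caller-side sketch (names here are hypothetical, not from this
 * file): a file system wanting CPR-safe blocking wraps its CPR info in a
 * callback before calling reclock():
 *
 *	flk_callback_t flk_cb;
 *
 *	flk_init_callback(&flk_cb, my_cpr_callback, &my_cpr_info);
 *	error = reclock(vp, bfp, cmd, flag, offset, &flk_cb);
 *
 * where my_cpr_callback() returns its callb_cpr_t when invoked with
 * FLK_BEFORE_SLEEP.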
543 */ 544 545 callb_cpr_t * 546 flk_invoke_callbacks(flk_callback_t *cblist, flk_cb_when_t when) 547 { 548 callb_cpr_t *cpr_callbackp = NULL; 549 callb_cpr_t *one_result; 550 flk_callback_t *cb; 551 552 if (cblist == NULL) 553 return (NULL); 554 555 if (when == FLK_BEFORE_SLEEP) { 556 cb = cblist; 557 do { 558 one_result = (*cb->cb_callback)(when, cb->cb_data); 559 if (one_result != NULL) { 560 ASSERT(cpr_callbackp == NULL); 561 cpr_callbackp = one_result; 562 } 563 cb = cb->cb_next; 564 } while (cb != cblist); 565 } else { 566 cb = cblist->cb_prev; 567 do { 568 one_result = (*cb->cb_callback)(when, cb->cb_data); 569 if (one_result != NULL) { 570 cpr_callbackp = one_result; 571 } 572 cb = cb->cb_prev; 573 } while (cb != cblist->cb_prev); 574 } 575 576 return (cpr_callbackp); 577 } 578 579 /* 580 * Initialize a flk_callback_t to hold the given callback. 581 */ 582 583 void 584 flk_init_callback(flk_callback_t *flk_cb, 585 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata) 586 { 587 flk_cb->cb_next = flk_cb; 588 flk_cb->cb_prev = flk_cb; 589 flk_cb->cb_callback = cb_fcn; 590 flk_cb->cb_data = cbdata; 591 } 592 593 /* 594 * Initialize an flk_callback_t and then link it into the head of an 595 * existing list (which may be NULL). 596 */ 597 598 void 599 flk_add_callback(flk_callback_t *newcb, 600 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), 601 void *cbdata, flk_callback_t *cblist) 602 { 603 flk_init_callback(newcb, cb_fcn, cbdata); 604 605 if (cblist == NULL) 606 return; 607 608 newcb->cb_prev = cblist->cb_prev; 609 newcb->cb_next = cblist; 610 cblist->cb_prev->cb_next = newcb; 611 cblist->cb_prev = newcb; 612 } 613 /* ONC_PLUS EXTRACT END */ 614 615 /* 616 * Initialize the flk_edge_cache data structure and create the 617 * nlm_reg_status array. 618 */ 619 620 void 621 flk_init(void) 622 { 623 uint_t i; 624 625 flk_edge_cache = kmem_cache_create("flk_edges", 626 sizeof (struct edge), 0, NULL, NULL, NULL, NULL, NULL, 0); 627 if (flk_edge_cache == NULL) { 628 cmn_err(CE_PANIC, "Couldn't create flk_edge_cache\n"); 629 } 630 /* 631 * Create the NLM registry object. 632 */ 633 634 if (cluster_bootflags & CLUSTER_BOOTED) { 635 /* 636 * This routine tells you the maximum node id that will be used 637 * in the cluster. This number will be the size of the nlm 638 * registry status array. We add 1 because we will be using 639 * all entries indexed from 0 to maxnodeid; e.g., from 0 640 * to 64, for a total of 65 entries. 641 */ 642 nlm_status_size = clconf_maximum_nodeid() + 1; 643 } else { 644 nlm_status_size = 0; 645 } 646 647 if (nlm_status_size != 0) { /* booted as a cluster */ 648 nlm_reg_status = (flk_nlm_status_t *) 649 kmem_alloc(sizeof (flk_nlm_status_t) * nlm_status_size, 650 KM_SLEEP); 651 652 /* initialize all NLM states in array to NLM_UNKNOWN */ 653 for (i = 0; i < nlm_status_size; i++) { 654 nlm_reg_status[i] = FLK_NLM_UNKNOWN; 655 } 656 } 657 } 658 659 /* 660 * Zone constructor/destructor callbacks to be executed when a zone is 661 * created/destroyed. 
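 *
 * Registration happens outside this file (in the KLM module), so the call
 * below is a sketch rather than a quotation:
 *
 *	zone_key_create(&flock_zone_key, flk_zone_init, NULL, flk_zone_fini);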
662 */ 663 /* ARGSUSED */ 664 void * 665 flk_zone_init(zoneid_t zoneid) 666 { 667 struct flock_globals *fg; 668 uint_t i; 669 670 fg = kmem_alloc(sizeof (*fg), KM_SLEEP); 671 fg->flk_lockmgr_status = FLK_LOCKMGR_UP; 672 for (i = 0; i < HASH_SIZE; i++) 673 fg->lockmgr_status[i] = FLK_LOCKMGR_UP; 674 return (fg); 675 } 676 677 /* ARGSUSED */ 678 void 679 flk_zone_fini(zoneid_t zoneid, void *data) 680 { 681 struct flock_globals *fg = data; 682 683 kmem_free(fg, sizeof (*fg)); 684 } 685 686 /* 687 * Get a lock_descriptor structure with initialisation of edge lists. 688 */ 689 690 static lock_descriptor_t * 691 flk_get_lock(void) 692 { 693 lock_descriptor_t *l; 694 695 l = kmem_zalloc(sizeof (lock_descriptor_t), KM_SLEEP); 696 697 cv_init(&l->l_cv, NULL, CV_DRIVER, NULL); 698 l->l_edge.edge_in_next = &l->l_edge; 699 l->l_edge.edge_in_prev = &l->l_edge; 700 l->l_edge.edge_adj_next = &l->l_edge; 701 l->l_edge.edge_adj_prev = &l->l_edge; 702 l->pvertex = -1; 703 l->l_status = FLK_INITIAL_STATE; 704 flk_lock_allocs++; 705 return (l); 706 } 707 708 /* 709 * Free a lock_descriptor structure. Just sets the DELETED_LOCK flag 710 * when some thread has a reference to it as in reclock(). 711 */ 712 713 void 714 flk_free_lock(lock_descriptor_t *lock) 715 { 716 ASSERT(IS_DEAD(lock)); 717 if (IS_REFERENCED(lock)) { 718 lock->l_state |= DELETED_LOCK; 719 return; 720 } 721 flk_lock_frees++; 722 kmem_free((void *)lock, sizeof (lock_descriptor_t)); 723 } 724 725 void 726 flk_set_state(lock_descriptor_t *lock, int new_state) 727 { 728 /* 729 * Locks in the sleeping list may be woken up in a number of ways, 730 * and more than once. If a sleeping lock is signalled awake more 731 * than once, then it may or may not change state depending on its 732 * current state. 733 * Also note that NLM locks that are sleeping could be moved to an 734 * interrupted state more than once if the unlock request is 735 * retransmitted by the NLM client - the second time around, this is 736 * just a nop. 737 * The ordering of being signalled awake is: 738 * INTERRUPTED_STATE > CANCELLED_STATE > GRANTED_STATE. 739 * The checks below implement this ordering. 740 */ 741 if (IS_INTERRUPTED(lock)) { 742 if ((new_state == FLK_CANCELLED_STATE) || 743 (new_state == FLK_GRANTED_STATE) || 744 (new_state == FLK_INTERRUPTED_STATE)) { 745 return; 746 } 747 } 748 if (IS_CANCELLED(lock)) { 749 if ((new_state == FLK_GRANTED_STATE) || 750 (new_state == FLK_CANCELLED_STATE)) { 751 return; 752 } 753 } 754 CHECK_LOCK_TRANSITION(lock->l_status, new_state); 755 if (IS_PXFS(lock)) { 756 cl_flk_state_transition_notify(lock, lock->l_status, new_state); 757 } 758 lock->l_status = new_state; 759 } 760 761 /* 762 * Routine that checks whether there are any blocking locks in the system. 763 * 764 * The policy followed is if a write lock is sleeping we don't allow read 765 * locks before this write lock even though there may not be any active 766 * locks corresponding to the read locks' region. 767 * 768 * flk_add_edge() function adds an edge between l1 and l2 iff there 769 * is no path between l1 and l2. This is done to have a "minimum 770 * storage representation" of the dependency graph. 771 * 772 * Another property of the graph is since only the new request throws 773 * edges to the existing locks in the graph, the graph is always topologically 774 * ordered. 
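 *
 * For example (an illustration of the policy above, not code from this
 * file): if a write lock on [0, 50] is already sleeping, a new read request
 * for [10, 20] is made to wait behind that writer even if no active lock
 * currently conflicts with [10, 20].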
775 */ 776 777 static int 778 flk_process_request(lock_descriptor_t *request) 779 { 780 graph_t *gp = request->l_graph; 781 lock_descriptor_t *lock; 782 int request_blocked_by_active = 0; 783 int request_blocked_by_granted = 0; 784 int request_blocked_by_sleeping = 0; 785 vnode_t *vp = request->l_vnode; 786 int error = 0; 787 int request_will_wait = 0; 788 int found_covering_lock = 0; 789 lock_descriptor_t *covered_by = NULL; 790 791 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 792 request_will_wait = IS_WILLING_TO_SLEEP(request); 793 794 /* 795 * check active locks 796 */ 797 798 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 799 800 801 if (lock) { 802 do { 803 if (BLOCKS(lock, request)) { 804 if (!request_will_wait) 805 return (EAGAIN); 806 request_blocked_by_active = 1; 807 break; 808 } 809 /* 810 * Grant lock if it is for the same owner holding active 811 * lock that covers the request. 812 */ 813 814 if (SAME_OWNER(lock, request) && 815 COVERS(lock, request) && 816 (request->l_type == F_RDLCK)) 817 return (flk_execute_request(request)); 818 lock = lock->l_next; 819 } while (lock->l_vnode == vp); 820 } 821 822 if (!request_blocked_by_active) { 823 lock_descriptor_t *lk[1]; 824 lock_descriptor_t *first_glock = NULL; 825 /* 826 * Shall we grant this?! NO!! 827 * What about those locks that were just granted and still 828 * in sleep queue. Those threads are woken up and so locks 829 * are almost active. 830 */ 831 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 832 if (lock) { 833 do { 834 if (BLOCKS(lock, request)) { 835 if (IS_GRANTED(lock)) { 836 request_blocked_by_granted = 1; 837 } else { 838 request_blocked_by_sleeping = 1; 839 } 840 } 841 842 lock = lock->l_next; 843 } while ((lock->l_vnode == vp)); 844 first_glock = lock->l_prev; 845 ASSERT(first_glock->l_vnode == vp); 846 } 847 848 if (request_blocked_by_granted) 849 goto block; 850 851 if (!request_blocked_by_sleeping) { 852 /* 853 * If the request isn't going to be blocked by a 854 * sleeping request, we know that it isn't going to 855 * be blocked; we can just execute the request -- 856 * without performing costly deadlock detection. 857 */ 858 ASSERT(!request_blocked_by_active); 859 return (flk_execute_request(request)); 860 } else if (request->l_type == F_RDLCK) { 861 /* 862 * If we have a sleeping writer in the requested 863 * lock's range, block. 864 */ 865 goto block; 866 } 867 868 lk[0] = request; 869 request->l_state |= RECOMPUTE_LOCK; 870 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 871 if (lock) { 872 do { 873 flk_recompute_dependencies(lock, lk, 1, 0); 874 lock = lock->l_next; 875 } while (lock->l_vnode == vp); 876 } 877 lock = first_glock; 878 if (lock) { 879 do { 880 if (IS_GRANTED(lock)) { 881 flk_recompute_dependencies(lock, lk, 1, 0); 882 } 883 lock = lock->l_prev; 884 } while ((lock->l_vnode == vp)); 885 } 886 request->l_state &= ~RECOMPUTE_LOCK; 887 if (!NO_DEPENDENTS(request) && flk_check_deadlock(request)) 888 return (EDEADLK); 889 return (flk_execute_request(request)); 890 } 891 892 block: 893 if (request_will_wait) 894 flk_graph_uncolor(gp); 895 896 /* check sleeping locks */ 897 898 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 899 900 /* 901 * If we find a sleeping write lock that is a superset of the 902 * region wanted by request we can be assured that by adding an 903 * edge to this write lock we have paths to all locks in the 904 * graph that blocks the request except in one case and that is why 905 * another check for SAME_OWNER in the loop below. 
The exception 906 * case is when this process that owns the sleeping write lock 'l1' 907 * has other locks l2, l3, l4 that are in the system and arrived 908 * before l1. l1 does not have path to these locks as they are from 909 * same process. We break when we find a second covering sleeping 910 * lock l5 owned by a process different from that owning l1, because 911 * there cannot be any of l2, l3, l4, etc., arrived before l5, and if 912 * it has l1 would have produced a deadlock already. 913 */ 914 915 if (lock) { 916 do { 917 if (BLOCKS(lock, request)) { 918 if (!request_will_wait) 919 return (EAGAIN); 920 if (COVERS(lock, request) && 921 lock->l_type == F_WRLCK) { 922 if (found_covering_lock && 923 !SAME_OWNER(lock, covered_by)) { 924 found_covering_lock++; 925 break; 926 } 927 found_covering_lock = 1; 928 covered_by = lock; 929 } 930 if (found_covering_lock && 931 !SAME_OWNER(lock, covered_by)) { 932 lock = lock->l_next; 933 continue; 934 } 935 if ((error = flk_add_edge(request, lock, 936 !found_covering_lock, 0))) 937 return (error); 938 } 939 lock = lock->l_next; 940 } while (lock->l_vnode == vp); 941 } 942 943 /* 944 * found_covering_lock == 2 iff at this point 'request' has paths 945 * to all locks that blocks 'request'. found_covering_lock == 1 iff at this 946 * point 'request' has paths to all locks that blocks 'request' whose owners 947 * are not same as the one that covers 'request' (covered_by above) and 948 * we can have locks whose owner is same as covered_by in the active list. 949 */ 950 951 if (request_blocked_by_active && found_covering_lock != 2) { 952 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 953 ASSERT(lock != NULL); 954 do { 955 if (BLOCKS(lock, request)) { 956 if (found_covering_lock && 957 !SAME_OWNER(lock, covered_by)) { 958 lock = lock->l_next; 959 continue; 960 } 961 if ((error = flk_add_edge(request, lock, 962 CHECK_CYCLE, 0))) 963 return (error); 964 } 965 lock = lock->l_next; 966 } while (lock->l_vnode == vp); 967 } 968 969 if (NOT_BLOCKED(request)) { 970 /* 971 * request not dependent on any other locks 972 * so execute this request 973 */ 974 return (flk_execute_request(request)); 975 } else { 976 /* 977 * check for deadlock 978 */ 979 if (flk_check_deadlock(request)) 980 return (EDEADLK); 981 /* 982 * this thread has to sleep 983 */ 984 return (flk_wait_execute_request(request)); 985 } 986 } 987 988 /* ONC_PLUS EXTRACT START */ 989 /* 990 * The actual execution of the request in the simple case is only to 991 * insert the 'request' in the list of active locks if it is not an 992 * UNLOCK. 993 * We have to consider the existing active locks' relation to 994 * this 'request' if they are owned by same process. flk_relation() does 995 * this job and sees to that the dependency graph information is maintained 996 * properly. 
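 *
 * For example (illustration only): if the same process already holds an
 * active read lock on [0, 100] and now requests a write lock on the same
 * range, flk_relation() removes the old lock, recomputes or re-links its
 * dependency edges, and the request takes its place in the active list.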
997 */ 998 999 int 1000 flk_execute_request(lock_descriptor_t *request) 1001 { 1002 graph_t *gp = request->l_graph; 1003 vnode_t *vp = request->l_vnode; 1004 lock_descriptor_t *lock, *lock1; 1005 int done_searching = 0; 1006 1007 CHECK_SLEEPING_LOCKS(gp); 1008 CHECK_ACTIVE_LOCKS(gp); 1009 1010 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1011 1012 flk_set_state(request, FLK_START_STATE); 1013 1014 ASSERT(NOT_BLOCKED(request)); 1015 1016 /* IO_LOCK requests are only to check status */ 1017 1018 if (IS_IO_LOCK(request)) 1019 return (0); 1020 1021 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 1022 1023 if (lock == NULL && request->l_type == F_UNLCK) 1024 return (0); 1025 if (lock == NULL) { 1026 flk_insert_active_lock(request); 1027 return (0); 1028 } 1029 1030 do { 1031 lock1 = lock->l_next; 1032 if (SAME_OWNER(request, lock)) { 1033 done_searching = flk_relation(lock, request); 1034 } 1035 lock = lock1; 1036 } while (lock->l_vnode == vp && !done_searching); 1037 1038 /* 1039 * insert in active queue 1040 */ 1041 1042 if (request->l_type != F_UNLCK) 1043 flk_insert_active_lock(request); 1044 1045 return (0); 1046 } 1047 /* ONC_PLUS EXTRACT END */ 1048 1049 /* 1050 * 'request' is blocked by some one therefore we put it into sleep queue. 1051 */ 1052 static int 1053 flk_wait_execute_request(lock_descriptor_t *request) 1054 { 1055 graph_t *gp = request->l_graph; 1056 callb_cpr_t *cprp; /* CPR info from callback */ 1057 struct flock_globals *fg; 1058 int index; 1059 1060 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1061 ASSERT(IS_WILLING_TO_SLEEP(request)); 1062 1063 flk_insert_sleeping_lock(request); 1064 1065 if (IS_LOCKMGR(request)) { 1066 index = HASH_INDEX(request->l_vnode); 1067 fg = flk_get_globals(); 1068 1069 if (nlm_status_size == 0) { /* not booted as a cluster */ 1070 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP) { 1071 flk_cancel_sleeping_lock(request, 1); 1072 return (ENOLCK); 1073 } 1074 } else { /* booted as a cluster */ 1075 /* 1076 * If the request is an NLM server lock request, 1077 * and the NLM state of the lock request is not 1078 * NLM_UP (because the NLM server is shutting 1079 * down), then cancel the sleeping lock and 1080 * return error ENOLCK that will encourage the 1081 * client to retransmit. 1082 */ 1083 if (!IS_NLM_UP(request)) { 1084 flk_cancel_sleeping_lock(request, 1); 1085 return (ENOLCK); 1086 } 1087 } 1088 } 1089 1090 /* Clustering: For blocking PXFS locks, return */ 1091 if (IS_PXFS(request)) { 1092 /* 1093 * PXFS locks sleep on the client side. 1094 * The callback argument is used to wake up the sleeper 1095 * when the lock is granted. 1096 * We return -1 (rather than an errno value) to indicate 1097 * the client side should sleep 1098 */ 1099 return (PXFS_LOCK_BLOCKED); 1100 } 1101 1102 if (request->l_callbacks != NULL) { 1103 /* 1104 * To make sure the shutdown code works correctly, either 1105 * the callback must happen after putting the lock on the 1106 * sleep list, or we must check the shutdown status after 1107 * returning from the callback (and before sleeping). At 1108 * least for now, we'll use the first option. If a 1109 * shutdown or signal or whatever happened while the graph 1110 * mutex was dropped, that will be detected by 1111 * wait_for_lock(). 
1112 */ 1113 mutex_exit(&gp->gp_mutex); 1114 1115 cprp = flk_invoke_callbacks(request->l_callbacks, 1116 FLK_BEFORE_SLEEP); 1117 1118 mutex_enter(&gp->gp_mutex); 1119 1120 if (cprp == NULL) { 1121 wait_for_lock(request); 1122 } else { 1123 mutex_enter(cprp->cc_lockp); 1124 CALLB_CPR_SAFE_BEGIN(cprp); 1125 mutex_exit(cprp->cc_lockp); 1126 wait_for_lock(request); 1127 mutex_enter(cprp->cc_lockp); 1128 CALLB_CPR_SAFE_END(cprp, cprp->cc_lockp); 1129 mutex_exit(cprp->cc_lockp); 1130 } 1131 1132 mutex_exit(&gp->gp_mutex); 1133 (void) flk_invoke_callbacks(request->l_callbacks, 1134 FLK_AFTER_SLEEP); 1135 mutex_enter(&gp->gp_mutex); 1136 } else { 1137 wait_for_lock(request); 1138 } 1139 1140 if (IS_LOCKMGR(request)) { 1141 /* 1142 * If the lock manager is shutting down, return an 1143 * error that will encourage the client to retransmit. 1144 */ 1145 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP && 1146 !IS_GRANTED(request)) { 1147 flk_cancel_sleeping_lock(request, 1); 1148 return (ENOLCK); 1149 } 1150 } 1151 1152 if (IS_INTERRUPTED(request)) { 1153 /* we got a signal, or act like we did */ 1154 flk_cancel_sleeping_lock(request, 1); 1155 return (EINTR); 1156 } 1157 1158 /* Cancelled if some other thread has closed the file */ 1159 1160 if (IS_CANCELLED(request)) { 1161 flk_cancel_sleeping_lock(request, 1); 1162 return (EBADF); 1163 } 1164 1165 request->l_state &= ~GRANTED_LOCK; 1166 REMOVE_SLEEP_QUEUE(request); 1167 return (flk_execute_request(request)); 1168 } 1169 1170 /* 1171 * This routine adds an edge between from and to because from depends 1172 * to. If asked to check for deadlock it checks whether there are any 1173 * reachable locks from "from_lock" that is owned by the same process 1174 * as "from_lock". 1175 * NOTE: It is the caller's responsibility to make sure that the color 1176 * of the graph is consistent between the calls to flk_add_edge as done 1177 * in flk_process_request. This routine does not color and check for 1178 * deadlock explicitly. 1179 */ 1180 1181 static int 1182 flk_add_edge(lock_descriptor_t *from_lock, lock_descriptor_t *to_lock, 1183 int check_cycle, int update_graph) 1184 { 1185 edge_t *edge; 1186 edge_t *ep; 1187 lock_descriptor_t *vertex; 1188 lock_descriptor_t *vertex_stack; 1189 1190 STACK_INIT(vertex_stack); 1191 1192 /* 1193 * if to vertex already has mark_color just return 1194 * don't add an edge as it is reachable from from vertex 1195 * before itself. 
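	 *
	 * For example (within one uncolored pass, as in flk_process_request):
	 * if an earlier call added the edge request->A and its cycle check
	 * visited (and colored) a lock B reachable from A, a later call to
	 * add request->B returns here without adding anything, since B is
	 * already reachable from request and the edge would be redundant.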
1196 */ 1197 1198 if (COLORED(to_lock)) 1199 return (0); 1200 1201 edge = flk_get_edge(); 1202 1203 /* 1204 * set the from and to vertex 1205 */ 1206 1207 edge->from_vertex = from_lock; 1208 edge->to_vertex = to_lock; 1209 1210 /* 1211 * put in adjacency list of from vertex 1212 */ 1213 1214 from_lock->l_edge.edge_adj_next->edge_adj_prev = edge; 1215 edge->edge_adj_next = from_lock->l_edge.edge_adj_next; 1216 edge->edge_adj_prev = &from_lock->l_edge; 1217 from_lock->l_edge.edge_adj_next = edge; 1218 1219 /* 1220 * put in in list of to vertex 1221 */ 1222 1223 to_lock->l_edge.edge_in_next->edge_in_prev = edge; 1224 edge->edge_in_next = to_lock->l_edge.edge_in_next; 1225 to_lock->l_edge.edge_in_next = edge; 1226 edge->edge_in_prev = &to_lock->l_edge; 1227 1228 1229 if (update_graph) { 1230 flk_update_proc_graph(edge, 0); 1231 return (0); 1232 } 1233 if (!check_cycle) { 1234 return (0); 1235 } 1236 1237 STACK_PUSH(vertex_stack, from_lock, l_stack); 1238 1239 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 1240 1241 STACK_POP(vertex_stack, l_stack); 1242 1243 for (ep = FIRST_ADJ(vertex); 1244 ep != HEAD(vertex); 1245 ep = NEXT_ADJ(ep)) { 1246 if (COLORED(ep->to_vertex)) 1247 continue; 1248 COLOR(ep->to_vertex); 1249 if (SAME_OWNER(ep->to_vertex, from_lock)) 1250 goto dead_lock; 1251 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack); 1252 } 1253 } 1254 return (0); 1255 1256 dead_lock: 1257 1258 /* 1259 * remove all edges 1260 */ 1261 1262 ep = FIRST_ADJ(from_lock); 1263 1264 while (ep != HEAD(from_lock)) { 1265 IN_LIST_REMOVE(ep); 1266 from_lock->l_sedge = NEXT_ADJ(ep); 1267 ADJ_LIST_REMOVE(ep); 1268 flk_free_edge(ep); 1269 ep = from_lock->l_sedge; 1270 } 1271 return (EDEADLK); 1272 } 1273 1274 /* 1275 * Get an edge structure for representing the dependency between two locks. 1276 */ 1277 1278 static edge_t * 1279 flk_get_edge() 1280 { 1281 edge_t *ep; 1282 1283 ASSERT(flk_edge_cache != NULL); 1284 1285 ep = kmem_cache_alloc(flk_edge_cache, KM_SLEEP); 1286 edge_allocs++; 1287 return (ep); 1288 } 1289 1290 /* 1291 * Free the edge structure. 1292 */ 1293 1294 static void 1295 flk_free_edge(edge_t *ep) 1296 { 1297 edge_frees++; 1298 kmem_cache_free(flk_edge_cache, (void *)ep); 1299 } 1300 1301 /* 1302 * Check the relationship of request with lock and perform the 1303 * recomputation of dependencies, break lock if required, and return 1304 * 1 if request cannot have any more relationship with the next 1305 * active locks. 1306 * The 'lock' and 'request' are compared and in case of overlap we 1307 * delete the 'lock' and form new locks to represent the non-overlapped 1308 * portion of original 'lock'. This function has side effects such as 1309 * 'lock' will be freed, new locks will be added to the active list. 
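 *
 * For example (illustration only): if 'lock' covers [10, 100] and an
 * overlapping 'request' of a different type arrives for [40, 60], 'lock'
 * is deleted and replaced by two new locks covering [10, 39] and
 * [61, 100], with 'request' itself covering [40, 60]; those three
 * descriptors are the topology[] entries used for recomputation below.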
1310 */ 1311 1312 static int 1313 flk_relation(lock_descriptor_t *lock, lock_descriptor_t *request) 1314 { 1315 int lock_effect; 1316 lock_descriptor_t *lock1, *lock2; 1317 lock_descriptor_t *topology[3]; 1318 int nvertex = 0; 1319 int i; 1320 edge_t *ep; 1321 graph_t *gp = (lock->l_graph); 1322 1323 1324 CHECK_SLEEPING_LOCKS(gp); 1325 CHECK_ACTIVE_LOCKS(gp); 1326 1327 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1328 1329 topology[0] = topology[1] = topology[2] = NULL; 1330 1331 if (request->l_type == F_UNLCK) 1332 lock_effect = FLK_UNLOCK; 1333 else if (request->l_type == F_RDLCK && 1334 lock->l_type == F_WRLCK) 1335 lock_effect = FLK_DOWNGRADE; 1336 else if (request->l_type == F_WRLCK && 1337 lock->l_type == F_RDLCK) 1338 lock_effect = FLK_UPGRADE; 1339 else 1340 lock_effect = FLK_STAY_SAME; 1341 1342 if (lock->l_end < request->l_start) { 1343 if (lock->l_end == request->l_start - 1 && 1344 lock_effect == FLK_STAY_SAME) { 1345 topology[0] = request; 1346 request->l_start = lock->l_start; 1347 nvertex = 1; 1348 goto recompute; 1349 } else { 1350 return (0); 1351 } 1352 } 1353 1354 if (lock->l_start > request->l_end) { 1355 if (request->l_end == lock->l_start - 1 && 1356 lock_effect == FLK_STAY_SAME) { 1357 topology[0] = request; 1358 request->l_end = lock->l_end; 1359 nvertex = 1; 1360 goto recompute; 1361 } else { 1362 return (1); 1363 } 1364 } 1365 1366 if (request->l_end < lock->l_end) { 1367 if (request->l_start > lock->l_start) { 1368 if (lock_effect == FLK_STAY_SAME) { 1369 request->l_start = lock->l_start; 1370 request->l_end = lock->l_end; 1371 topology[0] = request; 1372 nvertex = 1; 1373 } else { 1374 lock1 = flk_get_lock(); 1375 lock2 = flk_get_lock(); 1376 COPY(lock1, lock); 1377 COPY(lock2, lock); 1378 lock1->l_start = lock->l_start; 1379 lock1->l_end = request->l_start - 1; 1380 lock2->l_start = request->l_end + 1; 1381 lock2->l_end = lock->l_end; 1382 topology[0] = lock1; 1383 topology[1] = lock2; 1384 topology[2] = request; 1385 nvertex = 3; 1386 } 1387 } else if (request->l_start < lock->l_start) { 1388 if (lock_effect == FLK_STAY_SAME) { 1389 request->l_end = lock->l_end; 1390 topology[0] = request; 1391 nvertex = 1; 1392 } else { 1393 lock1 = flk_get_lock(); 1394 COPY(lock1, lock); 1395 lock1->l_start = request->l_end + 1; 1396 topology[0] = lock1; 1397 topology[1] = request; 1398 nvertex = 2; 1399 } 1400 } else { 1401 if (lock_effect == FLK_STAY_SAME) { 1402 request->l_start = lock->l_start; 1403 request->l_end = lock->l_end; 1404 topology[0] = request; 1405 nvertex = 1; 1406 } else { 1407 lock1 = flk_get_lock(); 1408 COPY(lock1, lock); 1409 lock1->l_start = request->l_end + 1; 1410 topology[0] = lock1; 1411 topology[1] = request; 1412 nvertex = 2; 1413 } 1414 } 1415 } else if (request->l_end > lock->l_end) { 1416 if (request->l_start > lock->l_start) { 1417 if (lock_effect == FLK_STAY_SAME) { 1418 request->l_start = lock->l_start; 1419 topology[0] = request; 1420 nvertex = 1; 1421 } else { 1422 lock1 = flk_get_lock(); 1423 COPY(lock1, lock); 1424 lock1->l_end = request->l_start - 1; 1425 topology[0] = lock1; 1426 topology[1] = request; 1427 nvertex = 2; 1428 } 1429 } else if (request->l_start < lock->l_start) { 1430 topology[0] = request; 1431 nvertex = 1; 1432 } else { 1433 topology[0] = request; 1434 nvertex = 1; 1435 } 1436 } else { 1437 if (request->l_start > lock->l_start) { 1438 if (lock_effect == FLK_STAY_SAME) { 1439 request->l_start = lock->l_start; 1440 topology[0] = request; 1441 nvertex = 1; 1442 } else { 1443 lock1 = flk_get_lock(); 1444 COPY(lock1, lock); 1445 
lock1->l_end = request->l_start - 1; 1446 topology[0] = lock1; 1447 topology[1] = request; 1448 nvertex = 2; 1449 } 1450 } else if (request->l_start < lock->l_start) { 1451 topology[0] = request; 1452 nvertex = 1; 1453 } else { 1454 if (lock_effect != FLK_UNLOCK) { 1455 topology[0] = request; 1456 nvertex = 1; 1457 } else { 1458 flk_delete_active_lock(lock, 0); 1459 flk_wakeup(lock, 1); 1460 flk_free_lock(lock); 1461 CHECK_SLEEPING_LOCKS(gp); 1462 CHECK_ACTIVE_LOCKS(gp); 1463 return (1); 1464 } 1465 } 1466 } 1467 1468 recompute: 1469 1470 /* 1471 * For unlock we don't send the 'request' to for recomputing 1472 * dependencies because no lock will add an edge to this. 1473 */ 1474 1475 if (lock_effect == FLK_UNLOCK) { 1476 topology[nvertex-1] = NULL; 1477 nvertex--; 1478 } 1479 for (i = 0; i < nvertex; i++) { 1480 topology[i]->l_state |= RECOMPUTE_LOCK; 1481 topology[i]->l_color = NO_COLOR; 1482 } 1483 1484 ASSERT(FIRST_ADJ(lock) == HEAD(lock)); 1485 1486 /* 1487 * we remove the adjacent edges for all vertices' to this vertex 1488 * 'lock'. 1489 */ 1490 1491 ep = FIRST_IN(lock); 1492 while (ep != HEAD(lock)) { 1493 ADJ_LIST_REMOVE(ep); 1494 ep = NEXT_IN(ep); 1495 } 1496 1497 flk_delete_active_lock(lock, 0); 1498 1499 /* We are ready for recomputing the dependencies now */ 1500 1501 flk_recompute_dependencies(lock, topology, nvertex, 1); 1502 1503 for (i = 0; i < nvertex; i++) { 1504 topology[i]->l_state &= ~RECOMPUTE_LOCK; 1505 topology[i]->l_color = NO_COLOR; 1506 } 1507 1508 1509 if (lock_effect == FLK_UNLOCK) { 1510 nvertex++; 1511 } 1512 for (i = 0; i < nvertex - 1; i++) { 1513 flk_insert_active_lock(topology[i]); 1514 } 1515 1516 1517 if (lock_effect == FLK_DOWNGRADE || lock_effect == FLK_UNLOCK) { 1518 flk_wakeup(lock, 0); 1519 } else { 1520 ep = FIRST_IN(lock); 1521 while (ep != HEAD(lock)) { 1522 lock->l_sedge = NEXT_IN(ep); 1523 IN_LIST_REMOVE(ep); 1524 flk_update_proc_graph(ep, 1); 1525 flk_free_edge(ep); 1526 ep = lock->l_sedge; 1527 } 1528 } 1529 flk_free_lock(lock); 1530 1531 CHECK_SLEEPING_LOCKS(gp); 1532 CHECK_ACTIVE_LOCKS(gp); 1533 return (0); 1534 } 1535 1536 /* 1537 * Insert a lock into the active queue. 1538 */ 1539 1540 static void 1541 flk_insert_active_lock(lock_descriptor_t *new_lock) 1542 { 1543 graph_t *gp = new_lock->l_graph; 1544 vnode_t *vp = new_lock->l_vnode; 1545 lock_descriptor_t *first_lock, *lock; 1546 1547 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1548 1549 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 1550 first_lock = lock; 1551 1552 if (first_lock != NULL) { 1553 for (; (lock->l_vnode == vp && 1554 lock->l_start < new_lock->l_start); lock = lock->l_next) 1555 ; 1556 } else { 1557 lock = ACTIVE_HEAD(gp); 1558 } 1559 1560 lock->l_prev->l_next = new_lock; 1561 new_lock->l_next = lock; 1562 new_lock->l_prev = lock->l_prev; 1563 lock->l_prev = new_lock; 1564 1565 if (first_lock == NULL || (new_lock->l_start <= first_lock->l_start)) { 1566 vp->v_filocks = (struct filock *)new_lock; 1567 } 1568 flk_set_state(new_lock, FLK_ACTIVE_STATE); 1569 new_lock->l_state |= ACTIVE_LOCK; 1570 1571 CHECK_ACTIVE_LOCKS(gp); 1572 CHECK_SLEEPING_LOCKS(gp); 1573 } 1574 1575 /* 1576 * Delete the active lock : Performs two functions depending on the 1577 * value of second parameter. One is to remove from the active lists 1578 * only and other is to both remove and free the lock. 
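 *
 * For example, flk_relation() above passes 0 and frees the lock itself only
 * after re-linking or waking up the lock's dependents, while callers with
 * no dependents left to service can pass 1 to remove and free in one step.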
1579 */ 1580 1581 static void 1582 flk_delete_active_lock(lock_descriptor_t *lock, int free_lock) 1583 { 1584 vnode_t *vp = lock->l_vnode; 1585 graph_t *gp = lock->l_graph; 1586 1587 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1588 if (free_lock) 1589 ASSERT(NO_DEPENDENTS(lock)); 1590 ASSERT(NOT_BLOCKED(lock)); 1591 ASSERT(IS_ACTIVE(lock)); 1592 1593 ASSERT((vp->v_filocks != NULL)); 1594 1595 if (vp->v_filocks == (struct filock *)lock) { 1596 vp->v_filocks = (struct filock *) 1597 ((lock->l_next->l_vnode == vp) ? lock->l_next : 1598 NULL); 1599 } 1600 lock->l_next->l_prev = lock->l_prev; 1601 lock->l_prev->l_next = lock->l_next; 1602 lock->l_next = lock->l_prev = NULL; 1603 flk_set_state(lock, FLK_DEAD_STATE); 1604 lock->l_state &= ~ACTIVE_LOCK; 1605 1606 if (free_lock) 1607 flk_free_lock(lock); 1608 CHECK_ACTIVE_LOCKS(gp); 1609 CHECK_SLEEPING_LOCKS(gp); 1610 } 1611 1612 /* 1613 * Insert into the sleep queue. 1614 */ 1615 1616 static void 1617 flk_insert_sleeping_lock(lock_descriptor_t *request) 1618 { 1619 graph_t *gp = request->l_graph; 1620 vnode_t *vp = request->l_vnode; 1621 lock_descriptor_t *lock; 1622 1623 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1624 ASSERT(IS_INITIAL(request)); 1625 1626 for (lock = gp->sleeping_locks.l_next; (lock != &gp->sleeping_locks && 1627 lock->l_vnode < vp); lock = lock->l_next) 1628 ; 1629 1630 lock->l_prev->l_next = request; 1631 request->l_prev = lock->l_prev; 1632 lock->l_prev = request; 1633 request->l_next = lock; 1634 flk_set_state(request, FLK_SLEEPING_STATE); 1635 request->l_state |= SLEEPING_LOCK; 1636 } 1637 1638 /* 1639 * Cancelling a sleeping lock implies removing a vertex from the 1640 * dependency graph and therefore we should recompute the dependencies 1641 * of all vertices that have a path to this vertex, w.r.t. all 1642 * vertices reachable from this vertex. 1643 */ 1644 1645 void 1646 flk_cancel_sleeping_lock(lock_descriptor_t *request, int remove_from_queue) 1647 { 1648 graph_t *gp = request->l_graph; 1649 vnode_t *vp = request->l_vnode; 1650 lock_descriptor_t **topology = NULL; 1651 edge_t *ep; 1652 lock_descriptor_t *vertex, *lock; 1653 int nvertex = 0; 1654 int i; 1655 lock_descriptor_t *vertex_stack; 1656 1657 STACK_INIT(vertex_stack); 1658 1659 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1660 /* 1661 * count number of vertex pointers that has to be allocated 1662 * All vertices that are reachable from request. 1663 */ 1664 1665 STACK_PUSH(vertex_stack, request, l_stack); 1666 1667 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 1668 STACK_POP(vertex_stack, l_stack); 1669 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex); 1670 ep = NEXT_ADJ(ep)) { 1671 if (IS_RECOMPUTE(ep->to_vertex)) 1672 continue; 1673 ep->to_vertex->l_state |= RECOMPUTE_LOCK; 1674 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack); 1675 nvertex++; 1676 } 1677 } 1678 1679 /* 1680 * allocate memory for holding the vertex pointers 1681 */ 1682 1683 if (nvertex) { 1684 topology = kmem_zalloc(nvertex * sizeof (lock_descriptor_t *), 1685 KM_SLEEP); 1686 } 1687 1688 /* 1689 * one more pass to actually store the vertices in the 1690 * allocated array. 1691 * We first check sleeping locks and then active locks 1692 * so that topology array will be in a topological 1693 * order. 
1694 */ 1695 1696 nvertex = 0; 1697 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 1698 1699 if (lock) { 1700 do { 1701 if (IS_RECOMPUTE(lock)) { 1702 lock->l_index = nvertex; 1703 topology[nvertex++] = lock; 1704 } 1705 lock->l_color = NO_COLOR; 1706 lock = lock->l_next; 1707 } while (lock->l_vnode == vp); 1708 } 1709 1710 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 1711 1712 if (lock) { 1713 do { 1714 if (IS_RECOMPUTE(lock)) { 1715 lock->l_index = nvertex; 1716 topology[nvertex++] = lock; 1717 } 1718 lock->l_color = NO_COLOR; 1719 lock = lock->l_next; 1720 } while (lock->l_vnode == vp); 1721 } 1722 1723 /* 1724 * remove in and out edges of request 1725 * They are freed after updating proc_graph below. 1726 */ 1727 1728 for (ep = FIRST_IN(request); ep != HEAD(request); ep = NEXT_IN(ep)) { 1729 ADJ_LIST_REMOVE(ep); 1730 } 1731 1732 1733 if (remove_from_queue) 1734 REMOVE_SLEEP_QUEUE(request); 1735 1736 /* we are ready to recompute */ 1737 1738 flk_recompute_dependencies(request, topology, nvertex, 1); 1739 1740 ep = FIRST_ADJ(request); 1741 while (ep != HEAD(request)) { 1742 IN_LIST_REMOVE(ep); 1743 request->l_sedge = NEXT_ADJ(ep); 1744 ADJ_LIST_REMOVE(ep); 1745 flk_update_proc_graph(ep, 1); 1746 flk_free_edge(ep); 1747 ep = request->l_sedge; 1748 } 1749 1750 1751 /* 1752 * unset the RECOMPUTE flag in those vertices 1753 */ 1754 1755 for (i = 0; i < nvertex; i++) { 1756 topology[i]->l_state &= ~RECOMPUTE_LOCK; 1757 } 1758 1759 /* 1760 * free the topology 1761 */ 1762 if (nvertex) 1763 kmem_free((void *)topology, 1764 (nvertex * sizeof (lock_descriptor_t *))); 1765 /* 1766 * Possibility of some locks unblocked now 1767 */ 1768 1769 flk_wakeup(request, 0); 1770 1771 /* 1772 * we expect to have a correctly recomputed graph now. 1773 */ 1774 flk_set_state(request, FLK_DEAD_STATE); 1775 flk_free_lock(request); 1776 CHECK_SLEEPING_LOCKS(gp); 1777 CHECK_ACTIVE_LOCKS(gp); 1778 1779 } 1780 1781 /* 1782 * Uncoloring the graph is simply to increment the mark value of the graph 1783 * And only when wrap round takes place will we color all vertices in 1784 * the graph explicitly. 1785 */ 1786 1787 static void 1788 flk_graph_uncolor(graph_t *gp) 1789 { 1790 lock_descriptor_t *lock; 1791 1792 if (gp->mark == UINT_MAX) { 1793 gp->mark = 1; 1794 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp); 1795 lock = lock->l_next) 1796 lock->l_color = 0; 1797 1798 for (lock = SLEEPING_HEAD(gp)->l_next; lock != SLEEPING_HEAD(gp); 1799 lock = lock->l_next) 1800 lock->l_color = 0; 1801 } else { 1802 gp->mark++; 1803 } 1804 } 1805 1806 /* 1807 * Wake up locks that are blocked on the given lock. 1808 */ 1809 1810 static void 1811 flk_wakeup(lock_descriptor_t *lock, int adj_list_remove) 1812 { 1813 edge_t *ep; 1814 graph_t *gp = lock->l_graph; 1815 lock_descriptor_t *lck; 1816 1817 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1818 if (NO_DEPENDENTS(lock)) 1819 return; 1820 ep = FIRST_IN(lock); 1821 do { 1822 /* 1823 * delete the edge from the adjacency list 1824 * of from vertex. if no more adjacent edges 1825 * for this vertex wake this process. 
1826 */ 1827 lck = ep->from_vertex; 1828 if (adj_list_remove) 1829 ADJ_LIST_REMOVE(ep); 1830 flk_update_proc_graph(ep, 1); 1831 if (NOT_BLOCKED(lck)) { 1832 GRANT_WAKEUP(lck); 1833 } 1834 lock->l_sedge = NEXT_IN(ep); 1835 IN_LIST_REMOVE(ep); 1836 flk_free_edge(ep); 1837 ep = lock->l_sedge; 1838 } while (ep != HEAD(lock)); 1839 ASSERT(NO_DEPENDENTS(lock)); 1840 } 1841 1842 /* 1843 * The dependents of request, is checked for its dependency against the 1844 * locks in topology (called topology because the array is and should be in 1845 * topological order for this algorithm, if not in topological order the 1846 * inner loop below might add more edges than necessary. Topological ordering 1847 * of vertices satisfies the property that all edges will be from left to 1848 * right i.e., topology[i] can have an edge to topology[j], iff i<j) 1849 * If lock l1 in the dependent set of request is dependent (blocked by) 1850 * on lock l2 in topology but does not have a path to it, we add an edge 1851 * in the inner loop below. 1852 * 1853 * We don't want to add an edge between l1 and l2 if there exists 1854 * already a path from l1 to l2, so care has to be taken for those vertices 1855 * that have two paths to 'request'. These vertices are referred to here 1856 * as barrier locks. 1857 * 1858 * The barriers has to be found (those vertex that originally had two paths 1859 * to request) because otherwise we may end up adding edges unnecessarily 1860 * to vertices in topology, and thus barrier vertices can have an edge 1861 * to a vertex in topology as well a path to it. 1862 */ 1863 1864 static void 1865 flk_recompute_dependencies(lock_descriptor_t *request, 1866 lock_descriptor_t **topology, 1867 int nvertex, int update_graph) 1868 { 1869 lock_descriptor_t *vertex, *lock; 1870 graph_t *gp = request->l_graph; 1871 int i, count; 1872 int barrier_found = 0; 1873 edge_t *ep; 1874 lock_descriptor_t *vertex_stack; 1875 1876 STACK_INIT(vertex_stack); 1877 1878 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1879 if (nvertex == 0) 1880 return; 1881 flk_graph_uncolor(request->l_graph); 1882 barrier_found = flk_find_barriers(request); 1883 request->l_state |= RECOMPUTE_DONE; 1884 1885 STACK_PUSH(vertex_stack, request, l_stack); 1886 request->l_sedge = FIRST_IN(request); 1887 1888 1889 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 1890 if (vertex->l_state & RECOMPUTE_DONE) { 1891 count = 0; 1892 goto next_in_edge; 1893 } 1894 if (IS_BARRIER(vertex)) { 1895 /* decrement the barrier count */ 1896 if (vertex->l_index) { 1897 vertex->l_index--; 1898 /* this guy will be pushed again anyway ? */ 1899 STACK_POP(vertex_stack, l_stack); 1900 if (vertex->l_index == 0) { 1901 /* 1902 * barrier is over we can recompute 1903 * dependencies for this lock in the 1904 * next stack pop 1905 */ 1906 vertex->l_state &= ~BARRIER_LOCK; 1907 } 1908 continue; 1909 } 1910 } 1911 vertex->l_state |= RECOMPUTE_DONE; 1912 flk_graph_uncolor(gp); 1913 count = flk_color_reachables(vertex); 1914 for (i = 0; i < nvertex; i++) { 1915 lock = topology[i]; 1916 if (COLORED(lock)) 1917 continue; 1918 if (BLOCKS(lock, vertex)) { 1919 (void) flk_add_edge(vertex, lock, 1920 NO_CHECK_CYCLE, update_graph); 1921 COLOR(lock); 1922 count++; 1923 count += flk_color_reachables(lock); 1924 } 1925 1926 } 1927 1928 next_in_edge: 1929 if (count == nvertex || 1930 vertex->l_sedge == HEAD(vertex)) { 1931 /* prune the tree below this */ 1932 STACK_POP(vertex_stack, l_stack); 1933 vertex->l_state &= ~RECOMPUTE_DONE; 1934 /* update the barrier locks below this! 
*/ 1935 if (vertex->l_sedge != HEAD(vertex) && barrier_found) { 1936 flk_graph_uncolor(gp); 1937 flk_update_barriers(vertex); 1938 } 1939 continue; 1940 } 1941 1942 ep = vertex->l_sedge; 1943 lock = ep->from_vertex; 1944 STACK_PUSH(vertex_stack, lock, l_stack); 1945 lock->l_sedge = FIRST_IN(lock); 1946 vertex->l_sedge = NEXT_IN(ep); 1947 } 1948 1949 } 1950 1951 /* 1952 * Color all reachable vertices from vertex that belongs to topology (here 1953 * those that have RECOMPUTE_LOCK set in their state) and yet uncolored. 1954 * 1955 * Note: we need to use a different stack_link l_stack1 because this is 1956 * called from flk_recompute_dependencies() that already uses a stack with 1957 * l_stack as stack_link. 1958 */ 1959 1960 static int 1961 flk_color_reachables(lock_descriptor_t *vertex) 1962 { 1963 lock_descriptor_t *ver, *lock; 1964 int count; 1965 edge_t *ep; 1966 lock_descriptor_t *vertex_stack; 1967 1968 STACK_INIT(vertex_stack); 1969 1970 STACK_PUSH(vertex_stack, vertex, l_stack1); 1971 count = 0; 1972 while ((ver = STACK_TOP(vertex_stack)) != NULL) { 1973 1974 STACK_POP(vertex_stack, l_stack1); 1975 for (ep = FIRST_ADJ(ver); ep != HEAD(ver); 1976 ep = NEXT_ADJ(ep)) { 1977 lock = ep->to_vertex; 1978 if (COLORED(lock)) 1979 continue; 1980 COLOR(lock); 1981 if (IS_RECOMPUTE(lock)) 1982 count++; 1983 STACK_PUSH(vertex_stack, lock, l_stack1); 1984 } 1985 1986 } 1987 return (count); 1988 } 1989 1990 /* 1991 * Called from flk_recompute_dependencies() this routine decrements 1992 * the barrier count of barrier vertices that are reachable from lock. 1993 */ 1994 1995 static void 1996 flk_update_barriers(lock_descriptor_t *lock) 1997 { 1998 lock_descriptor_t *vertex, *lck; 1999 edge_t *ep; 2000 lock_descriptor_t *vertex_stack; 2001 2002 STACK_INIT(vertex_stack); 2003 2004 STACK_PUSH(vertex_stack, lock, l_stack1); 2005 2006 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 2007 STACK_POP(vertex_stack, l_stack1); 2008 for (ep = FIRST_IN(vertex); ep != HEAD(vertex); 2009 ep = NEXT_IN(ep)) { 2010 lck = ep->from_vertex; 2011 if (COLORED(lck)) { 2012 if (IS_BARRIER(lck)) { 2013 ASSERT(lck->l_index > 0); 2014 lck->l_index--; 2015 if (lck->l_index == 0) 2016 lck->l_state &= ~BARRIER_LOCK; 2017 } 2018 continue; 2019 } 2020 COLOR(lck); 2021 if (IS_BARRIER(lck)) { 2022 ASSERT(lck->l_index > 0); 2023 lck->l_index--; 2024 if (lck->l_index == 0) 2025 lck->l_state &= ~BARRIER_LOCK; 2026 } 2027 STACK_PUSH(vertex_stack, lck, l_stack1); 2028 } 2029 } 2030 } 2031 2032 /* 2033 * Finds all vertices that are reachable from 'lock' more than once and 2034 * mark them as barrier vertices and increment their barrier count. 2035 * The barrier count is one minus the total number of paths from lock 2036 * to that vertex. 
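 *
 * For example (illustration only): if three distinct paths lead from
 * 'lock' to some vertex v, then v is marked as a barrier and ends up with
 * l_index == 2; the recomputation in flk_recompute_dependencies() and
 * flk_update_barriers() then decrements that count once per path until
 * BARRIER_LOCK is cleared.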
2037 */ 2038 2039 static int 2040 flk_find_barriers(lock_descriptor_t *lock) 2041 { 2042 lock_descriptor_t *vertex, *lck; 2043 int found = 0; 2044 edge_t *ep; 2045 lock_descriptor_t *vertex_stack; 2046 2047 STACK_INIT(vertex_stack); 2048 2049 STACK_PUSH(vertex_stack, lock, l_stack1); 2050 2051 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 2052 STACK_POP(vertex_stack, l_stack1); 2053 for (ep = FIRST_IN(vertex); ep != HEAD(vertex); 2054 ep = NEXT_IN(ep)) { 2055 lck = ep->from_vertex; 2056 if (COLORED(lck)) { 2057 /* this is a barrier */ 2058 lck->l_state |= BARRIER_LOCK; 2059 /* index will have barrier count */ 2060 lck->l_index++; 2061 if (!found) 2062 found = 1; 2063 continue; 2064 } 2065 COLOR(lck); 2066 lck->l_index = 0; 2067 STACK_PUSH(vertex_stack, lck, l_stack1); 2068 } 2069 } 2070 return (found); 2071 } 2072 2073 /* 2074 * Finds the first lock that is mainly responsible for blocking this 2075 * request. If there is no such lock, request->l_flock.l_type is set to 2076 * F_UNLCK. Otherwise, request->l_flock is filled in with the particulars 2077 * of the blocking lock. 2078 * 2079 * Note: It is possible a request is blocked by a sleeping lock because 2080 * of the fairness policy used in flk_process_request() to construct the 2081 * dependencies. (see comments before flk_process_request()). 2082 */ 2083 2084 static void 2085 flk_get_first_blocking_lock(lock_descriptor_t *request) 2086 { 2087 graph_t *gp = request->l_graph; 2088 vnode_t *vp = request->l_vnode; 2089 lock_descriptor_t *lock, *blocker; 2090 2091 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 2092 blocker = NULL; 2093 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2094 2095 if (lock) { 2096 do { 2097 if (BLOCKS(lock, request)) { 2098 blocker = lock; 2099 break; 2100 } 2101 lock = lock->l_next; 2102 } while (lock->l_vnode == vp); 2103 } 2104 2105 if (blocker) { 2106 report_blocker(blocker, request); 2107 } else 2108 request->l_flock.l_type = F_UNLCK; 2109 } 2110 2111 /* 2112 * Get the graph_t structure associated with a vnode. 2113 * If 'initialize' is non-zero, and the graph_t structure for this vnode has 2114 * not yet been initialized, then a new element is allocated and returned. 2115 */ 2116 graph_t * 2117 flk_get_lock_graph(vnode_t *vp, int initialize) 2118 { 2119 graph_t *gp; 2120 graph_t *gp_alloc = NULL; 2121 int index = HASH_INDEX(vp); 2122 2123 if (initialize == FLK_USE_GRAPH) { 2124 mutex_enter(&flock_lock); 2125 gp = lock_graph[index]; 2126 mutex_exit(&flock_lock); 2127 return (gp); 2128 } 2129 2130 ASSERT(initialize == FLK_INIT_GRAPH); 2131 2132 if (lock_graph[index] == NULL) { 2133 2134 gp_alloc = kmem_zalloc(sizeof (graph_t), KM_SLEEP); 2135 2136 /* Initialize the graph */ 2137 2138 gp_alloc->active_locks.l_next = 2139 gp_alloc->active_locks.l_prev = 2140 (lock_descriptor_t *)ACTIVE_HEAD(gp_alloc); 2141 gp_alloc->sleeping_locks.l_next = 2142 gp_alloc->sleeping_locks.l_prev = 2143 (lock_descriptor_t *)SLEEPING_HEAD(gp_alloc); 2144 gp_alloc->index = index; 2145 mutex_init(&gp_alloc->gp_mutex, NULL, MUTEX_DEFAULT, NULL); 2146 } 2147 2148 mutex_enter(&flock_lock); 2149 2150 gp = lock_graph[index]; 2151 2152 /* Recheck the value within flock_lock */ 2153 if (gp == NULL) { 2154 struct flock_globals *fg; 2155 2156 /* We must have previously allocated the graph_t structure */ 2157 ASSERT(gp_alloc != NULL); 2158 lock_graph[index] = gp = gp_alloc; 2159 /* 2160 * The lockmgr status is only needed if KLM is loaded. 
2161 */ 2162 if (flock_zone_key != ZONE_KEY_UNINITIALIZED) { 2163 fg = flk_get_globals(); 2164 fg->lockmgr_status[index] = fg->flk_lockmgr_status; 2165 } 2166 } 2167 2168 mutex_exit(&flock_lock); 2169 2170 if ((gp_alloc != NULL) && (gp != gp_alloc)) { 2171 /* There was a race to allocate the graph_t and we lost */ 2172 mutex_destroy(&gp_alloc->gp_mutex); 2173 kmem_free(gp_alloc, sizeof (graph_t)); 2174 } 2175 2176 return (gp); 2177 } 2178 2179 /* 2180 * PSARC case 1997/292 2181 */ 2182 int 2183 cl_flk_has_remote_locks_for_nlmid(vnode_t *vp, int nlmid) 2184 { 2185 lock_descriptor_t *lock; 2186 int result = 0; 2187 graph_t *gp; 2188 int lock_nlmid; 2189 2190 /* 2191 * Check to see if node is booted as a cluster. If not, return. 2192 */ 2193 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 2194 return (0); 2195 } 2196 2197 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH); 2198 if (gp == NULL) { 2199 return (0); 2200 } 2201 2202 mutex_enter(&gp->gp_mutex); 2203 2204 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2205 2206 if (lock) { 2207 while (lock->l_vnode == vp) { 2208 /* get NLM id from sysid */ 2209 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 2210 2211 /* 2212 * If NLM server request _and_ nlmid of lock matches 2213 * nlmid of argument, then we've found a remote lock. 2214 */ 2215 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 2216 result = 1; 2217 goto done; 2218 } 2219 lock = lock->l_next; 2220 } 2221 } 2222 2223 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2224 2225 if (lock) { 2226 while (lock->l_vnode == vp) { 2227 /* get NLM id from sysid */ 2228 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 2229 2230 /* 2231 * If NLM server request _and_ nlmid of lock matches 2232 * nlmid of argument, then we've found a remote lock. 2233 */ 2234 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 2235 result = 1; 2236 goto done; 2237 } 2238 lock = lock->l_next; 2239 } 2240 } 2241 2242 done: 2243 mutex_exit(&gp->gp_mutex); 2244 return (result); 2245 } 2246 2247 /* ONC_PLUS EXTRACT START */ 2248 /* 2249 * Determine whether there are any locks for the given vnode with a remote 2250 * sysid. Returns zero if not, non-zero if there are. 2251 * 2252 * Note that the return value from this function is potentially invalid 2253 * once it has been returned. The caller is responsible for providing its 2254 * own synchronization mechanism to ensure that the return value is useful 2255 * (e.g., see nfs_lockcompletion()). 2256 */ 2257 int 2258 flk_has_remote_locks(vnode_t *vp) 2259 { 2260 lock_descriptor_t *lock; 2261 int result = 0; 2262 graph_t *gp; 2263 2264 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH); 2265 if (gp == NULL) { 2266 return (0); 2267 } 2268 2269 mutex_enter(&gp->gp_mutex); 2270 2271 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2272 2273 if (lock) { 2274 while (lock->l_vnode == vp) { 2275 if (IS_REMOTE(lock)) { 2276 result = 1; 2277 goto done; 2278 } 2279 lock = lock->l_next; 2280 } 2281 } 2282 2283 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2284 2285 if (lock) { 2286 while (lock->l_vnode == vp) { 2287 if (IS_REMOTE(lock)) { 2288 result = 1; 2289 goto done; 2290 } 2291 lock = lock->l_next; 2292 } 2293 } 2294 2295 done: 2296 mutex_exit(&gp->gp_mutex); 2297 return (result); 2298 } 2299 2300 /* 2301 * Determine if there are any locks owned by the given sysid. 2302 * Returns zero if not, non-zero if there are. Note that this return code 2303 * could be derived from flk_get_{sleeping,active}_locks, but this routine 2304 * avoids all the memory allocations of those routines. 
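 * A typical call (an illustrative sketch, not from the original source)
 * queries both lists at once:
 *
 *	if (flk_sysid_has_locks(sysid, FLK_QUERY_ACTIVE | FLK_QUERY_SLEEPING))
 *		... the client identified by sysid still holds or awaits locks ...
 *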
2305 * 2306 * This routine has the same synchronization issues as 2307 * flk_has_remote_locks. 2308 */ 2309 2310 int 2311 flk_sysid_has_locks(int sysid, int lck_type) 2312 { 2313 int has_locks = 0; 2314 lock_descriptor_t *lock; 2315 graph_t *gp; 2316 int i; 2317 2318 for (i = 0; i < HASH_SIZE && !has_locks; i++) { 2319 mutex_enter(&flock_lock); 2320 gp = lock_graph[i]; 2321 mutex_exit(&flock_lock); 2322 if (gp == NULL) { 2323 continue; 2324 } 2325 2326 mutex_enter(&gp->gp_mutex); 2327 2328 if (lck_type & FLK_QUERY_ACTIVE) { 2329 for (lock = ACTIVE_HEAD(gp)->l_next; 2330 lock != ACTIVE_HEAD(gp) && !has_locks; 2331 lock = lock->l_next) { 2332 if (lock->l_flock.l_sysid == sysid) 2333 has_locks = 1; 2334 } 2335 } 2336 2337 if (lck_type & FLK_QUERY_SLEEPING) { 2338 for (lock = SLEEPING_HEAD(gp)->l_next; 2339 lock != SLEEPING_HEAD(gp) && !has_locks; 2340 lock = lock->l_next) { 2341 if (lock->l_flock.l_sysid == sysid) 2342 has_locks = 1; 2343 } 2344 } 2345 mutex_exit(&gp->gp_mutex); 2346 } 2347 2348 return (has_locks); 2349 } 2350 2351 2352 /* 2353 * PSARC case 1997/292 2354 * 2355 * Requires: "sysid" is a pair [nlmid, sysid]. The lower half is 16-bit 2356 * quantity, the real sysid generated by the NLM server; the upper half 2357 * identifies the node of the cluster where the NLM server ran. 2358 * This routine is only called by an NLM server running in a cluster. 2359 * Effects: Remove all locks held on behalf of the client identified 2360 * by "sysid." 2361 */ 2362 void 2363 cl_flk_remove_locks_by_sysid(int sysid) 2364 { 2365 graph_t *gp; 2366 int i; 2367 lock_descriptor_t *lock, *nlock; 2368 2369 /* 2370 * Check to see if node is booted as a cluster. If not, return. 2371 */ 2372 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 2373 return; 2374 } 2375 2376 ASSERT(sysid != 0); 2377 for (i = 0; i < HASH_SIZE; i++) { 2378 mutex_enter(&flock_lock); 2379 gp = lock_graph[i]; 2380 mutex_exit(&flock_lock); 2381 2382 if (gp == NULL) 2383 continue; 2384 2385 mutex_enter(&gp->gp_mutex); /* get mutex on lock graph */ 2386 2387 /* signal sleeping requests so that they bail out */ 2388 lock = SLEEPING_HEAD(gp)->l_next; 2389 while (lock != SLEEPING_HEAD(gp)) { 2390 nlock = lock->l_next; 2391 if (lock->l_flock.l_sysid == sysid) { 2392 INTERRUPT_WAKEUP(lock); 2393 } 2394 lock = nlock; 2395 } 2396 2397 /* delete active locks */ 2398 lock = ACTIVE_HEAD(gp)->l_next; 2399 while (lock != ACTIVE_HEAD(gp)) { 2400 nlock = lock->l_next; 2401 if (lock->l_flock.l_sysid == sysid) { 2402 flk_delete_active_lock(lock, 0); 2403 flk_wakeup(lock, 1); 2404 flk_free_lock(lock); 2405 } 2406 lock = nlock; 2407 } 2408 mutex_exit(&gp->gp_mutex); /* release mutex on lock graph */ 2409 } 2410 } 2411 2412 /* 2413 * Delete all locks in the system that belongs to the sysid of the request. 
2414 */ 2415 2416 static void 2417 flk_delete_locks_by_sysid(lock_descriptor_t *request) 2418 { 2419 int sysid = request->l_flock.l_sysid; 2420 lock_descriptor_t *lock, *nlock; 2421 graph_t *gp; 2422 int i; 2423 2424 ASSERT(MUTEX_HELD(&request->l_graph->gp_mutex)); 2425 ASSERT(sysid != 0); 2426 2427 mutex_exit(&request->l_graph->gp_mutex); 2428 2429 for (i = 0; i < HASH_SIZE; i++) { 2430 mutex_enter(&flock_lock); 2431 gp = lock_graph[i]; 2432 mutex_exit(&flock_lock); 2433 2434 if (gp == NULL) 2435 continue; 2436 2437 mutex_enter(&gp->gp_mutex); 2438 2439 /* signal sleeping requests so that they bail out */ 2440 lock = SLEEPING_HEAD(gp)->l_next; 2441 while (lock != SLEEPING_HEAD(gp)) { 2442 nlock = lock->l_next; 2443 if (lock->l_flock.l_sysid == sysid) { 2444 INTERRUPT_WAKEUP(lock); 2445 } 2446 lock = nlock; 2447 } 2448 2449 /* delete active locks */ 2450 lock = ACTIVE_HEAD(gp)->l_next; 2451 while (lock != ACTIVE_HEAD(gp)) { 2452 nlock = lock->l_next; 2453 if (lock->l_flock.l_sysid == sysid) { 2454 flk_delete_active_lock(lock, 0); 2455 flk_wakeup(lock, 1); 2456 flk_free_lock(lock); 2457 } 2458 lock = nlock; 2459 } 2460 mutex_exit(&gp->gp_mutex); 2461 } 2462 2463 mutex_enter(&request->l_graph->gp_mutex); 2464 } 2465 2466 /* 2467 * Clustering: Deletes PXFS locks 2468 * Effects: Delete all locks on files in the given file system and with the 2469 * given PXFS id. 2470 */ 2471 void 2472 cl_flk_delete_pxfs_locks(struct vfs *vfsp, int pxfsid) 2473 { 2474 lock_descriptor_t *lock, *nlock; 2475 graph_t *gp; 2476 int i; 2477 2478 for (i = 0; i < HASH_SIZE; i++) { 2479 mutex_enter(&flock_lock); 2480 gp = lock_graph[i]; 2481 mutex_exit(&flock_lock); 2482 2483 if (gp == NULL) 2484 continue; 2485 2486 mutex_enter(&gp->gp_mutex); 2487 2488 /* signal sleeping requests so that they bail out */ 2489 lock = SLEEPING_HEAD(gp)->l_next; 2490 while (lock != SLEEPING_HEAD(gp)) { 2491 nlock = lock->l_next; 2492 if (lock->l_vnode->v_vfsp == vfsp) { 2493 ASSERT(IS_PXFS(lock)); 2494 if (GETPXFSID(lock->l_flock.l_sysid) == 2495 pxfsid) { 2496 flk_set_state(lock, 2497 FLK_CANCELLED_STATE); 2498 flk_cancel_sleeping_lock(lock, 1); 2499 } 2500 } 2501 lock = nlock; 2502 } 2503 2504 /* delete active locks */ 2505 lock = ACTIVE_HEAD(gp)->l_next; 2506 while (lock != ACTIVE_HEAD(gp)) { 2507 nlock = lock->l_next; 2508 if (lock->l_vnode->v_vfsp == vfsp) { 2509 ASSERT(IS_PXFS(lock)); 2510 if (GETPXFSID(lock->l_flock.l_sysid) == 2511 pxfsid) { 2512 flk_delete_active_lock(lock, 0); 2513 flk_wakeup(lock, 1); 2514 flk_free_lock(lock); 2515 } 2516 } 2517 lock = nlock; 2518 } 2519 mutex_exit(&gp->gp_mutex); 2520 } 2521 } 2522 2523 /* 2524 * Search for a sleeping lock manager lock which matches exactly this lock 2525 * request; if one is found, fake a signal to cancel it. 2526 * 2527 * Return 1 if a matching lock was found, 0 otherwise. 
2528 */ 2529 2530 static int 2531 flk_canceled(lock_descriptor_t *request) 2532 { 2533 lock_descriptor_t *lock, *nlock; 2534 graph_t *gp = request->l_graph; 2535 vnode_t *vp = request->l_vnode; 2536 2537 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 2538 ASSERT(IS_LOCKMGR(request)); 2539 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2540 2541 if (lock) { 2542 while (lock->l_vnode == vp) { 2543 nlock = lock->l_next; 2544 if (SAME_OWNER(lock, request) && 2545 lock->l_start == request->l_start && 2546 lock->l_end == request->l_end) { 2547 INTERRUPT_WAKEUP(lock); 2548 return (1); 2549 } 2550 lock = nlock; 2551 } 2552 } 2553 return (0); 2554 } 2555 2556 /* 2557 * Remove all the locks for the vnode belonging to the given pid and sysid. 2558 */ 2559 2560 void 2561 cleanlocks(vnode_t *vp, pid_t pid, int sysid) 2562 { 2563 graph_t *gp; 2564 lock_descriptor_t *lock, *nlock; 2565 lock_descriptor_t *link_stack; 2566 2567 STACK_INIT(link_stack); 2568 2569 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH); 2570 2571 if (gp == NULL) 2572 return; 2573 mutex_enter(&gp->gp_mutex); 2574 2575 CHECK_SLEEPING_LOCKS(gp); 2576 CHECK_ACTIVE_LOCKS(gp); 2577 2578 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2579 2580 if (lock) { 2581 do { 2582 nlock = lock->l_next; 2583 if ((lock->l_flock.l_pid == pid || 2584 pid == IGN_PID) && 2585 lock->l_flock.l_sysid == sysid) { 2586 CANCEL_WAKEUP(lock); 2587 } 2588 lock = nlock; 2589 } while (lock->l_vnode == vp); 2590 } 2591 2592 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2593 2594 if (lock) { 2595 do { 2596 nlock = lock->l_next; 2597 if ((lock->l_flock.l_pid == pid || 2598 pid == IGN_PID) && 2599 lock->l_flock.l_sysid == sysid) { 2600 flk_delete_active_lock(lock, 0); 2601 STACK_PUSH(link_stack, lock, l_stack); 2602 } 2603 lock = nlock; 2604 } while (lock->l_vnode == vp); 2605 } 2606 2607 while ((lock = STACK_TOP(link_stack)) != NULL) { 2608 STACK_POP(link_stack, l_stack); 2609 flk_wakeup(lock, 1); 2610 flk_free_lock(lock); 2611 } 2612 2613 CHECK_SLEEPING_LOCKS(gp); 2614 CHECK_ACTIVE_LOCKS(gp); 2615 CHECK_OWNER_LOCKS(gp, pid, sysid, vp); 2616 mutex_exit(&gp->gp_mutex); 2617 } 2618 /* ONC_PLUS EXTRACT END */ 2619 2620 2621 /* 2622 * Called from 'fs' read and write routines for files that have mandatory 2623 * locking enabled. 2624 */ 2625 2626 int 2627 chklock( 2628 struct vnode *vp, 2629 int iomode, 2630 u_offset_t offset, 2631 ssize_t len, 2632 int fmode, 2633 caller_context_t *ct) 2634 { 2635 register int i; 2636 struct flock64 bf; 2637 int error = 0; 2638 2639 bf.l_type = (iomode & FWRITE) ? F_WRLCK : F_RDLCK; 2640 bf.l_whence = 0; 2641 bf.l_start = offset; 2642 bf.l_len = len; 2643 if (ct == NULL) { 2644 bf.l_pid = curproc->p_pid; 2645 bf.l_sysid = 0; 2646 } else { 2647 bf.l_pid = ct->cc_pid; 2648 bf.l_sysid = ct->cc_sysid; 2649 } 2650 i = (fmode & (FNDELAY|FNONBLOCK)) ? INOFLCK : INOFLCK|SLPFLCK; 2651 if ((i = reclock(vp, &bf, i, 0, offset, NULL)) != 0 || 2652 bf.l_type != F_UNLCK) 2653 error = i ? i : EAGAIN; 2654 return (error); 2655 } 2656 2657 /* ONC_PLUS EXTRACT START */ 2658 /* 2659 * convoff - converts the given data (start, whence) to the 2660 * given whence. 
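 *
 * A worked example (illustrative, not part of the original comment): with
 * lckdat->l_whence == 1 (relative to the current offset), lckdat->l_start ==
 * 100 and offset == 500, converting to whence == 0 first normalizes l_start
 * to 600; since the target whence is 0 nothing is subtracted, and l_whence
 * becomes 0.  If either whence involved is 2 (relative to EOF), the file
 * size is fetched with VOP_GETATTR() first.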
2661 */ 2662 int 2663 convoff(vp, lckdat, whence, offset) 2664 struct vnode *vp; 2665 struct flock64 *lckdat; 2666 int whence; 2667 offset_t offset; 2668 { 2669 int error; 2670 struct vattr vattr; 2671 2672 if ((lckdat->l_whence == 2) || (whence == 2)) { 2673 vattr.va_mask = AT_SIZE; 2674 if (error = VOP_GETATTR(vp, &vattr, 0, CRED())) 2675 return (error); 2676 } 2677 2678 switch (lckdat->l_whence) { 2679 case 1: 2680 lckdat->l_start += offset; 2681 break; 2682 case 2: 2683 lckdat->l_start += vattr.va_size; 2684 /* FALLTHRU */ 2685 case 0: 2686 break; 2687 default: 2688 return (EINVAL); 2689 } 2690 2691 if (lckdat->l_start < 0) 2692 return (EINVAL); 2693 2694 switch (whence) { 2695 case 1: 2696 lckdat->l_start -= offset; 2697 break; 2698 case 2: 2699 lckdat->l_start -= vattr.va_size; 2700 /* FALLTHRU */ 2701 case 0: 2702 break; 2703 default: 2704 return (EINVAL); 2705 } 2706 2707 lckdat->l_whence = (short)whence; 2708 return (0); 2709 } 2710 /* ONC_PLUS EXTRACT END */ 2711 2712 2713 /* proc_graph function definitions */ 2714 2715 /* 2716 * Function checks for deadlock due to the new 'lock'. If deadlock found 2717 * edges of this lock are freed and returned. 2718 */ 2719 2720 static int 2721 flk_check_deadlock(lock_descriptor_t *lock) 2722 { 2723 proc_vertex_t *start_vertex, *pvertex; 2724 proc_vertex_t *dvertex; 2725 proc_edge_t *pep, *ppep; 2726 edge_t *ep, *nep; 2727 proc_vertex_t *process_stack; 2728 2729 STACK_INIT(process_stack); 2730 2731 mutex_enter(&flock_lock); 2732 start_vertex = flk_get_proc_vertex(lock); 2733 ASSERT(start_vertex != NULL); 2734 2735 /* construct the edges from this process to other processes */ 2736 2737 ep = FIRST_ADJ(lock); 2738 while (ep != HEAD(lock)) { 2739 proc_vertex_t *adj_proc; 2740 2741 adj_proc = flk_get_proc_vertex(ep->to_vertex); 2742 for (pep = start_vertex->edge; pep != NULL; pep = pep->next) { 2743 if (pep->to_proc == adj_proc) { 2744 ASSERT(pep->refcount); 2745 pep->refcount++; 2746 break; 2747 } 2748 } 2749 if (pep == NULL) { 2750 pep = flk_get_proc_edge(); 2751 pep->to_proc = adj_proc; 2752 pep->refcount = 1; 2753 adj_proc->incount++; 2754 pep->next = start_vertex->edge; 2755 start_vertex->edge = pep; 2756 } 2757 ep = NEXT_ADJ(ep); 2758 } 2759 2760 ep = FIRST_IN(lock); 2761 2762 while (ep != HEAD(lock)) { 2763 proc_vertex_t *in_proc; 2764 2765 in_proc = flk_get_proc_vertex(ep->from_vertex); 2766 2767 for (pep = in_proc->edge; pep != NULL; pep = pep->next) { 2768 if (pep->to_proc == start_vertex) { 2769 ASSERT(pep->refcount); 2770 pep->refcount++; 2771 break; 2772 } 2773 } 2774 if (pep == NULL) { 2775 pep = flk_get_proc_edge(); 2776 pep->to_proc = start_vertex; 2777 pep->refcount = 1; 2778 start_vertex->incount++; 2779 pep->next = in_proc->edge; 2780 in_proc->edge = pep; 2781 } 2782 ep = NEXT_IN(ep); 2783 } 2784 2785 if (start_vertex->incount == 0) { 2786 mutex_exit(&flock_lock); 2787 return (0); 2788 } 2789 2790 flk_proc_graph_uncolor(); 2791 2792 start_vertex->p_sedge = start_vertex->edge; 2793 2794 STACK_PUSH(process_stack, start_vertex, p_stack); 2795 2796 while ((pvertex = STACK_TOP(process_stack)) != NULL) { 2797 for (pep = pvertex->p_sedge; pep != NULL; pep = pep->next) { 2798 dvertex = pep->to_proc; 2799 if (!PROC_ARRIVED(dvertex)) { 2800 STACK_PUSH(process_stack, dvertex, p_stack); 2801 dvertex->p_sedge = dvertex->edge; 2802 PROC_ARRIVE(pvertex); 2803 pvertex->p_sedge = pep->next; 2804 break; 2805 } 2806 if (!PROC_DEPARTED(dvertex)) 2807 goto deadlock; 2808 } 2809 if (pep == NULL) { 2810 PROC_DEPART(pvertex); 2811 
STACK_POP(process_stack, p_stack); 2812 } 2813 } 2814 mutex_exit(&flock_lock); 2815 return (0); 2816 2817 deadlock: 2818 2819 /* we remove all lock edges and proc edges */ 2820 2821 ep = FIRST_ADJ(lock); 2822 while (ep != HEAD(lock)) { 2823 proc_vertex_t *adj_proc; 2824 adj_proc = flk_get_proc_vertex(ep->to_vertex); 2825 nep = NEXT_ADJ(ep); 2826 IN_LIST_REMOVE(ep); 2827 ADJ_LIST_REMOVE(ep); 2828 flk_free_edge(ep); 2829 ppep = start_vertex->edge; 2830 for (pep = start_vertex->edge; pep != NULL; ppep = pep, 2831 pep = ppep->next) { 2832 if (pep->to_proc == adj_proc) { 2833 pep->refcount--; 2834 if (pep->refcount == 0) { 2835 if (pep == ppep) { 2836 start_vertex->edge = pep->next; 2837 } else { 2838 ppep->next = pep->next; 2839 } 2840 adj_proc->incount--; 2841 flk_proc_release(adj_proc); 2842 flk_free_proc_edge(pep); 2843 } 2844 break; 2845 } 2846 } 2847 ep = nep; 2848 } 2849 ep = FIRST_IN(lock); 2850 while (ep != HEAD(lock)) { 2851 proc_vertex_t *in_proc; 2852 in_proc = flk_get_proc_vertex(ep->from_vertex); 2853 nep = NEXT_IN(ep); 2854 IN_LIST_REMOVE(ep); 2855 ADJ_LIST_REMOVE(ep); 2856 flk_free_edge(ep); 2857 ppep = in_proc->edge; 2858 for (pep = in_proc->edge; pep != NULL; ppep = pep, 2859 pep = ppep->next) { 2860 if (pep->to_proc == start_vertex) { 2861 pep->refcount--; 2862 if (pep->refcount == 0) { 2863 if (pep == ppep) { 2864 in_proc->edge = pep->next; 2865 } else { 2866 ppep->next = pep->next; 2867 } 2868 start_vertex->incount--; 2869 flk_proc_release(in_proc); 2870 flk_free_proc_edge(pep); 2871 } 2872 break; 2873 } 2874 } 2875 ep = nep; 2876 } 2877 flk_proc_release(start_vertex); 2878 mutex_exit(&flock_lock); 2879 return (1); 2880 } 2881 2882 /* 2883 * Get a proc vertex. If lock's pvertex value gets a correct proc vertex 2884 * from the list we return that, otherwise we allocate one. If necessary, 2885 * we grow the list of vertices also. 2886 */ 2887 2888 static proc_vertex_t * 2889 flk_get_proc_vertex(lock_descriptor_t *lock) 2890 { 2891 int i; 2892 proc_vertex_t *pv; 2893 proc_vertex_t **palloc; 2894 2895 ASSERT(MUTEX_HELD(&flock_lock)); 2896 if (lock->pvertex != -1) { 2897 ASSERT(lock->pvertex >= 0); 2898 pv = pgraph.proc[lock->pvertex]; 2899 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) { 2900 return (pv); 2901 } 2902 } 2903 for (i = 0; i < pgraph.gcount; i++) { 2904 pv = pgraph.proc[i]; 2905 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) { 2906 lock->pvertex = pv->index = i; 2907 return (pv); 2908 } 2909 } 2910 pv = kmem_zalloc(sizeof (struct proc_vertex), KM_SLEEP); 2911 pv->pid = lock->l_flock.l_pid; 2912 pv->sysid = lock->l_flock.l_sysid; 2913 flk_proc_vertex_allocs++; 2914 if (pgraph.free != 0) { 2915 for (i = 0; i < pgraph.gcount; i++) { 2916 if (pgraph.proc[i] == NULL) { 2917 pgraph.proc[i] = pv; 2918 lock->pvertex = pv->index = i; 2919 pgraph.free--; 2920 return (pv); 2921 } 2922 } 2923 } 2924 palloc = kmem_zalloc((pgraph.gcount + PROC_CHUNK) * 2925 sizeof (proc_vertex_t *), KM_SLEEP); 2926 2927 if (pgraph.proc) { 2928 bcopy(pgraph.proc, palloc, 2929 pgraph.gcount * sizeof (proc_vertex_t *)); 2930 2931 kmem_free(pgraph.proc, 2932 pgraph.gcount * sizeof (proc_vertex_t *)); 2933 } 2934 pgraph.proc = palloc; 2935 pgraph.free += (PROC_CHUNK - 1); 2936 pv->index = lock->pvertex = pgraph.gcount; 2937 pgraph.gcount += PROC_CHUNK; 2938 pgraph.proc[pv->index] = pv; 2939 return (pv); 2940 } 2941 2942 /* 2943 * Allocate a proc edge. 
2944 */ 2945 2946 static proc_edge_t * 2947 flk_get_proc_edge() 2948 { 2949 proc_edge_t *pep; 2950 2951 pep = kmem_zalloc(sizeof (proc_edge_t), KM_SLEEP); 2952 flk_proc_edge_allocs++; 2953 return (pep); 2954 } 2955 2956 /* 2957 * Free the proc edge. Called whenever its reference count goes to zero. 2958 */ 2959 2960 static void 2961 flk_free_proc_edge(proc_edge_t *pep) 2962 { 2963 ASSERT(pep->refcount == 0); 2964 kmem_free((void *)pep, sizeof (proc_edge_t)); 2965 flk_proc_edge_frees++; 2966 } 2967 2968 /*
2969 * Uncolor the process graph. The vertices are explicitly reset only when the mark value hits its maximum; otherwise the mark is simply incremented. 2970 */ 2971 2972 static void 2973 flk_proc_graph_uncolor() 2974 { 2975 int i; 2976 2977 if (pgraph.mark == UINT_MAX) { 2978 for (i = 0; i < pgraph.gcount; i++) 2979 if (pgraph.proc[i] != NULL) { 2980 pgraph.proc[i]->atime = 0; 2981 pgraph.proc[i]->dtime = 0; 2982 } 2983 pgraph.mark = 1; 2984 } else { 2985 pgraph.mark++; 2986 } 2987 } 2988 2989 /*
2990 * Release the proc vertex iff it has neither in-edges nor out-edges. 2991 */ 2992 2993 static void 2994 flk_proc_release(proc_vertex_t *proc) 2995 { 2996 ASSERT(MUTEX_HELD(&flock_lock)); 2997 if (proc->edge == NULL && proc->incount == 0) { 2998 pgraph.proc[proc->index] = NULL; 2999 pgraph.free++; 3000 kmem_free(proc, sizeof (proc_vertex_t)); 3001 flk_proc_vertex_frees++; 3002 } 3003 } 3004 3005 /*
3006 * Updates the process graph to reflect a change in a lock graph. 3007 * Note: We should call this function only after we have a correctly 3008 * recomputed lock graph; otherwise we might miss a deadlock detection. 3009 * E.g., in flk_relation() we call this function after 3010 * flk_recompute_dependencies(); otherwise, if a process tries to lock a vnode hashed 3011 * into another graph, it might sleep forever. 3012 */ 3013 3014 static void 3015 flk_update_proc_graph(edge_t *ep, int delete) 3016 { 3017 proc_vertex_t *toproc, *fromproc; 3018 proc_edge_t *pep, *prevpep; 3019 3020 mutex_enter(&flock_lock); 3021 toproc = flk_get_proc_vertex(ep->to_vertex); 3022 fromproc = flk_get_proc_vertex(ep->from_vertex); 3023 3024 if (!delete) 3025 goto add; 3026 pep = prevpep = fromproc->edge; 3027 3028 ASSERT(pep != NULL); 3029 while (pep != NULL) { 3030 if (pep->to_proc == toproc) { 3031 ASSERT(pep->refcount > 0); 3032 pep->refcount--; 3033 if (pep->refcount == 0) { 3034 if (pep == prevpep) { 3035 fromproc->edge = pep->next; 3036 } else { 3037 prevpep->next = pep->next; 3038 } 3039 toproc->incount--; 3040 flk_proc_release(toproc); 3041 flk_free_proc_edge(pep); 3042 } 3043 break; 3044 } 3045 prevpep = pep; 3046 pep = pep->next; 3047 } 3048 flk_proc_release(fromproc); 3049 mutex_exit(&flock_lock); 3050 return; 3051 add: 3052 3053 pep = fromproc->edge; 3054 3055 while (pep != NULL) { 3056 if (pep->to_proc == toproc) { 3057 ASSERT(pep->refcount > 0); 3058 pep->refcount++; 3059 break; 3060 } 3061 pep = pep->next; 3062 } 3063 if (pep == NULL) { 3064 pep = flk_get_proc_edge(); 3065 pep->to_proc = toproc; 3066 pep->refcount = 1; 3067 toproc->incount++; 3068 pep->next = fromproc->edge; 3069 fromproc->edge = pep; 3070 } 3071 mutex_exit(&flock_lock); 3072 } 3073 3074 /* ONC_PLUS EXTRACT START */ 3075 /* 3076 * Set the control status for lock manager requests. 3077 * 3078 */ 3079 3080 /* 3081 * PSARC case 1997/292 3082 * 3083 * Requires: "nlmid" must be >= 1 and <= clconf_maximum_nodeid(). 3084 * Effects: Set the state of the NLM server identified by "nlmid" 3085 * in the NLM registry to state "nlm_state."
3086 * Raises exception no_such_nlm if "nlmid" doesn't identify a known 3087 * NLM server to this LLM. 3088 * Note that when this routine is called with NLM_SHUTTING_DOWN there 3089 * may be locks requests that have gotten started but not finished. In 3090 * particular, there may be blocking requests that are in the callback code 3091 * before sleeping (so they're not holding the lock for the graph). If 3092 * such a thread reacquires the graph's lock (to go to sleep) after 3093 * NLM state in the NLM registry is set to a non-up value, 3094 * it will notice the status and bail out. If the request gets 3095 * granted before the thread can check the NLM registry, let it 3096 * continue normally. It will get flushed when we are called with NLM_DOWN. 3097 * 3098 * Modifies: nlm_reg_obj (global) 3099 * Arguments: 3100 * nlmid (IN): id uniquely identifying an NLM server 3101 * nlm_state (IN): NLM server state to change "nlmid" to 3102 */ 3103 void 3104 cl_flk_set_nlm_status(int nlmid, flk_nlm_status_t nlm_state) 3105 { 3106 /* 3107 * Check to see if node is booted as a cluster. If not, return. 3108 */ 3109 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 3110 return; 3111 } 3112 3113 /* 3114 * Check for development/debugging. It is possible to boot a node 3115 * in non-cluster mode, and then run a special script, currently 3116 * available only to developers, to bring up the node as part of a 3117 * cluster. The problem is that running such a script does not 3118 * result in the routine flk_init() being called and hence global array 3119 * nlm_reg_status is NULL. The NLM thinks it's in cluster mode, 3120 * but the LLM needs to do an additional check to see if the global 3121 * array has been created or not. If nlm_reg_status is NULL, then 3122 * return, else continue. 3123 */ 3124 if (nlm_reg_status == NULL) { 3125 return; 3126 } 3127 3128 ASSERT(nlmid <= nlm_status_size && nlmid >= 0); 3129 mutex_enter(&nlm_reg_lock); 3130 3131 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, nlmid)) { 3132 /* 3133 * If the NLM server "nlmid" is unknown in the NLM registry, 3134 * add it to the registry in the nlm shutting down state. 3135 */ 3136 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, 3137 FLK_NLM_SHUTTING_DOWN); 3138 } else { 3139 /* 3140 * Change the state of the NLM server identified by "nlmid" 3141 * in the NLM registry to the argument "nlm_state." 3142 */ 3143 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, 3144 nlm_state); 3145 } 3146 3147 /* 3148 * The reason we must register the NLM server that is shutting down 3149 * with an LLM that doesn't already know about it (never sent a lock 3150 * request) is to handle correctly a race between shutdown and a new 3151 * lock request. Suppose that a shutdown request from the NLM server 3152 * invokes this routine at the LLM, and a thread is spawned to 3153 * service the request. Now suppose a new lock request is in 3154 * progress and has already passed the first line of defense in 3155 * reclock(), which denies new locks requests from NLM servers 3156 * that are not in the NLM_UP state. After the current routine 3157 * is invoked for both phases of shutdown, the routine will return, 3158 * having done nothing, and the lock request will proceed and 3159 * probably be granted. The problem is that the shutdown was ignored 3160 * by the lock request because there was no record of that NLM server 3161 * shutting down. 
We will be in the peculiar position of thinking 3162 * that we've shutdown the NLM server and all locks at all LLMs have 3163 * been discarded, but in fact there's still one lock held. 3164 * The solution is to record the existence of NLM server and change 3165 * its state immediately to NLM_SHUTTING_DOWN. The lock request in 3166 * progress may proceed because the next phase NLM_DOWN will catch 3167 * this lock and discard it. 3168 */ 3169 mutex_exit(&nlm_reg_lock); 3170 3171 switch (nlm_state) { 3172 case FLK_NLM_UP: 3173 /* 3174 * Change the NLM state of all locks still held on behalf of 3175 * the NLM server identified by "nlmid" to NLM_UP. 3176 */ 3177 cl_flk_change_nlm_state_all_locks(nlmid, FLK_NLM_UP); 3178 break; 3179 3180 case FLK_NLM_SHUTTING_DOWN: 3181 /* 3182 * Wake up all sleeping locks for the NLM server identified 3183 * by "nlmid." Note that eventually all woken threads will 3184 * have their lock requests cancelled and descriptors 3185 * removed from the sleeping lock list. Note that the NLM 3186 * server state associated with each lock descriptor is 3187 * changed to FLK_NLM_SHUTTING_DOWN. 3188 */ 3189 cl_flk_wakeup_sleeping_nlm_locks(nlmid); 3190 break; 3191 3192 case FLK_NLM_DOWN: 3193 /* 3194 * Discard all active, granted locks for this NLM server 3195 * identified by "nlmid." 3196 */ 3197 cl_flk_unlock_nlm_granted(nlmid); 3198 break; 3199 3200 default: 3201 panic("cl_set_nlm_status: bad status (%d)", nlm_state); 3202 } 3203 } 3204 3205 /* 3206 * Set the control status for lock manager requests. 3207 * 3208 * Note that when this routine is called with FLK_WAKEUP_SLEEPERS, there 3209 * may be locks requests that have gotten started but not finished. In 3210 * particular, there may be blocking requests that are in the callback code 3211 * before sleeping (so they're not holding the lock for the graph). If 3212 * such a thread reacquires the graph's lock (to go to sleep) after 3213 * flk_lockmgr_status is set to a non-up value, it will notice the status 3214 * and bail out. If the request gets granted before the thread can check 3215 * flk_lockmgr_status, let it continue normally. It will get flushed when 3216 * we are called with FLK_LOCKMGR_DOWN. 3217 */ 3218 3219 void 3220 flk_set_lockmgr_status(flk_lockmgr_status_t status) 3221 { 3222 int i; 3223 graph_t *gp; 3224 struct flock_globals *fg; 3225 3226 fg = flk_get_globals(); 3227 ASSERT(fg != NULL); 3228 3229 mutex_enter(&flock_lock); 3230 fg->flk_lockmgr_status = status; 3231 mutex_exit(&flock_lock); 3232 3233 /* 3234 * If the lock manager is coming back up, all that's needed is to 3235 * propagate this information to the graphs. If the lock manager 3236 * is going down, additional action is required, and each graph's 3237 * copy of the state is updated atomically with this other action. 3238 */ 3239 switch (status) { 3240 case FLK_LOCKMGR_UP: 3241 for (i = 0; i < HASH_SIZE; i++) { 3242 mutex_enter(&flock_lock); 3243 gp = lock_graph[i]; 3244 mutex_exit(&flock_lock); 3245 if (gp == NULL) 3246 continue; 3247 mutex_enter(&gp->gp_mutex); 3248 fg->lockmgr_status[i] = status; 3249 mutex_exit(&gp->gp_mutex); 3250 } 3251 break; 3252 case FLK_WAKEUP_SLEEPERS: 3253 wakeup_sleeping_lockmgr_locks(fg); 3254 break; 3255 case FLK_LOCKMGR_DOWN: 3256 unlock_lockmgr_granted(fg); 3257 break; 3258 default: 3259 panic("flk_set_lockmgr_status: bad status (%d)", status); 3260 break; 3261 } 3262 } 3263 3264 /* 3265 * This routine returns all the locks that are active or sleeping and are 3266 * associated with a particular set of identifiers. 
If lock_state != 0, then 3267 * only locks that match the lock_state are returned. If lock_state == 0, then 3268 * all locks are returned. If pid == NOPID, the pid is ignored. If 3269 * use_sysid is FALSE, then the sysid is ignored. If vp is NULL, then the 3270 * vnode pointer is ignored. 3271 * 3272 * A list containing the vnode pointer and an flock structure 3273 * describing the lock is returned. Each element in the list is 3274 * dynammically allocated and must be freed by the caller. The 3275 * last item in the list is denoted by a NULL value in the ll_next 3276 * field. 3277 * 3278 * The vnode pointers returned are held. The caller is responsible 3279 * for releasing these. Note that the returned list is only a snapshot of 3280 * the current lock information, and that it is a snapshot of a moving 3281 * target (only one graph is locked at a time). 3282 */ 3283 3284 locklist_t * 3285 get_lock_list(int list_type, int lock_state, int sysid, boolean_t use_sysid, 3286 pid_t pid, const vnode_t *vp, zoneid_t zoneid) 3287 { 3288 lock_descriptor_t *lock; 3289 lock_descriptor_t *graph_head; 3290 locklist_t listhead; 3291 locklist_t *llheadp; 3292 locklist_t *llp; 3293 locklist_t *lltp; 3294 graph_t *gp; 3295 int i; 3296 int first_index; /* graph index */ 3297 int num_indexes; /* graph index */ 3298 3299 ASSERT((list_type == FLK_ACTIVE_STATE) || 3300 (list_type == FLK_SLEEPING_STATE)); 3301 3302 /* 3303 * Get a pointer to something to use as a list head while building 3304 * the rest of the list. 3305 */ 3306 llheadp = &listhead; 3307 lltp = llheadp; 3308 llheadp->ll_next = (locklist_t *)NULL; 3309 3310 /* Figure out which graphs we want to look at. */ 3311 if (vp == NULL) { 3312 first_index = 0; 3313 num_indexes = HASH_SIZE; 3314 } else { 3315 first_index = HASH_INDEX(vp); 3316 num_indexes = 1; 3317 } 3318 3319 for (i = first_index; i < first_index + num_indexes; i++) { 3320 mutex_enter(&flock_lock); 3321 gp = lock_graph[i]; 3322 mutex_exit(&flock_lock); 3323 if (gp == NULL) { 3324 continue; 3325 } 3326 3327 mutex_enter(&gp->gp_mutex); 3328 graph_head = (list_type == FLK_ACTIVE_STATE) ? 3329 ACTIVE_HEAD(gp) : SLEEPING_HEAD(gp); 3330 for (lock = graph_head->l_next; 3331 lock != graph_head; 3332 lock = lock->l_next) { 3333 if (use_sysid && lock->l_flock.l_sysid != sysid) 3334 continue; 3335 if (pid != NOPID && lock->l_flock.l_pid != pid) 3336 continue; 3337 if (vp != NULL && lock->l_vnode != vp) 3338 continue; 3339 if (lock_state && !(lock_state & lock->l_state)) 3340 continue; 3341 if (zoneid != lock->l_zoneid && zoneid != ALL_ZONES) 3342 continue; 3343 /* 3344 * A matching lock was found. Allocate 3345 * space for a new locklist entry and fill 3346 * it in. 3347 */ 3348 llp = kmem_alloc(sizeof (locklist_t), KM_SLEEP); 3349 lltp->ll_next = llp; 3350 VN_HOLD(lock->l_vnode); 3351 llp->ll_vp = lock->l_vnode; 3352 create_flock(lock, &(llp->ll_flock)); 3353 llp->ll_next = (locklist_t *)NULL; 3354 lltp = llp; 3355 } 3356 mutex_exit(&gp->gp_mutex); 3357 } 3358 3359 llp = llheadp->ll_next; 3360 return (llp); 3361 } 3362 3363 /* 3364 * These two functions are simply interfaces to get_lock_list. They return 3365 * a list of sleeping or active locks for the given sysid and pid. See 3366 * get_lock_list for details. 3367 * 3368 * In either case we don't particularly care to specify the zone of interest; 3369 * the sysid-space is global across zones, so the sysid will map to exactly one 3370 * zone, and we'll return information for that zone. 
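 *
 * Typical use (an illustrative sketch, not from the original source):
 *
 *	locklist_t *llp, *ll;
 *
 *	llp = flk_get_active_locks(sysid, pid);
 *	for (ll = llp; ll != NULL; ll = ll->ll_next)
 *		... examine ll->ll_vp and ll->ll_flock ...
 *	flk_free_locklist(llp);		releases the vnode holds, frees entries
 *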
3371 */ 3372 3373 locklist_t * 3374 flk_get_sleeping_locks(int sysid, pid_t pid) 3375 { 3376 return (get_lock_list(FLK_SLEEPING_STATE, 0, sysid, B_TRUE, pid, NULL, 3377 ALL_ZONES)); 3378 } 3379 3380 locklist_t * 3381 flk_get_active_locks(int sysid, pid_t pid) 3382 { 3383 return (get_lock_list(FLK_ACTIVE_STATE, 0, sysid, B_TRUE, pid, NULL, 3384 ALL_ZONES)); 3385 } 3386 3387 /* 3388 * Another interface to get_lock_list. This one returns all the active 3389 * locks for a given vnode. Again, see get_lock_list for details. 3390 * 3391 * We don't need to specify which zone's locks we're interested in. The matter 3392 * would only be interesting if the vnode belonged to NFS, and NFS vnodes can't 3393 * be used by multiple zones, so the list of locks will all be from the right 3394 * zone. 3395 */ 3396 3397 locklist_t * 3398 flk_active_locks_for_vp(const vnode_t *vp) 3399 { 3400 return (get_lock_list(FLK_ACTIVE_STATE, 0, 0, B_FALSE, NOPID, vp, 3401 ALL_ZONES)); 3402 } 3403 3404 /* 3405 * Another interface to get_lock_list. This one returns all the active 3406 * nbmand locks for a given vnode. Again, see get_lock_list for details. 3407 * 3408 * See the comment for flk_active_locks_for_vp() for why we don't care to 3409 * specify the particular zone of interest. 3410 */ 3411 locklist_t * 3412 flk_active_nbmand_locks_for_vp(const vnode_t *vp) 3413 { 3414 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE, 3415 NOPID, vp, ALL_ZONES)); 3416 } 3417 3418 /* 3419 * Another interface to get_lock_list. This one returns all the active 3420 * nbmand locks for a given pid. Again, see get_lock_list for details. 3421 * 3422 * The zone doesn't need to be specified here; the locks held by a 3423 * particular process will either be local (ie, non-NFS) or from the zone 3424 * the process is executing in. This is because other parts of the system 3425 * ensure that an NFS vnode can't be used in a zone other than that in 3426 * which it was opened. 3427 */ 3428 locklist_t * 3429 flk_active_nbmand_locks(pid_t pid) 3430 { 3431 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE, 3432 pid, NULL, ALL_ZONES)); 3433 } 3434 3435 /* 3436 * Free up all entries in the locklist. 3437 */ 3438 void 3439 flk_free_locklist(locklist_t *llp) 3440 { 3441 locklist_t *next_llp; 3442 3443 while (llp) { 3444 next_llp = llp->ll_next; 3445 VN_RELE(llp->ll_vp); 3446 kmem_free(llp, sizeof (*llp)); 3447 llp = next_llp; 3448 } 3449 } 3450 3451 static void 3452 cl_flk_change_nlm_state_all_locks(int nlmid, flk_nlm_status_t nlm_state) 3453 { 3454 /* 3455 * For each graph "lg" in the hash table lock_graph do 3456 * a. Get the list of sleeping locks 3457 * b. For each lock descriptor in the list do 3458 * i. If the requested lock is an NLM server request AND 3459 * the nlmid is the same as the routine argument then 3460 * change the lock descriptor's state field to 3461 * "nlm_state." 3462 * c. Get the list of active locks 3463 * d. For each lock descriptor in the list do 3464 * i. If the requested lock is an NLM server request AND 3465 * the nlmid is the same as the routine argument then 3466 * change the lock descriptor's state field to 3467 * "nlm_state." 
3468 */ 3469 3470 int i; 3471 graph_t *gp; /* lock graph */ 3472 lock_descriptor_t *lock; /* lock */ 3473 lock_descriptor_t *nlock = NULL; /* next lock */ 3474 int lock_nlmid; 3475 3476 for (i = 0; i < HASH_SIZE; i++) { 3477 mutex_enter(&flock_lock); 3478 gp = lock_graph[i]; 3479 mutex_exit(&flock_lock); 3480 if (gp == NULL) { 3481 continue; 3482 } 3483 3484 /* Get list of sleeping locks in current lock graph. */ 3485 mutex_enter(&gp->gp_mutex); 3486 for (lock = SLEEPING_HEAD(gp)->l_next; 3487 lock != SLEEPING_HEAD(gp); 3488 lock = nlock) { 3489 nlock = lock->l_next; 3490 /* get NLM id */ 3491 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 3492 3493 /* 3494 * If NLM server request AND nlmid of lock matches 3495 * nlmid of argument, then set the NLM state of the 3496 * lock to "nlm_state." 3497 */ 3498 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 3499 SET_NLM_STATE(lock, nlm_state); 3500 } 3501 } 3502 3503 /* Get list of active locks in current lock graph. */ 3504 for (lock = ACTIVE_HEAD(gp)->l_next; 3505 lock != ACTIVE_HEAD(gp); 3506 lock = nlock) { 3507 nlock = lock->l_next; 3508 /* get NLM id */ 3509 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 3510 3511 /* 3512 * If NLM server request AND nlmid of lock matches 3513 * nlmid of argument, then set the NLM state of the 3514 * lock to "nlm_state." 3515 */ 3516 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 3517 ASSERT(IS_ACTIVE(lock)); 3518 SET_NLM_STATE(lock, nlm_state); 3519 } 3520 } 3521 mutex_exit(&gp->gp_mutex); 3522 } 3523 } 3524 3525 /* 3526 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid(). 3527 * Effects: Find all sleeping lock manager requests _only_ for the NLM server 3528 * identified by "nlmid." Poke those lock requests. 3529 */ 3530 static void 3531 cl_flk_wakeup_sleeping_nlm_locks(int nlmid) 3532 { 3533 lock_descriptor_t *lock; 3534 lock_descriptor_t *nlock = NULL; /* next lock */ 3535 int i; 3536 graph_t *gp; 3537 int lock_nlmid; 3538 3539 for (i = 0; i < HASH_SIZE; i++) { 3540 mutex_enter(&flock_lock); 3541 gp = lock_graph[i]; 3542 mutex_exit(&flock_lock); 3543 if (gp == NULL) { 3544 continue; 3545 } 3546 3547 mutex_enter(&gp->gp_mutex); 3548 for (lock = SLEEPING_HEAD(gp)->l_next; 3549 lock != SLEEPING_HEAD(gp); 3550 lock = nlock) { 3551 nlock = lock->l_next; 3552 /* 3553 * If NLM server request _and_ nlmid of lock matches 3554 * nlmid of argument, then set the NLM state of the 3555 * lock to NLM_SHUTTING_DOWN, and wake up sleeping 3556 * request. 3557 */ 3558 if (IS_LOCKMGR(lock)) { 3559 /* get NLM id */ 3560 lock_nlmid = 3561 GETNLMID(lock->l_flock.l_sysid); 3562 if (nlmid == lock_nlmid) { 3563 SET_NLM_STATE(lock, 3564 FLK_NLM_SHUTTING_DOWN); 3565 INTERRUPT_WAKEUP(lock); 3566 } 3567 } 3568 } 3569 mutex_exit(&gp->gp_mutex); 3570 } 3571 } 3572 3573 /* 3574 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid() 3575 * Effects: Find all active (granted) lock manager locks _only_ for the 3576 * NLM server identified by "nlmid" and release them. 
3577 */ 3578 static void 3579 cl_flk_unlock_nlm_granted(int nlmid) 3580 { 3581 lock_descriptor_t *lock; 3582 lock_descriptor_t *nlock = NULL; /* next lock */ 3583 int i; 3584 graph_t *gp; 3585 int lock_nlmid; 3586 3587 for (i = 0; i < HASH_SIZE; i++) { 3588 mutex_enter(&flock_lock); 3589 gp = lock_graph[i]; 3590 mutex_exit(&flock_lock); 3591 if (gp == NULL) { 3592 continue; 3593 } 3594 3595 mutex_enter(&gp->gp_mutex); 3596 for (lock = ACTIVE_HEAD(gp)->l_next; 3597 lock != ACTIVE_HEAD(gp); 3598 lock = nlock) { 3599 nlock = lock->l_next; 3600 ASSERT(IS_ACTIVE(lock)); 3601 3602 /* 3603 * If it's an NLM server request _and_ nlmid of 3604 * the lock matches nlmid of argument, then 3605 * remove the active lock the list, wakup blocked 3606 * threads, and free the storage for the lock. 3607 * Note that there's no need to mark the NLM state 3608 * of this lock to NLM_DOWN because the lock will 3609 * be deleted anyway and its storage freed. 3610 */ 3611 if (IS_LOCKMGR(lock)) { 3612 /* get NLM id */ 3613 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 3614 if (nlmid == lock_nlmid) { 3615 flk_delete_active_lock(lock, 0); 3616 flk_wakeup(lock, 1); 3617 flk_free_lock(lock); 3618 } 3619 } 3620 } 3621 mutex_exit(&gp->gp_mutex); 3622 } 3623 } 3624 3625 /* 3626 * Find all sleeping lock manager requests and poke them. 3627 */ 3628 static void 3629 wakeup_sleeping_lockmgr_locks(struct flock_globals *fg) 3630 { 3631 lock_descriptor_t *lock; 3632 lock_descriptor_t *nlock = NULL; /* next lock */ 3633 int i; 3634 graph_t *gp; 3635 zoneid_t zoneid = getzoneid(); 3636 3637 for (i = 0; i < HASH_SIZE; i++) { 3638 mutex_enter(&flock_lock); 3639 gp = lock_graph[i]; 3640 mutex_exit(&flock_lock); 3641 if (gp == NULL) { 3642 continue; 3643 } 3644 3645 mutex_enter(&gp->gp_mutex); 3646 fg->lockmgr_status[i] = FLK_WAKEUP_SLEEPERS; 3647 for (lock = SLEEPING_HEAD(gp)->l_next; 3648 lock != SLEEPING_HEAD(gp); 3649 lock = nlock) { 3650 nlock = lock->l_next; 3651 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) { 3652 INTERRUPT_WAKEUP(lock); 3653 } 3654 } 3655 mutex_exit(&gp->gp_mutex); 3656 } 3657 } 3658 3659 3660 /* 3661 * Find all active (granted) lock manager locks and release them. 3662 */ 3663 static void 3664 unlock_lockmgr_granted(struct flock_globals *fg) 3665 { 3666 lock_descriptor_t *lock; 3667 lock_descriptor_t *nlock = NULL; /* next lock */ 3668 int i; 3669 graph_t *gp; 3670 zoneid_t zoneid = getzoneid(); 3671 3672 for (i = 0; i < HASH_SIZE; i++) { 3673 mutex_enter(&flock_lock); 3674 gp = lock_graph[i]; 3675 mutex_exit(&flock_lock); 3676 if (gp == NULL) { 3677 continue; 3678 } 3679 3680 mutex_enter(&gp->gp_mutex); 3681 fg->lockmgr_status[i] = FLK_LOCKMGR_DOWN; 3682 for (lock = ACTIVE_HEAD(gp)->l_next; 3683 lock != ACTIVE_HEAD(gp); 3684 lock = nlock) { 3685 nlock = lock->l_next; 3686 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) { 3687 ASSERT(IS_ACTIVE(lock)); 3688 flk_delete_active_lock(lock, 0); 3689 flk_wakeup(lock, 1); 3690 flk_free_lock(lock); 3691 } 3692 } 3693 mutex_exit(&gp->gp_mutex); 3694 } 3695 } 3696 /* ONC_PLUS EXTRACT END */ 3697 3698 3699 /* 3700 * Wait until a lock is granted, cancelled, or interrupted. 
3701 */ 3702 3703 static void 3704 wait_for_lock(lock_descriptor_t *request) 3705 { 3706 graph_t *gp = request->l_graph; 3707 3708 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 3709 3710 while (!(IS_GRANTED(request)) && !(IS_CANCELLED(request)) && 3711 !(IS_INTERRUPTED(request))) { 3712 if (!cv_wait_sig(&request->l_cv, &gp->gp_mutex)) { 3713 flk_set_state(request, FLK_INTERRUPTED_STATE); 3714 request->l_state |= INTERRUPTED_LOCK; 3715 } 3716 } 3717 } 3718 3719 /* ONC_PLUS EXTRACT START */ 3720 /* 3721 * Create an flock structure from the existing lock information 3722 * 3723 * This routine is used to create flock structures for the lock manager 3724 * to use in a reclaim request. Since the lock was orginated on this 3725 * host, it must be conforming to UNIX semantics, so no checking is 3726 * done to make sure it falls within the lower half of the 32-bit range. 3727 */ 3728 3729 static void 3730 create_flock(lock_descriptor_t *lp, flock64_t *flp) 3731 { 3732 ASSERT(lp->l_end == MAX_U_OFFSET_T || lp->l_end <= MAXEND); 3733 ASSERT(lp->l_end >= lp->l_start); 3734 3735 flp->l_type = lp->l_type; 3736 flp->l_whence = 0; 3737 flp->l_start = lp->l_start; 3738 flp->l_len = (lp->l_end == MAX_U_OFFSET_T) ? 0 : 3739 (lp->l_end - lp->l_start + 1); 3740 flp->l_sysid = lp->l_flock.l_sysid; 3741 flp->l_pid = lp->l_flock.l_pid; 3742 } 3743 3744 /* 3745 * Convert flock_t data describing a lock range into unsigned long starting 3746 * and ending points, which are put into lock_request. Returns 0 or an 3747 * errno value. 3748 * Large Files: max is passed by the caller and we return EOVERFLOW 3749 * as defined by LFS API. 3750 */ 3751 3752 int 3753 flk_convert_lock_data(vnode_t *vp, flock64_t *flp, 3754 u_offset_t *start, u_offset_t *end, offset_t offset) 3755 { 3756 struct vattr vattr; 3757 int error; 3758 3759 /* 3760 * Determine the starting point of the request 3761 */ 3762 switch (flp->l_whence) { 3763 case 0: /* SEEK_SET */ 3764 *start = (u_offset_t)flp->l_start; 3765 break; 3766 case 1: /* SEEK_CUR */ 3767 *start = (u_offset_t)(flp->l_start + offset); 3768 break; 3769 case 2: /* SEEK_END */ 3770 vattr.va_mask = AT_SIZE; 3771 if (error = VOP_GETATTR(vp, &vattr, 0, CRED())) 3772 return (error); 3773 *start = (u_offset_t)(flp->l_start + vattr.va_size); 3774 break; 3775 default: 3776 return (EINVAL); 3777 } 3778 3779 /* 3780 * Determine the range covered by the request. 3781 */ 3782 if (flp->l_len == 0) 3783 *end = MAX_U_OFFSET_T; 3784 else if ((offset_t)flp->l_len > 0) { 3785 *end = (u_offset_t)(*start + (flp->l_len - 1)); 3786 } else { 3787 /* 3788 * Negative length; why do we even allow this ? 3789 * Because this allows easy specification of 3790 * the last n bytes of the file. 3791 */ 3792 *end = *start; 3793 *start += (u_offset_t)flp->l_len; 3794 (*start)++; 3795 } 3796 return (0); 3797 } 3798 3799 /* 3800 * Check the validity of lock data. This can used by the NFS 3801 * frlock routines to check data before contacting the server. The 3802 * server must support semantics that aren't as restrictive as 3803 * the UNIX API, so the NFS client is required to check. 3804 * The maximum is now passed in by the caller. 3805 */ 3806 3807 int 3808 flk_check_lock_data(u_offset_t start, u_offset_t end, offset_t max) 3809 { 3810 /* 3811 * The end (length) for local locking should never be greater 3812 * than MAXEND. However, the representation for 3813 * the entire file is MAX_U_OFFSET_T. 
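	 *
	 * For example (an illustrative note, not part of the original comment):
	 * a whole-file lock built by flk_convert_lock_data() with l_len == 0
	 * arrives here with end == MAX_U_OFFSET_T and is accepted even though
	 * it exceeds "max", whereas a start beyond "max", or a start greater
	 * than the end, is rejected with EINVAL.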
3814 */ 3815 if ((start > max) || 3816 ((end > max) && (end != MAX_U_OFFSET_T))) { 3817 return (EINVAL); 3818 } 3819 if (start > end) { 3820 return (EINVAL); 3821 } 3822 return (0); 3823 } 3824 3825 /* 3826 * Fill in request->l_flock with information about the lock blocking the 3827 * request. The complexity here is that lock manager requests are allowed 3828 * to see into the upper part of the 32-bit address range, whereas local 3829 * requests are only allowed to see signed values. 3830 * 3831 * What should be done when "blocker" is a lock manager lock that uses the 3832 * upper portion of the 32-bit range, but "request" is local? Since the 3833 * request has already been determined to have been blocked by the blocker, 3834 * at least some portion of "blocker" must be in the range of the request, 3835 * or the request extends to the end of file. For the first case, the 3836 * portion in the lower range is returned with the indication that it goes 3837 * "to EOF." For the second case, the last byte of the lower range is 3838 * returned with the indication that it goes "to EOF." 3839 */ 3840 3841 static void 3842 report_blocker(lock_descriptor_t *blocker, lock_descriptor_t *request) 3843 { 3844 flock64_t *flrp; /* l_flock portion of request */ 3845 3846 ASSERT(blocker != NULL); 3847 3848 flrp = &request->l_flock; 3849 flrp->l_whence = 0; 3850 flrp->l_type = blocker->l_type; 3851 flrp->l_pid = blocker->l_flock.l_pid; 3852 flrp->l_sysid = blocker->l_flock.l_sysid; 3853 3854 if (IS_LOCKMGR(request)) { 3855 flrp->l_start = blocker->l_start; 3856 if (blocker->l_end == MAX_U_OFFSET_T) 3857 flrp->l_len = 0; 3858 else 3859 flrp->l_len = blocker->l_end - blocker->l_start + 1; 3860 } else { 3861 if (blocker->l_start > MAXEND) { 3862 flrp->l_start = MAXEND; 3863 flrp->l_len = 0; 3864 } else { 3865 flrp->l_start = blocker->l_start; 3866 if (blocker->l_end == MAX_U_OFFSET_T) 3867 flrp->l_len = 0; 3868 else 3869 flrp->l_len = blocker->l_end - 3870 blocker->l_start + 1; 3871 } 3872 } 3873 } 3874 /* ONC_PLUS EXTRACT END */ 3875 3876 /* 3877 * PSARC case 1997/292 3878 */ 3879 /* 3880 * This is the public routine exported by flock.h. 3881 */ 3882 void 3883 cl_flk_change_nlm_state_to_unknown(int nlmid) 3884 { 3885 /* 3886 * Check to see if node is booted as a cluster. If not, return. 3887 */ 3888 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 3889 return; 3890 } 3891 3892 /* 3893 * See comment in cl_flk_set_nlm_status(). 3894 */ 3895 if (nlm_reg_status == NULL) { 3896 return; 3897 } 3898 3899 /* 3900 * protect NLM registry state with a mutex. 3901 */ 3902 ASSERT(nlmid <= nlm_status_size && nlmid >= 0); 3903 mutex_enter(&nlm_reg_lock); 3904 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, FLK_NLM_UNKNOWN); 3905 mutex_exit(&nlm_reg_lock); 3906 } 3907 3908 /* 3909 * Return non-zero if the given I/O request conflicts with an active NBMAND 3910 * lock. 3911 * If svmand is non-zero, it means look at all active locks, not just NBMAND 3912 * locks. 
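 *
 * Illustrative example (not part of the original comment): if another local
 * process holds an active nbmand F_WRLCK over bytes 100-199 of vp, then
 * nbl_lock_conflict(vp, NBL_READ, 150, 10, 0) reports a conflict; against an
 * F_RDLCK it does not, because a read never conflicts with a read lock (see
 * lock_blocks_io() below).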
3913 */ 3914 3915 int 3916 nbl_lock_conflict(vnode_t *vp, nbl_op_t op, u_offset_t offset, 3917 ssize_t length, int svmand) 3918 { 3919 int conflict = 0; 3920 graph_t *gp; 3921 lock_descriptor_t *lock; 3922 3923 mutex_enter(&flock_lock); 3924 gp = lock_graph[HASH_INDEX(vp)]; 3925 mutex_exit(&flock_lock); 3926 if (gp == NULL) 3927 return (0); 3928 3929 mutex_enter(&gp->gp_mutex); 3930 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 3931 3932 for (; lock && lock->l_vnode == vp; lock = lock->l_next) { 3933 if ((svmand || (lock->l_state & NBMAND_LOCK)) && 3934 lock->l_flock.l_sysid == 0 && 3935 lock->l_flock.l_pid != curproc->p_pid && 3936 lock_blocks_io(op, offset, length, 3937 lock->l_type, lock->l_start, lock->l_end)) { 3938 conflict = 1; 3939 break; 3940 } 3941 } 3942 mutex_exit(&gp->gp_mutex); 3943 3944 return (conflict); 3945 } 3946 3947 /* 3948 * Return non-zero if the given I/O request conflicts with the given lock. 3949 */ 3950 3951 static int 3952 lock_blocks_io(nbl_op_t op, u_offset_t offset, ssize_t length, 3953 int lock_type, u_offset_t lock_start, u_offset_t lock_end) 3954 { 3955 ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE); 3956 ASSERT(lock_type == F_RDLCK || lock_type == F_WRLCK); 3957 3958 if (op == NBL_READ && lock_type == F_RDLCK) 3959 return (0); 3960 3961 if (offset <= lock_start && lock_start < offset + length) 3962 return (1); 3963 if (lock_start <= offset && offset <= lock_end) 3964 return (1); 3965 3966 return (0); 3967 } 3968 3969 #ifdef DEBUG 3970 static void 3971 check_active_locks(graph_t *gp) 3972 { 3973 lock_descriptor_t *lock, *lock1; 3974 edge_t *ep; 3975 3976 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp); 3977 lock = lock->l_next) { 3978 ASSERT(IS_ACTIVE(lock)); 3979 ASSERT(NOT_BLOCKED(lock)); 3980 ASSERT(!IS_BARRIER(lock)); 3981 3982 ep = FIRST_IN(lock); 3983 3984 while (ep != HEAD(lock)) { 3985 ASSERT(IS_SLEEPING(ep->from_vertex)); 3986 ASSERT(!NOT_BLOCKED(ep->from_vertex)); 3987 ep = NEXT_IN(ep); 3988 } 3989 3990 for (lock1 = lock->l_next; lock1 != ACTIVE_HEAD(gp); 3991 lock1 = lock1->l_next) { 3992 if (lock1->l_vnode == lock->l_vnode) { 3993 if (BLOCKS(lock1, lock)) { 3994 cmn_err(CE_PANIC, 3995 "active lock %p blocks %p", 3996 (void *)lock1, (void *)lock); 3997 } else if (BLOCKS(lock, lock1)) { 3998 cmn_err(CE_PANIC, 3999 "active lock %p blocks %p", 4000 (void *)lock, (void *)lock1); 4001 } 4002 } 4003 } 4004 } 4005 } 4006 4007 /* 4008 * Effect: This functions checks to see if the transition from 'old_state' to 4009 * 'new_state' is a valid one. It returns 0 if the transition is valid 4010 * and 1 if it is not. 
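 * For example (illustrative), FLK_SLEEPING_STATE -> FLK_GRANTED_STATE and
 * FLK_GRANTED_STATE -> FLK_START_STATE are valid transitions, while
 * FLK_ACTIVE_STATE may move only to FLK_DEAD_STATE.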
4011 * For a map of valid transitions, see sys/flock_impl.h 4012 */ 4013 static int 4014 check_lock_transition(int old_state, int new_state) 4015 { 4016 switch (old_state) { 4017 case FLK_INITIAL_STATE: 4018 if ((new_state == FLK_START_STATE) || 4019 (new_state == FLK_SLEEPING_STATE) || 4020 (new_state == FLK_ACTIVE_STATE) || 4021 (new_state == FLK_DEAD_STATE)) { 4022 return (0); 4023 } else { 4024 return (1); 4025 } 4026 case FLK_START_STATE: 4027 if ((new_state == FLK_ACTIVE_STATE) || 4028 (new_state == FLK_DEAD_STATE)) { 4029 return (0); 4030 } else { 4031 return (1); 4032 } 4033 case FLK_ACTIVE_STATE: 4034 if (new_state == FLK_DEAD_STATE) { 4035 return (0); 4036 } else { 4037 return (1); 4038 } 4039 case FLK_SLEEPING_STATE: 4040 if ((new_state == FLK_GRANTED_STATE) || 4041 (new_state == FLK_INTERRUPTED_STATE) || 4042 (new_state == FLK_CANCELLED_STATE)) { 4043 return (0); 4044 } else { 4045 return (1); 4046 } 4047 case FLK_GRANTED_STATE: 4048 if ((new_state == FLK_START_STATE) || 4049 (new_state == FLK_INTERRUPTED_STATE) || 4050 (new_state == FLK_CANCELLED_STATE)) { 4051 return (0); 4052 } else { 4053 return (1); 4054 } 4055 case FLK_CANCELLED_STATE: 4056 if ((new_state == FLK_INTERRUPTED_STATE) || 4057 (new_state == FLK_DEAD_STATE)) { 4058 return (0); 4059 } else { 4060 return (1); 4061 } 4062 case FLK_INTERRUPTED_STATE: 4063 if (new_state == FLK_DEAD_STATE) { 4064 return (0); 4065 } else { 4066 return (1); 4067 } 4068 case FLK_DEAD_STATE: 4069 /* May be set more than once */ 4070 if (new_state == FLK_DEAD_STATE) { 4071 return (0); 4072 } else { 4073 return (1); 4074 } 4075 default: 4076 return (1); 4077 } 4078 } 4079 4080 static void 4081 check_sleeping_locks(graph_t *gp) 4082 { 4083 lock_descriptor_t *lock1, *lock2; 4084 edge_t *ep; 4085 for (lock1 = SLEEPING_HEAD(gp)->l_next; lock1 != SLEEPING_HEAD(gp); 4086 lock1 = lock1->l_next) { 4087 ASSERT(!IS_BARRIER(lock1)); 4088 for (lock2 = lock1->l_next; lock2 != SLEEPING_HEAD(gp); 4089 lock2 = lock2->l_next) { 4090 if (lock1->l_vnode == lock2->l_vnode) { 4091 if (BLOCKS(lock2, lock1)) { 4092 ASSERT(!IS_GRANTED(lock1)); 4093 ASSERT(!NOT_BLOCKED(lock1)); 4094 path(lock1, lock2); 4095 } 4096 } 4097 } 4098 4099 for (lock2 = ACTIVE_HEAD(gp)->l_next; lock2 != ACTIVE_HEAD(gp); 4100 lock2 = lock2->l_next) { 4101 ASSERT(!IS_BARRIER(lock1)); 4102 if (lock1->l_vnode == lock2->l_vnode) { 4103 if (BLOCKS(lock2, lock1)) { 4104 ASSERT(!IS_GRANTED(lock1)); 4105 ASSERT(!NOT_BLOCKED(lock1)); 4106 path(lock1, lock2); 4107 } 4108 } 4109 } 4110 ep = FIRST_ADJ(lock1); 4111 while (ep != HEAD(lock1)) { 4112 ASSERT(BLOCKS(ep->to_vertex, lock1)); 4113 ep = NEXT_ADJ(ep); 4114 } 4115 } 4116 } 4117 4118 static int 4119 level_two_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2, int no_path) 4120 { 4121 edge_t *ep; 4122 lock_descriptor_t *vertex; 4123 lock_descriptor_t *vertex_stack; 4124 4125 STACK_INIT(vertex_stack); 4126 4127 flk_graph_uncolor(lock1->l_graph); 4128 ep = FIRST_ADJ(lock1); 4129 ASSERT(ep != HEAD(lock1)); 4130 while (ep != HEAD(lock1)) { 4131 if (no_path) 4132 ASSERT(ep->to_vertex != lock2); 4133 STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack); 4134 COLOR(ep->to_vertex); 4135 ep = NEXT_ADJ(ep); 4136 } 4137 4138 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 4139 STACK_POP(vertex_stack, l_dstack); 4140 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex); 4141 ep = NEXT_ADJ(ep)) { 4142 if (COLORED(ep->to_vertex)) 4143 continue; 4144 COLOR(ep->to_vertex); 4145 if (ep->to_vertex == lock2) 4146 return (1); 4147 4148 STACK_PUSH(vertex_stack, 
ep->to_vertex, l_dstack); 4149 } 4150 } 4151 return (0); 4152 } 4153 4154 static void 4155 check_owner_locks(graph_t *gp, pid_t pid, int sysid, vnode_t *vp) 4156 { 4157 lock_descriptor_t *lock; 4158 4159 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 4160 4161 if (lock) { 4162 while (lock != ACTIVE_HEAD(gp) && (lock->l_vnode == vp)) { 4163 if (lock->l_flock.l_pid == pid && 4164 lock->l_flock.l_sysid == sysid) 4165 cmn_err(CE_PANIC, 4166 "owner pid %d's lock %p in active queue", 4167 pid, (void *)lock); 4168 lock = lock->l_next; 4169 } 4170 } 4171 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 4172 4173 if (lock) { 4174 while (lock != SLEEPING_HEAD(gp) && (lock->l_vnode == vp)) { 4175 if (lock->l_flock.l_pid == pid && 4176 lock->l_flock.l_sysid == sysid) 4177 cmn_err(CE_PANIC, 4178 "owner pid %d's lock %p in sleep queue", 4179 pid, (void *)lock); 4180 lock = lock->l_next; 4181 } 4182 } 4183 } 4184 4185 static int 4186 level_one_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2) 4187 { 4188 edge_t *ep = FIRST_ADJ(lock1); 4189 4190 while (ep != HEAD(lock1)) { 4191 if (ep->to_vertex == lock2) 4192 return (1); 4193 else 4194 ep = NEXT_ADJ(ep); 4195 } 4196 return (0); 4197 } 4198 4199 static int 4200 no_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2) 4201 { 4202 return (!level_two_path(lock1, lock2, 1)); 4203 } 4204 4205 static void 4206 path(lock_descriptor_t *lock1, lock_descriptor_t *lock2) 4207 { 4208 if (level_one_path(lock1, lock2)) { 4209 if (level_two_path(lock1, lock2, 0) != 0) { 4210 cmn_err(CE_WARN, 4211 "one edge one path from lock1 %p lock2 %p", 4212 (void *)lock1, (void *)lock2); 4213 } 4214 } else if (no_path(lock1, lock2)) { 4215 cmn_err(CE_PANIC, 4216 "No path from lock1 %p to lock2 %p", 4217 (void *)lock1, (void *)lock2); 4218 } 4219 } 4220 #endif /* DEBUG */ 4221