/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/* ONC_PLUS EXTRACT START */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/flock_impl.h>
#include <sys/vfs.h>
#include <sys/t_lock.h>		/* for <sys/callb.h> */
#include <sys/callb.h>
#include <sys/clconf.h>
#include <sys/cladm.h>
#include <sys/nbmlock.h>
#include <sys/cred.h>
#include <sys/policy.h>

/*
 * The following variables are for statistics purposes and they are
 * not protected by locks. They may not be accurate but will at least be
 * close to the actual value.
 */

int flk_lock_allocs;
int flk_lock_frees;
int edge_allocs;
int edge_frees;
int flk_proc_vertex_allocs;
int flk_proc_edge_allocs;
int flk_proc_vertex_frees;
int flk_proc_edge_frees;

static kmutex_t flock_lock;

#ifdef DEBUG
int check_debug = 0;
#define CHECK_ACTIVE_LOCKS(gp)	if (check_debug) \
					check_active_locks(gp);
#define CHECK_SLEEPING_LOCKS(gp)	if (check_debug) \
						check_sleeping_locks(gp);
#define CHECK_OWNER_LOCKS(gp, pid, sysid, vp)	\
		if (check_debug)	\
			check_owner_locks(gp, pid, sysid, vp);
#define CHECK_LOCK_TRANSITION(old_state, new_state) \
	{ \
		if (check_lock_transition(old_state, new_state)) { \
			cmn_err(CE_PANIC, "Illegal lock transition \
			    from %d to %d", old_state, new_state); \
		} \
	}
#else

#define CHECK_ACTIVE_LOCKS(gp)
#define CHECK_SLEEPING_LOCKS(gp)
#define CHECK_OWNER_LOCKS(gp, pid, sysid, vp)
#define CHECK_LOCK_TRANSITION(old_state, new_state)

#endif /* DEBUG */

struct kmem_cache *flk_edge_cache;

graph_t *lock_graph[HASH_SIZE];
proc_graph_t pgraph;

/*
 * Clustering.
 *
 * NLM REGISTRY TYPE IMPLEMENTATION
 *
 * Assumptions:
 *  1. Nodes in a cluster are numbered starting at 1; always non-negative
 *     integers; maximum node id is returned by clconf_maximum_nodeid().
 *  2. We use this node id to identify the node an NLM server runs on.
 */

/*
 * NLM registry object keeps track of NLM servers via their
 * nlmids (which are the node ids of the node in the cluster they run on)
 * that have requested locks at this LLM with which this registry is
 * associated.
 *
 * Representation of abstraction:
 *    rep = record[	states: array[nlm_state],
 *			lock: mutex]
 *
 * Representation invariants:
 *   1. index i of rep.states is between 0 and n - 1 where n is the number
 *	of elements in the array, which happens to be the maximum number
 *	of nodes in the cluster configuration + 1.
 *   2. map nlmid to index i of rep.states
 *		0   -> 0
 *		1   -> 1
 *		2   -> 2
 *		n-1 -> n-1 (== clconf_maximum_nodeid())
 *   3. This 1-1 mapping is quite convenient and it avoids errors resulting
 *	from forgetting to subtract 1 from the index.
 *   4. The reason we keep the 0th index is the following. A legitimate
 *	cluster configuration includes making a UFS file system NFS
 *	exportable. The code is structured so that if you're in a cluster
 *	you do one thing; otherwise, you do something else. The problem
 *	is what to do if you think you're in a cluster with PXFS loaded,
 *	but you're using UFS not PXFS? The upper two bytes of the sysid
 *	encode the node id of the node where the NLM server runs; these
 *	bytes are zero for UFS. Since the nodeid is used to index into
 *	the registry, we can record the NLM server state information at
 *	index 0 using the same mechanism used for PXFS file locks!
 */
static flk_nlm_status_t *nlm_reg_status = NULL;	/* state array 0..N-1 */
static kmutex_t nlm_reg_lock;			/* lock to protect array */
static uint_t nlm_status_size;			/* size of state array */
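/*
 * Illustration (added note, not from the original source): with the
 * identity mapping above, an NLM request whose sysid encodes node id 3
 * in its upper two bytes is tracked in nlm_reg_status[3], while a lock
 * on a locally exported UFS file system (node id bytes are zero) is
 * tracked in nlm_reg_status[0].
 */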
/*
 * Although we need a global lock dependency graph (and associated data
 * structures), we also need a per-zone notion of whether the lock manager is
 * running, and so whether to allow lock manager requests or not.
 *
 * Thus, on a per-zone basis we maintain a ``global'' variable
 * (flk_lockmgr_status), protected by flock_lock, and set when the lock
 * manager is determined to be changing state (starting or stopping).
 *
 * Each graph/zone pair also has a copy of this variable, which is protected by
 * the graph's mutex.
 *
 * The per-graph copies are used to synchronize lock requests with shutdown
 * requests.  The global copy is used to initialize the per-graph field when a
 * new graph is created.
 */
struct flock_globals {
	flk_lockmgr_status_t flk_lockmgr_status;
	flk_lockmgr_status_t lockmgr_status[HASH_SIZE];
};

zone_key_t flock_zone_key;

static void create_flock(lock_descriptor_t *, flock64_t *);
static lock_descriptor_t *flk_get_lock(void);
static void flk_free_lock(lock_descriptor_t *lock);
static void flk_get_first_blocking_lock(lock_descriptor_t *request);
static int flk_process_request(lock_descriptor_t *);
static int flk_add_edge(lock_descriptor_t *, lock_descriptor_t *, int, int);
static edge_t *flk_get_edge(void);
static int flk_wait_execute_request(lock_descriptor_t *);
static int flk_relation(lock_descriptor_t *, lock_descriptor_t *);
static void flk_insert_active_lock(lock_descriptor_t *);
static void flk_delete_active_lock(lock_descriptor_t *, int);
static void flk_insert_sleeping_lock(lock_descriptor_t *);
static void flk_graph_uncolor(graph_t *);
static void flk_wakeup(lock_descriptor_t *, int);
static void flk_free_edge(edge_t *);
static void flk_recompute_dependencies(lock_descriptor_t *,
			lock_descriptor_t **, int, int);
static int flk_find_barriers(lock_descriptor_t *);
static void flk_update_barriers(lock_descriptor_t *);
static int flk_color_reachables(lock_descriptor_t *);
static int flk_canceled(lock_descriptor_t *);
static void flk_delete_locks_by_sysid(lock_descriptor_t *);
static void report_blocker(lock_descriptor_t *, lock_descriptor_t *);
static void wait_for_lock(lock_descriptor_t *);
static void unlock_lockmgr_granted(struct flock_globals *);
static void wakeup_sleeping_lockmgr_locks(struct flock_globals *);

/* Clustering hooks */
static void cl_flk_change_nlm_state_all_locks(int, flk_nlm_status_t);
static void cl_flk_wakeup_sleeping_nlm_locks(int);
static void cl_flk_unlock_nlm_granted(int);

#ifdef DEBUG
static int check_lock_transition(int, int);
static void check_sleeping_locks(graph_t *);
static void check_active_locks(graph_t *);
static int no_path(lock_descriptor_t *, lock_descriptor_t *);
static void path(lock_descriptor_t *, lock_descriptor_t *);
static void check_owner_locks(graph_t *, pid_t, int, vnode_t *);
static int level_one_path(lock_descriptor_t *, lock_descriptor_t *);
static int level_two_path(lock_descriptor_t *, lock_descriptor_t *, int);
#endif

/* proc_graph function definitions */
static int flk_check_deadlock(lock_descriptor_t *);
static void flk_proc_graph_uncolor(void);
static proc_vertex_t *flk_get_proc_vertex(lock_descriptor_t *);
static proc_edge_t *flk_get_proc_edge(void);
static void flk_proc_release(proc_vertex_t *);
static void flk_free_proc_edge(proc_edge_t *);
static void flk_update_proc_graph(edge_t *, int);

/* Non-blocking mandatory locking */
static int lock_blocks_io(nbl_op_t, u_offset_t, ssize_t, int, u_offset_t,
			u_offset_t);

static struct flock_globals *
flk_get_globals(void)
{
	/*
	 * The KLM module had better be loaded if we're attempting to handle
	 * lockmgr requests.
	 */
	ASSERT(flock_zone_key != ZONE_KEY_UNINITIALIZED);
	return (zone_getspecific(flock_zone_key, curproc->p_zone));
}
static flk_lockmgr_status_t
flk_get_lockmgr_status(void)
{
	struct flock_globals *fg;

	ASSERT(MUTEX_HELD(&flock_lock));

	if (flock_zone_key == ZONE_KEY_UNINITIALIZED) {
		/*
		 * KLM module not loaded; lock manager definitely not running.
		 */
		return (FLK_LOCKMGR_DOWN);
	}
	fg = flk_get_globals();
	return (fg->flk_lockmgr_status);
}

/*
 * Routine called from fs_frlock in fs/fs_subr.c
 */

int
reclock(vnode_t *vp,
	flock64_t *lckdat,
	int cmd,
	int flag,
	u_offset_t offset,
	flk_callback_t *flk_cbp)
{
	lock_descriptor_t stack_lock_request;
	lock_descriptor_t *lock_request;
	int error = 0;
	graph_t	*gp;
	int nlmid;

	/*
	 * Check access permissions
	 */
	if ((cmd & SETFLCK) &&
	    ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) ||
	    (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0)))
		return (EBADF);

	/*
	 * For query and unlock requests we use the stack_lock_request.
	 */
	if ((lckdat->l_type == F_UNLCK) ||
	    !((cmd & INOFLCK) || (cmd & SETFLCK))) {
		lock_request = &stack_lock_request;
		(void) bzero((caddr_t)lock_request,
		    sizeof (lock_descriptor_t));

		/*
		 * The following is added to make the assertions in
		 * flk_execute_request() pass.
		 */
		lock_request->l_edge.edge_in_next = &lock_request->l_edge;
		lock_request->l_edge.edge_in_prev = &lock_request->l_edge;
		lock_request->l_edge.edge_adj_next = &lock_request->l_edge;
		lock_request->l_edge.edge_adj_prev = &lock_request->l_edge;
		lock_request->l_status = FLK_INITIAL_STATE;
	} else {
		lock_request = flk_get_lock();
	}
	lock_request->l_state = 0;
	lock_request->l_vnode = vp;
	lock_request->l_zoneid = getzoneid();

	/*
	 * Convert the request range into the canonical start and end
	 * values.  The NLM protocol supports locking over the entire
	 * 32-bit range, so there's no range checking for remote requests,
	 * but we still need to verify that local requests obey the rules.
	 */
	/* Clustering */
	if ((cmd & (RCMDLCK | PCMDLCK)) != 0) {
		ASSERT(lckdat->l_whence == 0);
		lock_request->l_start = lckdat->l_start;
		lock_request->l_end = (lckdat->l_len == 0) ? MAX_U_OFFSET_T :
		    lckdat->l_start + (lckdat->l_len - 1);
	} else {
		/* check the validity of the lock range */
		error = flk_convert_lock_data(vp, lckdat,
		    &lock_request->l_start, &lock_request->l_end,
		    offset);
		if (error) {
			goto done;
		}
		error = flk_check_lock_data(lock_request->l_start,
		    lock_request->l_end, MAXEND);
		if (error) {
			goto done;
		}
	}

	ASSERT(lock_request->l_end >= lock_request->l_start);
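	/*
	 * Illustration (added note, not from the original source): for a
	 * remote (RCMDLCK/PCMDLCK) request, l_start = 100 and l_len = 10
	 * yield the canonical range [100, 109], while l_len = 0 means
	 * "to end of file" and yields [100, MAX_U_OFFSET_T].
	 */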
	lock_request->l_type = lckdat->l_type;
	if (cmd & INOFLCK)
		lock_request->l_state |= IO_LOCK;
	if (cmd & SLPFLCK)
		lock_request->l_state |= WILLING_TO_SLEEP_LOCK;
	if (cmd & RCMDLCK)
		lock_request->l_state |= LOCKMGR_LOCK;
	if (cmd & NBMLCK)
		lock_request->l_state |= NBMAND_LOCK;
	/*
	 * Clustering: set flag for PXFS locks.
	 * We do not _only_ check for the PCMDLCK flag because PXFS locks could
	 * also be of type 'RCMDLCK'.
	 * We do not _only_ check the GETPXFSID() macro because local PXFS
	 * clients use a pxfsid of zero to permit deadlock detection in the LLM.
	 */

	if ((cmd & PCMDLCK) || (GETPXFSID(lckdat->l_sysid) != 0)) {
		lock_request->l_state |= PXFS_LOCK;
	}
	if (!((cmd & SETFLCK) || (cmd & INOFLCK))) {
		if (lock_request->l_type == F_RDLCK ||
		    lock_request->l_type == F_WRLCK)
			lock_request->l_state |= QUERY_LOCK;
	}
	lock_request->l_flock = (*lckdat);
	lock_request->l_callbacks = flk_cbp;

	/*
	 * We are ready for processing the request
	 */
	if (IS_LOCKMGR(lock_request)) {
		/*
		 * If the lock request is an NLM server request ....
		 */
		if (nlm_status_size == 0) {	/* not booted as cluster */
			mutex_enter(&flock_lock);
			/*
			 * Bail out if this is a lock manager request and the
			 * lock manager is not supposed to be running.
			 */
			if (flk_get_lockmgr_status() != FLK_LOCKMGR_UP) {
				mutex_exit(&flock_lock);
				error = ENOLCK;
				goto done;
			}
			mutex_exit(&flock_lock);
		} else {			/* booted as a cluster */
			nlmid = GETNLMID(lock_request->l_flock.l_sysid);
			ASSERT(nlmid <= nlm_status_size && nlmid >= 0);

			mutex_enter(&nlm_reg_lock);
			/*
			 * If the NLM registry does not know about this
			 * NLM server making the request, add its nlmid
			 * to the registry.
			 */
			if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status,
			    nlmid)) {
				FLK_REGISTRY_ADD_NLMID(nlm_reg_status, nlmid);
			} else if (!FLK_REGISTRY_IS_NLM_UP(nlm_reg_status,
			    nlmid)) {
				/*
				 * If the NLM server is already known (has made
				 * previous lock requests) and its state is
				 * not NLM_UP (meaning the NLM server is
				 * shutting down), then bail out with an
				 * error to deny the lock request.
				 */
				mutex_exit(&nlm_reg_lock);
				error = ENOLCK;
				goto done;
			}
			mutex_exit(&nlm_reg_lock);
		}
	}

	/* Now get the lock graph for a particular vnode */
	gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH);

	/*
	 * We drop the rwlock here, otherwise this might end up causing a
	 * deadlock if this IO_LOCK sleeps.  (bugid # 1183392).
	 */

	if (IS_IO_LOCK(lock_request)) {
		VOP_RWUNLOCK(vp,
		    (lock_request->l_type == F_RDLCK) ?
		    V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
	}
	mutex_enter(&gp->gp_mutex);

	lock_request->l_state |= REFERENCED_LOCK;
	lock_request->l_graph = gp;

	switch (lock_request->l_type) {
	case F_RDLCK:
	case F_WRLCK:
		if (IS_QUERY_LOCK(lock_request)) {
			flk_get_first_blocking_lock(lock_request);
			(*lckdat) = lock_request->l_flock;
			break;
		}

		/* process the request now */

		error = flk_process_request(lock_request);
		break;

	case F_UNLCK:
		/* unlock request will not block so execute it immediately */

		if (IS_LOCKMGR(lock_request) &&
		    flk_canceled(lock_request)) {
			error = 0;
		} else {
			error = flk_execute_request(lock_request);
		}
		break;

	case F_UNLKSYS:
		/*
		 * Recovery mechanism to release lock manager locks when
		 * an NFS client crashes and restarts.  The NFS server will
		 * clear old locks and grant new ones.
		 */
		if (lock_request->l_flock.l_sysid == 0) {
			mutex_exit(&gp->gp_mutex);
			return (EINVAL);
		}
		if (secpolicy_nfs(CRED()) != 0) {
			mutex_exit(&gp->gp_mutex);
			return (EPERM);
		}
		flk_delete_locks_by_sysid(lock_request);
		lock_request->l_state &= ~REFERENCED_LOCK;
		flk_set_state(lock_request, FLK_DEAD_STATE);
		flk_free_lock(lock_request);
		mutex_exit(&gp->gp_mutex);
		return (0);

	default:
		error = EINVAL;
		break;
	}

	/* Clustering: For blocked PXFS locks, return */
	if (error == PXFS_LOCK_BLOCKED) {
		lock_request->l_state &= ~REFERENCED_LOCK;
		mutex_exit(&gp->gp_mutex);
		return (error);
	}

	/*
	 * Now that we have seen the status of locks in the system for
	 * this vnode we acquire the rwlock if it is an IO_LOCK.
	 */

	if (IS_IO_LOCK(lock_request)) {
		(void) VOP_RWLOCK(vp,
		    (lock_request->l_type == F_RDLCK) ?
		    V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
		if (!error) {
			lckdat->l_type = F_UNLCK;

			/*
			 * This wake up is needed, otherwise, if the IO_LOCK
			 * has slept, the dependents on this lock will not be
			 * woken up at all.  (bugid # 1185482).
			 */

			flk_wakeup(lock_request, 1);
			flk_set_state(lock_request, FLK_DEAD_STATE);
			flk_free_lock(lock_request);
		}
		/*
		 * Otherwise, if an error occurred, either flk_process_request()
		 * returned EDEADLK, in which case there are no dependents for
		 * this lock, or flk_wait_execute_request() returned EINTR, in
		 * which case flk_cancel_sleeping_lock() has already been done.
		 * The same is true for EBADF.
		 */
	}

	if (lock_request == &stack_lock_request) {
		flk_set_state(lock_request, FLK_DEAD_STATE);
	} else {
		lock_request->l_state &= ~REFERENCED_LOCK;
		if ((error != 0) || IS_DELETED(lock_request)) {
			flk_set_state(lock_request, FLK_DEAD_STATE);
			flk_free_lock(lock_request);
		}
	}

	mutex_exit(&gp->gp_mutex);
	return (error);

done:
	flk_set_state(lock_request, FLK_DEAD_STATE);
	if (lock_request != &stack_lock_request)
		flk_free_lock(lock_request);
	return (error);
}

/*
 * Invoke the callbacks in the given list.  If before sleeping, invoke in
 * list order.  If after sleeping, invoke in reverse order.
 *
 * CPR (suspend/resume) support: if one of the callbacks returns a
 * callb_cpr_t, return it.  This will be used to make the thread CPR-safe
 * while it is sleeping.  There should be at most one callb_cpr_t for the
 * thread.
 * XXX This is unnecessarily complicated.  The CPR information should just
 * get passed in directly through VOP_FRLOCK and reclock, rather than
 * sneaking it in via a callback.
 */
callb_cpr_t *
flk_invoke_callbacks(flk_callback_t *cblist, flk_cb_when_t when)
{
	callb_cpr_t *cpr_callbackp = NULL;
	callb_cpr_t *one_result;
	flk_callback_t *cb;

	if (cblist == NULL)
		return (NULL);

	if (when == FLK_BEFORE_SLEEP) {
		cb = cblist;
		do {
			one_result = (*cb->cb_callback)(when, cb->cb_data);
			if (one_result != NULL) {
				ASSERT(cpr_callbackp == NULL);
				cpr_callbackp = one_result;
			}
			cb = cb->cb_next;
		} while (cb != cblist);
	} else {
		cb = cblist->cb_prev;
		do {
			one_result = (*cb->cb_callback)(when, cb->cb_data);
			if (one_result != NULL) {
				cpr_callbackp = one_result;
			}
			cb = cb->cb_prev;
		} while (cb != cblist->cb_prev);
	}

	return (cpr_callbackp);
}

/*
 * Initialize a flk_callback_t to hold the given callback.
 */

void
flk_init_callback(flk_callback_t *flk_cb,
	callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata)
{
	flk_cb->cb_next = flk_cb;
	flk_cb->cb_prev = flk_cb;
	flk_cb->cb_callback = cb_fcn;
	flk_cb->cb_data = cbdata;
}

/*
 * Initialize an flk_callback_t and then link it into the head of an
 * existing list (which may be NULL).
 */

void
flk_add_callback(flk_callback_t *newcb,
		callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *),
		void *cbdata, flk_callback_t *cblist)
{
	flk_init_callback(newcb, cb_fcn, cbdata);

	if (cblist == NULL)
		return;

	newcb->cb_prev = cblist->cb_prev;
	newcb->cb_next = cblist;
	cblist->cb_prev->cb_next = newcb;
	cblist->cb_prev = newcb;
}
/* ONC_PLUS EXTRACT END */
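/*
 * Illustration (added note, not from the original source): a caller
 * typically builds the circular callback list on its stack before calling
 * reclock(), e.g.
 *
 *	flk_callback_t cb;
 *	flk_add_callback(&cb, my_cb_fcn, my_cb_data, existing_list);
 *
 * where my_cb_fcn/my_cb_data/existing_list are hypothetical caller-supplied
 * names.  flk_invoke_callbacks() then runs the list in order before
 * sleeping and in reverse order after waking up.
 */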
/*
 * Initialize the flk_edge_cache data structure and create the
 * nlm_reg_status array.
 */

void
flk_init(void)
{
	uint_t i;

	flk_edge_cache = kmem_cache_create("flk_edges",
	    sizeof (struct edge), 0, NULL, NULL, NULL, NULL, NULL, 0);
	if (flk_edge_cache == NULL) {
		cmn_err(CE_PANIC, "Couldn't create flk_edge_cache\n");
	}
	/*
	 * Create the NLM registry object.
	 */

	if (cluster_bootflags & CLUSTER_BOOTED) {
		/*
		 * This routine tells you the maximum node id that will be
		 * used in the cluster.  This number will be the size of the
		 * nlm registry status array.  We add 1 because we will be
		 * using all entries indexed from 0 to maxnodeid; e.g., from 0
		 * to 64, for a total of 65 entries.
		 */
		nlm_status_size = clconf_maximum_nodeid() + 1;
	} else {
		nlm_status_size = 0;
	}

	if (nlm_status_size != 0) {	/* booted as a cluster */
		nlm_reg_status = (flk_nlm_status_t *)
		    kmem_alloc(sizeof (flk_nlm_status_t) * nlm_status_size,
		    KM_SLEEP);

		/* initialize all NLM states in array to NLM_UNKNOWN */
		for (i = 0; i < nlm_status_size; i++) {
			nlm_reg_status[i] = FLK_NLM_UNKNOWN;
		}
	}
}

/*
 * Zone constructor/destructor callbacks to be executed when a zone is
 * created/destroyed.
 */
/* ARGSUSED */
void *
flk_zone_init(zoneid_t zoneid)
{
	struct flock_globals *fg;
	uint_t i;

	fg = kmem_alloc(sizeof (*fg), KM_SLEEP);
	fg->flk_lockmgr_status = FLK_LOCKMGR_UP;
	for (i = 0; i < HASH_SIZE; i++)
		fg->lockmgr_status[i] = FLK_LOCKMGR_UP;
	return (fg);
}

/* ARGSUSED */
void
flk_zone_fini(zoneid_t zoneid, void *data)
{
	struct flock_globals *fg = data;

	kmem_free(fg, sizeof (*fg));
}

/*
 * Get a lock_descriptor structure with initialization of edge lists.
 */

static lock_descriptor_t *
flk_get_lock(void)
{
	lock_descriptor_t *l;

	l = kmem_zalloc(sizeof (lock_descriptor_t), KM_SLEEP);

	cv_init(&l->l_cv, NULL, CV_DRIVER, NULL);
	l->l_edge.edge_in_next = &l->l_edge;
	l->l_edge.edge_in_prev = &l->l_edge;
	l->l_edge.edge_adj_next = &l->l_edge;
	l->l_edge.edge_adj_prev = &l->l_edge;
	l->pvertex = -1;
	l->l_status = FLK_INITIAL_STATE;
	flk_lock_allocs++;
	return (l);
}

/*
 * Free a lock_descriptor structure.  If some thread still holds a
 * reference to it (as in reclock()), just set the DELETED_LOCK flag.
 */

void
flk_free_lock(lock_descriptor_t *lock)
{
	ASSERT(IS_DEAD(lock));
	if (IS_REFERENCED(lock)) {
		lock->l_state |= DELETED_LOCK;
		return;
	}
	flk_lock_frees++;
	kmem_free((void *)lock, sizeof (lock_descriptor_t));
}

void
flk_set_state(lock_descriptor_t *lock, int new_state)
{
	/*
	 * Locks in the sleeping list may be woken up in a number of ways,
	 * and more than once.  If a sleeping lock is signaled awake more
	 * than once, then it may or may not change state depending on its
	 * current state.
	 * Also note that NLM locks that are sleeping could be moved to an
	 * interrupted state more than once if the unlock request is
	 * retransmitted by the NLM client - the second time around, this is
	 * just a nop.
	 * The ordering of being signaled awake is:
	 * INTERRUPTED_STATE > CANCELLED_STATE > GRANTED_STATE.
	 * The checks below implement this ordering.
	 */
	if (IS_INTERRUPTED(lock)) {
		if ((new_state == FLK_CANCELLED_STATE) ||
		    (new_state == FLK_GRANTED_STATE) ||
		    (new_state == FLK_INTERRUPTED_STATE)) {
			return;
		}
	}
	if (IS_CANCELLED(lock)) {
		if ((new_state == FLK_GRANTED_STATE) ||
		    (new_state == FLK_CANCELLED_STATE)) {
			return;
		}
	}
	CHECK_LOCK_TRANSITION(lock->l_status, new_state);
	if (IS_PXFS(lock)) {
		cl_flk_state_transition_notify(lock, lock->l_status, new_state);
	}
	lock->l_status = new_state;
}

/*
 * Routine that checks whether there are any blocking locks in the system.
 *
 * The policy followed is that if a write lock is sleeping, we don't allow
 * read locks before this write lock even though there may not be any active
 * locks corresponding to the read locks' region.
 *
 * The flk_add_edge() function adds an edge between l1 and l2 iff there
 * is no path between l1 and l2.  This is done to have a "minimum
 * storage representation" of the dependency graph.
 *
 * Another property of the graph is that, since only the new request adds
 * edges to the existing locks in the graph, the graph is always
 * topologically ordered.
 */
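/*
 * Illustration (added note, not from the original source): under the
 * policy above, if a write lock covering [100, 200] is sleeping, a new
 * read request for [150, 160] is queued behind it even when no active
 * lock overlaps [150, 160].
 */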
static int
flk_process_request(lock_descriptor_t *request)
{
	graph_t	*gp = request->l_graph;
	lock_descriptor_t *lock;
	int request_blocked_by_active = 0;
	int request_blocked_by_granted = 0;
	int request_blocked_by_sleeping = 0;
	vnode_t	*vp = request->l_vnode;
	int error = 0;
	int request_will_wait = 0;
	int found_covering_lock = 0;
	lock_descriptor_t *covered_by = NULL;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	request_will_wait = IS_WILLING_TO_SLEEP(request);

	/*
	 * check active locks
	 */

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);


	if (lock) {
		do {
			if (BLOCKS(lock, request)) {
				if (!request_will_wait)
					return (EAGAIN);
				request_blocked_by_active = 1;
				break;
			}
			/*
			 * Grant lock if it is for the same owner holding active
			 * lock that covers the request.
			 */

			if (SAME_OWNER(lock, request) &&
			    COVERS(lock, request) &&
			    (request->l_type == F_RDLCK))
				return (flk_execute_request(request));
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	if (!request_blocked_by_active) {
		lock_descriptor_t *lk[1];
		lock_descriptor_t *first_glock = NULL;
		/*
		 * Shall we grant this?! NO!!
		 * What about those locks that were just granted and still
		 * in sleep queue.  Those threads are woken up and so locks
		 * are almost active.
		 */
		SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
		if (lock) {
			do {
				if (BLOCKS(lock, request)) {
					if (IS_GRANTED(lock)) {
						request_blocked_by_granted = 1;
					} else {
						request_blocked_by_sleeping = 1;
					}
				}

				lock = lock->l_next;
			} while ((lock->l_vnode == vp));
			first_glock = lock->l_prev;
			ASSERT(first_glock->l_vnode == vp);
		}

		if (request_blocked_by_granted)
			goto block;

		if (!request_blocked_by_sleeping) {
			/*
			 * If the request isn't going to be blocked by a
			 * sleeping request, we know that it isn't going to
			 * be blocked; we can just execute the request --
			 * without performing costly deadlock detection.
			 */
			ASSERT(!request_blocked_by_active);
			return (flk_execute_request(request));
		} else if (request->l_type == F_RDLCK) {
			/*
			 * If we have a sleeping writer in the requested
			 * lock's range, block.
			 */
			goto block;
		}

		lk[0] = request;
		request->l_state |= RECOMPUTE_LOCK;
		SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
		if (lock) {
			do {
				flk_recompute_dependencies(lock, lk, 1, 0);
				lock = lock->l_next;
			} while (lock->l_vnode == vp);
		}
		lock = first_glock;
		if (lock) {
			do {
				if (IS_GRANTED(lock)) {
					flk_recompute_dependencies(lock,
					    lk, 1, 0);
				}
				lock = lock->l_prev;
			} while ((lock->l_vnode == vp));
		}
		request->l_state &= ~RECOMPUTE_LOCK;
		if (!NO_DEPENDENTS(request) && flk_check_deadlock(request))
			return (EDEADLK);
		return (flk_execute_request(request));
	}

block:
	if (request_will_wait)
		flk_graph_uncolor(gp);

	/* check sleeping locks */

	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);

	/*
	 * If we find a sleeping write lock that is a superset of the
	 * region wanted by request, we can be assured that by adding an
	 * edge to this write lock we have paths to all locks in the
	 * graph that block the request, except in one case, and that is
	 * why there is another check for SAME_OWNER in the loop below.
	 * The exception is when the process that owns the sleeping write
	 * lock 'l1' has other locks l2, l3, l4 in the system that arrived
	 * before l1.  l1 does not have a path to these locks as they are
	 * from the same process.  We break when we find a second covering
	 * sleeping lock l5 owned by a process different from the one
	 * owning l1, because none of l2, l3, l4, etc. can have arrived
	 * before l5; if one had, l1 would have produced a deadlock already.
	 */
	if (lock) {
		do {
			if (BLOCKS(lock, request)) {
				if (!request_will_wait)
					return (EAGAIN);
				if (COVERS(lock, request) &&
				    lock->l_type == F_WRLCK) {
					if (found_covering_lock &&
					    !SAME_OWNER(lock, covered_by)) {
						found_covering_lock++;
						break;
					}
					found_covering_lock = 1;
					covered_by = lock;
				}
				if (found_covering_lock &&
				    !SAME_OWNER(lock, covered_by)) {
					lock = lock->l_next;
					continue;
				}
				if ((error = flk_add_edge(request, lock,
				    !found_covering_lock, 0)))
					return (error);
			}
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	/*
	 * found_covering_lock == 2 iff at this point 'request' has paths to
	 * all locks that block 'request'.  found_covering_lock == 1 iff at
	 * this point 'request' has paths to all locks that block 'request'
	 * whose owners are not the same as the one that covers 'request'
	 * (covered_by above), and we can have locks whose owner is the same
	 * as covered_by in the active list.
	 */

	if (request_blocked_by_active && found_covering_lock != 2) {
		SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
		ASSERT(lock != NULL);
		do {
			if (BLOCKS(lock, request)) {
				if (found_covering_lock &&
				    !SAME_OWNER(lock, covered_by)) {
					lock = lock->l_next;
					continue;
				}
				if ((error = flk_add_edge(request, lock,
				    CHECK_CYCLE, 0)))
					return (error);
			}
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	if (NOT_BLOCKED(request)) {
		/*
		 * request not dependent on any other locks
		 * so execute this request
		 */
		return (flk_execute_request(request));
	} else {
		/*
		 * check for deadlock
		 */
		if (flk_check_deadlock(request))
			return (EDEADLK);
		/*
		 * this thread has to sleep
		 */
		return (flk_wait_execute_request(request));
	}
}

/* ONC_PLUS EXTRACT START */
/*
 * The actual execution of the request in the simple case is only to
 * insert the 'request' in the list of active locks if it is not an
 * UNLOCK.
 * We have to consider the existing active locks' relation to
 * this 'request' if they are owned by the same process.  flk_relation()
 * does this job and sees to it that the dependency graph information is
 * maintained properly.
 */
int
flk_execute_request(lock_descriptor_t *request)
{
	graph_t	*gp = request->l_graph;
	vnode_t	*vp = request->l_vnode;
	lock_descriptor_t *lock, *lock1;
	int done_searching = 0;

	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);

	ASSERT(MUTEX_HELD(&gp->gp_mutex));

	flk_set_state(request, FLK_START_STATE);

	ASSERT(NOT_BLOCKED(request));

	/* IO_LOCK requests are only to check status */

	if (IS_IO_LOCK(request))
		return (0);

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	if (lock == NULL && request->l_type == F_UNLCK)
		return (0);
	if (lock == NULL) {
		flk_insert_active_lock(request);
		return (0);
	}

	do {
		lock1 = lock->l_next;
		if (SAME_OWNER(request, lock)) {
			done_searching = flk_relation(lock, request);
		}
		lock = lock1;
	} while (lock->l_vnode == vp && !done_searching);

	/*
	 * insert in active queue
	 */

	if (request->l_type != F_UNLCK)
		flk_insert_active_lock(request);

	return (0);
}
/* ONC_PLUS EXTRACT END */

/*
 * 'request' is blocked by someone, so we put it into the sleep queue.
 */
static int
flk_wait_execute_request(lock_descriptor_t *request)
{
	graph_t	*gp = request->l_graph;
	callb_cpr_t *cprp;		/* CPR info from callback */
	struct flock_globals *fg;
	int index;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	ASSERT(IS_WILLING_TO_SLEEP(request));

	flk_insert_sleeping_lock(request);

	if (IS_LOCKMGR(request)) {
		index = HASH_INDEX(request->l_vnode);
		fg = flk_get_globals();

		if (nlm_status_size == 0) {	/* not booted as a cluster */
			if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP) {
				flk_cancel_sleeping_lock(request, 1);
				return (ENOLCK);
			}
		} else {			/* booted as a cluster */
			/*
			 * If the request is an NLM server lock request,
			 * and the NLM state of the lock request is not
			 * NLM_UP (because the NLM server is shutting
			 * down), then cancel the sleeping lock and
			 * return error ENOLCK that will encourage the
			 * client to retransmit.
			 */
			if (!IS_NLM_UP(request)) {
				flk_cancel_sleeping_lock(request, 1);
				return (ENOLCK);
			}
		}
	}

	/* Clustering: For blocking PXFS locks, return */
	if (IS_PXFS(request)) {
		/*
		 * PXFS locks sleep on the client side.
		 * The callback argument is used to wake up the sleeper
		 * when the lock is granted.
		 * We return -1 (rather than an errno value) to indicate
		 * the client side should sleep.
		 */
		return (PXFS_LOCK_BLOCKED);
	}

	if (request->l_callbacks != NULL) {
		/*
		 * To make sure the shutdown code works correctly, either
		 * the callback must happen after putting the lock on the
		 * sleep list, or we must check the shutdown status after
		 * returning from the callback (and before sleeping).  At
		 * least for now, we'll use the first option.  If a
		 * shutdown or signal or whatever happened while the graph
		 * mutex was dropped, that will be detected by
		 * wait_for_lock().
		 */
		mutex_exit(&gp->gp_mutex);

		cprp = flk_invoke_callbacks(request->l_callbacks,
		    FLK_BEFORE_SLEEP);

		mutex_enter(&gp->gp_mutex);

		if (cprp == NULL) {
			wait_for_lock(request);
		} else {
			mutex_enter(cprp->cc_lockp);
			CALLB_CPR_SAFE_BEGIN(cprp);
			mutex_exit(cprp->cc_lockp);
			wait_for_lock(request);
			mutex_enter(cprp->cc_lockp);
			CALLB_CPR_SAFE_END(cprp, cprp->cc_lockp);
			mutex_exit(cprp->cc_lockp);
		}

		mutex_exit(&gp->gp_mutex);
		(void) flk_invoke_callbacks(request->l_callbacks,
		    FLK_AFTER_SLEEP);
		mutex_enter(&gp->gp_mutex);
	} else {
		wait_for_lock(request);
	}

	if (IS_LOCKMGR(request)) {
		/*
		 * If the lock manager is shutting down, return an
		 * error that will encourage the client to retransmit.
		 */
		if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP &&
		    !IS_GRANTED(request)) {
			flk_cancel_sleeping_lock(request, 1);
			return (ENOLCK);
		}
	}

	if (IS_INTERRUPTED(request)) {
		/* we got a signal, or act like we did */
		flk_cancel_sleeping_lock(request, 1);
		return (EINTR);
	}

	/* Cancelled if some other thread has closed the file */

	if (IS_CANCELLED(request)) {
		flk_cancel_sleeping_lock(request, 1);
		return (EBADF);
	}

	request->l_state &= ~GRANTED_LOCK;
	REMOVE_SLEEP_QUEUE(request);
	return (flk_execute_request(request));
}

/*
 * This routine adds an edge between from and to because from depends
 * on to.  If asked to check for deadlock, it checks whether there are any
 * reachable locks from "from_lock" that are owned by the same process
 * as "from_lock".
 * NOTE: It is the caller's responsibility to make sure that the color
 * of the graph is consistent between the calls to flk_add_edge as done
 * in flk_process_request.  This routine does not color and check for
 * deadlock explicitly.
 */

static int
flk_add_edge(lock_descriptor_t *from_lock, lock_descriptor_t *to_lock,
			int check_cycle, int update_graph)
{
	edge_t *edge;
	edge_t *ep;
	lock_descriptor_t *vertex;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);

	/*
	 * If the to vertex already has the mark color, just return without
	 * adding an edge, as it is already reachable from the from vertex.
	 */
	if (COLORED(to_lock))
		return (0);

	edge = flk_get_edge();

	/*
	 * set the from and to vertex
	 */

	edge->from_vertex = from_lock;
	edge->to_vertex = to_lock;

	/*
	 * put in the adjacency list of the from vertex
	 */

	from_lock->l_edge.edge_adj_next->edge_adj_prev = edge;
	edge->edge_adj_next = from_lock->l_edge.edge_adj_next;
	edge->edge_adj_prev = &from_lock->l_edge;
	from_lock->l_edge.edge_adj_next = edge;

	/*
	 * put in the in-list of the to vertex
	 */

	to_lock->l_edge.edge_in_next->edge_in_prev = edge;
	edge->edge_in_next = to_lock->l_edge.edge_in_next;
	to_lock->l_edge.edge_in_next = edge;
	edge->edge_in_prev = &to_lock->l_edge;


	if (update_graph) {
		flk_update_proc_graph(edge, 0);
		return (0);
	}
	if (!check_cycle) {
		return (0);
	}

	STACK_PUSH(vertex_stack, from_lock, l_stack);

	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {

		STACK_POP(vertex_stack, l_stack);

		for (ep = FIRST_ADJ(vertex);
		    ep != HEAD(vertex);
		    ep = NEXT_ADJ(ep)) {
			if (COLORED(ep->to_vertex))
				continue;
			COLOR(ep->to_vertex);
			if (SAME_OWNER(ep->to_vertex, from_lock))
				goto dead_lock;
			STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
		}
	}
	return (0);

dead_lock:

	/*
	 * remove all edges
	 */

	ep = FIRST_ADJ(from_lock);

	while (ep != HEAD(from_lock)) {
		IN_LIST_REMOVE(ep);
		from_lock->l_sedge = NEXT_ADJ(ep);
		ADJ_LIST_REMOVE(ep);
		flk_free_edge(ep);
		ep = from_lock->l_sedge;
	}
	return (EDEADLK);
}

/*
 * Get an edge structure for representing the dependency between two locks.
 */

static edge_t *
flk_get_edge()
{
	edge_t *ep;

	ASSERT(flk_edge_cache != NULL);

	ep = kmem_cache_alloc(flk_edge_cache, KM_SLEEP);
	edge_allocs++;
	return (ep);
}

/*
 * Free the edge structure.
 */

static void
flk_free_edge(edge_t *ep)
{
	edge_frees++;
	kmem_cache_free(flk_edge_cache, (void *)ep);
}

/*
 * Check the relationship of request with lock and perform the
 * recomputation of dependencies, break lock if required, and return
 * 1 if request cannot have any more relationship with the next
 * active locks.
 * The 'lock' and 'request' are compared and in case of overlap we
 * delete the 'lock' and form new locks to represent the non-overlapped
 * portion of the original 'lock'.  This function has side effects such as
 * 'lock' will be freed and new locks will be added to the active list.
 */
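/*
 * Illustration (added note, not from the original source): if an active
 * write lock covers [0, 99] and the same owner requests a read lock on
 * [40, 59], the downgrade case below replaces 'lock' with two new locks
 * for [0, 39] and [60, 99] plus the request itself, i.e. a three-vertex
 * topology.
 */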
static int
flk_relation(lock_descriptor_t *lock, lock_descriptor_t *request)
{
	int lock_effect;
	lock_descriptor_t *lock1, *lock2;
	lock_descriptor_t *topology[3];
	int nvertex = 0;
	int i;
	edge_t *ep;
	graph_t	*gp = (lock->l_graph);


	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);

	ASSERT(MUTEX_HELD(&gp->gp_mutex));

	topology[0] = topology[1] = topology[2] = NULL;

	if (request->l_type == F_UNLCK)
		lock_effect = FLK_UNLOCK;
	else if (request->l_type == F_RDLCK &&
	    lock->l_type == F_WRLCK)
		lock_effect = FLK_DOWNGRADE;
	else if (request->l_type == F_WRLCK &&
	    lock->l_type == F_RDLCK)
		lock_effect = FLK_UPGRADE;
	else
		lock_effect = FLK_STAY_SAME;

	if (lock->l_end < request->l_start) {
		if (lock->l_end == request->l_start - 1 &&
		    lock_effect == FLK_STAY_SAME) {
			topology[0] = request;
			request->l_start = lock->l_start;
			nvertex = 1;
			goto recompute;
		} else {
			return (0);
		}
	}

	if (lock->l_start > request->l_end) {
		if (request->l_end == lock->l_start - 1 &&
		    lock_effect == FLK_STAY_SAME) {
			topology[0] = request;
			request->l_end = lock->l_end;
			nvertex = 1;
			goto recompute;
		} else {
			return (1);
		}
	}

	if (request->l_end < lock->l_end) {
		if (request->l_start > lock->l_start) {
			if (lock_effect == FLK_STAY_SAME) {
				request->l_start = lock->l_start;
				request->l_end = lock->l_end;
				topology[0] = request;
				nvertex = 1;
			} else {
				lock1 = flk_get_lock();
				lock2 = flk_get_lock();
				COPY(lock1, lock);
				COPY(lock2, lock);
				lock1->l_start = lock->l_start;
				lock1->l_end = request->l_start - 1;
				lock2->l_start = request->l_end + 1;
				lock2->l_end = lock->l_end;
				topology[0] = lock1;
				topology[1] = lock2;
				topology[2] = request;
				nvertex = 3;
			}
		} else if (request->l_start < lock->l_start) {
			if (lock_effect == FLK_STAY_SAME) {
				request->l_end = lock->l_end;
				topology[0] = request;
				nvertex = 1;
			} else {
				lock1 = flk_get_lock();
				COPY(lock1, lock);
				lock1->l_start = request->l_end + 1;
				topology[0] = lock1;
				topology[1] = request;
				nvertex = 2;
			}
		} else {
			if (lock_effect == FLK_STAY_SAME) {
				request->l_start = lock->l_start;
				request->l_end = lock->l_end;
				topology[0] = request;
				nvertex = 1;
			} else {
				lock1 = flk_get_lock();
				COPY(lock1, lock);
				lock1->l_start = request->l_end + 1;
				topology[0] = lock1;
				topology[1] = request;
				nvertex = 2;
			}
		}
	} else if (request->l_end > lock->l_end) {
		if (request->l_start > lock->l_start) {
			if (lock_effect == FLK_STAY_SAME) {
				request->l_start = lock->l_start;
				topology[0] = request;
				nvertex = 1;
			} else {
				lock1 = flk_get_lock();
				COPY(lock1, lock);
				lock1->l_end = request->l_start - 1;
				topology[0] = lock1;
				topology[1] = request;
				nvertex = 2;
			}
		} else if (request->l_start < lock->l_start) {
			topology[0] = request;
			nvertex = 1;
		} else {
			topology[0] = request;
			nvertex = 1;
		}
	} else {
		if (request->l_start > lock->l_start) {
			if (lock_effect == FLK_STAY_SAME) {
				request->l_start = lock->l_start;
				topology[0] = request;
				nvertex = 1;
			} else {
				lock1 = flk_get_lock();
				COPY(lock1, lock);
				lock1->l_end = request->l_start - 1;
				topology[0] = lock1;
				topology[1] = request;
				nvertex = 2;
			}
		} else if (request->l_start < lock->l_start) {
			topology[0] = request;
			nvertex = 1;
		} else {
			if (lock_effect != FLK_UNLOCK) {
				topology[0] = request;
				nvertex = 1;
			} else {
				flk_delete_active_lock(lock, 0);
				flk_wakeup(lock, 1);
				flk_free_lock(lock);
				CHECK_SLEEPING_LOCKS(gp);
				CHECK_ACTIVE_LOCKS(gp);
				return (1);
			}
		}
	}
recompute:

	/*
	 * For unlock we don't send the 'request' for recomputing
	 * dependencies because no lock will add an edge to it.
	 */

	if (lock_effect == FLK_UNLOCK) {
		topology[nvertex-1] = NULL;
		nvertex--;
	}
	for (i = 0; i < nvertex; i++) {
		topology[i]->l_state |= RECOMPUTE_LOCK;
		topology[i]->l_color = NO_COLOR;
	}

	ASSERT(FIRST_ADJ(lock) == HEAD(lock));

	/*
	 * Remove the edges leading to this vertex 'lock' from the
	 * adjacency lists of all the vertices that depend on it.
	 */

	ep = FIRST_IN(lock);
	while (ep != HEAD(lock)) {
		ADJ_LIST_REMOVE(ep);
		ep = NEXT_IN(ep);
	}

	flk_delete_active_lock(lock, 0);

	/* We are ready for recomputing the dependencies now */

	flk_recompute_dependencies(lock, topology, nvertex, 1);

	for (i = 0; i < nvertex; i++) {
		topology[i]->l_state &= ~RECOMPUTE_LOCK;
		topology[i]->l_color = NO_COLOR;
	}


	if (lock_effect == FLK_UNLOCK) {
		nvertex++;
	}
	for (i = 0; i < nvertex - 1; i++) {
		flk_insert_active_lock(topology[i]);
	}


	if (lock_effect == FLK_DOWNGRADE || lock_effect == FLK_UNLOCK) {
		flk_wakeup(lock, 0);
	} else {
		ep = FIRST_IN(lock);
		while (ep != HEAD(lock)) {
			lock->l_sedge = NEXT_IN(ep);
			IN_LIST_REMOVE(ep);
			flk_update_proc_graph(ep, 1);
			flk_free_edge(ep);
			ep = lock->l_sedge;
		}
	}
	flk_free_lock(lock);

	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);
	return (0);
}

/*
 * Insert a lock into the active queue.
 */

static void
flk_insert_active_lock(lock_descriptor_t *new_lock)
{
	graph_t	*gp = new_lock->l_graph;
	vnode_t	*vp = new_lock->l_vnode;
	lock_descriptor_t *first_lock, *lock;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
	first_lock = lock;

	if (first_lock != NULL) {
		for (; (lock->l_vnode == vp &&
		    lock->l_start < new_lock->l_start); lock = lock->l_next)
			;
	} else {
		lock = ACTIVE_HEAD(gp);
	}

	lock->l_prev->l_next = new_lock;
	new_lock->l_next = lock;
	new_lock->l_prev = lock->l_prev;
	lock->l_prev = new_lock;

	if (first_lock == NULL || (new_lock->l_start <= first_lock->l_start)) {
		vp->v_filocks = (struct filock *)new_lock;
	}
	flk_set_state(new_lock, FLK_ACTIVE_STATE);
	new_lock->l_state |= ACTIVE_LOCK;

	CHECK_ACTIVE_LOCKS(gp);
	CHECK_SLEEPING_LOCKS(gp);
}

/*
 * Delete the active lock.  Performs one of two functions depending on the
 * value of the second parameter: either remove the lock from the active
 * list only, or both remove it and free it.
 */
static void
flk_delete_active_lock(lock_descriptor_t *lock, int free_lock)
{
	vnode_t *vp = lock->l_vnode;
	graph_t	*gp = lock->l_graph;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	if (free_lock)
		ASSERT(NO_DEPENDENTS(lock));
	ASSERT(NOT_BLOCKED(lock));
	ASSERT(IS_ACTIVE(lock));

	ASSERT((vp->v_filocks != NULL));

	if (vp->v_filocks == (struct filock *)lock) {
		vp->v_filocks = (struct filock *)
		    ((lock->l_next->l_vnode == vp) ? lock->l_next :
		    NULL);
	}
	lock->l_next->l_prev = lock->l_prev;
	lock->l_prev->l_next = lock->l_next;
	lock->l_next = lock->l_prev = NULL;
	flk_set_state(lock, FLK_DEAD_STATE);
	lock->l_state &= ~ACTIVE_LOCK;

	if (free_lock)
		flk_free_lock(lock);
	CHECK_ACTIVE_LOCKS(gp);
	CHECK_SLEEPING_LOCKS(gp);
}

/*
 * Insert into the sleep queue.
 */

static void
flk_insert_sleeping_lock(lock_descriptor_t *request)
{
	graph_t *gp = request->l_graph;
	vnode_t	*vp = request->l_vnode;
	lock_descriptor_t *lock;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	ASSERT(IS_INITIAL(request));

	for (lock = gp->sleeping_locks.l_next; (lock != &gp->sleeping_locks &&
	    lock->l_vnode < vp); lock = lock->l_next)
		;

	lock->l_prev->l_next = request;
	request->l_prev = lock->l_prev;
	lock->l_prev = request;
	request->l_next = lock;
	flk_set_state(request, FLK_SLEEPING_STATE);
	request->l_state |= SLEEPING_LOCK;
}

/*
 * Cancelling a sleeping lock implies removing a vertex from the
 * dependency graph and therefore we should recompute the dependencies
 * of all vertices that have a path to this vertex, w.r.t. all
 * vertices reachable from this vertex.
 */

void
flk_cancel_sleeping_lock(lock_descriptor_t *request, int remove_from_queue)
{
	graph_t	*gp = request->l_graph;
	vnode_t *vp = request->l_vnode;
	lock_descriptor_t **topology = NULL;
	edge_t *ep;
	lock_descriptor_t *vertex, *lock;
	int nvertex = 0;
	int i;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	/*
	 * Count the number of vertex pointers that have to be allocated:
	 * all vertices that are reachable from request.
	 */

	STACK_PUSH(vertex_stack, request, l_stack);

	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
		STACK_POP(vertex_stack, l_stack);
		for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
		    ep = NEXT_ADJ(ep)) {
			if (IS_RECOMPUTE(ep->to_vertex))
				continue;
			ep->to_vertex->l_state |= RECOMPUTE_LOCK;
			STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
			nvertex++;
		}
	}

	/*
	 * allocate memory for holding the vertex pointers
	 */

	if (nvertex) {
		topology = kmem_zalloc(nvertex * sizeof (lock_descriptor_t *),
		    KM_SLEEP);
	}

	/*
	 * One more pass to actually store the vertices in the
	 * allocated array.
	 * We first check sleeping locks and then active locks
	 * so that the topology array will be in topological
	 * order.
	 */
	nvertex = 0;
	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);

	if (lock) {
		do {
			if (IS_RECOMPUTE(lock)) {
				lock->l_index = nvertex;
				topology[nvertex++] = lock;
			}
			lock->l_color = NO_COLOR;
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	if (lock) {
		do {
			if (IS_RECOMPUTE(lock)) {
				lock->l_index = nvertex;
				topology[nvertex++] = lock;
			}
			lock->l_color = NO_COLOR;
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	/*
	 * Remove the in and out edges of request.
	 * They are freed after updating proc_graph below.
	 */

	for (ep = FIRST_IN(request); ep != HEAD(request); ep = NEXT_IN(ep)) {
		ADJ_LIST_REMOVE(ep);
	}


	if (remove_from_queue)
		REMOVE_SLEEP_QUEUE(request);

	/* we are ready to recompute */

	flk_recompute_dependencies(request, topology, nvertex, 1);

	ep = FIRST_ADJ(request);
	while (ep != HEAD(request)) {
		IN_LIST_REMOVE(ep);
		request->l_sedge = NEXT_ADJ(ep);
		ADJ_LIST_REMOVE(ep);
		flk_update_proc_graph(ep, 1);
		flk_free_edge(ep);
		ep = request->l_sedge;
	}


	/*
	 * unset the RECOMPUTE flag in those vertices
	 */

	for (i = 0; i < nvertex; i++) {
		topology[i]->l_state &= ~RECOMPUTE_LOCK;
	}

	/*
	 * free the topology
	 */
	if (nvertex)
		kmem_free((void *)topology,
		    (nvertex * sizeof (lock_descriptor_t *)));
	/*
	 * Possibility of some locks unblocked now
	 */

	flk_wakeup(request, 0);

	/*
	 * we expect to have a correctly recomputed graph now.
	 */
	flk_set_state(request, FLK_DEAD_STATE);
	flk_free_lock(request);
	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);

}

/*
 * Uncoloring the graph is simply a matter of incrementing the mark value
 * of the graph; only when the mark wraps around do we explicitly reset
 * the color of every vertex in the graph.
 */

static void
flk_graph_uncolor(graph_t *gp)
{
	lock_descriptor_t *lock;

	if (gp->mark == UINT_MAX) {
		gp->mark = 1;
		for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
		    lock = lock->l_next)
			lock->l_color = 0;

		for (lock = SLEEPING_HEAD(gp)->l_next;
		    lock != SLEEPING_HEAD(gp); lock = lock->l_next)
			lock->l_color = 0;
	} else {
		gp->mark++;
	}
}

/*
 * Wake up locks that are blocked on the given lock.
 */

static void
flk_wakeup(lock_descriptor_t *lock, int adj_list_remove)
{
	edge_t *ep;
	graph_t	*gp = lock->l_graph;
	lock_descriptor_t *lck;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	if (NO_DEPENDENTS(lock))
		return;
	ep = FIRST_IN(lock);
	do {
		/*
		 * Delete the edge from the adjacency list of the from
		 * vertex; if that vertex has no more adjacent edges,
		 * wake up its process.
		 */
		lck = ep->from_vertex;
		if (adj_list_remove)
			ADJ_LIST_REMOVE(ep);
		flk_update_proc_graph(ep, 1);
		if (NOT_BLOCKED(lck)) {
			GRANT_WAKEUP(lck);
		}
		lock->l_sedge = NEXT_IN(ep);
		IN_LIST_REMOVE(ep);
		flk_free_edge(ep);
		ep = lock->l_sedge;
	} while (ep != HEAD(lock));
	ASSERT(NO_DEPENDENTS(lock));
}

/*
 * The dependents of request are checked for dependency against the
 * locks in topology (called topology because the array is and should be in
 * topological order for this algorithm; if it is not in topological order
 * the inner loop below might add more edges than necessary.  Topological
 * ordering of vertices satisfies the property that all edges will be from
 * left to right, i.e., topology[i] can have an edge to topology[j] iff i < j).
 * If lock l1 in the dependent set of request is dependent on (blocked by)
 * lock l2 in topology but does not have a path to it, we add an edge
 * in the inner loop below.
 *
 * We don't want to add an edge between l1 and l2 if there exists
 * already a path from l1 to l2, so care has to be taken for those vertices
 * that have two paths to 'request'.  These vertices are referred to here
 * as barrier locks.
 *
 * The barriers have to be found (those vertices that originally had two
 * paths to request) because otherwise we may end up adding edges
 * unnecessarily to vertices in topology, and thus a barrier vertex can
 * have an edge to a vertex in topology as well as a path to it.
 */

static void
flk_recompute_dependencies(lock_descriptor_t *request,
		lock_descriptor_t **topology,
			int nvertex, int update_graph)
{
	lock_descriptor_t *vertex, *lock;
	graph_t	*gp = request->l_graph;
	int i, count;
	int barrier_found = 0;
	edge_t *ep;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	if (nvertex == 0)
		return;
	flk_graph_uncolor(request->l_graph);
	barrier_found = flk_find_barriers(request);
	request->l_state |= RECOMPUTE_DONE;

	STACK_PUSH(vertex_stack, request, l_stack);
	request->l_sedge = FIRST_IN(request);


	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
		if (vertex->l_state & RECOMPUTE_DONE) {
			count = 0;
			goto next_in_edge;
		}
		if (IS_BARRIER(vertex)) {
			/* decrement the barrier count */
			if (vertex->l_index) {
				vertex->l_index--;
				/* this guy will be pushed again anyway ? */
				STACK_POP(vertex_stack, l_stack);
				if (vertex->l_index == 0) {
					/*
					 * barrier is over we can recompute
					 * dependencies for this lock in the
					 * next stack pop
					 */
					vertex->l_state &= ~BARRIER_LOCK;
				}
				continue;
			}
		}
		vertex->l_state |= RECOMPUTE_DONE;
		flk_graph_uncolor(gp);
		count = flk_color_reachables(vertex);
		for (i = 0; i < nvertex; i++) {
			lock = topology[i];
			if (COLORED(lock))
				continue;
			if (BLOCKS(lock, vertex)) {
				(void) flk_add_edge(vertex, lock,
				    NO_CHECK_CYCLE, update_graph);
				COLOR(lock);
				count++;
				count += flk_color_reachables(lock);
			}

		}

next_in_edge:
		if (count == nvertex ||
		    vertex->l_sedge == HEAD(vertex)) {
			/* prune the tree below this */
			STACK_POP(vertex_stack, l_stack);
			vertex->l_state &= ~RECOMPUTE_DONE;
			/* update the barrier locks below this! */
			if (vertex->l_sedge != HEAD(vertex) && barrier_found) {
				flk_graph_uncolor(gp);
				flk_update_barriers(vertex);
			}
			continue;
		}

		ep = vertex->l_sedge;
		lock = ep->from_vertex;
		STACK_PUSH(vertex_stack, lock, l_stack);
		lock->l_sedge = FIRST_IN(lock);
		vertex->l_sedge = NEXT_IN(ep);
	}

}

/*
 * Color all reachable vertices from vertex that belongs to topology (here
 * those that have RECOMPUTE_LOCK set in their state) and yet uncolored.
 *
 * Note: we need to use a different stack_link, l_stack1, because this is
 * called from flk_recompute_dependencies(), which already uses a stack with
 * l_stack as the stack_link.
 */

static int
flk_color_reachables(lock_descriptor_t *vertex)
{
	lock_descriptor_t *ver, *lock;
	int count;
	edge_t *ep;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);

	STACK_PUSH(vertex_stack, vertex, l_stack1);
	count = 0;
	while ((ver = STACK_TOP(vertex_stack)) != NULL) {

		STACK_POP(vertex_stack, l_stack1);
		for (ep = FIRST_ADJ(ver); ep != HEAD(ver);
		    ep = NEXT_ADJ(ep)) {
			lock = ep->to_vertex;
			if (COLORED(lock))
				continue;
			COLOR(lock);
			if (IS_RECOMPUTE(lock))
				count++;
			STACK_PUSH(vertex_stack, lock, l_stack1);
		}

	}
	return (count);
}

/*
 * Called from flk_recompute_dependencies(), this routine decrements
 * the barrier count of barrier vertices that are reachable from lock.
 */

static void
flk_update_barriers(lock_descriptor_t *lock)
{
	lock_descriptor_t *vertex, *lck;
	edge_t *ep;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);

	STACK_PUSH(vertex_stack, lock, l_stack1);

	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
		STACK_POP(vertex_stack, l_stack1);
		for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
		    ep = NEXT_IN(ep)) {
			lck = ep->from_vertex;
			if (COLORED(lck)) {
				if (IS_BARRIER(lck)) {
					ASSERT(lck->l_index > 0);
					lck->l_index--;
					if (lck->l_index == 0)
						lck->l_state &= ~BARRIER_LOCK;
				}
				continue;
			}
			COLOR(lck);
			if (IS_BARRIER(lck)) {
				ASSERT(lck->l_index > 0);
				lck->l_index--;
				if (lck->l_index == 0)
					lck->l_state &= ~BARRIER_LOCK;
			}
			STACK_PUSH(vertex_stack, lck, l_stack1);
		}
	}
}

/*
 * Finds all vertices that are reachable from 'lock' more than once, marks
 * them as barrier vertices and increments their barrier count.
 * The barrier count is one less than the total number of paths from lock
 * to that vertex.
 */
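/*
 * Illustration (added note, not from the original source): if 'lock' can
 * reach a vertex v both directly and through one intermediate vertex, v is
 * visited twice by the walk below, so it is marked BARRIER_LOCK with
 * l_index = 1 (two paths minus one).
 */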
2036 */ 2037 2038 static int 2039 flk_find_barriers(lock_descriptor_t *lock) 2040 { 2041 lock_descriptor_t *vertex, *lck; 2042 int found = 0; 2043 edge_t *ep; 2044 lock_descriptor_t *vertex_stack; 2045 2046 STACK_INIT(vertex_stack); 2047 2048 STACK_PUSH(vertex_stack, lock, l_stack1); 2049 2050 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 2051 STACK_POP(vertex_stack, l_stack1); 2052 for (ep = FIRST_IN(vertex); ep != HEAD(vertex); 2053 ep = NEXT_IN(ep)) { 2054 lck = ep->from_vertex; 2055 if (COLORED(lck)) { 2056 /* this is a barrier */ 2057 lck->l_state |= BARRIER_LOCK; 2058 /* index will have barrier count */ 2059 lck->l_index++; 2060 if (!found) 2061 found = 1; 2062 continue; 2063 } 2064 COLOR(lck); 2065 lck->l_index = 0; 2066 STACK_PUSH(vertex_stack, lck, l_stack1); 2067 } 2068 } 2069 return (found); 2070 } 2071 2072 /* 2073 * Finds the first lock that is mainly responsible for blocking this 2074 * request. If there is no such lock, request->l_flock.l_type is set to 2075 * F_UNLCK. Otherwise, request->l_flock is filled in with the particulars 2076 * of the blocking lock. 2077 * 2078 * Note: It is possible a request is blocked by a sleeping lock because 2079 * of the fairness policy used in flk_process_request() to construct the 2080 * dependencies. (see comments before flk_process_request()). 2081 */ 2082 2083 static void 2084 flk_get_first_blocking_lock(lock_descriptor_t *request) 2085 { 2086 graph_t *gp = request->l_graph; 2087 vnode_t *vp = request->l_vnode; 2088 lock_descriptor_t *lock, *blocker; 2089 2090 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 2091 blocker = NULL; 2092 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2093 2094 if (lock) { 2095 do { 2096 if (BLOCKS(lock, request)) { 2097 blocker = lock; 2098 break; 2099 } 2100 lock = lock->l_next; 2101 } while (lock->l_vnode == vp); 2102 } 2103 2104 if (blocker) { 2105 report_blocker(blocker, request); 2106 } else 2107 request->l_flock.l_type = F_UNLCK; 2108 } 2109 2110 /* 2111 * Get the graph_t structure associated with a vnode. 2112 * If 'initialize' is non-zero, and the graph_t structure for this vnode has 2113 * not yet been initialized, then a new element is allocated and returned. 2114 */ 2115 graph_t * 2116 flk_get_lock_graph(vnode_t *vp, int initialize) 2117 { 2118 graph_t *gp; 2119 graph_t *gp_alloc = NULL; 2120 int index = HASH_INDEX(vp); 2121 2122 if (initialize == FLK_USE_GRAPH) { 2123 mutex_enter(&flock_lock); 2124 gp = lock_graph[index]; 2125 mutex_exit(&flock_lock); 2126 return (gp); 2127 } 2128 2129 ASSERT(initialize == FLK_INIT_GRAPH); 2130 2131 if (lock_graph[index] == NULL) { 2132 2133 gp_alloc = kmem_zalloc(sizeof (graph_t), KM_SLEEP); 2134 2135 /* Initialize the graph */ 2136 2137 gp_alloc->active_locks.l_next = 2138 gp_alloc->active_locks.l_prev = 2139 (lock_descriptor_t *)ACTIVE_HEAD(gp_alloc); 2140 gp_alloc->sleeping_locks.l_next = 2141 gp_alloc->sleeping_locks.l_prev = 2142 (lock_descriptor_t *)SLEEPING_HEAD(gp_alloc); 2143 gp_alloc->index = index; 2144 mutex_init(&gp_alloc->gp_mutex, NULL, MUTEX_DEFAULT, NULL); 2145 } 2146 2147 mutex_enter(&flock_lock); 2148 2149 gp = lock_graph[index]; 2150 2151 /* Recheck the value within flock_lock */ 2152 if (gp == NULL) { 2153 struct flock_globals *fg; 2154 2155 /* We must have previously allocated the graph_t structure */ 2156 ASSERT(gp_alloc != NULL); 2157 lock_graph[index] = gp = gp_alloc; 2158 /* 2159 * The lockmgr status is only needed if KLM is loaded. 
2160 */ 2161 if (flock_zone_key != ZONE_KEY_UNINITIALIZED) { 2162 fg = flk_get_globals(); 2163 fg->lockmgr_status[index] = fg->flk_lockmgr_status; 2164 } 2165 } 2166 2167 mutex_exit(&flock_lock); 2168 2169 if ((gp_alloc != NULL) && (gp != gp_alloc)) { 2170 /* There was a race to allocate the graph_t and we lost */ 2171 mutex_destroy(&gp_alloc->gp_mutex); 2172 kmem_free(gp_alloc, sizeof (graph_t)); 2173 } 2174 2175 return (gp); 2176 } 2177 2178 /* 2179 * PSARC case 1997/292 2180 */ 2181 int 2182 cl_flk_has_remote_locks_for_nlmid(vnode_t *vp, int nlmid) 2183 { 2184 lock_descriptor_t *lock; 2185 int result = 0; 2186 graph_t *gp; 2187 int lock_nlmid; 2188 2189 /* 2190 * Check to see if node is booted as a cluster. If not, return. 2191 */ 2192 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 2193 return (0); 2194 } 2195 2196 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH); 2197 if (gp == NULL) { 2198 return (0); 2199 } 2200 2201 mutex_enter(&gp->gp_mutex); 2202 2203 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2204 2205 if (lock) { 2206 while (lock->l_vnode == vp) { 2207 /* get NLM id from sysid */ 2208 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 2209 2210 /* 2211 * If NLM server request _and_ nlmid of lock matches 2212 * nlmid of argument, then we've found a remote lock. 2213 */ 2214 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 2215 result = 1; 2216 goto done; 2217 } 2218 lock = lock->l_next; 2219 } 2220 } 2221 2222 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2223 2224 if (lock) { 2225 while (lock->l_vnode == vp) { 2226 /* get NLM id from sysid */ 2227 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 2228 2229 /* 2230 * If NLM server request _and_ nlmid of lock matches 2231 * nlmid of argument, then we've found a remote lock. 2232 */ 2233 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 2234 result = 1; 2235 goto done; 2236 } 2237 lock = lock->l_next; 2238 } 2239 } 2240 2241 done: 2242 mutex_exit(&gp->gp_mutex); 2243 return (result); 2244 } 2245 2246 /* ONC_PLUS EXTRACT START */ 2247 /* 2248 * Determine whether there are any locks for the given vnode with a remote 2249 * sysid. Returns zero if not, non-zero if there are. 2250 * 2251 * Note that the return value from this function is potentially invalid 2252 * once it has been returned. The caller is responsible for providing its 2253 * own synchronization mechanism to ensure that the return value is useful 2254 * (e.g., see nfs_lockcompletion()). 2255 */ 2256 int 2257 flk_has_remote_locks(vnode_t *vp) 2258 { 2259 lock_descriptor_t *lock; 2260 int result = 0; 2261 graph_t *gp; 2262 2263 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH); 2264 if (gp == NULL) { 2265 return (0); 2266 } 2267 2268 mutex_enter(&gp->gp_mutex); 2269 2270 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2271 2272 if (lock) { 2273 while (lock->l_vnode == vp) { 2274 if (IS_REMOTE(lock)) { 2275 result = 1; 2276 goto done; 2277 } 2278 lock = lock->l_next; 2279 } 2280 } 2281 2282 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2283 2284 if (lock) { 2285 while (lock->l_vnode == vp) { 2286 if (IS_REMOTE(lock)) { 2287 result = 1; 2288 goto done; 2289 } 2290 lock = lock->l_next; 2291 } 2292 } 2293 2294 done: 2295 mutex_exit(&gp->gp_mutex); 2296 return (result); 2297 } 2298 2299 /* 2300 * Determine if there are any locks owned by the given sysid. 2301 * Returns zero if not, non-zero if there are. Note that this return code 2302 * could be derived from flk_get_{sleeping,active}_locks, but this routine 2303 * avoids all the memory allocations of those routines. 
2304 * 2305 * This routine has the same synchronization issues as 2306 * flk_has_remote_locks. 2307 */ 2308 2309 int 2310 flk_sysid_has_locks(int sysid, int lck_type) 2311 { 2312 int has_locks = 0; 2313 lock_descriptor_t *lock; 2314 graph_t *gp; 2315 int i; 2316 2317 for (i = 0; i < HASH_SIZE && !has_locks; i++) { 2318 mutex_enter(&flock_lock); 2319 gp = lock_graph[i]; 2320 mutex_exit(&flock_lock); 2321 if (gp == NULL) { 2322 continue; 2323 } 2324 2325 mutex_enter(&gp->gp_mutex); 2326 2327 if (lck_type & FLK_QUERY_ACTIVE) { 2328 for (lock = ACTIVE_HEAD(gp)->l_next; 2329 lock != ACTIVE_HEAD(gp) && !has_locks; 2330 lock = lock->l_next) { 2331 if (lock->l_flock.l_sysid == sysid) 2332 has_locks = 1; 2333 } 2334 } 2335 2336 if (lck_type & FLK_QUERY_SLEEPING) { 2337 for (lock = SLEEPING_HEAD(gp)->l_next; 2338 lock != SLEEPING_HEAD(gp) && !has_locks; 2339 lock = lock->l_next) { 2340 if (lock->l_flock.l_sysid == sysid) 2341 has_locks = 1; 2342 } 2343 } 2344 mutex_exit(&gp->gp_mutex); 2345 } 2346 2347 return (has_locks); 2348 } 2349 2350 2351 /* 2352 * PSARC case 1997/292 2353 * 2354 * Requires: "sysid" is a pair [nlmid, sysid]. The lower half is 16-bit 2355 * quantity, the real sysid generated by the NLM server; the upper half 2356 * identifies the node of the cluster where the NLM server ran. 2357 * This routine is only called by an NLM server running in a cluster. 2358 * Effects: Remove all locks held on behalf of the client identified 2359 * by "sysid." 2360 */ 2361 void 2362 cl_flk_remove_locks_by_sysid(int sysid) 2363 { 2364 graph_t *gp; 2365 int i; 2366 lock_descriptor_t *lock, *nlock; 2367 2368 /* 2369 * Check to see if node is booted as a cluster. If not, return. 2370 */ 2371 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 2372 return; 2373 } 2374 2375 ASSERT(sysid != 0); 2376 for (i = 0; i < HASH_SIZE; i++) { 2377 mutex_enter(&flock_lock); 2378 gp = lock_graph[i]; 2379 mutex_exit(&flock_lock); 2380 2381 if (gp == NULL) 2382 continue; 2383 2384 mutex_enter(&gp->gp_mutex); /* get mutex on lock graph */ 2385 2386 /* signal sleeping requests so that they bail out */ 2387 lock = SLEEPING_HEAD(gp)->l_next; 2388 while (lock != SLEEPING_HEAD(gp)) { 2389 nlock = lock->l_next; 2390 if (lock->l_flock.l_sysid == sysid) { 2391 INTERRUPT_WAKEUP(lock); 2392 } 2393 lock = nlock; 2394 } 2395 2396 /* delete active locks */ 2397 lock = ACTIVE_HEAD(gp)->l_next; 2398 while (lock != ACTIVE_HEAD(gp)) { 2399 nlock = lock->l_next; 2400 if (lock->l_flock.l_sysid == sysid) { 2401 flk_delete_active_lock(lock, 0); 2402 flk_wakeup(lock, 1); 2403 flk_free_lock(lock); 2404 } 2405 lock = nlock; 2406 } 2407 mutex_exit(&gp->gp_mutex); /* release mutex on lock graph */ 2408 } 2409 } 2410 2411 /* 2412 * Delete all locks in the system that belongs to the sysid of the request. 
2413 */ 2414 2415 static void 2416 flk_delete_locks_by_sysid(lock_descriptor_t *request) 2417 { 2418 int sysid = request->l_flock.l_sysid; 2419 lock_descriptor_t *lock, *nlock; 2420 graph_t *gp; 2421 int i; 2422 2423 ASSERT(MUTEX_HELD(&request->l_graph->gp_mutex)); 2424 ASSERT(sysid != 0); 2425 2426 mutex_exit(&request->l_graph->gp_mutex); 2427 2428 for (i = 0; i < HASH_SIZE; i++) { 2429 mutex_enter(&flock_lock); 2430 gp = lock_graph[i]; 2431 mutex_exit(&flock_lock); 2432 2433 if (gp == NULL) 2434 continue; 2435 2436 mutex_enter(&gp->gp_mutex); 2437 2438 /* signal sleeping requests so that they bail out */ 2439 lock = SLEEPING_HEAD(gp)->l_next; 2440 while (lock != SLEEPING_HEAD(gp)) { 2441 nlock = lock->l_next; 2442 if (lock->l_flock.l_sysid == sysid) { 2443 INTERRUPT_WAKEUP(lock); 2444 } 2445 lock = nlock; 2446 } 2447 2448 /* delete active locks */ 2449 lock = ACTIVE_HEAD(gp)->l_next; 2450 while (lock != ACTIVE_HEAD(gp)) { 2451 nlock = lock->l_next; 2452 if (lock->l_flock.l_sysid == sysid) { 2453 flk_delete_active_lock(lock, 0); 2454 flk_wakeup(lock, 1); 2455 flk_free_lock(lock); 2456 } 2457 lock = nlock; 2458 } 2459 mutex_exit(&gp->gp_mutex); 2460 } 2461 2462 mutex_enter(&request->l_graph->gp_mutex); 2463 } 2464 2465 /* 2466 * Clustering: Deletes PXFS locks 2467 * Effects: Delete all locks on files in the given file system and with the 2468 * given PXFS id. 2469 */ 2470 void 2471 cl_flk_delete_pxfs_locks(struct vfs *vfsp, int pxfsid) 2472 { 2473 lock_descriptor_t *lock, *nlock; 2474 graph_t *gp; 2475 int i; 2476 2477 for (i = 0; i < HASH_SIZE; i++) { 2478 mutex_enter(&flock_lock); 2479 gp = lock_graph[i]; 2480 mutex_exit(&flock_lock); 2481 2482 if (gp == NULL) 2483 continue; 2484 2485 mutex_enter(&gp->gp_mutex); 2486 2487 /* signal sleeping requests so that they bail out */ 2488 lock = SLEEPING_HEAD(gp)->l_next; 2489 while (lock != SLEEPING_HEAD(gp)) { 2490 nlock = lock->l_next; 2491 if (lock->l_vnode->v_vfsp == vfsp) { 2492 ASSERT(IS_PXFS(lock)); 2493 if (GETPXFSID(lock->l_flock.l_sysid) == 2494 pxfsid) { 2495 flk_set_state(lock, 2496 FLK_CANCELLED_STATE); 2497 flk_cancel_sleeping_lock(lock, 1); 2498 } 2499 } 2500 lock = nlock; 2501 } 2502 2503 /* delete active locks */ 2504 lock = ACTIVE_HEAD(gp)->l_next; 2505 while (lock != ACTIVE_HEAD(gp)) { 2506 nlock = lock->l_next; 2507 if (lock->l_vnode->v_vfsp == vfsp) { 2508 ASSERT(IS_PXFS(lock)); 2509 if (GETPXFSID(lock->l_flock.l_sysid) == 2510 pxfsid) { 2511 flk_delete_active_lock(lock, 0); 2512 flk_wakeup(lock, 1); 2513 flk_free_lock(lock); 2514 } 2515 } 2516 lock = nlock; 2517 } 2518 mutex_exit(&gp->gp_mutex); 2519 } 2520 } 2521 2522 /* 2523 * Search for a sleeping lock manager lock which matches exactly this lock 2524 * request; if one is found, fake a signal to cancel it. 2525 * 2526 * Return 1 if a matching lock was found, 0 otherwise. 
2527 */ 2528 2529 static int 2530 flk_canceled(lock_descriptor_t *request) 2531 { 2532 lock_descriptor_t *lock, *nlock; 2533 graph_t *gp = request->l_graph; 2534 vnode_t *vp = request->l_vnode; 2535 2536 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 2537 ASSERT(IS_LOCKMGR(request)); 2538 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2539 2540 if (lock) { 2541 while (lock->l_vnode == vp) { 2542 nlock = lock->l_next; 2543 if (SAME_OWNER(lock, request) && 2544 lock->l_start == request->l_start && 2545 lock->l_end == request->l_end) { 2546 INTERRUPT_WAKEUP(lock); 2547 return (1); 2548 } 2549 lock = nlock; 2550 } 2551 } 2552 return (0); 2553 } 2554 2555 /* 2556 * Remove all the locks for the vnode belonging to the given pid and sysid. 2557 */ 2558 2559 void 2560 cleanlocks(vnode_t *vp, pid_t pid, int sysid) 2561 { 2562 graph_t *gp; 2563 lock_descriptor_t *lock, *nlock; 2564 lock_descriptor_t *link_stack; 2565 2566 STACK_INIT(link_stack); 2567 2568 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH); 2569 2570 if (gp == NULL) 2571 return; 2572 mutex_enter(&gp->gp_mutex); 2573 2574 CHECK_SLEEPING_LOCKS(gp); 2575 CHECK_ACTIVE_LOCKS(gp); 2576 2577 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2578 2579 if (lock) { 2580 do { 2581 nlock = lock->l_next; 2582 if ((lock->l_flock.l_pid == pid || 2583 pid == IGN_PID) && 2584 lock->l_flock.l_sysid == sysid) { 2585 CANCEL_WAKEUP(lock); 2586 } 2587 lock = nlock; 2588 } while (lock->l_vnode == vp); 2589 } 2590 2591 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2592 2593 if (lock) { 2594 do { 2595 nlock = lock->l_next; 2596 if ((lock->l_flock.l_pid == pid || 2597 pid == IGN_PID) && 2598 lock->l_flock.l_sysid == sysid) { 2599 flk_delete_active_lock(lock, 0); 2600 STACK_PUSH(link_stack, lock, l_stack); 2601 } 2602 lock = nlock; 2603 } while (lock->l_vnode == vp); 2604 } 2605 2606 while ((lock = STACK_TOP(link_stack)) != NULL) { 2607 STACK_POP(link_stack, l_stack); 2608 flk_wakeup(lock, 1); 2609 flk_free_lock(lock); 2610 } 2611 2612 CHECK_SLEEPING_LOCKS(gp); 2613 CHECK_ACTIVE_LOCKS(gp); 2614 CHECK_OWNER_LOCKS(gp, pid, sysid, vp); 2615 mutex_exit(&gp->gp_mutex); 2616 } 2617 /* ONC_PLUS EXTRACT END */ 2618 2619 2620 /* 2621 * Called from 'fs' read and write routines for files that have mandatory 2622 * locking enabled. 2623 */ 2624 2625 int 2626 chklock( 2627 struct vnode *vp, 2628 int iomode, 2629 u_offset_t offset, 2630 ssize_t len, 2631 int fmode, 2632 caller_context_t *ct) 2633 { 2634 register int i; 2635 struct flock64 bf; 2636 int error = 0; 2637 2638 bf.l_type = (iomode & FWRITE) ? F_WRLCK : F_RDLCK; 2639 bf.l_whence = 0; 2640 bf.l_start = offset; 2641 bf.l_len = len; 2642 if (ct == NULL) { 2643 bf.l_pid = curproc->p_pid; 2644 bf.l_sysid = 0; 2645 } else { 2646 bf.l_pid = ct->cc_pid; 2647 bf.l_sysid = ct->cc_sysid; 2648 } 2649 i = (fmode & (FNDELAY|FNONBLOCK)) ? INOFLCK : INOFLCK|SLPFLCK; 2650 if ((i = reclock(vp, &bf, i, 0, offset, NULL)) != 0 || 2651 bf.l_type != F_UNLCK) 2652 error = i ? i : EAGAIN; 2653 return (error); 2654 } 2655 2656 /* ONC_PLUS EXTRACT START */ 2657 /* 2658 * convoff - converts the given data (start, whence) to the 2659 * given whence. 
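 *
 * A worked example (illustrative values only): with lckdat->l_whence == 1
 * (SEEK_CUR), lckdat->l_start == 100 and offset == 400, l_start is first
 * normalized to the absolute offset 500; converting to whence == 0
 * (SEEK_SET) then leaves l_start at 500 and sets l_whence to 0.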
2660 */ 2661 int 2662 convoff(vp, lckdat, whence, offset) 2663 struct vnode *vp; 2664 struct flock64 *lckdat; 2665 int whence; 2666 offset_t offset; 2667 { 2668 int error; 2669 struct vattr vattr; 2670 2671 if ((lckdat->l_whence == 2) || (whence == 2)) { 2672 vattr.va_mask = AT_SIZE; 2673 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) 2674 return (error); 2675 } 2676 2677 switch (lckdat->l_whence) { 2678 case 1: 2679 lckdat->l_start += offset; 2680 break; 2681 case 2: 2682 lckdat->l_start += vattr.va_size; 2683 /* FALLTHRU */ 2684 case 0: 2685 break; 2686 default: 2687 return (EINVAL); 2688 } 2689 2690 if (lckdat->l_start < 0) 2691 return (EINVAL); 2692 2693 switch (whence) { 2694 case 1: 2695 lckdat->l_start -= offset; 2696 break; 2697 case 2: 2698 lckdat->l_start -= vattr.va_size; 2699 /* FALLTHRU */ 2700 case 0: 2701 break; 2702 default: 2703 return (EINVAL); 2704 } 2705 2706 lckdat->l_whence = (short)whence; 2707 return (0); 2708 } 2709 /* ONC_PLUS EXTRACT END */ 2710 2711 2712 /* proc_graph function definitions */ 2713 2714 /* 2715 * Function checks for deadlock due to the new 'lock'. If deadlock found 2716 * edges of this lock are freed and returned. 2717 */ 2718 2719 static int 2720 flk_check_deadlock(lock_descriptor_t *lock) 2721 { 2722 proc_vertex_t *start_vertex, *pvertex; 2723 proc_vertex_t *dvertex; 2724 proc_edge_t *pep, *ppep; 2725 edge_t *ep, *nep; 2726 proc_vertex_t *process_stack; 2727 2728 STACK_INIT(process_stack); 2729 2730 mutex_enter(&flock_lock); 2731 start_vertex = flk_get_proc_vertex(lock); 2732 ASSERT(start_vertex != NULL); 2733 2734 /* construct the edges from this process to other processes */ 2735 2736 ep = FIRST_ADJ(lock); 2737 while (ep != HEAD(lock)) { 2738 proc_vertex_t *adj_proc; 2739 2740 adj_proc = flk_get_proc_vertex(ep->to_vertex); 2741 for (pep = start_vertex->edge; pep != NULL; pep = pep->next) { 2742 if (pep->to_proc == adj_proc) { 2743 ASSERT(pep->refcount); 2744 pep->refcount++; 2745 break; 2746 } 2747 } 2748 if (pep == NULL) { 2749 pep = flk_get_proc_edge(); 2750 pep->to_proc = adj_proc; 2751 pep->refcount = 1; 2752 adj_proc->incount++; 2753 pep->next = start_vertex->edge; 2754 start_vertex->edge = pep; 2755 } 2756 ep = NEXT_ADJ(ep); 2757 } 2758 2759 ep = FIRST_IN(lock); 2760 2761 while (ep != HEAD(lock)) { 2762 proc_vertex_t *in_proc; 2763 2764 in_proc = flk_get_proc_vertex(ep->from_vertex); 2765 2766 for (pep = in_proc->edge; pep != NULL; pep = pep->next) { 2767 if (pep->to_proc == start_vertex) { 2768 ASSERT(pep->refcount); 2769 pep->refcount++; 2770 break; 2771 } 2772 } 2773 if (pep == NULL) { 2774 pep = flk_get_proc_edge(); 2775 pep->to_proc = start_vertex; 2776 pep->refcount = 1; 2777 start_vertex->incount++; 2778 pep->next = in_proc->edge; 2779 in_proc->edge = pep; 2780 } 2781 ep = NEXT_IN(ep); 2782 } 2783 2784 if (start_vertex->incount == 0) { 2785 mutex_exit(&flock_lock); 2786 return (0); 2787 } 2788 2789 flk_proc_graph_uncolor(); 2790 2791 start_vertex->p_sedge = start_vertex->edge; 2792 2793 STACK_PUSH(process_stack, start_vertex, p_stack); 2794 2795 while ((pvertex = STACK_TOP(process_stack)) != NULL) { 2796 for (pep = pvertex->p_sedge; pep != NULL; pep = pep->next) { 2797 dvertex = pep->to_proc; 2798 if (!PROC_ARRIVED(dvertex)) { 2799 STACK_PUSH(process_stack, dvertex, p_stack); 2800 dvertex->p_sedge = dvertex->edge; 2801 PROC_ARRIVE(pvertex); 2802 pvertex->p_sedge = pep->next; 2803 break; 2804 } 2805 if (!PROC_DEPARTED(dvertex)) 2806 goto deadlock; 2807 } 2808 if (pep == NULL) { 2809 PROC_DEPART(pvertex); 2810 
STACK_POP(process_stack, p_stack); 2811 } 2812 } 2813 mutex_exit(&flock_lock); 2814 return (0); 2815 2816 deadlock: 2817 2818 /* we remove all lock edges and proc edges */ 2819 2820 ep = FIRST_ADJ(lock); 2821 while (ep != HEAD(lock)) { 2822 proc_vertex_t *adj_proc; 2823 adj_proc = flk_get_proc_vertex(ep->to_vertex); 2824 nep = NEXT_ADJ(ep); 2825 IN_LIST_REMOVE(ep); 2826 ADJ_LIST_REMOVE(ep); 2827 flk_free_edge(ep); 2828 ppep = start_vertex->edge; 2829 for (pep = start_vertex->edge; pep != NULL; ppep = pep, 2830 pep = ppep->next) { 2831 if (pep->to_proc == adj_proc) { 2832 pep->refcount--; 2833 if (pep->refcount == 0) { 2834 if (pep == ppep) { 2835 start_vertex->edge = pep->next; 2836 } else { 2837 ppep->next = pep->next; 2838 } 2839 adj_proc->incount--; 2840 flk_proc_release(adj_proc); 2841 flk_free_proc_edge(pep); 2842 } 2843 break; 2844 } 2845 } 2846 ep = nep; 2847 } 2848 ep = FIRST_IN(lock); 2849 while (ep != HEAD(lock)) { 2850 proc_vertex_t *in_proc; 2851 in_proc = flk_get_proc_vertex(ep->from_vertex); 2852 nep = NEXT_IN(ep); 2853 IN_LIST_REMOVE(ep); 2854 ADJ_LIST_REMOVE(ep); 2855 flk_free_edge(ep); 2856 ppep = in_proc->edge; 2857 for (pep = in_proc->edge; pep != NULL; ppep = pep, 2858 pep = ppep->next) { 2859 if (pep->to_proc == start_vertex) { 2860 pep->refcount--; 2861 if (pep->refcount == 0) { 2862 if (pep == ppep) { 2863 in_proc->edge = pep->next; 2864 } else { 2865 ppep->next = pep->next; 2866 } 2867 start_vertex->incount--; 2868 flk_proc_release(in_proc); 2869 flk_free_proc_edge(pep); 2870 } 2871 break; 2872 } 2873 } 2874 ep = nep; 2875 } 2876 flk_proc_release(start_vertex); 2877 mutex_exit(&flock_lock); 2878 return (1); 2879 } 2880 2881 /* 2882 * Get a proc vertex. If lock's pvertex value gets a correct proc vertex 2883 * from the list we return that, otherwise we allocate one. If necessary, 2884 * we grow the list of vertices also. 2885 */ 2886 2887 static proc_vertex_t * 2888 flk_get_proc_vertex(lock_descriptor_t *lock) 2889 { 2890 int i; 2891 proc_vertex_t *pv; 2892 proc_vertex_t **palloc; 2893 2894 ASSERT(MUTEX_HELD(&flock_lock)); 2895 if (lock->pvertex != -1) { 2896 ASSERT(lock->pvertex >= 0); 2897 pv = pgraph.proc[lock->pvertex]; 2898 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) { 2899 return (pv); 2900 } 2901 } 2902 for (i = 0; i < pgraph.gcount; i++) { 2903 pv = pgraph.proc[i]; 2904 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) { 2905 lock->pvertex = pv->index = i; 2906 return (pv); 2907 } 2908 } 2909 pv = kmem_zalloc(sizeof (struct proc_vertex), KM_SLEEP); 2910 pv->pid = lock->l_flock.l_pid; 2911 pv->sysid = lock->l_flock.l_sysid; 2912 flk_proc_vertex_allocs++; 2913 if (pgraph.free != 0) { 2914 for (i = 0; i < pgraph.gcount; i++) { 2915 if (pgraph.proc[i] == NULL) { 2916 pgraph.proc[i] = pv; 2917 lock->pvertex = pv->index = i; 2918 pgraph.free--; 2919 return (pv); 2920 } 2921 } 2922 } 2923 palloc = kmem_zalloc((pgraph.gcount + PROC_CHUNK) * 2924 sizeof (proc_vertex_t *), KM_SLEEP); 2925 2926 if (pgraph.proc) { 2927 bcopy(pgraph.proc, palloc, 2928 pgraph.gcount * sizeof (proc_vertex_t *)); 2929 2930 kmem_free(pgraph.proc, 2931 pgraph.gcount * sizeof (proc_vertex_t *)); 2932 } 2933 pgraph.proc = palloc; 2934 pgraph.free += (PROC_CHUNK - 1); 2935 pv->index = lock->pvertex = pgraph.gcount; 2936 pgraph.gcount += PROC_CHUNK; 2937 pgraph.proc[pv->index] = pv; 2938 return (pv); 2939 } 2940 2941 /* 2942 * Allocate a proc edge. 
 */

static proc_edge_t *
flk_get_proc_edge()
{
	proc_edge_t *pep;

	pep = kmem_zalloc(sizeof (proc_edge_t), KM_SLEEP);
	flk_proc_edge_allocs++;
	return (pep);
}

/*
 * Free the proc edge. Called whenever its reference count goes to zero.
 */

static void
flk_free_proc_edge(proc_edge_t *pep)
{
	ASSERT(pep->refcount == 0);
	kmem_free((void *)pep, sizeof (proc_edge_t));
	flk_proc_edge_frees++;
}

/*
 * Uncolor the process graph.  Normally this just bumps the mark value; the
 * vertices are cleared explicitly only when the mark value hits its maximum
 * (UINT_MAX) and has to wrap back to 1.
 */

static void
flk_proc_graph_uncolor()
{
	int i;

	if (pgraph.mark == UINT_MAX) {
		for (i = 0; i < pgraph.gcount; i++)
			if (pgraph.proc[i] != NULL) {
				pgraph.proc[i]->atime = 0;
				pgraph.proc[i]->dtime = 0;
			}
		pgraph.mark = 1;
	} else {
		pgraph.mark++;
	}
}

/*
 * Release the proc vertex iff it has neither in-edges nor out-edges.
 */

static void
flk_proc_release(proc_vertex_t *proc)
{
	ASSERT(MUTEX_HELD(&flock_lock));
	if (proc->edge == NULL && proc->incount == 0) {
		pgraph.proc[proc->index] = NULL;
		pgraph.free++;
		kmem_free(proc, sizeof (proc_vertex_t));
		flk_proc_vertex_frees++;
	}
}

/*
 * Update the process graph to reflect a change in a lock graph.
 * Note: this function should be called only after the lock graph has been
 * correctly recomputed; otherwise we might miss a deadlock.  E.g.,
 * flk_relation() calls this function after flk_recompute_dependencies();
 * otherwise, if a process tries to lock a vnode hashed into another graph,
 * it might sleep forever.
 */

static void
flk_update_proc_graph(edge_t *ep, int delete)
{
	proc_vertex_t *toproc, *fromproc;
	proc_edge_t *pep, *prevpep;

	mutex_enter(&flock_lock);
	toproc = flk_get_proc_vertex(ep->to_vertex);
	fromproc = flk_get_proc_vertex(ep->from_vertex);

	if (!delete)
		goto add;
	pep = prevpep = fromproc->edge;

	ASSERT(pep != NULL);
	while (pep != NULL) {
		if (pep->to_proc == toproc) {
			ASSERT(pep->refcount > 0);
			pep->refcount--;
			if (pep->refcount == 0) {
				if (pep == prevpep) {
					fromproc->edge = pep->next;
				} else {
					prevpep->next = pep->next;
				}
				toproc->incount--;
				flk_proc_release(toproc);
				flk_free_proc_edge(pep);
			}
			break;
		}
		prevpep = pep;
		pep = pep->next;
	}
	flk_proc_release(fromproc);
	mutex_exit(&flock_lock);
	return;
add:

	pep = fromproc->edge;

	while (pep != NULL) {
		if (pep->to_proc == toproc) {
			ASSERT(pep->refcount > 0);
			pep->refcount++;
			break;
		}
		pep = pep->next;
	}
	if (pep == NULL) {
		pep = flk_get_proc_edge();
		pep->to_proc = toproc;
		pep->refcount = 1;
		toproc->incount++;
		pep->next = fromproc->edge;
		fromproc->edge = pep;
	}
	mutex_exit(&flock_lock);
}

/* ONC_PLUS EXTRACT START */
/*
 * Set the control status for lock manager requests.
 */

/*
 * PSARC case 1997/292
 *
 * Requires: "nlmid" must be >= 1 and <= clconf_maximum_nodeid().
 * Effects: Set the state of the NLM server identified by "nlmid"
 * in the NLM registry to state "nlm_state."
3085 * Raises exception no_such_nlm if "nlmid" doesn't identify a known 3086 * NLM server to this LLM. 3087 * Note that when this routine is called with NLM_SHUTTING_DOWN there 3088 * may be locks requests that have gotten started but not finished. In 3089 * particular, there may be blocking requests that are in the callback code 3090 * before sleeping (so they're not holding the lock for the graph). If 3091 * such a thread reacquires the graph's lock (to go to sleep) after 3092 * NLM state in the NLM registry is set to a non-up value, 3093 * it will notice the status and bail out. If the request gets 3094 * granted before the thread can check the NLM registry, let it 3095 * continue normally. It will get flushed when we are called with NLM_DOWN. 3096 * 3097 * Modifies: nlm_reg_obj (global) 3098 * Arguments: 3099 * nlmid (IN): id uniquely identifying an NLM server 3100 * nlm_state (IN): NLM server state to change "nlmid" to 3101 */ 3102 void 3103 cl_flk_set_nlm_status(int nlmid, flk_nlm_status_t nlm_state) 3104 { 3105 /* 3106 * Check to see if node is booted as a cluster. If not, return. 3107 */ 3108 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 3109 return; 3110 } 3111 3112 /* 3113 * Check for development/debugging. It is possible to boot a node 3114 * in non-cluster mode, and then run a special script, currently 3115 * available only to developers, to bring up the node as part of a 3116 * cluster. The problem is that running such a script does not 3117 * result in the routine flk_init() being called and hence global array 3118 * nlm_reg_status is NULL. The NLM thinks it's in cluster mode, 3119 * but the LLM needs to do an additional check to see if the global 3120 * array has been created or not. If nlm_reg_status is NULL, then 3121 * return, else continue. 3122 */ 3123 if (nlm_reg_status == NULL) { 3124 return; 3125 } 3126 3127 ASSERT(nlmid <= nlm_status_size && nlmid >= 0); 3128 mutex_enter(&nlm_reg_lock); 3129 3130 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, nlmid)) { 3131 /* 3132 * If the NLM server "nlmid" is unknown in the NLM registry, 3133 * add it to the registry in the nlm shutting down state. 3134 */ 3135 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, 3136 FLK_NLM_SHUTTING_DOWN); 3137 } else { 3138 /* 3139 * Change the state of the NLM server identified by "nlmid" 3140 * in the NLM registry to the argument "nlm_state." 3141 */ 3142 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, 3143 nlm_state); 3144 } 3145 3146 /* 3147 * The reason we must register the NLM server that is shutting down 3148 * with an LLM that doesn't already know about it (never sent a lock 3149 * request) is to handle correctly a race between shutdown and a new 3150 * lock request. Suppose that a shutdown request from the NLM server 3151 * invokes this routine at the LLM, and a thread is spawned to 3152 * service the request. Now suppose a new lock request is in 3153 * progress and has already passed the first line of defense in 3154 * reclock(), which denies new locks requests from NLM servers 3155 * that are not in the NLM_UP state. After the current routine 3156 * is invoked for both phases of shutdown, the routine will return, 3157 * having done nothing, and the lock request will proceed and 3158 * probably be granted. The problem is that the shutdown was ignored 3159 * by the lock request because there was no record of that NLM server 3160 * shutting down. 
We will be in the peculiar position of thinking 3161 * that we've shutdown the NLM server and all locks at all LLMs have 3162 * been discarded, but in fact there's still one lock held. 3163 * The solution is to record the existence of NLM server and change 3164 * its state immediately to NLM_SHUTTING_DOWN. The lock request in 3165 * progress may proceed because the next phase NLM_DOWN will catch 3166 * this lock and discard it. 3167 */ 3168 mutex_exit(&nlm_reg_lock); 3169 3170 switch (nlm_state) { 3171 case FLK_NLM_UP: 3172 /* 3173 * Change the NLM state of all locks still held on behalf of 3174 * the NLM server identified by "nlmid" to NLM_UP. 3175 */ 3176 cl_flk_change_nlm_state_all_locks(nlmid, FLK_NLM_UP); 3177 break; 3178 3179 case FLK_NLM_SHUTTING_DOWN: 3180 /* 3181 * Wake up all sleeping locks for the NLM server identified 3182 * by "nlmid." Note that eventually all woken threads will 3183 * have their lock requests cancelled and descriptors 3184 * removed from the sleeping lock list. Note that the NLM 3185 * server state associated with each lock descriptor is 3186 * changed to FLK_NLM_SHUTTING_DOWN. 3187 */ 3188 cl_flk_wakeup_sleeping_nlm_locks(nlmid); 3189 break; 3190 3191 case FLK_NLM_DOWN: 3192 /* 3193 * Discard all active, granted locks for this NLM server 3194 * identified by "nlmid." 3195 */ 3196 cl_flk_unlock_nlm_granted(nlmid); 3197 break; 3198 3199 default: 3200 panic("cl_set_nlm_status: bad status (%d)", nlm_state); 3201 } 3202 } 3203 3204 /* 3205 * Set the control status for lock manager requests. 3206 * 3207 * Note that when this routine is called with FLK_WAKEUP_SLEEPERS, there 3208 * may be locks requests that have gotten started but not finished. In 3209 * particular, there may be blocking requests that are in the callback code 3210 * before sleeping (so they're not holding the lock for the graph). If 3211 * such a thread reacquires the graph's lock (to go to sleep) after 3212 * flk_lockmgr_status is set to a non-up value, it will notice the status 3213 * and bail out. If the request gets granted before the thread can check 3214 * flk_lockmgr_status, let it continue normally. It will get flushed when 3215 * we are called with FLK_LOCKMGR_DOWN. 3216 */ 3217 3218 void 3219 flk_set_lockmgr_status(flk_lockmgr_status_t status) 3220 { 3221 int i; 3222 graph_t *gp; 3223 struct flock_globals *fg; 3224 3225 fg = flk_get_globals(); 3226 ASSERT(fg != NULL); 3227 3228 mutex_enter(&flock_lock); 3229 fg->flk_lockmgr_status = status; 3230 mutex_exit(&flock_lock); 3231 3232 /* 3233 * If the lock manager is coming back up, all that's needed is to 3234 * propagate this information to the graphs. If the lock manager 3235 * is going down, additional action is required, and each graph's 3236 * copy of the state is updated atomically with this other action. 3237 */ 3238 switch (status) { 3239 case FLK_LOCKMGR_UP: 3240 for (i = 0; i < HASH_SIZE; i++) { 3241 mutex_enter(&flock_lock); 3242 gp = lock_graph[i]; 3243 mutex_exit(&flock_lock); 3244 if (gp == NULL) 3245 continue; 3246 mutex_enter(&gp->gp_mutex); 3247 fg->lockmgr_status[i] = status; 3248 mutex_exit(&gp->gp_mutex); 3249 } 3250 break; 3251 case FLK_WAKEUP_SLEEPERS: 3252 wakeup_sleeping_lockmgr_locks(fg); 3253 break; 3254 case FLK_LOCKMGR_DOWN: 3255 unlock_lockmgr_granted(fg); 3256 break; 3257 default: 3258 panic("flk_set_lockmgr_status: bad status (%d)", status); 3259 break; 3260 } 3261 } 3262 3263 /* 3264 * This routine returns all the locks that are active or sleeping and are 3265 * associated with a particular set of identifiers. 
If lock_state != 0, then 3266 * only locks that match the lock_state are returned. If lock_state == 0, then 3267 * all locks are returned. If pid == NOPID, the pid is ignored. If 3268 * use_sysid is FALSE, then the sysid is ignored. If vp is NULL, then the 3269 * vnode pointer is ignored. 3270 * 3271 * A list containing the vnode pointer and an flock structure 3272 * describing the lock is returned. Each element in the list is 3273 * dynamically allocated and must be freed by the caller. The 3274 * last item in the list is denoted by a NULL value in the ll_next 3275 * field. 3276 * 3277 * The vnode pointers returned are held. The caller is responsible 3278 * for releasing these. Note that the returned list is only a snapshot of 3279 * the current lock information, and that it is a snapshot of a moving 3280 * target (only one graph is locked at a time). 3281 */ 3282 3283 locklist_t * 3284 get_lock_list(int list_type, int lock_state, int sysid, boolean_t use_sysid, 3285 pid_t pid, const vnode_t *vp, zoneid_t zoneid) 3286 { 3287 lock_descriptor_t *lock; 3288 lock_descriptor_t *graph_head; 3289 locklist_t listhead; 3290 locklist_t *llheadp; 3291 locklist_t *llp; 3292 locklist_t *lltp; 3293 graph_t *gp; 3294 int i; 3295 int first_index; /* graph index */ 3296 int num_indexes; /* graph index */ 3297 3298 ASSERT((list_type == FLK_ACTIVE_STATE) || 3299 (list_type == FLK_SLEEPING_STATE)); 3300 3301 /* 3302 * Get a pointer to something to use as a list head while building 3303 * the rest of the list. 3304 */ 3305 llheadp = &listhead; 3306 lltp = llheadp; 3307 llheadp->ll_next = (locklist_t *)NULL; 3308 3309 /* Figure out which graphs we want to look at. */ 3310 if (vp == NULL) { 3311 first_index = 0; 3312 num_indexes = HASH_SIZE; 3313 } else { 3314 first_index = HASH_INDEX(vp); 3315 num_indexes = 1; 3316 } 3317 3318 for (i = first_index; i < first_index + num_indexes; i++) { 3319 mutex_enter(&flock_lock); 3320 gp = lock_graph[i]; 3321 mutex_exit(&flock_lock); 3322 if (gp == NULL) { 3323 continue; 3324 } 3325 3326 mutex_enter(&gp->gp_mutex); 3327 graph_head = (list_type == FLK_ACTIVE_STATE) ? 3328 ACTIVE_HEAD(gp) : SLEEPING_HEAD(gp); 3329 for (lock = graph_head->l_next; 3330 lock != graph_head; 3331 lock = lock->l_next) { 3332 if (use_sysid && lock->l_flock.l_sysid != sysid) 3333 continue; 3334 if (pid != NOPID && lock->l_flock.l_pid != pid) 3335 continue; 3336 if (vp != NULL && lock->l_vnode != vp) 3337 continue; 3338 if (lock_state && !(lock_state & lock->l_state)) 3339 continue; 3340 if (zoneid != lock->l_zoneid && zoneid != ALL_ZONES) 3341 continue; 3342 /* 3343 * A matching lock was found. Allocate 3344 * space for a new locklist entry and fill 3345 * it in. 3346 */ 3347 llp = kmem_alloc(sizeof (locklist_t), KM_SLEEP); 3348 lltp->ll_next = llp; 3349 VN_HOLD(lock->l_vnode); 3350 llp->ll_vp = lock->l_vnode; 3351 create_flock(lock, &(llp->ll_flock)); 3352 llp->ll_next = (locklist_t *)NULL; 3353 lltp = llp; 3354 } 3355 mutex_exit(&gp->gp_mutex); 3356 } 3357 3358 llp = llheadp->ll_next; 3359 return (llp); 3360 } 3361 3362 /* 3363 * These two functions are simply interfaces to get_lock_list. They return 3364 * a list of sleeping or active locks for the given sysid and pid. See 3365 * get_lock_list for details. 3366 * 3367 * In either case we don't particularly care to specify the zone of interest; 3368 * the sysid-space is global across zones, so the sysid will map to exactly one 3369 * zone, and we'll return information for that zone. 
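 *
 * A minimal usage sketch (hypothetical caller, not taken from this file):
 *
 *	locklist_t *llp, *lp;
 *
 *	llp = flk_get_active_locks(sysid, NOPID);
 *	for (lp = llp; lp != NULL; lp = lp->ll_next)
 *		... examine lp->ll_vp and lp->ll_flock ...
 *	flk_free_locklist(llp);		(this also releases the vnode holds)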
3370 */ 3371 3372 locklist_t * 3373 flk_get_sleeping_locks(int sysid, pid_t pid) 3374 { 3375 return (get_lock_list(FLK_SLEEPING_STATE, 0, sysid, B_TRUE, pid, NULL, 3376 ALL_ZONES)); 3377 } 3378 3379 locklist_t * 3380 flk_get_active_locks(int sysid, pid_t pid) 3381 { 3382 return (get_lock_list(FLK_ACTIVE_STATE, 0, sysid, B_TRUE, pid, NULL, 3383 ALL_ZONES)); 3384 } 3385 3386 /* 3387 * Another interface to get_lock_list. This one returns all the active 3388 * locks for a given vnode. Again, see get_lock_list for details. 3389 * 3390 * We don't need to specify which zone's locks we're interested in. The matter 3391 * would only be interesting if the vnode belonged to NFS, and NFS vnodes can't 3392 * be used by multiple zones, so the list of locks will all be from the right 3393 * zone. 3394 */ 3395 3396 locklist_t * 3397 flk_active_locks_for_vp(const vnode_t *vp) 3398 { 3399 return (get_lock_list(FLK_ACTIVE_STATE, 0, 0, B_FALSE, NOPID, vp, 3400 ALL_ZONES)); 3401 } 3402 3403 /* 3404 * Another interface to get_lock_list. This one returns all the active 3405 * nbmand locks for a given vnode. Again, see get_lock_list for details. 3406 * 3407 * See the comment for flk_active_locks_for_vp() for why we don't care to 3408 * specify the particular zone of interest. 3409 */ 3410 locklist_t * 3411 flk_active_nbmand_locks_for_vp(const vnode_t *vp) 3412 { 3413 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE, 3414 NOPID, vp, ALL_ZONES)); 3415 } 3416 3417 /* 3418 * Another interface to get_lock_list. This one returns all the active 3419 * nbmand locks for a given pid. Again, see get_lock_list for details. 3420 * 3421 * The zone doesn't need to be specified here; the locks held by a 3422 * particular process will either be local (ie, non-NFS) or from the zone 3423 * the process is executing in. This is because other parts of the system 3424 * ensure that an NFS vnode can't be used in a zone other than that in 3425 * which it was opened. 3426 */ 3427 locklist_t * 3428 flk_active_nbmand_locks(pid_t pid) 3429 { 3430 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE, 3431 pid, NULL, ALL_ZONES)); 3432 } 3433 3434 /* 3435 * Free up all entries in the locklist. 3436 */ 3437 void 3438 flk_free_locklist(locklist_t *llp) 3439 { 3440 locklist_t *next_llp; 3441 3442 while (llp) { 3443 next_llp = llp->ll_next; 3444 VN_RELE(llp->ll_vp); 3445 kmem_free(llp, sizeof (*llp)); 3446 llp = next_llp; 3447 } 3448 } 3449 3450 static void 3451 cl_flk_change_nlm_state_all_locks(int nlmid, flk_nlm_status_t nlm_state) 3452 { 3453 /* 3454 * For each graph "lg" in the hash table lock_graph do 3455 * a. Get the list of sleeping locks 3456 * b. For each lock descriptor in the list do 3457 * i. If the requested lock is an NLM server request AND 3458 * the nlmid is the same as the routine argument then 3459 * change the lock descriptor's state field to 3460 * "nlm_state." 3461 * c. Get the list of active locks 3462 * d. For each lock descriptor in the list do 3463 * i. If the requested lock is an NLM server request AND 3464 * the nlmid is the same as the routine argument then 3465 * change the lock descriptor's state field to 3466 * "nlm_state." 
3467 */ 3468 3469 int i; 3470 graph_t *gp; /* lock graph */ 3471 lock_descriptor_t *lock; /* lock */ 3472 lock_descriptor_t *nlock = NULL; /* next lock */ 3473 int lock_nlmid; 3474 3475 for (i = 0; i < HASH_SIZE; i++) { 3476 mutex_enter(&flock_lock); 3477 gp = lock_graph[i]; 3478 mutex_exit(&flock_lock); 3479 if (gp == NULL) { 3480 continue; 3481 } 3482 3483 /* Get list of sleeping locks in current lock graph. */ 3484 mutex_enter(&gp->gp_mutex); 3485 for (lock = SLEEPING_HEAD(gp)->l_next; 3486 lock != SLEEPING_HEAD(gp); 3487 lock = nlock) { 3488 nlock = lock->l_next; 3489 /* get NLM id */ 3490 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 3491 3492 /* 3493 * If NLM server request AND nlmid of lock matches 3494 * nlmid of argument, then set the NLM state of the 3495 * lock to "nlm_state." 3496 */ 3497 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 3498 SET_NLM_STATE(lock, nlm_state); 3499 } 3500 } 3501 3502 /* Get list of active locks in current lock graph. */ 3503 for (lock = ACTIVE_HEAD(gp)->l_next; 3504 lock != ACTIVE_HEAD(gp); 3505 lock = nlock) { 3506 nlock = lock->l_next; 3507 /* get NLM id */ 3508 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 3509 3510 /* 3511 * If NLM server request AND nlmid of lock matches 3512 * nlmid of argument, then set the NLM state of the 3513 * lock to "nlm_state." 3514 */ 3515 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 3516 ASSERT(IS_ACTIVE(lock)); 3517 SET_NLM_STATE(lock, nlm_state); 3518 } 3519 } 3520 mutex_exit(&gp->gp_mutex); 3521 } 3522 } 3523 3524 /* 3525 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid(). 3526 * Effects: Find all sleeping lock manager requests _only_ for the NLM server 3527 * identified by "nlmid." Poke those lock requests. 3528 */ 3529 static void 3530 cl_flk_wakeup_sleeping_nlm_locks(int nlmid) 3531 { 3532 lock_descriptor_t *lock; 3533 lock_descriptor_t *nlock = NULL; /* next lock */ 3534 int i; 3535 graph_t *gp; 3536 int lock_nlmid; 3537 3538 for (i = 0; i < HASH_SIZE; i++) { 3539 mutex_enter(&flock_lock); 3540 gp = lock_graph[i]; 3541 mutex_exit(&flock_lock); 3542 if (gp == NULL) { 3543 continue; 3544 } 3545 3546 mutex_enter(&gp->gp_mutex); 3547 for (lock = SLEEPING_HEAD(gp)->l_next; 3548 lock != SLEEPING_HEAD(gp); 3549 lock = nlock) { 3550 nlock = lock->l_next; 3551 /* 3552 * If NLM server request _and_ nlmid of lock matches 3553 * nlmid of argument, then set the NLM state of the 3554 * lock to NLM_SHUTTING_DOWN, and wake up sleeping 3555 * request. 3556 */ 3557 if (IS_LOCKMGR(lock)) { 3558 /* get NLM id */ 3559 lock_nlmid = 3560 GETNLMID(lock->l_flock.l_sysid); 3561 if (nlmid == lock_nlmid) { 3562 SET_NLM_STATE(lock, 3563 FLK_NLM_SHUTTING_DOWN); 3564 INTERRUPT_WAKEUP(lock); 3565 } 3566 } 3567 } 3568 mutex_exit(&gp->gp_mutex); 3569 } 3570 } 3571 3572 /* 3573 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid() 3574 * Effects: Find all active (granted) lock manager locks _only_ for the 3575 * NLM server identified by "nlmid" and release them. 
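 *
 * A lock belongs to NLM server "nlmid" when GETNLMID(lock->l_flock.l_sysid)
 * equals "nlmid"; for example (hypothetical values), an l_sysid of
 * (2 << 16) | 0x0042 identifies a lock owned through the NLM server on
 * cluster node 2, given the [nlmid, sysid] split described for
 * cl_flk_remove_locks_by_sysid().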
3576 */ 3577 static void 3578 cl_flk_unlock_nlm_granted(int nlmid) 3579 { 3580 lock_descriptor_t *lock; 3581 lock_descriptor_t *nlock = NULL; /* next lock */ 3582 int i; 3583 graph_t *gp; 3584 int lock_nlmid; 3585 3586 for (i = 0; i < HASH_SIZE; i++) { 3587 mutex_enter(&flock_lock); 3588 gp = lock_graph[i]; 3589 mutex_exit(&flock_lock); 3590 if (gp == NULL) { 3591 continue; 3592 } 3593 3594 mutex_enter(&gp->gp_mutex); 3595 for (lock = ACTIVE_HEAD(gp)->l_next; 3596 lock != ACTIVE_HEAD(gp); 3597 lock = nlock) { 3598 nlock = lock->l_next; 3599 ASSERT(IS_ACTIVE(lock)); 3600 3601 /* 3602 * If it's an NLM server request _and_ nlmid of 3603 * the lock matches nlmid of argument, then 3604 * remove the active lock the list, wakup blocked 3605 * threads, and free the storage for the lock. 3606 * Note that there's no need to mark the NLM state 3607 * of this lock to NLM_DOWN because the lock will 3608 * be deleted anyway and its storage freed. 3609 */ 3610 if (IS_LOCKMGR(lock)) { 3611 /* get NLM id */ 3612 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 3613 if (nlmid == lock_nlmid) { 3614 flk_delete_active_lock(lock, 0); 3615 flk_wakeup(lock, 1); 3616 flk_free_lock(lock); 3617 } 3618 } 3619 } 3620 mutex_exit(&gp->gp_mutex); 3621 } 3622 } 3623 3624 /* 3625 * Find all sleeping lock manager requests and poke them. 3626 */ 3627 static void 3628 wakeup_sleeping_lockmgr_locks(struct flock_globals *fg) 3629 { 3630 lock_descriptor_t *lock; 3631 lock_descriptor_t *nlock = NULL; /* next lock */ 3632 int i; 3633 graph_t *gp; 3634 zoneid_t zoneid = getzoneid(); 3635 3636 for (i = 0; i < HASH_SIZE; i++) { 3637 mutex_enter(&flock_lock); 3638 gp = lock_graph[i]; 3639 mutex_exit(&flock_lock); 3640 if (gp == NULL) { 3641 continue; 3642 } 3643 3644 mutex_enter(&gp->gp_mutex); 3645 fg->lockmgr_status[i] = FLK_WAKEUP_SLEEPERS; 3646 for (lock = SLEEPING_HEAD(gp)->l_next; 3647 lock != SLEEPING_HEAD(gp); 3648 lock = nlock) { 3649 nlock = lock->l_next; 3650 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) { 3651 INTERRUPT_WAKEUP(lock); 3652 } 3653 } 3654 mutex_exit(&gp->gp_mutex); 3655 } 3656 } 3657 3658 3659 /* 3660 * Find all active (granted) lock manager locks and release them. 3661 */ 3662 static void 3663 unlock_lockmgr_granted(struct flock_globals *fg) 3664 { 3665 lock_descriptor_t *lock; 3666 lock_descriptor_t *nlock = NULL; /* next lock */ 3667 int i; 3668 graph_t *gp; 3669 zoneid_t zoneid = getzoneid(); 3670 3671 for (i = 0; i < HASH_SIZE; i++) { 3672 mutex_enter(&flock_lock); 3673 gp = lock_graph[i]; 3674 mutex_exit(&flock_lock); 3675 if (gp == NULL) { 3676 continue; 3677 } 3678 3679 mutex_enter(&gp->gp_mutex); 3680 fg->lockmgr_status[i] = FLK_LOCKMGR_DOWN; 3681 for (lock = ACTIVE_HEAD(gp)->l_next; 3682 lock != ACTIVE_HEAD(gp); 3683 lock = nlock) { 3684 nlock = lock->l_next; 3685 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) { 3686 ASSERT(IS_ACTIVE(lock)); 3687 flk_delete_active_lock(lock, 0); 3688 flk_wakeup(lock, 1); 3689 flk_free_lock(lock); 3690 } 3691 } 3692 mutex_exit(&gp->gp_mutex); 3693 } 3694 } 3695 /* ONC_PLUS EXTRACT END */ 3696 3697 3698 /* 3699 * Wait until a lock is granted, cancelled, or interrupted. 
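 *
 * A minimal caller sketch (hypothetical, not taken from this file); the
 * graph mutex must be held across the call, and the caller acts on the
 * state the request ends up in:
 *
 *	mutex_enter(&gp->gp_mutex);
 *	...
 *	wait_for_lock(request);
 *	if (IS_INTERRUPTED(request))
 *		... back the request out and return an error ...
 *	mutex_exit(&gp->gp_mutex);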
3700 */ 3701 3702 static void 3703 wait_for_lock(lock_descriptor_t *request) 3704 { 3705 graph_t *gp = request->l_graph; 3706 3707 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 3708 3709 while (!(IS_GRANTED(request)) && !(IS_CANCELLED(request)) && 3710 !(IS_INTERRUPTED(request))) { 3711 if (!cv_wait_sig(&request->l_cv, &gp->gp_mutex)) { 3712 flk_set_state(request, FLK_INTERRUPTED_STATE); 3713 request->l_state |= INTERRUPTED_LOCK; 3714 } 3715 } 3716 } 3717 3718 /* ONC_PLUS EXTRACT START */ 3719 /* 3720 * Create an flock structure from the existing lock information 3721 * 3722 * This routine is used to create flock structures for the lock manager 3723 * to use in a reclaim request. Since the lock was originated on this 3724 * host, it must be conforming to UNIX semantics, so no checking is 3725 * done to make sure it falls within the lower half of the 32-bit range. 3726 */ 3727 3728 static void 3729 create_flock(lock_descriptor_t *lp, flock64_t *flp) 3730 { 3731 ASSERT(lp->l_end == MAX_U_OFFSET_T || lp->l_end <= MAXEND); 3732 ASSERT(lp->l_end >= lp->l_start); 3733 3734 flp->l_type = lp->l_type; 3735 flp->l_whence = 0; 3736 flp->l_start = lp->l_start; 3737 flp->l_len = (lp->l_end == MAX_U_OFFSET_T) ? 0 : 3738 (lp->l_end - lp->l_start + 1); 3739 flp->l_sysid = lp->l_flock.l_sysid; 3740 flp->l_pid = lp->l_flock.l_pid; 3741 } 3742 3743 /* 3744 * Convert flock_t data describing a lock range into unsigned long starting 3745 * and ending points, which are put into lock_request. Returns 0 or an 3746 * errno value. 3747 * Large Files: max is passed by the caller and we return EOVERFLOW 3748 * as defined by LFS API. 3749 */ 3750 3751 int 3752 flk_convert_lock_data(vnode_t *vp, flock64_t *flp, 3753 u_offset_t *start, u_offset_t *end, offset_t offset) 3754 { 3755 struct vattr vattr; 3756 int error; 3757 3758 /* 3759 * Determine the starting point of the request 3760 */ 3761 switch (flp->l_whence) { 3762 case 0: /* SEEK_SET */ 3763 *start = (u_offset_t)flp->l_start; 3764 break; 3765 case 1: /* SEEK_CUR */ 3766 *start = (u_offset_t)(flp->l_start + offset); 3767 break; 3768 case 2: /* SEEK_END */ 3769 vattr.va_mask = AT_SIZE; 3770 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) 3771 return (error); 3772 *start = (u_offset_t)(flp->l_start + vattr.va_size); 3773 break; 3774 default: 3775 return (EINVAL); 3776 } 3777 3778 /* 3779 * Determine the range covered by the request. 3780 */ 3781 if (flp->l_len == 0) 3782 *end = MAX_U_OFFSET_T; 3783 else if ((offset_t)flp->l_len > 0) { 3784 *end = (u_offset_t)(*start + (flp->l_len - 1)); 3785 } else { 3786 /* 3787 * Negative length; why do we even allow this ? 3788 * Because this allows easy specification of 3789 * the last n bytes of the file. 3790 */ 3791 *end = *start; 3792 *start += (u_offset_t)flp->l_len; 3793 (*start)++; 3794 } 3795 return (0); 3796 } 3797 3798 /* 3799 * Check the validity of lock data. This can used by the NFS 3800 * frlock routines to check data before contacting the server. The 3801 * server must support semantics that aren't as restrictive as 3802 * the UNIX API, so the NFS client is required to check. 3803 * The maximum is now passed in by the caller. 3804 */ 3805 3806 int 3807 flk_check_lock_data(u_offset_t start, u_offset_t end, offset_t max) 3808 { 3809 /* 3810 * The end (length) for local locking should never be greater 3811 * than MAXEND. However, the representation for 3812 * the entire file is MAX_U_OFFSET_T. 
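 * For example, a whole-file lock range built by flk_convert_lock_data()
 * (l_len == 0) has end == MAX_U_OFFSET_T, which this check accepts even
 * though it is larger than 'max'.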
3813 */ 3814 if ((start > max) || 3815 ((end > max) && (end != MAX_U_OFFSET_T))) { 3816 return (EINVAL); 3817 } 3818 if (start > end) { 3819 return (EINVAL); 3820 } 3821 return (0); 3822 } 3823 3824 /* 3825 * Fill in request->l_flock with information about the lock blocking the 3826 * request. The complexity here is that lock manager requests are allowed 3827 * to see into the upper part of the 32-bit address range, whereas local 3828 * requests are only allowed to see signed values. 3829 * 3830 * What should be done when "blocker" is a lock manager lock that uses the 3831 * upper portion of the 32-bit range, but "request" is local? Since the 3832 * request has already been determined to have been blocked by the blocker, 3833 * at least some portion of "blocker" must be in the range of the request, 3834 * or the request extends to the end of file. For the first case, the 3835 * portion in the lower range is returned with the indication that it goes 3836 * "to EOF." For the second case, the last byte of the lower range is 3837 * returned with the indication that it goes "to EOF." 3838 */ 3839 3840 static void 3841 report_blocker(lock_descriptor_t *blocker, lock_descriptor_t *request) 3842 { 3843 flock64_t *flrp; /* l_flock portion of request */ 3844 3845 ASSERT(blocker != NULL); 3846 3847 flrp = &request->l_flock; 3848 flrp->l_whence = 0; 3849 flrp->l_type = blocker->l_type; 3850 flrp->l_pid = blocker->l_flock.l_pid; 3851 flrp->l_sysid = blocker->l_flock.l_sysid; 3852 3853 if (IS_LOCKMGR(request)) { 3854 flrp->l_start = blocker->l_start; 3855 if (blocker->l_end == MAX_U_OFFSET_T) 3856 flrp->l_len = 0; 3857 else 3858 flrp->l_len = blocker->l_end - blocker->l_start + 1; 3859 } else { 3860 if (blocker->l_start > MAXEND) { 3861 flrp->l_start = MAXEND; 3862 flrp->l_len = 0; 3863 } else { 3864 flrp->l_start = blocker->l_start; 3865 if (blocker->l_end == MAX_U_OFFSET_T) 3866 flrp->l_len = 0; 3867 else 3868 flrp->l_len = blocker->l_end - 3869 blocker->l_start + 1; 3870 } 3871 } 3872 } 3873 /* ONC_PLUS EXTRACT END */ 3874 3875 /* 3876 * PSARC case 1997/292 3877 */ 3878 /* 3879 * This is the public routine exported by flock.h. 3880 */ 3881 void 3882 cl_flk_change_nlm_state_to_unknown(int nlmid) 3883 { 3884 /* 3885 * Check to see if node is booted as a cluster. If not, return. 3886 */ 3887 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 3888 return; 3889 } 3890 3891 /* 3892 * See comment in cl_flk_set_nlm_status(). 3893 */ 3894 if (nlm_reg_status == NULL) { 3895 return; 3896 } 3897 3898 /* 3899 * protect NLM registry state with a mutex. 3900 */ 3901 ASSERT(nlmid <= nlm_status_size && nlmid >= 0); 3902 mutex_enter(&nlm_reg_lock); 3903 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, FLK_NLM_UNKNOWN); 3904 mutex_exit(&nlm_reg_lock); 3905 } 3906 3907 /* 3908 * Return non-zero if the given I/O request conflicts with an active NBMAND 3909 * lock. 3910 * If svmand is non-zero, it means look at all active locks, not just NBMAND 3911 * locks. 
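 *
 * For example, an NBL_READ request never conflicts with an active F_RDLCK
 * (see lock_blocks_io() below); a conflict also requires that the active
 * lock be held by a different owner (sysid/pid) and that the byte ranges
 * overlap.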
3912 */ 3913 3914 int 3915 nbl_lock_conflict(vnode_t *vp, nbl_op_t op, u_offset_t offset, 3916 ssize_t length, int svmand, caller_context_t *ct) 3917 { 3918 int conflict = 0; 3919 graph_t *gp; 3920 lock_descriptor_t *lock; 3921 pid_t pid; 3922 int sysid; 3923 3924 if (ct == NULL) { 3925 pid = curproc->p_pid; 3926 sysid = 0; 3927 } else { 3928 pid = ct->cc_pid; 3929 sysid = ct->cc_sysid; 3930 } 3931 3932 mutex_enter(&flock_lock); 3933 gp = lock_graph[HASH_INDEX(vp)]; 3934 mutex_exit(&flock_lock); 3935 if (gp == NULL) 3936 return (0); 3937 3938 mutex_enter(&gp->gp_mutex); 3939 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 3940 3941 for (; lock && lock->l_vnode == vp; lock = lock->l_next) { 3942 if ((svmand || (lock->l_state & NBMAND_LOCK)) && 3943 (lock->l_flock.l_sysid != sysid || 3944 lock->l_flock.l_pid != pid) && 3945 lock_blocks_io(op, offset, length, 3946 lock->l_type, lock->l_start, lock->l_end)) { 3947 conflict = 1; 3948 break; 3949 } 3950 } 3951 mutex_exit(&gp->gp_mutex); 3952 3953 return (conflict); 3954 } 3955 3956 /* 3957 * Return non-zero if the given I/O request conflicts with the given lock. 3958 */ 3959 3960 static int 3961 lock_blocks_io(nbl_op_t op, u_offset_t offset, ssize_t length, 3962 int lock_type, u_offset_t lock_start, u_offset_t lock_end) 3963 { 3964 ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE); 3965 ASSERT(lock_type == F_RDLCK || lock_type == F_WRLCK); 3966 3967 if (op == NBL_READ && lock_type == F_RDLCK) 3968 return (0); 3969 3970 if (offset <= lock_start && lock_start < offset + length) 3971 return (1); 3972 if (lock_start <= offset && offset <= lock_end) 3973 return (1); 3974 3975 return (0); 3976 } 3977 3978 #ifdef DEBUG 3979 static void 3980 check_active_locks(graph_t *gp) 3981 { 3982 lock_descriptor_t *lock, *lock1; 3983 edge_t *ep; 3984 3985 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp); 3986 lock = lock->l_next) { 3987 ASSERT(IS_ACTIVE(lock)); 3988 ASSERT(NOT_BLOCKED(lock)); 3989 ASSERT(!IS_BARRIER(lock)); 3990 3991 ep = FIRST_IN(lock); 3992 3993 while (ep != HEAD(lock)) { 3994 ASSERT(IS_SLEEPING(ep->from_vertex)); 3995 ASSERT(!NOT_BLOCKED(ep->from_vertex)); 3996 ep = NEXT_IN(ep); 3997 } 3998 3999 for (lock1 = lock->l_next; lock1 != ACTIVE_HEAD(gp); 4000 lock1 = lock1->l_next) { 4001 if (lock1->l_vnode == lock->l_vnode) { 4002 if (BLOCKS(lock1, lock)) { 4003 cmn_err(CE_PANIC, 4004 "active lock %p blocks %p", 4005 (void *)lock1, (void *)lock); 4006 } else if (BLOCKS(lock, lock1)) { 4007 cmn_err(CE_PANIC, 4008 "active lock %p blocks %p", 4009 (void *)lock, (void *)lock1); 4010 } 4011 } 4012 } 4013 } 4014 } 4015 4016 /* 4017 * Effect: This functions checks to see if the transition from 'old_state' to 4018 * 'new_state' is a valid one. It returns 0 if the transition is valid 4019 * and 1 if it is not. 
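 * (For example, an active lock may only move to FLK_DEAD_STATE, while a
 * sleeping lock may become granted, interrupted, or cancelled.)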
4020 * For a map of valid transitions, see sys/flock_impl.h 4021 */ 4022 static int 4023 check_lock_transition(int old_state, int new_state) 4024 { 4025 switch (old_state) { 4026 case FLK_INITIAL_STATE: 4027 if ((new_state == FLK_START_STATE) || 4028 (new_state == FLK_SLEEPING_STATE) || 4029 (new_state == FLK_ACTIVE_STATE) || 4030 (new_state == FLK_DEAD_STATE)) { 4031 return (0); 4032 } else { 4033 return (1); 4034 } 4035 case FLK_START_STATE: 4036 if ((new_state == FLK_ACTIVE_STATE) || 4037 (new_state == FLK_DEAD_STATE)) { 4038 return (0); 4039 } else { 4040 return (1); 4041 } 4042 case FLK_ACTIVE_STATE: 4043 if (new_state == FLK_DEAD_STATE) { 4044 return (0); 4045 } else { 4046 return (1); 4047 } 4048 case FLK_SLEEPING_STATE: 4049 if ((new_state == FLK_GRANTED_STATE) || 4050 (new_state == FLK_INTERRUPTED_STATE) || 4051 (new_state == FLK_CANCELLED_STATE)) { 4052 return (0); 4053 } else { 4054 return (1); 4055 } 4056 case FLK_GRANTED_STATE: 4057 if ((new_state == FLK_START_STATE) || 4058 (new_state == FLK_INTERRUPTED_STATE) || 4059 (new_state == FLK_CANCELLED_STATE)) { 4060 return (0); 4061 } else { 4062 return (1); 4063 } 4064 case FLK_CANCELLED_STATE: 4065 if ((new_state == FLK_INTERRUPTED_STATE) || 4066 (new_state == FLK_DEAD_STATE)) { 4067 return (0); 4068 } else { 4069 return (1); 4070 } 4071 case FLK_INTERRUPTED_STATE: 4072 if (new_state == FLK_DEAD_STATE) { 4073 return (0); 4074 } else { 4075 return (1); 4076 } 4077 case FLK_DEAD_STATE: 4078 /* May be set more than once */ 4079 if (new_state == FLK_DEAD_STATE) { 4080 return (0); 4081 } else { 4082 return (1); 4083 } 4084 default: 4085 return (1); 4086 } 4087 } 4088 4089 static void 4090 check_sleeping_locks(graph_t *gp) 4091 { 4092 lock_descriptor_t *lock1, *lock2; 4093 edge_t *ep; 4094 for (lock1 = SLEEPING_HEAD(gp)->l_next; lock1 != SLEEPING_HEAD(gp); 4095 lock1 = lock1->l_next) { 4096 ASSERT(!IS_BARRIER(lock1)); 4097 for (lock2 = lock1->l_next; lock2 != SLEEPING_HEAD(gp); 4098 lock2 = lock2->l_next) { 4099 if (lock1->l_vnode == lock2->l_vnode) { 4100 if (BLOCKS(lock2, lock1)) { 4101 ASSERT(!IS_GRANTED(lock1)); 4102 ASSERT(!NOT_BLOCKED(lock1)); 4103 path(lock1, lock2); 4104 } 4105 } 4106 } 4107 4108 for (lock2 = ACTIVE_HEAD(gp)->l_next; lock2 != ACTIVE_HEAD(gp); 4109 lock2 = lock2->l_next) { 4110 ASSERT(!IS_BARRIER(lock1)); 4111 if (lock1->l_vnode == lock2->l_vnode) { 4112 if (BLOCKS(lock2, lock1)) { 4113 ASSERT(!IS_GRANTED(lock1)); 4114 ASSERT(!NOT_BLOCKED(lock1)); 4115 path(lock1, lock2); 4116 } 4117 } 4118 } 4119 ep = FIRST_ADJ(lock1); 4120 while (ep != HEAD(lock1)) { 4121 ASSERT(BLOCKS(ep->to_vertex, lock1)); 4122 ep = NEXT_ADJ(ep); 4123 } 4124 } 4125 } 4126 4127 static int 4128 level_two_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2, int no_path) 4129 { 4130 edge_t *ep; 4131 lock_descriptor_t *vertex; 4132 lock_descriptor_t *vertex_stack; 4133 4134 STACK_INIT(vertex_stack); 4135 4136 flk_graph_uncolor(lock1->l_graph); 4137 ep = FIRST_ADJ(lock1); 4138 ASSERT(ep != HEAD(lock1)); 4139 while (ep != HEAD(lock1)) { 4140 if (no_path) 4141 ASSERT(ep->to_vertex != lock2); 4142 STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack); 4143 COLOR(ep->to_vertex); 4144 ep = NEXT_ADJ(ep); 4145 } 4146 4147 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 4148 STACK_POP(vertex_stack, l_dstack); 4149 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex); 4150 ep = NEXT_ADJ(ep)) { 4151 if (COLORED(ep->to_vertex)) 4152 continue; 4153 COLOR(ep->to_vertex); 4154 if (ep->to_vertex == lock2) 4155 return (1); 4156 4157 STACK_PUSH(vertex_stack, 
ep->to_vertex, l_dstack); 4158 } 4159 } 4160 return (0); 4161 } 4162 4163 static void 4164 check_owner_locks(graph_t *gp, pid_t pid, int sysid, vnode_t *vp) 4165 { 4166 lock_descriptor_t *lock; 4167 4168 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 4169 4170 if (lock) { 4171 while (lock != ACTIVE_HEAD(gp) && (lock->l_vnode == vp)) { 4172 if (lock->l_flock.l_pid == pid && 4173 lock->l_flock.l_sysid == sysid) 4174 cmn_err(CE_PANIC, 4175 "owner pid %d's lock %p in active queue", 4176 pid, (void *)lock); 4177 lock = lock->l_next; 4178 } 4179 } 4180 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 4181 4182 if (lock) { 4183 while (lock != SLEEPING_HEAD(gp) && (lock->l_vnode == vp)) { 4184 if (lock->l_flock.l_pid == pid && 4185 lock->l_flock.l_sysid == sysid) 4186 cmn_err(CE_PANIC, 4187 "owner pid %d's lock %p in sleep queue", 4188 pid, (void *)lock); 4189 lock = lock->l_next; 4190 } 4191 } 4192 } 4193 4194 static int 4195 level_one_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2) 4196 { 4197 edge_t *ep = FIRST_ADJ(lock1); 4198 4199 while (ep != HEAD(lock1)) { 4200 if (ep->to_vertex == lock2) 4201 return (1); 4202 else 4203 ep = NEXT_ADJ(ep); 4204 } 4205 return (0); 4206 } 4207 4208 static int 4209 no_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2) 4210 { 4211 return (!level_two_path(lock1, lock2, 1)); 4212 } 4213 4214 static void 4215 path(lock_descriptor_t *lock1, lock_descriptor_t *lock2) 4216 { 4217 if (level_one_path(lock1, lock2)) { 4218 if (level_two_path(lock1, lock2, 0) != 0) { 4219 cmn_err(CE_WARN, 4220 "one edge one path from lock1 %p lock2 %p", 4221 (void *)lock1, (void *)lock2); 4222 } 4223 } else if (no_path(lock1, lock2)) { 4224 cmn_err(CE_PANIC, 4225 "No path from lock1 %p to lock2 %p", 4226 (void *)lock1, (void *)lock2); 4227 } 4228 } 4229 #endif /* DEBUG */ 4230