/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 * Copyright 2015 Joyent, Inc.
 */

#include <sys/flock_impl.h>
#include <sys/vfs.h>
#include <sys/t_lock.h>		/* for <sys/callb.h> */
#include <sys/callb.h>
#include <sys/clconf.h>
#include <sys/cladm.h>
#include <sys/nbmlock.h>
#include <sys/cred.h>
#include <sys/policy.h>

/*
 * The following eight variables are for statistics purposes and they are
 * not protected by locks.  They may not be accurate but will at least be
 * close to the actual value.
 */

int flk_lock_allocs;
int flk_lock_frees;
int edge_allocs;
int edge_frees;
int flk_proc_vertex_allocs;
int flk_proc_edge_allocs;
int flk_proc_vertex_frees;
int flk_proc_edge_frees;

static kmutex_t flock_lock;

#ifdef DEBUG
int check_debug = 0;
#define	CHECK_ACTIVE_LOCKS(gp)	if (check_debug) \
					check_active_locks(gp);
#define	CHECK_SLEEPING_LOCKS(gp)	if (check_debug) \
						check_sleeping_locks(gp);
#define	CHECK_OWNER_LOCKS(gp, pid, sysid, vp)	\
		if (check_debug)	\
			check_owner_locks(gp, pid, sysid, vp);
#define	CHECK_LOCK_TRANSITION(old_state, new_state) \
	{ \
		if (check_lock_transition(old_state, new_state)) { \
			cmn_err(CE_PANIC, "Illegal lock transition \
			    from %d to %d", old_state, new_state); \
		} \
	}
#else

#define	CHECK_ACTIVE_LOCKS(gp)
#define	CHECK_SLEEPING_LOCKS(gp)
#define	CHECK_OWNER_LOCKS(gp, pid, sysid, vp)
#define	CHECK_LOCK_TRANSITION(old_state, new_state)

#endif /* DEBUG */

struct kmem_cache	*flk_edge_cache;

graph_t		*lock_graph[HASH_SIZE];
proc_graph_t	pgraph;

/*
 * Clustering.
 *
 * NLM REGISTRY TYPE IMPLEMENTATION
 *
 * Assumptions:
 *  1.  Nodes in a cluster are numbered starting at 1; always non-negative
 *	integers; maximum node id is returned by clconf_maximum_nodeid().
 *  2.  We use this node id to identify the node an NLM server runs on.
 */

/*
 * NLM registry object keeps track of NLM servers via their
 * nlmids (which are the node ids of the node in the cluster they run on)
 * that have requested locks at this LLM with which this registry is
 * associated.
 *
 * Representation of abstraction:
 *    rep = record[	states: array[nlm_state],
 *			lock: mutex]
 *
 * Representation invariants:
 *	1. index i of rep.states is between 0 and n - 1 where n is the number
 *	   of elements in the array, which happens to be the maximum number
 *	   of nodes in the cluster configuration + 1.
 *	2. map nlmid to index i of rep.states
 *		0   -> 0
 *		1   -> 1
 *		2   -> 2
 *		n-1 -> clconf_maximum_nodeid()
 *	3. This 1-1 mapping is quite convenient and it avoids errors resulting
 *	   from forgetting to subtract 1 from the index.
 *	4. The reason we keep the 0th index is the following.  A legitimate
 *	   cluster configuration includes making a UFS file system NFS
 *	   exportable.  The code is structured so that if you're in a cluster
 *	   you do one thing; otherwise, you do something else.  The problem
 *	   is what to do if you think you're in a cluster with PXFS loaded,
 *	   but you're using UFS not PXFS?  The upper two bytes of the sysid
 *	   encode the node id of the node where the NLM server runs; these
 *	   bytes are zero for UFS.  Since the nodeid is used to index into
 *	   the registry, we can record the NLM server state information at
 *	   index 0 using the same mechanism used for PXFS file locks!
 */
static flk_nlm_status_t *nlm_reg_status = NULL;	/* state array 0..N-1 */
static kmutex_t nlm_reg_lock;			/* lock to protect array */
static uint_t nlm_status_size;			/* size of state array */
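
/*
 * Illustrative sketch (hypothetical, not compiled in): because of the
 * identity mapping above, the registry can be consulted directly with
 * the nlmid decoded from a lock's sysid, e.g.:
 *
 *	int nlmid = GETNLMID(lock->l_flock.l_sysid);
 *
 *	ASSERT(nlmid >= 0 && nlmid < nlm_status_size);
 *	mutex_enter(&nlm_reg_lock);
 *	state = nlm_reg_status[nlmid];
 *	mutex_exit(&nlm_reg_lock);
 *
 * For a local UFS lock the upper sysid bytes are zero, so nlmid is 0 and
 * slot 0 of the registry is used, as described in invariant 4 above.
 */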

/*
 * Although we need a global lock dependency graph (and associated data
 * structures), we also need a per-zone notion of whether the lock manager is
 * running, and so whether to allow lock manager requests or not.
 *
 * Thus, on a per-zone basis we maintain a ``global'' variable
 * (flk_lockmgr_status), protected by flock_lock, and set when the lock
 * manager is determined to be changing state (starting or stopping).
 *
 * Each graph/zone pair also has a copy of this variable, which is protected by
 * the graph's mutex.
 *
 * The per-graph copies are used to synchronize lock requests with shutdown
 * requests.  The global copy is used to initialize the per-graph field when a
 * new graph is created.
 */
struct flock_globals {
	flk_lockmgr_status_t flk_lockmgr_status;
	flk_lockmgr_status_t lockmgr_status[HASH_SIZE];
};

zone_key_t flock_zone_key;
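
/*
 * Sketch of the synchronization pattern described above (this is the
 * same check that reclock() performs below for lock manager requests):
 *
 *	mutex_enter(&flock_lock);
 *	if (flk_get_lockmgr_status() != FLK_LOCKMGR_UP) {
 *		mutex_exit(&flock_lock);
 *		return (ENOLCK);
 *	}
 *	mutex_exit(&flock_lock);
 */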

static void create_flock(lock_descriptor_t *, flock64_t *);
static lock_descriptor_t *flk_get_lock(void);
static void flk_free_lock(lock_descriptor_t *lock);
static void flk_get_first_blocking_lock(lock_descriptor_t *request);
static int flk_process_request(lock_descriptor_t *);
static int flk_add_edge(lock_descriptor_t *, lock_descriptor_t *, int, int);
static edge_t *flk_get_edge(void);
static int flk_wait_execute_request(lock_descriptor_t *);
static int flk_relation(lock_descriptor_t *, lock_descriptor_t *);
static void flk_insert_active_lock(lock_descriptor_t *);
static void flk_delete_active_lock(lock_descriptor_t *, int);
static void flk_insert_sleeping_lock(lock_descriptor_t *);
static void flk_graph_uncolor(graph_t *);
static void flk_wakeup(lock_descriptor_t *, int);
static void flk_free_edge(edge_t *);
static void flk_recompute_dependencies(lock_descriptor_t *,
			lock_descriptor_t **, int, int);
static int flk_find_barriers(lock_descriptor_t *);
static void flk_update_barriers(lock_descriptor_t *);
static int flk_color_reachables(lock_descriptor_t *);
static int flk_canceled(lock_descriptor_t *);
static void flk_delete_locks_by_sysid(lock_descriptor_t *);
static void report_blocker(lock_descriptor_t *, lock_descriptor_t *);
static void wait_for_lock(lock_descriptor_t *);
static void unlock_lockmgr_granted(struct flock_globals *);
static void wakeup_sleeping_lockmgr_locks(struct flock_globals *);

/* Clustering hooks */
static void cl_flk_change_nlm_state_all_locks(int, flk_nlm_status_t);
static void cl_flk_wakeup_sleeping_nlm_locks(int);
static void cl_flk_unlock_nlm_granted(int);

#ifdef DEBUG
static int check_lock_transition(int, int);
static void check_sleeping_locks(graph_t *);
static void check_active_locks(graph_t *);
static int no_path(lock_descriptor_t *, lock_descriptor_t *);
static void path(lock_descriptor_t *, lock_descriptor_t *);
static void check_owner_locks(graph_t *, pid_t, int, vnode_t *);
static int level_one_path(lock_descriptor_t *, lock_descriptor_t *);
static int level_two_path(lock_descriptor_t *, lock_descriptor_t *, int);
#endif

/* proc_graph function definitions */
static int flk_check_deadlock(lock_descriptor_t *);
static void flk_proc_graph_uncolor(void);
static proc_vertex_t *flk_get_proc_vertex(lock_descriptor_t *);
static proc_edge_t *flk_get_proc_edge(void);
static void flk_proc_release(proc_vertex_t *);
static void flk_free_proc_edge(proc_edge_t *);
static void flk_update_proc_graph(edge_t *, int);

/* Non-blocking mandatory locking */
static int lock_blocks_io(nbl_op_t, u_offset_t, ssize_t, int, u_offset_t,
			u_offset_t);

static struct flock_globals *
flk_get_globals(void)
{
	/*
	 * The KLM module had better be loaded if we're attempting to handle
	 * lockmgr requests.
	 */
	ASSERT(flock_zone_key != ZONE_KEY_UNINITIALIZED);
	return (zone_getspecific(flock_zone_key, curproc->p_zone));
}

static flk_lockmgr_status_t
flk_get_lockmgr_status(void)
{
	struct flock_globals *fg;

	ASSERT(MUTEX_HELD(&flock_lock));

	if (flock_zone_key == ZONE_KEY_UNINITIALIZED) {
		/*
		 * KLM module not loaded; lock manager definitely not running.
		 */
		return (FLK_LOCKMGR_DOWN);
	}
	fg = flk_get_globals();
	return (fg->flk_lockmgr_status);
}

/*
 * This implements Open File Description (not descriptor) style record locking.
 * These locks can also be thought of as pid-less since they are not tied to a
 * specific process, thus they're preserved across fork.
 *
 * Called directly from fcntl.
 *
 * See reclock() for the implementation of the traditional POSIX style record
 * locking scheme (pid-ful).  This function is derived from reclock() but
 * simplified and modified to work for OFD style locking.
 *
 * The two primary advantages of OFD style of locking are:
 * 1) It is per-file description, so closing a file descriptor that refers to a
 *    different file description for the same file will not drop the lock (i.e.
 *    two open's of the same file get different descriptions but a dup or fork
 *    will refer to the same description).
 * 2) Locks are preserved across fork(2).
 *
 * Because these locks are per-description, a lock ptr lives at the f_filock
 * member of the file_t and the lock_descriptor includes a file_t pointer
 * to enable unique lock identification and management.
 *
 * Since these locks are pid-less we cannot do deadlock detection with the
 * current process-oriented implementation.  This is consistent with OFD
 * locking behavior on other operating systems such as Linux.  Since we don't
 * do deadlock detection we never interact with the process graph that is
 * maintained for deadlock detection on the traditional POSIX-style locks.
 *
 * Future Work:
 *
 * The current implementation does not support record locks.  That is,
 * currently the single lock must cover the entire file.  This is validated in
 * fcntl.  To support record locks the f_filock pointer in the file_t needs to
 * be changed to a list of pointers to the locks.  That list needs to be
 * managed independently of the lock list on the vnode itself and it needs to
 * be maintained as record locks are created, split, coalesced and deleted.
 *
 * The current implementation does not support remote file systems (e.g.
 * NFS or CIFS).  This is handled in fs_frlock().  The design of how OFD locks
 * interact with the NLM is not clear since the NLM protocol/implementation
 * appears to be oriented around locks associated with a process.  A further
 * problem is that a design is needed for what nlm_send_siglost() should do and
 * where it will send SIGLOST.  More recent versions of Linux apparently try to
 * emulate OFD locks on NFS by converting them to traditional POSIX style locks
 * that work with the NLM.  It is not clear that this provides the correct
 * semantics in all cases.
 */
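
/*
 * Hypothetical userland sketch (not part of this file) of requesting an
 * OFD lock.  Per the validation in fcntl noted above, the lock must
 * currently cover the entire file, i.e. l_whence, l_start and l_len all
 * zero:
 *
 *	struct flock64 fl;
 *
 *	fl.l_type = F_WRLCK;
 *	fl.l_whence = 0;
 *	fl.l_start = 0;
 *	fl.l_len = 0;
 *	if (fcntl(fd, F_OFD_SETLK, &fl) == -1)
 *		... handle error ...
 *
 * A dup(2) of fd or a fork(2) shares the same file description and hence
 * the same lock; a second open(2) of the same file gets a different
 * description and does not.
 */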
int
ofdlock(file_t *fp, int fcmd, flock64_t *lckdat, int flag, u_offset_t offset)
{
	int cmd = 0;
	vnode_t *vp;
	lock_descriptor_t	stack_lock_request;
	lock_descriptor_t	*lock_request;
	int error = 0;
	graph_t	*gp;
	int serialize = 0;

	if (fcmd != F_OFD_GETLK)
		cmd = SETFLCK;

	if (fcmd == F_OFD_SETLKW || fcmd == F_FLOCKW)
		cmd |= SLPFLCK;

	/* see block comment */
	VERIFY(lckdat->l_whence == 0);
	VERIFY(lckdat->l_start == 0);
	VERIFY(lckdat->l_len == 0);

	vp = fp->f_vnode;

	/*
	 * For reclock fs_frlock() would normally have set these in a few
	 * places but for us it's cleaner to centralize it here.  Note that
	 * IGN_PID is -1.  We use 0 for our pid-less locks.
	 */
	lckdat->l_pid = 0;
	lckdat->l_sysid = 0;

	/*
	 * Check access permissions
	 */
	if ((fcmd == F_OFD_SETLK || fcmd == F_OFD_SETLKW) &&
	    ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) ||
	    (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0)))
		return (EBADF);

	/*
	 * for query and unlock we use the stack_lock_request
	 */
	if (lckdat->l_type == F_UNLCK || !(cmd & SETFLCK)) {
		lock_request = &stack_lock_request;
		(void) bzero((caddr_t)lock_request,
		    sizeof (lock_descriptor_t));

		/*
		 * following is added to make the assertions in
		 * flk_execute_request() pass
		 */
		lock_request->l_edge.edge_in_next = &lock_request->l_edge;
		lock_request->l_edge.edge_in_prev = &lock_request->l_edge;
		lock_request->l_edge.edge_adj_next = &lock_request->l_edge;
		lock_request->l_edge.edge_adj_prev = &lock_request->l_edge;
		lock_request->l_status = FLK_INITIAL_STATE;
	} else {
		lock_request = flk_get_lock();
		fp->f_filock = (struct filock *)lock_request;
	}
	lock_request->l_state = 0;
	lock_request->l_vnode = vp;
	lock_request->l_zoneid = getzoneid();
	lock_request->l_ofd = fp;

	/*
	 * Convert the request range into the canonical start and end
	 * values then check the validity of the lock range.
	 */
	error = flk_convert_lock_data(vp, lckdat, &lock_request->l_start,
	    &lock_request->l_end, offset);
	if (error)
		goto done;

	error = flk_check_lock_data(lock_request->l_start, lock_request->l_end,
	    MAXEND);
	if (error)
		goto done;

	ASSERT(lock_request->l_end >= lock_request->l_start);

	lock_request->l_type = lckdat->l_type;
	if (cmd & SLPFLCK)
		lock_request->l_state |= WILLING_TO_SLEEP_LOCK;

	if (!(cmd & SETFLCK)) {
		if (lock_request->l_type == F_RDLCK ||
		    lock_request->l_type == F_WRLCK)
			lock_request->l_state |= QUERY_LOCK;
	}
	lock_request->l_flock = (*lckdat);

	/*
	 * We are ready for processing the request
	 */

	if (fcmd != F_OFD_GETLK && lock_request->l_type != F_UNLCK &&
	    nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_WRITER);
		serialize = 1;
	}

	/* Get the lock graph for a particular vnode */
	gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH);

	mutex_enter(&gp->gp_mutex);

	lock_request->l_state |= REFERENCED_LOCK;
	lock_request->l_graph = gp;

	switch (lock_request->l_type) {
	case F_RDLCK:
	case F_WRLCK:
		if (IS_QUERY_LOCK(lock_request)) {
			flk_get_first_blocking_lock(lock_request);
			if (lock_request->l_ofd != NULL)
				lock_request->l_flock.l_pid = -1;
			(*lckdat) = lock_request->l_flock;
		} else {
			/* process the request now */
			error = flk_process_request(lock_request);
		}
		break;

	case F_UNLCK:
		/* unlock request will not block so execute it immediately */
		error = flk_execute_request(lock_request);
		break;

	default:
		error = EINVAL;
		break;
	}

	if (lock_request == &stack_lock_request) {
		flk_set_state(lock_request, FLK_DEAD_STATE);
	} else {
		lock_request->l_state &= ~REFERENCED_LOCK;
		if ((error != 0) || IS_DELETED(lock_request)) {
			flk_set_state(lock_request, FLK_DEAD_STATE);
			flk_free_lock(lock_request);
		}
	}

	mutex_exit(&gp->gp_mutex);
	if (serialize)
		nbl_end_crit(vp);

	return (error);

done:
	flk_set_state(lock_request, FLK_DEAD_STATE);
	if (lock_request != &stack_lock_request)
		flk_free_lock(lock_request);
	return (error);
}

/*
 * Remove any lock on the vnode belonging to the given file_t.
 * Called from closef() on last close; the file_t is locked.
 *
 * This is modeled on the cleanlocks() function but only removes the single
 * lock associated with fp.
 */
void
ofdcleanlock(file_t *fp)
{
	lock_descriptor_t *fplock, *lock, *nlock;
	vnode_t *vp;
	graph_t	*gp;

	ASSERT(MUTEX_HELD(&fp->f_tlock));

	if ((fplock = (lock_descriptor_t *)fp->f_filock) == NULL)
		return;

	fp->f_filock = NULL;
	vp = fp->f_vnode;

	gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);

	if (gp == NULL)
		return;
	mutex_enter(&gp->gp_mutex);

	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);

	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);

	if (lock) {
		do {
			nlock = lock->l_next;
			if (fplock == lock) {
				CANCEL_WAKEUP(lock);
				break;
			}
			lock = nlock;
		} while (lock->l_vnode == vp);
	}

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	if (lock) {
		do {
			nlock = lock->l_next;
			if (fplock == lock) {
				flk_delete_active_lock(lock, 0);
				flk_wakeup(lock, 1);
				flk_free_lock(lock);
				break;
			}
			lock = nlock;
		} while (lock->l_vnode == vp);
	}

	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);
	mutex_exit(&gp->gp_mutex);
}

/*
 * Routine called from fs_frlock in fs/fs_subr.c
 *
 * This implements traditional POSIX style record locking.  The two primary
 * drawbacks to this style of locking are:
 * 1) It is per-process, so any close of a file descriptor that refers to the
 *    file will drop the lock (e.g. lock /etc/passwd, call a library function
 *    which opens /etc/passwd to read the file; when the library closes its
 *    file descriptor the application loses its lock and does not know).
 * 2) Locks are not preserved across fork(2).
 *
 * Because these locks are only associated with a pid, they are per-process.
 * This is why any close will drop the lock and is also why, once the process
 * forks, the lock is no longer related to the new process.  These locks can
 * be considered as pid-ful.
 *
 * See ofdlock() for the implementation of a similar but improved locking
 * scheme.
 */
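
/*
 * Hypothetical userland sketch of drawback 1 above (the library call is
 * illustrative only):
 *
 *	fd = open("/etc/passwd", O_RDWR);
 *	fcntl(fd, F_SETLK, &fl);	the process now holds the lock
 *	getpwnam("root");		the library may open and close its
 *					own descriptor for /etc/passwd ...
 *
 * ... at which point this process's lock has been silently dropped.
 */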
int
reclock(vnode_t *vp, flock64_t *lckdat, int cmd, int flag, u_offset_t offset,
    flk_callback_t *flk_cbp)
{
	lock_descriptor_t	stack_lock_request;
	lock_descriptor_t	*lock_request;
	int error = 0;
	graph_t	*gp;
	int	nlmid;

	/*
	 * Check access permissions
	 */
	if ((cmd & SETFLCK) &&
	    ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) ||
	    (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0)))
		return (EBADF);

	/*
	 * for query and unlock we use the stack_lock_request
	 */

	if ((lckdat->l_type == F_UNLCK) ||
	    !((cmd & INOFLCK) || (cmd & SETFLCK))) {
		lock_request = &stack_lock_request;
		(void) bzero((caddr_t)lock_request,
		    sizeof (lock_descriptor_t));

		/*
		 * following is added to make the assertions in
		 * flk_execute_request() pass
		 */

		lock_request->l_edge.edge_in_next = &lock_request->l_edge;
		lock_request->l_edge.edge_in_prev = &lock_request->l_edge;
		lock_request->l_edge.edge_adj_next = &lock_request->l_edge;
		lock_request->l_edge.edge_adj_prev = &lock_request->l_edge;
		lock_request->l_status = FLK_INITIAL_STATE;
	} else {
		lock_request = flk_get_lock();
	}
	lock_request->l_state = 0;
	lock_request->l_vnode = vp;
	lock_request->l_zoneid = getzoneid();

	/*
	 * Convert the request range into the canonical start and end
	 * values.  The NLM protocol supports locking over the entire
	 * 32-bit range, so there's no range checking for remote requests,
	 * but we still need to verify that local requests obey the rules.
	 */
	/* Clustering */
	if ((cmd & (RCMDLCK | PCMDLCK)) != 0) {
		ASSERT(lckdat->l_whence == 0);
		lock_request->l_start = lckdat->l_start;
		lock_request->l_end = (lckdat->l_len == 0) ? MAX_U_OFFSET_T :
		    lckdat->l_start + (lckdat->l_len - 1);
	} else {
		/* check the validity of the lock range */
		error = flk_convert_lock_data(vp, lckdat,
		    &lock_request->l_start, &lock_request->l_end,
		    offset);
		if (error) {
			goto done;
		}
		error = flk_check_lock_data(lock_request->l_start,
		    lock_request->l_end, MAXEND);
		if (error) {
			goto done;
		}
	}

	ASSERT(lock_request->l_end >= lock_request->l_start);

	lock_request->l_type = lckdat->l_type;
	if (cmd & INOFLCK)
		lock_request->l_state |= IO_LOCK;
	if (cmd & SLPFLCK)
		lock_request->l_state |= WILLING_TO_SLEEP_LOCK;
	if (cmd & RCMDLCK)
		lock_request->l_state |= LOCKMGR_LOCK;
	if (cmd & NBMLCK)
		lock_request->l_state |= NBMAND_LOCK;
	/*
	 * Clustering: set flag for PXFS locks.
	 * We do not _only_ check for the PCMDLCK flag because PXFS locks could
	 * also be of type 'RCMDLCK'.
	 * We do not _only_ check the GETPXFSID() macro because local PXFS
	 * clients use a pxfsid of zero to permit deadlock detection in the
	 * LLM.
	 */

	if ((cmd & PCMDLCK) || (GETPXFSID(lckdat->l_sysid) != 0)) {
		lock_request->l_state |= PXFS_LOCK;
	}
	if (!((cmd & SETFLCK) || (cmd & INOFLCK))) {
		if (lock_request->l_type == F_RDLCK ||
		    lock_request->l_type == F_WRLCK)
			lock_request->l_state |= QUERY_LOCK;
	}
	lock_request->l_flock = (*lckdat);
	lock_request->l_callbacks = flk_cbp;

	/*
	 * We are ready for processing the request
	 */
	if (IS_LOCKMGR(lock_request)) {
		/*
		 * If the lock request is an NLM server request ....
		 */
		if (nlm_status_size == 0) {	/* not booted as cluster */
			mutex_enter(&flock_lock);
			/*
			 * Bail out if this is a lock manager request and the
			 * lock manager is not supposed to be running.
			 */
			if (flk_get_lockmgr_status() != FLK_LOCKMGR_UP) {
				mutex_exit(&flock_lock);
				error = ENOLCK;
				goto done;
			}
			mutex_exit(&flock_lock);
		} else {			/* booted as a cluster */
			nlmid = GETNLMID(lock_request->l_flock.l_sysid);
			ASSERT(nlmid <= nlm_status_size && nlmid >= 0);

			mutex_enter(&nlm_reg_lock);
			/*
			 * If the NLM registry does not know about this
			 * NLM server making the request, add its nlmid
			 * to the registry.
			 */
			if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status,
			    nlmid)) {
				FLK_REGISTRY_ADD_NLMID(nlm_reg_status, nlmid);
			} else if (!FLK_REGISTRY_IS_NLM_UP(nlm_reg_status,
			    nlmid)) {
				/*
				 * If the NLM server is already known (has made
				 * previous lock requests) and its state is
				 * not NLM_UP (means that NLM server is
				 * shutting down), then bail out with an
				 * error to deny the lock request.
				 */
				mutex_exit(&nlm_reg_lock);
				error = ENOLCK;
				goto done;
			}
			mutex_exit(&nlm_reg_lock);
		}
	}

	/* Now get the lock graph for a particular vnode */
	gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH);

	/*
	 * We drop rwlock here otherwise this might end up causing a
	 * deadlock if this IOLOCK sleeps.  (bugid # 1183392).
	 */

	if (IS_IO_LOCK(lock_request)) {
		VOP_RWUNLOCK(vp,
		    (lock_request->l_type == F_RDLCK) ?
		    V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
	}
	mutex_enter(&gp->gp_mutex);

	lock_request->l_state |= REFERENCED_LOCK;
	lock_request->l_graph = gp;

	switch (lock_request->l_type) {
	case F_RDLCK:
	case F_WRLCK:
		if (IS_QUERY_LOCK(lock_request)) {
			flk_get_first_blocking_lock(lock_request);
			if (lock_request->l_ofd != NULL)
				lock_request->l_flock.l_pid = -1;
			(*lckdat) = lock_request->l_flock;
			break;
		}

		/* process the request now */

		error = flk_process_request(lock_request);
		break;

	case F_UNLCK:
		/* unlock request will not block so execute it immediately */

		if (IS_LOCKMGR(lock_request) &&
		    flk_canceled(lock_request)) {
			error = 0;
		} else {
			error = flk_execute_request(lock_request);
		}
		break;

	case F_UNLKSYS:
		/*
		 * Recovery mechanism to release lock manager locks when
		 * an NFS client crashes and restarts.  The NFS server will
		 * clear the old locks and grant new locks.
		 */

		if (lock_request->l_flock.l_sysid == 0) {
			mutex_exit(&gp->gp_mutex);
			return (EINVAL);
		}
		if (secpolicy_nfs(CRED()) != 0) {
			mutex_exit(&gp->gp_mutex);
			return (EPERM);
		}
		flk_delete_locks_by_sysid(lock_request);
		lock_request->l_state &= ~REFERENCED_LOCK;
		flk_set_state(lock_request, FLK_DEAD_STATE);
		flk_free_lock(lock_request);
		mutex_exit(&gp->gp_mutex);
		return (0);

	default:
		error = EINVAL;
		break;
	}

	/* Clustering: For blocked PXFS locks, return */
	if (error == PXFS_LOCK_BLOCKED) {
		lock_request->l_state &= ~REFERENCED_LOCK;
		mutex_exit(&gp->gp_mutex);
		return (error);
	}

	/*
	 * Now that we have seen the status of locks in the system for
	 * this vnode we acquire the rwlock if it is an IO_LOCK.
	 */

	if (IS_IO_LOCK(lock_request)) {
		(void) VOP_RWLOCK(vp,
		    (lock_request->l_type == F_RDLCK) ?
		    V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
		if (!error) {
			lckdat->l_type = F_UNLCK;

			/*
			 * This wake up is needed otherwise
			 * if IO_LOCK has slept the dependents on this
			 * will not be woken up at all.  (bugid # 1185482).
			 */

			flk_wakeup(lock_request, 1);
			flk_set_state(lock_request, FLK_DEAD_STATE);
			flk_free_lock(lock_request);
		}
		/*
		 * else if an error had occurred, either flk_process_request()
		 * has returned EDEADLK, in which case there will be no
		 * dependents for this lock, or EINTR from flk_wait_execute_
		 * request(), in which case flk_cancel_sleeping_lock()
		 * would have been done.  The same is true with EBADF.
		 */
	}

	if (lock_request == &stack_lock_request) {
		flk_set_state(lock_request, FLK_DEAD_STATE);
	} else {
		lock_request->l_state &= ~REFERENCED_LOCK;
		if ((error != 0) || IS_DELETED(lock_request)) {
			flk_set_state(lock_request, FLK_DEAD_STATE);
			flk_free_lock(lock_request);
		}
	}

	mutex_exit(&gp->gp_mutex);
	return (error);

done:
	flk_set_state(lock_request, FLK_DEAD_STATE);
	if (lock_request != &stack_lock_request)
		flk_free_lock(lock_request);
	return (error);
}

/*
 * Invoke the callbacks in the given list.  If before sleeping, invoke in
 * list order.  If after sleeping, invoke in reverse order.
 *
 * CPR (suspend/resume) support: if one of the callbacks returns a
 * callb_cpr_t, return it.  This will be used to make the thread CPR-safe
 * while it is sleeping.  There should be at most one callb_cpr_t for the
 * thread.
 * XXX This is unnecessarily complicated.  The CPR information should just
 * get passed in directly through VOP_FRLOCK and reclock, rather than
 * sneaking it in via a callback.
 */

callb_cpr_t *
flk_invoke_callbacks(flk_callback_t *cblist, flk_cb_when_t when)
{
	callb_cpr_t *cpr_callbackp = NULL;
	callb_cpr_t *one_result;
	flk_callback_t *cb;

	if (cblist == NULL)
		return (NULL);

	if (when == FLK_BEFORE_SLEEP) {
		cb = cblist;
		do {
			one_result = (*cb->cb_callback)(when, cb->cb_data);
			if (one_result != NULL) {
				ASSERT(cpr_callbackp == NULL);
				cpr_callbackp = one_result;
			}
			cb = cb->cb_next;
		} while (cb != cblist);
	} else {
		cb = cblist->cb_prev;
		do {
			one_result = (*cb->cb_callback)(when, cb->cb_data);
			if (one_result != NULL) {
				cpr_callbackp = one_result;
			}
			cb = cb->cb_prev;
		} while (cb != cblist->cb_prev);
	}

	return (cpr_callbackp);
}
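
/*
 * Hypothetical sketch of a callback in such a list (names are
 * illustrative only): a callback that wants CPR protection while the
 * thread sleeps hands back its callb_cpr_t before the sleep:
 *
 *	static callb_cpr_t *
 *	my_flk_callback(flk_cb_when_t when, void *data)
 *	{
 *		struct my_state *ms = data;
 *
 *		if (when == FLK_BEFORE_SLEEP)
 *			return (&ms->ms_cpr_info);
 *		return (NULL);
 *	}
 */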

/*
 * Initialize a flk_callback_t to hold the given callback.
 */

void
flk_init_callback(flk_callback_t *flk_cb,
    callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata)
{
	flk_cb->cb_next = flk_cb;
	flk_cb->cb_prev = flk_cb;
	flk_cb->cb_callback = cb_fcn;
	flk_cb->cb_data = cbdata;
}

/*
 * Initialize an flk_callback_t and then link it into the head of an
 * existing list (which may be NULL).
 */

void
flk_add_callback(flk_callback_t *newcb,
    callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *),
    void *cbdata, flk_callback_t *cblist)
{
	flk_init_callback(newcb, cb_fcn, cbdata);

	if (cblist == NULL)
		return;

	newcb->cb_prev = cblist->cb_prev;
	newcb->cb_next = cblist;
	cblist->cb_prev->cb_next = newcb;
	cblist->cb_prev = newcb;
}

/*
 * Remove the callback from a list.
 */

void
flk_del_callback(flk_callback_t *flk_cb)
{
	flk_cb->cb_next->cb_prev = flk_cb->cb_prev;
	flk_cb->cb_prev->cb_next = flk_cb->cb_next;

	flk_cb->cb_prev = flk_cb;
	flk_cb->cb_next = flk_cb;
}
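
/*
 * Hypothetical usage sketch (names are illustrative only, and a non-NULL
 * cblist is assumed): a layer that wants notification around a blocking
 * lock request stacks its callback onto the list for the duration of the
 * call and removes it afterwards:
 *
 *	flk_callback_t cb;
 *
 *	flk_add_callback(&cb, my_flk_callback, &my_state, cblist);
 *	error = reclock(vp, &bf, cmd, flag, offset, cblist);
 *	flk_del_callback(&cb);
 */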

/*
 * Initialize the flk_edge_cache data structure and create the
 * nlm_reg_status array.
 */

void
flk_init(void)
{
	uint_t	i;

	flk_edge_cache = kmem_cache_create("flk_edges",
	    sizeof (struct edge), 0, NULL, NULL, NULL, NULL, NULL, 0);
	if (flk_edge_cache == NULL) {
		cmn_err(CE_PANIC, "Couldn't create flk_edge_cache\n");
	}
	/*
	 * Create the NLM registry object.
	 */

	if (cluster_bootflags & CLUSTER_BOOTED) {
		/*
		 * This routine tells you the maximum node id that will be used
		 * in the cluster.  This number will be the size of the nlm
		 * registry status array.  We add 1 because we will be using
		 * all entries indexed from 0 to maxnodeid; e.g., from 0
		 * to 64, for a total of 65 entries.
		 */
		nlm_status_size = clconf_maximum_nodeid() + 1;
	} else {
		nlm_status_size = 0;
	}

	if (nlm_status_size != 0) {	/* booted as a cluster */
		nlm_reg_status = (flk_nlm_status_t *)
		    kmem_alloc(sizeof (flk_nlm_status_t) * nlm_status_size,
		    KM_SLEEP);

		/* initialize all NLM states in array to NLM_UNKNOWN */
		for (i = 0; i < nlm_status_size; i++) {
			nlm_reg_status[i] = FLK_NLM_UNKNOWN;
		}
	}
}

/*
 * Zone constructor/destructor callbacks to be executed when a zone is
 * created/destroyed.
 */
/* ARGSUSED */
void *
flk_zone_init(zoneid_t zoneid)
{
	struct flock_globals *fg;
	uint_t i;

	fg = kmem_alloc(sizeof (*fg), KM_SLEEP);
	fg->flk_lockmgr_status = FLK_LOCKMGR_UP;
	for (i = 0; i < HASH_SIZE; i++)
		fg->lockmgr_status[i] = FLK_LOCKMGR_UP;
	return (fg);
}

/* ARGSUSED */
void
flk_zone_fini(zoneid_t zoneid, void *data)
{
	struct flock_globals *fg = data;

	kmem_free(fg, sizeof (*fg));
}
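
/*
 * These hooks are presumably registered by the KLM module when it loads,
 * along the lines of (hypothetical sketch):
 *
 *	zone_key_create(&flock_zone_key, flk_zone_init, NULL, flk_zone_fini);
 *
 * after which flk_get_globals() can find the per-zone state via
 * zone_getspecific(flock_zone_key, curproc->p_zone).
 */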

/*
 * Get a lock_descriptor structure with initialization of edge lists.
 */

static lock_descriptor_t *
flk_get_lock(void)
{
	lock_descriptor_t	*l;

	l = kmem_zalloc(sizeof (lock_descriptor_t), KM_SLEEP);

	cv_init(&l->l_cv, NULL, CV_DRIVER, NULL);
	l->l_edge.edge_in_next = &l->l_edge;
	l->l_edge.edge_in_prev = &l->l_edge;
	l->l_edge.edge_adj_next = &l->l_edge;
	l->l_edge.edge_adj_prev = &l->l_edge;
	l->pvertex = -1;
	l->l_status = FLK_INITIAL_STATE;
	flk_lock_allocs++;
	return (l);
}

/*
 * Free a lock_descriptor structure.  Just sets the DELETED_LOCK flag
 * when some thread has a reference to it as in reclock().
 */

void
flk_free_lock(lock_descriptor_t *lock)
{
	file_t *fp;

	ASSERT(IS_DEAD(lock));

	if ((fp = lock->l_ofd) != NULL)
		fp->f_filock = NULL;

	if (IS_REFERENCED(lock)) {
		lock->l_state |= DELETED_LOCK;
		return;
	}
	flk_lock_frees++;
	kmem_free((void *)lock, sizeof (lock_descriptor_t));
}

void
flk_set_state(lock_descriptor_t *lock, int new_state)
{
	/*
	 * Locks in the sleeping list may be woken up in a number of ways,
	 * and more than once.  If a sleeping lock is signaled awake more
	 * than once, then it may or may not change state depending on its
	 * current state.
	 * Also note that NLM locks that are sleeping could be moved to an
	 * interrupted state more than once if the unlock request is
	 * retransmitted by the NLM client - the second time around, this is
	 * just a nop.
	 * The ordering of being signaled awake is:
	 * INTERRUPTED_STATE > CANCELLED_STATE > GRANTED_STATE.
	 * The checks below implement this ordering.
	 */
	if (IS_INTERRUPTED(lock)) {
		if ((new_state == FLK_CANCELLED_STATE) ||
		    (new_state == FLK_GRANTED_STATE) ||
		    (new_state == FLK_INTERRUPTED_STATE)) {
			return;
		}
	}
	if (IS_CANCELLED(lock)) {
		if ((new_state == FLK_GRANTED_STATE) ||
		    (new_state == FLK_CANCELLED_STATE)) {
			return;
		}
	}
	CHECK_LOCK_TRANSITION(lock->l_status, new_state);
	if (IS_PXFS(lock)) {
		cl_flk_state_transition_notify(lock, lock->l_status, new_state);
	}
	lock->l_status = new_state;
}

/*
 * Routine that checks whether there are any blocking locks in the system.

/*
 * Routine that checks whether there are any blocking locks in the system.
 *
 * The policy followed is that if a write lock is sleeping we don't allow
 * read locks before this write lock even though there may not be any
 * active locks corresponding to the read locks' region.
 *
 * flk_add_edge() adds an edge between l1 and l2 iff there is no path
 * between l1 and l2.  This is done to have a "minimum storage
 * representation" of the dependency graph.
 *
 * Another property of the graph is that, since only the new request adds
 * edges to the existing locks in the graph, the graph is always
 * topologically ordered.
 */
static int
flk_process_request(lock_descriptor_t *request)
{
	graph_t *gp = request->l_graph;
	lock_descriptor_t *lock;
	int request_blocked_by_active = 0;
	int request_blocked_by_granted = 0;
	int request_blocked_by_sleeping = 0;
	vnode_t *vp = request->l_vnode;
	int error = 0;
	int request_will_wait = 0;
	int found_covering_lock = 0;
	lock_descriptor_t *covered_by = NULL;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	request_will_wait = IS_WILLING_TO_SLEEP(request);

	/*
	 * check active locks
	 */

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);


	if (lock) {
		do {
			if (BLOCKS(lock, request)) {
				if (!request_will_wait)
					return (EAGAIN);
				request_blocked_by_active = 1;
				break;
			}
			/*
			 * Grant the lock if it is for the same owner that
			 * holds an active lock covering the request.
			 */

			if (SAME_OWNER(lock, request) &&
			    COVERS(lock, request) &&
			    (request->l_type == F_RDLCK))
				return (flk_execute_request(request));
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	if (!request_blocked_by_active) {
		lock_descriptor_t *lk[1];
		lock_descriptor_t *first_glock = NULL;
		/*
		 * Shall we grant this?! NO!!
		 * What about those locks that were just granted and are
		 * still in the sleep queue.  Those threads have been woken
		 * up and so the locks are almost active.
		 */
		SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
		if (lock) {
			do {
				if (BLOCKS(lock, request)) {
					if (IS_GRANTED(lock)) {
						request_blocked_by_granted = 1;
					} else {
						request_blocked_by_sleeping = 1;
					}
				}

				lock = lock->l_next;
			} while ((lock->l_vnode == vp));
			first_glock = lock->l_prev;
			ASSERT(first_glock->l_vnode == vp);
		}

		if (request_blocked_by_granted)
			goto block;

		if (!request_blocked_by_sleeping) {
			/*
			 * If the request isn't going to be blocked by a
			 * sleeping request, we know that it isn't going to
			 * be blocked; we can just execute the request --
			 * without performing costly deadlock detection.
			 */
			ASSERT(!request_blocked_by_active);
			return (flk_execute_request(request));
		} else if (request->l_type == F_RDLCK) {
			/*
			 * If we have a sleeping writer in the requested
			 * lock's range, block.
			 */
			goto block;
		}

		lk[0] = request;
		request->l_state |= RECOMPUTE_LOCK;
		SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
		if (lock) {
			do {
				flk_recompute_dependencies(lock, lk, 1, 0);
				lock = lock->l_next;
			} while (lock->l_vnode == vp);
		}
		lock = first_glock;
		if (lock) {
			do {
				if (IS_GRANTED(lock)) {
					flk_recompute_dependencies(lock, lk, 1, 0);
				}
				lock = lock->l_prev;
			} while ((lock->l_vnode == vp));
		}
		request->l_state &= ~RECOMPUTE_LOCK;
		if (!NO_DEPENDENTS(request) && flk_check_deadlock(request))
			return (EDEADLK);
		return (flk_execute_request(request));
	}

block:
	if (request_will_wait)
		flk_graph_uncolor(gp);

	/* check sleeping locks */

	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);

	/*
	 * If we find a sleeping write lock that is a superset of the
	 * region wanted by request, we can be assured that by adding an
	 * edge to this write lock we have paths to all locks in the
	 * graph that block the request, except in one case, which is why
	 * there is another check for SAME_OWNER in the loop below.  The
	 * exception is when the process that owns the sleeping write lock
	 * 'l1' has other locks l2, l3, l4 in the system that arrived
	 * before l1.  l1 has no path to these locks, as they are from the
	 * same process.  We break when we find a second covering sleeping
	 * lock l5 owned by a process different from the one owning l1,
	 * because none of l2, l3, l4, etc. can have arrived before l5; if
	 * one had, it would have produced a deadlock with l1 already.
	 */

	if (lock) {
		do {
			if (BLOCKS(lock, request)) {
				if (!request_will_wait)
					return (EAGAIN);
				if (COVERS(lock, request) &&
				    lock->l_type == F_WRLCK) {
					if (found_covering_lock &&
					    !SAME_OWNER(lock, covered_by)) {
						found_covering_lock++;
						break;
					}
					found_covering_lock = 1;
					covered_by = lock;
				}
				if (found_covering_lock &&
				    !SAME_OWNER(lock, covered_by)) {
					lock = lock->l_next;
					continue;
				}
				if ((error = flk_add_edge(request, lock,
				    !found_covering_lock, 0)))
					return (error);
			}
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	/*
	 * found_covering_lock == 2 iff at this point 'request' has paths
	 * to all locks that block 'request'.  found_covering_lock == 1 iff
	 * at this point 'request' has paths to all blocking locks whose
	 * owners are not the same as the one that covers 'request'
	 * (covered_by above), and locks owned by covered_by's owner may
	 * still be in the active list.
	 */

	if (request_blocked_by_active && found_covering_lock != 2) {
		SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
		ASSERT(lock != NULL);
		do {
			if (BLOCKS(lock, request)) {
				if (found_covering_lock &&
				    !SAME_OWNER(lock, covered_by)) {
					lock = lock->l_next;
					continue;
				}
				if ((error = flk_add_edge(request, lock,
				    CHECK_CYCLE, 0)))
					return (error);
			}
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	if (NOT_BLOCKED(request)) {
		/*
		 * request not dependent on any other locks
		 * so execute this request
		 */
		return (flk_execute_request(request));
	} else {
		/*
		 * check for deadlock
		 */
		if (flk_check_deadlock(request))
			return (EDEADLK);
		/*
		 * this thread has to sleep
		 */
		return (flk_wait_execute_request(request));
	}
}
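
/*
 * As a concrete illustration of the above: suppose process A holds an
 * active F_WRLCK on bytes [0, 99] and process B requests an F_RDLCK on
 * [10, 19].  BLOCKS() is true, so a non-waiting request (F_SETLK) fails
 * immediately with EAGAIN; a waiting request (F_SETLKW) falls through to
 * "block:", gets an edge to A's lock via flk_add_edge(), and goes to
 * sleep in flk_wait_execute_request().  If granting the sleep would close
 * a cycle of owners waiting on each other, flk_check_deadlock() detects
 * it and EDEADLK is returned instead.
 */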

/*
 * The actual execution of the request in the simple case is only to
 * insert the 'request' in the list of active locks if it is not an
 * UNLOCK.
 * We have to consider the existing active locks' relation to this
 * 'request' if they are owned by the same process.  flk_relation() does
 * this job and sees to it that the dependency graph information is
 * maintained properly.
 */
int
flk_execute_request(lock_descriptor_t *request)
{
	graph_t *gp = request->l_graph;
	vnode_t *vp = request->l_vnode;
	lock_descriptor_t *lock, *lock1;
	int done_searching = 0;

	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);

	ASSERT(MUTEX_HELD(&gp->gp_mutex));

	flk_set_state(request, FLK_START_STATE);

	ASSERT(NOT_BLOCKED(request));

	/* IO_LOCK requests are only to check status */

	if (IS_IO_LOCK(request))
		return (0);

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	if (lock == NULL && request->l_type == F_UNLCK)
		return (0);
	if (lock == NULL) {
		flk_insert_active_lock(request);
		return (0);
	}

	do {
		lock1 = lock->l_next;
		if (SAME_OWNER(request, lock)) {
			done_searching = flk_relation(lock, request);
		}
		lock = lock1;
	} while (lock->l_vnode == vp && !done_searching);

	/*
	 * insert in active queue
	 */

	if (request->l_type != F_UNLCK)
		flk_insert_active_lock(request);

	return (0);
}
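
/*
 * How these routines are typically reached: reclock() is the entry point
 * on the fcntl(2) path, so a minimal user-level sketch like the following
 * ends up in flk_process_request()/flk_execute_request().  (Illustrative
 * only, not part of this file; the file name "f.dat" is an assumption.)
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct flock fl;
 *		int fd = open("f.dat", O_RDWR);
 *
 *		if (fd < 0)
 *			return (1);
 *		fl.l_type = F_WRLCK;	// whole-file write lock
 *		fl.l_whence = SEEK_SET;
 *		fl.l_start = 0;
 *		fl.l_len = 0;		// zero length means "to EOF"
 *		if (fcntl(fd, F_SETLKW, &fl) == -1)	// may sleep; EDEADLK possible
 *			perror("fcntl");
 *		(void) close(fd);	// drops the lock
 *		return (0);
 *	}
 *
 * F_SETLK maps to a request that is not willing to sleep (the EAGAIN
 * returns above), while F_SETLKW maps to IS_WILLING_TO_SLEEP().
 */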

/*
 * 'request' is blocked by someone, so we put it into the sleep queue.
 */
static int
flk_wait_execute_request(lock_descriptor_t *request)
{
	graph_t *gp = request->l_graph;
	callb_cpr_t *cprp;		/* CPR info from callback */
	struct flock_globals *fg;
	int index;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	ASSERT(IS_WILLING_TO_SLEEP(request));

	flk_insert_sleeping_lock(request);

	if (IS_LOCKMGR(request)) {
		index = HASH_INDEX(request->l_vnode);
		fg = flk_get_globals();

		if (nlm_status_size == 0) {	/* not booted as a cluster */
			if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP) {
				flk_cancel_sleeping_lock(request, 1);
				return (ENOLCK);
			}
		} else {			/* booted as a cluster */
			/*
			 * If the request is an NLM server lock request,
			 * and the NLM state of the lock request is not
			 * NLM_UP (because the NLM server is shutting
			 * down), then cancel the sleeping lock and
			 * return error ENOLCK that will encourage the
			 * client to retransmit.
			 */
			if (!IS_NLM_UP(request)) {
				flk_cancel_sleeping_lock(request, 1);
				return (ENOLCK);
			}
		}
	}

	/* Clustering: For blocking PXFS locks, return */
	if (IS_PXFS(request)) {
		/*
		 * PXFS locks sleep on the client side.
		 * The callback argument is used to wake up the sleeper
		 * when the lock is granted.
		 * We return -1 (rather than an errno value) to indicate
		 * the client side should sleep.
		 */
		return (PXFS_LOCK_BLOCKED);
	}

	if (request->l_callbacks != NULL) {
		/*
		 * To make sure the shutdown code works correctly, either
		 * the callback must happen after putting the lock on the
		 * sleep list, or we must check the shutdown status after
		 * returning from the callback (and before sleeping).  At
		 * least for now, we'll use the first option.  If a
		 * shutdown or signal or whatever happened while the graph
		 * mutex was dropped, that will be detected by
		 * wait_for_lock().
		 */
		mutex_exit(&gp->gp_mutex);

		cprp = flk_invoke_callbacks(request->l_callbacks,
		    FLK_BEFORE_SLEEP);

		mutex_enter(&gp->gp_mutex);

		if (cprp == NULL) {
			wait_for_lock(request);
		} else {
			mutex_enter(cprp->cc_lockp);
			CALLB_CPR_SAFE_BEGIN(cprp);
			mutex_exit(cprp->cc_lockp);
			wait_for_lock(request);
			mutex_enter(cprp->cc_lockp);
			CALLB_CPR_SAFE_END(cprp, cprp->cc_lockp);
			mutex_exit(cprp->cc_lockp);
		}

		mutex_exit(&gp->gp_mutex);
		(void) flk_invoke_callbacks(request->l_callbacks,
		    FLK_AFTER_SLEEP);
		mutex_enter(&gp->gp_mutex);
	} else {
		wait_for_lock(request);
	}

	if (IS_LOCKMGR(request)) {
		/*
		 * If the lock manager is shutting down, return an
		 * error that will encourage the client to retransmit.
		 */
		if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP &&
		    !IS_GRANTED(request)) {
			flk_cancel_sleeping_lock(request, 1);
			return (ENOLCK);
		}
	}

	if (IS_INTERRUPTED(request)) {
		/* we got a signal, or act like we did */
		flk_cancel_sleeping_lock(request, 1);
		return (EINTR);
	}

	/* Cancelled if some other thread has closed the file */

	if (IS_CANCELLED(request)) {
		flk_cancel_sleeping_lock(request, 1);
		return (EBADF);
	}

	request->l_state &= ~GRANTED_LOCK;
	REMOVE_SLEEP_QUEUE(request);
	return (flk_execute_request(request));
}
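
/*
 * A note on the CPR handshake above: when the FLK_BEFORE_SLEEP callback
 * returns CPR state (cprp != NULL), the wait is bracketed by
 * CALLB_CPR_SAFE_BEGIN/CALLB_CPR_SAFE_END under cprp->cc_lockp, marking
 * the thread safe to suspend for checkpoint/resume for the whole time it
 * sleeps in wait_for_lock().  The lockmgr status is deliberately
 * re-checked after the wait as well, since a shutdown may have raced
 * with the sleep.
 */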

/*
 * This routine adds an edge between from and to because 'from' depends
 * on 'to'.  If asked to check for deadlock it checks whether there are
 * any reachable locks from "from_lock" that are owned by the same
 * process as "from_lock".
 * NOTE: It is the caller's responsibility to make sure that the color
 * of the graph is consistent between the calls to flk_add_edge as done
 * in flk_process_request.  This routine does not color and check for
 * deadlock explicitly.
 */
static int
flk_add_edge(lock_descriptor_t *from_lock, lock_descriptor_t *to_lock,
    int check_cycle, int update_graph)
{
	edge_t *edge;
	edge_t *ep;
	lock_descriptor_t *vertex;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);

	/*
	 * if the to vertex already has mark_color just return;
	 * don't add an edge, as it is already reachable from the
	 * from vertex.
	 */

	if (COLORED(to_lock))
		return (0);

	edge = flk_get_edge();

	/*
	 * set the from and to vertex
	 */

	edge->from_vertex = from_lock;
	edge->to_vertex = to_lock;

	/*
	 * put in adjacency list of from vertex
	 */

	from_lock->l_edge.edge_adj_next->edge_adj_prev = edge;
	edge->edge_adj_next = from_lock->l_edge.edge_adj_next;
	edge->edge_adj_prev = &from_lock->l_edge;
	from_lock->l_edge.edge_adj_next = edge;

	/*
	 * put in the in-list of the to vertex
	 */

	to_lock->l_edge.edge_in_next->edge_in_prev = edge;
	edge->edge_in_next = to_lock->l_edge.edge_in_next;
	to_lock->l_edge.edge_in_next = edge;
	edge->edge_in_prev = &to_lock->l_edge;


	if (update_graph) {
		flk_update_proc_graph(edge, 0);
		return (0);
	}
	if (!check_cycle) {
		return (0);
	}

	STACK_PUSH(vertex_stack, from_lock, l_stack);

	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {

		STACK_POP(vertex_stack, l_stack);

		for (ep = FIRST_ADJ(vertex);
		    ep != HEAD(vertex);
		    ep = NEXT_ADJ(ep)) {
			if (COLORED(ep->to_vertex))
				continue;
			COLOR(ep->to_vertex);
			if (SAME_OWNER(ep->to_vertex, from_lock))
				goto dead_lock;
			STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
		}
	}
	return (0);

dead_lock:

	/*
	 * remove all edges
	 */

	ep = FIRST_ADJ(from_lock);

	while (ep != HEAD(from_lock)) {
		IN_LIST_REMOVE(ep);
		from_lock->l_sedge = NEXT_ADJ(ep);
		ADJ_LIST_REMOVE(ep);
		flk_free_edge(ep);
		ep = from_lock->l_sedge;
	}
	return (EDEADLK);
}

/*
 * Get an edge structure for representing the dependency between two locks.
 */
static edge_t *
flk_get_edge()
{
	edge_t *ep;

	ASSERT(flk_edge_cache != NULL);

	ep = kmem_cache_alloc(flk_edge_cache, KM_SLEEP);
	edge_allocs++;
	return (ep);
}

/*
 * Free the edge structure.
 */
static void
flk_free_edge(edge_t *ep)
{
	edge_frees++;
	kmem_cache_free(flk_edge_cache, (void *)ep);
}
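
/*
 * The cycle check in flk_add_edge() above is a plain depth-first search:
 * starting from from_lock, every vertex reachable through adjacency
 * edges is colored exactly once, and hitting an already-colored vertex
 * prunes that branch.  For example, with existing edges A->B and B->C
 * and a new edge A->C, the walk from A colors B and C once each and
 * terminates; deadlock is declared only if a reached vertex has the same
 * owner as from_lock, which would mean that owner is waiting on itself.
 */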

/*
 * Check the relationship of request with lock and perform the
 * recomputation of dependencies, break lock if required, and return
 * 1 if request cannot have any more relationship with the next
 * active locks.
 * The 'lock' and 'request' are compared, and in case of overlap we
 * delete the 'lock' and form new locks to represent the non-overlapped
 * portion of the original 'lock'.  This function has side effects such
 * as 'lock' being freed and new locks being added to the active list.
 */
static int
flk_relation(lock_descriptor_t *lock, lock_descriptor_t *request)
{
	int lock_effect;
	lock_descriptor_t *lock1, *lock2;
	lock_descriptor_t *topology[3];
	int nvertex = 0;
	int i;
	edge_t *ep;
	graph_t *gp = (lock->l_graph);


	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);

	ASSERT(MUTEX_HELD(&gp->gp_mutex));

	topology[0] = topology[1] = topology[2] = NULL;

	if (request->l_type == F_UNLCK)
		lock_effect = FLK_UNLOCK;
	else if (request->l_type == F_RDLCK &&
	    lock->l_type == F_WRLCK)
		lock_effect = FLK_DOWNGRADE;
	else if (request->l_type == F_WRLCK &&
	    lock->l_type == F_RDLCK)
		lock_effect = FLK_UPGRADE;
	else
		lock_effect = FLK_STAY_SAME;

	if (lock->l_end < request->l_start) {
		if (lock->l_end == request->l_start - 1 &&
		    lock_effect == FLK_STAY_SAME) {
			topology[0] = request;
			request->l_start = lock->l_start;
			nvertex = 1;
			goto recompute;
		} else {
			return (0);
		}
	}

	if (lock->l_start > request->l_end) {
		if (request->l_end == lock->l_start - 1 &&
		    lock_effect == FLK_STAY_SAME) {
			topology[0] = request;
			request->l_end = lock->l_end;
			nvertex = 1;
			goto recompute;
		} else {
			return (1);
		}
	}

	if (request->l_end < lock->l_end) {
		if (request->l_start > lock->l_start) {
			if (lock_effect == FLK_STAY_SAME) {
				request->l_start = lock->l_start;
				request->l_end = lock->l_end;
				topology[0] = request;
				nvertex = 1;
			} else {
				lock1 = flk_get_lock();
				lock2 = flk_get_lock();
				COPY(lock1, lock);
				COPY(lock2, lock);
				lock1->l_start = lock->l_start;
				lock1->l_end = request->l_start - 1;
				lock2->l_start = request->l_end + 1;
				lock2->l_end = lock->l_end;
				topology[0] = lock1;
				topology[1] = lock2;
				topology[2] = request;
				nvertex = 3;
			}
		} else if (request->l_start < lock->l_start) {
			if (lock_effect == FLK_STAY_SAME) {
				request->l_end = lock->l_end;
				topology[0] = request;
				nvertex = 1;
			} else {
				lock1 = flk_get_lock();
				COPY(lock1, lock);
				lock1->l_start = request->l_end + 1;
				topology[0] = lock1;
				topology[1] = request;
				nvertex = 2;
			}
		} else {
			if (lock_effect == FLK_STAY_SAME) {
				request->l_start = lock->l_start;
				request->l_end = lock->l_end;
				topology[0] = request;
				nvertex = 1;
			} else {
				lock1 = flk_get_lock();
				COPY(lock1, lock);
				lock1->l_start = request->l_end + 1;
				topology[0] = lock1;
				topology[1] = request;
				nvertex = 2;
			}
		}
	} else if (request->l_end > lock->l_end) {
		if (request->l_start > lock->l_start) {
			if (lock_effect == FLK_STAY_SAME) {
				request->l_start = lock->l_start;
				topology[0] = request;
				nvertex = 1;
			} else {
				lock1 = flk_get_lock();
				COPY(lock1, lock);
				lock1->l_end = request->l_start - 1;
				topology[0] = lock1;
				topology[1] = request;
				nvertex = 2;
			}
		} else if (request->l_start < lock->l_start) {
			topology[0] = request;
			nvertex = 1;
		} else {
			topology[0] = request;
			nvertex = 1;
		}
	} else {
		if (request->l_start > lock->l_start) {
			if (lock_effect == FLK_STAY_SAME) {
				request->l_start = lock->l_start;
				topology[0] = request;
				nvertex = 1;
			} else {
				lock1 = flk_get_lock();
				COPY(lock1, lock);
				lock1->l_end = request->l_start - 1;
				topology[0] = lock1;
				topology[1] = request;
				nvertex = 2;
			}
		} else if (request->l_start < lock->l_start) {
			topology[0] = request;
			nvertex = 1;
		} else {
			if (lock_effect != FLK_UNLOCK) {
				topology[0] = request;
				nvertex = 1;
			} else {
				flk_delete_active_lock(lock, 0);
				flk_wakeup(lock, 1);
				flk_free_lock(lock);
				CHECK_SLEEPING_LOCKS(gp);
				CHECK_ACTIVE_LOCKS(gp);
				return (1);
			}
		}
	}

recompute:

	/*
	 * For unlock we don't send the 'request' for recomputing
	 * dependencies, because no lock will ever add an edge to an
	 * unlock.
	 */

	if (lock_effect == FLK_UNLOCK) {
		topology[nvertex-1] = NULL;
		nvertex--;
	}
	for (i = 0; i < nvertex; i++) {
		topology[i]->l_state |= RECOMPUTE_LOCK;
		topology[i]->l_color = NO_COLOR;
	}

	ASSERT(FIRST_ADJ(lock) == HEAD(lock));

	/*
	 * Remove each in-edge of this vertex 'lock' from the adjacency
	 * list of its from-vertex.
	 */

	ep = FIRST_IN(lock);
	while (ep != HEAD(lock)) {
		ADJ_LIST_REMOVE(ep);
		ep = NEXT_IN(ep);
	}

	flk_delete_active_lock(lock, 0);

	/* We are ready for recomputing the dependencies now */

	flk_recompute_dependencies(lock, topology, nvertex, 1);

	for (i = 0; i < nvertex; i++) {
		topology[i]->l_state &= ~RECOMPUTE_LOCK;
		topology[i]->l_color = NO_COLOR;
	}


	if (lock_effect == FLK_UNLOCK) {
		nvertex++;
	}
	for (i = 0; i < nvertex - 1; i++) {
		flk_insert_active_lock(topology[i]);
	}


	if (lock_effect == FLK_DOWNGRADE || lock_effect == FLK_UNLOCK) {
		flk_wakeup(lock, 0);
	} else {
		ep = FIRST_IN(lock);
		while (ep != HEAD(lock)) {
			lock->l_sedge = NEXT_IN(ep);
			IN_LIST_REMOVE(ep);
			flk_update_proc_graph(ep, 1);
			flk_free_edge(ep);
			ep = lock->l_sedge;
		}
	}
	flk_free_lock(lock);

	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);
	return (0);
}
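
/*
 * A worked example of the splitting above: an active lock covering
 * [0, 99] hit by a same-owner unlock request for [40, 59] takes the
 * "request->l_end < lock->l_end && request->l_start > lock->l_start"
 * branch with lock_effect == FLK_UNLOCK:
 *
 *	lock:     [0 ............................... 99]
 *	request:               [40 ...... 59]
 *	result:   [0 ... 39]                  [60 ... 99]
 *	           (lock1)                      (lock2)
 *
 * nvertex is 3 going into "recompute", the unlock request itself is then
 * dropped from the topology (nothing can depend on an unlock), and only
 * lock1 and lock2 are re-inserted into the active list.
 */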

/*
 * Insert a lock into the active queue.
 */
static void
flk_insert_active_lock(lock_descriptor_t *new_lock)
{
	graph_t *gp = new_lock->l_graph;
	vnode_t *vp = new_lock->l_vnode;
	lock_descriptor_t *first_lock, *lock;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
	first_lock = lock;

	if (first_lock != NULL) {
		for (; (lock->l_vnode == vp &&
		    lock->l_start < new_lock->l_start); lock = lock->l_next)
			;
	} else {
		lock = ACTIVE_HEAD(gp);
	}

	lock->l_prev->l_next = new_lock;
	new_lock->l_next = lock;
	new_lock->l_prev = lock->l_prev;
	lock->l_prev = new_lock;

	if (first_lock == NULL || (new_lock->l_start <= first_lock->l_start)) {
		vp->v_filocks = (struct filock *)new_lock;
	}
	flk_set_state(new_lock, FLK_ACTIVE_STATE);
	new_lock->l_state |= ACTIVE_LOCK;

	CHECK_ACTIVE_LOCKS(gp);
	CHECK_SLEEPING_LOCKS(gp);
}

/*
 * Delete the active lock : Performs two functions depending on the
 * value of the second parameter.  One is to remove from the active
 * lists only, and the other is to both remove and free the lock.
 */
static void
flk_delete_active_lock(lock_descriptor_t *lock, int free_lock)
{
	vnode_t *vp = lock->l_vnode;
	graph_t *gp = lock->l_graph;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	if (free_lock)
		ASSERT(NO_DEPENDENTS(lock));
	ASSERT(NOT_BLOCKED(lock));
	ASSERT(IS_ACTIVE(lock));

	ASSERT((vp->v_filocks != NULL));

	if (vp->v_filocks == (struct filock *)lock) {
		vp->v_filocks = (struct filock *)
		    ((lock->l_next->l_vnode == vp) ? lock->l_next :
		    NULL);
	}
	lock->l_next->l_prev = lock->l_prev;
	lock->l_prev->l_next = lock->l_next;
	lock->l_next = lock->l_prev = NULL;
	flk_set_state(lock, FLK_DEAD_STATE);
	lock->l_state &= ~ACTIVE_LOCK;

	if (free_lock)
		flk_free_lock(lock);
	CHECK_ACTIVE_LOCKS(gp);
	CHECK_SLEEPING_LOCKS(gp);
}

/*
 * Insert into the sleep queue.
 */
static void
flk_insert_sleeping_lock(lock_descriptor_t *request)
{
	graph_t *gp = request->l_graph;
	vnode_t *vp = request->l_vnode;
	lock_descriptor_t *lock;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	ASSERT(IS_INITIAL(request));

	for (lock = gp->sleeping_locks.l_next; (lock != &gp->sleeping_locks &&
	    lock->l_vnode < vp); lock = lock->l_next)
		;

	lock->l_prev->l_next = request;
	request->l_prev = lock->l_prev;
	lock->l_prev = request;
	request->l_next = lock;
	flk_set_state(request, FLK_SLEEPING_STATE);
	request->l_state |= SLEEPING_LOCK;
}
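
/*
 * Both insertion routines above maintain a cheap ordering invariant: the
 * active list keeps a vnode's locks contiguous and sorted by l_start
 * (with v_filocks pointing at the lowest-starting one), and the sleep
 * queue is sorted by vnode address.  That is what allows the
 * SET_LOCK_TO_FIRST_*_VP() macros and the "while (lock->l_vnode == vp)"
 * loops throughout this file to scan only one vnode's locks instead of
 * the whole graph.
 */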

/*
 * Cancelling a sleeping lock implies removing a vertex from the
 * dependency graph, and therefore we should recompute the dependencies
 * of all vertices that have a path to this vertex, w.r.t. all
 * vertices reachable from this vertex.
 */
void
flk_cancel_sleeping_lock(lock_descriptor_t *request, int remove_from_queue)
{
	graph_t *gp = request->l_graph;
	vnode_t *vp = request->l_vnode;
	lock_descriptor_t **topology = NULL;
	edge_t *ep;
	lock_descriptor_t *vertex, *lock;
	int nvertex = 0;
	int i;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	/*
	 * Count the number of vertex pointers that have to be allocated,
	 * i.e. all vertices that are reachable from request.
	 */

	STACK_PUSH(vertex_stack, request, l_stack);

	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
		STACK_POP(vertex_stack, l_stack);
		for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
		    ep = NEXT_ADJ(ep)) {
			if (IS_RECOMPUTE(ep->to_vertex))
				continue;
			ep->to_vertex->l_state |= RECOMPUTE_LOCK;
			STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
			nvertex++;
		}
	}

	/*
	 * allocate memory for holding the vertex pointers
	 */

	if (nvertex) {
		topology = kmem_zalloc(nvertex * sizeof (lock_descriptor_t *),
		    KM_SLEEP);
	}

	/*
	 * One more pass to actually store the vertices in the
	 * allocated array.
	 * We first check sleeping locks and then active locks
	 * so that the topology array will be in topological
	 * order.
	 */

	nvertex = 0;
	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);

	if (lock) {
		do {
			if (IS_RECOMPUTE(lock)) {
				lock->l_index = nvertex;
				topology[nvertex++] = lock;
			}
			lock->l_color = NO_COLOR;
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	if (lock) {
		do {
			if (IS_RECOMPUTE(lock)) {
				lock->l_index = nvertex;
				topology[nvertex++] = lock;
			}
			lock->l_color = NO_COLOR;
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	/*
	 * Remove the in and out edges of request.
	 * They are freed after updating proc_graph below.
	 */

	for (ep = FIRST_IN(request); ep != HEAD(request); ep = NEXT_IN(ep)) {
		ADJ_LIST_REMOVE(ep);
	}


	if (remove_from_queue)
		REMOVE_SLEEP_QUEUE(request);

	/* we are ready to recompute */

	flk_recompute_dependencies(request, topology, nvertex, 1);

	ep = FIRST_ADJ(request);
	while (ep != HEAD(request)) {
		IN_LIST_REMOVE(ep);
		request->l_sedge = NEXT_ADJ(ep);
		ADJ_LIST_REMOVE(ep);
		flk_update_proc_graph(ep, 1);
		flk_free_edge(ep);
		ep = request->l_sedge;
	}


	/*
	 * unset the RECOMPUTE flag in those vertices
	 */

	for (i = 0; i < nvertex; i++) {
		topology[i]->l_state &= ~RECOMPUTE_LOCK;
	}

	/*
	 * free the topology
	 */
	if (nvertex)
		kmem_free((void *)topology,
		    (nvertex * sizeof (lock_descriptor_t *)));
	/*
	 * Some locks may be unblocked now.
	 */

	flk_wakeup(request, 0);

	/*
	 * we expect to have a correctly recomputed graph now.
	 */
	flk_set_state(request, FLK_DEAD_STATE);
	flk_free_lock(request);
	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);

}

/*
 * Uncoloring the graph is simply to increment the mark value of the
 * graph, and only when wraparound takes place do we explicitly reset the
 * color of all vertices in the graph.
 */
static void
flk_graph_uncolor(graph_t *gp)
{
	lock_descriptor_t *lock;

	if (gp->mark == UINT_MAX) {
		gp->mark = 1;
		for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
		    lock = lock->l_next)
			lock->l_color = 0;

		for (lock = SLEEPING_HEAD(gp)->l_next;
		    lock != SLEEPING_HEAD(gp); lock = lock->l_next)
			lock->l_color = 0;
	} else {
		gp->mark++;
	}
}

/*
 * Wake up locks that are blocked on the given lock.
 */
static void
flk_wakeup(lock_descriptor_t *lock, int adj_list_remove)
{
	edge_t *ep;
	graph_t *gp = lock->l_graph;
	lock_descriptor_t *lck;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	if (NO_DEPENDENTS(lock))
		return;
	ep = FIRST_IN(lock);
	do {
		/*
		 * Delete the edge from the adjacency list of the from
		 * vertex.  If there are no more adjacent edges for that
		 * vertex, wake its process.
		 */
		lck = ep->from_vertex;
		if (adj_list_remove)
			ADJ_LIST_REMOVE(ep);
		flk_update_proc_graph(ep, 1);
		if (NOT_BLOCKED(lck)) {
			GRANT_WAKEUP(lck);
		}
		lock->l_sedge = NEXT_IN(ep);
		IN_LIST_REMOVE(ep);
		flk_free_edge(ep);
		ep = lock->l_sedge;
	} while (ep != HEAD(lock));
	ASSERT(NO_DEPENDENTS(lock));
}
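
/*
 * Note that flk_wakeup() only issues GRANT_WAKEUP() once a blocked lock
 * has no adjacent edges left.  For example, if a sleeping request was
 * blocked by two active locks L1 and L2, releasing L1 removes one edge
 * but leaves the sleeper blocked on L2; only when L2's edge is also
 * removed does NOT_BLOCKED() become true and the waiter get signalled.
 */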
static void
flk_recompute_dependencies(lock_descriptor_t *request,
    lock_descriptor_t **topology, int nvertex, int update_graph)
{
	lock_descriptor_t *vertex, *lock;
	graph_t	*gp = request->l_graph;
	int i, count;
	int barrier_found = 0;
	edge_t	*ep;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	if (nvertex == 0)
		return;
	flk_graph_uncolor(request->l_graph);
	barrier_found = flk_find_barriers(request);
	request->l_state |= RECOMPUTE_DONE;

	STACK_PUSH(vertex_stack, request, l_stack);
	request->l_sedge = FIRST_IN(request);


	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
		if (vertex->l_state & RECOMPUTE_DONE) {
			count = 0;
			goto next_in_edge;
		}
		if (IS_BARRIER(vertex)) {
			/* decrement the barrier count */
			if (vertex->l_index) {
				vertex->l_index--;
				/* this vertex will be pushed again anyway */
				STACK_POP(vertex_stack, l_stack);
				if (vertex->l_index == 0) {
					/*
					 * barrier is over; we can recompute
					 * dependencies for this lock in the
					 * next stack pop
					 */
					vertex->l_state &= ~BARRIER_LOCK;
				}
				continue;
			}
		}
		vertex->l_state |= RECOMPUTE_DONE;
		flk_graph_uncolor(gp);
		count = flk_color_reachables(vertex);
		for (i = 0; i < nvertex; i++) {
			lock = topology[i];
			if (COLORED(lock))
				continue;
			if (BLOCKS(lock, vertex)) {
				(void) flk_add_edge(vertex, lock,
				    NO_CHECK_CYCLE, update_graph);
				COLOR(lock);
				count++;
				count += flk_color_reachables(lock);
			}

		}

next_in_edge:
		if (count == nvertex ||
		    vertex->l_sedge == HEAD(vertex)) {
			/* prune the tree below this */
			STACK_POP(vertex_stack, l_stack);
			vertex->l_state &= ~RECOMPUTE_DONE;
			/* update the barrier locks below this! */
			if (vertex->l_sedge != HEAD(vertex) && barrier_found) {
				flk_graph_uncolor(gp);
				flk_update_barriers(vertex);
			}
			continue;
		}

		ep = vertex->l_sedge;
		lock = ep->from_vertex;
		STACK_PUSH(vertex_stack, lock, l_stack);
		lock->l_sedge = FIRST_IN(lock);
		vertex->l_sedge = NEXT_IN(ep);
	}

}
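/*
 * A worked example (hypothetical locks, for illustration only): barrier
 * counting on a diamond.  With blocked-by edges A -> request,
 * B -> request, C -> A and C -> B, flk_find_barriers() reaches C twice
 * while walking in-edges from 'request', so C gets BARRIER_LOCK and
 * l_index == 1 (number of paths minus one).  The DFS above then pops C
 * the first time it is reached, merely decrementing l_index, and
 * recomputes C's dependencies only when the second path arrives; C is
 * therefore processed exactly once, after both paths to it are known.
 */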
/*
 * Color all vertices reachable from 'vertex' that belong to topology
 * (here, those that have RECOMPUTE_LOCK set in their state) and are as
 * yet uncolored.
 *
 * Note: we need to use a different stack_link, l_stack1, because this is
 * called from flk_recompute_dependencies(), which already uses a stack
 * with l_stack as the stack_link.
 */

static int
flk_color_reachables(lock_descriptor_t *vertex)
{
	lock_descriptor_t *ver, *lock;
	int count;
	edge_t	*ep;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);

	STACK_PUSH(vertex_stack, vertex, l_stack1);
	count = 0;
	while ((ver = STACK_TOP(vertex_stack)) != NULL) {

		STACK_POP(vertex_stack, l_stack1);
		for (ep = FIRST_ADJ(ver); ep != HEAD(ver);
		    ep = NEXT_ADJ(ep)) {
			lock = ep->to_vertex;
			if (COLORED(lock))
				continue;
			COLOR(lock);
			if (IS_RECOMPUTE(lock))
				count++;
			STACK_PUSH(vertex_stack, lock, l_stack1);
		}

	}
	return (count);
}

/*
 * Called from flk_recompute_dependencies(); this routine decrements
 * the barrier count of barrier vertices that are reachable from lock.
 */

static void
flk_update_barriers(lock_descriptor_t *lock)
{
	lock_descriptor_t *vertex, *lck;
	edge_t	*ep;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);

	STACK_PUSH(vertex_stack, lock, l_stack1);

	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
		STACK_POP(vertex_stack, l_stack1);
		for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
		    ep = NEXT_IN(ep)) {
			lck = ep->from_vertex;
			if (COLORED(lck)) {
				if (IS_BARRIER(lck)) {
					ASSERT(lck->l_index > 0);
					lck->l_index--;
					if (lck->l_index == 0)
						lck->l_state &= ~BARRIER_LOCK;
				}
				continue;
			}
			COLOR(lck);
			if (IS_BARRIER(lck)) {
				ASSERT(lck->l_index > 0);
				lck->l_index--;
				if (lck->l_index == 0)
					lck->l_state &= ~BARRIER_LOCK;
			}
			STACK_PUSH(vertex_stack, lck, l_stack1);
		}
	}
}

/*
 * Finds all vertices that are reachable from 'lock' more than once,
 * marks them as barrier vertices, and increments their barrier count.
 * The barrier count is one less than the total number of paths from
 * lock to that vertex.
 */

static int
flk_find_barriers(lock_descriptor_t *lock)
{
	lock_descriptor_t *vertex, *lck;
	int found = 0;
	edge_t	*ep;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);

	STACK_PUSH(vertex_stack, lock, l_stack1);

	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
		STACK_POP(vertex_stack, l_stack1);
		for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
		    ep = NEXT_IN(ep)) {
			lck = ep->from_vertex;
			if (COLORED(lck)) {
				/* this is a barrier */
				lck->l_state |= BARRIER_LOCK;
				/* index will have barrier count */
				lck->l_index++;
				if (!found)
					found = 1;
				continue;
			}
			COLOR(lck);
			lck->l_index = 0;
			STACK_PUSH(vertex_stack, lck, l_stack1);
		}
	}
	return (found);
}

/*
 * Finds the first lock that is mainly responsible for blocking this
 * request.  If there is no such lock, request->l_flock.l_type is set to
 * F_UNLCK.  Otherwise, request->l_flock is filled in with the particulars
 * of the blocking lock.
 *
 * Note: it is possible for a request to be blocked by a sleeping lock
 * because of the fairness policy used in flk_process_request() to
 * construct the dependencies.  (See the comments before
 * flk_process_request().)
 */

static void
flk_get_first_blocking_lock(lock_descriptor_t *request)
{
	graph_t	*gp = request->l_graph;
	vnode_t *vp = request->l_vnode;
	lock_descriptor_t *lock, *blocker;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	blocker = NULL;
	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	if (lock) {
		do {
			if (BLOCKS(lock, request)) {
				blocker = lock;
				break;
			}
			lock = lock->l_next;
		} while (lock->l_vnode == vp);
	}

	if (blocker == NULL && request->l_flock.l_type == F_RDLCK) {
		/*
		 * No active lock is blocking this request, but if a read
		 * lock is requested, it may also get blocked by a waiting
		 * writer. So search all sleeping locks and see if there is
		 * a writer waiting.
		 */
		SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
		if (lock) {
			do {
				if (BLOCKS(lock, request)) {
					blocker = lock;
					break;
				}
				lock = lock->l_next;
			} while (lock->l_vnode == vp);
		}
	}

	if (blocker) {
		report_blocker(blocker, request);
	} else
		request->l_flock.l_type = F_UNLCK;
}
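/*
 * An illustrative sketch of a caller (hypothetical, illustration only):
 * this routine backs F_GETLK-style queries.  A caller probing on behalf
 * of a user request would do roughly:
 *
 *	flk_get_first_blocking_lock(request);
 *	if (request->l_flock.l_type == F_UNLCK) {
 *		(nothing conflicts; the probed lock would be granted)
 *	} else {
 *		(request->l_flock now describes the blocking lock)
 *	}
 */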
/*
 * Get the graph_t structure associated with a vnode.
 * If 'initialize' is non-zero, and the graph_t structure for this vnode has
 * not yet been initialized, then a new element is allocated and returned.
 */
graph_t *
flk_get_lock_graph(vnode_t *vp, int initialize)
{
	graph_t *gp;
	graph_t *gp_alloc = NULL;
	int index = HASH_INDEX(vp);

	if (initialize == FLK_USE_GRAPH) {
		mutex_enter(&flock_lock);
		gp = lock_graph[index];
		mutex_exit(&flock_lock);
		return (gp);
	}

	ASSERT(initialize == FLK_INIT_GRAPH);

	if (lock_graph[index] == NULL) {

		gp_alloc = kmem_zalloc(sizeof (graph_t), KM_SLEEP);

		/* Initialize the graph */

		gp_alloc->active_locks.l_next =
		    gp_alloc->active_locks.l_prev =
		    (lock_descriptor_t *)ACTIVE_HEAD(gp_alloc);
		gp_alloc->sleeping_locks.l_next =
		    gp_alloc->sleeping_locks.l_prev =
		    (lock_descriptor_t *)SLEEPING_HEAD(gp_alloc);
		gp_alloc->index = index;
		mutex_init(&gp_alloc->gp_mutex, NULL, MUTEX_DEFAULT, NULL);
	}

	mutex_enter(&flock_lock);

	gp = lock_graph[index];

	/* Recheck the value within flock_lock */
	if (gp == NULL) {
		struct flock_globals *fg;

		/* We must have previously allocated the graph_t structure */
		ASSERT(gp_alloc != NULL);
		lock_graph[index] = gp = gp_alloc;
		/*
		 * The lockmgr status is only needed if KLM is loaded.
		 */
		if (flock_zone_key != ZONE_KEY_UNINITIALIZED) {
			fg = flk_get_globals();
			fg->lockmgr_status[index] = fg->flk_lockmgr_status;
		}
	}

	mutex_exit(&flock_lock);

	if ((gp_alloc != NULL) && (gp != gp_alloc)) {
		/* There was a race to allocate the graph_t and we lost */
		mutex_destroy(&gp_alloc->gp_mutex);
		kmem_free(gp_alloc, sizeof (graph_t));
	}

	return (gp);
}
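/*
 * The function above uses the classic allocate-outside, recheck-inside
 * pattern to avoid a KM_SLEEP allocation while holding flock_lock.  The
 * same idiom in miniature (hypothetical names, illustration only):
 *
 *	thing_t *t, *t_alloc = NULL;
 *
 *	if (table[i] == NULL)			(racy peek, re-verified)
 *		t_alloc = kmem_zalloc(sizeof (thing_t), KM_SLEEP);
 *	mutex_enter(&table_lock);
 *	if ((t = table[i]) == NULL)
 *		table[i] = t = t_alloc;		(we won the race)
 *	mutex_exit(&table_lock);
 *	if (t_alloc != NULL && t != t_alloc)
 *		kmem_free(t_alloc, sizeof (thing_t));	(we lost; discard)
 *	return (t);
 */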
/*
 * PSARC case 1997/292
 */
int
cl_flk_has_remote_locks_for_nlmid(vnode_t *vp, int nlmid)
{
	lock_descriptor_t *lock;
	int result = 0;
	graph_t *gp;
	int lock_nlmid;

	/*
	 * Check to see if node is booted as a cluster. If not, return.
	 */
	if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
		return (0);
	}

	gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
	if (gp == NULL) {
		return (0);
	}

	mutex_enter(&gp->gp_mutex);

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	if (lock) {
		while (lock->l_vnode == vp) {
			/* get NLM id from sysid */
			lock_nlmid = GETNLMID(lock->l_flock.l_sysid);

			/*
			 * If NLM server request _and_ nlmid of lock matches
			 * nlmid of argument, then we've found a remote lock.
			 */
			if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
				result = 1;
				goto done;
			}
			lock = lock->l_next;
		}
	}

	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);

	if (lock) {
		while (lock->l_vnode == vp) {
			/* get NLM id from sysid */
			lock_nlmid = GETNLMID(lock->l_flock.l_sysid);

			/*
			 * If NLM server request _and_ nlmid of lock matches
			 * nlmid of argument, then we've found a remote lock.
			 */
			if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
				result = 1;
				goto done;
			}
			lock = lock->l_next;
		}
	}

done:
	mutex_exit(&gp->gp_mutex);
	return (result);
}

/*
 * Determine whether there are any locks for the given vnode with a remote
 * sysid.  Returns zero if not, non-zero if there are.
 *
 * Note that the return value from this function is potentially invalid
 * once it has been returned.  The caller is responsible for providing its
 * own synchronization mechanism to ensure that the return value is useful
 * (e.g., see nfs_lockcompletion()).
 */
int
flk_has_remote_locks(vnode_t *vp)
{
	lock_descriptor_t *lock;
	int result = 0;
	graph_t *gp;

	gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
	if (gp == NULL) {
		return (0);
	}

	mutex_enter(&gp->gp_mutex);

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	if (lock) {
		while (lock->l_vnode == vp) {
			if (IS_REMOTE(lock)) {
				result = 1;
				goto done;
			}
			lock = lock->l_next;
		}
	}

	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);

	if (lock) {
		while (lock->l_vnode == vp) {
			if (IS_REMOTE(lock)) {
				result = 1;
				goto done;
			}
			lock = lock->l_next;
		}
	}

done:
	mutex_exit(&gp->gp_mutex);
	return (result);
}
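/*
 * An illustrative sketch of a caller (hypothetical, illustration only):
 * a typical use is deciding whether a vnode may leave "locked" mode once
 * a remote lock operation completes, along the lines of
 * nfs_lockcompletion():
 *
 *	if (!flk_has_remote_locks(vp)) {
 *		(no remote locks remain; e.g., cached I/O could be
 *		 re-enabled for vp)
 *	}
 *
 * As the comment above warns, the answer can go stale immediately, so
 * the caller must hold its own serializing lock across this check.
 */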
/*
 * Determine whether there are any locks for the given vnode with a remote
 * sysid matching the given sysid.
 * Used by the new (open source) NFS Lock Manager (NLM).
 */
int
flk_has_remote_locks_for_sysid(vnode_t *vp, int sysid)
{
	lock_descriptor_t *lock;
	int result = 0;
	graph_t *gp;

	if (sysid == 0)
		return (0);

	gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
	if (gp == NULL) {
		return (0);
	}

	mutex_enter(&gp->gp_mutex);

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	if (lock) {
		while (lock->l_vnode == vp) {
			if (lock->l_flock.l_sysid == sysid) {
				result = 1;
				goto done;
			}
			lock = lock->l_next;
		}
	}

	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);

	if (lock) {
		while (lock->l_vnode == vp) {
			if (lock->l_flock.l_sysid == sysid) {
				result = 1;
				goto done;
			}
			lock = lock->l_next;
		}
	}

done:
	mutex_exit(&gp->gp_mutex);
	return (result);
}

/*
 * Determine if there are any locks owned by the given sysid.
 * Returns zero if not, non-zero if there are.  Note that this return code
 * could be derived from flk_get_{sleeping,active}_locks, but this routine
 * avoids all the memory allocations of those routines.
 *
 * This routine has the same synchronization issues as
 * flk_has_remote_locks.
 */

int
flk_sysid_has_locks(int sysid, int lck_type)
{
	int		has_locks = 0;
	lock_descriptor_t	*lock;
	graph_t		*gp;
	int		i;

	for (i = 0; i < HASH_SIZE && !has_locks; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);
		if (gp == NULL) {
			continue;
		}

		mutex_enter(&gp->gp_mutex);

		if (lck_type & FLK_QUERY_ACTIVE) {
			for (lock = ACTIVE_HEAD(gp)->l_next;
			    lock != ACTIVE_HEAD(gp) && !has_locks;
			    lock = lock->l_next) {
				if (lock->l_flock.l_sysid == sysid)
					has_locks = 1;
			}
		}

		if (lck_type & FLK_QUERY_SLEEPING) {
			for (lock = SLEEPING_HEAD(gp)->l_next;
			    lock != SLEEPING_HEAD(gp) && !has_locks;
			    lock = lock->l_next) {
				if (lock->l_flock.l_sysid == sysid)
					has_locks = 1;
			}
		}
		mutex_exit(&gp->gp_mutex);
	}

	return (has_locks);
}
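/*
 * Illustration: lck_type is a bit mask, so a caller that wants to know
 * whether client 'sysid' has anything at all outstanding, granted or
 * still waiting, would ask:
 *
 *	if (flk_sysid_has_locks(sysid,
 *	    FLK_QUERY_ACTIVE | FLK_QUERY_SLEEPING)) {
 *		(the client still holds or is waiting for locks)
 *	}
 */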
/*
 * PSARC case 1997/292
 *
 * Requires: "sysid" is a pair [nlmid, sysid].  The lower half is a 16-bit
 * quantity, the real sysid generated by the NLM server; the upper half
 * identifies the node of the cluster where the NLM server ran.
 * This routine is only called by an NLM server running in a cluster.
 * Effects: Remove all locks held on behalf of the client identified
 * by "sysid."
 */
void
cl_flk_remove_locks_by_sysid(int sysid)
{
	graph_t	*gp;
	int i;
	lock_descriptor_t *lock, *nlock;

	/*
	 * Check to see if node is booted as a cluster. If not, return.
	 */
	if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
		return;
	}

	ASSERT(sysid != 0);
	for (i = 0; i < HASH_SIZE; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);

		if (gp == NULL)
			continue;

		mutex_enter(&gp->gp_mutex);	/* get mutex on lock graph */

		/* signal sleeping requests so that they bail out */
		lock = SLEEPING_HEAD(gp)->l_next;
		while (lock != SLEEPING_HEAD(gp)) {
			nlock = lock->l_next;
			if (lock->l_flock.l_sysid == sysid) {
				INTERRUPT_WAKEUP(lock);
			}
			lock = nlock;
		}

		/* delete active locks */
		lock = ACTIVE_HEAD(gp)->l_next;
		while (lock != ACTIVE_HEAD(gp)) {
			nlock = lock->l_next;
			if (lock->l_flock.l_sysid == sysid) {
				flk_delete_active_lock(lock, 0);
				flk_wakeup(lock, 1);
				flk_free_lock(lock);
			}
			lock = nlock;
		}
		mutex_exit(&gp->gp_mutex);	/* release mutex on graph */
	}
}

/*
 * Delete all locks in the system that belong to the sysid of the request.
 */

static void
flk_delete_locks_by_sysid(lock_descriptor_t *request)
{
	int	sysid = request->l_flock.l_sysid;
	lock_descriptor_t *lock, *nlock;
	graph_t *gp;
	int i;

	ASSERT(MUTEX_HELD(&request->l_graph->gp_mutex));
	ASSERT(sysid != 0);

	mutex_exit(&request->l_graph->gp_mutex);

	for (i = 0; i < HASH_SIZE; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);

		if (gp == NULL)
			continue;

		mutex_enter(&gp->gp_mutex);

		/* signal sleeping requests so that they bail out */
		lock = SLEEPING_HEAD(gp)->l_next;
		while (lock != SLEEPING_HEAD(gp)) {
			nlock = lock->l_next;
			if (lock->l_flock.l_sysid == sysid) {
				INTERRUPT_WAKEUP(lock);
			}
			lock = nlock;
		}

		/* delete active locks */
		lock = ACTIVE_HEAD(gp)->l_next;
		while (lock != ACTIVE_HEAD(gp)) {
			nlock = lock->l_next;
			if (lock->l_flock.l_sysid == sysid) {
				flk_delete_active_lock(lock, 0);
				flk_wakeup(lock, 1);
				flk_free_lock(lock);
			}
			lock = nlock;
		}
		mutex_exit(&gp->gp_mutex);
	}

	mutex_enter(&request->l_graph->gp_mutex);
}

/*
 * Clustering: Deletes PXFS locks
 * Effects: Delete all locks on files in the given file system and with the
 *  given PXFS id.
 */
void
cl_flk_delete_pxfs_locks(struct vfs *vfsp, int pxfsid)
{
	lock_descriptor_t *lock, *nlock;
	graph_t	*gp;
	int i;

	for (i = 0; i < HASH_SIZE; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);

		if (gp == NULL)
			continue;

		mutex_enter(&gp->gp_mutex);

		/* signal sleeping requests so that they bail out */
		lock = SLEEPING_HEAD(gp)->l_next;
		while (lock != SLEEPING_HEAD(gp)) {
			nlock = lock->l_next;
			if (lock->l_vnode->v_vfsp == vfsp) {
				ASSERT(IS_PXFS(lock));
				if (GETPXFSID(lock->l_flock.l_sysid) ==
				    pxfsid) {
					flk_set_state(lock,
					    FLK_CANCELLED_STATE);
					flk_cancel_sleeping_lock(lock, 1);
				}
			}
			lock = nlock;
		}

		/* delete active locks */
		lock = ACTIVE_HEAD(gp)->l_next;
		while (lock != ACTIVE_HEAD(gp)) {
			nlock = lock->l_next;
			if (lock->l_vnode->v_vfsp == vfsp) {
				ASSERT(IS_PXFS(lock));
				if (GETPXFSID(lock->l_flock.l_sysid) ==
				    pxfsid) {
					flk_delete_active_lock(lock, 0);
					flk_wakeup(lock, 1);
					flk_free_lock(lock);
				}
			}
			lock = nlock;
		}
		mutex_exit(&gp->gp_mutex);
	}
}
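/*
 * Note the pattern shared by the three removal routines above, applied
 * per graph in two phases.  Sleeping requests are only signalled, so
 * that each waiter bails out in its own context (INTERRUPT_WAKEUP(), or
 * in the PXFS case an explicit flk_cancel_sleeping_lock()), while active
 * locks are torn down in place:
 *
 *	flk_delete_active_lock(lock, 0);	(unlink from the graph)
 *	flk_wakeup(lock, 1);			(unblock its dependents)
 *	flk_free_lock(lock);			(free the descriptor)
 */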
/*
 * Search for a sleeping lock manager lock that matches exactly this lock
 * request; if one is found, fake a signal to cancel it.
 *
 * Return 1 if a matching lock was found, 0 otherwise.
 */

static int
flk_canceled(lock_descriptor_t *request)
{
	lock_descriptor_t *lock, *nlock;
	graph_t *gp = request->l_graph;
	vnode_t *vp = request->l_vnode;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));
	ASSERT(IS_LOCKMGR(request));
	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);

	if (lock) {
		while (lock->l_vnode == vp) {
			nlock = lock->l_next;
			if (SAME_OWNER(lock, request) &&
			    lock->l_start == request->l_start &&
			    lock->l_end == request->l_end) {
				INTERRUPT_WAKEUP(lock);
				return (1);
			}
			lock = nlock;
		}
	}
	return (0);
}

/*
 * Remove all non-OFD locks for the vnode belonging to the given pid and
 * sysid.  That is, since OFD locks are pid-less we'll never match on the
 * incoming pid.  OFD locks are removed earlier in the close() path via
 * closef() and ofdcleanlock().
 */
void
cleanlocks(vnode_t *vp, pid_t pid, int sysid)
{
	graph_t	*gp;
	lock_descriptor_t *lock, *nlock;
	lock_descriptor_t *link_stack;

	STACK_INIT(link_stack);

	gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);

	if (gp == NULL)
		return;
	mutex_enter(&gp->gp_mutex);

	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);

	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);

	if (lock) {
		do {
			nlock = lock->l_next;
			if ((lock->l_flock.l_pid == pid ||
			    pid == IGN_PID) &&
			    lock->l_flock.l_sysid == sysid) {
				CANCEL_WAKEUP(lock);
			}
			lock = nlock;
		} while (lock->l_vnode == vp);
	}

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	if (lock) {
		do {
			nlock = lock->l_next;
			if ((lock->l_flock.l_pid == pid ||
			    pid == IGN_PID) &&
			    lock->l_flock.l_sysid == sysid) {
				flk_delete_active_lock(lock, 0);
				STACK_PUSH(link_stack, lock, l_stack);
			}
			lock = nlock;
		} while (lock->l_vnode == vp);
	}

	while ((lock = STACK_TOP(link_stack)) != NULL) {
		STACK_POP(link_stack, l_stack);
		flk_wakeup(lock, 1);
		flk_free_lock(lock);
	}

	CHECK_SLEEPING_LOCKS(gp);
	CHECK_ACTIVE_LOCKS(gp);
	CHECK_OWNER_LOCKS(gp, pid, sysid, vp);
	mutex_exit(&gp->gp_mutex);
}


/*
 * Called from 'fs' read and write routines for files that have mandatory
 * locking enabled.
 */

int
chklock(struct vnode *vp, int iomode, u_offset_t offset, ssize_t len,
    int fmode, caller_context_t *ct)
{
	register int	i;
	struct flock64	bf;
	int		error = 0;

	bf.l_type = (iomode & FWRITE) ? F_WRLCK : F_RDLCK;
	bf.l_whence = 0;
	bf.l_start = offset;
	bf.l_len = len;
	if (ct == NULL) {
		bf.l_pid = curproc->p_pid;
		bf.l_sysid = 0;
	} else {
		bf.l_pid = ct->cc_pid;
		bf.l_sysid = ct->cc_sysid;
	}
	i = (fmode & (FNDELAY|FNONBLOCK)) ? INOFLCK : INOFLCK|SLPFLCK;
	if ((i = reclock(vp, &bf, i, 0, offset, NULL)) != 0 ||
	    bf.l_type != F_UNLCK)
		error = i ? i : EAGAIN;
	return (error);
}
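/*
 * An illustrative sketch (hypothetical file system code, illustration
 * only): a read path with mandatory locking enabled would gate its I/O
 * roughly as follows, failing with EAGAIN in the non-blocking case or
 * sleeping in reclock() otherwise:
 *
 *	if (MANDLOCK(vp, va.va_mode)) {
 *		error = chklock(vp, FREAD, uiop->uio_loffset,
 *		    uiop->uio_resid, uiop->uio_fmode, ct);
 *		if (error != 0)
 *			return (error);
 *	}
 */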
/*
 * convoff - converts the given data (start, whence) to the
 * given whence.
 */
int
convoff(struct vnode *vp, struct flock64 *lckdat, int whence, offset_t offset)
{
	int		error;
	struct vattr	vattr;

	if ((lckdat->l_whence == 2) || (whence == 2)) {
		vattr.va_mask = AT_SIZE;
		if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
			return (error);
	}

	switch (lckdat->l_whence) {
	case 1:
		lckdat->l_start += offset;
		break;
	case 2:
		lckdat->l_start += vattr.va_size;
		/* FALLTHRU */
	case 0:
		break;
	default:
		return (EINVAL);
	}

	if (lckdat->l_start < 0)
		return (EINVAL);

	switch (whence) {
	case 1:
		lckdat->l_start -= offset;
		break;
	case 2:
		lckdat->l_start -= vattr.va_size;
		/* FALLTHRU */
	case 0:
		break;
	default:
		return (EINVAL);
	}

	lckdat->l_whence = (short)whence;
	return (0);
}
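/*
 * Example (illustrative): a VOP_FRLOCK implementation normalizes a
 * caller-supplied flock64 to absolute offsets before processing, e.g.:
 *
 *	if ((error = convoff(vp, bfp, 0, offset)) != 0)
 *		return (error);
 *	(bfp->l_start is now absolute and bfp->l_whence is 0)
 *
 * Here 'offset' supplies the base for SEEK_CUR (whence 1), and the base
 * for SEEK_END (whence 2) is fetched internally via VOP_GETATTR().
 */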
/* proc_graph function definitions */

/*
 * Check for deadlock due to the new 'lock'.  If a deadlock is found,
 * the lock's edges are freed and 1 is returned.
 */

static int
flk_check_deadlock(lock_descriptor_t *lock)
{
	proc_vertex_t	*start_vertex, *pvertex;
	proc_vertex_t *dvertex;
	proc_edge_t *pep, *ppep;
	edge_t	*ep, *nep;
	proc_vertex_t *process_stack;

	/*
	 * OFD style locks are not associated with any process so there is
	 * no proc graph for these.  Thus we cannot, and do not, do deadlock
	 * detection.
	 */
	if (lock->l_ofd != NULL)
		return (0);

	STACK_INIT(process_stack);

	mutex_enter(&flock_lock);
	start_vertex = flk_get_proc_vertex(lock);
	ASSERT(start_vertex != NULL);

	/* construct the edges from this process to other processes */

	ep = FIRST_ADJ(lock);
	while (ep != HEAD(lock)) {
		proc_vertex_t *adj_proc;

		adj_proc = flk_get_proc_vertex(ep->to_vertex);
		for (pep = start_vertex->edge; pep != NULL; pep = pep->next) {
			if (pep->to_proc == adj_proc) {
				ASSERT(pep->refcount);
				pep->refcount++;
				break;
			}
		}
		if (pep == NULL) {
			pep = flk_get_proc_edge();
			pep->to_proc = adj_proc;
			pep->refcount = 1;
			adj_proc->incount++;
			pep->next = start_vertex->edge;
			start_vertex->edge = pep;
		}
		ep = NEXT_ADJ(ep);
	}

	ep = FIRST_IN(lock);

	while (ep != HEAD(lock)) {
		proc_vertex_t *in_proc;

		in_proc = flk_get_proc_vertex(ep->from_vertex);

		for (pep = in_proc->edge; pep != NULL; pep = pep->next) {
			if (pep->to_proc == start_vertex) {
				ASSERT(pep->refcount);
				pep->refcount++;
				break;
			}
		}
		if (pep == NULL) {
			pep = flk_get_proc_edge();
			pep->to_proc = start_vertex;
			pep->refcount = 1;
			start_vertex->incount++;
			pep->next = in_proc->edge;
			in_proc->edge = pep;
		}
		ep = NEXT_IN(ep);
	}

	if (start_vertex->incount == 0) {
		mutex_exit(&flock_lock);
		return (0);
	}

	flk_proc_graph_uncolor();

	start_vertex->p_sedge = start_vertex->edge;

	STACK_PUSH(process_stack, start_vertex, p_stack);

	while ((pvertex = STACK_TOP(process_stack)) != NULL) {
		for (pep = pvertex->p_sedge; pep != NULL; pep = pep->next) {
			dvertex = pep->to_proc;
			if (!PROC_ARRIVED(dvertex)) {
				STACK_PUSH(process_stack, dvertex, p_stack);
				dvertex->p_sedge = dvertex->edge;
				PROC_ARRIVE(pvertex);
				pvertex->p_sedge = pep->next;
				break;
			}
			if (!PROC_DEPARTED(dvertex))
				goto deadlock;
		}
		if (pep == NULL) {
			PROC_DEPART(pvertex);
			STACK_POP(process_stack, p_stack);
		}
	}
	mutex_exit(&flock_lock);
	return (0);

deadlock:

	/* we remove all lock edges and proc edges */

	ep = FIRST_ADJ(lock);
	while (ep != HEAD(lock)) {
		proc_vertex_t *adj_proc;
		adj_proc = flk_get_proc_vertex(ep->to_vertex);
		nep = NEXT_ADJ(ep);
		IN_LIST_REMOVE(ep);
		ADJ_LIST_REMOVE(ep);
		flk_free_edge(ep);
		ppep = start_vertex->edge;
		for (pep = start_vertex->edge; pep != NULL; ppep = pep,
		    pep = ppep->next) {
			if (pep->to_proc == adj_proc) {
				pep->refcount--;
				if (pep->refcount == 0) {
					if (pep == ppep) {
						start_vertex->edge = pep->next;
					} else {
						ppep->next = pep->next;
					}
					adj_proc->incount--;
					flk_proc_release(adj_proc);
					flk_free_proc_edge(pep);
				}
				break;
			}
		}
		ep = nep;
	}
	ep = FIRST_IN(lock);
	while (ep != HEAD(lock)) {
		proc_vertex_t *in_proc;
		in_proc = flk_get_proc_vertex(ep->from_vertex);
		nep = NEXT_IN(ep);
		IN_LIST_REMOVE(ep);
		ADJ_LIST_REMOVE(ep);
		flk_free_edge(ep);
		ppep = in_proc->edge;
		for (pep = in_proc->edge; pep != NULL; ppep = pep,
		    pep = ppep->next) {
			if (pep->to_proc == start_vertex) {
				pep->refcount--;
				if (pep->refcount == 0) {
					if (pep == ppep) {
						in_proc->edge = pep->next;
					} else {
						ppep->next = pep->next;
					}
					start_vertex->incount--;
					flk_proc_release(in_proc);
					flk_free_proc_edge(pep);
				}
				break;
			}
		}
		ep = nep;
	}
	flk_proc_release(start_vertex);
	mutex_exit(&flock_lock);
	return (1);
}
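/*
 * Note on the cycle test above: PROC_ARRIVE()/PROC_DEPART() are assumed
 * to stamp the vertex's atime/dtime with the current pgraph.mark, giving
 * the classic three-state DFS coloring:
 *
 *	!PROC_ARRIVED(v)	white: unvisited, push it
 *	!PROC_DEPARTED(v)	gray:  still being explored; reaching it
 *				again means a cycle, i.e. granting 'lock'
 *				would deadlock
 *	otherwise		black: fully explored, nothing to do
 */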
/*
 * Allocate a proc edge.
 */

static proc_edge_t *
flk_get_proc_edge()
{
	proc_edge_t *pep;

	pep = kmem_zalloc(sizeof (proc_edge_t), KM_SLEEP);
	flk_proc_edge_allocs++;
	return (pep);
}

/*
 * Free the proc edge. Called whenever its reference count goes to zero.
 */

static void
flk_free_proc_edge(proc_edge_t *pep)
{
	ASSERT(pep->refcount == 0);
	kmem_free((void *)pep, sizeof (proc_edge_t));
	flk_proc_edge_frees++;
}

/*
 * Color the graph. An explicit reset of every vertex is done only when
 * the mark value hits its maximum; otherwise bumping the mark uncolors
 * the whole graph implicitly.
 */

static void
flk_proc_graph_uncolor()
{
	int i;

	if (pgraph.mark == UINT_MAX) {
		for (i = 0; i < pgraph.gcount; i++)
			if (pgraph.proc[i] != NULL) {
				pgraph.proc[i]->atime = 0;
				pgraph.proc[i]->dtime = 0;
			}
		pgraph.mark = 1;
	} else {
		pgraph.mark++;
	}
}

/*
 * Release the proc vertex iff it has neither in edges nor out edges.
 */

static void
flk_proc_release(proc_vertex_t *proc)
{
	ASSERT(MUTEX_HELD(&flock_lock));
	if (proc->edge == NULL && proc->incount == 0) {
		pgraph.proc[proc->index] = NULL;
		pgraph.free++;
		kmem_free(proc, sizeof (proc_vertex_t));
		flk_proc_vertex_frees++;
	}
}
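/*
 * Illustrative sketch (not part of the original file) of the trick in
 * flk_proc_graph_uncolor() above: instead of clearing every vertex's
 * arrive/depart stamps before each traversal (an O(V) sweep), a global
 * "mark" is incremented and a vertex counts as visited only if its stamp
 * equals the current mark.  The O(V) sweep is paid only when the mark
 * wraps.  Names below are hypothetical.
 */
#if 0
#include <limits.h>

static unsigned int cur_mark = 1;

/* Begin a new traversal over n vertices with stamps stamps[0..n-1]. */
static void
new_traversal(unsigned int *stamps, int n)
{
	if (cur_mark == UINT_MAX) {	/* rare: wraparound, explicit reset */
		int i;

		for (i = 0; i < n; i++)
			stamps[i] = 0;
		cur_mark = 1;
	} else {
		cur_mark++;		/* common: O(1) implicit uncolor */
	}
}

#define	VISITED(stamps, i)	((stamps)[i] == cur_mark)
#define	VISIT(stamps, i)	((stamps)[i] = cur_mark)
#endif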
/*
 * Updates the process graph to reflect a change in a lock graph.
 * Note: We should call this function only after we have a correctly
 * recomputed lock graph. Otherwise we might miss a deadlock detection.
 * E.g., in flk_relation() we call this function after
 * flk_recompute_dependencies(); otherwise, if a process tries to lock
 * a vnode hashed into another graph, it might sleep forever.
 */

static void
flk_update_proc_graph(edge_t *ep, int delete)
{
	proc_vertex_t *toproc, *fromproc;
	proc_edge_t *pep, *prevpep;

	mutex_enter(&flock_lock);

	/*
	 * OFD style locks are not associated with any process so there is
	 * no proc graph for these.
	 */
	if (ep->from_vertex->l_ofd != NULL) {
		mutex_exit(&flock_lock);
		return;
	}

	toproc = flk_get_proc_vertex(ep->to_vertex);
	fromproc = flk_get_proc_vertex(ep->from_vertex);

	if (!delete)
		goto add;
	pep = prevpep = fromproc->edge;

	ASSERT(pep != NULL);
	while (pep != NULL) {
		if (pep->to_proc == toproc) {
			ASSERT(pep->refcount > 0);
			pep->refcount--;
			if (pep->refcount == 0) {
				if (pep == prevpep) {
					fromproc->edge = pep->next;
				} else {
					prevpep->next = pep->next;
				}
				toproc->incount--;
				flk_proc_release(toproc);
				flk_free_proc_edge(pep);
			}
			break;
		}
		prevpep = pep;
		pep = pep->next;
	}
	flk_proc_release(fromproc);
	mutex_exit(&flock_lock);
	return;
add:

	pep = fromproc->edge;

	while (pep != NULL) {
		if (pep->to_proc == toproc) {
			ASSERT(pep->refcount > 0);
			pep->refcount++;
			break;
		}
		pep = pep->next;
	}
	if (pep == NULL) {
		pep = flk_get_proc_edge();
		pep->to_proc = toproc;
		pep->refcount = 1;
		toproc->incount++;
		pep->next = fromproc->edge;
		fromproc->edge = pep;
	}
	mutex_exit(&flock_lock);
}
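/*
 * Illustrative sketch (not part of the original file): the process graph
 * collapses parallel lock edges between the same pair of processes into a
 * single refcounted proc edge, which is what flk_update_proc_graph() above
 * maintains.  A minimal user-level model of the add path, with hypothetical
 * names and malloc standing in for kmem_zalloc:
 */
#if 0
#include <stdlib.h>

typedef struct pedge {
	struct pedge *next;
	void *to;		/* destination process */
	int refcount;		/* number of underlying lock edges */
} pedge_t;

/* Record one more lock edge from the list at *headp to "to". */
static void
pedge_add(pedge_t **headp, void *to)
{
	pedge_t *e;

	for (e = *headp; e != NULL; e = e->next) {
		if (e->to == to) {
			e->refcount++;	/* coalesce into existing edge */
			return;
		}
	}
	e = calloc(1, sizeof (*e));
	e->to = to;
	e->refcount = 1;
	e->next = *headp;		/* push onto singly linked list */
	*headp = e;
}
#endif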
/*
 * Set the control status for lock manager requests.
 */

/*
 * PSARC case 1997/292
 *
 * Requires: "nlmid" must be >= 1 and <= clconf_maximum_nodeid().
 * Effects: Set the state of the NLM server identified by "nlmid"
 *   in the NLM registry to state "nlm_state."
 *   Raises exception no_such_nlm if "nlmid" doesn't identify a known
 *   NLM server to this LLM.
 *   Note that when this routine is called with NLM_SHUTTING_DOWN there
 *   may be lock requests that have gotten started but not finished.  In
 *   particular, there may be blocking requests that are in the callback code
 *   before sleeping (so they're not holding the lock for the graph).  If
 *   such a thread reacquires the graph's lock (to go to sleep) after
 *   the NLM state in the NLM registry is set to a non-up value,
 *   it will notice the status and bail out.  If the request gets
 *   granted before the thread can check the NLM registry, let it
 *   continue normally.  It will get flushed when we are called with NLM_DOWN.
 *
 * Modifies: nlm_reg_obj (global)
 * Arguments:
 *    nlmid	(IN):    id uniquely identifying an NLM server
 *    nlm_state	(IN):    NLM server state to change "nlmid" to
 */
void
cl_flk_set_nlm_status(int nlmid, flk_nlm_status_t nlm_state)
{
	/*
	 * Check to see if node is booted as a cluster. If not, return.
	 */
	if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
		return;
	}

	/*
	 * Check for development/debugging.  It is possible to boot a node
	 * in non-cluster mode, and then run a special script, currently
	 * available only to developers, to bring up the node as part of a
	 * cluster.  The problem is that running such a script does not
	 * result in the routine flk_init() being called and hence the global
	 * array nlm_reg_status is NULL.  The NLM thinks it's in cluster mode,
	 * but the LLM needs to do an additional check to see if the global
	 * array has been created or not.  If nlm_reg_status is NULL, then
	 * return, else continue.
	 */
	if (nlm_reg_status == NULL) {
		return;
	}

	ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
	mutex_enter(&nlm_reg_lock);

	if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, nlmid)) {
		/*
		 * If the NLM server "nlmid" is unknown in the NLM registry,
		 * add it to the registry in the nlm shutting down state.
		 */
		FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
		    FLK_NLM_SHUTTING_DOWN);
	} else {
		/*
		 * Change the state of the NLM server identified by "nlmid"
		 * in the NLM registry to the argument "nlm_state."
		 */
		FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid,
		    nlm_state);
	}

	/*
	 * The reason we must register the NLM server that is shutting down
	 * with an LLM that doesn't already know about it (never sent a lock
	 * request) is to handle correctly a race between shutdown and a new
	 * lock request.  Suppose that a shutdown request from the NLM server
	 * invokes this routine at the LLM, and a thread is spawned to
	 * service the request.  Now suppose a new lock request is in
	 * progress and has already passed the first line of defense in
	 * reclock(), which denies new lock requests from NLM servers
	 * that are not in the NLM_UP state.  After the current routine
	 * is invoked for both phases of shutdown, the routine will return,
	 * having done nothing, and the lock request will proceed and
	 * probably be granted.  The problem is that the shutdown was ignored
	 * by the lock request because there was no record of that NLM server
	 * shutting down.  We will be in the peculiar position of thinking
	 * that we've shut down the NLM server and all locks at all LLMs have
	 * been discarded, but in fact there's still one lock held.
	 * The solution is to record the existence of the NLM server and
	 * change its state immediately to NLM_SHUTTING_DOWN.  The lock
	 * request in progress may proceed because the next phase NLM_DOWN
	 * will catch this lock and discard it.
	 */
	mutex_exit(&nlm_reg_lock);

	switch (nlm_state) {
	case FLK_NLM_UP:
		/*
		 * Change the NLM state of all locks still held on behalf of
		 * the NLM server identified by "nlmid" to NLM_UP.
		 */
		cl_flk_change_nlm_state_all_locks(nlmid, FLK_NLM_UP);
		break;

	case FLK_NLM_SHUTTING_DOWN:
		/*
		 * Wake up all sleeping locks for the NLM server identified
		 * by "nlmid."  Note that eventually all woken threads will
		 * have their lock requests cancelled and descriptors
		 * removed from the sleeping lock list.  Note that the NLM
		 * server state associated with each lock descriptor is
		 * changed to FLK_NLM_SHUTTING_DOWN.
		 */
		cl_flk_wakeup_sleeping_nlm_locks(nlmid);
		break;

	case FLK_NLM_DOWN:
		/*
		 * Discard all active, granted locks for this NLM server
		 * identified by "nlmid."
		 */
		cl_flk_unlock_nlm_granted(nlmid);
		break;

	default:
		panic("cl_set_nlm_status: bad status (%d)", nlm_state);
	}
}
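/*
 * Illustrative sketch (not part of the original file) of the essence of
 * the registry manipulation above: a shutdown notification for a server
 * the registry has never seen is still recorded (as SHUTTING_DOWN) rather
 * than ignored, so a concurrent lock request that already passed its
 * admission check cannot outlive the shutdown unnoticed.  The states and
 * names below are hypothetical.
 */
#if 0
enum toy_state { TOY_UNKNOWN, TOY_UP, TOY_SHUTTING_DOWN, TOY_DOWN };

static enum toy_state toy_reg[64];	/* indexed by server id */

static void
toy_set_status(int id, enum toy_state s)
{
	/* the lock protecting toy_reg is assumed held */
	if (toy_reg[id] == TOY_UNKNOWN)
		toy_reg[id] = TOY_SHUTTING_DOWN;  /* record, don't ignore */
	else
		toy_reg[id] = s;
}
#endif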
/*
 * Set the control status for lock manager requests.
 *
 * Note that when this routine is called with FLK_WAKEUP_SLEEPERS, there
 * may be lock requests that have gotten started but not finished.  In
 * particular, there may be blocking requests that are in the callback code
 * before sleeping (so they're not holding the lock for the graph).  If
 * such a thread reacquires the graph's lock (to go to sleep) after
 * flk_lockmgr_status is set to a non-up value, it will notice the status
 * and bail out.  If the request gets granted before the thread can check
 * flk_lockmgr_status, let it continue normally.  It will get flushed when
 * we are called with FLK_LOCKMGR_DOWN.
 */

void
flk_set_lockmgr_status(flk_lockmgr_status_t status)
{
	int i;
	graph_t *gp;
	struct flock_globals *fg;

	fg = flk_get_globals();
	ASSERT(fg != NULL);

	mutex_enter(&flock_lock);
	fg->flk_lockmgr_status = status;
	mutex_exit(&flock_lock);

	/*
	 * If the lock manager is coming back up, all that's needed is to
	 * propagate this information to the graphs.  If the lock manager
	 * is going down, additional action is required, and each graph's
	 * copy of the state is updated atomically with this other action.
	 */
	switch (status) {
	case FLK_LOCKMGR_UP:
		for (i = 0; i < HASH_SIZE; i++) {
			mutex_enter(&flock_lock);
			gp = lock_graph[i];
			mutex_exit(&flock_lock);
			if (gp == NULL)
				continue;
			mutex_enter(&gp->gp_mutex);
			fg->lockmgr_status[i] = status;
			mutex_exit(&gp->gp_mutex);
		}
		break;
	case FLK_WAKEUP_SLEEPERS:
		wakeup_sleeping_lockmgr_locks(fg);
		break;
	case FLK_LOCKMGR_DOWN:
		unlock_lockmgr_granted(fg);
		break;
	default:
		panic("flk_set_lockmgr_status: bad status (%d)", status);
		break;
	}
}
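/*
 * Illustrative sketch (not part of the original file) of the two-level
 * locking idiom used by flk_set_lockmgr_status() above and by the loops
 * that follow: a global lock protects only the hash table of graph
 * pointers, while each graph's own mutex protects its contents, so the
 * global lock is dropped before the per-graph work.  A pthreads analogue
 * with hypothetical names (per-graph mutexes assumed initialized
 * elsewhere):
 */
#if 0
#include <pthread.h>

#define	TOY_HASH	32

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

struct toy_graph {
	pthread_mutex_t g_lock;
	int g_status;
};
static struct toy_graph *toy_table[TOY_HASH];

static void
toy_propagate(int status)
{
	int i;

	for (i = 0; i < TOY_HASH; i++) {
		struct toy_graph *g;

		pthread_mutex_lock(&table_lock);   /* pointer lookup only */
		g = toy_table[i];
		pthread_mutex_unlock(&table_lock);
		if (g == NULL)
			continue;
		pthread_mutex_lock(&g->g_lock);    /* per-graph update */
		g->g_status = status;
		pthread_mutex_unlock(&g->g_lock);
	}
}
#endif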
/*
 * This routine returns all the locks that are active or sleeping and are
 * associated with a particular set of identifiers.  If lock_state != 0, then
 * only locks that match the lock_state are returned. If lock_state == 0, then
 * all locks are returned. If pid == NOPID, the pid is ignored. If
 * use_sysid is FALSE, then the sysid is ignored. If vp is NULL, then the
 * vnode pointer is ignored.
 *
 * A list containing the vnode pointer and an flock structure
 * describing the lock is returned.  Each element in the list is
 * dynamically allocated and must be freed by the caller.  The
 * last item in the list is denoted by a NULL value in the ll_next
 * field.
 *
 * The vnode pointers returned are held.  The caller is responsible
 * for releasing these.  Note that the returned list is only a snapshot of
 * the current lock information, and that it is a snapshot of a moving
 * target (only one graph is locked at a time).
 */

locklist_t *
get_lock_list(int list_type, int lock_state, int sysid, boolean_t use_sysid,
    pid_t pid, const vnode_t *vp, zoneid_t zoneid)
{
	lock_descriptor_t	*lock;
	lock_descriptor_t	*graph_head;
	locklist_t		listhead;
	locklist_t		*llheadp;
	locklist_t		*llp;
	locklist_t		*lltp;
	graph_t			*gp;
	int			i;
	int			first_index; /* graph index */
	int			num_indexes; /* graph index */

	ASSERT((list_type == FLK_ACTIVE_STATE) ||
	    (list_type == FLK_SLEEPING_STATE));

	/*
	 * Get a pointer to something to use as a list head while building
	 * the rest of the list.
	 */
	llheadp = &listhead;
	lltp = llheadp;
	llheadp->ll_next = (locklist_t *)NULL;

	/* Figure out which graphs we want to look at. */
	if (vp == NULL) {
		first_index = 0;
		num_indexes = HASH_SIZE;
	} else {
		first_index = HASH_INDEX(vp);
		num_indexes = 1;
	}

	for (i = first_index; i < first_index + num_indexes; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);
		if (gp == NULL) {
			continue;
		}

		mutex_enter(&gp->gp_mutex);
		graph_head = (list_type == FLK_ACTIVE_STATE) ?
		    ACTIVE_HEAD(gp) : SLEEPING_HEAD(gp);
		for (lock = graph_head->l_next;
		    lock != graph_head;
		    lock = lock->l_next) {
			if (use_sysid && lock->l_flock.l_sysid != sysid)
				continue;
			if (pid != NOPID && lock->l_flock.l_pid != pid)
				continue;
			if (vp != NULL && lock->l_vnode != vp)
				continue;
			if (lock_state && !(lock_state & lock->l_state))
				continue;
			if (zoneid != lock->l_zoneid && zoneid != ALL_ZONES)
				continue;
			/*
			 * A matching lock was found.  Allocate
			 * space for a new locklist entry and fill
			 * it in.
			 */
			llp = kmem_alloc(sizeof (locklist_t), KM_SLEEP);
			lltp->ll_next = llp;
			VN_HOLD(lock->l_vnode);
			llp->ll_vp = lock->l_vnode;
			create_flock(lock, &(llp->ll_flock));
			llp->ll_next = (locklist_t *)NULL;
			lltp = llp;
		}
		mutex_exit(&gp->gp_mutex);
	}

	llp = llheadp->ll_next;
	return (llp);
}

/*
 * These two functions are simply interfaces to get_lock_list.  They return
 * a list of sleeping or active locks for the given sysid and pid.  See
 * get_lock_list for details.
 *
 * In either case we don't particularly care to specify the zone of interest;
 * the sysid-space is global across zones, so the sysid will map to exactly one
 * zone, and we'll return information for that zone.
 */

locklist_t *
flk_get_sleeping_locks(int sysid, pid_t pid)
{
	return (get_lock_list(FLK_SLEEPING_STATE, 0, sysid, B_TRUE, pid, NULL,
	    ALL_ZONES));
}

locklist_t *
flk_get_active_locks(int sysid, pid_t pid)
{
	return (get_lock_list(FLK_ACTIVE_STATE, 0, sysid, B_TRUE, pid, NULL,
	    ALL_ZONES));
}

/*
 * Another interface to get_lock_list.  This one returns all the active
 * locks for a given vnode.  Again, see get_lock_list for details.
 *
 * We don't need to specify which zone's locks we're interested in.  The
 * matter would only be interesting if the vnode belonged to NFS, and NFS
 * vnodes can't be used by multiple zones, so the list of locks will all be
 * from the right zone.
 */

locklist_t *
flk_active_locks_for_vp(const vnode_t *vp)
{
	return (get_lock_list(FLK_ACTIVE_STATE, 0, 0, B_FALSE, NOPID, vp,
	    ALL_ZONES));
}
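/*
 * Illustrative sketch (not part of the original file): get_lock_list()
 * above builds its result with a stack-allocated dummy head, which removes
 * the "is this the first element?" special case while appending.  A
 * user-level reduction with hypothetical names:
 */
#if 0
#include <stdlib.h>

typedef struct node { struct node *next; int val; } node_t;

static node_t *
collect(const int *src, int n)
{
	node_t head;			/* dummy head, never returned */
	node_t *tail = &head;
	int i;

	head.next = NULL;
	for (i = 0; i < n; i++) {
		node_t *e = malloc(sizeof (*e));

		e->val = src[i];
		e->next = NULL;
		tail->next = e;		/* append without special cases */
		tail = e;
	}
	return (head.next);		/* real first element, or NULL */
}
#endif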
/*
 * Another interface to get_lock_list.  This one returns all the active
 * nbmand locks for a given vnode.  Again, see get_lock_list for details.
 *
 * See the comment for flk_active_locks_for_vp() for why we don't care to
 * specify the particular zone of interest.
 */
locklist_t *
flk_active_nbmand_locks_for_vp(const vnode_t *vp)
{
	return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
	    NOPID, vp, ALL_ZONES));
}

/*
 * Another interface to get_lock_list.  This one returns all the active
 * nbmand locks for a given pid.  Again, see get_lock_list for details.
 *
 * The zone doesn't need to be specified here; the locks held by a
 * particular process will either be local (ie, non-NFS) or from the zone
 * the process is executing in.  This is because other parts of the system
 * ensure that an NFS vnode can't be used in a zone other than that in
 * which it was opened.
 */
locklist_t *
flk_active_nbmand_locks(pid_t pid)
{
	return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
	    pid, NULL, ALL_ZONES));
}

/*
 * Free up all entries in the locklist.
 */
void
flk_free_locklist(locklist_t *llp)
{
	locklist_t *next_llp;

	while (llp) {
		next_llp = llp->ll_next;
		VN_RELE(llp->ll_vp);
		kmem_free(llp, sizeof (*llp));
		llp = next_llp;
	}
}
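/*
 * Illustrative sketch (not part of the original file): flk_free_locklist()
 * above, and the sleeping/active loops below, capture the next pointer
 * before the current element is freed or unlinked; reading it afterwards
 * would be a use-after-free.  Minimal form (node_t as in the previous
 * sketch):
 */
#if 0
static void
free_all(node_t *p)
{
	node_t *next;

	while (p != NULL) {
		next = p->next;	/* save before the element goes away */
		free(p);
		p = next;
	}
}
#endif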
static void
cl_flk_change_nlm_state_all_locks(int nlmid, flk_nlm_status_t nlm_state)
{
	/*
	 * For each graph "lg" in the hash table lock_graph do
	 * a. Get the list of sleeping locks
	 * b. For each lock descriptor in the list do
	 *	i.   If the requested lock is an NLM server request AND
	 *		the nlmid is the same as the routine argument then
	 *		change the lock descriptor's state field to
	 *		"nlm_state."
	 * c. Get the list of active locks
	 * d. For each lock descriptor in the list do
	 *	i.   If the requested lock is an NLM server request AND
	 *		the nlmid is the same as the routine argument then
	 *		change the lock descriptor's state field to
	 *		"nlm_state."
	 */

	int			i;
	graph_t			*gp;			/* lock graph */
	lock_descriptor_t	*lock;			/* lock */
	lock_descriptor_t	*nlock = NULL;		/* next lock */
	int			lock_nlmid;

	for (i = 0; i < HASH_SIZE; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);
		if (gp == NULL) {
			continue;
		}

		/* Get list of sleeping locks in current lock graph. */
		mutex_enter(&gp->gp_mutex);
		for (lock = SLEEPING_HEAD(gp)->l_next;
		    lock != SLEEPING_HEAD(gp);
		    lock = nlock) {
			nlock = lock->l_next;
			/* get NLM id */
			lock_nlmid = GETNLMID(lock->l_flock.l_sysid);

			/*
			 * If NLM server request AND nlmid of lock matches
			 * nlmid of argument, then set the NLM state of the
			 * lock to "nlm_state."
			 */
			if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
				SET_NLM_STATE(lock, nlm_state);
			}
		}

		/* Get list of active locks in current lock graph. */
		for (lock = ACTIVE_HEAD(gp)->l_next;
		    lock != ACTIVE_HEAD(gp);
		    lock = nlock) {
			nlock = lock->l_next;
			/* get NLM id */
			lock_nlmid = GETNLMID(lock->l_flock.l_sysid);

			/*
			 * If NLM server request AND nlmid of lock matches
			 * nlmid of argument, then set the NLM state of the
			 * lock to "nlm_state."
			 */
			if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) {
				ASSERT(IS_ACTIVE(lock));
				SET_NLM_STATE(lock, nlm_state);
			}
		}
		mutex_exit(&gp->gp_mutex);
	}
}

/*
 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid().
 * Effects:  Find all sleeping lock manager requests _only_ for the NLM
 *   server identified by "nlmid."  Poke those lock requests.
 */
static void
cl_flk_wakeup_sleeping_nlm_locks(int nlmid)
{
	lock_descriptor_t *lock;
	lock_descriptor_t *nlock = NULL;	/* next lock */
	int i;
	graph_t *gp;
	int lock_nlmid;

	for (i = 0; i < HASH_SIZE; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);
		if (gp == NULL) {
			continue;
		}

		mutex_enter(&gp->gp_mutex);
		for (lock = SLEEPING_HEAD(gp)->l_next;
		    lock != SLEEPING_HEAD(gp);
		    lock = nlock) {
			nlock = lock->l_next;
			/*
			 * If NLM server request _and_ nlmid of lock matches
			 * nlmid of argument, then set the NLM state of the
			 * lock to NLM_SHUTTING_DOWN, and wake up the
			 * sleeping request.
			 */
			if (IS_LOCKMGR(lock)) {
				/* get NLM id */
				lock_nlmid =
				    GETNLMID(lock->l_flock.l_sysid);
				if (nlmid == lock_nlmid) {
					SET_NLM_STATE(lock,
					    FLK_NLM_SHUTTING_DOWN);
					INTERRUPT_WAKEUP(lock);
				}
			}
		}
		mutex_exit(&gp->gp_mutex);
	}
}
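/*
 * Illustrative sketch (not part of the original file): the loops above
 * recover the NLM server's node id from a lock's sysid with GETNLMID().
 * Assuming, purely for illustration, an encoding that keeps the node id
 * in the bits above a 16-bit local sysid (the real split is defined in
 * sys/flock_impl.h), the extraction would look like:
 */
#if 0
#include <stdint.h>

#define	TOY_SYSID_BITS	16	/* hypothetical split */

static inline int
toy_getnlmid(uint32_t sysid)
{
	return ((int)(sysid >> TOY_SYSID_BITS));
}
#endif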
/*
 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid()
 * Effects:  Find all active (granted) lock manager locks _only_ for the
 *   NLM server identified by "nlmid" and release them.
 */
static void
cl_flk_unlock_nlm_granted(int nlmid)
{
	lock_descriptor_t *lock;
	lock_descriptor_t *nlock = NULL;	/* next lock */
	int i;
	graph_t *gp;
	int lock_nlmid;

	for (i = 0; i < HASH_SIZE; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);
		if (gp == NULL) {
			continue;
		}

		mutex_enter(&gp->gp_mutex);
		for (lock = ACTIVE_HEAD(gp)->l_next;
		    lock != ACTIVE_HEAD(gp);
		    lock = nlock) {
			nlock = lock->l_next;
			ASSERT(IS_ACTIVE(lock));

			/*
			 * If it's an NLM server request _and_ the nlmid of
			 * the lock matches the nlmid of the argument, then
			 * remove the active lock from the list, wake up
			 * blocked threads, and free the storage for the
			 * lock.  Note that there's no need to mark the NLM
			 * state of this lock to NLM_DOWN because the lock
			 * will be deleted anyway and its storage freed.
			 */
			if (IS_LOCKMGR(lock)) {
				/* get NLM id */
				lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
				if (nlmid == lock_nlmid) {
					flk_delete_active_lock(lock, 0);
					flk_wakeup(lock, 1);
					flk_free_lock(lock);
				}
			}
		}
		mutex_exit(&gp->gp_mutex);
	}
}
/*
 * Find all sleeping lock manager requests and poke them.
 */
static void
wakeup_sleeping_lockmgr_locks(struct flock_globals *fg)
{
	lock_descriptor_t *lock;
	lock_descriptor_t *nlock = NULL;	/* next lock */
	int i;
	graph_t *gp;
	zoneid_t zoneid = getzoneid();

	for (i = 0; i < HASH_SIZE; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);
		if (gp == NULL) {
			continue;
		}

		mutex_enter(&gp->gp_mutex);
		fg->lockmgr_status[i] = FLK_WAKEUP_SLEEPERS;
		for (lock = SLEEPING_HEAD(gp)->l_next;
		    lock != SLEEPING_HEAD(gp);
		    lock = nlock) {
			nlock = lock->l_next;
			if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
				INTERRUPT_WAKEUP(lock);
			}
		}
		mutex_exit(&gp->gp_mutex);
	}
}


/*
 * Find all active (granted) lock manager locks and release them.
 */
static void
unlock_lockmgr_granted(struct flock_globals *fg)
{
	lock_descriptor_t *lock;
	lock_descriptor_t *nlock = NULL;	/* next lock */
	int i;
	graph_t *gp;
	zoneid_t zoneid = getzoneid();

	for (i = 0; i < HASH_SIZE; i++) {
		mutex_enter(&flock_lock);
		gp = lock_graph[i];
		mutex_exit(&flock_lock);
		if (gp == NULL) {
			continue;
		}

		mutex_enter(&gp->gp_mutex);
		fg->lockmgr_status[i] = FLK_LOCKMGR_DOWN;
		for (lock = ACTIVE_HEAD(gp)->l_next;
		    lock != ACTIVE_HEAD(gp);
		    lock = nlock) {
			nlock = lock->l_next;
			if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
				ASSERT(IS_ACTIVE(lock));
				flk_delete_active_lock(lock, 0);
				flk_wakeup(lock, 1);
				flk_free_lock(lock);
			}
		}
		mutex_exit(&gp->gp_mutex);
	}
}


/*
 * Wait until a lock is granted, cancelled, or interrupted.
 */

static void
wait_for_lock(lock_descriptor_t *request)
{
	graph_t *gp = request->l_graph;

	ASSERT(MUTEX_HELD(&gp->gp_mutex));

	while (!(IS_GRANTED(request)) && !(IS_CANCELLED(request)) &&
	    !(IS_INTERRUPTED(request))) {
		if (!cv_wait_sig(&request->l_cv, &gp->gp_mutex)) {
			flk_set_state(request, FLK_INTERRUPTED_STATE);
			request->l_state |= INTERRUPTED_LOCK;
		}
	}
}

/*
 * Create an flock structure from the existing lock information.
 *
 * This routine is used to create flock structures for the lock manager
 * to use in a reclaim request.  Since the lock was originated on this
 * host, it must conform to UNIX semantics, so no checking is
 * done to make sure it falls within the lower half of the 32-bit range.
 */

static void
create_flock(lock_descriptor_t *lp, flock64_t *flp)
{
	ASSERT(lp->l_end == MAX_U_OFFSET_T || lp->l_end <= MAXEND);
	ASSERT(lp->l_end >= lp->l_start);

	flp->l_type = lp->l_type;
	flp->l_whence = 0;
	flp->l_start = lp->l_start;
	flp->l_len = (lp->l_end == MAX_U_OFFSET_T) ? 0 :
	    (lp->l_end - lp->l_start + 1);
	flp->l_sysid = lp->l_flock.l_sysid;
	flp->l_pid = lp->l_flock.l_pid;
}
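/*
 * Illustrative sketch (not part of the original file): wait_for_lock()
 * above is the classic condition-variable pattern -- re-test the predicate
 * after every wakeup, with the mutex held around both the test and the
 * wait, because wakeups can be spurious and the state can change between
 * signal and resume.  (The kernel version additionally treats a zero
 * return from cv_wait_sig() as a signal and marks the request
 * interrupted.)  A pthreads analogue with hypothetical names:
 */
#if 0
#include <pthread.h>

struct toy_req {
	pthread_mutex_t lock;
	pthread_cond_t cv;
	int granted, cancelled, interrupted;
};

static void
toy_wait(struct toy_req *r)
{
	pthread_mutex_lock(&r->lock);
	while (!r->granted && !r->cancelled && !r->interrupted)
		pthread_cond_wait(&r->cv, &r->lock);	/* re-check on wake */
	pthread_mutex_unlock(&r->lock);
}
#endif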
/*
 * Convert flock_t data describing a lock range into unsigned long starting
 * and ending points, which are put into lock_request.  Returns 0 or an
 * errno value.
 */

int
flk_convert_lock_data(vnode_t *vp, flock64_t *flp,
    u_offset_t *start, u_offset_t *end, offset_t offset)
{
	struct vattr	vattr;
	int	error;

	/*
	 * Determine the starting point of the request.
	 */
	switch (flp->l_whence) {
	case 0:		/* SEEK_SET */
		*start = (u_offset_t)flp->l_start;
		break;
	case 1:		/* SEEK_CUR */
		*start = (u_offset_t)(flp->l_start + offset);
		break;
	case 2:		/* SEEK_END */
		vattr.va_mask = AT_SIZE;
		if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
			return (error);
		*start = (u_offset_t)(flp->l_start + vattr.va_size);
		break;
	default:
		return (EINVAL);
	}

	/*
	 * Determine the range covered by the request.
	 */
	if (flp->l_len == 0)
		*end = MAX_U_OFFSET_T;
	else if ((offset_t)flp->l_len > 0) {
		*end = (u_offset_t)(*start + (flp->l_len - 1));
	} else {
		/*
		 * Negative length; why do we even allow this?
		 * Because this allows easy specification of
		 * the last n bytes of the file.
		 */
		*end = *start;
		*start += (u_offset_t)flp->l_len;
		(*start)++;
	}
	return (0);
}

/*
 * Check the validity of lock data.  This can be used by the NFS
 * frlock routines to check data before contacting the server.  The
 * server must support semantics that aren't as restrictive as
 * the UNIX API, so the NFS client is required to check.
 * The maximum is now passed in by the caller.
 */

int
flk_check_lock_data(u_offset_t start, u_offset_t end, offset_t max)
{
	/*
	 * The end (length) for local locking should never be greater
	 * than MAXEND.  However, the representation for
	 * the entire file is MAX_U_OFFSET_T.
	 */
	if ((start > max) ||
	    ((end > max) && (end != MAX_U_OFFSET_T))) {
		return (EINVAL);
	}
	if (start > end) {
		return (EINVAL);
	}
	return (0);
}
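/*
 * Worked example (not part of the original file) of the conversion done by
 * flk_convert_lock_data() above, for a 100-byte file with the file offset
 * at 50:
 *
 *	l_whence=0 (SEEK_SET), l_start=10,  l_len=5  -> [10, 14]
 *	l_whence=1 (SEEK_CUR), l_start=10,  l_len=0  -> [60, MAX_U_OFFSET_T]
 *	l_whence=2 (SEEK_END), l_start=-10, l_len=5  -> [90, 94]
 *	l_whence=0 (SEEK_SET), l_start=20,  l_len=-5 -> [16, 20]
 *
 * The negative-length case locks the |l_len| bytes *ending* at l_start:
 * end = start, then start = start + l_len + 1.
 */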
/*
 * Fill in request->l_flock with information about the lock blocking the
 * request.  The complexity here is that lock manager requests are allowed
 * to see into the upper part of the 32-bit address range, whereas local
 * requests are only allowed to see signed values.
 *
 * What should be done when "blocker" is a lock manager lock that uses the
 * upper portion of the 32-bit range, but "request" is local?  Since the
 * request has already been determined to have been blocked by the blocker,
 * at least some portion of "blocker" must be in the range of the request,
 * or the request extends to the end of file.  For the first case, the
 * portion in the lower range is returned with the indication that it goes
 * "to EOF."  For the second case, the last byte of the lower range is
 * returned with the indication that it goes "to EOF."
 */

static void
report_blocker(lock_descriptor_t *blocker, lock_descriptor_t *request)
{
	flock64_t *flrp;		/* l_flock portion of request */

	ASSERT(blocker != NULL);

	flrp = &request->l_flock;
	flrp->l_whence = 0;
	flrp->l_type = blocker->l_type;
	flrp->l_pid = blocker->l_flock.l_pid;
	flrp->l_sysid = blocker->l_flock.l_sysid;
	request->l_ofd = blocker->l_ofd;

	if (IS_LOCKMGR(request)) {
		flrp->l_start = blocker->l_start;
		if (blocker->l_end == MAX_U_OFFSET_T)
			flrp->l_len = 0;
		else
			flrp->l_len = blocker->l_end - blocker->l_start + 1;
	} else {
		if (blocker->l_start > MAXEND) {
			flrp->l_start = MAXEND;
			flrp->l_len = 0;
		} else {
			flrp->l_start = blocker->l_start;
			if (blocker->l_end == MAX_U_OFFSET_T)
				flrp->l_len = 0;
			else
				flrp->l_len = blocker->l_end -
				    blocker->l_start + 1;
		}
	}
}

/*
 * PSARC case 1997/292
 */
/*
 * This is the public routine exported by flock.h.
 */
void
cl_flk_change_nlm_state_to_unknown(int nlmid)
{
	/*
	 * Check to see if node is booted as a cluster. If not, return.
	 */
	if ((cluster_bootflags & CLUSTER_BOOTED) == 0) {
		return;
	}

	/*
	 * See comment in cl_flk_set_nlm_status().
	 */
	if (nlm_reg_status == NULL) {
		return;
	}

	/*
	 * Protect the NLM registry state with a mutex.
	 */
	ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
	mutex_enter(&nlm_reg_lock);
	FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, FLK_NLM_UNKNOWN);
	mutex_exit(&nlm_reg_lock);
}
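/*
 * Worked example (not part of the original file) of the clamping done by
 * report_blocker() above for a local (non-lock-manager) requester, which
 * may only see offsets up to MAXEND:
 *
 *	blocker [100, 200] with MAXEND >= 200  -> l_start=100, l_len=101
 *	blocker [100, MAX_U_OFFSET_T]          -> l_start=100, l_len=0
 *	blocker [MAXEND+10, MAX_U_OFFSET_T]    -> l_start=MAXEND, l_len=0
 *
 * l_len == 0 is the flock convention for "to end of file."
 */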
42847c478bd9Sstevel@tonic-gate */ 42857c478bd9Sstevel@tonic-gate 42867c478bd9Sstevel@tonic-gate int 42877c478bd9Sstevel@tonic-gate nbl_lock_conflict(vnode_t *vp, nbl_op_t op, u_offset_t offset, 4288da6c28aaSamw ssize_t length, int svmand, caller_context_t *ct) 42897c478bd9Sstevel@tonic-gate { 42907c478bd9Sstevel@tonic-gate int conflict = 0; 42917c478bd9Sstevel@tonic-gate graph_t *gp; 42927c478bd9Sstevel@tonic-gate lock_descriptor_t *lock; 4293da6c28aaSamw pid_t pid; 4294da6c28aaSamw int sysid; 4295da6c28aaSamw 4296da6c28aaSamw if (ct == NULL) { 4297da6c28aaSamw pid = curproc->p_pid; 4298da6c28aaSamw sysid = 0; 4299da6c28aaSamw } else { 4300da6c28aaSamw pid = ct->cc_pid; 4301da6c28aaSamw sysid = ct->cc_sysid; 4302da6c28aaSamw } 43037c478bd9Sstevel@tonic-gate 43047c478bd9Sstevel@tonic-gate mutex_enter(&flock_lock); 43057c478bd9Sstevel@tonic-gate gp = lock_graph[HASH_INDEX(vp)]; 43067c478bd9Sstevel@tonic-gate mutex_exit(&flock_lock); 43077c478bd9Sstevel@tonic-gate if (gp == NULL) 43087c478bd9Sstevel@tonic-gate return (0); 43097c478bd9Sstevel@tonic-gate 43107c478bd9Sstevel@tonic-gate mutex_enter(&gp->gp_mutex); 43117c478bd9Sstevel@tonic-gate SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 43127c478bd9Sstevel@tonic-gate 43137c478bd9Sstevel@tonic-gate for (; lock && lock->l_vnode == vp; lock = lock->l_next) { 43147c478bd9Sstevel@tonic-gate if ((svmand || (lock->l_state & NBMAND_LOCK)) && 4315da6c28aaSamw (lock->l_flock.l_sysid != sysid || 4316da6c28aaSamw lock->l_flock.l_pid != pid) && 43177c478bd9Sstevel@tonic-gate lock_blocks_io(op, offset, length, 43187c478bd9Sstevel@tonic-gate lock->l_type, lock->l_start, lock->l_end)) { 43197c478bd9Sstevel@tonic-gate conflict = 1; 43207c478bd9Sstevel@tonic-gate break; 43217c478bd9Sstevel@tonic-gate } 43227c478bd9Sstevel@tonic-gate } 43237c478bd9Sstevel@tonic-gate mutex_exit(&gp->gp_mutex); 43247c478bd9Sstevel@tonic-gate 43257c478bd9Sstevel@tonic-gate return (conflict); 43267c478bd9Sstevel@tonic-gate } 43277c478bd9Sstevel@tonic-gate 43287c478bd9Sstevel@tonic-gate /* 43297c478bd9Sstevel@tonic-gate * Return non-zero if the given I/O request conflicts with the given lock. 
/*
 * Return non-zero if the given I/O request conflicts with the given lock.
 */
static int
lock_blocks_io(nbl_op_t op, u_offset_t offset, ssize_t length,
    int lock_type, u_offset_t lock_start, u_offset_t lock_end)
{
	ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE);
	ASSERT(lock_type == F_RDLCK || lock_type == F_WRLCK);

	/* Two readers never conflict. */
	if (op == NBL_READ && lock_type == F_RDLCK)
		return (0);

	/*
	 * Otherwise there is a conflict exactly when the request range
	 * [offset, offset + length) overlaps the lock range
	 * [lock_start, lock_end].
	 */
	if (offset <= lock_start && lock_start < offset + length)
		return (1);
	if (lock_start <= offset && offset <= lock_end)
		return (1);

	return (0);
}
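/*
 * Worked example of the overlap test above: take a lock covering bytes
 * [100, 199] (lock_start = 100, lock_end = 199) and a write of length 50
 * at offset 150.  The first test fails (150 <= 100 is false), but the
 * second succeeds (100 <= 150 and 150 <= 199), so the request conflicts.
 * A write at offset 200 passes neither test: 200 <= 100 is false, and
 * while 100 <= 200 holds, 200 <= 199 does not.
 */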
#ifdef DEBUG
static void
check_active_locks(graph_t *gp)
{
	lock_descriptor_t *lock, *lock1;
	edge_t *ep;

	for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
	    lock = lock->l_next) {
		ASSERT(IS_ACTIVE(lock));
		ASSERT(NOT_BLOCKED(lock));
		ASSERT(!IS_BARRIER(lock));

		/* Every in-edge must come from a blocked, sleeping lock. */
		ep = FIRST_IN(lock);

		while (ep != HEAD(lock)) {
			ASSERT(IS_SLEEPING(ep->from_vertex));
			ASSERT(!NOT_BLOCKED(ep->from_vertex));
			ep = NEXT_IN(ep);
		}

		/* No two active locks on the same vnode may block each other. */
		for (lock1 = lock->l_next; lock1 != ACTIVE_HEAD(gp);
		    lock1 = lock1->l_next) {
			if (lock1->l_vnode == lock->l_vnode) {
				if (BLOCKS(lock1, lock)) {
					cmn_err(CE_PANIC,
					    "active lock %p blocks %p",
					    (void *)lock1, (void *)lock);
				} else if (BLOCKS(lock, lock1)) {
					cmn_err(CE_PANIC,
					    "active lock %p blocks %p",
					    (void *)lock, (void *)lock1);
				}
			}
		}
	}
}

/*
 * Effect: This function checks whether the transition from 'old_state' to
 * 'new_state' is a valid one.  It returns 0 if the transition is valid
 * and 1 if it is not.
 * For a map of valid transitions, see sys/flock_impl.h.
 */
static int
check_lock_transition(int old_state, int new_state)
{
	switch (old_state) {
	case FLK_INITIAL_STATE:
		if ((new_state == FLK_START_STATE) ||
		    (new_state == FLK_SLEEPING_STATE) ||
		    (new_state == FLK_ACTIVE_STATE) ||
		    (new_state == FLK_DEAD_STATE)) {
			return (0);
		} else {
			return (1);
		}
	case FLK_START_STATE:
		if ((new_state == FLK_ACTIVE_STATE) ||
		    (new_state == FLK_DEAD_STATE)) {
			return (0);
		} else {
			return (1);
		}
	case FLK_ACTIVE_STATE:
		if (new_state == FLK_DEAD_STATE) {
			return (0);
		} else {
			return (1);
		}
	case FLK_SLEEPING_STATE:
		if ((new_state == FLK_GRANTED_STATE) ||
		    (new_state == FLK_INTERRUPTED_STATE) ||
		    (new_state == FLK_CANCELLED_STATE)) {
			return (0);
		} else {
			return (1);
		}
	case FLK_GRANTED_STATE:
		if ((new_state == FLK_START_STATE) ||
		    (new_state == FLK_INTERRUPTED_STATE) ||
		    (new_state == FLK_CANCELLED_STATE)) {
			return (0);
		} else {
			return (1);
		}
	case FLK_CANCELLED_STATE:
		if ((new_state == FLK_INTERRUPTED_STATE) ||
		    (new_state == FLK_DEAD_STATE)) {
			return (0);
		} else {
			return (1);
		}
	case FLK_INTERRUPTED_STATE:
		if (new_state == FLK_DEAD_STATE) {
			return (0);
		} else {
			return (1);
		}
	case FLK_DEAD_STATE:
		/* May be set more than once. */
		if (new_state == FLK_DEAD_STATE) {
			return (0);
		} else {
			return (1);
		}
	default:
		return (1);
	}
}
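/*
 * DEBUG-only consistency check for the sleeping-locks list (a summary
 * inferred from the assertions below): every sleeping lock that is
 * blocked by another lock -- sleeping or active -- on the same vnode
 * must be neither granted nor unblocked and must have a path in the
 * graph to its blocker, and every out-edge of a sleeping lock must
 * point at a lock that actually blocks it.
 */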
static void
check_sleeping_locks(graph_t *gp)
{
	lock_descriptor_t *lock1, *lock2;
	edge_t *ep;

	for (lock1 = SLEEPING_HEAD(gp)->l_next; lock1 != SLEEPING_HEAD(gp);
	    lock1 = lock1->l_next) {
		ASSERT(!IS_BARRIER(lock1));
		for (lock2 = lock1->l_next; lock2 != SLEEPING_HEAD(gp);
		    lock2 = lock2->l_next) {
			if (lock1->l_vnode == lock2->l_vnode) {
				if (BLOCKS(lock2, lock1)) {
					ASSERT(!IS_GRANTED(lock1));
					ASSERT(!NOT_BLOCKED(lock1));
					path(lock1, lock2);
				}
			}
		}

		for (lock2 = ACTIVE_HEAD(gp)->l_next; lock2 != ACTIVE_HEAD(gp);
		    lock2 = lock2->l_next) {
			ASSERT(!IS_BARRIER(lock1));
			if (lock1->l_vnode == lock2->l_vnode) {
				if (BLOCKS(lock2, lock1)) {
					ASSERT(!IS_GRANTED(lock1));
					ASSERT(!NOT_BLOCKED(lock1));
					path(lock1, lock2);
				}
			}
		}

		ep = FIRST_ADJ(lock1);
		while (ep != HEAD(lock1)) {
			ASSERT(BLOCKS(ep->to_vertex, lock1));
			ep = NEXT_ADJ(ep);
		}
	}
}
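/*
 * Return non-zero if there is a path of length two or more from lock1 to
 * lock2 in the lock graph (a summary inferred from the code below: the
 * initial loop pushes lock1's direct successors without testing them
 * against lock2, so a lone direct edge is not reported).  The search is
 * an iterative depth-first traversal that uses graph coloring to avoid
 * revisiting vertices.  When 'no_path' is set, the caller is asserting
 * that lock2 is not a direct successor of lock1.
 */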
static int
level_two_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2, int no_path)
{
	edge_t *ep;
	lock_descriptor_t *vertex;
	lock_descriptor_t *vertex_stack;

	STACK_INIT(vertex_stack);

	flk_graph_uncolor(lock1->l_graph);
	ep = FIRST_ADJ(lock1);
	ASSERT(ep != HEAD(lock1));
	while (ep != HEAD(lock1)) {
		if (no_path)
			ASSERT(ep->to_vertex != lock2);
		STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
		COLOR(ep->to_vertex);
		ep = NEXT_ADJ(ep);
	}

	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
		STACK_POP(vertex_stack, l_dstack);
		for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
		    ep = NEXT_ADJ(ep)) {
			if (COLORED(ep->to_vertex))
				continue;
			COLOR(ep->to_vertex);
			if (ep->to_vertex == lock2)
				return (1);

			STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
		}
	}
	return (0);
}

static void
check_owner_locks(graph_t *gp, pid_t pid, int sysid, vnode_t *vp)
{
	lock_descriptor_t *lock;

	/* Ignore OFD style locks since they're not process-wide. */
	if (pid == 0)
		return;

	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);

	if (lock) {
		while (lock != ACTIVE_HEAD(gp) && (lock->l_vnode == vp)) {
			if (lock->l_flock.l_pid == pid &&
			    lock->l_flock.l_sysid == sysid)
				cmn_err(CE_PANIC,
				    "owner pid %d's lock %p in active queue",
				    pid, (void *)lock);
			lock = lock->l_next;
		}
	}

	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);

	if (lock) {
		while (lock != SLEEPING_HEAD(gp) && (lock->l_vnode == vp)) {
			if (lock->l_flock.l_pid == pid &&
			    lock->l_flock.l_sysid == sysid)
				cmn_err(CE_PANIC,
				    "owner pid %d's lock %p in sleep queue",
				    pid, (void *)lock);
			lock = lock->l_next;
		}
	}
}

static int
level_one_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
{
	edge_t *ep = FIRST_ADJ(lock1);

	while (ep != HEAD(lock1)) {
		if (ep->to_vertex == lock2)
			return (1);
		else
			ep = NEXT_ADJ(ep);
	}
	return (0);
}

static int
no_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
{
	return (!level_two_path(lock1, lock2, 1));
}
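/*
 * Verify that some path exists from lock1 to its blocker lock2 (a summary
 * inferred from the checks below).  If a direct edge exists, warn when a
 * path of length two or more also exists, presumably because each blocking
 * relationship should be represented only once; if no direct edge exists,
 * panic unless some multi-step path can be found.
 */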
static void
path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
{
	if (level_one_path(lock1, lock2)) {
		if (level_two_path(lock1, lock2, 0) != 0) {
			cmn_err(CE_WARN,
			    "one edge one path from lock1 %p lock2 %p",
			    (void *)lock1, (void *)lock2);
		}
	} else if (no_path(lock1, lock2)) {
		cmn_err(CE_PANIC,
		    "No path from lock1 %p to lock2 %p",
		    (void *)lock1, (void *)lock2);
	}
}
#endif /* DEBUG */