17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 550a83466Sjwahlig * Common Development and Distribution License (the "License"). 650a83466Sjwahlig * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22a19609f8Sjv227347 * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved. 237c478bd9Sstevel@tonic-gate */ 247c478bd9Sstevel@tonic-gate 257c478bd9Sstevel@tonic-gate /* 267c478bd9Sstevel@tonic-gate * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T. 
277c478bd9Sstevel@tonic-gate * All Rights Reserved 287c478bd9Sstevel@tonic-gate */ 297c478bd9Sstevel@tonic-gate 307c478bd9Sstevel@tonic-gate #include <sys/param.h> 317c478bd9Sstevel@tonic-gate #include <sys/types.h> 327c478bd9Sstevel@tonic-gate #include <sys/systm.h> 337c478bd9Sstevel@tonic-gate #include <sys/thread.h> 347c478bd9Sstevel@tonic-gate #include <sys/t_lock.h> 357c478bd9Sstevel@tonic-gate #include <sys/time.h> 367c478bd9Sstevel@tonic-gate #include <sys/vnode.h> 377c478bd9Sstevel@tonic-gate #include <sys/vfs.h> 387c478bd9Sstevel@tonic-gate #include <sys/errno.h> 397c478bd9Sstevel@tonic-gate #include <sys/buf.h> 407c478bd9Sstevel@tonic-gate #include <sys/stat.h> 417c478bd9Sstevel@tonic-gate #include <sys/cred.h> 427c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 437c478bd9Sstevel@tonic-gate #include <sys/debug.h> 447c478bd9Sstevel@tonic-gate #include <sys/dnlc.h> 457c478bd9Sstevel@tonic-gate #include <sys/vmsystm.h> 467c478bd9Sstevel@tonic-gate #include <sys/flock.h> 477c478bd9Sstevel@tonic-gate #include <sys/share.h> 487c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 497c478bd9Sstevel@tonic-gate #include <sys/tiuser.h> 507c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 517c478bd9Sstevel@tonic-gate #include <sys/callb.h> 527c478bd9Sstevel@tonic-gate #include <sys/acl.h> 537c478bd9Sstevel@tonic-gate #include <sys/kstat.h> 547c478bd9Sstevel@tonic-gate #include <sys/signal.h> 557c478bd9Sstevel@tonic-gate #include <sys/disp.h> 567c478bd9Sstevel@tonic-gate #include <sys/atomic.h> 577c478bd9Sstevel@tonic-gate #include <sys/list.h> 587c478bd9Sstevel@tonic-gate #include <sys/sdt.h> 597c478bd9Sstevel@tonic-gate 607c478bd9Sstevel@tonic-gate #include <rpc/types.h> 617c478bd9Sstevel@tonic-gate #include <rpc/xdr.h> 627c478bd9Sstevel@tonic-gate #include <rpc/auth.h> 637c478bd9Sstevel@tonic-gate #include <rpc/clnt.h> 647c478bd9Sstevel@tonic-gate 657c478bd9Sstevel@tonic-gate #include <nfs/nfs.h> 667c478bd9Sstevel@tonic-gate #include <nfs/nfs_clnt.h> 
677c478bd9Sstevel@tonic-gate #include <nfs/nfs_acl.h> 687c478bd9Sstevel@tonic-gate 697c478bd9Sstevel@tonic-gate #include <nfs/nfs4.h> 707c478bd9Sstevel@tonic-gate #include <nfs/rnode4.h> 717c478bd9Sstevel@tonic-gate #include <nfs/nfs4_clnt.h> 727c478bd9Sstevel@tonic-gate 737c478bd9Sstevel@tonic-gate #include <vm/hat.h> 747c478bd9Sstevel@tonic-gate #include <vm/as.h> 757c478bd9Sstevel@tonic-gate #include <vm/page.h> 767c478bd9Sstevel@tonic-gate #include <vm/pvn.h> 777c478bd9Sstevel@tonic-gate #include <vm/seg.h> 787c478bd9Sstevel@tonic-gate #include <vm/seg_map.h> 797c478bd9Sstevel@tonic-gate #include <vm/seg_vn.h> 807c478bd9Sstevel@tonic-gate 817c478bd9Sstevel@tonic-gate #include <sys/ddi.h> 827c478bd9Sstevel@tonic-gate 837c478bd9Sstevel@tonic-gate /* 847c478bd9Sstevel@tonic-gate * Arguments to page-flush thread. 857c478bd9Sstevel@tonic-gate */ 867c478bd9Sstevel@tonic-gate typedef struct { 877c478bd9Sstevel@tonic-gate vnode_t *vp; 887c478bd9Sstevel@tonic-gate cred_t *cr; 897c478bd9Sstevel@tonic-gate } pgflush_t; 907c478bd9Sstevel@tonic-gate 917c478bd9Sstevel@tonic-gate #ifdef DEBUG 927c478bd9Sstevel@tonic-gate int nfs4_client_lease_debug; 937c478bd9Sstevel@tonic-gate int nfs4_sharedfh_debug; 947c478bd9Sstevel@tonic-gate int nfs4_fname_debug; 957c478bd9Sstevel@tonic-gate 967c478bd9Sstevel@tonic-gate /* temporary: panic if v_type is inconsistent with r_attr va_type */ 977c478bd9Sstevel@tonic-gate int nfs4_vtype_debug; 987c478bd9Sstevel@tonic-gate 997c478bd9Sstevel@tonic-gate uint_t nfs4_tsd_key; 1007c478bd9Sstevel@tonic-gate #endif 1017c478bd9Sstevel@tonic-gate 1027c478bd9Sstevel@tonic-gate static time_t nfs4_client_resumed = 0; 1037c478bd9Sstevel@tonic-gate static callb_id_t cid = 0; 1047c478bd9Sstevel@tonic-gate 1057c478bd9Sstevel@tonic-gate static int nfs4renew(nfs4_server_t *); 1067c478bd9Sstevel@tonic-gate static void nfs4_attrcache_va(vnode_t *, nfs4_ga_res_t *, int); 1077c478bd9Sstevel@tonic-gate static void nfs4_pgflush_thread(pgflush_t *); 
1087c478bd9Sstevel@tonic-gate 1097c478bd9Sstevel@tonic-gate static boolean_t nfs4_client_cpr_callb(void *, int); 1107c478bd9Sstevel@tonic-gate 1117c478bd9Sstevel@tonic-gate struct mi4_globals { 1127c478bd9Sstevel@tonic-gate kmutex_t mig_lock; /* lock protecting mig_list */ 1137c478bd9Sstevel@tonic-gate list_t mig_list; /* list of NFS v4 mounts in zone */ 1147c478bd9Sstevel@tonic-gate boolean_t mig_destructor_called; 1157c478bd9Sstevel@tonic-gate }; 1167c478bd9Sstevel@tonic-gate 1177c478bd9Sstevel@tonic-gate static zone_key_t mi4_list_key; 1187c478bd9Sstevel@tonic-gate 1197c478bd9Sstevel@tonic-gate /* 1207c478bd9Sstevel@tonic-gate * Attributes caching: 1217c478bd9Sstevel@tonic-gate * 1227c478bd9Sstevel@tonic-gate * Attributes are cached in the rnode in struct vattr form. 1237c478bd9Sstevel@tonic-gate * There is a time associated with the cached attributes (r_time_attr_inval) 1247c478bd9Sstevel@tonic-gate * which tells whether the attributes are valid. The time is initialized 1257c478bd9Sstevel@tonic-gate * to the difference between current time and the modify time of the vnode 1267c478bd9Sstevel@tonic-gate * when new attributes are cached. This allows the attributes for 1277c478bd9Sstevel@tonic-gate * files that have changed recently to be timed out sooner than for files 1287c478bd9Sstevel@tonic-gate * that have not changed for a long time. There are minimum and maximum 1297c478bd9Sstevel@tonic-gate * timeout values that can be set per mount point. 1307c478bd9Sstevel@tonic-gate */ 1317c478bd9Sstevel@tonic-gate 1327c478bd9Sstevel@tonic-gate /* 1337c478bd9Sstevel@tonic-gate * If a cache purge is in progress, wait for it to finish. 1347c478bd9Sstevel@tonic-gate * 1357c478bd9Sstevel@tonic-gate * The current thread must not be in the middle of an 1367c478bd9Sstevel@tonic-gate * nfs4_start_op/nfs4_end_op region. Otherwise, there could be a deadlock 1377c478bd9Sstevel@tonic-gate * between this thread, a recovery thread, and the page flush thread. 
1387c478bd9Sstevel@tonic-gate */ 1397c478bd9Sstevel@tonic-gate int 1407c478bd9Sstevel@tonic-gate nfs4_waitfor_purge_complete(vnode_t *vp) 1417c478bd9Sstevel@tonic-gate { 1427c478bd9Sstevel@tonic-gate rnode4_t *rp; 1437c478bd9Sstevel@tonic-gate k_sigset_t smask; 1447c478bd9Sstevel@tonic-gate 1457c478bd9Sstevel@tonic-gate rp = VTOR4(vp); 1467c478bd9Sstevel@tonic-gate if ((rp->r_serial != NULL && rp->r_serial != curthread) || 1477c478bd9Sstevel@tonic-gate ((rp->r_flags & R4PGFLUSH) && rp->r_pgflush != curthread)) { 1487c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 1497c478bd9Sstevel@tonic-gate sigintr(&smask, VTOMI4(vp)->mi_flags & MI4_INT); 1507c478bd9Sstevel@tonic-gate while ((rp->r_serial != NULL && rp->r_serial != curthread) || 1517c478bd9Sstevel@tonic-gate ((rp->r_flags & R4PGFLUSH) && 1527c478bd9Sstevel@tonic-gate rp->r_pgflush != curthread)) { 1537c478bd9Sstevel@tonic-gate if (!cv_wait_sig(&rp->r_cv, &rp->r_statelock)) { 1547c478bd9Sstevel@tonic-gate sigunintr(&smask); 1557c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 1567c478bd9Sstevel@tonic-gate return (EINTR); 1577c478bd9Sstevel@tonic-gate } 1587c478bd9Sstevel@tonic-gate } 1597c478bd9Sstevel@tonic-gate sigunintr(&smask); 1607c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 1617c478bd9Sstevel@tonic-gate } 1627c478bd9Sstevel@tonic-gate return (0); 1637c478bd9Sstevel@tonic-gate } 1647c478bd9Sstevel@tonic-gate 1657c478bd9Sstevel@tonic-gate /* 1667c478bd9Sstevel@tonic-gate * Validate caches by checking cached attributes. If they have timed out, 1677c478bd9Sstevel@tonic-gate * then get new attributes from the server. As a side effect, cache 1687c478bd9Sstevel@tonic-gate * invalidation is done if the attributes have changed. 
1697c478bd9Sstevel@tonic-gate * 1707c478bd9Sstevel@tonic-gate * If the attributes have not timed out and if there is a cache 1717c478bd9Sstevel@tonic-gate * invalidation being done by some other thread, then wait until that 1727c478bd9Sstevel@tonic-gate * thread has completed the cache invalidation. 1737c478bd9Sstevel@tonic-gate */ 1747c478bd9Sstevel@tonic-gate int 1757c478bd9Sstevel@tonic-gate nfs4_validate_caches(vnode_t *vp, cred_t *cr) 1767c478bd9Sstevel@tonic-gate { 1777c478bd9Sstevel@tonic-gate int error; 1787c478bd9Sstevel@tonic-gate nfs4_ga_res_t gar; 1797c478bd9Sstevel@tonic-gate 1807c478bd9Sstevel@tonic-gate if (ATTRCACHE4_VALID(vp)) { 1817c478bd9Sstevel@tonic-gate error = nfs4_waitfor_purge_complete(vp); 1827c478bd9Sstevel@tonic-gate if (error) 1837c478bd9Sstevel@tonic-gate return (error); 1847c478bd9Sstevel@tonic-gate return (0); 1857c478bd9Sstevel@tonic-gate } 1867c478bd9Sstevel@tonic-gate 1877c478bd9Sstevel@tonic-gate return (nfs4_getattr_otw(vp, &gar, cr, 0)); 1887c478bd9Sstevel@tonic-gate } 1897c478bd9Sstevel@tonic-gate 1907c478bd9Sstevel@tonic-gate /* 1917c478bd9Sstevel@tonic-gate * Fill in attribute from the cache. 1927c478bd9Sstevel@tonic-gate * If valid, then return 0 to indicate that no error occurred, 1937c478bd9Sstevel@tonic-gate * otherwise return 1 to indicate that an error occurred. 
1947c478bd9Sstevel@tonic-gate */ 1957c478bd9Sstevel@tonic-gate static int 1967c478bd9Sstevel@tonic-gate nfs4_getattr_cache(vnode_t *vp, struct vattr *vap) 1977c478bd9Sstevel@tonic-gate { 1987c478bd9Sstevel@tonic-gate rnode4_t *rp; 1997c478bd9Sstevel@tonic-gate 2007c478bd9Sstevel@tonic-gate rp = VTOR4(vp); 2017c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 2027c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statev4_lock); 2037c478bd9Sstevel@tonic-gate if (ATTRCACHE4_VALID(vp)) { 2047c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statev4_lock); 2057c478bd9Sstevel@tonic-gate /* 2067c478bd9Sstevel@tonic-gate * Cached attributes are valid 2077c478bd9Sstevel@tonic-gate */ 2087c478bd9Sstevel@tonic-gate *vap = rp->r_attr; 2097c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 2107c478bd9Sstevel@tonic-gate return (0); 2117c478bd9Sstevel@tonic-gate } 2127c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statev4_lock); 2137c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 2147c478bd9Sstevel@tonic-gate return (1); 2157c478bd9Sstevel@tonic-gate } 2167c478bd9Sstevel@tonic-gate 2177c478bd9Sstevel@tonic-gate 2187c478bd9Sstevel@tonic-gate /* 2197c478bd9Sstevel@tonic-gate * If returned error is ESTALE flush all caches. The nfs4_purge_caches() 2207c478bd9Sstevel@tonic-gate * call is synchronous because all the pages were invalidated by the 2217c478bd9Sstevel@tonic-gate * nfs4_invalidate_pages() call. 
2227c478bd9Sstevel@tonic-gate */ 2237c478bd9Sstevel@tonic-gate void 2247c478bd9Sstevel@tonic-gate nfs4_purge_stale_fh(int errno, vnode_t *vp, cred_t *cr) 2257c478bd9Sstevel@tonic-gate { 2267c478bd9Sstevel@tonic-gate struct rnode4 *rp = VTOR4(vp); 2277c478bd9Sstevel@tonic-gate 2287c478bd9Sstevel@tonic-gate /* Ensure that the ..._end_op() call has been done */ 2297c478bd9Sstevel@tonic-gate ASSERT(tsd_get(nfs4_tsd_key) == NULL); 2307c478bd9Sstevel@tonic-gate 2317c478bd9Sstevel@tonic-gate if (errno != ESTALE) 2327c478bd9Sstevel@tonic-gate return; 2337c478bd9Sstevel@tonic-gate 2347c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 2357c478bd9Sstevel@tonic-gate rp->r_flags |= R4STALE; 2367c478bd9Sstevel@tonic-gate if (!rp->r_error) 2377c478bd9Sstevel@tonic-gate rp->r_error = errno; 2387c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 2397c478bd9Sstevel@tonic-gate if (nfs4_has_pages(vp)) 2407c478bd9Sstevel@tonic-gate nfs4_invalidate_pages(vp, (u_offset_t)0, cr); 2417c478bd9Sstevel@tonic-gate nfs4_purge_caches(vp, NFS4_PURGE_DNLC, cr, FALSE); 2427c478bd9Sstevel@tonic-gate } 2437c478bd9Sstevel@tonic-gate 2447c478bd9Sstevel@tonic-gate /* 2457c478bd9Sstevel@tonic-gate * Purge all of the various NFS `data' caches. If "asyncpg" is TRUE, the 2467c478bd9Sstevel@tonic-gate * page purge is done asynchronously. 2477c478bd9Sstevel@tonic-gate */ 2487c478bd9Sstevel@tonic-gate void 2497c478bd9Sstevel@tonic-gate nfs4_purge_caches(vnode_t *vp, int purge_dnlc, cred_t *cr, int asyncpg) 2507c478bd9Sstevel@tonic-gate { 2517c478bd9Sstevel@tonic-gate rnode4_t *rp; 2527c478bd9Sstevel@tonic-gate char *contents; 2537c478bd9Sstevel@tonic-gate vnode_t *xattr; 2547c478bd9Sstevel@tonic-gate int size; 2557c478bd9Sstevel@tonic-gate int pgflush; /* are we the page flush thread? */ 2567c478bd9Sstevel@tonic-gate 2577c478bd9Sstevel@tonic-gate /* 2587c478bd9Sstevel@tonic-gate * Purge the DNLC for any entries which refer to this file. 
2597c478bd9Sstevel@tonic-gate */ 2607c478bd9Sstevel@tonic-gate if (vp->v_count > 1 && 2617c478bd9Sstevel@tonic-gate (vp->v_type == VDIR || purge_dnlc == NFS4_PURGE_DNLC)) 2627c478bd9Sstevel@tonic-gate dnlc_purge_vp(vp); 2637c478bd9Sstevel@tonic-gate 2647c478bd9Sstevel@tonic-gate /* 2657c478bd9Sstevel@tonic-gate * Clear any readdir state bits and purge the readlink response cache. 2667c478bd9Sstevel@tonic-gate */ 2677c478bd9Sstevel@tonic-gate rp = VTOR4(vp); 2687c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 2697c478bd9Sstevel@tonic-gate rp->r_flags &= ~R4LOOKUP; 2707c478bd9Sstevel@tonic-gate contents = rp->r_symlink.contents; 2717c478bd9Sstevel@tonic-gate size = rp->r_symlink.size; 2727c478bd9Sstevel@tonic-gate rp->r_symlink.contents = NULL; 2737c478bd9Sstevel@tonic-gate 2747c478bd9Sstevel@tonic-gate xattr = rp->r_xattr_dir; 2757c478bd9Sstevel@tonic-gate rp->r_xattr_dir = NULL; 2767c478bd9Sstevel@tonic-gate 2777c478bd9Sstevel@tonic-gate /* 2787c478bd9Sstevel@tonic-gate * Purge pathconf cache too. 2797c478bd9Sstevel@tonic-gate */ 2807c478bd9Sstevel@tonic-gate rp->r_pathconf.pc4_xattr_valid = 0; 2817c478bd9Sstevel@tonic-gate rp->r_pathconf.pc4_cache_valid = 0; 2827c478bd9Sstevel@tonic-gate 2837c478bd9Sstevel@tonic-gate pgflush = (curthread == rp->r_pgflush); 2847c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 2857c478bd9Sstevel@tonic-gate 2867c478bd9Sstevel@tonic-gate if (contents != NULL) { 2877c478bd9Sstevel@tonic-gate 2887c478bd9Sstevel@tonic-gate kmem_free((void *)contents, size); 2897c478bd9Sstevel@tonic-gate } 2907c478bd9Sstevel@tonic-gate 2917c478bd9Sstevel@tonic-gate if (xattr != NULL) 2927c478bd9Sstevel@tonic-gate VN_RELE(xattr); 2937c478bd9Sstevel@tonic-gate 2947c478bd9Sstevel@tonic-gate /* 2957c478bd9Sstevel@tonic-gate * Flush the page cache. If the current thread is the page flush 2967c478bd9Sstevel@tonic-gate * thread, don't initiate a new page flush. 
There's no need for 2977c478bd9Sstevel@tonic-gate * it, and doing it correctly is hard. 2987c478bd9Sstevel@tonic-gate */ 2997c478bd9Sstevel@tonic-gate if (nfs4_has_pages(vp) && !pgflush) { 3007c478bd9Sstevel@tonic-gate if (!asyncpg) { 3017c478bd9Sstevel@tonic-gate (void) nfs4_waitfor_purge_complete(vp); 302d55e25c3SPavel Filipensky nfs4_flush_pages(vp, cr); 3037c478bd9Sstevel@tonic-gate } else { 3047c478bd9Sstevel@tonic-gate pgflush_t *args; 3057c478bd9Sstevel@tonic-gate 3067c478bd9Sstevel@tonic-gate /* 3077c478bd9Sstevel@tonic-gate * We don't hold r_statelock while creating the 3087c478bd9Sstevel@tonic-gate * thread, in case the call blocks. So we use a 3097c478bd9Sstevel@tonic-gate * flag to indicate that a page flush thread is 3107c478bd9Sstevel@tonic-gate * active. 3117c478bd9Sstevel@tonic-gate */ 3127c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 3137c478bd9Sstevel@tonic-gate if (rp->r_flags & R4PGFLUSH) { 3147c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 3157c478bd9Sstevel@tonic-gate } else { 3167c478bd9Sstevel@tonic-gate rp->r_flags |= R4PGFLUSH; 3177c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 3187c478bd9Sstevel@tonic-gate 3197c478bd9Sstevel@tonic-gate args = kmem_alloc(sizeof (pgflush_t), 3207c478bd9Sstevel@tonic-gate KM_SLEEP); 3217c478bd9Sstevel@tonic-gate args->vp = vp; 3227c478bd9Sstevel@tonic-gate VN_HOLD(args->vp); 3237c478bd9Sstevel@tonic-gate args->cr = cr; 3247c478bd9Sstevel@tonic-gate crhold(args->cr); 3257c478bd9Sstevel@tonic-gate (void) zthread_create(NULL, 0, 3267c478bd9Sstevel@tonic-gate nfs4_pgflush_thread, args, 0, 3277c478bd9Sstevel@tonic-gate minclsyspri); 3287c478bd9Sstevel@tonic-gate } 3297c478bd9Sstevel@tonic-gate } 3307c478bd9Sstevel@tonic-gate } 3317c478bd9Sstevel@tonic-gate 3327c478bd9Sstevel@tonic-gate /* 3337c478bd9Sstevel@tonic-gate * Flush the readdir response cache. 
3347c478bd9Sstevel@tonic-gate */ 3357c478bd9Sstevel@tonic-gate nfs4_purge_rddir_cache(vp); 3367c478bd9Sstevel@tonic-gate } 3377c478bd9Sstevel@tonic-gate 3387c478bd9Sstevel@tonic-gate /* 3397c478bd9Sstevel@tonic-gate * Invalidate all pages for the given file, after writing back the dirty 3407c478bd9Sstevel@tonic-gate * ones. 3417c478bd9Sstevel@tonic-gate */ 3427c478bd9Sstevel@tonic-gate 343d55e25c3SPavel Filipensky void 344d55e25c3SPavel Filipensky nfs4_flush_pages(vnode_t *vp, cred_t *cr) 3457c478bd9Sstevel@tonic-gate { 3467c478bd9Sstevel@tonic-gate int error; 3477c478bd9Sstevel@tonic-gate rnode4_t *rp = VTOR4(vp); 3487c478bd9Sstevel@tonic-gate 349da6c28aaSamw error = VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_INVAL, cr, NULL); 3507c478bd9Sstevel@tonic-gate if (error == ENOSPC || error == EDQUOT) { 3517c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 3527c478bd9Sstevel@tonic-gate if (!rp->r_error) 3537c478bd9Sstevel@tonic-gate rp->r_error = error; 3547c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 3557c478bd9Sstevel@tonic-gate } 3567c478bd9Sstevel@tonic-gate } 3577c478bd9Sstevel@tonic-gate 3587c478bd9Sstevel@tonic-gate /* 3597c478bd9Sstevel@tonic-gate * Page flush thread. 
3607c478bd9Sstevel@tonic-gate */ 3617c478bd9Sstevel@tonic-gate 3627c478bd9Sstevel@tonic-gate static void 3637c478bd9Sstevel@tonic-gate nfs4_pgflush_thread(pgflush_t *args) 3647c478bd9Sstevel@tonic-gate { 3657c478bd9Sstevel@tonic-gate rnode4_t *rp = VTOR4(args->vp); 3667c478bd9Sstevel@tonic-gate 3677c478bd9Sstevel@tonic-gate /* remember which thread we are, so we don't deadlock ourselves */ 3687c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 3697c478bd9Sstevel@tonic-gate ASSERT(rp->r_pgflush == NULL); 3707c478bd9Sstevel@tonic-gate rp->r_pgflush = curthread; 3717c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 3727c478bd9Sstevel@tonic-gate 373d55e25c3SPavel Filipensky nfs4_flush_pages(args->vp, args->cr); 3747c478bd9Sstevel@tonic-gate 3757c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 3767c478bd9Sstevel@tonic-gate rp->r_pgflush = NULL; 3777c478bd9Sstevel@tonic-gate rp->r_flags &= ~R4PGFLUSH; 3787c478bd9Sstevel@tonic-gate cv_broadcast(&rp->r_cv); 3797c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 3807c478bd9Sstevel@tonic-gate 3817c478bd9Sstevel@tonic-gate VN_RELE(args->vp); 3827c478bd9Sstevel@tonic-gate crfree(args->cr); 3837c478bd9Sstevel@tonic-gate kmem_free(args, sizeof (pgflush_t)); 3847c478bd9Sstevel@tonic-gate zthread_exit(); 3857c478bd9Sstevel@tonic-gate } 3867c478bd9Sstevel@tonic-gate 3877c478bd9Sstevel@tonic-gate /* 3887c478bd9Sstevel@tonic-gate * Purge the readdir cache of all entries which are not currently 3897c478bd9Sstevel@tonic-gate * being filled. 
3907c478bd9Sstevel@tonic-gate */ 3917c478bd9Sstevel@tonic-gate void 3927c478bd9Sstevel@tonic-gate nfs4_purge_rddir_cache(vnode_t *vp) 3937c478bd9Sstevel@tonic-gate { 3947c478bd9Sstevel@tonic-gate rnode4_t *rp; 3957c478bd9Sstevel@tonic-gate 3967c478bd9Sstevel@tonic-gate rp = VTOR4(vp); 3977c478bd9Sstevel@tonic-gate 3987c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 3997c478bd9Sstevel@tonic-gate rp->r_direof = NULL; 4007c478bd9Sstevel@tonic-gate rp->r_flags &= ~R4LOOKUP; 4017c478bd9Sstevel@tonic-gate rp->r_flags |= R4READDIRWATTR; 4027c478bd9Sstevel@tonic-gate rddir4_cache_purge(rp); 4037c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 4047c478bd9Sstevel@tonic-gate } 4057c478bd9Sstevel@tonic-gate 4067c478bd9Sstevel@tonic-gate /* 4077c478bd9Sstevel@tonic-gate * Set attributes cache for given vnode using virtual attributes. There is 4087c478bd9Sstevel@tonic-gate * no cache validation, but if the attributes are deemed to be stale, they 4097c478bd9Sstevel@tonic-gate * are ignored. This corresponds to nfs3_attrcache(). 4107c478bd9Sstevel@tonic-gate * 4117c478bd9Sstevel@tonic-gate * Set the timeout value on the attribute cache and fill it 4127c478bd9Sstevel@tonic-gate * with the passed in attributes. 
4137c478bd9Sstevel@tonic-gate */ 4147c478bd9Sstevel@tonic-gate void 4157c478bd9Sstevel@tonic-gate nfs4_attrcache_noinval(vnode_t *vp, nfs4_ga_res_t *garp, hrtime_t t) 4167c478bd9Sstevel@tonic-gate { 4177c478bd9Sstevel@tonic-gate rnode4_t *rp = VTOR4(vp); 4187c478bd9Sstevel@tonic-gate 4197c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 4207c478bd9Sstevel@tonic-gate if (rp->r_time_attr_saved <= t) 4217c478bd9Sstevel@tonic-gate nfs4_attrcache_va(vp, garp, FALSE); 4227c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 4237c478bd9Sstevel@tonic-gate } 4247c478bd9Sstevel@tonic-gate 4257c478bd9Sstevel@tonic-gate /* 4267c478bd9Sstevel@tonic-gate * Use the passed in virtual attributes to check to see whether the 4277c478bd9Sstevel@tonic-gate * data and metadata caches are valid, cache the new attributes, and 4287c478bd9Sstevel@tonic-gate * then do the cache invalidation if required. 4297c478bd9Sstevel@tonic-gate * 4307c478bd9Sstevel@tonic-gate * The cache validation and caching of the new attributes is done 4317c478bd9Sstevel@tonic-gate * atomically via the use of the mutex, r_statelock. If required, 4327c478bd9Sstevel@tonic-gate * the cache invalidation is done atomically w.r.t. the cache 4337c478bd9Sstevel@tonic-gate * validation and caching of the attributes via the pseudo lock, 4347c478bd9Sstevel@tonic-gate * r_serial. 4357c478bd9Sstevel@tonic-gate * 4367c478bd9Sstevel@tonic-gate * This routine is used to do cache validation and attributes caching 4377c478bd9Sstevel@tonic-gate * for operations with a single set of post operation attributes. 
4387c478bd9Sstevel@tonic-gate */ 4397c478bd9Sstevel@tonic-gate 4407c478bd9Sstevel@tonic-gate void 4417c478bd9Sstevel@tonic-gate nfs4_attr_cache(vnode_t *vp, nfs4_ga_res_t *garp, 4427c478bd9Sstevel@tonic-gate hrtime_t t, cred_t *cr, int async, 4437c478bd9Sstevel@tonic-gate change_info4 *cinfo) 4447c478bd9Sstevel@tonic-gate { 4457c478bd9Sstevel@tonic-gate rnode4_t *rp; 4465e4df02aSvv149972 int mtime_changed = 0; 4475e4df02aSvv149972 int ctime_changed = 0; 4487c478bd9Sstevel@tonic-gate vsecattr_t *vsp; 4497c478bd9Sstevel@tonic-gate int was_serial, set_time_cache_inval, recov; 4507c478bd9Sstevel@tonic-gate vattr_t *vap = &garp->n4g_va; 4517c478bd9Sstevel@tonic-gate mntinfo4_t *mi = VTOMI4(vp); 4525e4df02aSvv149972 len_t preattr_rsize; 4535e4df02aSvv149972 boolean_t writemodify_set = B_FALSE; 4545e4df02aSvv149972 boolean_t cachepurge_set = B_FALSE; 4557c478bd9Sstevel@tonic-gate 4567c478bd9Sstevel@tonic-gate ASSERT(mi->mi_vfsp->vfs_dev == garp->n4g_va.va_fsid); 4577c478bd9Sstevel@tonic-gate 4587c478bd9Sstevel@tonic-gate /* Is curthread the recovery thread? */ 4597c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 4607c478bd9Sstevel@tonic-gate recov = (VTOMI4(vp)->mi_recovthread == curthread); 4617c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 4627c478bd9Sstevel@tonic-gate 4637c478bd9Sstevel@tonic-gate rp = VTOR4(vp); 4647c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 4657c478bd9Sstevel@tonic-gate was_serial = (rp->r_serial == curthread); 466843f2029SMarcel Telka if (rp->r_serial != NULL && !was_serial) { 4677c478bd9Sstevel@tonic-gate /* 468843f2029SMarcel Telka * Purge current attrs and bail out to avoid potential deadlock 469843f2029SMarcel Telka * between another thread caching attrs (r_serial thread), this 470843f2029SMarcel Telka * thread, and a thread trying to read or write pages. 
4717c478bd9Sstevel@tonic-gate */ 4727c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE4_LOCKED(rp); 4737c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 4747c478bd9Sstevel@tonic-gate return; 4757c478bd9Sstevel@tonic-gate } 4767c478bd9Sstevel@tonic-gate 4777c478bd9Sstevel@tonic-gate /* 4787c478bd9Sstevel@tonic-gate * If there is a page flush thread, the current thread needs to 4797c478bd9Sstevel@tonic-gate * bail out, to prevent a possible deadlock between the current 4807c478bd9Sstevel@tonic-gate * thread (which might be in a start_op/end_op region), the 4817c478bd9Sstevel@tonic-gate * recovery thread, and the page flush thread. Expire the 4827c478bd9Sstevel@tonic-gate * attribute cache, so that any attributes the current thread was 4837c478bd9Sstevel@tonic-gate * going to set are not lost. 4847c478bd9Sstevel@tonic-gate */ 4857c478bd9Sstevel@tonic-gate if ((rp->r_flags & R4PGFLUSH) && rp->r_pgflush != curthread) { 4867c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE4_LOCKED(rp); 4877c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 4887c478bd9Sstevel@tonic-gate return; 4897c478bd9Sstevel@tonic-gate } 4907c478bd9Sstevel@tonic-gate 4917c478bd9Sstevel@tonic-gate if (rp->r_time_attr_saved > t) { 4927c478bd9Sstevel@tonic-gate /* 4937c478bd9Sstevel@tonic-gate * Attributes have been cached since these attributes were 49400fdf600Smaheshvs * probably made. If there is an inconsistency in what is 49500fdf600Smaheshvs * cached, mark them invalid. If not, don't act on them. 4967c478bd9Sstevel@tonic-gate */ 49700fdf600Smaheshvs if (!CACHE4_VALID(rp, vap->va_mtime, vap->va_size)) 49800fdf600Smaheshvs PURGE_ATTRCACHE4_LOCKED(rp); 4997c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 5007c478bd9Sstevel@tonic-gate return; 5017c478bd9Sstevel@tonic-gate } 5027c478bd9Sstevel@tonic-gate set_time_cache_inval = 0; 5037c478bd9Sstevel@tonic-gate if (cinfo) { 5047c478bd9Sstevel@tonic-gate /* 5057c478bd9Sstevel@tonic-gate * Only directory modifying callers pass non-NULL cinfo. 
5067c478bd9Sstevel@tonic-gate */ 5077c478bd9Sstevel@tonic-gate ASSERT(vp->v_type == VDIR); 5087c478bd9Sstevel@tonic-gate /* 5097c478bd9Sstevel@tonic-gate * If the cache timeout either doesn't exist or hasn't expired, 5107c478bd9Sstevel@tonic-gate * and dir didn't changed on server before dirmod op 5117c478bd9Sstevel@tonic-gate * and dir didn't change after dirmod op but before getattr 5127c478bd9Sstevel@tonic-gate * then there's a chance that the client's cached data for 5137c478bd9Sstevel@tonic-gate * this object is current (not stale). No immediate cache 5147c478bd9Sstevel@tonic-gate * flush is required. 5157c478bd9Sstevel@tonic-gate * 5167c478bd9Sstevel@tonic-gate */ 5177c478bd9Sstevel@tonic-gate if ((! rp->r_time_cache_inval || t < rp->r_time_cache_inval) && 5187c478bd9Sstevel@tonic-gate cinfo->before == rp->r_change && 5197c478bd9Sstevel@tonic-gate (garp->n4g_change_valid && 5207c478bd9Sstevel@tonic-gate cinfo->after == garp->n4g_change)) { 5217c478bd9Sstevel@tonic-gate 5227c478bd9Sstevel@tonic-gate /* 5237c478bd9Sstevel@tonic-gate * If atomic isn't set, then the before/after info 5247c478bd9Sstevel@tonic-gate * cannot be blindly trusted. For this case, we tell 5257c478bd9Sstevel@tonic-gate * nfs4_attrcache_va to cache the attrs but also 5267c478bd9Sstevel@tonic-gate * establish an absolute maximum cache timeout. When 5277c478bd9Sstevel@tonic-gate * the timeout is reached, caches will be flushed. 5287c478bd9Sstevel@tonic-gate */ 5297c478bd9Sstevel@tonic-gate if (! cinfo->atomic) 5307c478bd9Sstevel@tonic-gate set_time_cache_inval = 1; 5317c478bd9Sstevel@tonic-gate } else { 5327c478bd9Sstevel@tonic-gate 5337c478bd9Sstevel@tonic-gate /* 5347c478bd9Sstevel@tonic-gate * We're not sure exactly what changed, but we know 5357c478bd9Sstevel@tonic-gate * what to do. flush all caches for dir. remove the 5367c478bd9Sstevel@tonic-gate * attr timeout. 5377c478bd9Sstevel@tonic-gate * 5387c478bd9Sstevel@tonic-gate * a) timeout expired. flush all caches. 
5397c478bd9Sstevel@tonic-gate * b) r_change != cinfo.before. flush all caches. 5407c478bd9Sstevel@tonic-gate * c) r_change == cinfo.before, but cinfo.after != 5417c478bd9Sstevel@tonic-gate * post-op getattr(change). flush all caches. 5427c478bd9Sstevel@tonic-gate * d) post-op getattr(change) not provided by server. 5437c478bd9Sstevel@tonic-gate * flush all caches. 5447c478bd9Sstevel@tonic-gate */ 5457c478bd9Sstevel@tonic-gate mtime_changed = 1; 5467c478bd9Sstevel@tonic-gate ctime_changed = 1; 5477c478bd9Sstevel@tonic-gate rp->r_time_cache_inval = 0; 5487c478bd9Sstevel@tonic-gate } 5497c478bd9Sstevel@tonic-gate } else { 5505e4df02aSvv149972 /* 5515e4df02aSvv149972 * Write thread after writing data to file on remote server, 5525e4df02aSvv149972 * will always set R4WRITEMODIFIED to indicate that file on 5535e4df02aSvv149972 * remote server was modified with a WRITE operation and would 5545e4df02aSvv149972 * have marked attribute cache as timed out. If R4WRITEMODIFIED 5555e4df02aSvv149972 * is set, then do not check for mtime and ctime change. 5565e4df02aSvv149972 */ 5577c478bd9Sstevel@tonic-gate if (!(rp->r_flags & R4WRITEMODIFIED)) { 5587c478bd9Sstevel@tonic-gate if (!CACHE4_VALID(rp, vap->va_mtime, vap->va_size)) 5597c478bd9Sstevel@tonic-gate mtime_changed = 1; 5605e4df02aSvv149972 5617c478bd9Sstevel@tonic-gate if (rp->r_attr.va_ctime.tv_sec != 5627c478bd9Sstevel@tonic-gate vap->va_ctime.tv_sec || 5637c478bd9Sstevel@tonic-gate rp->r_attr.va_ctime.tv_nsec != 5647c478bd9Sstevel@tonic-gate vap->va_ctime.tv_nsec) 5657c478bd9Sstevel@tonic-gate ctime_changed = 1; 5669415afeeSMarcel Telka 5679415afeeSMarcel Telka /* 5689415afeeSMarcel Telka * If the change attribute was not provided by server 5699415afeeSMarcel Telka * or it differs, then flush all caches. 
5709415afeeSMarcel Telka */ 5719415afeeSMarcel Telka if (!garp->n4g_change_valid || 5729415afeeSMarcel Telka rp->r_change != garp->n4g_change) { 5739415afeeSMarcel Telka mtime_changed = 1; 5749415afeeSMarcel Telka ctime_changed = 1; 5759415afeeSMarcel Telka } 5767c478bd9Sstevel@tonic-gate } else { 5775e4df02aSvv149972 writemodify_set = B_TRUE; 5787c478bd9Sstevel@tonic-gate } 5797c478bd9Sstevel@tonic-gate } 5807c478bd9Sstevel@tonic-gate 5815e4df02aSvv149972 preattr_rsize = rp->r_size; 5825e4df02aSvv149972 5837c478bd9Sstevel@tonic-gate nfs4_attrcache_va(vp, garp, set_time_cache_inval); 5847c478bd9Sstevel@tonic-gate 5855e4df02aSvv149972 /* 5865e4df02aSvv149972 * If we have updated filesize in nfs4_attrcache_va, as soon as we 5875e4df02aSvv149972 * drop statelock we will be in transition of purging all 5885e4df02aSvv149972 * our caches and updating them. It is possible for another 5895e4df02aSvv149972 * thread to pick this new file size and read in zeroed data. 5905e4df02aSvv149972 * stall other threads till cache purge is complete. 5915e4df02aSvv149972 */ 5925e4df02aSvv149972 if ((!cinfo) && (rp->r_size != preattr_rsize)) { 5935e4df02aSvv149972 /* 5945e4df02aSvv149972 * If R4WRITEMODIFIED was set and we have updated the file 5955e4df02aSvv149972 * size, Server's returned file size need not necessarily 5965e4df02aSvv149972 * be because of this Client's WRITE. We need to purge 5975e4df02aSvv149972 * all caches. 
5985e4df02aSvv149972 */ 5995e4df02aSvv149972 if (writemodify_set) 6005e4df02aSvv149972 mtime_changed = 1; 6015e4df02aSvv149972 6025e4df02aSvv149972 if (mtime_changed && !(rp->r_flags & R4INCACHEPURGE)) { 6035e4df02aSvv149972 rp->r_flags |= R4INCACHEPURGE; 6045e4df02aSvv149972 cachepurge_set = B_TRUE; 6055e4df02aSvv149972 } 6065e4df02aSvv149972 } 6075e4df02aSvv149972 6087c478bd9Sstevel@tonic-gate if (!mtime_changed && !ctime_changed) { 6097c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 6107c478bd9Sstevel@tonic-gate return; 6117c478bd9Sstevel@tonic-gate } 6127c478bd9Sstevel@tonic-gate 6137c478bd9Sstevel@tonic-gate rp->r_serial = curthread; 6147c478bd9Sstevel@tonic-gate 6157c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 6167c478bd9Sstevel@tonic-gate 6177c478bd9Sstevel@tonic-gate /* 6187c478bd9Sstevel@tonic-gate * If we're the recov thread, then force async nfs4_purge_caches 6197c478bd9Sstevel@tonic-gate * to avoid potential deadlock. 6207c478bd9Sstevel@tonic-gate */ 6217c478bd9Sstevel@tonic-gate if (mtime_changed) 6227c478bd9Sstevel@tonic-gate nfs4_purge_caches(vp, NFS4_NOPURGE_DNLC, cr, recov ? 
1 : async); 6237c478bd9Sstevel@tonic-gate 6245e4df02aSvv149972 if ((rp->r_flags & R4INCACHEPURGE) && cachepurge_set) { 6255e4df02aSvv149972 mutex_enter(&rp->r_statelock); 6265e4df02aSvv149972 rp->r_flags &= ~R4INCACHEPURGE; 6275e4df02aSvv149972 cv_broadcast(&rp->r_cv); 6285e4df02aSvv149972 mutex_exit(&rp->r_statelock); 6295e4df02aSvv149972 cachepurge_set = B_FALSE; 6305e4df02aSvv149972 } 6315e4df02aSvv149972 6327c478bd9Sstevel@tonic-gate if (ctime_changed) { 6337c478bd9Sstevel@tonic-gate (void) nfs4_access_purge_rp(rp); 6347c478bd9Sstevel@tonic-gate if (rp->r_secattr != NULL) { 6357c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 6367c478bd9Sstevel@tonic-gate vsp = rp->r_secattr; 6377c478bd9Sstevel@tonic-gate rp->r_secattr = NULL; 6387c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 6397c478bd9Sstevel@tonic-gate if (vsp != NULL) 6407c478bd9Sstevel@tonic-gate nfs4_acl_free_cache(vsp); 6417c478bd9Sstevel@tonic-gate } 6427c478bd9Sstevel@tonic-gate } 6437c478bd9Sstevel@tonic-gate 6447c478bd9Sstevel@tonic-gate if (!was_serial) { 6457c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 6467c478bd9Sstevel@tonic-gate rp->r_serial = NULL; 6477c478bd9Sstevel@tonic-gate cv_broadcast(&rp->r_cv); 6487c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 6497c478bd9Sstevel@tonic-gate } 6507c478bd9Sstevel@tonic-gate } 6517c478bd9Sstevel@tonic-gate 6527c478bd9Sstevel@tonic-gate /* 6537c478bd9Sstevel@tonic-gate * Set attributes cache for given vnode using virtual attributes. 6547c478bd9Sstevel@tonic-gate * 6557c478bd9Sstevel@tonic-gate * Set the timeout value on the attribute cache and fill it 6567c478bd9Sstevel@tonic-gate * with the passed in attributes. 6577c478bd9Sstevel@tonic-gate * 6587c478bd9Sstevel@tonic-gate * The caller must be holding r_statelock. 
6597c478bd9Sstevel@tonic-gate */ 6607c478bd9Sstevel@tonic-gate static void 6617c478bd9Sstevel@tonic-gate nfs4_attrcache_va(vnode_t *vp, nfs4_ga_res_t *garp, int set_cache_timeout) 6627c478bd9Sstevel@tonic-gate { 6637c478bd9Sstevel@tonic-gate rnode4_t *rp; 6647c478bd9Sstevel@tonic-gate mntinfo4_t *mi; 6657c478bd9Sstevel@tonic-gate hrtime_t delta; 6667c478bd9Sstevel@tonic-gate hrtime_t now; 6677c478bd9Sstevel@tonic-gate vattr_t *vap = &garp->n4g_va; 6687c478bd9Sstevel@tonic-gate 6697c478bd9Sstevel@tonic-gate rp = VTOR4(vp); 6707c478bd9Sstevel@tonic-gate 6717c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&rp->r_statelock)); 6727c478bd9Sstevel@tonic-gate ASSERT(vap->va_mask == AT_ALL); 6737c478bd9Sstevel@tonic-gate 6747c478bd9Sstevel@tonic-gate /* Switch to master before checking v_flag */ 6757c478bd9Sstevel@tonic-gate if (IS_SHADOW(vp, rp)) 6767c478bd9Sstevel@tonic-gate vp = RTOV4(rp); 6777c478bd9Sstevel@tonic-gate 6787c478bd9Sstevel@tonic-gate now = gethrtime(); 6797c478bd9Sstevel@tonic-gate 6807c478bd9Sstevel@tonic-gate mi = VTOMI4(vp); 6817c478bd9Sstevel@tonic-gate 6827c478bd9Sstevel@tonic-gate /* 6837c478bd9Sstevel@tonic-gate * Only establish a new cache timeout (if requested). Never 6847c478bd9Sstevel@tonic-gate * extend a timeout. Never clear a timeout. Clearing a timeout 6857c478bd9Sstevel@tonic-gate * is done by nfs4_update_dircaches (ancestor in our call chain) 6867c478bd9Sstevel@tonic-gate */ 6877c478bd9Sstevel@tonic-gate if (set_cache_timeout && ! rp->r_time_cache_inval) 6887c478bd9Sstevel@tonic-gate rp->r_time_cache_inval = now + mi->mi_acdirmax; 6897c478bd9Sstevel@tonic-gate 6907c478bd9Sstevel@tonic-gate /* 6917c478bd9Sstevel@tonic-gate * Delta is the number of nanoseconds that we will 6927c478bd9Sstevel@tonic-gate * cache the attributes of the file. It is based on 6937c478bd9Sstevel@tonic-gate * the number of nanoseconds since the last time that 6947c478bd9Sstevel@tonic-gate * we detected a change. 
The assumption is that files 6957c478bd9Sstevel@tonic-gate * that changed recently are likely to change again. 6967c478bd9Sstevel@tonic-gate * There is a minimum and a maximum for regular files 6977c478bd9Sstevel@tonic-gate * and for directories which is enforced though. 6987c478bd9Sstevel@tonic-gate * 6997c478bd9Sstevel@tonic-gate * Using the time since last change was detected 7007c478bd9Sstevel@tonic-gate * eliminates direct comparison or calculation 7017c478bd9Sstevel@tonic-gate * using mixed client and server times. NFS does 7027c478bd9Sstevel@tonic-gate * not make any assumptions regarding the client 7037c478bd9Sstevel@tonic-gate * and server clocks being synchronized. 7047c478bd9Sstevel@tonic-gate */ 7057c478bd9Sstevel@tonic-gate if (vap->va_mtime.tv_sec != rp->r_attr.va_mtime.tv_sec || 7067c478bd9Sstevel@tonic-gate vap->va_mtime.tv_nsec != rp->r_attr.va_mtime.tv_nsec || 7077c478bd9Sstevel@tonic-gate vap->va_size != rp->r_attr.va_size) { 7087c478bd9Sstevel@tonic-gate rp->r_time_attr_saved = now; 7097c478bd9Sstevel@tonic-gate } 7107c478bd9Sstevel@tonic-gate 7117c478bd9Sstevel@tonic-gate if ((mi->mi_flags & MI4_NOAC) || (vp->v_flag & VNOCACHE)) 7127c478bd9Sstevel@tonic-gate delta = 0; 7137c478bd9Sstevel@tonic-gate else { 7147c478bd9Sstevel@tonic-gate delta = now - rp->r_time_attr_saved; 7157c478bd9Sstevel@tonic-gate if (vp->v_type == VDIR) { 7167c478bd9Sstevel@tonic-gate if (delta < mi->mi_acdirmin) 7177c478bd9Sstevel@tonic-gate delta = mi->mi_acdirmin; 7187c478bd9Sstevel@tonic-gate else if (delta > mi->mi_acdirmax) 7197c478bd9Sstevel@tonic-gate delta = mi->mi_acdirmax; 7207c478bd9Sstevel@tonic-gate } else { 7217c478bd9Sstevel@tonic-gate if (delta < mi->mi_acregmin) 7227c478bd9Sstevel@tonic-gate delta = mi->mi_acregmin; 7237c478bd9Sstevel@tonic-gate else if (delta > mi->mi_acregmax) 7247c478bd9Sstevel@tonic-gate delta = mi->mi_acregmax; 7257c478bd9Sstevel@tonic-gate } 7267c478bd9Sstevel@tonic-gate } 7277c478bd9Sstevel@tonic-gate rp->r_time_attr_inval = now + 
delta; 7287c478bd9Sstevel@tonic-gate 7297c478bd9Sstevel@tonic-gate rp->r_attr = *vap; 7307c478bd9Sstevel@tonic-gate if (garp->n4g_change_valid) 7317c478bd9Sstevel@tonic-gate rp->r_change = garp->n4g_change; 7327c478bd9Sstevel@tonic-gate 7337c478bd9Sstevel@tonic-gate /* 7347c478bd9Sstevel@tonic-gate * The attributes that were returned may be valid and can 7357c478bd9Sstevel@tonic-gate * be used, but they may not be allowed to be cached. 7367c478bd9Sstevel@tonic-gate * Reset the timers to cause immediate invalidation and 7377c478bd9Sstevel@tonic-gate * clear r_change so no VERIFY operations will suceed 7387c478bd9Sstevel@tonic-gate */ 7397c478bd9Sstevel@tonic-gate if (garp->n4g_attrwhy == NFS4_GETATTR_NOCACHE_OK) { 7407c478bd9Sstevel@tonic-gate rp->r_time_attr_inval = now; 7417c478bd9Sstevel@tonic-gate rp->r_time_attr_saved = now; 7427c478bd9Sstevel@tonic-gate rp->r_change = 0; 7437c478bd9Sstevel@tonic-gate } 7447c478bd9Sstevel@tonic-gate 7457c478bd9Sstevel@tonic-gate /* 7467c478bd9Sstevel@tonic-gate * If mounted_on_fileid returned AND the object is a stub, 7477c478bd9Sstevel@tonic-gate * then set object's va_nodeid to the mounted over fid 7487c478bd9Sstevel@tonic-gate * returned by server. 7497c478bd9Sstevel@tonic-gate * 7507c478bd9Sstevel@tonic-gate * If mounted_on_fileid not provided/supported, then 7517c478bd9Sstevel@tonic-gate * just set it to 0 for now. Eventually it would be 7527c478bd9Sstevel@tonic-gate * better to set it to a hashed version of FH. This 7537c478bd9Sstevel@tonic-gate * would probably be good enough to provide a unique 7547c478bd9Sstevel@tonic-gate * fid/d_ino within a dir. 7557c478bd9Sstevel@tonic-gate * 7567c478bd9Sstevel@tonic-gate * We don't need to carry mounted_on_fileid in the 7577c478bd9Sstevel@tonic-gate * rnode as long as the client never requests fileid 7587c478bd9Sstevel@tonic-gate * without also requesting mounted_on_fileid. For 7597c478bd9Sstevel@tonic-gate * now, it stays. 
7607c478bd9Sstevel@tonic-gate */ 7617c478bd9Sstevel@tonic-gate if (garp->n4g_mon_fid_valid) { 7627c478bd9Sstevel@tonic-gate rp->r_mntd_fid = garp->n4g_mon_fid; 7637c478bd9Sstevel@tonic-gate 764b9238976Sth199096 if (RP_ISSTUB(rp)) 7657c478bd9Sstevel@tonic-gate rp->r_attr.va_nodeid = rp->r_mntd_fid; 7667c478bd9Sstevel@tonic-gate } 7677c478bd9Sstevel@tonic-gate 7687c478bd9Sstevel@tonic-gate /* 7697c478bd9Sstevel@tonic-gate * Check to see if there are valid pathconf bits to 7707c478bd9Sstevel@tonic-gate * cache in the rnode. 7717c478bd9Sstevel@tonic-gate */ 7727c478bd9Sstevel@tonic-gate if (garp->n4g_ext_res) { 7737c478bd9Sstevel@tonic-gate if (garp->n4g_ext_res->n4g_pc4.pc4_cache_valid) { 7747c478bd9Sstevel@tonic-gate rp->r_pathconf = garp->n4g_ext_res->n4g_pc4; 7757c478bd9Sstevel@tonic-gate } else { 7767c478bd9Sstevel@tonic-gate if (garp->n4g_ext_res->n4g_pc4.pc4_xattr_valid) { 7777c478bd9Sstevel@tonic-gate rp->r_pathconf.pc4_xattr_valid = TRUE; 7787c478bd9Sstevel@tonic-gate rp->r_pathconf.pc4_xattr_exists = 7797c478bd9Sstevel@tonic-gate garp->n4g_ext_res->n4g_pc4.pc4_xattr_exists; 7807c478bd9Sstevel@tonic-gate } 7817c478bd9Sstevel@tonic-gate } 7827c478bd9Sstevel@tonic-gate } 7837c478bd9Sstevel@tonic-gate /* 7847c478bd9Sstevel@tonic-gate * Update the size of the file if there is no cached data or if 7857c478bd9Sstevel@tonic-gate * the cached data is clean and there is no data being written 7867c478bd9Sstevel@tonic-gate * out. 
7877c478bd9Sstevel@tonic-gate */ 7887c478bd9Sstevel@tonic-gate if (rp->r_size != vap->va_size && 7897c478bd9Sstevel@tonic-gate (!vn_has_cached_data(vp) || 7907c478bd9Sstevel@tonic-gate (!(rp->r_flags & R4DIRTY) && rp->r_count == 0))) { 7917c478bd9Sstevel@tonic-gate rp->r_size = vap->va_size; 7927c478bd9Sstevel@tonic-gate } 7937c478bd9Sstevel@tonic-gate nfs_setswaplike(vp, vap); 7947c478bd9Sstevel@tonic-gate rp->r_flags &= ~R4WRITEMODIFIED; 7957c478bd9Sstevel@tonic-gate } 7967c478bd9Sstevel@tonic-gate 7977c478bd9Sstevel@tonic-gate /* 7987c478bd9Sstevel@tonic-gate * Get attributes over-the-wire and update attributes cache 7997c478bd9Sstevel@tonic-gate * if no error occurred in the over-the-wire operation. 8007c478bd9Sstevel@tonic-gate * Return 0 if successful, otherwise error. 8017c478bd9Sstevel@tonic-gate */ 8027c478bd9Sstevel@tonic-gate int 8037c478bd9Sstevel@tonic-gate nfs4_getattr_otw(vnode_t *vp, nfs4_ga_res_t *garp, cred_t *cr, int get_acl) 8047c478bd9Sstevel@tonic-gate { 8057c478bd9Sstevel@tonic-gate mntinfo4_t *mi = VTOMI4(vp); 8067c478bd9Sstevel@tonic-gate hrtime_t t; 8077c478bd9Sstevel@tonic-gate nfs4_recov_state_t recov_state; 8087c478bd9Sstevel@tonic-gate nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS }; 8097c478bd9Sstevel@tonic-gate 8107c478bd9Sstevel@tonic-gate recov_state.rs_flags = 0; 8117c478bd9Sstevel@tonic-gate recov_state.rs_num_retry_despite_err = 0; 8127c478bd9Sstevel@tonic-gate 8137c478bd9Sstevel@tonic-gate /* Save the original mount point security flavor */ 8147c478bd9Sstevel@tonic-gate (void) save_mnt_secinfo(mi->mi_curr_serv); 8157c478bd9Sstevel@tonic-gate 8167c478bd9Sstevel@tonic-gate recov_retry: 817b9238976Sth199096 8187c478bd9Sstevel@tonic-gate if ((e.error = nfs4_start_fop(mi, vp, NULL, OH_GETATTR, 8197c478bd9Sstevel@tonic-gate &recov_state, NULL))) { 8207c478bd9Sstevel@tonic-gate (void) check_mnt_secinfo(mi->mi_curr_serv, vp); 8217c478bd9Sstevel@tonic-gate return (e.error); 8227c478bd9Sstevel@tonic-gate } 8237c478bd9Sstevel@tonic-gate 
8247c478bd9Sstevel@tonic-gate t = gethrtime(); 8257c478bd9Sstevel@tonic-gate 8267c478bd9Sstevel@tonic-gate nfs4_getattr_otw_norecovery(vp, garp, &e, cr, get_acl); 8277c478bd9Sstevel@tonic-gate 8287c478bd9Sstevel@tonic-gate if (nfs4_needs_recovery(&e, FALSE, vp->v_vfsp)) { 8297c478bd9Sstevel@tonic-gate if (nfs4_start_recovery(&e, VTOMI4(vp), vp, NULL, NULL, 8302f172c55SRobert Thurlow NULL, OP_GETATTR, NULL, NULL, NULL) == FALSE) { 8317c478bd9Sstevel@tonic-gate nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_GETATTR, 8327c478bd9Sstevel@tonic-gate &recov_state, 1); 8337c478bd9Sstevel@tonic-gate goto recov_retry; 8347c478bd9Sstevel@tonic-gate } 8357c478bd9Sstevel@tonic-gate } 8367c478bd9Sstevel@tonic-gate 8377c478bd9Sstevel@tonic-gate nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_GETATTR, &recov_state, 0); 8387c478bd9Sstevel@tonic-gate 8397c478bd9Sstevel@tonic-gate if (!e.error) { 8407c478bd9Sstevel@tonic-gate if (e.stat == NFS4_OK) { 8417c478bd9Sstevel@tonic-gate nfs4_attr_cache(vp, garp, t, cr, FALSE, NULL); 8427c478bd9Sstevel@tonic-gate } else { 8437c478bd9Sstevel@tonic-gate e.error = geterrno4(e.stat); 8447c478bd9Sstevel@tonic-gate 8457c478bd9Sstevel@tonic-gate nfs4_purge_stale_fh(e.error, vp, cr); 8467c478bd9Sstevel@tonic-gate } 8477c478bd9Sstevel@tonic-gate } 8487c478bd9Sstevel@tonic-gate 8497c478bd9Sstevel@tonic-gate /* 8507c478bd9Sstevel@tonic-gate * If getattr a node that is a stub for a crossed 8517c478bd9Sstevel@tonic-gate * mount point, keep the original secinfo flavor for 8527c478bd9Sstevel@tonic-gate * the current file system, not the crossed one. 8537c478bd9Sstevel@tonic-gate */ 8547c478bd9Sstevel@tonic-gate (void) check_mnt_secinfo(mi->mi_curr_serv, vp); 8557c478bd9Sstevel@tonic-gate 8567c478bd9Sstevel@tonic-gate return (e.error); 8577c478bd9Sstevel@tonic-gate } 8587c478bd9Sstevel@tonic-gate 8597c478bd9Sstevel@tonic-gate /* 8607c478bd9Sstevel@tonic-gate * Generate a compound to get attributes over-the-wire. 
8617c478bd9Sstevel@tonic-gate */ 8627c478bd9Sstevel@tonic-gate void 8637c478bd9Sstevel@tonic-gate nfs4_getattr_otw_norecovery(vnode_t *vp, nfs4_ga_res_t *garp, 8647c478bd9Sstevel@tonic-gate nfs4_error_t *ep, cred_t *cr, int get_acl) 8657c478bd9Sstevel@tonic-gate { 8667c478bd9Sstevel@tonic-gate COMPOUND4args_clnt args; 8677c478bd9Sstevel@tonic-gate COMPOUND4res_clnt res; 8687c478bd9Sstevel@tonic-gate int doqueue; 8697c478bd9Sstevel@tonic-gate rnode4_t *rp = VTOR4(vp); 8707c478bd9Sstevel@tonic-gate nfs_argop4 argop[2]; 8717c478bd9Sstevel@tonic-gate 8727c478bd9Sstevel@tonic-gate args.ctag = TAG_GETATTR; 8737c478bd9Sstevel@tonic-gate 8747c478bd9Sstevel@tonic-gate args.array_len = 2; 8757c478bd9Sstevel@tonic-gate args.array = argop; 8767c478bd9Sstevel@tonic-gate 8777c478bd9Sstevel@tonic-gate /* putfh */ 8787c478bd9Sstevel@tonic-gate argop[0].argop = OP_CPUTFH; 8797c478bd9Sstevel@tonic-gate argop[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh; 8807c478bd9Sstevel@tonic-gate 8817c478bd9Sstevel@tonic-gate /* getattr */ 8827c478bd9Sstevel@tonic-gate /* 8837c478bd9Sstevel@tonic-gate * Unlike nfs version 2 and 3, where getattr returns all the 884da6c28aaSamw * attributes, nfs version 4 returns only the ones explicitly 8857c478bd9Sstevel@tonic-gate * asked for. This creates problems, as some system functions 8867c478bd9Sstevel@tonic-gate * (e.g. cache check) require certain attributes and if the 8877c478bd9Sstevel@tonic-gate * cached node lacks some attributes such as uid/gid, it can 8887c478bd9Sstevel@tonic-gate * affect system utilities (e.g. "ls") that rely on the information 8897c478bd9Sstevel@tonic-gate * to be there. This can lead to anything from system crashes to 8907c478bd9Sstevel@tonic-gate * corrupted information processed by user apps. 8917c478bd9Sstevel@tonic-gate * So to ensure that all bases are covered, request at least 8927c478bd9Sstevel@tonic-gate * the AT_ALL attribute mask. 
8937c478bd9Sstevel@tonic-gate */ 8947c478bd9Sstevel@tonic-gate argop[1].argop = OP_GETATTR; 8957c478bd9Sstevel@tonic-gate argop[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK; 8967c478bd9Sstevel@tonic-gate if (get_acl) 8977c478bd9Sstevel@tonic-gate argop[1].nfs_argop4_u.opgetattr.attr_request |= FATTR4_ACL_MASK; 8987c478bd9Sstevel@tonic-gate argop[1].nfs_argop4_u.opgetattr.mi = VTOMI4(vp); 8997c478bd9Sstevel@tonic-gate 9007c478bd9Sstevel@tonic-gate doqueue = 1; 9017c478bd9Sstevel@tonic-gate 9027c478bd9Sstevel@tonic-gate rfs4call(VTOMI4(vp), &args, &res, cr, &doqueue, 0, ep); 9037c478bd9Sstevel@tonic-gate 9047c478bd9Sstevel@tonic-gate if (ep->error) 9057c478bd9Sstevel@tonic-gate return; 9067c478bd9Sstevel@tonic-gate 9077c478bd9Sstevel@tonic-gate if (res.status != NFS4_OK) { 9087c478bd9Sstevel@tonic-gate (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 9097c478bd9Sstevel@tonic-gate return; 9107c478bd9Sstevel@tonic-gate } 9117c478bd9Sstevel@tonic-gate 9127c478bd9Sstevel@tonic-gate *garp = res.array[1].nfs_resop4_u.opgetattr.ga_res; 9137c478bd9Sstevel@tonic-gate 9147c478bd9Sstevel@tonic-gate (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 9157c478bd9Sstevel@tonic-gate } 9167c478bd9Sstevel@tonic-gate 9177c478bd9Sstevel@tonic-gate /* 9187c478bd9Sstevel@tonic-gate * Return either cached or remote attributes. If get remote attr 9197c478bd9Sstevel@tonic-gate * use them to check and invalidate caches, then cache the new attributes. 
9207c478bd9Sstevel@tonic-gate */ 9217c478bd9Sstevel@tonic-gate int 9227c478bd9Sstevel@tonic-gate nfs4getattr(vnode_t *vp, vattr_t *vap, cred_t *cr) 9237c478bd9Sstevel@tonic-gate { 9247c478bd9Sstevel@tonic-gate int error; 9257c478bd9Sstevel@tonic-gate rnode4_t *rp; 9267c478bd9Sstevel@tonic-gate nfs4_ga_res_t gar; 9277c478bd9Sstevel@tonic-gate 9287c478bd9Sstevel@tonic-gate ASSERT(nfs4_consistent_type(vp)); 9297c478bd9Sstevel@tonic-gate 9307c478bd9Sstevel@tonic-gate /* 9317c478bd9Sstevel@tonic-gate * If we've got cached attributes, we're done, otherwise go 9327c478bd9Sstevel@tonic-gate * to the server to get attributes, which will update the cache 933b9238976Sth199096 * in the process. Either way, use the cached attributes for 934b9238976Sth199096 * the caller's vattr_t. 935b9238976Sth199096 * 936b9238976Sth199096 * Note that we ignore the gar set by the OTW call: the attr caching 937b9238976Sth199096 * code may make adjustments when storing to the rnode, and we want 938b9238976Sth199096 * to see those changes here. 
9397c478bd9Sstevel@tonic-gate */ 9407c478bd9Sstevel@tonic-gate rp = VTOR4(vp); 941b9238976Sth199096 error = 0; 9427c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 943b9238976Sth199096 if (!ATTRCACHE4_VALID(vp)) { 9447c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 9457c478bd9Sstevel@tonic-gate error = nfs4_getattr_otw(vp, &gar, cr, 0); 946b9238976Sth199096 mutex_enter(&rp->r_statelock); 947b9238976Sth199096 } 948b9238976Sth199096 9497c478bd9Sstevel@tonic-gate if (!error) 950b9238976Sth199096 *vap = rp->r_attr; 9517c478bd9Sstevel@tonic-gate 9527c478bd9Sstevel@tonic-gate /* Return the client's view of file size */ 9537c478bd9Sstevel@tonic-gate vap->va_size = rp->r_size; 954b9238976Sth199096 9557c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 9567c478bd9Sstevel@tonic-gate 9577c478bd9Sstevel@tonic-gate ASSERT(nfs4_consistent_type(vp)); 9587c478bd9Sstevel@tonic-gate 9597c478bd9Sstevel@tonic-gate return (error); 9607c478bd9Sstevel@tonic-gate } 9617c478bd9Sstevel@tonic-gate 9627c478bd9Sstevel@tonic-gate int 9637c478bd9Sstevel@tonic-gate nfs4_attr_otw(vnode_t *vp, nfs4_tag_type_t tag_type, 9647c478bd9Sstevel@tonic-gate nfs4_ga_res_t *garp, bitmap4 reqbitmap, cred_t *cr) 9657c478bd9Sstevel@tonic-gate { 9667c478bd9Sstevel@tonic-gate COMPOUND4args_clnt args; 9677c478bd9Sstevel@tonic-gate COMPOUND4res_clnt res; 9687c478bd9Sstevel@tonic-gate int doqueue; 9697c478bd9Sstevel@tonic-gate nfs_argop4 argop[2]; 9707c478bd9Sstevel@tonic-gate mntinfo4_t *mi = VTOMI4(vp); 9717c478bd9Sstevel@tonic-gate bool_t needrecov = FALSE; 9727c478bd9Sstevel@tonic-gate nfs4_recov_state_t recov_state; 9737c478bd9Sstevel@tonic-gate nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS }; 9747c478bd9Sstevel@tonic-gate nfs4_ga_ext_res_t *gerp; 9757c478bd9Sstevel@tonic-gate 9767c478bd9Sstevel@tonic-gate recov_state.rs_flags = 0; 9777c478bd9Sstevel@tonic-gate recov_state.rs_num_retry_despite_err = 0; 9787c478bd9Sstevel@tonic-gate 9797c478bd9Sstevel@tonic-gate recov_retry: 
9807c478bd9Sstevel@tonic-gate args.ctag = tag_type; 9817c478bd9Sstevel@tonic-gate 9827c478bd9Sstevel@tonic-gate args.array_len = 2; 9837c478bd9Sstevel@tonic-gate args.array = argop; 9847c478bd9Sstevel@tonic-gate 9857c478bd9Sstevel@tonic-gate e.error = nfs4_start_fop(mi, vp, NULL, OH_GETATTR, &recov_state, NULL); 9867c478bd9Sstevel@tonic-gate if (e.error) 9877c478bd9Sstevel@tonic-gate return (e.error); 9887c478bd9Sstevel@tonic-gate 9897c478bd9Sstevel@tonic-gate /* putfh */ 9907c478bd9Sstevel@tonic-gate argop[0].argop = OP_CPUTFH; 9917c478bd9Sstevel@tonic-gate argop[0].nfs_argop4_u.opcputfh.sfh = VTOR4(vp)->r_fh; 9927c478bd9Sstevel@tonic-gate 9937c478bd9Sstevel@tonic-gate /* getattr */ 9947c478bd9Sstevel@tonic-gate argop[1].argop = OP_GETATTR; 9957c478bd9Sstevel@tonic-gate argop[1].nfs_argop4_u.opgetattr.attr_request = reqbitmap; 9967c478bd9Sstevel@tonic-gate argop[1].nfs_argop4_u.opgetattr.mi = mi; 9977c478bd9Sstevel@tonic-gate 9987c478bd9Sstevel@tonic-gate doqueue = 1; 9997c478bd9Sstevel@tonic-gate 10007c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE, 10017c478bd9Sstevel@tonic-gate "nfs4_attr_otw: %s call, rp %s", needrecov ? 
"recov" : "first", 10027c478bd9Sstevel@tonic-gate rnode4info(VTOR4(vp)))); 10037c478bd9Sstevel@tonic-gate 10047c478bd9Sstevel@tonic-gate rfs4call(mi, &args, &res, cr, &doqueue, 0, &e); 10057c478bd9Sstevel@tonic-gate 10067c478bd9Sstevel@tonic-gate needrecov = nfs4_needs_recovery(&e, FALSE, vp->v_vfsp); 10077c478bd9Sstevel@tonic-gate if (!needrecov && e.error) { 10087c478bd9Sstevel@tonic-gate nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_GETATTR, &recov_state, 10097c478bd9Sstevel@tonic-gate needrecov); 10107c478bd9Sstevel@tonic-gate return (e.error); 10117c478bd9Sstevel@tonic-gate } 10127c478bd9Sstevel@tonic-gate 10137c478bd9Sstevel@tonic-gate if (needrecov) { 10147c478bd9Sstevel@tonic-gate bool_t abort; 10157c478bd9Sstevel@tonic-gate 10167c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 10177c478bd9Sstevel@tonic-gate "nfs4_attr_otw: initiating recovery\n")); 10187c478bd9Sstevel@tonic-gate 10197c478bd9Sstevel@tonic-gate abort = nfs4_start_recovery(&e, VTOMI4(vp), vp, NULL, NULL, 10202f172c55SRobert Thurlow NULL, OP_GETATTR, NULL, NULL, NULL); 10217c478bd9Sstevel@tonic-gate nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_GETATTR, &recov_state, 10227c478bd9Sstevel@tonic-gate needrecov); 10237c478bd9Sstevel@tonic-gate if (!e.error) { 10247c478bd9Sstevel@tonic-gate (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 10257c478bd9Sstevel@tonic-gate e.error = geterrno4(res.status); 10267c478bd9Sstevel@tonic-gate } 10277c478bd9Sstevel@tonic-gate if (abort == FALSE) 10287c478bd9Sstevel@tonic-gate goto recov_retry; 10297c478bd9Sstevel@tonic-gate return (e.error); 10307c478bd9Sstevel@tonic-gate } 10317c478bd9Sstevel@tonic-gate 10327c478bd9Sstevel@tonic-gate if (res.status) { 10337c478bd9Sstevel@tonic-gate e.error = geterrno4(res.status); 10347c478bd9Sstevel@tonic-gate } else { 10357c478bd9Sstevel@tonic-gate gerp = garp->n4g_ext_res; 10367c478bd9Sstevel@tonic-gate bcopy(&res.array[1].nfs_resop4_u.opgetattr.ga_res, 10377c478bd9Sstevel@tonic-gate garp, sizeof 
(nfs4_ga_res_t)); 10387c478bd9Sstevel@tonic-gate garp->n4g_ext_res = gerp; 10397c478bd9Sstevel@tonic-gate if (garp->n4g_ext_res && 10407c478bd9Sstevel@tonic-gate res.array[1].nfs_resop4_u.opgetattr.ga_res.n4g_ext_res) 10417c478bd9Sstevel@tonic-gate bcopy(res.array[1].nfs_resop4_u.opgetattr. 10427c478bd9Sstevel@tonic-gate ga_res.n4g_ext_res, 10437c478bd9Sstevel@tonic-gate garp->n4g_ext_res, sizeof (nfs4_ga_ext_res_t)); 10447c478bd9Sstevel@tonic-gate } 10457c478bd9Sstevel@tonic-gate (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 10467c478bd9Sstevel@tonic-gate nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_GETATTR, &recov_state, 10477c478bd9Sstevel@tonic-gate needrecov); 10487c478bd9Sstevel@tonic-gate return (e.error); 10497c478bd9Sstevel@tonic-gate } 10507c478bd9Sstevel@tonic-gate 10517c478bd9Sstevel@tonic-gate /* 10527c478bd9Sstevel@tonic-gate * Asynchronous I/O parameters. nfs_async_threads is the high-water mark 10537c478bd9Sstevel@tonic-gate * for the demand-based allocation of async threads per-mount. The 10547c478bd9Sstevel@tonic-gate * nfs_async_timeout is the amount of time a thread will live after it 10557c478bd9Sstevel@tonic-gate * becomes idle, unless new I/O requests are received before the thread 10567c478bd9Sstevel@tonic-gate * dies. See nfs4_async_putpage and nfs4_async_start. 
10577c478bd9Sstevel@tonic-gate */ 10587c478bd9Sstevel@tonic-gate 10597c478bd9Sstevel@tonic-gate static void nfs4_async_start(struct vfs *); 10600776f5e6SVallish Vaidyeshwara static void nfs4_async_pgops_start(struct vfs *); 10610776f5e6SVallish Vaidyeshwara static void nfs4_async_common_start(struct vfs *, int); 10627c478bd9Sstevel@tonic-gate 10637c478bd9Sstevel@tonic-gate static void 10647c478bd9Sstevel@tonic-gate free_async_args4(struct nfs4_async_reqs *args) 10657c478bd9Sstevel@tonic-gate { 10667c478bd9Sstevel@tonic-gate rnode4_t *rp; 10677c478bd9Sstevel@tonic-gate 10687c478bd9Sstevel@tonic-gate if (args->a_io != NFS4_INACTIVE) { 10697c478bd9Sstevel@tonic-gate rp = VTOR4(args->a_vp); 10707c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 10717c478bd9Sstevel@tonic-gate rp->r_count--; 10727c478bd9Sstevel@tonic-gate if (args->a_io == NFS4_PUTAPAGE || 10737c478bd9Sstevel@tonic-gate args->a_io == NFS4_PAGEIO) 10747c478bd9Sstevel@tonic-gate rp->r_awcount--; 10757c478bd9Sstevel@tonic-gate cv_broadcast(&rp->r_cv); 10767c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 10777c478bd9Sstevel@tonic-gate VN_RELE(args->a_vp); 10787c478bd9Sstevel@tonic-gate } 10797c478bd9Sstevel@tonic-gate crfree(args->a_cred); 10807c478bd9Sstevel@tonic-gate kmem_free(args, sizeof (*args)); 10817c478bd9Sstevel@tonic-gate } 10827c478bd9Sstevel@tonic-gate 10837c478bd9Sstevel@tonic-gate /* 10847c478bd9Sstevel@tonic-gate * Cross-zone thread creation and NFS access is disallowed, yet fsflush() and 10857c478bd9Sstevel@tonic-gate * pageout(), running in the global zone, have legitimate reasons to do 10867c478bd9Sstevel@tonic-gate * VOP_PUTPAGE(B_ASYNC) on other zones' NFS mounts. We avoid the problem by 10877c478bd9Sstevel@tonic-gate * use of a a per-mount "asynchronous requests manager thread" which is 10887c478bd9Sstevel@tonic-gate * signaled by the various asynchronous work routines when there is 10897c478bd9Sstevel@tonic-gate * asynchronous work to be done. 
It is responsible for creating new 10907c478bd9Sstevel@tonic-gate * worker threads if necessary, and notifying existing worker threads 10917c478bd9Sstevel@tonic-gate * that there is work to be done. 10927c478bd9Sstevel@tonic-gate * 10937c478bd9Sstevel@tonic-gate * In other words, it will "take the specifications from the customers and 10947c478bd9Sstevel@tonic-gate * give them to the engineers." 10957c478bd9Sstevel@tonic-gate * 10967c478bd9Sstevel@tonic-gate * Worker threads die off of their own accord if they are no longer 10977c478bd9Sstevel@tonic-gate * needed. 10987c478bd9Sstevel@tonic-gate * 10997c478bd9Sstevel@tonic-gate * This thread is killed when the zone is going away or the filesystem 11007c478bd9Sstevel@tonic-gate * is being unmounted. 11017c478bd9Sstevel@tonic-gate */ 11027c478bd9Sstevel@tonic-gate void 11037c478bd9Sstevel@tonic-gate nfs4_async_manager(vfs_t *vfsp) 11047c478bd9Sstevel@tonic-gate { 11057c478bd9Sstevel@tonic-gate callb_cpr_t cprinfo; 11067c478bd9Sstevel@tonic-gate mntinfo4_t *mi; 11077c478bd9Sstevel@tonic-gate uint_t max_threads; 11087c478bd9Sstevel@tonic-gate 11097c478bd9Sstevel@tonic-gate mi = VFTOMI4(vfsp); 11107c478bd9Sstevel@tonic-gate 11117c478bd9Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &mi->mi_async_lock, callb_generic_cpr, 11127c478bd9Sstevel@tonic-gate "nfs4_async_manager"); 11137c478bd9Sstevel@tonic-gate 11147c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_async_lock); 11157c478bd9Sstevel@tonic-gate /* 11167c478bd9Sstevel@tonic-gate * We want to stash the max number of threads that this mount was 11177c478bd9Sstevel@tonic-gate * allowed so we can use it later when the variable is set to zero as 11187c478bd9Sstevel@tonic-gate * part of the zone/mount going away. 11197c478bd9Sstevel@tonic-gate * 11207c478bd9Sstevel@tonic-gate * We want to be able to create at least one thread to handle 1121388e50fcSMarcel Telka * asynchronous inactive calls. 
11227c478bd9Sstevel@tonic-gate */ 11237c478bd9Sstevel@tonic-gate max_threads = MAX(mi->mi_max_threads, 1); 11247c478bd9Sstevel@tonic-gate /* 11257c478bd9Sstevel@tonic-gate * We don't want to wait for mi_max_threads to go to zero, since that 11267c478bd9Sstevel@tonic-gate * happens as part of a failed unmount, but this thread should only 11277c478bd9Sstevel@tonic-gate * exit when the mount is really going away. 11287c478bd9Sstevel@tonic-gate * 11297c478bd9Sstevel@tonic-gate * Once MI4_ASYNC_MGR_STOP is set, no more async operations will be 11307c478bd9Sstevel@tonic-gate * attempted: the various _async_*() functions know to do things 11317c478bd9Sstevel@tonic-gate * inline if mi_max_threads == 0. Henceforth we just drain out the 11327c478bd9Sstevel@tonic-gate * outstanding requests. 11337c478bd9Sstevel@tonic-gate * 11347c478bd9Sstevel@tonic-gate * Note that we still create zthreads even if we notice the zone is 11357c478bd9Sstevel@tonic-gate * shutting down (MI4_ASYNC_MGR_STOP is set); this may cause the zone 11367c478bd9Sstevel@tonic-gate * shutdown sequence to take slightly longer in some cases, but 11377c478bd9Sstevel@tonic-gate * doesn't violate the protocol, as all threads will exit as soon as 11387c478bd9Sstevel@tonic-gate * they're done processing the remaining requests. 
11397c478bd9Sstevel@tonic-gate */ 1140388e50fcSMarcel Telka for (;;) { 11417c478bd9Sstevel@tonic-gate while (mi->mi_async_req_count > 0) { 11427c478bd9Sstevel@tonic-gate /* 11437c478bd9Sstevel@tonic-gate * Paranoia: If the mount started out having 11447c478bd9Sstevel@tonic-gate * (mi->mi_max_threads == 0), and the value was 11457c478bd9Sstevel@tonic-gate * later changed (via a debugger or somesuch), 11467c478bd9Sstevel@tonic-gate * we could be confused since we will think we 11477c478bd9Sstevel@tonic-gate * can't create any threads, and the calling 11487c478bd9Sstevel@tonic-gate * code (which looks at the current value of 11497c478bd9Sstevel@tonic-gate * mi->mi_max_threads, now non-zero) thinks we 11507c478bd9Sstevel@tonic-gate * can. 11517c478bd9Sstevel@tonic-gate * 11527c478bd9Sstevel@tonic-gate * So, because we're paranoid, we create threads 11537c478bd9Sstevel@tonic-gate * up to the maximum of the original and the 11547c478bd9Sstevel@tonic-gate * current value. This means that future 11557c478bd9Sstevel@tonic-gate * (debugger-induced) alterations of 11567c478bd9Sstevel@tonic-gate * mi->mi_max_threads are ignored for our 11577c478bd9Sstevel@tonic-gate * purposes, but who told them they could change 11587c478bd9Sstevel@tonic-gate * random values on a live kernel anyhow? 
11597c478bd9Sstevel@tonic-gate */ 11600776f5e6SVallish Vaidyeshwara if (mi->mi_threads[NFS4_ASYNC_QUEUE] < 11617c478bd9Sstevel@tonic-gate MAX(mi->mi_max_threads, max_threads)) { 11620776f5e6SVallish Vaidyeshwara mi->mi_threads[NFS4_ASYNC_QUEUE]++; 11637c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock); 116450a83466Sjwahlig MI4_HOLD(mi); 11657c478bd9Sstevel@tonic-gate VFS_HOLD(vfsp); /* hold for new thread */ 11667c478bd9Sstevel@tonic-gate (void) zthread_create(NULL, 0, nfs4_async_start, 11677c478bd9Sstevel@tonic-gate vfsp, 0, minclsyspri); 11687c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_async_lock); 11690776f5e6SVallish Vaidyeshwara } else if (mi->mi_threads[NFS4_ASYNC_PGOPS_QUEUE] < 11700776f5e6SVallish Vaidyeshwara NUM_ASYNC_PGOPS_THREADS) { 11710776f5e6SVallish Vaidyeshwara mi->mi_threads[NFS4_ASYNC_PGOPS_QUEUE]++; 11720776f5e6SVallish Vaidyeshwara mutex_exit(&mi->mi_async_lock); 11730776f5e6SVallish Vaidyeshwara MI4_HOLD(mi); 11740776f5e6SVallish Vaidyeshwara VFS_HOLD(vfsp); /* hold for new thread */ 11750776f5e6SVallish Vaidyeshwara (void) zthread_create(NULL, 0, 11760776f5e6SVallish Vaidyeshwara nfs4_async_pgops_start, vfsp, 0, 11770776f5e6SVallish Vaidyeshwara minclsyspri); 11780776f5e6SVallish Vaidyeshwara mutex_enter(&mi->mi_async_lock); 11797c478bd9Sstevel@tonic-gate } 11800776f5e6SVallish Vaidyeshwara NFS4_WAKE_ASYNC_WORKER(mi->mi_async_work_cv); 11817c478bd9Sstevel@tonic-gate ASSERT(mi->mi_async_req_count != 0); 11827c478bd9Sstevel@tonic-gate mi->mi_async_req_count--; 11837c478bd9Sstevel@tonic-gate } 1184388e50fcSMarcel Telka 11857c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 1186388e50fcSMarcel Telka if (mi->mi_flags & MI4_ASYNC_MGR_STOP) { 1187388e50fcSMarcel Telka mutex_exit(&mi->mi_lock); 1188388e50fcSMarcel Telka break; 11897c478bd9Sstevel@tonic-gate } 11907c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 11917c478bd9Sstevel@tonic-gate 1192388e50fcSMarcel Telka CALLB_CPR_SAFE_BEGIN(&cprinfo); 1193388e50fcSMarcel Telka 
cv_wait(&mi->mi_async_reqs_cv, &mi->mi_async_lock); 1194388e50fcSMarcel Telka CALLB_CPR_SAFE_END(&cprinfo, &mi->mi_async_lock); 1195388e50fcSMarcel Telka } 1196388e50fcSMarcel Telka 11977c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE, 11987c478bd9Sstevel@tonic-gate "nfs4_async_manager exiting for vfs %p\n", (void *)mi->mi_vfsp)); 11997c478bd9Sstevel@tonic-gate /* 12007c478bd9Sstevel@tonic-gate * Let everyone know we're done. 12017c478bd9Sstevel@tonic-gate */ 12027c478bd9Sstevel@tonic-gate mi->mi_manager_thread = NULL; 12037c478bd9Sstevel@tonic-gate /* 12047c478bd9Sstevel@tonic-gate * Wake up the inactive thread. 12057c478bd9Sstevel@tonic-gate */ 12067c478bd9Sstevel@tonic-gate cv_broadcast(&mi->mi_inact_req_cv); 12077c478bd9Sstevel@tonic-gate /* 12087c478bd9Sstevel@tonic-gate * Wake up anyone sitting in nfs4_async_manager_stop() 12097c478bd9Sstevel@tonic-gate */ 12107c478bd9Sstevel@tonic-gate cv_broadcast(&mi->mi_async_cv); 12117c478bd9Sstevel@tonic-gate /* 12127c478bd9Sstevel@tonic-gate * There is no explicit call to mutex_exit(&mi->mi_async_lock) 12137c478bd9Sstevel@tonic-gate * since CALLB_CPR_EXIT is actually responsible for releasing 12147c478bd9Sstevel@tonic-gate * 'mi_async_lock'. 12157c478bd9Sstevel@tonic-gate */ 12167c478bd9Sstevel@tonic-gate CALLB_CPR_EXIT(&cprinfo); 12177c478bd9Sstevel@tonic-gate VFS_RELE(vfsp); /* release thread's hold */ 121850a83466Sjwahlig MI4_RELE(mi); 12197c478bd9Sstevel@tonic-gate zthread_exit(); 12207c478bd9Sstevel@tonic-gate } 12217c478bd9Sstevel@tonic-gate 12227c478bd9Sstevel@tonic-gate /* 12237c478bd9Sstevel@tonic-gate * Signal (and wait for) the async manager thread to clean up and go away. 
12247c478bd9Sstevel@tonic-gate */ 12257c478bd9Sstevel@tonic-gate void 12267c478bd9Sstevel@tonic-gate nfs4_async_manager_stop(vfs_t *vfsp) 12277c478bd9Sstevel@tonic-gate { 12287c478bd9Sstevel@tonic-gate mntinfo4_t *mi = VFTOMI4(vfsp); 12297c478bd9Sstevel@tonic-gate 12307c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_async_lock); 12317c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 12327c478bd9Sstevel@tonic-gate mi->mi_flags |= MI4_ASYNC_MGR_STOP; 12337c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 12347c478bd9Sstevel@tonic-gate cv_broadcast(&mi->mi_async_reqs_cv); 12357c478bd9Sstevel@tonic-gate /* 12367c478bd9Sstevel@tonic-gate * Wait for the async manager thread to die. 12377c478bd9Sstevel@tonic-gate */ 12387c478bd9Sstevel@tonic-gate while (mi->mi_manager_thread != NULL) 12397c478bd9Sstevel@tonic-gate cv_wait(&mi->mi_async_cv, &mi->mi_async_lock); 12407c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock); 12417c478bd9Sstevel@tonic-gate } 12427c478bd9Sstevel@tonic-gate 12437c478bd9Sstevel@tonic-gate int 12447c478bd9Sstevel@tonic-gate nfs4_async_readahead(vnode_t *vp, u_offset_t blkoff, caddr_t addr, 12457c478bd9Sstevel@tonic-gate struct seg *seg, cred_t *cr, void (*readahead)(vnode_t *, 12467c478bd9Sstevel@tonic-gate u_offset_t, caddr_t, struct seg *, cred_t *)) 12477c478bd9Sstevel@tonic-gate { 12487c478bd9Sstevel@tonic-gate rnode4_t *rp; 12497c478bd9Sstevel@tonic-gate mntinfo4_t *mi; 12507c478bd9Sstevel@tonic-gate struct nfs4_async_reqs *args; 12517c478bd9Sstevel@tonic-gate 12527c478bd9Sstevel@tonic-gate rp = VTOR4(vp); 12537c478bd9Sstevel@tonic-gate ASSERT(rp->r_freef == NULL); 12547c478bd9Sstevel@tonic-gate 12557c478bd9Sstevel@tonic-gate mi = VTOMI4(vp); 12567c478bd9Sstevel@tonic-gate 12577c478bd9Sstevel@tonic-gate /* 12587c478bd9Sstevel@tonic-gate * If addr falls in a different segment, don't bother doing readahead. 
12597c478bd9Sstevel@tonic-gate */ 12607c478bd9Sstevel@tonic-gate if (addr >= seg->s_base + seg->s_size) 12617c478bd9Sstevel@tonic-gate return (-1); 12627c478bd9Sstevel@tonic-gate 12637c478bd9Sstevel@tonic-gate /* 12647c478bd9Sstevel@tonic-gate * If we can't allocate a request structure, punt on the readahead. 12657c478bd9Sstevel@tonic-gate */ 12667c478bd9Sstevel@tonic-gate if ((args = kmem_alloc(sizeof (*args), KM_NOSLEEP)) == NULL) 12677c478bd9Sstevel@tonic-gate return (-1); 12687c478bd9Sstevel@tonic-gate 12697c478bd9Sstevel@tonic-gate /* 12707c478bd9Sstevel@tonic-gate * If a lock operation is pending, don't initiate any new 12717c478bd9Sstevel@tonic-gate * readaheads. Otherwise, bump r_count to indicate the new 12727c478bd9Sstevel@tonic-gate * asynchronous I/O. 12737c478bd9Sstevel@tonic-gate */ 12747c478bd9Sstevel@tonic-gate if (!nfs_rw_tryenter(&rp->r_lkserlock, RW_READER)) { 12757c478bd9Sstevel@tonic-gate kmem_free(args, sizeof (*args)); 12767c478bd9Sstevel@tonic-gate return (-1); 12777c478bd9Sstevel@tonic-gate } 12787c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 12797c478bd9Sstevel@tonic-gate rp->r_count++; 12807c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 12817c478bd9Sstevel@tonic-gate nfs_rw_exit(&rp->r_lkserlock); 12827c478bd9Sstevel@tonic-gate 12837c478bd9Sstevel@tonic-gate args->a_next = NULL; 12847c478bd9Sstevel@tonic-gate #ifdef DEBUG 12857c478bd9Sstevel@tonic-gate args->a_queuer = curthread; 12867c478bd9Sstevel@tonic-gate #endif 12877c478bd9Sstevel@tonic-gate VN_HOLD(vp); 12887c478bd9Sstevel@tonic-gate args->a_vp = vp; 12897c478bd9Sstevel@tonic-gate ASSERT(cr != NULL); 12907c478bd9Sstevel@tonic-gate crhold(cr); 12917c478bd9Sstevel@tonic-gate args->a_cred = cr; 12927c478bd9Sstevel@tonic-gate args->a_io = NFS4_READ_AHEAD; 12937c478bd9Sstevel@tonic-gate args->a_nfs4_readahead = readahead; 12947c478bd9Sstevel@tonic-gate args->a_nfs4_blkoff = blkoff; 12957c478bd9Sstevel@tonic-gate args->a_nfs4_seg = seg; 12967c478bd9Sstevel@tonic-gate 
args->a_nfs4_addr = addr; 12977c478bd9Sstevel@tonic-gate 12987c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_async_lock); 12997c478bd9Sstevel@tonic-gate 13007c478bd9Sstevel@tonic-gate /* 13017c478bd9Sstevel@tonic-gate * If asyncio has been disabled, don't bother readahead. 13027c478bd9Sstevel@tonic-gate */ 13037c478bd9Sstevel@tonic-gate if (mi->mi_max_threads == 0) { 13047c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock); 13057c478bd9Sstevel@tonic-gate goto noasync; 13067c478bd9Sstevel@tonic-gate } 13077c478bd9Sstevel@tonic-gate 13087c478bd9Sstevel@tonic-gate /* 13097c478bd9Sstevel@tonic-gate * Link request structure into the async list and 13107c478bd9Sstevel@tonic-gate * wakeup async thread to do the i/o. 13117c478bd9Sstevel@tonic-gate */ 13127c478bd9Sstevel@tonic-gate if (mi->mi_async_reqs[NFS4_READ_AHEAD] == NULL) { 13137c478bd9Sstevel@tonic-gate mi->mi_async_reqs[NFS4_READ_AHEAD] = args; 13147c478bd9Sstevel@tonic-gate mi->mi_async_tail[NFS4_READ_AHEAD] = args; 13157c478bd9Sstevel@tonic-gate } else { 13167c478bd9Sstevel@tonic-gate mi->mi_async_tail[NFS4_READ_AHEAD]->a_next = args; 13177c478bd9Sstevel@tonic-gate mi->mi_async_tail[NFS4_READ_AHEAD] = args; 13187c478bd9Sstevel@tonic-gate } 13197c478bd9Sstevel@tonic-gate 13207c478bd9Sstevel@tonic-gate if (mi->mi_io_kstats) { 13217c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 13227c478bd9Sstevel@tonic-gate kstat_waitq_enter(KSTAT_IO_PTR(mi->mi_io_kstats)); 13237c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 13247c478bd9Sstevel@tonic-gate } 13257c478bd9Sstevel@tonic-gate 13267c478bd9Sstevel@tonic-gate mi->mi_async_req_count++; 13277c478bd9Sstevel@tonic-gate ASSERT(mi->mi_async_req_count != 0); 13287c478bd9Sstevel@tonic-gate cv_signal(&mi->mi_async_reqs_cv); 13297c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock); 13307c478bd9Sstevel@tonic-gate return (0); 13317c478bd9Sstevel@tonic-gate 13327c478bd9Sstevel@tonic-gate noasync: 13337c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 
13347c478bd9Sstevel@tonic-gate rp->r_count--; 13357c478bd9Sstevel@tonic-gate cv_broadcast(&rp->r_cv); 13367c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 13377c478bd9Sstevel@tonic-gate VN_RELE(vp); 13387c478bd9Sstevel@tonic-gate crfree(cr); 13397c478bd9Sstevel@tonic-gate kmem_free(args, sizeof (*args)); 13407c478bd9Sstevel@tonic-gate return (-1); 13417c478bd9Sstevel@tonic-gate } 13427c478bd9Sstevel@tonic-gate 13430776f5e6SVallish Vaidyeshwara static void 13440776f5e6SVallish Vaidyeshwara nfs4_async_start(struct vfs *vfsp) 13450776f5e6SVallish Vaidyeshwara { 13460776f5e6SVallish Vaidyeshwara nfs4_async_common_start(vfsp, NFS4_ASYNC_QUEUE); 13470776f5e6SVallish Vaidyeshwara } 13480776f5e6SVallish Vaidyeshwara 13490776f5e6SVallish Vaidyeshwara static void 13500776f5e6SVallish Vaidyeshwara nfs4_async_pgops_start(struct vfs *vfsp) 13510776f5e6SVallish Vaidyeshwara { 13520776f5e6SVallish Vaidyeshwara nfs4_async_common_start(vfsp, NFS4_ASYNC_PGOPS_QUEUE); 13530776f5e6SVallish Vaidyeshwara } 13540776f5e6SVallish Vaidyeshwara 13557c478bd9Sstevel@tonic-gate /* 13567c478bd9Sstevel@tonic-gate * The async queues for each mounted file system are arranged as a 13577c478bd9Sstevel@tonic-gate * set of queues, one for each async i/o type. Requests are taken 13587c478bd9Sstevel@tonic-gate * from the queues in a round-robin fashion. A number of consecutive 13597c478bd9Sstevel@tonic-gate * requests are taken from each queue before moving on to the next 13607c478bd9Sstevel@tonic-gate * queue. This functionality may allow the NFS Version 2 server to do 13617c478bd9Sstevel@tonic-gate * write clustering, even if the client is mixing writes and reads 13627c478bd9Sstevel@tonic-gate * because it will take multiple write requests from the queue 13637c478bd9Sstevel@tonic-gate * before processing any of the other async i/o types. 
13647c478bd9Sstevel@tonic-gate * 13650776f5e6SVallish Vaidyeshwara * XXX The nfs4_async_common_start thread is unsafe in the light of the present 13667c478bd9Sstevel@tonic-gate * model defined by cpr to suspend the system. Specifically over the 13677c478bd9Sstevel@tonic-gate * wire calls are cpr-unsafe. The thread should be reevaluated in 13687c478bd9Sstevel@tonic-gate * case of future updates to the cpr model. 13697c478bd9Sstevel@tonic-gate */ 13707c478bd9Sstevel@tonic-gate static void 13710776f5e6SVallish Vaidyeshwara nfs4_async_common_start(struct vfs *vfsp, int async_queue) 13727c478bd9Sstevel@tonic-gate { 13737c478bd9Sstevel@tonic-gate struct nfs4_async_reqs *args; 13747c478bd9Sstevel@tonic-gate mntinfo4_t *mi = VFTOMI4(vfsp); 13757c478bd9Sstevel@tonic-gate clock_t time_left = 1; 13767c478bd9Sstevel@tonic-gate callb_cpr_t cprinfo; 13777c478bd9Sstevel@tonic-gate int i; 13787c478bd9Sstevel@tonic-gate extern int nfs_async_timeout; 13790776f5e6SVallish Vaidyeshwara int async_types; 13800776f5e6SVallish Vaidyeshwara kcondvar_t *async_work_cv; 13810776f5e6SVallish Vaidyeshwara 13820776f5e6SVallish Vaidyeshwara if (async_queue == NFS4_ASYNC_QUEUE) { 13830776f5e6SVallish Vaidyeshwara async_types = NFS4_ASYNC_TYPES; 13840776f5e6SVallish Vaidyeshwara async_work_cv = &mi->mi_async_work_cv[NFS4_ASYNC_QUEUE]; 13850776f5e6SVallish Vaidyeshwara } else { 13860776f5e6SVallish Vaidyeshwara async_types = NFS4_ASYNC_PGOPS_TYPES; 13870776f5e6SVallish Vaidyeshwara async_work_cv = &mi->mi_async_work_cv[NFS4_ASYNC_PGOPS_QUEUE]; 13880776f5e6SVallish Vaidyeshwara } 13897c478bd9Sstevel@tonic-gate 13907c478bd9Sstevel@tonic-gate /* 13917c478bd9Sstevel@tonic-gate * Dynamic initialization of nfs_async_timeout to allow nfs to be 13927c478bd9Sstevel@tonic-gate * built in an implementation independent manner. 
13937c478bd9Sstevel@tonic-gate */ 13947c478bd9Sstevel@tonic-gate if (nfs_async_timeout == -1) 13957c478bd9Sstevel@tonic-gate nfs_async_timeout = NFS_ASYNC_TIMEOUT; 13967c478bd9Sstevel@tonic-gate 13977c478bd9Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &mi->mi_async_lock, callb_generic_cpr, "nas"); 13987c478bd9Sstevel@tonic-gate 13997c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_async_lock); 14007c478bd9Sstevel@tonic-gate for (;;) { 14017c478bd9Sstevel@tonic-gate /* 14027c478bd9Sstevel@tonic-gate * Find the next queue containing an entry. We start 14037c478bd9Sstevel@tonic-gate * at the current queue pointer and then round robin 14047c478bd9Sstevel@tonic-gate * through all of them until we either find a non-empty 14057c478bd9Sstevel@tonic-gate * queue or have looked through all of them. 14067c478bd9Sstevel@tonic-gate */ 14070776f5e6SVallish Vaidyeshwara for (i = 0; i < async_types; i++) { 14080776f5e6SVallish Vaidyeshwara args = *mi->mi_async_curr[async_queue]; 14097c478bd9Sstevel@tonic-gate if (args != NULL) 14107c478bd9Sstevel@tonic-gate break; 14110776f5e6SVallish Vaidyeshwara mi->mi_async_curr[async_queue]++; 14120776f5e6SVallish Vaidyeshwara if (mi->mi_async_curr[async_queue] == 14130776f5e6SVallish Vaidyeshwara &mi->mi_async_reqs[async_types]) { 14140776f5e6SVallish Vaidyeshwara mi->mi_async_curr[async_queue] = 14150776f5e6SVallish Vaidyeshwara &mi->mi_async_reqs[0]; 14160776f5e6SVallish Vaidyeshwara } 14177c478bd9Sstevel@tonic-gate } 14187c478bd9Sstevel@tonic-gate /* 14197c478bd9Sstevel@tonic-gate * If we didn't find a entry, then block until woken up 14207c478bd9Sstevel@tonic-gate * again and then look through the queues again. 
14217c478bd9Sstevel@tonic-gate */ 14227c478bd9Sstevel@tonic-gate if (args == NULL) { 14237c478bd9Sstevel@tonic-gate /* 14247c478bd9Sstevel@tonic-gate * Exiting is considered to be safe for CPR as well 14257c478bd9Sstevel@tonic-gate */ 14267c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo); 14277c478bd9Sstevel@tonic-gate 14287c478bd9Sstevel@tonic-gate /* 14297c478bd9Sstevel@tonic-gate * Wakeup thread waiting to unmount the file 14307c478bd9Sstevel@tonic-gate * system only if all async threads are inactive. 14317c478bd9Sstevel@tonic-gate * 14327c478bd9Sstevel@tonic-gate * If we've timed-out and there's nothing to do, 14337c478bd9Sstevel@tonic-gate * then get rid of this thread. 14347c478bd9Sstevel@tonic-gate */ 14357c478bd9Sstevel@tonic-gate if (mi->mi_max_threads == 0 || time_left <= 0) { 14360776f5e6SVallish Vaidyeshwara --mi->mi_threads[async_queue]; 14370776f5e6SVallish Vaidyeshwara 14380776f5e6SVallish Vaidyeshwara if (mi->mi_threads[NFS4_ASYNC_QUEUE] == 0 && 14390776f5e6SVallish Vaidyeshwara mi->mi_threads[NFS4_ASYNC_PGOPS_QUEUE] == 0) 14407c478bd9Sstevel@tonic-gate cv_signal(&mi->mi_async_cv); 14417c478bd9Sstevel@tonic-gate CALLB_CPR_EXIT(&cprinfo); 14427c478bd9Sstevel@tonic-gate VFS_RELE(vfsp); /* release thread's hold */ 144350a83466Sjwahlig MI4_RELE(mi); 14447c478bd9Sstevel@tonic-gate zthread_exit(); 14457c478bd9Sstevel@tonic-gate /* NOTREACHED */ 14467c478bd9Sstevel@tonic-gate } 14470776f5e6SVallish Vaidyeshwara time_left = cv_reltimedwait(async_work_cv, 1448d3d50737SRafael Vanoni &mi->mi_async_lock, nfs_async_timeout, 1449d3d50737SRafael Vanoni TR_CLOCK_TICK); 14507c478bd9Sstevel@tonic-gate 14517c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &mi->mi_async_lock); 14527c478bd9Sstevel@tonic-gate 14537c478bd9Sstevel@tonic-gate continue; 14547c478bd9Sstevel@tonic-gate } else { 14557c478bd9Sstevel@tonic-gate time_left = 1; 14567c478bd9Sstevel@tonic-gate } 14577c478bd9Sstevel@tonic-gate 14587c478bd9Sstevel@tonic-gate /* 
14597c478bd9Sstevel@tonic-gate * Remove the request from the async queue and then 14607c478bd9Sstevel@tonic-gate * update the current async request queue pointer. If 14617c478bd9Sstevel@tonic-gate * the current queue is empty or we have removed enough 14627c478bd9Sstevel@tonic-gate * consecutive entries from it, then reset the counter 14637c478bd9Sstevel@tonic-gate * for this queue and then move the current pointer to 14647c478bd9Sstevel@tonic-gate * the next queue. 14657c478bd9Sstevel@tonic-gate */ 14660776f5e6SVallish Vaidyeshwara *mi->mi_async_curr[async_queue] = args->a_next; 14670776f5e6SVallish Vaidyeshwara if (*mi->mi_async_curr[async_queue] == NULL || 14687c478bd9Sstevel@tonic-gate --mi->mi_async_clusters[args->a_io] == 0) { 14697c478bd9Sstevel@tonic-gate mi->mi_async_clusters[args->a_io] = 14707c478bd9Sstevel@tonic-gate mi->mi_async_init_clusters; 14710776f5e6SVallish Vaidyeshwara mi->mi_async_curr[async_queue]++; 14720776f5e6SVallish Vaidyeshwara if (mi->mi_async_curr[async_queue] == 14730776f5e6SVallish Vaidyeshwara &mi->mi_async_reqs[async_types]) { 14740776f5e6SVallish Vaidyeshwara mi->mi_async_curr[async_queue] = 14750776f5e6SVallish Vaidyeshwara &mi->mi_async_reqs[0]; 14760776f5e6SVallish Vaidyeshwara } 14777c478bd9Sstevel@tonic-gate } 14787c478bd9Sstevel@tonic-gate 14797c478bd9Sstevel@tonic-gate if (args->a_io != NFS4_INACTIVE && mi->mi_io_kstats) { 14807c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 14817c478bd9Sstevel@tonic-gate kstat_waitq_exit(KSTAT_IO_PTR(mi->mi_io_kstats)); 14827c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 14837c478bd9Sstevel@tonic-gate } 14847c478bd9Sstevel@tonic-gate 14857c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock); 14867c478bd9Sstevel@tonic-gate 14877c478bd9Sstevel@tonic-gate /* 14887c478bd9Sstevel@tonic-gate * Obtain arguments from the async request structure. 
14897c478bd9Sstevel@tonic-gate */ 14907c478bd9Sstevel@tonic-gate if (args->a_io == NFS4_READ_AHEAD && mi->mi_max_threads > 0) { 14917c478bd9Sstevel@tonic-gate (*args->a_nfs4_readahead)(args->a_vp, 1492b9238976Sth199096 args->a_nfs4_blkoff, args->a_nfs4_addr, 1493b9238976Sth199096 args->a_nfs4_seg, args->a_cred); 14947c478bd9Sstevel@tonic-gate } else if (args->a_io == NFS4_PUTAPAGE) { 14957c478bd9Sstevel@tonic-gate (void) (*args->a_nfs4_putapage)(args->a_vp, 14967c478bd9Sstevel@tonic-gate args->a_nfs4_pp, args->a_nfs4_off, 14977c478bd9Sstevel@tonic-gate args->a_nfs4_len, args->a_nfs4_flags, 14987c478bd9Sstevel@tonic-gate args->a_cred); 14997c478bd9Sstevel@tonic-gate } else if (args->a_io == NFS4_PAGEIO) { 15007c478bd9Sstevel@tonic-gate (void) (*args->a_nfs4_pageio)(args->a_vp, 15017c478bd9Sstevel@tonic-gate args->a_nfs4_pp, args->a_nfs4_off, 15027c478bd9Sstevel@tonic-gate args->a_nfs4_len, args->a_nfs4_flags, 15037c478bd9Sstevel@tonic-gate args->a_cred); 15047c478bd9Sstevel@tonic-gate } else if (args->a_io == NFS4_READDIR) { 15057c478bd9Sstevel@tonic-gate (void) ((*args->a_nfs4_readdir)(args->a_vp, 15067c478bd9Sstevel@tonic-gate args->a_nfs4_rdc, args->a_cred)); 15077c478bd9Sstevel@tonic-gate } else if (args->a_io == NFS4_COMMIT) { 15087c478bd9Sstevel@tonic-gate (*args->a_nfs4_commit)(args->a_vp, args->a_nfs4_plist, 15097c478bd9Sstevel@tonic-gate args->a_nfs4_offset, args->a_nfs4_count, 15107c478bd9Sstevel@tonic-gate args->a_cred); 15117c478bd9Sstevel@tonic-gate } else if (args->a_io == NFS4_INACTIVE) { 15127c478bd9Sstevel@tonic-gate nfs4_inactive_otw(args->a_vp, args->a_cred); 15137c478bd9Sstevel@tonic-gate } 15147c478bd9Sstevel@tonic-gate 15157c478bd9Sstevel@tonic-gate /* 15167c478bd9Sstevel@tonic-gate * Now, release the vnode and free the credentials 15177c478bd9Sstevel@tonic-gate * structure. 
15187c478bd9Sstevel@tonic-gate */ 15197c478bd9Sstevel@tonic-gate free_async_args4(args); 15207c478bd9Sstevel@tonic-gate /* 15217c478bd9Sstevel@tonic-gate * Reacquire the mutex because it will be needed above. 15227c478bd9Sstevel@tonic-gate */ 15237c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_async_lock); 15247c478bd9Sstevel@tonic-gate } 15257c478bd9Sstevel@tonic-gate } 15267c478bd9Sstevel@tonic-gate 15277c478bd9Sstevel@tonic-gate /* 15287c478bd9Sstevel@tonic-gate * nfs4_inactive_thread - look for vnodes that need over-the-wire calls as 15297c478bd9Sstevel@tonic-gate * part of VOP_INACTIVE. 15307c478bd9Sstevel@tonic-gate */ 15317c478bd9Sstevel@tonic-gate 15327c478bd9Sstevel@tonic-gate void 15337c478bd9Sstevel@tonic-gate nfs4_inactive_thread(mntinfo4_t *mi) 15347c478bd9Sstevel@tonic-gate { 15357c478bd9Sstevel@tonic-gate struct nfs4_async_reqs *args; 15367c478bd9Sstevel@tonic-gate callb_cpr_t cprinfo; 15377c478bd9Sstevel@tonic-gate vfs_t *vfsp = mi->mi_vfsp; 15387c478bd9Sstevel@tonic-gate 15397c478bd9Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &mi->mi_async_lock, callb_generic_cpr, 15407c478bd9Sstevel@tonic-gate "nfs4_inactive_thread"); 15417c478bd9Sstevel@tonic-gate 15427c478bd9Sstevel@tonic-gate for (;;) { 15437c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_async_lock); 15447c478bd9Sstevel@tonic-gate args = mi->mi_async_reqs[NFS4_INACTIVE]; 15457c478bd9Sstevel@tonic-gate if (args == NULL) { 15467c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 15477c478bd9Sstevel@tonic-gate /* 154850a83466Sjwahlig * We don't want to exit until the async manager is done 15497c478bd9Sstevel@tonic-gate * with its work; hence the check for mi_manager_thread 15507c478bd9Sstevel@tonic-gate * being NULL. 15517c478bd9Sstevel@tonic-gate * 15527c478bd9Sstevel@tonic-gate * The async manager thread will cv_broadcast() on 15537c478bd9Sstevel@tonic-gate * mi_inact_req_cv when it's done, at which point we'll 15547c478bd9Sstevel@tonic-gate * wake up and exit. 
15557c478bd9Sstevel@tonic-gate */ 155650a83466Sjwahlig if (mi->mi_manager_thread == NULL) 15577c478bd9Sstevel@tonic-gate goto die; 15587c478bd9Sstevel@tonic-gate mi->mi_flags |= MI4_INACTIVE_IDLE; 15597c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 15607c478bd9Sstevel@tonic-gate cv_signal(&mi->mi_async_cv); 15617c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo); 15627c478bd9Sstevel@tonic-gate cv_wait(&mi->mi_inact_req_cv, &mi->mi_async_lock); 15637c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &mi->mi_async_lock); 15647c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock); 15657c478bd9Sstevel@tonic-gate } else { 15667c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 15677c478bd9Sstevel@tonic-gate mi->mi_flags &= ~MI4_INACTIVE_IDLE; 15687c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 15697c478bd9Sstevel@tonic-gate mi->mi_async_reqs[NFS4_INACTIVE] = args->a_next; 15707c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock); 15717c478bd9Sstevel@tonic-gate nfs4_inactive_otw(args->a_vp, args->a_cred); 15727c478bd9Sstevel@tonic-gate crfree(args->a_cred); 15737c478bd9Sstevel@tonic-gate kmem_free(args, sizeof (*args)); 15747c478bd9Sstevel@tonic-gate } 15757c478bd9Sstevel@tonic-gate } 15767c478bd9Sstevel@tonic-gate die: 15777c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 15787c478bd9Sstevel@tonic-gate mi->mi_inactive_thread = NULL; 15797c478bd9Sstevel@tonic-gate cv_signal(&mi->mi_async_cv); 158050a83466Sjwahlig 15817c478bd9Sstevel@tonic-gate /* 15827c478bd9Sstevel@tonic-gate * There is no explicit call to mutex_exit(&mi->mi_async_lock) since 15837c478bd9Sstevel@tonic-gate * CALLB_CPR_EXIT is actually responsible for releasing 'mi_async_lock'. 
15847c478bd9Sstevel@tonic-gate */ 15857c478bd9Sstevel@tonic-gate CALLB_CPR_EXIT(&cprinfo); 158650a83466Sjwahlig 15877c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE, 15887c478bd9Sstevel@tonic-gate "nfs4_inactive_thread exiting for vfs %p\n", (void *)vfsp)); 158950a83466Sjwahlig 159050a83466Sjwahlig MI4_RELE(mi); 15917c478bd9Sstevel@tonic-gate zthread_exit(); 15927c478bd9Sstevel@tonic-gate /* NOTREACHED */ 15937c478bd9Sstevel@tonic-gate } 15947c478bd9Sstevel@tonic-gate 15957c478bd9Sstevel@tonic-gate /* 15967c478bd9Sstevel@tonic-gate * nfs_async_stop: 15977c478bd9Sstevel@tonic-gate * Wait for all outstanding putpage operations and the inactive thread to 15987c478bd9Sstevel@tonic-gate * complete; nfs4_async_stop_sig() without interruptibility. 15997c478bd9Sstevel@tonic-gate */ 16007c478bd9Sstevel@tonic-gate void 16017c478bd9Sstevel@tonic-gate nfs4_async_stop(struct vfs *vfsp) 16027c478bd9Sstevel@tonic-gate { 16037c478bd9Sstevel@tonic-gate mntinfo4_t *mi = VFTOMI4(vfsp); 16047c478bd9Sstevel@tonic-gate 16057c478bd9Sstevel@tonic-gate /* 16067c478bd9Sstevel@tonic-gate * Wait for all outstanding async operations to complete and for 16077c478bd9Sstevel@tonic-gate * worker threads to exit. 16087c478bd9Sstevel@tonic-gate */ 16097c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_async_lock); 16107c478bd9Sstevel@tonic-gate mi->mi_max_threads = 0; 16110776f5e6SVallish Vaidyeshwara NFS4_WAKEALL_ASYNC_WORKERS(mi->mi_async_work_cv); 16120776f5e6SVallish Vaidyeshwara while (mi->mi_threads[NFS4_ASYNC_QUEUE] != 0 || 16130776f5e6SVallish Vaidyeshwara mi->mi_threads[NFS4_ASYNC_PGOPS_QUEUE] != 0) 16147c478bd9Sstevel@tonic-gate cv_wait(&mi->mi_async_cv, &mi->mi_async_lock); 16157c478bd9Sstevel@tonic-gate 16167c478bd9Sstevel@tonic-gate /* 16177c478bd9Sstevel@tonic-gate * Wait for the inactive thread to finish doing what it's doing. It 16187c478bd9Sstevel@tonic-gate * won't exit until the last reference to the vfs_t goes away. 
16197c478bd9Sstevel@tonic-gate */ 16207c478bd9Sstevel@tonic-gate if (mi->mi_inactive_thread != NULL) { 16217c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 16227c478bd9Sstevel@tonic-gate while (!(mi->mi_flags & MI4_INACTIVE_IDLE) || 16237c478bd9Sstevel@tonic-gate (mi->mi_async_reqs[NFS4_INACTIVE] != NULL)) { 16247c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 16257c478bd9Sstevel@tonic-gate cv_wait(&mi->mi_async_cv, &mi->mi_async_lock); 16267c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 16277c478bd9Sstevel@tonic-gate } 16287c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 16297c478bd9Sstevel@tonic-gate } 16307c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock); 16317c478bd9Sstevel@tonic-gate } 16327c478bd9Sstevel@tonic-gate 16337c478bd9Sstevel@tonic-gate /* 16347c478bd9Sstevel@tonic-gate * nfs_async_stop_sig: 16357c478bd9Sstevel@tonic-gate * Wait for all outstanding putpage operations and the inactive thread to 16367c478bd9Sstevel@tonic-gate * complete. If a signal is delivered we will abort and return non-zero; 16377c478bd9Sstevel@tonic-gate * otherwise return 0. Since this routine is called from nfs4_unmount, we 1638da6c28aaSamw * need to make it interruptible. 16397c478bd9Sstevel@tonic-gate */ 16407c478bd9Sstevel@tonic-gate int 16417c478bd9Sstevel@tonic-gate nfs4_async_stop_sig(struct vfs *vfsp) 16427c478bd9Sstevel@tonic-gate { 16437c478bd9Sstevel@tonic-gate mntinfo4_t *mi = VFTOMI4(vfsp); 16447c478bd9Sstevel@tonic-gate ushort_t omax; 16457c478bd9Sstevel@tonic-gate bool_t intr = FALSE; 16467c478bd9Sstevel@tonic-gate 16477c478bd9Sstevel@tonic-gate /* 16487c478bd9Sstevel@tonic-gate * Wait for all outstanding putpage operations to complete and for 16497c478bd9Sstevel@tonic-gate * worker threads to exit. 
16507c478bd9Sstevel@tonic-gate */ 16517c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_async_lock); 16527c478bd9Sstevel@tonic-gate omax = mi->mi_max_threads; 16537c478bd9Sstevel@tonic-gate mi->mi_max_threads = 0; 16540776f5e6SVallish Vaidyeshwara NFS4_WAKEALL_ASYNC_WORKERS(mi->mi_async_work_cv); 16550776f5e6SVallish Vaidyeshwara while (mi->mi_threads[NFS4_ASYNC_QUEUE] != 0 || 16560776f5e6SVallish Vaidyeshwara mi->mi_threads[NFS4_ASYNC_PGOPS_QUEUE] != 0) { 16577c478bd9Sstevel@tonic-gate if (!cv_wait_sig(&mi->mi_async_cv, &mi->mi_async_lock)) { 16587c478bd9Sstevel@tonic-gate intr = TRUE; 16597c478bd9Sstevel@tonic-gate goto interrupted; 16607c478bd9Sstevel@tonic-gate } 16617c478bd9Sstevel@tonic-gate } 16627c478bd9Sstevel@tonic-gate 16637c478bd9Sstevel@tonic-gate /* 16647c478bd9Sstevel@tonic-gate * Wait for the inactive thread to finish doing what it's doing. It 16657c478bd9Sstevel@tonic-gate * won't exit until the a last reference to the vfs_t goes away. 16667c478bd9Sstevel@tonic-gate */ 16677c478bd9Sstevel@tonic-gate if (mi->mi_inactive_thread != NULL) { 16687c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 16697c478bd9Sstevel@tonic-gate while (!(mi->mi_flags & MI4_INACTIVE_IDLE) || 16707c478bd9Sstevel@tonic-gate (mi->mi_async_reqs[NFS4_INACTIVE] != NULL)) { 16717c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 16727c478bd9Sstevel@tonic-gate if (!cv_wait_sig(&mi->mi_async_cv, 16737c478bd9Sstevel@tonic-gate &mi->mi_async_lock)) { 16747c478bd9Sstevel@tonic-gate intr = TRUE; 16757c478bd9Sstevel@tonic-gate goto interrupted; 16767c478bd9Sstevel@tonic-gate } 16777c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 16787c478bd9Sstevel@tonic-gate } 16797c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 16807c478bd9Sstevel@tonic-gate } 16817c478bd9Sstevel@tonic-gate interrupted: 16827c478bd9Sstevel@tonic-gate if (intr) 16837c478bd9Sstevel@tonic-gate mi->mi_max_threads = omax; 16847c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock); 16857c478bd9Sstevel@tonic-gate 
16867c478bd9Sstevel@tonic-gate return (intr); 16877c478bd9Sstevel@tonic-gate } 16887c478bd9Sstevel@tonic-gate 16897c478bd9Sstevel@tonic-gate int 16907c478bd9Sstevel@tonic-gate nfs4_async_putapage(vnode_t *vp, page_t *pp, u_offset_t off, size_t len, 16917c478bd9Sstevel@tonic-gate int flags, cred_t *cr, int (*putapage)(vnode_t *, page_t *, 16927c478bd9Sstevel@tonic-gate u_offset_t, size_t, int, cred_t *)) 16937c478bd9Sstevel@tonic-gate { 16947c478bd9Sstevel@tonic-gate rnode4_t *rp; 16957c478bd9Sstevel@tonic-gate mntinfo4_t *mi; 16967c478bd9Sstevel@tonic-gate struct nfs4_async_reqs *args; 16977c478bd9Sstevel@tonic-gate 16987c478bd9Sstevel@tonic-gate ASSERT(flags & B_ASYNC); 16997c478bd9Sstevel@tonic-gate ASSERT(vp->v_vfsp != NULL); 17007c478bd9Sstevel@tonic-gate 17017c478bd9Sstevel@tonic-gate rp = VTOR4(vp); 17027c478bd9Sstevel@tonic-gate ASSERT(rp->r_count > 0); 17037c478bd9Sstevel@tonic-gate 17047c478bd9Sstevel@tonic-gate mi = VTOMI4(vp); 17057c478bd9Sstevel@tonic-gate 17067c478bd9Sstevel@tonic-gate /* 17077c478bd9Sstevel@tonic-gate * If we can't allocate a request structure, do the putpage 17087c478bd9Sstevel@tonic-gate * operation synchronously in this thread's context. 
17097c478bd9Sstevel@tonic-gate */ 17107c478bd9Sstevel@tonic-gate if ((args = kmem_alloc(sizeof (*args), KM_NOSLEEP)) == NULL) 17117c478bd9Sstevel@tonic-gate goto noasync; 17127c478bd9Sstevel@tonic-gate 17137c478bd9Sstevel@tonic-gate args->a_next = NULL; 17147c478bd9Sstevel@tonic-gate #ifdef DEBUG 17157c478bd9Sstevel@tonic-gate args->a_queuer = curthread; 17167c478bd9Sstevel@tonic-gate #endif 17177c478bd9Sstevel@tonic-gate VN_HOLD(vp); 17187c478bd9Sstevel@tonic-gate args->a_vp = vp; 17197c478bd9Sstevel@tonic-gate ASSERT(cr != NULL); 17207c478bd9Sstevel@tonic-gate crhold(cr); 17217c478bd9Sstevel@tonic-gate args->a_cred = cr; 17227c478bd9Sstevel@tonic-gate args->a_io = NFS4_PUTAPAGE; 17237c478bd9Sstevel@tonic-gate args->a_nfs4_putapage = putapage; 17247c478bd9Sstevel@tonic-gate args->a_nfs4_pp = pp; 17257c478bd9Sstevel@tonic-gate args->a_nfs4_off = off; 17267c478bd9Sstevel@tonic-gate args->a_nfs4_len = (uint_t)len; 17277c478bd9Sstevel@tonic-gate args->a_nfs4_flags = flags; 17287c478bd9Sstevel@tonic-gate 17297c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_async_lock); 17307c478bd9Sstevel@tonic-gate 17317c478bd9Sstevel@tonic-gate /* 17327c478bd9Sstevel@tonic-gate * If asyncio has been disabled, then make a synchronous request. 17337c478bd9Sstevel@tonic-gate * This check is done a second time in case async io was diabled 17347c478bd9Sstevel@tonic-gate * while this thread was blocked waiting for memory pressure to 17357c478bd9Sstevel@tonic-gate * reduce or for the queue to drain. 
17367c478bd9Sstevel@tonic-gate */ 17377c478bd9Sstevel@tonic-gate if (mi->mi_max_threads == 0) { 17387c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock); 17397c478bd9Sstevel@tonic-gate 17407c478bd9Sstevel@tonic-gate VN_RELE(vp); 17417c478bd9Sstevel@tonic-gate crfree(cr); 17427c478bd9Sstevel@tonic-gate kmem_free(args, sizeof (*args)); 17437c478bd9Sstevel@tonic-gate goto noasync; 17447c478bd9Sstevel@tonic-gate } 17457c478bd9Sstevel@tonic-gate 17467c478bd9Sstevel@tonic-gate /* 17477c478bd9Sstevel@tonic-gate * Link request structure into the async list and 17487c478bd9Sstevel@tonic-gate * wakeup async thread to do the i/o. 17497c478bd9Sstevel@tonic-gate */ 17507c478bd9Sstevel@tonic-gate if (mi->mi_async_reqs[NFS4_PUTAPAGE] == NULL) { 17517c478bd9Sstevel@tonic-gate mi->mi_async_reqs[NFS4_PUTAPAGE] = args; 17527c478bd9Sstevel@tonic-gate mi->mi_async_tail[NFS4_PUTAPAGE] = args; 17537c478bd9Sstevel@tonic-gate } else { 17547c478bd9Sstevel@tonic-gate mi->mi_async_tail[NFS4_PUTAPAGE]->a_next = args; 17557c478bd9Sstevel@tonic-gate mi->mi_async_tail[NFS4_PUTAPAGE] = args; 17567c478bd9Sstevel@tonic-gate } 17577c478bd9Sstevel@tonic-gate 17587c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 17597c478bd9Sstevel@tonic-gate rp->r_count++; 17607c478bd9Sstevel@tonic-gate rp->r_awcount++; 17617c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 17627c478bd9Sstevel@tonic-gate 17637c478bd9Sstevel@tonic-gate if (mi->mi_io_kstats) { 17647c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 17657c478bd9Sstevel@tonic-gate kstat_waitq_enter(KSTAT_IO_PTR(mi->mi_io_kstats)); 17667c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 17677c478bd9Sstevel@tonic-gate } 17687c478bd9Sstevel@tonic-gate 17697c478bd9Sstevel@tonic-gate mi->mi_async_req_count++; 17707c478bd9Sstevel@tonic-gate ASSERT(mi->mi_async_req_count != 0); 17717c478bd9Sstevel@tonic-gate cv_signal(&mi->mi_async_reqs_cv); 17727c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock); 17737c478bd9Sstevel@tonic-gate return 
(0); 17747c478bd9Sstevel@tonic-gate 17757c478bd9Sstevel@tonic-gate noasync: 17767c478bd9Sstevel@tonic-gate 1777aea676fdSArne Jansen if (curproc == proc_pageout || curproc == proc_fsflush) { 17787c478bd9Sstevel@tonic-gate /* 17797c478bd9Sstevel@tonic-gate * If we get here in the context of the pageout/fsflush, 17807c478bd9Sstevel@tonic-gate * or we have run out of memory or we're attempting to 17817c478bd9Sstevel@tonic-gate * unmount we refuse to do a sync write, because this may 17827c478bd9Sstevel@tonic-gate * hang pageout/fsflush and the machine. In this case, 17837c478bd9Sstevel@tonic-gate * we just re-mark the page as dirty and punt on the page. 17847c478bd9Sstevel@tonic-gate * 17857c478bd9Sstevel@tonic-gate * Make sure B_FORCE isn't set. We can re-mark the 17867c478bd9Sstevel@tonic-gate * pages as dirty and unlock the pages in one swoop by 17877c478bd9Sstevel@tonic-gate * passing in B_ERROR to pvn_write_done(). However, 17887c478bd9Sstevel@tonic-gate * we should make sure B_FORCE isn't set - we don't 17897c478bd9Sstevel@tonic-gate * want the page tossed before it gets written out. 17907c478bd9Sstevel@tonic-gate */ 17917c478bd9Sstevel@tonic-gate if (flags & B_FORCE) 17927c478bd9Sstevel@tonic-gate flags &= ~(B_INVAL | B_FORCE); 17937c478bd9Sstevel@tonic-gate pvn_write_done(pp, flags | B_ERROR); 17947c478bd9Sstevel@tonic-gate return (0); 17957c478bd9Sstevel@tonic-gate } 17967c478bd9Sstevel@tonic-gate 1797aea676fdSArne Jansen if (nfs_zone() != mi->mi_zone) { 17987c478bd9Sstevel@tonic-gate /* 1799aea676fdSArne Jansen * So this was a cross-zone sync putpage. 18007c478bd9Sstevel@tonic-gate * 18017c478bd9Sstevel@tonic-gate * We pass in B_ERROR to pvn_write_done() to re-mark the pages 18027c478bd9Sstevel@tonic-gate * as dirty and unlock them. 18037c478bd9Sstevel@tonic-gate * 18047c478bd9Sstevel@tonic-gate * We don't want to clear B_FORCE here as the caller presumably 18057c478bd9Sstevel@tonic-gate * knows what they're doing if they set it. 
*/
		pvn_write_done(pp, flags | B_ERROR);
		return (EPERM);
	}
	return ((*putapage)(vp, pp, off, len, flags, cr));
}

/*
 * nfs4_async_pageio - queue a pageio request on the mount's async list.
 *
 * The caller must pass B_ASYNC in flags (asserted below). When a request
 * structure can be allocated and async i/o is enabled (mi_max_threads is
 * nonzero), the request is appended to the NFS4_PAGEIO list,
 * mi_async_reqs_cv is signalled and 0 is returned.
 *
 * Otherwise the request is dealt with in the calling thread:
 *	- reads (B_READ) are abandoned with pvn_read_done(B_ERROR) and 0
 *	  is returned;
 *	- writes issued by pageout or fsflush are handed back with
 *	  pvn_write_done(B_ERROR) and 0 is returned;
 *	- writes from a zone other than mi->mi_zone are handed back the
 *	  same way and EPERM is returned;
 *	- any other write is done synchronously through (*pageio)() and
 *	  its return value is passed back.
 */
int
nfs4_async_pageio(vnode_t *vp, page_t *pp, u_offset_t io_off, size_t io_len,
    int flags, cred_t *cr, int (*pageio)(vnode_t *, page_t *, u_offset_t,
    size_t, int, cred_t *))
{
	rnode4_t *rp;
	mntinfo4_t *mi;
	struct nfs4_async_reqs *args;

	ASSERT(flags & B_ASYNC);
	ASSERT(vp->v_vfsp != NULL);

	rp = VTOR4(vp);
	ASSERT(rp->r_count > 0);

	mi = VTOMI4(vp);

	/*
	 * If we can't allocate a request structure, do the pageio
	 * request synchronously in this thread's context.
	 */
	if ((args = kmem_alloc(sizeof (*args), KM_NOSLEEP)) == NULL)
		goto noasync;

	args->a_next = NULL;
#ifdef DEBUG
	args->a_queuer = curthread;
#endif
	/*
	 * The request carries its own holds on the vnode and the cred;
	 * they are dropped again below if the request is not queued.
	 */
	VN_HOLD(vp);
	args->a_vp = vp;
	ASSERT(cr != NULL);
	crhold(cr);
	args->a_cred = cr;
	args->a_io = NFS4_PAGEIO;
	args->a_nfs4_pageio = pageio;
	args->a_nfs4_pp = pp;
	args->a_nfs4_off = io_off;
	/* Note that the size_t length is narrowed to uint_t here. */
	args->a_nfs4_len = (uint_t)io_len;
	args->a_nfs4_flags = flags;

	mutex_enter(&mi->mi_async_lock);

	/*
	 * If asyncio has been disabled, then make a synchronous request.
	 * This check is done a second time in case async io was disabled
	 * while this thread was blocked waiting for memory pressure to
	 * reduce or for the queue to drain.
	 */
	if (mi->mi_max_threads == 0) {
		mutex_exit(&mi->mi_async_lock);

		/* Undo the holds taken for the request and discard it. */
		VN_RELE(vp);
		crfree(cr);
		kmem_free(args, sizeof (*args));
		goto noasync;
	}

	/*
	 * Link request structure into the async list and
	 * wakeup async thread to do the i/o.
	 */
	if (mi->mi_async_reqs[NFS4_PAGEIO] == NULL) {
		mi->mi_async_reqs[NFS4_PAGEIO] = args;
		mi->mi_async_tail[NFS4_PAGEIO] = args;
	} else {
		mi->mi_async_tail[NFS4_PAGEIO]->a_next = args;
		mi->mi_async_tail[NFS4_PAGEIO] = args;
	}

	/*
	 * NOTE(review): r_count and r_awcount look like per-rnode counts of
	 * outstanding async requests and async writes - confirm against the
	 * code that services the queue.
	 */
	mutex_enter(&rp->r_statelock);
	rp->r_count++;
	rp->r_awcount++;
	mutex_exit(&rp->r_statelock);

	if (mi->mi_io_kstats) {
		mutex_enter(&mi->mi_lock);
		kstat_waitq_enter(KSTAT_IO_PTR(mi->mi_io_kstats));
		mutex_exit(&mi->mi_lock);
	}

	mi->mi_async_req_count++;
	ASSERT(mi->mi_async_req_count != 0);
	cv_signal(&mi->mi_async_reqs_cv);
	mutex_exit(&mi->mi_async_lock);
	return (0);

noasync:
	/*
	 * If we can't do it ASYNC, for reads we do nothing (but cleanup
	 * the page list), for writes we do it synchronously, except for
	 * proc_pageout/proc_fsflush as described below.
	 */
	if (flags & B_READ) {
		pvn_read_done(pp, flags | B_ERROR);
		return (0);
	}

	if (curproc == proc_pageout || curproc == proc_fsflush) {
		/*
		 * If we get here in the context of the pageout/fsflush,
		 * we refuse to do a sync write, because this may hang
		 * pageout/fsflush (and the machine). In this case, we just
		 * re-mark the page as dirty and punt on the page.
		 *
		 * Make sure B_FORCE isn't set. We can re-mark the
		 * pages as dirty and unlock the pages in one swoop by
		 * passing in B_ERROR to pvn_write_done(). However,
		 * we should make sure B_FORCE isn't set - we don't
		 * want the page tossed before it gets written out.
		 */
		if (flags & B_FORCE)
			flags &= ~(B_INVAL | B_FORCE);
		pvn_write_done(pp, flags | B_ERROR);
		return (0);
	}

	if (nfs_zone() != mi->mi_zone) {
		/*
		 * So this was a cross-zone sync pageio. We pass in B_ERROR
		 * to pvn_write_done() to re-mark the pages as dirty and unlock
		 * them.
		 *
		 * We don't want to clear B_FORCE here as the caller presumably
		 * knows what they're doing if they set it.
		 */
		pvn_write_done(pp, flags | B_ERROR);
		return (EPERM);
	}
	return ((*pageio)(vp, pp, io_off, io_len, flags, cr));
}

/*
 * nfs4_async_readdir - queue a readdir request for the cache entry rdc.
 *
 * If the request cannot be queued (no memory for the request structure,
 * or mi_max_threads is 0) no readdir is attempted. Instead, under
 * r_statelock, the entry has its entries pointer cleared, RDDIR cleared
 * and RDDIRREQ set, and is released with rddir4_cache_rele().
 */
void
nfs4_async_readdir(vnode_t *vp, rddir4_cache *rdc, cred_t *cr,
    int (*readdir)(vnode_t *, rddir4_cache *, cred_t *))
{
	rnode4_t *rp;
	mntinfo4_t *mi;
	struct nfs4_async_reqs *args;

	rp = VTOR4(vp);
	ASSERT(rp->r_freef == NULL);

	mi = VTOMI4(vp);

	/*
	 * If we can't allocate a request structure, skip the readdir.
	 */
	if ((args = kmem_alloc(sizeof (*args), KM_NOSLEEP)) == NULL)
		goto noasync;

	args->a_next = NULL;
#ifdef DEBUG
	args->a_queuer = curthread;
#endif
	/*
	 * The request carries its own holds on the vnode and the cred;
	 * they are dropped again below if the request is not queued.
	 */
	VN_HOLD(vp);
	args->a_vp = vp;
	ASSERT(cr != NULL);
	crhold(cr);
	args->a_cred = cr;
	args->a_io = NFS4_READDIR;
	args->a_nfs4_readdir = readdir;
	args->a_nfs4_rdc = rdc;

	mutex_enter(&mi->mi_async_lock);

	/*
	 * If asyncio has been disabled, then skip this request
	 */
	if (mi->mi_max_threads == 0) {
		mutex_exit(&mi->mi_async_lock);

		/* Undo the holds taken for the request and discard it. */
		VN_RELE(vp);
		crfree(cr);
		kmem_free(args, sizeof (*args));
		goto noasync;
	}

	/*
	 * Link request structure into the async list and
	 * wakeup async thread to do the i/o.
	 */
	if (mi->mi_async_reqs[NFS4_READDIR] == NULL) {
		mi->mi_async_reqs[NFS4_READDIR] = args;
		mi->mi_async_tail[NFS4_READDIR] = args;
	} else {
		mi->mi_async_tail[NFS4_READDIR]->a_next = args;
		mi->mi_async_tail[NFS4_READDIR] = args;
	}

	/* Unlike the pageio case, only r_count is bumped here. */
	mutex_enter(&rp->r_statelock);
	rp->r_count++;
	mutex_exit(&rp->r_statelock);

	if (mi->mi_io_kstats) {
		mutex_enter(&mi->mi_lock);
		kstat_waitq_enter(KSTAT_IO_PTR(mi->mi_io_kstats));
		mutex_exit(&mi->mi_lock);
	}

	mi->mi_async_req_count++;
	ASSERT(mi->mi_async_req_count != 0);
	cv_signal(&mi->mi_async_reqs_cv);
	mutex_exit(&mi->mi_async_lock);
	return;

noasync:
	mutex_enter(&rp->r_statelock);
	rdc->entries = NULL;
	/*
	 * Indicate that no one is trying to fill this entry and
	 * it still needs to be filled.
	 */
	rdc->flags &= ~RDDIR;
	rdc->flags |= RDDIRREQ;
	rddir4_cache_rele(rp, rdc);
	mutex_exit(&rp->r_statelock);
}

/*
 * nfs4_async_commit - queue a commit request for the pages on plist.
 *
 * If the request cannot be queued (no memory for the request structure,
 * or mi_max_threads is 0) the commit is done synchronously through
 * (*commit)(), unless we are running as pageout or fsflush, or in a zone
 * other than mi->mi_zone. In those cases each page is taken off plist,
 * has p_fsdata set to C_COMMIT and is unlocked, and no commit is issued.
 */
void
nfs4_async_commit(vnode_t *vp, page_t *plist, offset3 offset, count3 count,
    cred_t *cr, void (*commit)(vnode_t *, page_t *, offset3, count3,
    cred_t *))
{
	rnode4_t *rp;
	mntinfo4_t *mi;
	struct nfs4_async_reqs *args;
	page_t *pp;

	rp = VTOR4(vp);
	mi = VTOMI4(vp);

	/*
	 * If we can't allocate a request structure, do the commit
	 * operation synchronously in this thread's context.
	 */
	if ((args = kmem_alloc(sizeof (*args), KM_NOSLEEP)) == NULL)
		goto noasync;

	args->a_next = NULL;
#ifdef DEBUG
	args->a_queuer = curthread;
#endif
	/*
	 * The request carries its own holds on the vnode and the cred;
	 * they are dropped again below if the request is not queued.
	 */
	VN_HOLD(vp);
	args->a_vp = vp;
	ASSERT(cr != NULL);
	crhold(cr);
	args->a_cred = cr;
	args->a_io = NFS4_COMMIT;
	args->a_nfs4_commit = commit;
	args->a_nfs4_plist = plist;
	args->a_nfs4_offset = offset;
	args->a_nfs4_count = count;

	mutex_enter(&mi->mi_async_lock);

	/*
	 * If asyncio has been disabled, then make a synchronous request.
	 * This check is done a second time in case async io was disabled
	 * while this thread was blocked waiting for memory pressure to
	 * reduce or for the queue to drain.
	 */
	if (mi->mi_max_threads == 0) {
		mutex_exit(&mi->mi_async_lock);

		/* Undo the holds taken for the request and discard it. */
		VN_RELE(vp);
		crfree(cr);
		kmem_free(args, sizeof (*args));
		goto noasync;
	}

	/*
	 * Link request structure into the async list and
	 * wakeup async thread to do the i/o.
	 */
	if (mi->mi_async_reqs[NFS4_COMMIT] == NULL) {
		mi->mi_async_reqs[NFS4_COMMIT] = args;
		mi->mi_async_tail[NFS4_COMMIT] = args;
	} else {
		mi->mi_async_tail[NFS4_COMMIT]->a_next = args;
		mi->mi_async_tail[NFS4_COMMIT] = args;
	}

	/* Unlike the pageio case, only r_count is bumped here. */
	mutex_enter(&rp->r_statelock);
	rp->r_count++;
	mutex_exit(&rp->r_statelock);

	if (mi->mi_io_kstats) {
		mutex_enter(&mi->mi_lock);
		kstat_waitq_enter(KSTAT_IO_PTR(mi->mi_io_kstats));
		mutex_exit(&mi->mi_lock);
	}

	mi->mi_async_req_count++;
	ASSERT(mi->mi_async_req_count != 0);
	cv_signal(&mi->mi_async_reqs_cv);
	mutex_exit(&mi->mi_async_lock);
	return;

noasync:
	if (curproc == proc_pageout || curproc == proc_fsflush ||
	    nfs_zone() != mi->mi_zone) {
		/*
		 * No synchronous commit from these contexts: tag every
		 * page with C_COMMIT and unlock it instead.
		 */
		while (plist != NULL) {
			pp = plist;
			page_sub(&plist, pp);
			pp->p_fsdata = C_COMMIT;
			page_unlock(pp);
		}
		return;
	}
	(*commit)(vp, plist, offset, count, cr);
}

/*
 * nfs4_async_inactive - hand off a VOP_INACTIVE call to a thread. The
 * reference to the vnode is handed over to the thread; the caller should
 * no longer refer to the vnode.
 *
 * Unlike most of the async routines, this handoff is needed for
 * correctness reasons, not just performance. So doing operations in the
 * context of the current thread is not an option.
*/
void
nfs4_async_inactive(vnode_t *vp, cred_t *cr)
{
	mntinfo4_t *mi;
	struct nfs4_async_reqs *args;
	boolean_t signal_inactive_thread = B_FALSE;

	mi = VTOMI4(vp);

	/* KM_SLEEP: this request must not be dropped for lack of memory. */
	args = kmem_alloc(sizeof (*args), KM_SLEEP);
	args->a_next = NULL;
#ifdef DEBUG
	args->a_queuer = curthread;
#endif
	/* No VN_HOLD here: the caller's vnode reference goes with the request. */
	args->a_vp = vp;
	ASSERT(cr != NULL);
	crhold(cr);
	args->a_cred = cr;
	args->a_io = NFS4_INACTIVE;

	/*
	 * Note that we don't check mi->mi_max_threads here, since we
	 * *need* to get rid of this vnode regardless of whether someone
	 * set nfs4_max_threads to zero in /etc/system.
	 *
	 * The manager thread knows about this and is willing to create
	 * at least one thread to accommodate us.
	 */
	mutex_enter(&mi->mi_async_lock);
	if (mi->mi_inactive_thread == NULL) {
		rnode4_t *rp;
		vnode_t *unldvp = NULL;
		/* unlname and unlcred are only set and used if unldvp != NULL */
		char *unlname;
		cred_t *unlcred;

		mutex_exit(&mi->mi_async_lock);
		/*
		 * We just need to free up the memory associated with the
		 * vnode, which can be safely done from within the current
		 * context.
		 */
		crfree(cr);	/* drop our reference */
		kmem_free(args, sizeof (*args));
		rp = VTOR4(vp);
		/*
		 * Detach any pending unlink state from the rnode under
		 * r_statelock; it is freed after rp4_addfree() below.
		 */
		mutex_enter(&rp->r_statelock);
		if (rp->r_unldvp != NULL) {
			unldvp = rp->r_unldvp;
			rp->r_unldvp = NULL;
			unlname = rp->r_unlname;
			rp->r_unlname = NULL;
			unlcred = rp->r_unlcred;
			rp->r_unlcred = NULL;
		}
		mutex_exit(&rp->r_statelock);
		/*
		 * No need to explicitly throw away any cached pages. The
		 * eventual r4inactive() will attempt a synchronous
		 * VOP_PUTPAGE() which will immediately fail since the request
		 * is coming from the wrong zone, and then will proceed to call
		 * nfs4_invalidate_pages() which will clean things up for us.
		 *
		 * Throw away the delegation here so rp4_addfree()'s attempt to
		 * return any existing delegations becomes a no-op.
		 */
		if (rp->r_deleg_type != OPEN_DELEGATE_NONE) {
			(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER,
			    FALSE);
			(void) nfs4delegreturn(rp, NFS4_DR_DISCARD);
			nfs_rw_exit(&mi->mi_recovlock);
		}
		nfs4_clear_open_streams(rp);

		rp4_addfree(rp, cr);
		if (unldvp != NULL) {
			kmem_free(unlname, MAXNAMELEN);
			VN_RELE(unldvp);
			crfree(unlcred);
		}
		return;
	}

	if (mi->mi_manager_thread == NULL) {
		/*
		 * We want to talk to the inactive thread.
		 */
		signal_inactive_thread = B_TRUE;
	}

	/*
	 * Enqueue the vnode and wake up either the special thread (empty
	 * list) or an async thread.
	 */
	if (mi->mi_async_reqs[NFS4_INACTIVE] == NULL) {
		mi->mi_async_reqs[NFS4_INACTIVE] = args;
		mi->mi_async_tail[NFS4_INACTIVE] = args;
		signal_inactive_thread = B_TRUE;
	} else {
		mi->mi_async_tail[NFS4_INACTIVE]->a_next = args;
		mi->mi_async_tail[NFS4_INACTIVE] = args;
	}
	if (signal_inactive_thread) {
		cv_signal(&mi->mi_inact_req_cv);
	} else {
		/* The request count is only bumped when an async thread is woken. */
		mi->mi_async_req_count++;
		ASSERT(mi->mi_async_req_count != 0);
		cv_signal(&mi->mi_async_reqs_cv);
	}

	mutex_exit(&mi->mi_async_lock);
}

/*
 * writerp4 - copy tcount bytes from uio into the cached pages of rp.
 *
 * The data is moved in chunks that never cross a page boundary, either
 * with uiomove() into the mapping at base or, when vpm_enable is set,
 * with vpm_data_copy(). A nonzero pgcreated says the caller has already
 * created the first page (see the comment in the loop). After each chunk
 * r_size is raised to uio_loffset if it was smaller, and R4DIRTY is set.
 *
 * The caller must hold r_rwlock as writer, and tcount may exceed neither
 * MAXBSIZE nor uio_resid (both asserted below).
 *
 * Returns 0, or the first error reported by the copy routine or by
 * segmap_fault().
 */
int
writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
{
	int pagecreate;
	int n;
	int saved_n;
	caddr_t saved_base;
	u_offset_t offset;
	int error;
	int sm_error;
	vnode_t *vp = RTOV(rp);

	ASSERT(tcount <= MAXBSIZE && tcount <= uio->uio_resid);
	ASSERT(nfs_rw_lock_held(&rp->r_rwlock, RW_WRITER));
	if (!vpm_enable) {
		ASSERT(((uintptr_t)base & MAXBOFFSET) + tcount <= MAXBSIZE);
	}

	/*
	 * Move bytes in at most PAGESIZE chunks. We must avoid
	 * spanning pages in uiomove() because page faults may cause
	 * the cache to be invalidated out from under us. The r_size is not
	 * updated until after the uiomove. If we push the last page of a
	 * file before r_size is correct, we will lose the data written past
	 * the current (and invalid) r_size.
	 */
	do {
		offset = uio->uio_loffset;
		pagecreate = 0;

		/*
		 * n is the number of bytes required to satisfy the request
		 * or the number of bytes to fill out the page.
		 */
		n = (int)MIN((PAGESIZE - (offset & PAGEOFFSET)), tcount);

		/*
		 * Check to see if we can skip reading in the page
		 * and just allocate the memory. We can do this
		 * if we are going to rewrite the entire mapping
		 * or if we are going to write to or beyond the current
		 * end of file from the beginning of the mapping.
		 *
		 * The read of r_size is now protected by r_statelock.
		 */
		mutex_enter(&rp->r_statelock);
		/*
		 * When pgcreated is nonzero the caller has already done
		 * a segmap_getmapflt with forcefault 0 and S_WRITE. With
		 * segkpm this means we already have at least one page
		 * created and mapped at base.
		 */
		pagecreate = pgcreated ||
		    ((offset & PAGEOFFSET) == 0 &&
		    (n == PAGESIZE || ((offset + n) >= rp->r_size)));

		mutex_exit(&rp->r_statelock);

		if (!vpm_enable && pagecreate) {
			/*
			 * The last argument tells segmap_pagecreate() to
			 * always lock the page, as opposed to sometimes
			 * returning with the page locked. This way we avoid a
			 * fault on the ensuing uiomove(), but also
			 * more importantly (to fix bug 1094402) we can
			 * call segmap_fault() to unlock the page in all
			 * cases. An alternative would be to modify
			 * segmap_pagecreate() to tell us when it is
			 * locking a page, but that's a fairly major
			 * interface change.
			 */
			if (pgcreated == 0)
				(void) segmap_pagecreate(segkmap, base,
				    (uint_t)n, 1);
			/*
			 * Remember where this chunk started; base and n
			 * are both changed after the copy below.
			 */
			saved_base = base;
			saved_n = n;
		}

		/*
		 * The number of bytes of data in the last page cannot
		 * be accurately determined while the page is being
		 * uiomove'd to and the size of the file being updated.
		 * Thus, inform threads which need to know accurately
		 * how much data is in the last page of the file. They
		 * will not do the i/o immediately, but will arrange for
		 * the i/o to happen later when this modify operation
		 * will have finished.
		 */
		ASSERT(!(rp->r_flags & R4MODINPROGRESS));
		mutex_enter(&rp->r_statelock);
		rp->r_flags |= R4MODINPROGRESS;
		rp->r_modaddr = (offset & MAXBMASK);
		mutex_exit(&rp->r_statelock);

		if (vpm_enable) {
			/*
			 * Copy data. If new pages are created, part of
			 * the page that is not written will be initialized
			 * with zeros.
			 */
			error = vpm_data_copy(vp, offset, n, uio,
			    !pagecreate, NULL, 0, S_WRITE);
		} else {
			error = uiomove(base, n, UIO_WRITE, uio);
		}

		/*
		 * r_size is the maximum number of
		 * bytes known to be in the file.
		 * Make sure it is at least as high as the
		 * first unwritten byte pointed to by uio_loffset.
		 */
		mutex_enter(&rp->r_statelock);
		if (rp->r_size < uio->uio_loffset)
			rp->r_size = uio->uio_loffset;
		rp->r_flags &= ~R4MODINPROGRESS;
		rp->r_flags |= R4DIRTY;
		mutex_exit(&rp->r_statelock);

		/* n = # of bytes written */
		n = (int)(uio->uio_loffset - offset);

		if (!vpm_enable) {
			base += n;
		}

		tcount -= n;
		/*
		 * If we created pages w/o initializing them completely,
		 * we need to zero the part that wasn't set up.
		 * This happens in most EOF write cases and if
		 * we had some sort of error during the uiomove.
		 */
		if (!vpm_enable && pagecreate) {
			if ((uio->uio_loffset & PAGEOFFSET) || n == 0)
				(void) kzero(base, PAGESIZE - n);

			if (pgcreated) {
				/*
				 * Caller is responsible for this page,
				 * it was not created in this loop.
				 */
				pgcreated = 0;
			} else {
				/*
				 * For bug 1094402: segmap_pagecreate locks
				 * page. Unlock it. This also unlocks the
				 * pages allocated by page_create_va() in
				 * segmap_pagecreate().
				 */
				sm_error = segmap_fault(kas.a_hat, segkmap,
				    saved_base, saved_n,
				    F_SOFTUNLOCK, S_WRITE);
				/* Keep the copy error if there was one. */
				if (error == 0)
					error = sm_error;
			}
		}
	} while (tcount > 0 && error == 0);

	return (error);
}

int
nfs4_putpages(vnode_t *vp, u_offset_t off, size_t len, int flags, cred_t *cr)
{
	rnode4_t *rp;
	page_t *pp;
	u_offset_t eoff;
	u_offset_t io_off;
	size_t io_len;
	int error;
	int rdirty;
24177c478bd9Sstevel@tonic-gate int err; 24187c478bd9Sstevel@tonic-gate 24197c478bd9Sstevel@tonic-gate rp = VTOR4(vp); 24207c478bd9Sstevel@tonic-gate ASSERT(rp->r_count > 0); 24217c478bd9Sstevel@tonic-gate 24227c478bd9Sstevel@tonic-gate if (!nfs4_has_pages(vp)) 24237c478bd9Sstevel@tonic-gate return (0); 24247c478bd9Sstevel@tonic-gate 24257c478bd9Sstevel@tonic-gate ASSERT(vp->v_type != VCHR); 24267c478bd9Sstevel@tonic-gate 24277c478bd9Sstevel@tonic-gate /* 24287c478bd9Sstevel@tonic-gate * If R4OUTOFSPACE is set, then all writes turn into B_INVAL 24297c478bd9Sstevel@tonic-gate * writes. B_FORCE is set to force the VM system to actually 24307c478bd9Sstevel@tonic-gate * invalidate the pages, even if the i/o failed. The pages 24317c478bd9Sstevel@tonic-gate * need to get invalidated because they can't be written out 24327c478bd9Sstevel@tonic-gate * because there isn't any space left on either the server's 24337c478bd9Sstevel@tonic-gate * file system or in the user's disk quota. The B_FREE bit 24347c478bd9Sstevel@tonic-gate * is cleared to avoid confusion as to whether this is a 24357c478bd9Sstevel@tonic-gate * request to place the page on the freelist or to destroy 24367c478bd9Sstevel@tonic-gate * it. 24377c478bd9Sstevel@tonic-gate */ 24387c478bd9Sstevel@tonic-gate if ((rp->r_flags & R4OUTOFSPACE) || 24397c478bd9Sstevel@tonic-gate (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) 24407c478bd9Sstevel@tonic-gate flags = (flags & ~B_FREE) | B_INVAL | B_FORCE; 24417c478bd9Sstevel@tonic-gate 24427c478bd9Sstevel@tonic-gate if (len == 0) { 24437c478bd9Sstevel@tonic-gate /* 24447c478bd9Sstevel@tonic-gate * If doing a full file synchronous operation, then clear 24457c478bd9Sstevel@tonic-gate * the R4DIRTY bit. If a page gets dirtied while the flush 24467c478bd9Sstevel@tonic-gate * is happening, then R4DIRTY will get set again. The 24477c478bd9Sstevel@tonic-gate * R4DIRTY bit must get cleared before the flush so that 24487c478bd9Sstevel@tonic-gate * we don't lose this information. 
244984d68d8eSthurlow * 245084d68d8eSthurlow * If there are no full file async write operations 245184d68d8eSthurlow * pending and RDIRTY bit is set, clear it. 24527c478bd9Sstevel@tonic-gate */ 24537c478bd9Sstevel@tonic-gate if (off == (u_offset_t)0 && 24547c478bd9Sstevel@tonic-gate !(flags & B_ASYNC) && 24557c478bd9Sstevel@tonic-gate (rp->r_flags & R4DIRTY)) { 24567c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 24577c478bd9Sstevel@tonic-gate rdirty = (rp->r_flags & R4DIRTY); 24587c478bd9Sstevel@tonic-gate rp->r_flags &= ~R4DIRTY; 24597c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 246084d68d8eSthurlow } else if (flags & B_ASYNC && off == (u_offset_t)0) { 246184d68d8eSthurlow mutex_enter(&rp->r_statelock); 246284d68d8eSthurlow if (rp->r_flags & R4DIRTY && rp->r_awcount == 0) { 246384d68d8eSthurlow rdirty = (rp->r_flags & R4DIRTY); 246484d68d8eSthurlow rp->r_flags &= ~R4DIRTY; 246584d68d8eSthurlow } 246684d68d8eSthurlow mutex_exit(&rp->r_statelock); 24677c478bd9Sstevel@tonic-gate } else 24687c478bd9Sstevel@tonic-gate rdirty = 0; 24697c478bd9Sstevel@tonic-gate 24707c478bd9Sstevel@tonic-gate /* 24717c478bd9Sstevel@tonic-gate * Search the entire vp list for pages >= off, and flush 24727c478bd9Sstevel@tonic-gate * the dirty pages. 24737c478bd9Sstevel@tonic-gate */ 24747c478bd9Sstevel@tonic-gate error = pvn_vplist_dirty(vp, off, rp->r_putapage, 24757c478bd9Sstevel@tonic-gate flags, cr); 24767c478bd9Sstevel@tonic-gate 24777c478bd9Sstevel@tonic-gate /* 2478da6c28aaSamw * If an error occurred and the file was marked as dirty 24797c478bd9Sstevel@tonic-gate * before and we aren't forcibly invalidating pages, then 24807c478bd9Sstevel@tonic-gate * reset the R4DIRTY flag. 
24817c478bd9Sstevel@tonic-gate */ 24827c478bd9Sstevel@tonic-gate if (error && rdirty && 24837c478bd9Sstevel@tonic-gate (flags & (B_INVAL | B_FORCE)) != (B_INVAL | B_FORCE)) { 24847c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 24857c478bd9Sstevel@tonic-gate rp->r_flags |= R4DIRTY; 24867c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 24877c478bd9Sstevel@tonic-gate } 24887c478bd9Sstevel@tonic-gate } else { 24897c478bd9Sstevel@tonic-gate /* 24907c478bd9Sstevel@tonic-gate * Do a range from [off...off + len) looking for pages 24917c478bd9Sstevel@tonic-gate * to deal with. 24927c478bd9Sstevel@tonic-gate */ 24937c478bd9Sstevel@tonic-gate error = 0; 24947c478bd9Sstevel@tonic-gate io_len = 0; 24957c478bd9Sstevel@tonic-gate eoff = off + len; 24967c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 24977c478bd9Sstevel@tonic-gate for (io_off = off; io_off < eoff && io_off < rp->r_size; 24987c478bd9Sstevel@tonic-gate io_off += io_len) { 24997c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 25007c478bd9Sstevel@tonic-gate /* 25017c478bd9Sstevel@tonic-gate * If we are not invalidating, synchronously 25027c478bd9Sstevel@tonic-gate * freeing or writing pages use the routine 25037c478bd9Sstevel@tonic-gate * page_lookup_nowait() to prevent reclaiming 25047c478bd9Sstevel@tonic-gate * them from the free list. 25057c478bd9Sstevel@tonic-gate */ 25067c478bd9Sstevel@tonic-gate if ((flags & B_INVAL) || !(flags & B_ASYNC)) { 25077c478bd9Sstevel@tonic-gate pp = page_lookup(vp, io_off, 25087c478bd9Sstevel@tonic-gate (flags & (B_INVAL | B_FREE)) ? 25097c478bd9Sstevel@tonic-gate SE_EXCL : SE_SHARED); 25107c478bd9Sstevel@tonic-gate } else { 25117c478bd9Sstevel@tonic-gate pp = page_lookup_nowait(vp, io_off, 25127c478bd9Sstevel@tonic-gate (flags & B_FREE) ? 
SE_EXCL : SE_SHARED); 25137c478bd9Sstevel@tonic-gate } 25147c478bd9Sstevel@tonic-gate 25157c478bd9Sstevel@tonic-gate if (pp == NULL || !pvn_getdirty(pp, flags)) 25167c478bd9Sstevel@tonic-gate io_len = PAGESIZE; 25177c478bd9Sstevel@tonic-gate else { 25187c478bd9Sstevel@tonic-gate err = (*rp->r_putapage)(vp, pp, &io_off, 25197c478bd9Sstevel@tonic-gate &io_len, flags, cr); 25207c478bd9Sstevel@tonic-gate if (!error) 25217c478bd9Sstevel@tonic-gate error = err; 25227c478bd9Sstevel@tonic-gate /* 25237c478bd9Sstevel@tonic-gate * "io_off" and "io_len" are returned as 25247c478bd9Sstevel@tonic-gate * the range of pages we actually wrote. 25257c478bd9Sstevel@tonic-gate * This allows us to skip ahead more quickly 25267c478bd9Sstevel@tonic-gate * since several pages may've been dealt 25277c478bd9Sstevel@tonic-gate * with by this iteration of the loop. 25287c478bd9Sstevel@tonic-gate */ 25297c478bd9Sstevel@tonic-gate } 25307c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 25317c478bd9Sstevel@tonic-gate } 25327c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 25337c478bd9Sstevel@tonic-gate } 25347c478bd9Sstevel@tonic-gate 25357c478bd9Sstevel@tonic-gate return (error); 25367c478bd9Sstevel@tonic-gate } 25377c478bd9Sstevel@tonic-gate 25387c478bd9Sstevel@tonic-gate void 25397c478bd9Sstevel@tonic-gate nfs4_invalidate_pages(vnode_t *vp, u_offset_t off, cred_t *cr) 25407c478bd9Sstevel@tonic-gate { 25417c478bd9Sstevel@tonic-gate rnode4_t *rp; 25427c478bd9Sstevel@tonic-gate 25437c478bd9Sstevel@tonic-gate rp = VTOR4(vp); 25447c478bd9Sstevel@tonic-gate if (IS_SHADOW(vp, rp)) 25457c478bd9Sstevel@tonic-gate vp = RTOV4(rp); 25467c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 25477c478bd9Sstevel@tonic-gate while (rp->r_flags & R4TRUNCATE) 25487c478bd9Sstevel@tonic-gate cv_wait(&rp->r_cv, &rp->r_statelock); 25497c478bd9Sstevel@tonic-gate rp->r_flags |= R4TRUNCATE; 25507c478bd9Sstevel@tonic-gate if (off == (u_offset_t)0) { 25517c478bd9Sstevel@tonic-gate rp->r_flags &= 
~R4DIRTY; 25527c478bd9Sstevel@tonic-gate if (!(rp->r_flags & R4STALE)) 25537c478bd9Sstevel@tonic-gate rp->r_error = 0; 25547c478bd9Sstevel@tonic-gate } 25557c478bd9Sstevel@tonic-gate rp->r_truncaddr = off; 25567c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 25577c478bd9Sstevel@tonic-gate (void) pvn_vplist_dirty(vp, off, rp->r_putapage, 25587c478bd9Sstevel@tonic-gate B_INVAL | B_TRUNC, cr); 25597c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 25607c478bd9Sstevel@tonic-gate rp->r_flags &= ~R4TRUNCATE; 25617c478bd9Sstevel@tonic-gate cv_broadcast(&rp->r_cv); 25627c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 25637c478bd9Sstevel@tonic-gate } 25647c478bd9Sstevel@tonic-gate 25657c478bd9Sstevel@tonic-gate static int 25667c478bd9Sstevel@tonic-gate nfs4_mnt_kstat_update(kstat_t *ksp, int rw) 25677c478bd9Sstevel@tonic-gate { 25687c478bd9Sstevel@tonic-gate mntinfo4_t *mi; 25697c478bd9Sstevel@tonic-gate struct mntinfo_kstat *mik; 25707c478bd9Sstevel@tonic-gate vfs_t *vfsp; 25717c478bd9Sstevel@tonic-gate 25727c478bd9Sstevel@tonic-gate /* this is a read-only kstat. Bail out on a write */ 25737c478bd9Sstevel@tonic-gate if (rw == KSTAT_WRITE) 25747c478bd9Sstevel@tonic-gate return (EACCES); 25757c478bd9Sstevel@tonic-gate 25767c478bd9Sstevel@tonic-gate 25777c478bd9Sstevel@tonic-gate /* 25787c478bd9Sstevel@tonic-gate * We don't want to wait here as kstat_chain_lock could be held by 25797c478bd9Sstevel@tonic-gate * dounmount(). dounmount() takes vfs_reflock before the chain lock 25807c478bd9Sstevel@tonic-gate * and thus could lead to a deadlock. 
25817c478bd9Sstevel@tonic-gate */ 25827c478bd9Sstevel@tonic-gate vfsp = (struct vfs *)ksp->ks_private; 25837c478bd9Sstevel@tonic-gate 25847c478bd9Sstevel@tonic-gate mi = VFTOMI4(vfsp); 25857c478bd9Sstevel@tonic-gate mik = (struct mntinfo_kstat *)ksp->ks_data; 25867c478bd9Sstevel@tonic-gate 25877c478bd9Sstevel@tonic-gate (void) strcpy(mik->mik_proto, mi->mi_curr_serv->sv_knconf->knc_proto); 25887c478bd9Sstevel@tonic-gate 25897c478bd9Sstevel@tonic-gate mik->mik_vers = (uint32_t)mi->mi_vers; 25907c478bd9Sstevel@tonic-gate mik->mik_flags = mi->mi_flags; 25917c478bd9Sstevel@tonic-gate /* 25927c478bd9Sstevel@tonic-gate * The sv_secdata holds the flavor the client specifies. 25937c478bd9Sstevel@tonic-gate * If the client uses default and a security negotiation 25947c478bd9Sstevel@tonic-gate * occurs, sv_currsec will point to the current flavor 25957c478bd9Sstevel@tonic-gate * selected from the server flavor list. 25967c478bd9Sstevel@tonic-gate * sv_currsec is NULL if no security negotiation takes place. 25977c478bd9Sstevel@tonic-gate */ 25987c478bd9Sstevel@tonic-gate mik->mik_secmod = mi->mi_curr_serv->sv_currsec ? 
25997c478bd9Sstevel@tonic-gate mi->mi_curr_serv->sv_currsec->secmod : 26007c478bd9Sstevel@tonic-gate mi->mi_curr_serv->sv_secdata->secmod; 26017c478bd9Sstevel@tonic-gate mik->mik_curread = (uint32_t)mi->mi_curread; 26027c478bd9Sstevel@tonic-gate mik->mik_curwrite = (uint32_t)mi->mi_curwrite; 26037c478bd9Sstevel@tonic-gate mik->mik_retrans = mi->mi_retrans; 26047c478bd9Sstevel@tonic-gate mik->mik_timeo = mi->mi_timeo; 26057c478bd9Sstevel@tonic-gate mik->mik_acregmin = HR2SEC(mi->mi_acregmin); 26067c478bd9Sstevel@tonic-gate mik->mik_acregmax = HR2SEC(mi->mi_acregmax); 26077c478bd9Sstevel@tonic-gate mik->mik_acdirmin = HR2SEC(mi->mi_acdirmin); 26087c478bd9Sstevel@tonic-gate mik->mik_acdirmax = HR2SEC(mi->mi_acdirmax); 26097c478bd9Sstevel@tonic-gate mik->mik_noresponse = (uint32_t)mi->mi_noresponse; 26107c478bd9Sstevel@tonic-gate mik->mik_failover = (uint32_t)mi->mi_failover; 26117c478bd9Sstevel@tonic-gate mik->mik_remap = (uint32_t)mi->mi_remap; 26127c478bd9Sstevel@tonic-gate 26137c478bd9Sstevel@tonic-gate (void) strcpy(mik->mik_curserver, mi->mi_curr_serv->sv_hostname); 26147c478bd9Sstevel@tonic-gate 26157c478bd9Sstevel@tonic-gate return (0); 26167c478bd9Sstevel@tonic-gate } 26177c478bd9Sstevel@tonic-gate 26187c478bd9Sstevel@tonic-gate void 26197c478bd9Sstevel@tonic-gate nfs4_mnt_kstat_init(struct vfs *vfsp) 26207c478bd9Sstevel@tonic-gate { 26217c478bd9Sstevel@tonic-gate mntinfo4_t *mi = VFTOMI4(vfsp); 26227c478bd9Sstevel@tonic-gate 26237c478bd9Sstevel@tonic-gate /* 26247c478bd9Sstevel@tonic-gate * PSARC 2001/697 Contract Private Interface 26257c478bd9Sstevel@tonic-gate * All nfs kstats are under SunMC contract 26267c478bd9Sstevel@tonic-gate * Please refer to the PSARC listed above and contact 26277c478bd9Sstevel@tonic-gate * SunMC before making any changes! 
26287c478bd9Sstevel@tonic-gate * 26297c478bd9Sstevel@tonic-gate * Changes must be reviewed by Solaris File Sharing 26307c478bd9Sstevel@tonic-gate * Changes must be communicated to contract-2001-697@sun.com 26317c478bd9Sstevel@tonic-gate * 26327c478bd9Sstevel@tonic-gate */ 26337c478bd9Sstevel@tonic-gate 26347c478bd9Sstevel@tonic-gate mi->mi_io_kstats = kstat_create_zone("nfs", getminor(vfsp->vfs_dev), 26357c478bd9Sstevel@tonic-gate NULL, "nfs", KSTAT_TYPE_IO, 1, 0, mi->mi_zone->zone_id); 26367c478bd9Sstevel@tonic-gate if (mi->mi_io_kstats) { 26377c478bd9Sstevel@tonic-gate if (mi->mi_zone->zone_id != GLOBAL_ZONEID) 26387c478bd9Sstevel@tonic-gate kstat_zone_add(mi->mi_io_kstats, GLOBAL_ZONEID); 26397c478bd9Sstevel@tonic-gate mi->mi_io_kstats->ks_lock = &mi->mi_lock; 26407c478bd9Sstevel@tonic-gate kstat_install(mi->mi_io_kstats); 26417c478bd9Sstevel@tonic-gate } 26427c478bd9Sstevel@tonic-gate 26437c478bd9Sstevel@tonic-gate if ((mi->mi_ro_kstats = kstat_create_zone("nfs", 26447c478bd9Sstevel@tonic-gate getminor(vfsp->vfs_dev), "mntinfo", "misc", KSTAT_TYPE_RAW, 26457c478bd9Sstevel@tonic-gate sizeof (struct mntinfo_kstat), 0, mi->mi_zone->zone_id)) != NULL) { 26467c478bd9Sstevel@tonic-gate if (mi->mi_zone->zone_id != GLOBAL_ZONEID) 26477c478bd9Sstevel@tonic-gate kstat_zone_add(mi->mi_ro_kstats, GLOBAL_ZONEID); 26487c478bd9Sstevel@tonic-gate mi->mi_ro_kstats->ks_update = nfs4_mnt_kstat_update; 26497c478bd9Sstevel@tonic-gate mi->mi_ro_kstats->ks_private = (void *)vfsp; 26507c478bd9Sstevel@tonic-gate kstat_install(mi->mi_ro_kstats); 26517c478bd9Sstevel@tonic-gate } 26527c478bd9Sstevel@tonic-gate 26537c478bd9Sstevel@tonic-gate nfs4_mnt_recov_kstat_init(vfsp); 26547c478bd9Sstevel@tonic-gate } 26557c478bd9Sstevel@tonic-gate 26567c478bd9Sstevel@tonic-gate void 26577c478bd9Sstevel@tonic-gate nfs4_write_error(vnode_t *vp, int error, cred_t *cr) 26587c478bd9Sstevel@tonic-gate { 26597c478bd9Sstevel@tonic-gate mntinfo4_t *mi; 2660d3d50737SRafael Vanoni clock_t now = ddi_get_lbolt(); 
26617c478bd9Sstevel@tonic-gate 26627c478bd9Sstevel@tonic-gate mi = VTOMI4(vp); 26637c478bd9Sstevel@tonic-gate /* 26647c478bd9Sstevel@tonic-gate * In case of forced unmount, do not print any messages 26657c478bd9Sstevel@tonic-gate * since it can flood the console with error messages. 26667c478bd9Sstevel@tonic-gate */ 26677c478bd9Sstevel@tonic-gate if (mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED) 26687c478bd9Sstevel@tonic-gate return; 26697c478bd9Sstevel@tonic-gate 26707c478bd9Sstevel@tonic-gate /* 26717c478bd9Sstevel@tonic-gate * If the mount point is dead, not recoverable, do not 26727c478bd9Sstevel@tonic-gate * print error messages that can flood the console. 26737c478bd9Sstevel@tonic-gate */ 26747c478bd9Sstevel@tonic-gate if (mi->mi_flags & MI4_RECOV_FAIL) 26757c478bd9Sstevel@tonic-gate return; 26767c478bd9Sstevel@tonic-gate 26777c478bd9Sstevel@tonic-gate /* 26787c478bd9Sstevel@tonic-gate * No use in flooding the console with ENOSPC 26797c478bd9Sstevel@tonic-gate * messages from the same file system. 
26807c478bd9Sstevel@tonic-gate */ 26817c478bd9Sstevel@tonic-gate if ((error != ENOSPC && error != EDQUOT) || 2682d3d50737SRafael Vanoni now - mi->mi_printftime > 0) { 26837c478bd9Sstevel@tonic-gate zoneid_t zoneid = mi->mi_zone->zone_id; 26847c478bd9Sstevel@tonic-gate 26857c478bd9Sstevel@tonic-gate #ifdef DEBUG 26867c478bd9Sstevel@tonic-gate nfs_perror(error, "NFS%ld write error on host %s: %m.\n", 26877c478bd9Sstevel@tonic-gate mi->mi_vers, VTOR4(vp)->r_server->sv_hostname, NULL); 26887c478bd9Sstevel@tonic-gate #else 26897c478bd9Sstevel@tonic-gate nfs_perror(error, "NFS write error on host %s: %m.\n", 26907c478bd9Sstevel@tonic-gate VTOR4(vp)->r_server->sv_hostname, NULL); 26917c478bd9Sstevel@tonic-gate #endif 26927c478bd9Sstevel@tonic-gate if (error == ENOSPC || error == EDQUOT) { 26937c478bd9Sstevel@tonic-gate zcmn_err(zoneid, CE_CONT, 26947c478bd9Sstevel@tonic-gate "^File: userid=%d, groupid=%d\n", 26957c478bd9Sstevel@tonic-gate crgetuid(cr), crgetgid(cr)); 26967c478bd9Sstevel@tonic-gate if (crgetuid(curthread->t_cred) != crgetuid(cr) || 26977c478bd9Sstevel@tonic-gate crgetgid(curthread->t_cred) != crgetgid(cr)) { 26987c478bd9Sstevel@tonic-gate zcmn_err(zoneid, CE_CONT, 26997c478bd9Sstevel@tonic-gate "^User: userid=%d, groupid=%d\n", 27007c478bd9Sstevel@tonic-gate crgetuid(curthread->t_cred), 27017c478bd9Sstevel@tonic-gate crgetgid(curthread->t_cred)); 27027c478bd9Sstevel@tonic-gate } 2703d3d50737SRafael Vanoni mi->mi_printftime = now + 27047c478bd9Sstevel@tonic-gate nfs_write_error_interval * hz; 27057c478bd9Sstevel@tonic-gate } 27067c478bd9Sstevel@tonic-gate sfh4_printfhandle(VTOR4(vp)->r_fh); 27077c478bd9Sstevel@tonic-gate #ifdef DEBUG 27087c478bd9Sstevel@tonic-gate if (error == EACCES) { 27097c478bd9Sstevel@tonic-gate zcmn_err(zoneid, CE_CONT, 27107c478bd9Sstevel@tonic-gate "nfs_bio: cred is%s kcred\n", 27117c478bd9Sstevel@tonic-gate cr == kcred ? 
"" : " not"); 27127c478bd9Sstevel@tonic-gate } 27137c478bd9Sstevel@tonic-gate #endif 27147c478bd9Sstevel@tonic-gate } 27157c478bd9Sstevel@tonic-gate } 27167c478bd9Sstevel@tonic-gate 27177c478bd9Sstevel@tonic-gate /* 27187c478bd9Sstevel@tonic-gate * Return non-zero if the given file can be safely memory mapped. Locks 27197c478bd9Sstevel@tonic-gate * are safe if whole-file (length and offset are both zero). 27207c478bd9Sstevel@tonic-gate */ 27217c478bd9Sstevel@tonic-gate 27227c478bd9Sstevel@tonic-gate #define SAFE_LOCK(flk) ((flk).l_start == 0 && (flk).l_len == 0) 27237c478bd9Sstevel@tonic-gate 27247c478bd9Sstevel@tonic-gate static int 27257c478bd9Sstevel@tonic-gate nfs4_safemap(const vnode_t *vp) 27267c478bd9Sstevel@tonic-gate { 27277c478bd9Sstevel@tonic-gate locklist_t *llp, *next_llp; 27287c478bd9Sstevel@tonic-gate int safe = 1; 27297c478bd9Sstevel@tonic-gate rnode4_t *rp = VTOR4(vp); 27307c478bd9Sstevel@tonic-gate 27317c478bd9Sstevel@tonic-gate ASSERT(nfs_rw_lock_held(&rp->r_lkserlock, RW_WRITER)); 27327c478bd9Sstevel@tonic-gate 27337c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_map_debug, (CE_NOTE, "nfs4_safemap: " 27347c478bd9Sstevel@tonic-gate "vp = %p", (void *)vp)); 27357c478bd9Sstevel@tonic-gate 27367c478bd9Sstevel@tonic-gate /* 27377c478bd9Sstevel@tonic-gate * Review all the locks for the vnode, both ones that have been 27387c478bd9Sstevel@tonic-gate * acquired and ones that are pending. We assume that 27397c478bd9Sstevel@tonic-gate * flk_active_locks_for_vp() has merged any locks that can be 27407c478bd9Sstevel@tonic-gate * merged (so that if a process has the entire file locked, it is 27417c478bd9Sstevel@tonic-gate * represented as a single lock). 27427c478bd9Sstevel@tonic-gate * 27437c478bd9Sstevel@tonic-gate * Note that we can't bail out of the loop if we find a non-safe 27447c478bd9Sstevel@tonic-gate * lock, because we have to free all the elements in the llp list. 
27457c478bd9Sstevel@tonic-gate * We might be able to speed up this code slightly by not looking 27467c478bd9Sstevel@tonic-gate * at each lock's l_start and l_len fields once we've found a 27477c478bd9Sstevel@tonic-gate * non-safe lock. 27487c478bd9Sstevel@tonic-gate */ 27497c478bd9Sstevel@tonic-gate 27507c478bd9Sstevel@tonic-gate llp = flk_active_locks_for_vp(vp); 27517c478bd9Sstevel@tonic-gate while (llp) { 27527c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_map_debug, (CE_NOTE, 27537c478bd9Sstevel@tonic-gate "nfs4_safemap: active lock (%" PRId64 ", %" PRId64 ")", 27547c478bd9Sstevel@tonic-gate llp->ll_flock.l_start, llp->ll_flock.l_len)); 27557c478bd9Sstevel@tonic-gate if (!SAFE_LOCK(llp->ll_flock)) { 27567c478bd9Sstevel@tonic-gate safe = 0; 27577c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_map_debug, (CE_NOTE, 27587c478bd9Sstevel@tonic-gate "nfs4_safemap: unsafe active lock (%" PRId64 27597c478bd9Sstevel@tonic-gate ", %" PRId64 ")", llp->ll_flock.l_start, 27607c478bd9Sstevel@tonic-gate llp->ll_flock.l_len)); 27617c478bd9Sstevel@tonic-gate } 27627c478bd9Sstevel@tonic-gate next_llp = llp->ll_next; 27637c478bd9Sstevel@tonic-gate VN_RELE(llp->ll_vp); 27647c478bd9Sstevel@tonic-gate kmem_free(llp, sizeof (*llp)); 27657c478bd9Sstevel@tonic-gate llp = next_llp; 27667c478bd9Sstevel@tonic-gate } 27677c478bd9Sstevel@tonic-gate 27687c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_map_debug, (CE_NOTE, "nfs4_safemap: %s", 27697c478bd9Sstevel@tonic-gate safe ? "safe" : "unsafe")); 27707c478bd9Sstevel@tonic-gate return (safe); 27717c478bd9Sstevel@tonic-gate } 27727c478bd9Sstevel@tonic-gate 27737c478bd9Sstevel@tonic-gate /* 27747c478bd9Sstevel@tonic-gate * Return whether there is a lost LOCK or LOCKU queued up for the given 27757c478bd9Sstevel@tonic-gate * file that would make an mmap request unsafe. cf. nfs4_safemap(). 
27767c478bd9Sstevel@tonic-gate */ 27777c478bd9Sstevel@tonic-gate 27787c478bd9Sstevel@tonic-gate bool_t 27797c478bd9Sstevel@tonic-gate nfs4_map_lost_lock_conflict(vnode_t *vp) 27807c478bd9Sstevel@tonic-gate { 27817c478bd9Sstevel@tonic-gate bool_t conflict = FALSE; 27827c478bd9Sstevel@tonic-gate nfs4_lost_rqst_t *lrp; 27837c478bd9Sstevel@tonic-gate mntinfo4_t *mi = VTOMI4(vp); 27847c478bd9Sstevel@tonic-gate 27857c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 27867c478bd9Sstevel@tonic-gate for (lrp = list_head(&mi->mi_lost_state); lrp != NULL; 27877c478bd9Sstevel@tonic-gate lrp = list_next(&mi->mi_lost_state, lrp)) { 27887c478bd9Sstevel@tonic-gate if (lrp->lr_op != OP_LOCK && lrp->lr_op != OP_LOCKU) 27897c478bd9Sstevel@tonic-gate continue; 27907c478bd9Sstevel@tonic-gate ASSERT(lrp->lr_vp != NULL); 2791da6c28aaSamw if (!VOP_CMP(lrp->lr_vp, vp, NULL)) 27927c478bd9Sstevel@tonic-gate continue; /* different file */ 27937c478bd9Sstevel@tonic-gate if (!SAFE_LOCK(*lrp->lr_flk)) { 27947c478bd9Sstevel@tonic-gate conflict = TRUE; 27957c478bd9Sstevel@tonic-gate break; 27967c478bd9Sstevel@tonic-gate } 27977c478bd9Sstevel@tonic-gate } 27987c478bd9Sstevel@tonic-gate 27997c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 28007c478bd9Sstevel@tonic-gate return (conflict); 28017c478bd9Sstevel@tonic-gate } 28027c478bd9Sstevel@tonic-gate 28037c478bd9Sstevel@tonic-gate /* 28047c478bd9Sstevel@tonic-gate * nfs_lockcompletion: 28057c478bd9Sstevel@tonic-gate * 28067c478bd9Sstevel@tonic-gate * If the vnode has a lock that makes it unsafe to cache the file, mark it 28077c478bd9Sstevel@tonic-gate * as non cachable (set VNOCACHE bit). 
28087c478bd9Sstevel@tonic-gate */ 28097c478bd9Sstevel@tonic-gate 28107c478bd9Sstevel@tonic-gate void 28117c478bd9Sstevel@tonic-gate nfs4_lockcompletion(vnode_t *vp, int cmd) 28127c478bd9Sstevel@tonic-gate { 28137c478bd9Sstevel@tonic-gate rnode4_t *rp = VTOR4(vp); 28147c478bd9Sstevel@tonic-gate 28157c478bd9Sstevel@tonic-gate ASSERT(nfs_rw_lock_held(&rp->r_lkserlock, RW_WRITER)); 28167c478bd9Sstevel@tonic-gate ASSERT(!IS_SHADOW(vp, rp)); 28177c478bd9Sstevel@tonic-gate 28187c478bd9Sstevel@tonic-gate if (cmd == F_SETLK || cmd == F_SETLKW) { 28197c478bd9Sstevel@tonic-gate 28207c478bd9Sstevel@tonic-gate if (!nfs4_safemap(vp)) { 28217c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock); 28227c478bd9Sstevel@tonic-gate vp->v_flag |= VNOCACHE; 28237c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 28247c478bd9Sstevel@tonic-gate } else { 28257c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock); 28267c478bd9Sstevel@tonic-gate vp->v_flag &= ~VNOCACHE; 28277c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 28287c478bd9Sstevel@tonic-gate } 28297c478bd9Sstevel@tonic-gate } 28307c478bd9Sstevel@tonic-gate /* 28317c478bd9Sstevel@tonic-gate * The cached attributes of the file are stale after acquiring 28327c478bd9Sstevel@tonic-gate * the lock on the file. They were updated when the file was 28337c478bd9Sstevel@tonic-gate * opened, but not updated when the lock was acquired. Therefore the 28347c478bd9Sstevel@tonic-gate * cached attributes are invalidated after the lock is obtained. 
28357c478bd9Sstevel@tonic-gate */ 28367c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE4(vp); 28377c478bd9Sstevel@tonic-gate } 28387c478bd9Sstevel@tonic-gate 28397c478bd9Sstevel@tonic-gate /* ARGSUSED */ 28407c478bd9Sstevel@tonic-gate static void * 28417c478bd9Sstevel@tonic-gate nfs4_mi_init(zoneid_t zoneid) 28427c478bd9Sstevel@tonic-gate { 28437c478bd9Sstevel@tonic-gate struct mi4_globals *mig; 28447c478bd9Sstevel@tonic-gate 28457c478bd9Sstevel@tonic-gate mig = kmem_alloc(sizeof (*mig), KM_SLEEP); 28467c478bd9Sstevel@tonic-gate mutex_init(&mig->mig_lock, NULL, MUTEX_DEFAULT, NULL); 28477c478bd9Sstevel@tonic-gate list_create(&mig->mig_list, sizeof (mntinfo4_t), 28487c478bd9Sstevel@tonic-gate offsetof(mntinfo4_t, mi_zone_node)); 28497c478bd9Sstevel@tonic-gate mig->mig_destructor_called = B_FALSE; 28507c478bd9Sstevel@tonic-gate return (mig); 28517c478bd9Sstevel@tonic-gate } 28527c478bd9Sstevel@tonic-gate 28537c478bd9Sstevel@tonic-gate /* 28547c478bd9Sstevel@tonic-gate * Callback routine to tell all NFSv4 mounts in the zone to start tearing down 28557c478bd9Sstevel@tonic-gate * state and killing off threads. 
28567c478bd9Sstevel@tonic-gate */ 28577c478bd9Sstevel@tonic-gate /* ARGSUSED */ 28587c478bd9Sstevel@tonic-gate static void 28597c478bd9Sstevel@tonic-gate nfs4_mi_shutdown(zoneid_t zoneid, void *data) 28607c478bd9Sstevel@tonic-gate { 28617c478bd9Sstevel@tonic-gate struct mi4_globals *mig = data; 28627c478bd9Sstevel@tonic-gate mntinfo4_t *mi; 28637c478bd9Sstevel@tonic-gate nfs4_server_t *np; 28647c478bd9Sstevel@tonic-gate 28657c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE, 28667c478bd9Sstevel@tonic-gate "nfs4_mi_shutdown zone %d\n", zoneid)); 28677c478bd9Sstevel@tonic-gate ASSERT(mig != NULL); 286850a83466Sjwahlig for (;;) { 28697c478bd9Sstevel@tonic-gate mutex_enter(&mig->mig_lock); 287050a83466Sjwahlig mi = list_head(&mig->mig_list); 287150a83466Sjwahlig if (mi == NULL) { 287250a83466Sjwahlig mutex_exit(&mig->mig_lock); 287350a83466Sjwahlig break; 287450a83466Sjwahlig } 28753fd6cc29Sthurlow 28767c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE, 28777c478bd9Sstevel@tonic-gate "nfs4_mi_shutdown stopping vfs %p\n", (void *)mi->mi_vfsp)); 28787c478bd9Sstevel@tonic-gate /* 28797c478bd9Sstevel@tonic-gate * purge the DNLC for this filesystem 28807c478bd9Sstevel@tonic-gate */ 28817c478bd9Sstevel@tonic-gate (void) dnlc_purge_vfsp(mi->mi_vfsp, 0); 28827c478bd9Sstevel@tonic-gate /* 28837c478bd9Sstevel@tonic-gate * Tell existing async worker threads to exit. 28847c478bd9Sstevel@tonic-gate */ 288550a83466Sjwahlig mutex_enter(&mi->mi_async_lock); 28867c478bd9Sstevel@tonic-gate mi->mi_max_threads = 0; 28870776f5e6SVallish Vaidyeshwara NFS4_WAKEALL_ASYNC_WORKERS(mi->mi_async_work_cv); 28887c478bd9Sstevel@tonic-gate /* 288950a83466Sjwahlig * Set the appropriate flags, signal and wait for both the 289050a83466Sjwahlig * async manager and the inactive thread to exit when they're 289150a83466Sjwahlig * done with their current work. 
28927c478bd9Sstevel@tonic-gate */ 28937c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 28947c478bd9Sstevel@tonic-gate mi->mi_flags |= (MI4_ASYNC_MGR_STOP|MI4_DEAD); 28957c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 28967c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock); 289750a83466Sjwahlig if (mi->mi_manager_thread) { 289850a83466Sjwahlig nfs4_async_manager_stop(mi->mi_vfsp); 28997c478bd9Sstevel@tonic-gate } 290050a83466Sjwahlig if (mi->mi_inactive_thread) { 290150a83466Sjwahlig mutex_enter(&mi->mi_async_lock); 290250a83466Sjwahlig cv_signal(&mi->mi_inact_req_cv); 290350a83466Sjwahlig /* 290450a83466Sjwahlig * Wait for the inactive thread to exit. 290550a83466Sjwahlig */ 290650a83466Sjwahlig while (mi->mi_inactive_thread != NULL) { 290750a83466Sjwahlig cv_wait(&mi->mi_async_cv, &mi->mi_async_lock); 290850a83466Sjwahlig } 290950a83466Sjwahlig mutex_exit(&mi->mi_async_lock); 291050a83466Sjwahlig } 291150a83466Sjwahlig /* 291250a83466Sjwahlig * Wait for the recovery thread to complete, that is, it will 291350a83466Sjwahlig * signal when it is done using the "mi" structure and about 291450a83466Sjwahlig * to exit 291550a83466Sjwahlig */ 291650a83466Sjwahlig mutex_enter(&mi->mi_lock); 291750a83466Sjwahlig while (mi->mi_in_recovery > 0) 291850a83466Sjwahlig cv_wait(&mi->mi_cv_in_recov, &mi->mi_lock); 291950a83466Sjwahlig mutex_exit(&mi->mi_lock); 292050a83466Sjwahlig /* 292150a83466Sjwahlig * We're done when every mi has been done or the list is empty. 292250a83466Sjwahlig * This one is done, remove it from the list. 292350a83466Sjwahlig */ 292450a83466Sjwahlig list_remove(&mig->mig_list, mi); 29257c478bd9Sstevel@tonic-gate mutex_exit(&mig->mig_lock); 2926a19609f8Sjv227347 zone_rele_ref(&mi->mi_zone_ref, ZONE_REF_NFSV4); 2927a19609f8Sjv227347 292850a83466Sjwahlig /* 292950a83466Sjwahlig * Release hold on vfs and mi done to prevent race with zone 293050a83466Sjwahlig * shutdown. This releases the hold in nfs4_mi_zonelist_add. 
293150a83466Sjwahlig */ 293250a83466Sjwahlig VFS_RELE(mi->mi_vfsp); 293350a83466Sjwahlig MI4_RELE(mi); 293450a83466Sjwahlig } 29357c478bd9Sstevel@tonic-gate /* 29367c478bd9Sstevel@tonic-gate * Tell each renew thread in the zone to exit 29377c478bd9Sstevel@tonic-gate */ 29387c478bd9Sstevel@tonic-gate mutex_enter(&nfs4_server_lst_lock); 29397c478bd9Sstevel@tonic-gate for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) { 29407c478bd9Sstevel@tonic-gate mutex_enter(&np->s_lock); 29417c478bd9Sstevel@tonic-gate if (np->zoneid == zoneid) { 29427c478bd9Sstevel@tonic-gate /* 29437c478bd9Sstevel@tonic-gate * We add another hold onto the nfs4_server_t 29447c478bd9Sstevel@tonic-gate * because this will make sure tha the nfs4_server_t 29457c478bd9Sstevel@tonic-gate * stays around until nfs4_callback_fini_zone destroys 29467c478bd9Sstevel@tonic-gate * the zone. This way, the renew thread can 29477c478bd9Sstevel@tonic-gate * unconditionally release its holds on the 29487c478bd9Sstevel@tonic-gate * nfs4_server_t. 
29497c478bd9Sstevel@tonic-gate */ 29507c478bd9Sstevel@tonic-gate np->s_refcnt++; 29517c478bd9Sstevel@tonic-gate nfs4_mark_srv_dead(np); 29527c478bd9Sstevel@tonic-gate } 29537c478bd9Sstevel@tonic-gate mutex_exit(&np->s_lock); 29547c478bd9Sstevel@tonic-gate } 29557c478bd9Sstevel@tonic-gate mutex_exit(&nfs4_server_lst_lock); 29567c478bd9Sstevel@tonic-gate } 29577c478bd9Sstevel@tonic-gate 29587c478bd9Sstevel@tonic-gate static void 29597c478bd9Sstevel@tonic-gate nfs4_mi_free_globals(struct mi4_globals *mig) 29607c478bd9Sstevel@tonic-gate { 29617c478bd9Sstevel@tonic-gate list_destroy(&mig->mig_list); /* makes sure the list is empty */ 29627c478bd9Sstevel@tonic-gate mutex_destroy(&mig->mig_lock); 29637c478bd9Sstevel@tonic-gate kmem_free(mig, sizeof (*mig)); 29647c478bd9Sstevel@tonic-gate } 29657c478bd9Sstevel@tonic-gate 29667c478bd9Sstevel@tonic-gate /* ARGSUSED */ 29677c478bd9Sstevel@tonic-gate static void 29687c478bd9Sstevel@tonic-gate nfs4_mi_destroy(zoneid_t zoneid, void *data) 29697c478bd9Sstevel@tonic-gate { 29707c478bd9Sstevel@tonic-gate struct mi4_globals *mig = data; 29717c478bd9Sstevel@tonic-gate 29727c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE, 29737c478bd9Sstevel@tonic-gate "nfs4_mi_destroy zone %d\n", zoneid)); 29747c478bd9Sstevel@tonic-gate ASSERT(mig != NULL); 29757c478bd9Sstevel@tonic-gate mutex_enter(&mig->mig_lock); 29767c478bd9Sstevel@tonic-gate if (list_head(&mig->mig_list) != NULL) { 29777c478bd9Sstevel@tonic-gate /* Still waiting for VFS_FREEVFS() */ 29787c478bd9Sstevel@tonic-gate mig->mig_destructor_called = B_TRUE; 29797c478bd9Sstevel@tonic-gate mutex_exit(&mig->mig_lock); 29807c478bd9Sstevel@tonic-gate return; 29817c478bd9Sstevel@tonic-gate } 29827c478bd9Sstevel@tonic-gate nfs4_mi_free_globals(mig); 29837c478bd9Sstevel@tonic-gate } 29847c478bd9Sstevel@tonic-gate 29857c478bd9Sstevel@tonic-gate /* 29867c478bd9Sstevel@tonic-gate * Add an NFS mount to the per-zone list of NFS mounts. 
29877c478bd9Sstevel@tonic-gate */ 29887c478bd9Sstevel@tonic-gate void 29897c478bd9Sstevel@tonic-gate nfs4_mi_zonelist_add(mntinfo4_t *mi) 29907c478bd9Sstevel@tonic-gate { 29917c478bd9Sstevel@tonic-gate struct mi4_globals *mig; 29927c478bd9Sstevel@tonic-gate 29937c478bd9Sstevel@tonic-gate mig = zone_getspecific(mi4_list_key, mi->mi_zone); 29947c478bd9Sstevel@tonic-gate mutex_enter(&mig->mig_lock); 29957c478bd9Sstevel@tonic-gate list_insert_head(&mig->mig_list, mi); 299650a83466Sjwahlig /* 299750a83466Sjwahlig * hold added to eliminate race with zone shutdown -this will be 299850a83466Sjwahlig * released in mi_shutdown 299950a83466Sjwahlig */ 300050a83466Sjwahlig MI4_HOLD(mi); 300150a83466Sjwahlig VFS_HOLD(mi->mi_vfsp); 30027c478bd9Sstevel@tonic-gate mutex_exit(&mig->mig_lock); 30037c478bd9Sstevel@tonic-gate } 30047c478bd9Sstevel@tonic-gate 30057c478bd9Sstevel@tonic-gate /* 30067c478bd9Sstevel@tonic-gate * Remove an NFS mount from the per-zone list of NFS mounts. 30077c478bd9Sstevel@tonic-gate */ 300850a83466Sjwahlig int 30097c478bd9Sstevel@tonic-gate nfs4_mi_zonelist_remove(mntinfo4_t *mi) 30107c478bd9Sstevel@tonic-gate { 30117c478bd9Sstevel@tonic-gate struct mi4_globals *mig; 301250a83466Sjwahlig int ret = 0; 30137c478bd9Sstevel@tonic-gate 30147c478bd9Sstevel@tonic-gate mig = zone_getspecific(mi4_list_key, mi->mi_zone); 30157c478bd9Sstevel@tonic-gate mutex_enter(&mig->mig_lock); 301650a83466Sjwahlig mutex_enter(&mi->mi_lock); 301750a83466Sjwahlig /* if this mi is marked dead, then the zone already released it */ 301850a83466Sjwahlig if (!(mi->mi_flags & MI4_DEAD)) { 30197c478bd9Sstevel@tonic-gate list_remove(&mig->mig_list, mi); 30201dc00f28SJames Wahlig mutex_exit(&mi->mi_lock); 302150a83466Sjwahlig 302250a83466Sjwahlig /* release the holds put on in zonelist_add(). 
*/ 302350a83466Sjwahlig VFS_RELE(mi->mi_vfsp); 302450a83466Sjwahlig MI4_RELE(mi); 302550a83466Sjwahlig ret = 1; 30261dc00f28SJames Wahlig } else { 302750a83466Sjwahlig mutex_exit(&mi->mi_lock); 30281dc00f28SJames Wahlig } 302950a83466Sjwahlig 30307c478bd9Sstevel@tonic-gate /* 30317c478bd9Sstevel@tonic-gate * We can be called asynchronously by VFS_FREEVFS() after the zone 30327c478bd9Sstevel@tonic-gate * shutdown/destroy callbacks have executed; if so, clean up the zone's 30337c478bd9Sstevel@tonic-gate * mi globals. 30347c478bd9Sstevel@tonic-gate */ 30357c478bd9Sstevel@tonic-gate if (list_head(&mig->mig_list) == NULL && 30367c478bd9Sstevel@tonic-gate mig->mig_destructor_called == B_TRUE) { 30377c478bd9Sstevel@tonic-gate nfs4_mi_free_globals(mig); 303850a83466Sjwahlig return (ret); 30397c478bd9Sstevel@tonic-gate } 30407c478bd9Sstevel@tonic-gate mutex_exit(&mig->mig_lock); 304150a83466Sjwahlig return (ret); 30427c478bd9Sstevel@tonic-gate } 30437c478bd9Sstevel@tonic-gate 30447c478bd9Sstevel@tonic-gate void 30457c478bd9Sstevel@tonic-gate nfs_free_mi4(mntinfo4_t *mi) 30467c478bd9Sstevel@tonic-gate { 30477c478bd9Sstevel@tonic-gate nfs4_open_owner_t *foop; 30487c478bd9Sstevel@tonic-gate nfs4_oo_hash_bucket_t *bucketp; 30497c478bd9Sstevel@tonic-gate nfs4_debug_msg_t *msgp; 30507c478bd9Sstevel@tonic-gate int i; 305150a83466Sjwahlig servinfo4_t *svp; 30527c478bd9Sstevel@tonic-gate 3053f0558703SVallish Vaidyeshwara /* 3054f0558703SVallish Vaidyeshwara * Code introduced here should be carefully evaluated to make 3055f0558703SVallish Vaidyeshwara * sure none of the freed resources are accessed either directly 3056f0558703SVallish Vaidyeshwara * or indirectly after freeing them. For eg: Introducing calls to 3057f0558703SVallish Vaidyeshwara * NFS4_DEBUG that use mntinfo4_t structure member after freeing 3058f0558703SVallish Vaidyeshwara * the structure members or other routines calling back into NFS 3059f0558703SVallish Vaidyeshwara * accessing freed mntinfo4_t structure member. 
3060f0558703SVallish Vaidyeshwara */ 30617c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 30627c478bd9Sstevel@tonic-gate ASSERT(mi->mi_recovthread == NULL); 30637c478bd9Sstevel@tonic-gate ASSERT(mi->mi_flags & MI4_ASYNC_MGR_STOP); 30647c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 30657c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_async_lock); 30660776f5e6SVallish Vaidyeshwara ASSERT(mi->mi_threads[NFS4_ASYNC_QUEUE] == 0 && 30670776f5e6SVallish Vaidyeshwara mi->mi_threads[NFS4_ASYNC_PGOPS_QUEUE] == 0); 30687c478bd9Sstevel@tonic-gate ASSERT(mi->mi_manager_thread == NULL); 30697c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock); 307050a83466Sjwahlig if (mi->mi_io_kstats) { 307150a83466Sjwahlig kstat_delete(mi->mi_io_kstats); 307250a83466Sjwahlig mi->mi_io_kstats = NULL; 30737c478bd9Sstevel@tonic-gate } 307450a83466Sjwahlig if (mi->mi_ro_kstats) { 307550a83466Sjwahlig kstat_delete(mi->mi_ro_kstats); 307650a83466Sjwahlig mi->mi_ro_kstats = NULL; 30777c478bd9Sstevel@tonic-gate } 307850a83466Sjwahlig if (mi->mi_recov_ksp) { 307950a83466Sjwahlig kstat_delete(mi->mi_recov_ksp); 308050a83466Sjwahlig mi->mi_recov_ksp = NULL; 308150a83466Sjwahlig } 30827c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_msg_list_lock); 30837c478bd9Sstevel@tonic-gate while (msgp = list_head(&mi->mi_msg_list)) { 30847c478bd9Sstevel@tonic-gate list_remove(&mi->mi_msg_list, msgp); 30857c478bd9Sstevel@tonic-gate nfs4_free_msg(msgp); 30867c478bd9Sstevel@tonic-gate } 30877c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_msg_list_lock); 30887c478bd9Sstevel@tonic-gate list_destroy(&mi->mi_msg_list); 3089bbf2a467SNagakiran Rajashekar if (mi->mi_fname != NULL) 3090bbf2a467SNagakiran Rajashekar fn_rele(&mi->mi_fname); 30917c478bd9Sstevel@tonic-gate if (mi->mi_rootfh != NULL) 30927c478bd9Sstevel@tonic-gate sfh4_rele(&mi->mi_rootfh); 30937c478bd9Sstevel@tonic-gate if (mi->mi_srvparentfh != NULL) 30947c478bd9Sstevel@tonic-gate sfh4_rele(&mi->mi_srvparentfh); 3095f0558703SVallish Vaidyeshwara svp = 
mi->mi_servers; 3096f0558703SVallish Vaidyeshwara sv4_free(svp); 30977c478bd9Sstevel@tonic-gate mutex_destroy(&mi->mi_lock); 30987c478bd9Sstevel@tonic-gate mutex_destroy(&mi->mi_async_lock); 30997c478bd9Sstevel@tonic-gate mutex_destroy(&mi->mi_msg_list_lock); 3100*8a790dc6SMarcel Telka mutex_destroy(&mi->mi_rnodes_lock); 31017c478bd9Sstevel@tonic-gate nfs_rw_destroy(&mi->mi_recovlock); 31027c478bd9Sstevel@tonic-gate nfs_rw_destroy(&mi->mi_rename_lock); 31037c478bd9Sstevel@tonic-gate nfs_rw_destroy(&mi->mi_fh_lock); 31047c478bd9Sstevel@tonic-gate cv_destroy(&mi->mi_failover_cv); 31057c478bd9Sstevel@tonic-gate cv_destroy(&mi->mi_async_reqs_cv); 31060776f5e6SVallish Vaidyeshwara cv_destroy(&mi->mi_async_work_cv[NFS4_ASYNC_QUEUE]); 31070776f5e6SVallish Vaidyeshwara cv_destroy(&mi->mi_async_work_cv[NFS4_ASYNC_PGOPS_QUEUE]); 31087c478bd9Sstevel@tonic-gate cv_destroy(&mi->mi_async_cv); 31097c478bd9Sstevel@tonic-gate cv_destroy(&mi->mi_inact_req_cv); 31107c478bd9Sstevel@tonic-gate /* 31117c478bd9Sstevel@tonic-gate * Destroy the oo hash lists and mutexes for the cred hash table. 31127c478bd9Sstevel@tonic-gate */ 31137c478bd9Sstevel@tonic-gate for (i = 0; i < NFS4_NUM_OO_BUCKETS; i++) { 31147c478bd9Sstevel@tonic-gate bucketp = &(mi->mi_oo_list[i]); 31157c478bd9Sstevel@tonic-gate /* Destroy any remaining open owners on the list */ 31167c478bd9Sstevel@tonic-gate foop = list_head(&bucketp->b_oo_hash_list); 31177c478bd9Sstevel@tonic-gate while (foop != NULL) { 31187c478bd9Sstevel@tonic-gate list_remove(&bucketp->b_oo_hash_list, foop); 31197c478bd9Sstevel@tonic-gate nfs4_destroy_open_owner(foop); 31207c478bd9Sstevel@tonic-gate foop = list_head(&bucketp->b_oo_hash_list); 31217c478bd9Sstevel@tonic-gate } 31227c478bd9Sstevel@tonic-gate list_destroy(&bucketp->b_oo_hash_list); 31237c478bd9Sstevel@tonic-gate mutex_destroy(&bucketp->b_lock); 31247c478bd9Sstevel@tonic-gate } 31257c478bd9Sstevel@tonic-gate /* 31267c478bd9Sstevel@tonic-gate * Empty and destroy the freed open owner list. 
31277c478bd9Sstevel@tonic-gate */ 31287c478bd9Sstevel@tonic-gate foop = list_head(&mi->mi_foo_list); 31297c478bd9Sstevel@tonic-gate while (foop != NULL) { 31307c478bd9Sstevel@tonic-gate list_remove(&mi->mi_foo_list, foop); 31317c478bd9Sstevel@tonic-gate nfs4_destroy_open_owner(foop); 31327c478bd9Sstevel@tonic-gate foop = list_head(&mi->mi_foo_list); 31337c478bd9Sstevel@tonic-gate } 31347c478bd9Sstevel@tonic-gate list_destroy(&mi->mi_foo_list); 31357c478bd9Sstevel@tonic-gate list_destroy(&mi->mi_bseqid_list); 31367c478bd9Sstevel@tonic-gate list_destroy(&mi->mi_lost_state); 3137*8a790dc6SMarcel Telka list_destroy(&mi->mi_rnodes); 31387c478bd9Sstevel@tonic-gate avl_destroy(&mi->mi_filehandles); 31397c478bd9Sstevel@tonic-gate kmem_free(mi, sizeof (*mi)); 31407c478bd9Sstevel@tonic-gate } 314150a83466Sjwahlig void 314250a83466Sjwahlig mi_hold(mntinfo4_t *mi) 314350a83466Sjwahlig { 31441a5e258fSJosef 'Jeff' Sipek atomic_inc_32(&mi->mi_count); 314550a83466Sjwahlig ASSERT(mi->mi_count != 0); 314650a83466Sjwahlig } 314750a83466Sjwahlig 314850a83466Sjwahlig void 314950a83466Sjwahlig mi_rele(mntinfo4_t *mi) 315050a83466Sjwahlig { 315150a83466Sjwahlig ASSERT(mi->mi_count != 0); 31521a5e258fSJosef 'Jeff' Sipek if (atomic_dec_32_nv(&mi->mi_count) == 0) { 315350a83466Sjwahlig nfs_free_mi4(mi); 315450a83466Sjwahlig } 315550a83466Sjwahlig } 31567c478bd9Sstevel@tonic-gate 31577c478bd9Sstevel@tonic-gate vnode_t nfs4_xattr_notsupp_vnode; 31587c478bd9Sstevel@tonic-gate 31597c478bd9Sstevel@tonic-gate void 31607c478bd9Sstevel@tonic-gate nfs4_clnt_init(void) 31617c478bd9Sstevel@tonic-gate { 31627c478bd9Sstevel@tonic-gate nfs4_vnops_init(); 31637c478bd9Sstevel@tonic-gate (void) nfs4_rnode_init(); 31647c478bd9Sstevel@tonic-gate (void) nfs4_shadow_init(); 31657c478bd9Sstevel@tonic-gate (void) nfs4_acache_init(); 31667c478bd9Sstevel@tonic-gate (void) nfs4_subr_init(); 31677c478bd9Sstevel@tonic-gate nfs4_acl_init(); 31687c478bd9Sstevel@tonic-gate nfs_idmap_init(); 31697c478bd9Sstevel@tonic-gate 
nfs4_callback_init(); 31707c478bd9Sstevel@tonic-gate nfs4_secinfo_init(); 31717c478bd9Sstevel@tonic-gate #ifdef DEBUG 31727c478bd9Sstevel@tonic-gate tsd_create(&nfs4_tsd_key, NULL); 31737c478bd9Sstevel@tonic-gate #endif 31747c478bd9Sstevel@tonic-gate 31757c478bd9Sstevel@tonic-gate /* 31767c478bd9Sstevel@tonic-gate * Add a CPR callback so that we can update client 31777c478bd9Sstevel@tonic-gate * lease after a suspend and resume. 31787c478bd9Sstevel@tonic-gate */ 31797c478bd9Sstevel@tonic-gate cid = callb_add(nfs4_client_cpr_callb, 0, CB_CL_CPR_RPC, "nfs4"); 31807c478bd9Sstevel@tonic-gate 31817c478bd9Sstevel@tonic-gate zone_key_create(&mi4_list_key, nfs4_mi_init, nfs4_mi_shutdown, 31827c478bd9Sstevel@tonic-gate nfs4_mi_destroy); 31837c478bd9Sstevel@tonic-gate 31847c478bd9Sstevel@tonic-gate /* 31857c478bd9Sstevel@tonic-gate * Initialise the reference count of the notsupp xattr cache vnode to 1 31867c478bd9Sstevel@tonic-gate * so that it never goes away (VOP_INACTIVE isn't called on it). 31877c478bd9Sstevel@tonic-gate */ 31887c478bd9Sstevel@tonic-gate nfs4_xattr_notsupp_vnode.v_count = 1; 31897c478bd9Sstevel@tonic-gate } 31907c478bd9Sstevel@tonic-gate 31917c478bd9Sstevel@tonic-gate void 31927c478bd9Sstevel@tonic-gate nfs4_clnt_fini(void) 31937c478bd9Sstevel@tonic-gate { 31947c478bd9Sstevel@tonic-gate (void) zone_key_delete(mi4_list_key); 31957c478bd9Sstevel@tonic-gate nfs4_vnops_fini(); 31967c478bd9Sstevel@tonic-gate (void) nfs4_rnode_fini(); 31977c478bd9Sstevel@tonic-gate (void) nfs4_shadow_fini(); 31987c478bd9Sstevel@tonic-gate (void) nfs4_acache_fini(); 31997c478bd9Sstevel@tonic-gate (void) nfs4_subr_fini(); 32007c478bd9Sstevel@tonic-gate nfs_idmap_fini(); 32017c478bd9Sstevel@tonic-gate nfs4_callback_fini(); 32027c478bd9Sstevel@tonic-gate nfs4_secinfo_fini(); 32037c478bd9Sstevel@tonic-gate #ifdef DEBUG 32047c478bd9Sstevel@tonic-gate tsd_destroy(&nfs4_tsd_key); 32057c478bd9Sstevel@tonic-gate #endif 32067c478bd9Sstevel@tonic-gate if (cid) 
32077c478bd9Sstevel@tonic-gate (void) callb_delete(cid); 32087c478bd9Sstevel@tonic-gate } 32097c478bd9Sstevel@tonic-gate 32107c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 32117c478bd9Sstevel@tonic-gate static boolean_t 32127c478bd9Sstevel@tonic-gate nfs4_client_cpr_callb(void *arg, int code) 32137c478bd9Sstevel@tonic-gate { 32147c478bd9Sstevel@tonic-gate /* 32157c478bd9Sstevel@tonic-gate * We get called for Suspend and Resume events. 32167c478bd9Sstevel@tonic-gate * For the suspend case we simply don't care! 32177c478bd9Sstevel@tonic-gate */ 32187c478bd9Sstevel@tonic-gate if (code == CB_CODE_CPR_CHKPT) { 32197c478bd9Sstevel@tonic-gate return (B_TRUE); 32207c478bd9Sstevel@tonic-gate } 32217c478bd9Sstevel@tonic-gate 32227c478bd9Sstevel@tonic-gate /* 32237c478bd9Sstevel@tonic-gate * When we get to here we are in the process of 32247c478bd9Sstevel@tonic-gate * resuming the system from a previous suspend. 32257c478bd9Sstevel@tonic-gate */ 32267c478bd9Sstevel@tonic-gate nfs4_client_resumed = gethrestime_sec(); 32277c478bd9Sstevel@tonic-gate return (B_TRUE); 32287c478bd9Sstevel@tonic-gate } 32297c478bd9Sstevel@tonic-gate 32307c478bd9Sstevel@tonic-gate void 32317c478bd9Sstevel@tonic-gate nfs4_renew_lease_thread(nfs4_server_t *sp) 32327c478bd9Sstevel@tonic-gate { 32337c478bd9Sstevel@tonic-gate int error = 0; 32347c478bd9Sstevel@tonic-gate time_t tmp_last_renewal_time, tmp_time, tmp_now_time, kip_secs; 32357c478bd9Sstevel@tonic-gate clock_t tick_delay = 0; 32367c478bd9Sstevel@tonic-gate clock_t time_left = 0; 32377c478bd9Sstevel@tonic-gate callb_cpr_t cpr_info; 32387c478bd9Sstevel@tonic-gate kmutex_t cpr_lock; 32397c478bd9Sstevel@tonic-gate 32407c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 32417c478bd9Sstevel@tonic-gate "nfs4_renew_lease_thread: acting on sp 0x%p", (void*)sp)); 32427c478bd9Sstevel@tonic-gate mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 32437c478bd9Sstevel@tonic-gate CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, 
"nfsv4Lease"); 32447c478bd9Sstevel@tonic-gate 32457c478bd9Sstevel@tonic-gate mutex_enter(&sp->s_lock); 32467c478bd9Sstevel@tonic-gate /* sp->s_lease_time is set via a GETATTR */ 32477c478bd9Sstevel@tonic-gate sp->last_renewal_time = gethrestime_sec(); 32487c478bd9Sstevel@tonic-gate sp->lease_valid = NFS4_LEASE_UNINITIALIZED; 32497c478bd9Sstevel@tonic-gate ASSERT(sp->s_refcnt >= 1); 32507c478bd9Sstevel@tonic-gate 32517c478bd9Sstevel@tonic-gate for (;;) { 32527c478bd9Sstevel@tonic-gate if (!sp->state_ref_count || 32537c478bd9Sstevel@tonic-gate sp->lease_valid != NFS4_LEASE_VALID) { 32547c478bd9Sstevel@tonic-gate 32557c478bd9Sstevel@tonic-gate kip_secs = MAX((sp->s_lease_time >> 1) - 32567c478bd9Sstevel@tonic-gate (3 * sp->propagation_delay.tv_sec), 1); 32577c478bd9Sstevel@tonic-gate 32587c478bd9Sstevel@tonic-gate tick_delay = SEC_TO_TICK(kip_secs); 32597c478bd9Sstevel@tonic-gate 32607c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 32617c478bd9Sstevel@tonic-gate "nfs4_renew_lease_thread: no renew : thread " 32627c478bd9Sstevel@tonic-gate "wait %ld secs", kip_secs)); 32637c478bd9Sstevel@tonic-gate 32647c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 32657c478bd9Sstevel@tonic-gate "nfs4_renew_lease_thread: no renew : " 32667c478bd9Sstevel@tonic-gate "state_ref_count %d, lease_valid %d", 32677c478bd9Sstevel@tonic-gate sp->state_ref_count, sp->lease_valid)); 32687c478bd9Sstevel@tonic-gate 32697c478bd9Sstevel@tonic-gate mutex_enter(&cpr_lock); 32707c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cpr_info); 32717c478bd9Sstevel@tonic-gate mutex_exit(&cpr_lock); 3272d3d50737SRafael Vanoni time_left = cv_reltimedwait(&sp->cv_thread_exit, 3273d3d50737SRafael Vanoni &sp->s_lock, tick_delay, TR_CLOCK_TICK); 32747c478bd9Sstevel@tonic-gate mutex_enter(&cpr_lock); 32757c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock); 32767c478bd9Sstevel@tonic-gate mutex_exit(&cpr_lock); 32777c478bd9Sstevel@tonic-gate 
32787c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 32797c478bd9Sstevel@tonic-gate "nfs4_renew_lease_thread: no renew: " 32807c478bd9Sstevel@tonic-gate "time left %ld", time_left)); 32817c478bd9Sstevel@tonic-gate 32827c478bd9Sstevel@tonic-gate if (sp->s_thread_exit == NFS4_THREAD_EXIT) 32837c478bd9Sstevel@tonic-gate goto die; 32847c478bd9Sstevel@tonic-gate continue; 32857c478bd9Sstevel@tonic-gate } 32867c478bd9Sstevel@tonic-gate 32877c478bd9Sstevel@tonic-gate tmp_last_renewal_time = sp->last_renewal_time; 32887c478bd9Sstevel@tonic-gate 32897c478bd9Sstevel@tonic-gate tmp_time = gethrestime_sec() - sp->last_renewal_time + 32907c478bd9Sstevel@tonic-gate (3 * sp->propagation_delay.tv_sec); 32917c478bd9Sstevel@tonic-gate 32927c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 32937c478bd9Sstevel@tonic-gate "nfs4_renew_lease_thread: tmp_time %ld, " 32947c478bd9Sstevel@tonic-gate "sp->last_renewal_time %ld", tmp_time, 32957c478bd9Sstevel@tonic-gate sp->last_renewal_time)); 32967c478bd9Sstevel@tonic-gate 32977c478bd9Sstevel@tonic-gate kip_secs = MAX((sp->s_lease_time >> 1) - tmp_time, 1); 32987c478bd9Sstevel@tonic-gate 32997c478bd9Sstevel@tonic-gate tick_delay = SEC_TO_TICK(kip_secs); 33007c478bd9Sstevel@tonic-gate 33017c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 33027c478bd9Sstevel@tonic-gate "nfs4_renew_lease_thread: valid lease: sleep for %ld " 33037c478bd9Sstevel@tonic-gate "secs", kip_secs)); 33047c478bd9Sstevel@tonic-gate 33057c478bd9Sstevel@tonic-gate mutex_enter(&cpr_lock); 33067c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cpr_info); 33077c478bd9Sstevel@tonic-gate mutex_exit(&cpr_lock); 3308d3d50737SRafael Vanoni time_left = cv_reltimedwait(&sp->cv_thread_exit, &sp->s_lock, 3309d3d50737SRafael Vanoni tick_delay, TR_CLOCK_TICK); 33107c478bd9Sstevel@tonic-gate mutex_enter(&cpr_lock); 33117c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock); 33127c478bd9Sstevel@tonic-gate 
mutex_exit(&cpr_lock); 33137c478bd9Sstevel@tonic-gate 33147c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 33157c478bd9Sstevel@tonic-gate "nfs4_renew_lease_thread: valid lease: time left %ld :" 33167c478bd9Sstevel@tonic-gate "sp last_renewal_time %ld, nfs4_client_resumed %ld, " 33177c478bd9Sstevel@tonic-gate "tmp_last_renewal_time %ld", time_left, 33187c478bd9Sstevel@tonic-gate sp->last_renewal_time, nfs4_client_resumed, 33197c478bd9Sstevel@tonic-gate tmp_last_renewal_time)); 33207c478bd9Sstevel@tonic-gate 33217c478bd9Sstevel@tonic-gate if (sp->s_thread_exit == NFS4_THREAD_EXIT) 33227c478bd9Sstevel@tonic-gate goto die; 33237c478bd9Sstevel@tonic-gate 33247c478bd9Sstevel@tonic-gate if (tmp_last_renewal_time == sp->last_renewal_time || 33257c478bd9Sstevel@tonic-gate (nfs4_client_resumed != 0 && 33267c478bd9Sstevel@tonic-gate nfs4_client_resumed > sp->last_renewal_time)) { 33277c478bd9Sstevel@tonic-gate /* 33287c478bd9Sstevel@tonic-gate * Issue RENEW op since we haven't renewed the lease 33297c478bd9Sstevel@tonic-gate * since we slept. 33307c478bd9Sstevel@tonic-gate */ 33317c478bd9Sstevel@tonic-gate tmp_now_time = gethrestime_sec(); 33327c478bd9Sstevel@tonic-gate error = nfs4renew(sp); 33337c478bd9Sstevel@tonic-gate /* 33347c478bd9Sstevel@tonic-gate * Need to re-acquire sp's lock, nfs4renew() 33357c478bd9Sstevel@tonic-gate * relinqueshes it. 33367c478bd9Sstevel@tonic-gate */ 33377c478bd9Sstevel@tonic-gate mutex_enter(&sp->s_lock); 33387c478bd9Sstevel@tonic-gate 33397c478bd9Sstevel@tonic-gate /* 33407c478bd9Sstevel@tonic-gate * See if someone changed s_thread_exit while we gave 33417c478bd9Sstevel@tonic-gate * up s_lock. 
33427c478bd9Sstevel@tonic-gate */ 33437c478bd9Sstevel@tonic-gate if (sp->s_thread_exit == NFS4_THREAD_EXIT) 33447c478bd9Sstevel@tonic-gate goto die; 33457c478bd9Sstevel@tonic-gate 33467c478bd9Sstevel@tonic-gate if (!error) { 33477c478bd9Sstevel@tonic-gate /* 33487c478bd9Sstevel@tonic-gate * check to see if we implicitly renewed while 33497c478bd9Sstevel@tonic-gate * we waited for a reply for our RENEW call. 33507c478bd9Sstevel@tonic-gate */ 33517c478bd9Sstevel@tonic-gate if (tmp_last_renewal_time == 33527c478bd9Sstevel@tonic-gate sp->last_renewal_time) { 33537c478bd9Sstevel@tonic-gate /* no implicit renew came */ 33547c478bd9Sstevel@tonic-gate sp->last_renewal_time = tmp_now_time; 33557c478bd9Sstevel@tonic-gate } else { 33567c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, 33577c478bd9Sstevel@tonic-gate (CE_NOTE, "renew_thread: did " 33587c478bd9Sstevel@tonic-gate "implicit renewal before reply " 33597c478bd9Sstevel@tonic-gate "from server for RENEW")); 33607c478bd9Sstevel@tonic-gate } 33617c478bd9Sstevel@tonic-gate } else { 33627c478bd9Sstevel@tonic-gate /* figure out error */ 33637c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 33647c478bd9Sstevel@tonic-gate "renew_thread: nfs4renew returned error" 33657c478bd9Sstevel@tonic-gate " %d", error)); 33667c478bd9Sstevel@tonic-gate } 33677c478bd9Sstevel@tonic-gate 33687c478bd9Sstevel@tonic-gate } 33697c478bd9Sstevel@tonic-gate } 33707c478bd9Sstevel@tonic-gate 33717c478bd9Sstevel@tonic-gate die: 33727c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 33737c478bd9Sstevel@tonic-gate "nfs4_renew_lease_thread: thread exiting")); 33747c478bd9Sstevel@tonic-gate 33757c478bd9Sstevel@tonic-gate while (sp->s_otw_call_count != 0) { 33767c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 33777c478bd9Sstevel@tonic-gate "nfs4_renew_lease_thread: waiting for outstanding " 33787c478bd9Sstevel@tonic-gate "otw calls to finish for sp 0x%p, current " 
33797c478bd9Sstevel@tonic-gate "s_otw_call_count %d", (void *)sp, 33807c478bd9Sstevel@tonic-gate sp->s_otw_call_count)); 33817c478bd9Sstevel@tonic-gate mutex_enter(&cpr_lock); 33827c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cpr_info); 33837c478bd9Sstevel@tonic-gate mutex_exit(&cpr_lock); 33847c478bd9Sstevel@tonic-gate cv_wait(&sp->s_cv_otw_count, &sp->s_lock); 33857c478bd9Sstevel@tonic-gate mutex_enter(&cpr_lock); 33867c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock); 33877c478bd9Sstevel@tonic-gate mutex_exit(&cpr_lock); 33887c478bd9Sstevel@tonic-gate } 33897c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock); 33907c478bd9Sstevel@tonic-gate 33917c478bd9Sstevel@tonic-gate nfs4_server_rele(sp); /* free the thread's reference */ 33927c478bd9Sstevel@tonic-gate nfs4_server_rele(sp); /* free the list's reference */ 33937c478bd9Sstevel@tonic-gate sp = NULL; 33947c478bd9Sstevel@tonic-gate 33957c478bd9Sstevel@tonic-gate done: 33967c478bd9Sstevel@tonic-gate mutex_enter(&cpr_lock); 33977c478bd9Sstevel@tonic-gate CALLB_CPR_EXIT(&cpr_info); /* drops cpr_lock */ 33987c478bd9Sstevel@tonic-gate mutex_destroy(&cpr_lock); 33997c478bd9Sstevel@tonic-gate 34007c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 34017c478bd9Sstevel@tonic-gate "nfs4_renew_lease_thread: renew thread exit officially")); 34027c478bd9Sstevel@tonic-gate 34037c478bd9Sstevel@tonic-gate zthread_exit(); 34047c478bd9Sstevel@tonic-gate /* NOT REACHED */ 34057c478bd9Sstevel@tonic-gate } 34067c478bd9Sstevel@tonic-gate 34077c478bd9Sstevel@tonic-gate /* 34087c478bd9Sstevel@tonic-gate * Send out a RENEW op to the server. 34097c478bd9Sstevel@tonic-gate * Assumes sp is locked down. 
34107c478bd9Sstevel@tonic-gate */ 34117c478bd9Sstevel@tonic-gate static int 34127c478bd9Sstevel@tonic-gate nfs4renew(nfs4_server_t *sp) 34137c478bd9Sstevel@tonic-gate { 34147c478bd9Sstevel@tonic-gate COMPOUND4args_clnt args; 34157c478bd9Sstevel@tonic-gate COMPOUND4res_clnt res; 34167c478bd9Sstevel@tonic-gate nfs_argop4 argop[1]; 34177c478bd9Sstevel@tonic-gate int doqueue = 1; 34187c478bd9Sstevel@tonic-gate int rpc_error; 34197c478bd9Sstevel@tonic-gate cred_t *cr; 34207c478bd9Sstevel@tonic-gate mntinfo4_t *mi; 34217c478bd9Sstevel@tonic-gate timespec_t prop_time, after_time; 34227c478bd9Sstevel@tonic-gate int needrecov = FALSE; 34237c478bd9Sstevel@tonic-gate nfs4_recov_state_t recov_state; 34247c478bd9Sstevel@tonic-gate nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS }; 34257c478bd9Sstevel@tonic-gate 34267c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4renew")); 34277c478bd9Sstevel@tonic-gate 34287c478bd9Sstevel@tonic-gate recov_state.rs_flags = 0; 34297c478bd9Sstevel@tonic-gate recov_state.rs_num_retry_despite_err = 0; 34307c478bd9Sstevel@tonic-gate 34317c478bd9Sstevel@tonic-gate recov_retry: 34327c478bd9Sstevel@tonic-gate mi = sp->mntinfo4_list; 34337c478bd9Sstevel@tonic-gate VFS_HOLD(mi->mi_vfsp); 34347c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock); 34357c478bd9Sstevel@tonic-gate ASSERT(mi != NULL); 34367c478bd9Sstevel@tonic-gate 34377c478bd9Sstevel@tonic-gate e.error = nfs4_start_op(mi, NULL, NULL, &recov_state); 34387c478bd9Sstevel@tonic-gate if (e.error) { 34397c478bd9Sstevel@tonic-gate VFS_RELE(mi->mi_vfsp); 34407c478bd9Sstevel@tonic-gate return (e.error); 34417c478bd9Sstevel@tonic-gate } 34427c478bd9Sstevel@tonic-gate 34437c478bd9Sstevel@tonic-gate /* Check to see if we're dealing with a marked-dead sp */ 34447c478bd9Sstevel@tonic-gate mutex_enter(&sp->s_lock); 34457c478bd9Sstevel@tonic-gate if (sp->s_thread_exit == NFS4_THREAD_EXIT) { 34467c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock); 34477c478bd9Sstevel@tonic-gate 
nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 34487c478bd9Sstevel@tonic-gate VFS_RELE(mi->mi_vfsp); 34497c478bd9Sstevel@tonic-gate return (0); 34507c478bd9Sstevel@tonic-gate } 34517c478bd9Sstevel@tonic-gate 34527c478bd9Sstevel@tonic-gate /* Make sure mi hasn't changed on us */ 34537c478bd9Sstevel@tonic-gate if (mi != sp->mntinfo4_list) { 34547c478bd9Sstevel@tonic-gate /* Must drop sp's lock to avoid a recursive mutex enter */ 34557c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock); 34567c478bd9Sstevel@tonic-gate nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 34577c478bd9Sstevel@tonic-gate VFS_RELE(mi->mi_vfsp); 34587c478bd9Sstevel@tonic-gate mutex_enter(&sp->s_lock); 34597c478bd9Sstevel@tonic-gate goto recov_retry; 34607c478bd9Sstevel@tonic-gate } 34617c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock); 34627c478bd9Sstevel@tonic-gate 34637c478bd9Sstevel@tonic-gate args.ctag = TAG_RENEW; 34647c478bd9Sstevel@tonic-gate 34657c478bd9Sstevel@tonic-gate args.array_len = 1; 34667c478bd9Sstevel@tonic-gate args.array = argop; 34677c478bd9Sstevel@tonic-gate 34687c478bd9Sstevel@tonic-gate argop[0].argop = OP_RENEW; 34697c478bd9Sstevel@tonic-gate 34707c478bd9Sstevel@tonic-gate mutex_enter(&sp->s_lock); 34717c478bd9Sstevel@tonic-gate argop[0].nfs_argop4_u.oprenew.clientid = sp->clientid; 34727c478bd9Sstevel@tonic-gate cr = sp->s_cred; 34737c478bd9Sstevel@tonic-gate crhold(cr); 34747c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock); 34757c478bd9Sstevel@tonic-gate 34767c478bd9Sstevel@tonic-gate ASSERT(cr != NULL); 34777c478bd9Sstevel@tonic-gate 34787c478bd9Sstevel@tonic-gate /* used to figure out RTT for sp */ 34797c478bd9Sstevel@tonic-gate gethrestime(&prop_time); 34807c478bd9Sstevel@tonic-gate 34817c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE, 34827c478bd9Sstevel@tonic-gate "nfs4renew: %s call, sp 0x%p", needrecov ? 
"recov" : "first", 34837c478bd9Sstevel@tonic-gate (void*)sp)); 34847c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "before: %ld s %ld ns ", 34857c478bd9Sstevel@tonic-gate prop_time.tv_sec, prop_time.tv_nsec)); 34867c478bd9Sstevel@tonic-gate 34877c478bd9Sstevel@tonic-gate DTRACE_PROBE2(nfs4__renew__start, nfs4_server_t *, sp, 34887c478bd9Sstevel@tonic-gate mntinfo4_t *, mi); 34897c478bd9Sstevel@tonic-gate 34907c478bd9Sstevel@tonic-gate rfs4call(mi, &args, &res, cr, &doqueue, 0, &e); 34917c478bd9Sstevel@tonic-gate crfree(cr); 34927c478bd9Sstevel@tonic-gate 34937c478bd9Sstevel@tonic-gate DTRACE_PROBE2(nfs4__renew__end, nfs4_server_t *, sp, 34947c478bd9Sstevel@tonic-gate mntinfo4_t *, mi); 34957c478bd9Sstevel@tonic-gate 34967c478bd9Sstevel@tonic-gate gethrestime(&after_time); 34977c478bd9Sstevel@tonic-gate 34987c478bd9Sstevel@tonic-gate mutex_enter(&sp->s_lock); 34997c478bd9Sstevel@tonic-gate sp->propagation_delay.tv_sec = 35007c478bd9Sstevel@tonic-gate MAX(1, after_time.tv_sec - prop_time.tv_sec); 35017c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock); 35027c478bd9Sstevel@tonic-gate 35037c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "after : %ld s %ld ns ", 35047c478bd9Sstevel@tonic-gate after_time.tv_sec, after_time.tv_nsec)); 35057c478bd9Sstevel@tonic-gate 35067c478bd9Sstevel@tonic-gate if (e.error == 0 && res.status == NFS4ERR_CB_PATH_DOWN) { 3507d1ea0e82Swebaker (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 35087c478bd9Sstevel@tonic-gate nfs4_delegreturn_all(sp); 35097c478bd9Sstevel@tonic-gate nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 35107c478bd9Sstevel@tonic-gate VFS_RELE(mi->mi_vfsp); 35117c478bd9Sstevel@tonic-gate /* 35127c478bd9Sstevel@tonic-gate * If the server returns CB_PATH_DOWN, it has renewed 35137c478bd9Sstevel@tonic-gate * the lease and informed us that the callback path is 35147c478bd9Sstevel@tonic-gate * down. 
Since the lease is renewed, just return 0 and 35157c478bd9Sstevel@tonic-gate * let the renew thread proceed as normal. 35167c478bd9Sstevel@tonic-gate */ 35177c478bd9Sstevel@tonic-gate return (0); 35187c478bd9Sstevel@tonic-gate } 35197c478bd9Sstevel@tonic-gate 35207c478bd9Sstevel@tonic-gate needrecov = nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp); 35217c478bd9Sstevel@tonic-gate if (!needrecov && e.error) { 35227c478bd9Sstevel@tonic-gate nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 35237c478bd9Sstevel@tonic-gate VFS_RELE(mi->mi_vfsp); 35247c478bd9Sstevel@tonic-gate return (e.error); 35257c478bd9Sstevel@tonic-gate } 35267c478bd9Sstevel@tonic-gate 35277c478bd9Sstevel@tonic-gate rpc_error = e.error; 35287c478bd9Sstevel@tonic-gate 35297c478bd9Sstevel@tonic-gate if (needrecov) { 35307c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 35317c478bd9Sstevel@tonic-gate "nfs4renew: initiating recovery\n")); 35327c478bd9Sstevel@tonic-gate 35337c478bd9Sstevel@tonic-gate if (nfs4_start_recovery(&e, mi, NULL, NULL, NULL, NULL, 35342f172c55SRobert Thurlow OP_RENEW, NULL, NULL, NULL) == FALSE) { 35357c478bd9Sstevel@tonic-gate nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 35367c478bd9Sstevel@tonic-gate VFS_RELE(mi->mi_vfsp); 35377c478bd9Sstevel@tonic-gate if (!e.error) 35387c478bd9Sstevel@tonic-gate (void) xdr_free(xdr_COMPOUND4res_clnt, 35397c478bd9Sstevel@tonic-gate (caddr_t)&res); 35407c478bd9Sstevel@tonic-gate mutex_enter(&sp->s_lock); 35417c478bd9Sstevel@tonic-gate goto recov_retry; 35427c478bd9Sstevel@tonic-gate } 35437c478bd9Sstevel@tonic-gate /* fall through for res.status case */ 35447c478bd9Sstevel@tonic-gate } 35457c478bd9Sstevel@tonic-gate 35467c478bd9Sstevel@tonic-gate if (res.status) { 35477c478bd9Sstevel@tonic-gate if (res.status == NFS4ERR_LEASE_MOVED) { 35487c478bd9Sstevel@tonic-gate /*EMPTY*/ 35497c478bd9Sstevel@tonic-gate /* 35507c478bd9Sstevel@tonic-gate * XXX need to try every mntinfo4 in sp->mntinfo4_list 
35517c478bd9Sstevel@tonic-gate * to renew the lease on that server 35527c478bd9Sstevel@tonic-gate */ 35537c478bd9Sstevel@tonic-gate } 35547c478bd9Sstevel@tonic-gate e.error = geterrno4(res.status); 35557c478bd9Sstevel@tonic-gate } 35567c478bd9Sstevel@tonic-gate 35577c478bd9Sstevel@tonic-gate if (!rpc_error) 35587c478bd9Sstevel@tonic-gate (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 35597c478bd9Sstevel@tonic-gate 35607c478bd9Sstevel@tonic-gate nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov); 35617c478bd9Sstevel@tonic-gate 35627c478bd9Sstevel@tonic-gate VFS_RELE(mi->mi_vfsp); 35637c478bd9Sstevel@tonic-gate 35647c478bd9Sstevel@tonic-gate return (e.error); 35657c478bd9Sstevel@tonic-gate } 35667c478bd9Sstevel@tonic-gate 35677c478bd9Sstevel@tonic-gate void 35687c478bd9Sstevel@tonic-gate nfs4_inc_state_ref_count(mntinfo4_t *mi) 35697c478bd9Sstevel@tonic-gate { 35707c478bd9Sstevel@tonic-gate nfs4_server_t *sp; 35717c478bd9Sstevel@tonic-gate 35727c478bd9Sstevel@tonic-gate /* this locks down sp if it is found */ 35737c478bd9Sstevel@tonic-gate sp = find_nfs4_server(mi); 35747c478bd9Sstevel@tonic-gate 35757c478bd9Sstevel@tonic-gate if (sp != NULL) { 35767c478bd9Sstevel@tonic-gate nfs4_inc_state_ref_count_nolock(sp, mi); 35777c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock); 35787c478bd9Sstevel@tonic-gate nfs4_server_rele(sp); 35797c478bd9Sstevel@tonic-gate } 35807c478bd9Sstevel@tonic-gate } 35817c478bd9Sstevel@tonic-gate 35827c478bd9Sstevel@tonic-gate /* 35837c478bd9Sstevel@tonic-gate * Bump the number of OPEN files (ie: those with state) so we know if this 35847c478bd9Sstevel@tonic-gate * nfs4_server has any state to maintain a lease for or not. 35857c478bd9Sstevel@tonic-gate * 35867c478bd9Sstevel@tonic-gate * Also, marks the nfs4_server's lease valid if it hasn't been done so already. 
35877c478bd9Sstevel@tonic-gate */ 35887c478bd9Sstevel@tonic-gate void 35897c478bd9Sstevel@tonic-gate nfs4_inc_state_ref_count_nolock(nfs4_server_t *sp, mntinfo4_t *mi) 35907c478bd9Sstevel@tonic-gate { 35917c478bd9Sstevel@tonic-gate ASSERT(mutex_owned(&sp->s_lock)); 35927c478bd9Sstevel@tonic-gate 35937c478bd9Sstevel@tonic-gate sp->state_ref_count++; 35947c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 35957c478bd9Sstevel@tonic-gate "nfs4_inc_state_ref_count: state_ref_count now %d", 35967c478bd9Sstevel@tonic-gate sp->state_ref_count)); 35977c478bd9Sstevel@tonic-gate 35987c478bd9Sstevel@tonic-gate if (sp->lease_valid == NFS4_LEASE_UNINITIALIZED) 35997c478bd9Sstevel@tonic-gate sp->lease_valid = NFS4_LEASE_VALID; 36007c478bd9Sstevel@tonic-gate 36017c478bd9Sstevel@tonic-gate /* 36027c478bd9Sstevel@tonic-gate * If this call caused the lease to be marked valid and/or 36037c478bd9Sstevel@tonic-gate * took the state_ref_count from 0 to 1, then start the time 36047c478bd9Sstevel@tonic-gate * on lease renewal. 
36057c478bd9Sstevel@tonic-gate */ 36067c478bd9Sstevel@tonic-gate if (sp->lease_valid == NFS4_LEASE_VALID && sp->state_ref_count == 1) 36077c478bd9Sstevel@tonic-gate sp->last_renewal_time = gethrestime_sec(); 36087c478bd9Sstevel@tonic-gate 36097c478bd9Sstevel@tonic-gate /* update the number of open files for mi */ 36107c478bd9Sstevel@tonic-gate mi->mi_open_files++; 36117c478bd9Sstevel@tonic-gate } 36127c478bd9Sstevel@tonic-gate 36137c478bd9Sstevel@tonic-gate void 36147c478bd9Sstevel@tonic-gate nfs4_dec_state_ref_count(mntinfo4_t *mi) 36157c478bd9Sstevel@tonic-gate { 36167c478bd9Sstevel@tonic-gate nfs4_server_t *sp; 36177c478bd9Sstevel@tonic-gate 36187c478bd9Sstevel@tonic-gate /* this locks down sp if it is found */ 36197c478bd9Sstevel@tonic-gate sp = find_nfs4_server_all(mi, 1); 36207c478bd9Sstevel@tonic-gate 36217c478bd9Sstevel@tonic-gate if (sp != NULL) { 36227c478bd9Sstevel@tonic-gate nfs4_dec_state_ref_count_nolock(sp, mi); 36237c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock); 36247c478bd9Sstevel@tonic-gate nfs4_server_rele(sp); 36257c478bd9Sstevel@tonic-gate } 36267c478bd9Sstevel@tonic-gate } 36277c478bd9Sstevel@tonic-gate 36287c478bd9Sstevel@tonic-gate /* 36297c478bd9Sstevel@tonic-gate * Decrement the number of OPEN files (ie: those with state) so we know if 36307c478bd9Sstevel@tonic-gate * this nfs4_server has any state to maintain a lease for or not. 
36317c478bd9Sstevel@tonic-gate */ 36327c478bd9Sstevel@tonic-gate void 36337c478bd9Sstevel@tonic-gate nfs4_dec_state_ref_count_nolock(nfs4_server_t *sp, mntinfo4_t *mi) 36347c478bd9Sstevel@tonic-gate { 36357c478bd9Sstevel@tonic-gate ASSERT(mutex_owned(&sp->s_lock)); 36367c478bd9Sstevel@tonic-gate ASSERT(sp->state_ref_count != 0); 36377c478bd9Sstevel@tonic-gate sp->state_ref_count--; 36387c478bd9Sstevel@tonic-gate 36397c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 36407c478bd9Sstevel@tonic-gate "nfs4_dec_state_ref_count: state ref count now %d", 36417c478bd9Sstevel@tonic-gate sp->state_ref_count)); 36427c478bd9Sstevel@tonic-gate 36437c478bd9Sstevel@tonic-gate mi->mi_open_files--; 36447c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 36457c478bd9Sstevel@tonic-gate "nfs4_dec_state_ref_count: mi open files %d, v4 flags 0x%x", 36467c478bd9Sstevel@tonic-gate mi->mi_open_files, mi->mi_flags)); 36477c478bd9Sstevel@tonic-gate 36487c478bd9Sstevel@tonic-gate /* We don't have to hold the mi_lock to test mi_flags */ 36497c478bd9Sstevel@tonic-gate if (mi->mi_open_files == 0 && 36507c478bd9Sstevel@tonic-gate (mi->mi_flags & MI4_REMOVE_ON_LAST_CLOSE)) { 36517c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, 36527c478bd9Sstevel@tonic-gate "nfs4_dec_state_ref_count: remove mntinfo4 %p since " 36537c478bd9Sstevel@tonic-gate "we have closed the last open file", (void*)mi)); 36547c478bd9Sstevel@tonic-gate nfs4_remove_mi_from_server(mi, sp); 36557c478bd9Sstevel@tonic-gate } 36567c478bd9Sstevel@tonic-gate } 36577c478bd9Sstevel@tonic-gate 36587c478bd9Sstevel@tonic-gate bool_t 36597c478bd9Sstevel@tonic-gate inlease(nfs4_server_t *sp) 36607c478bd9Sstevel@tonic-gate { 36617c478bd9Sstevel@tonic-gate bool_t result; 36627c478bd9Sstevel@tonic-gate 36637c478bd9Sstevel@tonic-gate ASSERT(mutex_owned(&sp->s_lock)); 36647c478bd9Sstevel@tonic-gate 36657c478bd9Sstevel@tonic-gate if (sp->lease_valid == NFS4_LEASE_VALID && 
36667c478bd9Sstevel@tonic-gate gethrestime_sec() < sp->last_renewal_time + sp->s_lease_time) 36677c478bd9Sstevel@tonic-gate result = TRUE; 36687c478bd9Sstevel@tonic-gate else 36697c478bd9Sstevel@tonic-gate result = FALSE; 36707c478bd9Sstevel@tonic-gate 36717c478bd9Sstevel@tonic-gate return (result); 36727c478bd9Sstevel@tonic-gate } 36737c478bd9Sstevel@tonic-gate 36747c478bd9Sstevel@tonic-gate 36757c478bd9Sstevel@tonic-gate /* 36767c478bd9Sstevel@tonic-gate * Return non-zero if the given nfs4_server_t is going through recovery. 36777c478bd9Sstevel@tonic-gate */ 36787c478bd9Sstevel@tonic-gate 36797c478bd9Sstevel@tonic-gate int 36807c478bd9Sstevel@tonic-gate nfs4_server_in_recovery(nfs4_server_t *sp) 36817c478bd9Sstevel@tonic-gate { 36827c478bd9Sstevel@tonic-gate return (nfs_rw_lock_held(&sp->s_recovlock, RW_WRITER)); 36837c478bd9Sstevel@tonic-gate } 36847c478bd9Sstevel@tonic-gate 36857c478bd9Sstevel@tonic-gate /* 36867c478bd9Sstevel@tonic-gate * Compare two shared filehandle objects. Returns -1, 0, or +1, if the 36877c478bd9Sstevel@tonic-gate * first is less than, equal to, or greater than the second. 36887c478bd9Sstevel@tonic-gate */ 36897c478bd9Sstevel@tonic-gate 36907c478bd9Sstevel@tonic-gate int 36917c478bd9Sstevel@tonic-gate sfh4cmp(const void *p1, const void *p2) 36927c478bd9Sstevel@tonic-gate { 36937c478bd9Sstevel@tonic-gate const nfs4_sharedfh_t *sfh1 = (const nfs4_sharedfh_t *)p1; 36947c478bd9Sstevel@tonic-gate const nfs4_sharedfh_t *sfh2 = (const nfs4_sharedfh_t *)p2; 36957c478bd9Sstevel@tonic-gate 36967c478bd9Sstevel@tonic-gate return (nfs4cmpfh(&sfh1->sfh_fh, &sfh2->sfh_fh)); 36977c478bd9Sstevel@tonic-gate } 36987c478bd9Sstevel@tonic-gate 36997c478bd9Sstevel@tonic-gate /* 37007c478bd9Sstevel@tonic-gate * Create a table for shared filehandle objects. 
37017c478bd9Sstevel@tonic-gate */ 37027c478bd9Sstevel@tonic-gate 37037c478bd9Sstevel@tonic-gate void 37047c478bd9Sstevel@tonic-gate sfh4_createtab(avl_tree_t *tab) 37057c478bd9Sstevel@tonic-gate { 37067c478bd9Sstevel@tonic-gate avl_create(tab, sfh4cmp, sizeof (nfs4_sharedfh_t), 37077c478bd9Sstevel@tonic-gate offsetof(nfs4_sharedfh_t, sfh_tree)); 37087c478bd9Sstevel@tonic-gate } 37097c478bd9Sstevel@tonic-gate 37107c478bd9Sstevel@tonic-gate /* 37117c478bd9Sstevel@tonic-gate * Return a shared filehandle object for the given filehandle. The caller 37127c478bd9Sstevel@tonic-gate * is responsible for eventually calling sfh4_rele(). 37137c478bd9Sstevel@tonic-gate */ 37147c478bd9Sstevel@tonic-gate 37157c478bd9Sstevel@tonic-gate nfs4_sharedfh_t * 37167c478bd9Sstevel@tonic-gate sfh4_put(const nfs_fh4 *fh, mntinfo4_t *mi, nfs4_sharedfh_t *key) 37177c478bd9Sstevel@tonic-gate { 37187c478bd9Sstevel@tonic-gate nfs4_sharedfh_t *sfh, *nsfh; 37197c478bd9Sstevel@tonic-gate avl_index_t where; 37207c478bd9Sstevel@tonic-gate nfs4_sharedfh_t skey; 37217c478bd9Sstevel@tonic-gate 37227c478bd9Sstevel@tonic-gate if (!key) { 37237c478bd9Sstevel@tonic-gate skey.sfh_fh = *fh; 37247c478bd9Sstevel@tonic-gate key = &skey; 37257c478bd9Sstevel@tonic-gate } 37267c478bd9Sstevel@tonic-gate 37277c478bd9Sstevel@tonic-gate nsfh = kmem_alloc(sizeof (nfs4_sharedfh_t), KM_SLEEP); 37287c478bd9Sstevel@tonic-gate nsfh->sfh_fh.nfs_fh4_len = fh->nfs_fh4_len; 37297c478bd9Sstevel@tonic-gate /* 37307c478bd9Sstevel@tonic-gate * We allocate the largest possible filehandle size because it's 37317c478bd9Sstevel@tonic-gate * not that big, and it saves us from possibly having to resize the 37327c478bd9Sstevel@tonic-gate * buffer later. 
37337c478bd9Sstevel@tonic-gate */ 37347c478bd9Sstevel@tonic-gate nsfh->sfh_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP); 37357c478bd9Sstevel@tonic-gate bcopy(fh->nfs_fh4_val, nsfh->sfh_fh.nfs_fh4_val, fh->nfs_fh4_len); 37367c478bd9Sstevel@tonic-gate mutex_init(&nsfh->sfh_lock, NULL, MUTEX_DEFAULT, NULL); 37377c478bd9Sstevel@tonic-gate nsfh->sfh_refcnt = 1; 37387c478bd9Sstevel@tonic-gate nsfh->sfh_flags = SFH4_IN_TREE; 37397c478bd9Sstevel@tonic-gate nsfh->sfh_mi = mi; 37407c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_sharedfh_debug, (CE_NOTE, "sfh4_get: new object (%p)", 37417c478bd9Sstevel@tonic-gate (void *)nsfh)); 37427c478bd9Sstevel@tonic-gate 37437c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_WRITER, 0); 37447c478bd9Sstevel@tonic-gate sfh = avl_find(&mi->mi_filehandles, key, &where); 37457c478bd9Sstevel@tonic-gate if (sfh != NULL) { 37467c478bd9Sstevel@tonic-gate mutex_enter(&sfh->sfh_lock); 37477c478bd9Sstevel@tonic-gate sfh->sfh_refcnt++; 37487c478bd9Sstevel@tonic-gate mutex_exit(&sfh->sfh_lock); 37497c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_fh_lock); 37507c478bd9Sstevel@tonic-gate /* free our speculative allocs */ 37517c478bd9Sstevel@tonic-gate kmem_free(nsfh->sfh_fh.nfs_fh4_val, NFS4_FHSIZE); 37527c478bd9Sstevel@tonic-gate kmem_free(nsfh, sizeof (nfs4_sharedfh_t)); 37537c478bd9Sstevel@tonic-gate return (sfh); 37547c478bd9Sstevel@tonic-gate } 37557c478bd9Sstevel@tonic-gate 37567c478bd9Sstevel@tonic-gate avl_insert(&mi->mi_filehandles, nsfh, where); 37577c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_fh_lock); 37587c478bd9Sstevel@tonic-gate 37597c478bd9Sstevel@tonic-gate return (nsfh); 37607c478bd9Sstevel@tonic-gate } 37617c478bd9Sstevel@tonic-gate 37627c478bd9Sstevel@tonic-gate /* 37637c478bd9Sstevel@tonic-gate * Return a shared filehandle object for the given filehandle. The caller 37647c478bd9Sstevel@tonic-gate * is responsible for eventually calling sfh4_rele(). 
37657c478bd9Sstevel@tonic-gate */ 37667c478bd9Sstevel@tonic-gate 37677c478bd9Sstevel@tonic-gate nfs4_sharedfh_t * 37687c478bd9Sstevel@tonic-gate sfh4_get(const nfs_fh4 *fh, mntinfo4_t *mi) 37697c478bd9Sstevel@tonic-gate { 37707c478bd9Sstevel@tonic-gate nfs4_sharedfh_t *sfh; 37717c478bd9Sstevel@tonic-gate nfs4_sharedfh_t key; 37727c478bd9Sstevel@tonic-gate 37737c478bd9Sstevel@tonic-gate ASSERT(fh->nfs_fh4_len <= NFS4_FHSIZE); 37747c478bd9Sstevel@tonic-gate 37757c478bd9Sstevel@tonic-gate #ifdef DEBUG 37767c478bd9Sstevel@tonic-gate if (nfs4_sharedfh_debug) { 37777c478bd9Sstevel@tonic-gate nfs4_fhandle_t fhandle; 37787c478bd9Sstevel@tonic-gate 37797c478bd9Sstevel@tonic-gate fhandle.fh_len = fh->nfs_fh4_len; 37807c478bd9Sstevel@tonic-gate bcopy(fh->nfs_fh4_val, fhandle.fh_buf, fhandle.fh_len); 37817c478bd9Sstevel@tonic-gate zcmn_err(mi->mi_zone->zone_id, CE_NOTE, "sfh4_get:"); 37827c478bd9Sstevel@tonic-gate nfs4_printfhandle(&fhandle); 37837c478bd9Sstevel@tonic-gate } 37847c478bd9Sstevel@tonic-gate #endif 37857c478bd9Sstevel@tonic-gate 37867c478bd9Sstevel@tonic-gate /* 37877c478bd9Sstevel@tonic-gate * If there's already an object for the given filehandle, bump the 37887c478bd9Sstevel@tonic-gate * reference count and return it. Otherwise, create a new object 37897c478bd9Sstevel@tonic-gate * and add it to the AVL tree. 
37907c478bd9Sstevel@tonic-gate */ 37917c478bd9Sstevel@tonic-gate 37927c478bd9Sstevel@tonic-gate key.sfh_fh = *fh; 37937c478bd9Sstevel@tonic-gate 37947c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_READER, 0); 37957c478bd9Sstevel@tonic-gate sfh = avl_find(&mi->mi_filehandles, &key, NULL); 37967c478bd9Sstevel@tonic-gate if (sfh != NULL) { 37977c478bd9Sstevel@tonic-gate mutex_enter(&sfh->sfh_lock); 37987c478bd9Sstevel@tonic-gate sfh->sfh_refcnt++; 37997c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_sharedfh_debug, (CE_NOTE, 38007c478bd9Sstevel@tonic-gate "sfh4_get: found existing %p, new refcnt=%d", 38017c478bd9Sstevel@tonic-gate (void *)sfh, sfh->sfh_refcnt)); 38027c478bd9Sstevel@tonic-gate mutex_exit(&sfh->sfh_lock); 38037c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_fh_lock); 38047c478bd9Sstevel@tonic-gate return (sfh); 38057c478bd9Sstevel@tonic-gate } 38067c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_fh_lock); 38077c478bd9Sstevel@tonic-gate 38087c478bd9Sstevel@tonic-gate return (sfh4_put(fh, mi, &key)); 38097c478bd9Sstevel@tonic-gate } 38107c478bd9Sstevel@tonic-gate 38117c478bd9Sstevel@tonic-gate /* 38127c478bd9Sstevel@tonic-gate * Get a reference to the given shared filehandle object. 
38137c478bd9Sstevel@tonic-gate */ 38147c478bd9Sstevel@tonic-gate 38157c478bd9Sstevel@tonic-gate void 38167c478bd9Sstevel@tonic-gate sfh4_hold(nfs4_sharedfh_t *sfh) 38177c478bd9Sstevel@tonic-gate { 38187c478bd9Sstevel@tonic-gate ASSERT(sfh->sfh_refcnt > 0); 38197c478bd9Sstevel@tonic-gate 38207c478bd9Sstevel@tonic-gate mutex_enter(&sfh->sfh_lock); 38217c478bd9Sstevel@tonic-gate sfh->sfh_refcnt++; 38227c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_sharedfh_debug, 38237c478bd9Sstevel@tonic-gate (CE_NOTE, "sfh4_hold %p, new refcnt=%d", 38247c478bd9Sstevel@tonic-gate (void *)sfh, sfh->sfh_refcnt)); 38257c478bd9Sstevel@tonic-gate mutex_exit(&sfh->sfh_lock); 38267c478bd9Sstevel@tonic-gate } 38277c478bd9Sstevel@tonic-gate 38287c478bd9Sstevel@tonic-gate /* 38297c478bd9Sstevel@tonic-gate * Release a reference to the given shared filehandle object and null out 38307c478bd9Sstevel@tonic-gate * the given pointer. 38317c478bd9Sstevel@tonic-gate */ 38327c478bd9Sstevel@tonic-gate 38337c478bd9Sstevel@tonic-gate void 38347c478bd9Sstevel@tonic-gate sfh4_rele(nfs4_sharedfh_t **sfhpp) 38357c478bd9Sstevel@tonic-gate { 38367c478bd9Sstevel@tonic-gate mntinfo4_t *mi; 38377c478bd9Sstevel@tonic-gate nfs4_sharedfh_t *sfh = *sfhpp; 38387c478bd9Sstevel@tonic-gate 38397c478bd9Sstevel@tonic-gate ASSERT(sfh->sfh_refcnt > 0); 38407c478bd9Sstevel@tonic-gate 38417c478bd9Sstevel@tonic-gate mutex_enter(&sfh->sfh_lock); 38427c478bd9Sstevel@tonic-gate if (sfh->sfh_refcnt > 1) { 38437c478bd9Sstevel@tonic-gate sfh->sfh_refcnt--; 38447c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_sharedfh_debug, (CE_NOTE, 38457c478bd9Sstevel@tonic-gate "sfh4_rele %p, new refcnt=%d", 38467c478bd9Sstevel@tonic-gate (void *)sfh, sfh->sfh_refcnt)); 38477c478bd9Sstevel@tonic-gate mutex_exit(&sfh->sfh_lock); 38487c478bd9Sstevel@tonic-gate goto finish; 38497c478bd9Sstevel@tonic-gate } 38507c478bd9Sstevel@tonic-gate mutex_exit(&sfh->sfh_lock); 38517c478bd9Sstevel@tonic-gate 38527c478bd9Sstevel@tonic-gate /* 38537c478bd9Sstevel@tonic-gate * 
Possibly the last reference, so get the lock for the table in 38547c478bd9Sstevel@tonic-gate * case it's time to remove the object from the table. 38557c478bd9Sstevel@tonic-gate */ 38567c478bd9Sstevel@tonic-gate mi = sfh->sfh_mi; 38577c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_WRITER, 0); 38587c478bd9Sstevel@tonic-gate mutex_enter(&sfh->sfh_lock); 38597c478bd9Sstevel@tonic-gate sfh->sfh_refcnt--; 38607c478bd9Sstevel@tonic-gate if (sfh->sfh_refcnt > 0) { 38617c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_sharedfh_debug, (CE_NOTE, 38627c478bd9Sstevel@tonic-gate "sfh4_rele %p, new refcnt=%d", 38637c478bd9Sstevel@tonic-gate (void *)sfh, sfh->sfh_refcnt)); 38647c478bd9Sstevel@tonic-gate mutex_exit(&sfh->sfh_lock); 38657c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_fh_lock); 38667c478bd9Sstevel@tonic-gate goto finish; 38677c478bd9Sstevel@tonic-gate } 38687c478bd9Sstevel@tonic-gate 38697c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_sharedfh_debug, (CE_NOTE, 38707c478bd9Sstevel@tonic-gate "sfh4_rele %p, last ref", (void *)sfh)); 38717c478bd9Sstevel@tonic-gate if (sfh->sfh_flags & SFH4_IN_TREE) { 38727c478bd9Sstevel@tonic-gate avl_remove(&mi->mi_filehandles, sfh); 38737c478bd9Sstevel@tonic-gate sfh->sfh_flags &= ~SFH4_IN_TREE; 38747c478bd9Sstevel@tonic-gate } 38757c478bd9Sstevel@tonic-gate mutex_exit(&sfh->sfh_lock); 38767c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_fh_lock); 38777c478bd9Sstevel@tonic-gate mutex_destroy(&sfh->sfh_lock); 38787c478bd9Sstevel@tonic-gate kmem_free(sfh->sfh_fh.nfs_fh4_val, NFS4_FHSIZE); 38797c478bd9Sstevel@tonic-gate kmem_free(sfh, sizeof (nfs4_sharedfh_t)); 38807c478bd9Sstevel@tonic-gate 38817c478bd9Sstevel@tonic-gate finish: 38827c478bd9Sstevel@tonic-gate *sfhpp = NULL; 38837c478bd9Sstevel@tonic-gate } 38847c478bd9Sstevel@tonic-gate 38857c478bd9Sstevel@tonic-gate /* 38867c478bd9Sstevel@tonic-gate * Update the filehandle for the given shared filehandle object. 
38877c478bd9Sstevel@tonic-gate */ 38887c478bd9Sstevel@tonic-gate 38897c478bd9Sstevel@tonic-gate int nfs4_warn_dupfh = 0; /* if set, always warn about dup fhs below */ 38907c478bd9Sstevel@tonic-gate 38917c478bd9Sstevel@tonic-gate void 38927c478bd9Sstevel@tonic-gate sfh4_update(nfs4_sharedfh_t *sfh, const nfs_fh4 *newfh) 38937c478bd9Sstevel@tonic-gate { 38947c478bd9Sstevel@tonic-gate mntinfo4_t *mi = sfh->sfh_mi; 38957c478bd9Sstevel@tonic-gate nfs4_sharedfh_t *dupsfh; 38967c478bd9Sstevel@tonic-gate avl_index_t where; 38977c478bd9Sstevel@tonic-gate nfs4_sharedfh_t key; 38987c478bd9Sstevel@tonic-gate 38997c478bd9Sstevel@tonic-gate #ifdef DEBUG 39007c478bd9Sstevel@tonic-gate mutex_enter(&sfh->sfh_lock); 39017c478bd9Sstevel@tonic-gate ASSERT(sfh->sfh_refcnt > 0); 39027c478bd9Sstevel@tonic-gate mutex_exit(&sfh->sfh_lock); 39037c478bd9Sstevel@tonic-gate #endif 39047c478bd9Sstevel@tonic-gate ASSERT(newfh->nfs_fh4_len <= NFS4_FHSIZE); 39057c478bd9Sstevel@tonic-gate 39067c478bd9Sstevel@tonic-gate /* 39077c478bd9Sstevel@tonic-gate * The basic plan is to remove the shared filehandle object from 39087c478bd9Sstevel@tonic-gate * the table, update it to have the new filehandle, then reinsert 39097c478bd9Sstevel@tonic-gate * it. 
39107c478bd9Sstevel@tonic-gate */ 39117c478bd9Sstevel@tonic-gate 39127c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_WRITER, 0); 39137c478bd9Sstevel@tonic-gate mutex_enter(&sfh->sfh_lock); 39147c478bd9Sstevel@tonic-gate if (sfh->sfh_flags & SFH4_IN_TREE) { 39157c478bd9Sstevel@tonic-gate avl_remove(&mi->mi_filehandles, sfh); 39167c478bd9Sstevel@tonic-gate sfh->sfh_flags &= ~SFH4_IN_TREE; 39177c478bd9Sstevel@tonic-gate } 39187c478bd9Sstevel@tonic-gate mutex_exit(&sfh->sfh_lock); 39197c478bd9Sstevel@tonic-gate sfh->sfh_fh.nfs_fh4_len = newfh->nfs_fh4_len; 39207c478bd9Sstevel@tonic-gate bcopy(newfh->nfs_fh4_val, sfh->sfh_fh.nfs_fh4_val, 39217c478bd9Sstevel@tonic-gate sfh->sfh_fh.nfs_fh4_len); 39227c478bd9Sstevel@tonic-gate 39237c478bd9Sstevel@tonic-gate /* 39247c478bd9Sstevel@tonic-gate * XXX If there is already a shared filehandle object with the new 39257c478bd9Sstevel@tonic-gate * filehandle, we're in trouble, because the rnode code assumes 39267c478bd9Sstevel@tonic-gate * that there is only one shared filehandle object for a given 39277c478bd9Sstevel@tonic-gate * filehandle. So issue a warning (for read-write mounts only) 39287c478bd9Sstevel@tonic-gate * and don't try to re-insert the given object into the table. 39297c478bd9Sstevel@tonic-gate * Hopefully the given object will quickly go away and everyone 39307c478bd9Sstevel@tonic-gate * will use the new object. 
39317c478bd9Sstevel@tonic-gate */ 39327c478bd9Sstevel@tonic-gate key.sfh_fh = *newfh; 39337c478bd9Sstevel@tonic-gate dupsfh = avl_find(&mi->mi_filehandles, &key, &where); 39347c478bd9Sstevel@tonic-gate if (dupsfh != NULL) { 39357c478bd9Sstevel@tonic-gate if (!(mi->mi_vfsp->vfs_flag & VFS_RDONLY) || nfs4_warn_dupfh) { 39367c478bd9Sstevel@tonic-gate zcmn_err(mi->mi_zone->zone_id, CE_WARN, "sfh4_update: " 39377c478bd9Sstevel@tonic-gate "duplicate filehandle detected"); 39387c478bd9Sstevel@tonic-gate sfh4_printfhandle(dupsfh); 39397c478bd9Sstevel@tonic-gate } 39407c478bd9Sstevel@tonic-gate } else { 39417c478bd9Sstevel@tonic-gate avl_insert(&mi->mi_filehandles, sfh, where); 39427c478bd9Sstevel@tonic-gate mutex_enter(&sfh->sfh_lock); 39437c478bd9Sstevel@tonic-gate sfh->sfh_flags |= SFH4_IN_TREE; 39447c478bd9Sstevel@tonic-gate mutex_exit(&sfh->sfh_lock); 39457c478bd9Sstevel@tonic-gate } 39467c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_fh_lock); 39477c478bd9Sstevel@tonic-gate } 39487c478bd9Sstevel@tonic-gate 39497c478bd9Sstevel@tonic-gate /* 39507c478bd9Sstevel@tonic-gate * Copy out the current filehandle for the given shared filehandle object. 
39517c478bd9Sstevel@tonic-gate */ 39527c478bd9Sstevel@tonic-gate 39537c478bd9Sstevel@tonic-gate void 39547c478bd9Sstevel@tonic-gate sfh4_copyval(const nfs4_sharedfh_t *sfh, nfs4_fhandle_t *fhp) 39557c478bd9Sstevel@tonic-gate { 39567c478bd9Sstevel@tonic-gate mntinfo4_t *mi = sfh->sfh_mi; 39577c478bd9Sstevel@tonic-gate 39587c478bd9Sstevel@tonic-gate ASSERT(sfh->sfh_refcnt > 0); 39597c478bd9Sstevel@tonic-gate 39607c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_READER, 0); 39617c478bd9Sstevel@tonic-gate fhp->fh_len = sfh->sfh_fh.nfs_fh4_len; 39627c478bd9Sstevel@tonic-gate ASSERT(fhp->fh_len <= NFS4_FHSIZE); 39637c478bd9Sstevel@tonic-gate bcopy(sfh->sfh_fh.nfs_fh4_val, fhp->fh_buf, fhp->fh_len); 39647c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_fh_lock); 39657c478bd9Sstevel@tonic-gate } 39667c478bd9Sstevel@tonic-gate 39677c478bd9Sstevel@tonic-gate /* 39687c478bd9Sstevel@tonic-gate * Print out the filehandle for the given shared filehandle object. 39697c478bd9Sstevel@tonic-gate */ 39707c478bd9Sstevel@tonic-gate 39717c478bd9Sstevel@tonic-gate void 39727c478bd9Sstevel@tonic-gate sfh4_printfhandle(const nfs4_sharedfh_t *sfh) 39737c478bd9Sstevel@tonic-gate { 39747c478bd9Sstevel@tonic-gate nfs4_fhandle_t fhandle; 39757c478bd9Sstevel@tonic-gate 39767c478bd9Sstevel@tonic-gate sfh4_copyval(sfh, &fhandle); 39777c478bd9Sstevel@tonic-gate nfs4_printfhandle(&fhandle); 39787c478bd9Sstevel@tonic-gate } 39797c478bd9Sstevel@tonic-gate 39807c478bd9Sstevel@tonic-gate /* 39817c478bd9Sstevel@tonic-gate * Compare 2 fnames. Returns -1 if the first is "less" than the second, 0 39827c478bd9Sstevel@tonic-gate * if they're the same, +1 if the first is "greater" than the second. The 39837c478bd9Sstevel@tonic-gate * caller (or whoever's calling the AVL package) is responsible for 39847c478bd9Sstevel@tonic-gate * handling locking issues. 
39857c478bd9Sstevel@tonic-gate */ 39867c478bd9Sstevel@tonic-gate 39877c478bd9Sstevel@tonic-gate static int 39887c478bd9Sstevel@tonic-gate fncmp(const void *p1, const void *p2) 39897c478bd9Sstevel@tonic-gate { 39907c478bd9Sstevel@tonic-gate const nfs4_fname_t *f1 = p1; 39917c478bd9Sstevel@tonic-gate const nfs4_fname_t *f2 = p2; 39927c478bd9Sstevel@tonic-gate int res; 39937c478bd9Sstevel@tonic-gate 39947c478bd9Sstevel@tonic-gate res = strcmp(f1->fn_name, f2->fn_name); 39957c478bd9Sstevel@tonic-gate /* 39967c478bd9Sstevel@tonic-gate * The AVL package wants +/-1, not arbitrary positive or negative 39977c478bd9Sstevel@tonic-gate * integers. 39987c478bd9Sstevel@tonic-gate */ 39997c478bd9Sstevel@tonic-gate if (res > 0) 40007c478bd9Sstevel@tonic-gate res = 1; 40017c478bd9Sstevel@tonic-gate else if (res < 0) 40027c478bd9Sstevel@tonic-gate res = -1; 40037c478bd9Sstevel@tonic-gate return (res); 40047c478bd9Sstevel@tonic-gate } 40057c478bd9Sstevel@tonic-gate 40067c478bd9Sstevel@tonic-gate /* 40077c478bd9Sstevel@tonic-gate * Get or create an fname with the given name, as a child of the given 40087c478bd9Sstevel@tonic-gate * fname. The caller is responsible for eventually releasing the reference 40097c478bd9Sstevel@tonic-gate * (fn_rele()). parent may be NULL. 40107c478bd9Sstevel@tonic-gate */ 40117c478bd9Sstevel@tonic-gate 40127c478bd9Sstevel@tonic-gate nfs4_fname_t * 4013bbf2a467SNagakiran Rajashekar fn_get(nfs4_fname_t *parent, char *name, nfs4_sharedfh_t *sfh) 40147c478bd9Sstevel@tonic-gate { 40157c478bd9Sstevel@tonic-gate nfs4_fname_t key; 40167c478bd9Sstevel@tonic-gate nfs4_fname_t *fnp; 40177c478bd9Sstevel@tonic-gate avl_index_t where; 40187c478bd9Sstevel@tonic-gate 40197c478bd9Sstevel@tonic-gate key.fn_name = name; 40207c478bd9Sstevel@tonic-gate 40217c478bd9Sstevel@tonic-gate /* 40227c478bd9Sstevel@tonic-gate * If there's already an fname registered with the given name, bump 40237c478bd9Sstevel@tonic-gate * its reference count and return it. 
Otherwise, create a new one 40247c478bd9Sstevel@tonic-gate * and add it to the parent's AVL tree. 4025bbf2a467SNagakiran Rajashekar * 4026bbf2a467SNagakiran Rajashekar * fname entries we are looking for should match both name 4027bbf2a467SNagakiran Rajashekar * and sfh stored in the fname. 40287c478bd9Sstevel@tonic-gate */ 4029bbf2a467SNagakiran Rajashekar again: 40307c478bd9Sstevel@tonic-gate if (parent != NULL) { 40317c478bd9Sstevel@tonic-gate mutex_enter(&parent->fn_lock); 40327c478bd9Sstevel@tonic-gate fnp = avl_find(&parent->fn_children, &key, &where); 40337c478bd9Sstevel@tonic-gate if (fnp != NULL) { 4034bbf2a467SNagakiran Rajashekar /* 4035bbf2a467SNagakiran Rajashekar * This hold on fnp is released below later, 4036bbf2a467SNagakiran Rajashekar * in case this is not the fnp we want. 4037bbf2a467SNagakiran Rajashekar */ 40387c478bd9Sstevel@tonic-gate fn_hold(fnp); 4039bbf2a467SNagakiran Rajashekar 4040bbf2a467SNagakiran Rajashekar if (fnp->fn_sfh == sfh) { 4041bbf2a467SNagakiran Rajashekar /* 4042bbf2a467SNagakiran Rajashekar * We have found our entry. 4043bbf2a467SNagakiran Rajashekar * put an hold and return it. 4044bbf2a467SNagakiran Rajashekar */ 40457c478bd9Sstevel@tonic-gate mutex_exit(&parent->fn_lock); 40467c478bd9Sstevel@tonic-gate return (fnp); 40477c478bd9Sstevel@tonic-gate } 4048bbf2a467SNagakiran Rajashekar 4049bbf2a467SNagakiran Rajashekar /* 4050bbf2a467SNagakiran Rajashekar * We have found an entry that has a mismatching 4051bbf2a467SNagakiran Rajashekar * fn_sfh. This could be a stale entry due to 4052bbf2a467SNagakiran Rajashekar * server side rename. We will remove this entry 4053bbf2a467SNagakiran Rajashekar * and make sure no such entries exist. 
4054bbf2a467SNagakiran Rajashekar */ 4055bbf2a467SNagakiran Rajashekar mutex_exit(&parent->fn_lock); 4056bbf2a467SNagakiran Rajashekar mutex_enter(&fnp->fn_lock); 4057bbf2a467SNagakiran Rajashekar if (fnp->fn_parent == parent) { 4058bbf2a467SNagakiran Rajashekar /* 4059bbf2a467SNagakiran Rajashekar * Remove ourselves from parent's 4060bbf2a467SNagakiran Rajashekar * fn_children tree. 4061bbf2a467SNagakiran Rajashekar */ 4062bbf2a467SNagakiran Rajashekar mutex_enter(&parent->fn_lock); 4063bbf2a467SNagakiran Rajashekar avl_remove(&parent->fn_children, fnp); 4064bbf2a467SNagakiran Rajashekar mutex_exit(&parent->fn_lock); 4065bbf2a467SNagakiran Rajashekar fn_rele(&fnp->fn_parent); 4066bbf2a467SNagakiran Rajashekar } 4067bbf2a467SNagakiran Rajashekar mutex_exit(&fnp->fn_lock); 4068bbf2a467SNagakiran Rajashekar fn_rele(&fnp); 4069bbf2a467SNagakiran Rajashekar goto again; 4070bbf2a467SNagakiran Rajashekar } 40717c478bd9Sstevel@tonic-gate } 40727c478bd9Sstevel@tonic-gate 40737c478bd9Sstevel@tonic-gate fnp = kmem_alloc(sizeof (nfs4_fname_t), KM_SLEEP); 40747c478bd9Sstevel@tonic-gate mutex_init(&fnp->fn_lock, NULL, MUTEX_DEFAULT, NULL); 40757c478bd9Sstevel@tonic-gate fnp->fn_parent = parent; 40767c478bd9Sstevel@tonic-gate if (parent != NULL) 40777c478bd9Sstevel@tonic-gate fn_hold(parent); 40787c478bd9Sstevel@tonic-gate fnp->fn_len = strlen(name); 40797c478bd9Sstevel@tonic-gate ASSERT(fnp->fn_len < MAXNAMELEN); 40807c478bd9Sstevel@tonic-gate fnp->fn_name = kmem_alloc(fnp->fn_len + 1, KM_SLEEP); 40817c478bd9Sstevel@tonic-gate (void) strcpy(fnp->fn_name, name); 40827c478bd9Sstevel@tonic-gate fnp->fn_refcnt = 1; 4083bbf2a467SNagakiran Rajashekar 4084bbf2a467SNagakiran Rajashekar /* 4085bbf2a467SNagakiran Rajashekar * This hold on sfh is later released 4086bbf2a467SNagakiran Rajashekar * when we do the final fn_rele() on this fname. 
4087bbf2a467SNagakiran Rajashekar */ 4088bbf2a467SNagakiran Rajashekar sfh4_hold(sfh); 4089bbf2a467SNagakiran Rajashekar fnp->fn_sfh = sfh; 4090bbf2a467SNagakiran Rajashekar 40917c478bd9Sstevel@tonic-gate avl_create(&fnp->fn_children, fncmp, sizeof (nfs4_fname_t), 40927c478bd9Sstevel@tonic-gate offsetof(nfs4_fname_t, fn_tree)); 40937c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_fname_debug, (CE_NOTE, 40947c478bd9Sstevel@tonic-gate "fn_get %p:%s, a new nfs4_fname_t!", 40957c478bd9Sstevel@tonic-gate (void *)fnp, fnp->fn_name)); 40967c478bd9Sstevel@tonic-gate if (parent != NULL) { 40977c478bd9Sstevel@tonic-gate avl_insert(&parent->fn_children, fnp, where); 40987c478bd9Sstevel@tonic-gate mutex_exit(&parent->fn_lock); 40997c478bd9Sstevel@tonic-gate } 41007c478bd9Sstevel@tonic-gate 41017c478bd9Sstevel@tonic-gate return (fnp); 41027c478bd9Sstevel@tonic-gate } 41037c478bd9Sstevel@tonic-gate 41047c478bd9Sstevel@tonic-gate void 41057c478bd9Sstevel@tonic-gate fn_hold(nfs4_fname_t *fnp) 41067c478bd9Sstevel@tonic-gate { 41071a5e258fSJosef 'Jeff' Sipek atomic_inc_32(&fnp->fn_refcnt); 41087c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_fname_debug, (CE_NOTE, 41097c478bd9Sstevel@tonic-gate "fn_hold %p:%s, new refcnt=%d", 41107c478bd9Sstevel@tonic-gate (void *)fnp, fnp->fn_name, fnp->fn_refcnt)); 41117c478bd9Sstevel@tonic-gate } 41127c478bd9Sstevel@tonic-gate 41137c478bd9Sstevel@tonic-gate /* 41147c478bd9Sstevel@tonic-gate * Decrement the reference count of the given fname, and destroy it if its 41157c478bd9Sstevel@tonic-gate * reference count goes to zero. Nulls out the given pointer. 
41167c478bd9Sstevel@tonic-gate */ 41177c478bd9Sstevel@tonic-gate 41187c478bd9Sstevel@tonic-gate void 41197c478bd9Sstevel@tonic-gate fn_rele(nfs4_fname_t **fnpp) 41207c478bd9Sstevel@tonic-gate { 41217c478bd9Sstevel@tonic-gate nfs4_fname_t *parent; 41227c478bd9Sstevel@tonic-gate uint32_t newref; 41237c478bd9Sstevel@tonic-gate nfs4_fname_t *fnp; 41247c478bd9Sstevel@tonic-gate 41257c478bd9Sstevel@tonic-gate recur: 41267c478bd9Sstevel@tonic-gate fnp = *fnpp; 41277c478bd9Sstevel@tonic-gate *fnpp = NULL; 41287c478bd9Sstevel@tonic-gate 41297c478bd9Sstevel@tonic-gate mutex_enter(&fnp->fn_lock); 41307c478bd9Sstevel@tonic-gate parent = fnp->fn_parent; 41317c478bd9Sstevel@tonic-gate if (parent != NULL) 41327c478bd9Sstevel@tonic-gate mutex_enter(&parent->fn_lock); /* prevent new references */ 41331a5e258fSJosef 'Jeff' Sipek newref = atomic_dec_32_nv(&fnp->fn_refcnt); 41347c478bd9Sstevel@tonic-gate if (newref > 0) { 41357c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_fname_debug, (CE_NOTE, 41367c478bd9Sstevel@tonic-gate "fn_rele %p:%s, new refcnt=%d", 41377c478bd9Sstevel@tonic-gate (void *)fnp, fnp->fn_name, fnp->fn_refcnt)); 41387c478bd9Sstevel@tonic-gate if (parent != NULL) 41397c478bd9Sstevel@tonic-gate mutex_exit(&parent->fn_lock); 41407c478bd9Sstevel@tonic-gate mutex_exit(&fnp->fn_lock); 41417c478bd9Sstevel@tonic-gate return; 41427c478bd9Sstevel@tonic-gate } 41437c478bd9Sstevel@tonic-gate 41447c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_fname_debug, (CE_NOTE, 41457c478bd9Sstevel@tonic-gate "fn_rele %p:%s, last reference, deleting...", 41467c478bd9Sstevel@tonic-gate (void *)fnp, fnp->fn_name)); 41477c478bd9Sstevel@tonic-gate if (parent != NULL) { 41487c478bd9Sstevel@tonic-gate avl_remove(&parent->fn_children, fnp); 41497c478bd9Sstevel@tonic-gate mutex_exit(&parent->fn_lock); 41507c478bd9Sstevel@tonic-gate } 41517c478bd9Sstevel@tonic-gate kmem_free(fnp->fn_name, fnp->fn_len + 1); 4152bbf2a467SNagakiran Rajashekar sfh4_rele(&fnp->fn_sfh); 41537c478bd9Sstevel@tonic-gate 
mutex_destroy(&fnp->fn_lock); 41547c478bd9Sstevel@tonic-gate avl_destroy(&fnp->fn_children); 41557c478bd9Sstevel@tonic-gate kmem_free(fnp, sizeof (nfs4_fname_t)); 41567c478bd9Sstevel@tonic-gate /* 41577c478bd9Sstevel@tonic-gate * Recursivly fn_rele the parent. 41587c478bd9Sstevel@tonic-gate * Use goto instead of a recursive call to avoid stack overflow. 41597c478bd9Sstevel@tonic-gate */ 41607c478bd9Sstevel@tonic-gate if (parent != NULL) { 41617c478bd9Sstevel@tonic-gate fnpp = &parent; 41627c478bd9Sstevel@tonic-gate goto recur; 41637c478bd9Sstevel@tonic-gate } 41647c478bd9Sstevel@tonic-gate } 41657c478bd9Sstevel@tonic-gate 41667c478bd9Sstevel@tonic-gate /* 41677c478bd9Sstevel@tonic-gate * Returns the single component name of the given fname, in a MAXNAMELEN 41687c478bd9Sstevel@tonic-gate * string buffer, which the caller is responsible for freeing. Note that 41697c478bd9Sstevel@tonic-gate * the name may become invalid as a result of fn_move(). 41707c478bd9Sstevel@tonic-gate */ 41717c478bd9Sstevel@tonic-gate 41727c478bd9Sstevel@tonic-gate char * 41737c478bd9Sstevel@tonic-gate fn_name(nfs4_fname_t *fnp) 41747c478bd9Sstevel@tonic-gate { 41757c478bd9Sstevel@tonic-gate char *name; 41767c478bd9Sstevel@tonic-gate 41777c478bd9Sstevel@tonic-gate ASSERT(fnp->fn_len < MAXNAMELEN); 41787c478bd9Sstevel@tonic-gate name = kmem_alloc(MAXNAMELEN, KM_SLEEP); 41797c478bd9Sstevel@tonic-gate mutex_enter(&fnp->fn_lock); 41807c478bd9Sstevel@tonic-gate (void) strcpy(name, fnp->fn_name); 41817c478bd9Sstevel@tonic-gate mutex_exit(&fnp->fn_lock); 41827c478bd9Sstevel@tonic-gate 41837c478bd9Sstevel@tonic-gate return (name); 41847c478bd9Sstevel@tonic-gate } 41857c478bd9Sstevel@tonic-gate 41867c478bd9Sstevel@tonic-gate 41877c478bd9Sstevel@tonic-gate /* 41887c478bd9Sstevel@tonic-gate * fn_path_realloc 41897c478bd9Sstevel@tonic-gate * 41907c478bd9Sstevel@tonic-gate * This function, used only by fn_path, constructs 41917c478bd9Sstevel@tonic-gate * a new string which looks like "prepend" + "/" + 
"current". 41927c478bd9Sstevel@tonic-gate * by allocating a new string and freeing the old one. 41937c478bd9Sstevel@tonic-gate */ 41947c478bd9Sstevel@tonic-gate static void 41957c478bd9Sstevel@tonic-gate fn_path_realloc(char **curses, char *prepend) 41967c478bd9Sstevel@tonic-gate { 41977c478bd9Sstevel@tonic-gate int len, curlen = 0; 41987c478bd9Sstevel@tonic-gate char *news; 41997c478bd9Sstevel@tonic-gate 42007c478bd9Sstevel@tonic-gate if (*curses == NULL) { 42017c478bd9Sstevel@tonic-gate /* 42027c478bd9Sstevel@tonic-gate * Prime the pump, allocate just the 42037c478bd9Sstevel@tonic-gate * space for prepend and return that. 42047c478bd9Sstevel@tonic-gate */ 42057c478bd9Sstevel@tonic-gate len = strlen(prepend) + 1; 42067c478bd9Sstevel@tonic-gate news = kmem_alloc(len, KM_SLEEP); 42077c478bd9Sstevel@tonic-gate (void) strncpy(news, prepend, len); 42087c478bd9Sstevel@tonic-gate } else { 42097c478bd9Sstevel@tonic-gate /* 42107c478bd9Sstevel@tonic-gate * Allocate the space for a new string 42117c478bd9Sstevel@tonic-gate * +1 +1 is for the "/" and the NULL 42127c478bd9Sstevel@tonic-gate * byte at the end of it all. 42137c478bd9Sstevel@tonic-gate */ 42147c478bd9Sstevel@tonic-gate curlen = strlen(*curses); 42157c478bd9Sstevel@tonic-gate len = curlen + strlen(prepend) + 1 + 1; 42167c478bd9Sstevel@tonic-gate news = kmem_alloc(len, KM_SLEEP); 42177c478bd9Sstevel@tonic-gate (void) strncpy(news, prepend, len); 42187c478bd9Sstevel@tonic-gate (void) strcat(news, "/"); 42197c478bd9Sstevel@tonic-gate (void) strcat(news, *curses); 42207c478bd9Sstevel@tonic-gate kmem_free(*curses, curlen + 1); 42217c478bd9Sstevel@tonic-gate } 42227c478bd9Sstevel@tonic-gate *curses = news; 42237c478bd9Sstevel@tonic-gate } 42247c478bd9Sstevel@tonic-gate 42257c478bd9Sstevel@tonic-gate /* 42267c478bd9Sstevel@tonic-gate * Returns the path name (starting from the fs root) for the given fname. 42277c478bd9Sstevel@tonic-gate * The caller is responsible for freeing. 
Note that the path may be or 42287c478bd9Sstevel@tonic-gate * become invalid as a result of fn_move(). 42297c478bd9Sstevel@tonic-gate */ 42307c478bd9Sstevel@tonic-gate 42317c478bd9Sstevel@tonic-gate char * 42327c478bd9Sstevel@tonic-gate fn_path(nfs4_fname_t *fnp) 42337c478bd9Sstevel@tonic-gate { 42347c478bd9Sstevel@tonic-gate char *path; 42357c478bd9Sstevel@tonic-gate nfs4_fname_t *nextfnp; 42367c478bd9Sstevel@tonic-gate 42377c478bd9Sstevel@tonic-gate if (fnp == NULL) 42387c478bd9Sstevel@tonic-gate return (NULL); 42397c478bd9Sstevel@tonic-gate 42407c478bd9Sstevel@tonic-gate path = NULL; 42417c478bd9Sstevel@tonic-gate 42427c478bd9Sstevel@tonic-gate /* walk up the tree constructing the pathname. */ 42437c478bd9Sstevel@tonic-gate 42447c478bd9Sstevel@tonic-gate fn_hold(fnp); /* adjust for later rele */ 42457c478bd9Sstevel@tonic-gate do { 42467c478bd9Sstevel@tonic-gate mutex_enter(&fnp->fn_lock); 42477c478bd9Sstevel@tonic-gate /* 42487c478bd9Sstevel@tonic-gate * Add fn_name in front of the current path 42497c478bd9Sstevel@tonic-gate */ 42507c478bd9Sstevel@tonic-gate fn_path_realloc(&path, fnp->fn_name); 42517c478bd9Sstevel@tonic-gate nextfnp = fnp->fn_parent; 42527c478bd9Sstevel@tonic-gate if (nextfnp != NULL) 42537c478bd9Sstevel@tonic-gate fn_hold(nextfnp); 42547c478bd9Sstevel@tonic-gate mutex_exit(&fnp->fn_lock); 42557c478bd9Sstevel@tonic-gate fn_rele(&fnp); 42567c478bd9Sstevel@tonic-gate fnp = nextfnp; 42577c478bd9Sstevel@tonic-gate } while (fnp != NULL); 42587c478bd9Sstevel@tonic-gate 42597c478bd9Sstevel@tonic-gate return (path); 42607c478bd9Sstevel@tonic-gate } 42617c478bd9Sstevel@tonic-gate 42627c478bd9Sstevel@tonic-gate /* 42637c478bd9Sstevel@tonic-gate * Return a reference to the parent of the given fname, which the caller is 42647c478bd9Sstevel@tonic-gate * responsible for eventually releasing. 
42657c478bd9Sstevel@tonic-gate */ 42667c478bd9Sstevel@tonic-gate 42677c478bd9Sstevel@tonic-gate nfs4_fname_t * 42687c478bd9Sstevel@tonic-gate fn_parent(nfs4_fname_t *fnp) 42697c478bd9Sstevel@tonic-gate { 42707c478bd9Sstevel@tonic-gate nfs4_fname_t *parent; 42717c478bd9Sstevel@tonic-gate 42727c478bd9Sstevel@tonic-gate mutex_enter(&fnp->fn_lock); 42737c478bd9Sstevel@tonic-gate parent = fnp->fn_parent; 42747c478bd9Sstevel@tonic-gate if (parent != NULL) 42757c478bd9Sstevel@tonic-gate fn_hold(parent); 42767c478bd9Sstevel@tonic-gate mutex_exit(&fnp->fn_lock); 42777c478bd9Sstevel@tonic-gate 42787c478bd9Sstevel@tonic-gate return (parent); 42797c478bd9Sstevel@tonic-gate } 42807c478bd9Sstevel@tonic-gate 42817c478bd9Sstevel@tonic-gate /* 42827c478bd9Sstevel@tonic-gate * Update fnp so that its parent is newparent and its name is newname. 42837c478bd9Sstevel@tonic-gate */ 42847c478bd9Sstevel@tonic-gate 42857c478bd9Sstevel@tonic-gate void 42867c478bd9Sstevel@tonic-gate fn_move(nfs4_fname_t *fnp, nfs4_fname_t *newparent, char *newname) 42877c478bd9Sstevel@tonic-gate { 42887c478bd9Sstevel@tonic-gate nfs4_fname_t *parent, *tmpfnp; 42897c478bd9Sstevel@tonic-gate ssize_t newlen; 42907c478bd9Sstevel@tonic-gate nfs4_fname_t key; 42917c478bd9Sstevel@tonic-gate avl_index_t where; 42927c478bd9Sstevel@tonic-gate 42937c478bd9Sstevel@tonic-gate /* 42947c478bd9Sstevel@tonic-gate * This assert exists to catch the client trying to rename 42957c478bd9Sstevel@tonic-gate * a dir to be a child of itself. This happened at a recent 42967c478bd9Sstevel@tonic-gate * bakeoff against a 3rd party (broken) server which allowed 42977c478bd9Sstevel@tonic-gate * the rename to succeed. 
If it trips it means that: 42987c478bd9Sstevel@tonic-gate * a) the code in nfs4rename that detects this case is broken 42997c478bd9Sstevel@tonic-gate * b) the server is broken (since it allowed the bogus rename) 43007c478bd9Sstevel@tonic-gate * 43017c478bd9Sstevel@tonic-gate * For non-DEBUG kernels, prepare for a recursive mutex_enter 43027c478bd9Sstevel@tonic-gate * panic below from: mutex_enter(&newparent->fn_lock); 43037c478bd9Sstevel@tonic-gate */ 43047c478bd9Sstevel@tonic-gate ASSERT(fnp != newparent); 43057c478bd9Sstevel@tonic-gate 43067c478bd9Sstevel@tonic-gate /* 43077c478bd9Sstevel@tonic-gate * Remove fnp from its current parent, change its name, then add it 43084a36c613SPavel Filipensky * to newparent. It might happen that fnp was replaced by another 43094a36c613SPavel Filipensky * nfs4_fname_t with the same fn_name in parent->fn_children. 43104a36c613SPavel Filipensky * In such case, fnp->fn_parent is NULL and we skip the removal 43114a36c613SPavel Filipensky * of fnp from its current parent. 
43127c478bd9Sstevel@tonic-gate */ 43137c478bd9Sstevel@tonic-gate mutex_enter(&fnp->fn_lock); 43147c478bd9Sstevel@tonic-gate parent = fnp->fn_parent; 43154a36c613SPavel Filipensky if (parent != NULL) { 43167c478bd9Sstevel@tonic-gate mutex_enter(&parent->fn_lock); 43177c478bd9Sstevel@tonic-gate avl_remove(&parent->fn_children, fnp); 43187c478bd9Sstevel@tonic-gate mutex_exit(&parent->fn_lock); 43197c478bd9Sstevel@tonic-gate fn_rele(&fnp->fn_parent); 43204a36c613SPavel Filipensky } 43217c478bd9Sstevel@tonic-gate 43227c478bd9Sstevel@tonic-gate newlen = strlen(newname); 43237c478bd9Sstevel@tonic-gate if (newlen != fnp->fn_len) { 43247c478bd9Sstevel@tonic-gate ASSERT(newlen < MAXNAMELEN); 43257c478bd9Sstevel@tonic-gate kmem_free(fnp->fn_name, fnp->fn_len + 1); 43267c478bd9Sstevel@tonic-gate fnp->fn_name = kmem_alloc(newlen + 1, KM_SLEEP); 43277c478bd9Sstevel@tonic-gate fnp->fn_len = newlen; 43287c478bd9Sstevel@tonic-gate } 43297c478bd9Sstevel@tonic-gate (void) strcpy(fnp->fn_name, newname); 43307c478bd9Sstevel@tonic-gate 43317c478bd9Sstevel@tonic-gate again: 43327c478bd9Sstevel@tonic-gate mutex_enter(&newparent->fn_lock); 43337c478bd9Sstevel@tonic-gate key.fn_name = fnp->fn_name; 43347c478bd9Sstevel@tonic-gate tmpfnp = avl_find(&newparent->fn_children, &key, &where); 43357c478bd9Sstevel@tonic-gate if (tmpfnp != NULL) { 43367c478bd9Sstevel@tonic-gate /* 43377c478bd9Sstevel@tonic-gate * This could be due to a file that was unlinked while 43387c478bd9Sstevel@tonic-gate * open, or perhaps the rnode is in the free list. Remove 43397c478bd9Sstevel@tonic-gate * it from newparent and let it go away on its own. The 43407c478bd9Sstevel@tonic-gate * contorted code is to deal with lock order issues and 43417c478bd9Sstevel@tonic-gate * race conditions. 
43427c478bd9Sstevel@tonic-gate */ 43437c478bd9Sstevel@tonic-gate fn_hold(tmpfnp); 43447c478bd9Sstevel@tonic-gate mutex_exit(&newparent->fn_lock); 43457c478bd9Sstevel@tonic-gate mutex_enter(&tmpfnp->fn_lock); 43467c478bd9Sstevel@tonic-gate if (tmpfnp->fn_parent == newparent) { 43477c478bd9Sstevel@tonic-gate mutex_enter(&newparent->fn_lock); 43487c478bd9Sstevel@tonic-gate avl_remove(&newparent->fn_children, tmpfnp); 43497c478bd9Sstevel@tonic-gate mutex_exit(&newparent->fn_lock); 43507c478bd9Sstevel@tonic-gate fn_rele(&tmpfnp->fn_parent); 43517c478bd9Sstevel@tonic-gate } 43527c478bd9Sstevel@tonic-gate mutex_exit(&tmpfnp->fn_lock); 43537c478bd9Sstevel@tonic-gate fn_rele(&tmpfnp); 43547c478bd9Sstevel@tonic-gate goto again; 43557c478bd9Sstevel@tonic-gate } 43567c478bd9Sstevel@tonic-gate fnp->fn_parent = newparent; 43577c478bd9Sstevel@tonic-gate fn_hold(newparent); 43587c478bd9Sstevel@tonic-gate avl_insert(&newparent->fn_children, fnp, where); 43597c478bd9Sstevel@tonic-gate mutex_exit(&newparent->fn_lock); 43607c478bd9Sstevel@tonic-gate mutex_exit(&fnp->fn_lock); 43617c478bd9Sstevel@tonic-gate } 43627c478bd9Sstevel@tonic-gate 43637c478bd9Sstevel@tonic-gate #ifdef DEBUG 43647c478bd9Sstevel@tonic-gate /* 43657c478bd9Sstevel@tonic-gate * Return non-zero if the type information makes sense for the given vnode. 43667c478bd9Sstevel@tonic-gate * Otherwise panic. 
43677c478bd9Sstevel@tonic-gate */ 43687c478bd9Sstevel@tonic-gate int 43697c478bd9Sstevel@tonic-gate nfs4_consistent_type(vnode_t *vp) 43707c478bd9Sstevel@tonic-gate { 43717c478bd9Sstevel@tonic-gate rnode4_t *rp = VTOR4(vp); 43727c478bd9Sstevel@tonic-gate 43737c478bd9Sstevel@tonic-gate if (nfs4_vtype_debug && vp->v_type != VNON && 43747c478bd9Sstevel@tonic-gate rp->r_attr.va_type != VNON && vp->v_type != rp->r_attr.va_type) { 43757c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC, "vnode %p type mismatch; v_type=%d, " 43767c478bd9Sstevel@tonic-gate "rnode attr type=%d", (void *)vp, vp->v_type, 43777c478bd9Sstevel@tonic-gate rp->r_attr.va_type); 43787c478bd9Sstevel@tonic-gate } 43797c478bd9Sstevel@tonic-gate 43807c478bd9Sstevel@tonic-gate return (1); 43817c478bd9Sstevel@tonic-gate } 43827c478bd9Sstevel@tonic-gate #endif /* DEBUG */ 4383