/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/time.h>
#include <sys/buf.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/tiuser.h>
#include <sys/swap.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/kstat.h>
#include <sys/cmn_err.h>
#include <sys/vtrace.h>
#include <sys/session.h>
#include <sys/dnlc.h>
#include <sys/bitmap.h>
#include <sys/acl.h>
#include <sys/ddi.h>
#include <sys/pathname.h>
#include <sys/flock.h>
#include <sys/dirent.h>
#include <sys/flock.h>
#include <sys/callb.h>
#include <sys/atomic.h>
#include <sys/list.h>
#include <sys/tsol/tnet.h>
#include <sys/priv.h>
#include <sys/sdt.h>
#include <sys/attr.h>

#include <inet/ip6.h>

#include <rpc/types.h>
#include <rpc/xdr.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>

#include <nfs/nfs.h>
#include <nfs/nfs4.h>
#include <nfs/nfs_clnt.h>
#include <nfs/rnode.h>
#include <nfs/nfs_acl.h>

#include <sys/tsol/label.h>

/*
 * The hash queues for access to active and cached rnodes
 * are organized as doubly linked lists.  A reader/writer lock
 * for each hash bucket is used to control access and to synchronize
 * lookups, additions, and deletions from the hash queue.
 *
 * The rnode freelist is organized as a doubly linked list with
 * a head pointer.  Additions and deletions are synchronized via
 * a single mutex.
 *
 * In order to add an rnode to the free list, it must be hashed into
 * a hash queue and the exclusive lock for the hash queue must be held.
 * If an rnode is not hashed into a hash queue, then it is destroyed
 * because it represents no valuable information that can be reused
 * about the file.  The exclusive lock for the hash queue must be
 * held in order to prevent a lookup in the hash queue from finding
 * the rnode, using it, and assuming that the rnode is not on the
 * freelist.  The lookup in the hash queue will have the hash queue
 * locked, either exclusive or shared.
 *
 * The vnode reference count for each rnode is not allowed to drop
 * below 1.  This prevents external entities, such as the VM
 * subsystem, from acquiring references to vnodes already on the
 * freelist and then trying to place them back on the freelist
 * when their reference is released.  This means that when an
 * rnode is looked up in the hash queues, then either the rnode
 * is removed from the freelist and that reference is transferred to
 * the new reference or the vnode reference count must be incremented
 * accordingly.  The mutex for the freelist must be held in order to
 * accurately test to see if the rnode is on the freelist or not.
 * The hash queue lock might be held shared and it is possible that
 * two different threads may race to remove the rnode from the
 * freelist.  This race can be resolved by holding the mutex for the
 * freelist.  Please note that the mutex for the freelist does not
 * need to be held if the rnode is not on the freelist.  It cannot be
 * placed on the freelist due to the requirement that the thread
 * putting the rnode on the freelist must hold the exclusive lock
 * for the hash queue and the thread doing the lookup in the hash
 * queue is holding either a shared or exclusive lock for the hash
 * queue.
 *
 * The lock ordering is:
 *
 *	hash bucket lock -> vnode lock
 *	hash bucket lock -> freelist lock
 */
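
/*
 * Illustrative sketch only (not part of the implementation below): the
 * order in which a lookup path would take these locks when it finds an
 * rnode that may still be sitting on the freelist.  "rhtp" stands in for
 * a hash bucket pointer and the exact field names are assumptions drawn
 * from the description above.
 *
 *	rw_enter(&rhtp->r_lock, RW_READER);	hash bucket lock first
 *	rp = rfind(rhtp, fh, vfsp);		lookup with the bucket held
 *	if (rp != NULL) {
 *		mutex_enter(&rpfreelist_lock);	then the freelist lock
 *		if (rp->r_freef != NULL)	still on the freelist?
 *			rp_rmfree(rp);		reuse its reference
 *		mutex_exit(&rpfreelist_lock);
 *	}
 *	rw_exit(&rhtp->r_lock);
 */
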
static rhashq_t *rtable;

static kmutex_t rpfreelist_lock;
static rnode_t *rpfreelist = NULL;
static long rnew = 0;
long nrnode = 0;

static int rtablesize;
static int rtablemask;

static int hashlen = 4;

static struct kmem_cache *rnode_cache;

/*
 * Mutex to protect the following variables:
 *	nfs_major
 *	nfs_minor
 */
kmutex_t nfs_minor_lock;
int nfs_major;
int nfs_minor;

/* Do we allow preepoch (negative) time values otw? */
bool_t nfs_allow_preepoch_time = FALSE;	/* default: do not allow preepoch */

/*
 * Access cache
 */
static acache_hash_t *acache;
static long nacache;	/* used strictly to size the number of hash queues */

static int acachesize;
static int acachemask;
static struct kmem_cache *acache_cache;

/*
 * Client side utilities
 */

/*
 * client side statistics
 */
static const struct clstat clstat_tmpl = {
	{ "calls",	KSTAT_DATA_UINT64 },
	{ "badcalls",	KSTAT_DATA_UINT64 },
	{ "clgets",	KSTAT_DATA_UINT64 },
	{ "cltoomany",	KSTAT_DATA_UINT64 },
#ifdef DEBUG
	{ "clalloc",	KSTAT_DATA_UINT64 },
	{ "noresponse",	KSTAT_DATA_UINT64 },
	{ "failover",	KSTAT_DATA_UINT64 },
	{ "remap",	KSTAT_DATA_UINT64 },
#endif
};

/*
 * The following are statistics that describe the behavior of the system
 * as a whole and do not correspond to any one particular zone.
 */
#ifdef DEBUG
static struct clstat_debug {
	kstat_named_t	nrnode;		/* number of allocated rnodes */
	kstat_named_t	access;		/* size of access cache */
	kstat_named_t	dirent;		/* size of readdir cache */
	kstat_named_t	dirents;	/* size of readdir buf cache */
	kstat_named_t	reclaim;	/* number of reclaims */
	kstat_named_t	clreclaim;	/* number of cl reclaims */
	kstat_named_t	f_reclaim;	/* number of free reclaims */
	kstat_named_t	a_reclaim;	/* number of active reclaims */
	kstat_named_t	r_reclaim;	/* number of rnode reclaims */
	kstat_named_t	rpath;		/* bytes used to store rpaths */
} clstat_debug = {
	{ "nrnode",	KSTAT_DATA_UINT64 },
	{ "access",	KSTAT_DATA_UINT64 },
	{ "dirent",	KSTAT_DATA_UINT64 },
	{ "dirents",	KSTAT_DATA_UINT64 },
	{ "reclaim",	KSTAT_DATA_UINT64 },
	{ "clreclaim",	KSTAT_DATA_UINT64 },
	{ "f_reclaim",	KSTAT_DATA_UINT64 },
	{ "a_reclaim",	KSTAT_DATA_UINT64 },
	{ "r_reclaim",	KSTAT_DATA_UINT64 },
	{ "r_path",	KSTAT_DATA_UINT64 },
};
#endif /* DEBUG */

/*
 * We keep a global list of per-zone client data, so we can clean up all zones
 * if we get low on memory.
 */
static list_t nfs_clnt_list;
static kmutex_t nfs_clnt_list_lock;
static zone_key_t nfsclnt_zone_key;

static struct kmem_cache *chtab_cache;

/*
 * Some servers do not properly update the attributes of the
 * directory when changes are made.  To allow interoperability
 * with these broken servers, the nfs_disable_rddir_cache
 * parameter must be set in /etc/system
 */
int nfs_disable_rddir_cache = 0;
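/*
 * For example, an /etc/system entry of the following form (shown here
 * for illustration) would disable readdir caching on the next boot:
 *
 *	set nfs:nfs_disable_rddir_cache = 1
 */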

int		clget(clinfo_t *, servinfo_t *, cred_t *, CLIENT **,
		    struct chtab **);
void		clfree(CLIENT *, struct chtab *);
static int	acl_clget(mntinfo_t *, servinfo_t *, cred_t *, CLIENT **,
		    struct chtab **, struct nfs_clnt *);
static int	nfs_clget(mntinfo_t *, servinfo_t *, cred_t *, CLIENT **,
		    struct chtab **, struct nfs_clnt *);
static void	clreclaim(void *);
static int	nfs_feedback(int, int, mntinfo_t *);
static int	rfscall(mntinfo_t *, rpcproc_t, xdrproc_t, caddr_t, xdrproc_t,
		    caddr_t, cred_t *, int *, enum clnt_stat *, int,
		    failinfo_t *);
static int	aclcall(mntinfo_t *, rpcproc_t, xdrproc_t, caddr_t, xdrproc_t,
		    caddr_t, cred_t *, int *, int, failinfo_t *);
static void	rinactive(rnode_t *, cred_t *);
static int	rtablehash(nfs_fhandle *);
static vnode_t	*make_rnode(nfs_fhandle *, rhashq_t *, struct vfs *,
		    struct vnodeops *,
		    int (*)(vnode_t *, page_t *, u_offset_t *, size_t *, int,
			cred_t *),
		    int (*)(const void *, const void *), int *, cred_t *,
		    char *, char *);
static void	rp_rmfree(rnode_t *);
static void	rp_addhash(rnode_t *);
static void	rp_rmhash_locked(rnode_t *);
static rnode_t	*rfind(rhashq_t *, nfs_fhandle *, struct vfs *);
static void	destroy_rnode(rnode_t *);
static void	rddir_cache_free(rddir_cache *);
static int	nfs_free_data_reclaim(rnode_t *);
static int	nfs_active_data_reclaim(rnode_t *);
static int	nfs_free_reclaim(void);
static int	nfs_active_reclaim(void);
static int	nfs_rnode_reclaim(void);
static void	nfs_reclaim(void *);
static int	failover_safe(failinfo_t *);
static void	failover_newserver(mntinfo_t *mi);
static void	failover_thread(mntinfo_t *mi);
static int	failover_wait(mntinfo_t *);
static int	failover_remap(failinfo_t *);
static int	failover_lookup(char *, vnode_t *,
		    int (*)(vnode_t *, char *, vnode_t **,
			struct pathname *, int, vnode_t *, cred_t *, int),
		    int (*)(vnode_t *, vnode_t **, bool_t, cred_t *, int),
		    vnode_t **);
static void	nfs_free_r_path(rnode_t *);
static void	nfs_set_vroot(vnode_t *);
static char	*nfs_getsrvnames(mntinfo_t *, size_t *);

/*
 * from rpcsec module (common/rpcsec)
 */
extern int	sec_clnt_geth(CLIENT *, struct sec_data *, cred_t *, AUTH **);
extern void	sec_clnt_freeh(AUTH *);
extern void	sec_clnt_freeinfo(struct sec_data *);

/*
 * used in mount policy
 */
extern ts_label_t *getflabel_cipso(vfs_t *);

/*
 * EIO or EINTR are not recoverable errors.
 */
#define	IS_RECOVERABLE_ERROR(error)	!((error == EINTR) || (error == EIO))

#ifdef DEBUG
#define	SRV_QFULL_MSG	"send queue to NFS%d server %s is full; still trying\n"
#define	SRV_NOTRESP_MSG	"NFS%d server %s not responding still trying\n"
#else
#define	SRV_QFULL_MSG	"send queue to NFS server %s is full still trying\n"
#define	SRV_NOTRESP_MSG	"NFS server %s not responding still trying\n"
#endif
/*
 * Common handle get program for NFS, NFS ACL, and NFS AUTH client.
 */
static int
clget_impl(clinfo_t *ci, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
	struct chtab **chp, struct nfs_clnt *nfscl)
{
	struct chhead *ch, *newch;
	struct chhead **plistp;
	struct chtab *cp;
	int error;
	k_sigset_t smask;

	if (newcl == NULL || chp == NULL || ci == NULL)
		return (EINVAL);

	*newcl = NULL;
	*chp = NULL;

	/*
	 * Find an unused handle or create one
	 */
	newch = NULL;
	nfscl->nfscl_stat.clgets.value.ui64++;
top:
	/*
	 * Find the correct entry in the cache to check for free
	 * client handles.  The search is based on the RPC program
	 * number, program version number, dev_t for the transport
	 * device, and the protocol family.
	 */
	mutex_enter(&nfscl->nfscl_chtable_lock);
	plistp = &nfscl->nfscl_chtable;
	for (ch = nfscl->nfscl_chtable; ch != NULL; ch = ch->ch_next) {
		if (ch->ch_prog == ci->cl_prog &&
		    ch->ch_vers == ci->cl_vers &&
		    ch->ch_dev == svp->sv_knconf->knc_rdev &&
		    (strcmp(ch->ch_protofmly,
		    svp->sv_knconf->knc_protofmly) == 0))
			break;
		plistp = &ch->ch_next;
	}

	/*
	 * If we didn't find a cache entry for this quadruple, then
	 * create one.  If we don't have one already preallocated,
	 * then drop the cache lock, create one, and then start over.
	 * If we did have a preallocated entry, then just add it to
	 * the front of the list.
	 */
	if (ch == NULL) {
		if (newch == NULL) {
			mutex_exit(&nfscl->nfscl_chtable_lock);
			newch = kmem_alloc(sizeof (*newch), KM_SLEEP);
			newch->ch_timesused = 0;
			newch->ch_prog = ci->cl_prog;
			newch->ch_vers = ci->cl_vers;
			newch->ch_dev = svp->sv_knconf->knc_rdev;
			newch->ch_protofmly = kmem_alloc(
			    strlen(svp->sv_knconf->knc_protofmly) + 1,
			    KM_SLEEP);
			(void) strcpy(newch->ch_protofmly,
			    svp->sv_knconf->knc_protofmly);
			newch->ch_list = NULL;
			goto top;
		}
		ch = newch;
		newch = NULL;
		ch->ch_next = nfscl->nfscl_chtable;
		nfscl->nfscl_chtable = ch;
	/*
	 * We found a cache entry, but if it isn't on the front of the
	 * list, then move it to the front of the list to try to take
	 * advantage of locality of operations.
	 */
	} else if (ch != nfscl->nfscl_chtable) {
		*plistp = ch->ch_next;
		ch->ch_next = nfscl->nfscl_chtable;
		nfscl->nfscl_chtable = ch;
	}

	/*
	 * If there was a free client handle cached, then remove it
	 * from the list, init it, and use it.
	 */
	if (ch->ch_list != NULL) {
		cp = ch->ch_list;
		ch->ch_list = cp->ch_list;
		mutex_exit(&nfscl->nfscl_chtable_lock);
		if (newch != NULL) {
			kmem_free(newch->ch_protofmly,
			    strlen(newch->ch_protofmly) + 1);
			kmem_free(newch, sizeof (*newch));
		}
		(void) clnt_tli_kinit(cp->ch_client, svp->sv_knconf,
		    &svp->sv_addr, ci->cl_readsize, ci->cl_retrans, cr);
		error = sec_clnt_geth(cp->ch_client, svp->sv_secdata, cr,
		    &cp->ch_client->cl_auth);
		if (error || cp->ch_client->cl_auth == NULL) {
			CLNT_DESTROY(cp->ch_client);
			kmem_cache_free(chtab_cache, cp);
			return ((error != 0) ? error : EINTR);
		}
		ch->ch_timesused++;
		*newcl = cp->ch_client;
		*chp = cp;
		return (0);
	}

	/*
	 * There weren't any free client handles which fit, so allocate
	 * a new one and use that.
	 */
#ifdef DEBUG
	atomic_inc_64(&nfscl->nfscl_stat.clalloc.value.ui64);
#endif
	mutex_exit(&nfscl->nfscl_chtable_lock);

	nfscl->nfscl_stat.cltoomany.value.ui64++;
	if (newch != NULL) {
		kmem_free(newch->ch_protofmly, strlen(newch->ch_protofmly) + 1);
		kmem_free(newch, sizeof (*newch));
	}

	cp = kmem_cache_alloc(chtab_cache, KM_SLEEP);
	cp->ch_head = ch;

	sigintr(&smask, (int)ci->cl_flags & MI_INT);
	error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, ci->cl_prog,
	    ci->cl_vers, ci->cl_readsize, ci->cl_retrans, cr, &cp->ch_client);
	sigunintr(&smask);

	if (error != 0) {
		kmem_cache_free(chtab_cache, cp);
#ifdef DEBUG
		atomic_dec_64(&nfscl->nfscl_stat.clalloc.value.ui64);
#endif
		/*
		 * Warning is unnecessary if error is EINTR.
		 */
		if (error != EINTR) {
			nfs_cmn_err(error, CE_WARN,
			    "clget: couldn't create handle: %m\n");
		}
		return (error);
	}
	(void) CLNT_CONTROL(cp->ch_client, CLSET_PROGRESS, NULL);
	auth_destroy(cp->ch_client->cl_auth);
	error = sec_clnt_geth(cp->ch_client, svp->sv_secdata, cr,
	    &cp->ch_client->cl_auth);
	if (error || cp->ch_client->cl_auth == NULL) {
		CLNT_DESTROY(cp->ch_client);
		kmem_cache_free(chtab_cache, cp);
#ifdef DEBUG
		atomic_dec_64(&nfscl->nfscl_stat.clalloc.value.ui64);
#endif
		return ((error != 0) ? error : EINTR);
	}
	ch->ch_timesused++;
	*newcl = cp->ch_client;
	ASSERT(cp->ch_client->cl_nosignal == FALSE);
	*chp = cp;
	return (0);
}

int
clget(clinfo_t *ci, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
	struct chtab **chp)
{
	struct nfs_clnt *nfscl;

	nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone());
	ASSERT(nfscl != NULL);

	return (clget_impl(ci, svp, cr, newcl, chp, nfscl));
}

static int
acl_clget(mntinfo_t *mi, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
	struct chtab **chp, struct nfs_clnt *nfscl)
{
	clinfo_t ci;
	int error;

	/*
	 * Set read buffer size to rsize
	 * and add room for RPC headers.
	 */
	ci.cl_readsize = mi->mi_tsize;
	if (ci.cl_readsize != 0)
		ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA);

	/*
	 * If soft mount and server is down just try once.
	 * meaning: do not retransmit.
	 */
	if (!(mi->mi_flags & MI_HARD) && (mi->mi_flags & MI_DOWN))
		ci.cl_retrans = 0;
	else
		ci.cl_retrans = mi->mi_retrans;

	ci.cl_prog = NFS_ACL_PROGRAM;
	ci.cl_vers = mi->mi_vers;
	ci.cl_flags = mi->mi_flags;

	/*
	 * clget calls sec_clnt_geth() to get an auth handle. For RPCSEC_GSS
	 * security flavor, the client tries to establish a security context
	 * by contacting the server. If the connection is timed out or reset,
	 * e.g. server reboot, we will try again.
	 */
	do {
		error = clget_impl(&ci, svp, cr, newcl, chp, nfscl);

		if (error == 0)
			break;

		/*
		 * For forced unmount or zone shutdown, bail out, no retry.
		 */
		if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
			error = EIO;
			break;
		}

		/* do not retry for softmount */
		if (!(mi->mi_flags & MI_HARD))
			break;

		/* let the caller deal with the failover case */
		if (FAILOVER_MOUNT(mi))
			break;

	} while (error == ETIMEDOUT || error == ECONNRESET);

	return (error);
}

static int
nfs_clget(mntinfo_t *mi, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
	struct chtab **chp, struct nfs_clnt *nfscl)
{
	clinfo_t ci;
	int error;

	/*
	 * Set read buffer size to rsize
	 * and add room for RPC headers.
	 */
	ci.cl_readsize = mi->mi_tsize;
	if (ci.cl_readsize != 0)
		ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA);

	/*
	 * If soft mount and server is down just try once.
	 * meaning: do not retransmit.
	 */
	if (!(mi->mi_flags & MI_HARD) && (mi->mi_flags & MI_DOWN))
		ci.cl_retrans = 0;
	else
		ci.cl_retrans = mi->mi_retrans;

	ci.cl_prog = mi->mi_prog;
	ci.cl_vers = mi->mi_vers;
	ci.cl_flags = mi->mi_flags;

	/*
	 * clget calls sec_clnt_geth() to get an auth handle. For RPCSEC_GSS
	 * security flavor, the client tries to establish a security context
	 * by contacting the server. If the connection is timed out or reset,
	 * e.g. server reboot, we will try again.
	 */
	do {
		error = clget_impl(&ci, svp, cr, newcl, chp, nfscl);

		if (error == 0)
			break;

		/*
		 * For forced unmount or zone shutdown, bail out, no retry.
		 */
		if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
			error = EIO;
			break;
		}

		/* do not retry for softmount */
		if (!(mi->mi_flags & MI_HARD))
			break;

		/* let the caller deal with the failover case */
		if (FAILOVER_MOUNT(mi))
			break;

	} while (error == ETIMEDOUT || error == ECONNRESET);

	return (error);
}

static void
clfree_impl(CLIENT *cl, struct chtab *cp, struct nfs_clnt *nfscl)
{
	if (cl->cl_auth != NULL) {
		sec_clnt_freeh(cl->cl_auth);
		cl->cl_auth = NULL;
	}

	/*
	 * Timestamp this cache entry so that we know when it was last
	 * used.
	 */
	cp->ch_freed = gethrestime_sec();

	/*
	 * Add the free client handle to the front of the list.
	 * This way, the list will be sorted in youngest to oldest
	 * order.
	 */
	mutex_enter(&nfscl->nfscl_chtable_lock);
	cp->ch_list = cp->ch_head->ch_list;
	cp->ch_head->ch_list = cp;
	mutex_exit(&nfscl->nfscl_chtable_lock);
}

void
clfree(CLIENT *cl, struct chtab *cp)
{
	struct nfs_clnt *nfscl;

	nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone());
	ASSERT(nfscl != NULL);

	clfree_impl(cl, cp, nfscl);
}
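
/*
 * Illustrative usage sketch (not taken from this file): callers are
 * expected to pair clget() with clfree() around an RPC, along the lines
 * of the following, where ci, proc, the xdr routines, and wait are all
 * supplied by the caller:
 *
 *	error = clget(&ci, svp, cr, &client, &ch);
 *	if (error == 0) {
 *		status = CLNT_CALL(client, proc, xdrargs, argsp,
 *		    xdrres, resp, wait);
 *		clfree(client, ch);
 *	}
 */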

#define	CL_HOLDTIME	60	/* time to hold client handles */

static void
clreclaim_zone(struct nfs_clnt *nfscl, uint_t cl_holdtime)
{
	struct chhead *ch;
	struct chtab *cp;	/* list of objects that can be reclaimed */
	struct chtab *cpe;
	struct chtab *cpl;
	struct chtab **cpp;
#ifdef DEBUG
	int n = 0;
#endif

	/*
	 * Need to reclaim some memory, so step through the cache
	 * looking through the lists for entries which can be freed.
	 */
	cp = NULL;

	mutex_enter(&nfscl->nfscl_chtable_lock);

	/*
	 * Here we step through each non-NULL quadruple and start to
	 * construct the reclaim list pointed to by cp.  Note that
	 * cp will contain all eligible chtab entries.  When this traversal
	 * completes, chtab entries from the last quadruple will be at the
	 * front of cp and entries from previously inspected quadruples have
	 * been appended to the rear of cp.
	 */
	for (ch = nfscl->nfscl_chtable; ch != NULL; ch = ch->ch_next) {
		if (ch->ch_list == NULL)
			continue;
		/*
		 * Search each list for entries older than
		 * cl_holdtime seconds.  The lists are maintained
		 * in youngest to oldest order so that when the
		 * first entry is found which is old enough, then
		 * all of the rest of the entries on the list will
		 * be old enough as well.
		 */
		cpl = ch->ch_list;
		cpp = &ch->ch_list;
		while (cpl != NULL &&
		    cpl->ch_freed + cl_holdtime > gethrestime_sec()) {
			cpp = &cpl->ch_list;
			cpl = cpl->ch_list;
		}
		if (cpl != NULL) {
			*cpp = NULL;
			if (cp != NULL) {
				cpe = cpl;
				while (cpe->ch_list != NULL)
					cpe = cpe->ch_list;
				cpe->ch_list = cp;
			}
			cp = cpl;
		}
	}

	mutex_exit(&nfscl->nfscl_chtable_lock);

	/*
	 * If cp is empty, then there is nothing to reclaim here.
	 */
	if (cp == NULL)
		return;

	/*
	 * Step through the list of entries to free, destroying each client
	 * handle and kmem_free'ing the memory for each entry.
	 */
	while (cp != NULL) {
#ifdef DEBUG
		n++;
#endif
		CLNT_DESTROY(cp->ch_client);
		cpl = cp->ch_list;
		kmem_cache_free(chtab_cache, cp);
		cp = cpl;
	}

#ifdef DEBUG
	/*
	 * Update clalloc so that nfsstat shows the current number
	 * of allocated client handles.
	 */
	atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -n);
#endif
}

/* ARGSUSED */
static void
clreclaim(void *all)
{
	struct nfs_clnt *nfscl;

#ifdef DEBUG
	clstat_debug.clreclaim.value.ui64++;
#endif
	/*
	 * The system is low on memory; go through and try to reclaim some from
	 * every zone on the system.
	 */
	mutex_enter(&nfs_clnt_list_lock);
	nfscl = list_head(&nfs_clnt_list);
	for (; nfscl != NULL; nfscl = list_next(&nfs_clnt_list, nfscl))
		clreclaim_zone(nfscl, CL_HOLDTIME);
	mutex_exit(&nfs_clnt_list_lock);
}

/*
 * Minimum time-out values indexed by call type
 * These units are in "eights" of a second to avoid multiplies
 */
static unsigned int minimum_timeo[] = {
	6, 7, 10
};

/*
 * Back off for retransmission timeout, MAXTIMO is in hz of a sec
 */
#define	MAXTIMO	(20*hz)
#define	backoff(tim)	(((tim) < MAXTIMO) ? dobackoff(tim) : (tim))
#define	dobackoff(tim)	((((tim) << 1) > MAXTIMO) ? MAXTIMO : ((tim) << 1))
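
/*
 * For example, if hz were 100 (so MAXTIMO == 2000 ticks, i.e. 20 seconds),
 * repeated application of backoff() to an initial timeout of 200 ticks
 * would yield 200 -> 400 -> 800 -> 1600 -> 2000 and then stay at 2000.
 */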

#define	MIN_NFS_TSIZE	512	/* minimum "chunk" of NFS IO */
#define	REDUCE_NFS_TIME	(hz/2)	/* rtxcur we try to keep under */
#define	INCREASE_NFS_TIME	(hz/3*8) /* srtt we try to keep under (scaled*8) */

/*
 * Function called when rfscall notices that we have been
 * re-transmitting, or when we get a response without retransmissions.
 * Return 1 if the transfer size was adjusted down - 0 if no change.
 */
static int
nfs_feedback(int flag, int which, mntinfo_t *mi)
{
	int kind;
	int r = 0;

	mutex_enter(&mi->mi_lock);
	if (flag == FEEDBACK_REXMIT1) {
		if (mi->mi_timers[NFS_CALLTYPES].rt_rtxcur != 0 &&
		    mi->mi_timers[NFS_CALLTYPES].rt_rtxcur < REDUCE_NFS_TIME)
			goto done;
		if (mi->mi_curread > MIN_NFS_TSIZE) {
			mi->mi_curread /= 2;
			if (mi->mi_curread < MIN_NFS_TSIZE)
				mi->mi_curread = MIN_NFS_TSIZE;
			r = 1;
		}

		if (mi->mi_curwrite > MIN_NFS_TSIZE) {
			mi->mi_curwrite /= 2;
			if (mi->mi_curwrite < MIN_NFS_TSIZE)
				mi->mi_curwrite = MIN_NFS_TSIZE;
			r = 1;
		}
	} else if (flag == FEEDBACK_OK) {
		kind = mi->mi_timer_type[which];
		if (kind == 0 ||
		    mi->mi_timers[kind].rt_srtt >= INCREASE_NFS_TIME)
			goto done;
		if (kind == 1) {
			if (mi->mi_curread >= mi->mi_tsize)
				goto done;
			mi->mi_curread += MIN_NFS_TSIZE;
			if (mi->mi_curread > mi->mi_tsize/2)
				mi->mi_curread = mi->mi_tsize;
		} else if (kind == 2) {
			if (mi->mi_curwrite >= mi->mi_stsize)
				goto done;
			mi->mi_curwrite += MIN_NFS_TSIZE;
			if (mi->mi_curwrite > mi->mi_stsize/2)
				mi->mi_curwrite = mi->mi_stsize;
		}
	}
done:
	mutex_exit(&mi->mi_lock);
	return (r);
}

#ifdef DEBUG
static int rfs2call_hits = 0;
static int rfs2call_misses = 0;
#endif

int
rfs2call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
	xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
	enum nfsstat *statusp, int flags, failinfo_t *fi)
{
	int rpcerror;
	enum clnt_stat rpc_status;

	ASSERT(statusp != NULL);

	rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres, resp,
	    cr, douprintf, &rpc_status, flags, fi);
	if (!rpcerror) {
		/*
		 * See crnetadjust() for comments.
		 */
		if (*statusp == NFSERR_ACCES &&
		    (cr = crnetadjust(cr)) != NULL) {
#ifdef DEBUG
			rfs2call_hits++;
#endif
			rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres,
			    resp, cr, douprintf, NULL, flags, fi);
			crfree(cr);
#ifdef DEBUG
			if (*statusp == NFSERR_ACCES)
				rfs2call_misses++;
#endif
		}
	} else if (rpc_status == RPC_PROCUNAVAIL) {
		*statusp = NFSERR_OPNOTSUPP;
		rpcerror = 0;
	}

	return (rpcerror);
}

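/*
 * Delay (in clock ticks) between retries when a server returns
 * NFS3ERR_JUKEBOX.  nfs3_jukebox_delay starts out at zero and is
 * presumably initialized elsewhere (to NFS3_JUKEBOX_DELAY) before
 * rfs3call() below uses it.
 */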
#define	NFS3_JUKEBOX_DELAY	10 * hz

static clock_t nfs3_jukebox_delay = 0;

#ifdef DEBUG
static int rfs3call_hits = 0;
static int rfs3call_misses = 0;
#endif

int
rfs3call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
	xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
	nfsstat3 *statusp, int flags, failinfo_t *fi)
{
	int rpcerror;
	int user_informed;

	user_informed = 0;
	do {
		rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres, resp,
		    cr, douprintf, NULL, flags, fi);
		if (!rpcerror) {
			cred_t *crr;
			if (*statusp == NFS3ERR_JUKEBOX) {
				if (ttoproc(curthread) == &p0) {
					rpcerror = EAGAIN;
					break;
				}
				if (!user_informed) {
					user_informed = 1;
					uprintf(
		"file temporarily unavailable on the server, retrying...\n");
				}
				delay(nfs3_jukebox_delay);
			}
			/*
			 * See crnetadjust() for comments.
			 */
			else if (*statusp == NFS3ERR_ACCES &&
			    (crr = crnetadjust(cr)) != NULL) {
#ifdef DEBUG
				rfs3call_hits++;
#endif
				rpcerror = rfscall(mi, which, xdrargs, argsp,
				    xdrres, resp, crr, douprintf,
				    NULL, flags, fi);

				crfree(crr);
#ifdef DEBUG
				if (*statusp == NFS3ERR_ACCES)
					rfs3call_misses++;
#endif
			}
		}
	} while (!rpcerror && *statusp == NFS3ERR_JUKEBOX);

	return (rpcerror);
}

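/*
 * VALID_FH() checks that the rnode still refers to the current server of
 * a failover mount.  INC_READERS()/DEC_READERS() maintain mi_readers, the
 * count of threads currently using the current server; the last reader
 * wakes any waiters on mi_failover_cv.  Callers are expected to hold
 * mi_lock around both macros (as rfscall() below does).
 */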
#define	VALID_FH(fi)	(VTOR(fi->vp)->r_server == VTOMI(fi->vp)->mi_curr_serv)
#define	INC_READERS(mi)		{ \
	mi->mi_readers++; \
}
#define	DEC_READERS(mi)		{ \
	mi->mi_readers--; \
	if (mi->mi_readers == 0) \
		cv_broadcast(&mi->mi_failover_cv); \
}
9327c478bd9Sstevel@tonic-gate
9337c478bd9Sstevel@tonic-gate static int
rfscall(mntinfo_t * mi,rpcproc_t which,xdrproc_t xdrargs,caddr_t argsp,xdrproc_t xdrres,caddr_t resp,cred_t * icr,int * douprintf,enum clnt_stat * rpc_status,int flags,failinfo_t * fi)9347c478bd9Sstevel@tonic-gate rfscall(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
93545916cd2Sjpk xdrproc_t xdrres, caddr_t resp, cred_t *icr, int *douprintf,
9367c478bd9Sstevel@tonic-gate enum clnt_stat *rpc_status, int flags, failinfo_t *fi)
9377c478bd9Sstevel@tonic-gate {
9387c478bd9Sstevel@tonic-gate CLIENT *client;
9397c478bd9Sstevel@tonic-gate struct chtab *ch;
94045916cd2Sjpk cred_t *cr = icr;
9417c478bd9Sstevel@tonic-gate enum clnt_stat status;
942e280ed37SDai Ngo struct rpc_err rpcerr, rpcerr_tmp;
9437c478bd9Sstevel@tonic-gate struct timeval wait;
9447c478bd9Sstevel@tonic-gate int timeo; /* in units of hz */
9457c478bd9Sstevel@tonic-gate int my_rsize, my_wsize;
9467c478bd9Sstevel@tonic-gate bool_t tryagain;
94745916cd2Sjpk bool_t cred_cloned = FALSE;
9487c478bd9Sstevel@tonic-gate k_sigset_t smask;
9497c478bd9Sstevel@tonic-gate servinfo_t *svp;
9507c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl;
9517c478bd9Sstevel@tonic-gate zoneid_t zoneid = getzoneid();
952e280ed37SDai Ngo char *msg;
9537c478bd9Sstevel@tonic-gate #ifdef DEBUG
9547c478bd9Sstevel@tonic-gate char *bufp;
9557c478bd9Sstevel@tonic-gate #endif
9567c478bd9Sstevel@tonic-gate
9577c478bd9Sstevel@tonic-gate
9587c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_NFS, TR_RFSCALL_START,
9597c478bd9Sstevel@tonic-gate "rfscall_start:which %d mi %p", which, mi);
9607c478bd9Sstevel@tonic-gate
961108322fbScarlsonj nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone());
9627c478bd9Sstevel@tonic-gate ASSERT(nfscl != NULL);
9637c478bd9Sstevel@tonic-gate
9647c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.calls.value.ui64++;
9657c478bd9Sstevel@tonic-gate mi->mi_reqs[which].value.ui64++;
9667c478bd9Sstevel@tonic-gate
9677c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_SUCCESS;
9687c478bd9Sstevel@tonic-gate
9697c478bd9Sstevel@tonic-gate /*
9707c478bd9Sstevel@tonic-gate * In case of forced unmount or zone shutdown, return EIO.
9717c478bd9Sstevel@tonic-gate */
9727c478bd9Sstevel@tonic-gate
9737c478bd9Sstevel@tonic-gate if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
9747c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_FAILED;
9757c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO;
9767c478bd9Sstevel@tonic-gate return (rpcerr.re_errno);
9777c478bd9Sstevel@tonic-gate }
9787c478bd9Sstevel@tonic-gate
9797c478bd9Sstevel@tonic-gate /*
9807c478bd9Sstevel@tonic-gate * Remember the transfer sizes in case
9817c478bd9Sstevel@tonic-gate * nfs_feedback changes them underneath us.
9827c478bd9Sstevel@tonic-gate */
9837c478bd9Sstevel@tonic-gate my_rsize = mi->mi_curread;
9847c478bd9Sstevel@tonic-gate my_wsize = mi->mi_curwrite;
9857c478bd9Sstevel@tonic-gate
9867c478bd9Sstevel@tonic-gate /*
9877c478bd9Sstevel@tonic-gate * NFS client failover support
9887c478bd9Sstevel@tonic-gate *
9897c478bd9Sstevel@tonic-gate * If this rnode is not in sync with the current server (VALID_FH),
9907c478bd9Sstevel@tonic-gate * we'd like to do a remap to get in sync. We can be interrupted
9917c478bd9Sstevel@tonic-gate * in failover_remap(), and if so we'll bail. Otherwise, we'll
9927c478bd9Sstevel@tonic-gate * use the best info we have to try the RPC. Part of that is
9937c478bd9Sstevel@tonic-gate * unconditionally updating the filehandle copy kept for V3.
9947c478bd9Sstevel@tonic-gate *
9957c478bd9Sstevel@tonic-gate * Locking: INC_READERS/DEC_READERS is a poor man's interrruptible
9967c478bd9Sstevel@tonic-gate * rw_enter(); we're trying to keep the current server from being
9977c478bd9Sstevel@tonic-gate * changed on us until we're done with the remapping and have a
9987c478bd9Sstevel@tonic-gate * matching client handle. We don't want to sending a filehandle
9997c478bd9Sstevel@tonic-gate * to the wrong host.
10007c478bd9Sstevel@tonic-gate */
10017c478bd9Sstevel@tonic-gate failoverretry:
10027c478bd9Sstevel@tonic-gate if (FAILOVER_MOUNT(mi)) {
10037c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
10047c478bd9Sstevel@tonic-gate if (!(flags & RFSCALL_SOFT) && failover_safe(fi)) {
10057c478bd9Sstevel@tonic-gate if (failover_wait(mi)) {
10067c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
10077c478bd9Sstevel@tonic-gate return (EINTR);
10087c478bd9Sstevel@tonic-gate }
10097c478bd9Sstevel@tonic-gate }
10107c478bd9Sstevel@tonic-gate INC_READERS(mi);
10117c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
10127c478bd9Sstevel@tonic-gate if (fi) {
10137c478bd9Sstevel@tonic-gate if (!VALID_FH(fi) &&
10147c478bd9Sstevel@tonic-gate !(flags & RFSCALL_SOFT) && failover_safe(fi)) {
10157c478bd9Sstevel@tonic-gate int remaperr;
10167c478bd9Sstevel@tonic-gate
10177c478bd9Sstevel@tonic-gate svp = mi->mi_curr_serv;
10187c478bd9Sstevel@tonic-gate remaperr = failover_remap(fi);
10197c478bd9Sstevel@tonic-gate if (remaperr != 0) {
10207c478bd9Sstevel@tonic-gate #ifdef DEBUG
10217c478bd9Sstevel@tonic-gate if (remaperr != EINTR)
10227c478bd9Sstevel@tonic-gate nfs_cmn_err(remaperr, CE_WARN,
10237c478bd9Sstevel@tonic-gate "rfscall couldn't failover: %m");
10247c478bd9Sstevel@tonic-gate #endif
10257c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
10267c478bd9Sstevel@tonic-gate DEC_READERS(mi);
10277c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
10287c478bd9Sstevel@tonic-gate /*
10297c478bd9Sstevel@tonic-gate  * If failover_remap returns a recoverable
10307c478bd9Sstevel@tonic-gate  * error, such as ETIMEDOUT, and the
10317c478bd9Sstevel@tonic-gate  * filesystem is hard mounted, we have to
10327c478bd9Sstevel@tonic-gate  * retry the call with a new server.
10337c478bd9Sstevel@tonic-gate */
10347c478bd9Sstevel@tonic-gate if ((mi->mi_flags & MI_HARD) &&
10357c478bd9Sstevel@tonic-gate IS_RECOVERABLE_ERROR(remaperr)) {
10367c478bd9Sstevel@tonic-gate if (svp == mi->mi_curr_serv)
10377c478bd9Sstevel@tonic-gate failover_newserver(mi);
10387c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_SUCCESS;
10397c478bd9Sstevel@tonic-gate goto failoverretry;
10407c478bd9Sstevel@tonic-gate }
10417c478bd9Sstevel@tonic-gate rpcerr.re_errno = remaperr;
10427c478bd9Sstevel@tonic-gate return (remaperr);
10437c478bd9Sstevel@tonic-gate }
10447c478bd9Sstevel@tonic-gate }
10457c478bd9Sstevel@tonic-gate if (fi->fhp && fi->copyproc)
10467c478bd9Sstevel@tonic-gate (*fi->copyproc)(fi->fhp, fi->vp);
10477c478bd9Sstevel@tonic-gate }
10487c478bd9Sstevel@tonic-gate }
10497c478bd9Sstevel@tonic-gate
105045916cd2Sjpk 	/* For TSOL, use a new cred which has the net_mac_aware flag set */
105145916cd2Sjpk if (!cred_cloned && is_system_labeled()) {
105245916cd2Sjpk cred_cloned = TRUE;
105345916cd2Sjpk cr = crdup(icr);
105445916cd2Sjpk (void) setpflags(NET_MAC_AWARE, 1, cr);
105545916cd2Sjpk }
105645916cd2Sjpk
10577c478bd9Sstevel@tonic-gate /*
10587c478bd9Sstevel@tonic-gate * clget() calls clnt_tli_kinit() which clears the xid, so we
10597c478bd9Sstevel@tonic-gate * are guaranteed to reprocess the retry as a new request.
10607c478bd9Sstevel@tonic-gate */
10617c478bd9Sstevel@tonic-gate svp = mi->mi_curr_serv;
10627c478bd9Sstevel@tonic-gate rpcerr.re_errno = nfs_clget(mi, svp, cr, &client, &ch, nfscl);
10637c478bd9Sstevel@tonic-gate
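	/*
	 * For a failover mount, the remap-sensitive window ends once the
	 * client handle attempt completes, so drop the reader hold taken
	 * above.  If handle creation failed with ETIMEDOUT or ECONNRESET
	 * and failover is safe for this request, treat the current server
	 * as dead and fail over to the next one instead of returning the
	 * error.
	 */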
10647c478bd9Sstevel@tonic-gate if (FAILOVER_MOUNT(mi)) {
10657c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
10667c478bd9Sstevel@tonic-gate DEC_READERS(mi);
10677c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
10687c478bd9Sstevel@tonic-gate
10697c478bd9Sstevel@tonic-gate if ((rpcerr.re_errno == ETIMEDOUT ||
10707c478bd9Sstevel@tonic-gate rpcerr.re_errno == ECONNRESET) &&
10717c478bd9Sstevel@tonic-gate failover_safe(fi)) {
10727c478bd9Sstevel@tonic-gate if (svp == mi->mi_curr_serv)
10737c478bd9Sstevel@tonic-gate failover_newserver(mi);
10747c478bd9Sstevel@tonic-gate goto failoverretry;
10757c478bd9Sstevel@tonic-gate }
10767c478bd9Sstevel@tonic-gate }
10777c478bd9Sstevel@tonic-gate if (rpcerr.re_errno != 0)
10787c478bd9Sstevel@tonic-gate return (rpcerr.re_errno);
10797c478bd9Sstevel@tonic-gate
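	/*
	 * Compute the initial call timeout, in clock ticks.  For
	 * connection-oriented transports (COTS and COTS_ORD) the fixed
	 * mount timeout mi_timeo is used; it is expressed in tenths of a
	 * second, so e.g. a timeo of 600 yields a 60 second timeout.  For
	 * connectionless transports the RPC layer derives the timeout from
	 * the smoothed round-trip timers kept per call type, with a floor
	 * based on minimum_timeo[].
	 */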
10807c478bd9Sstevel@tonic-gate if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD ||
10817c478bd9Sstevel@tonic-gate svp->sv_knconf->knc_semantics == NC_TPI_COTS) {
10827c478bd9Sstevel@tonic-gate timeo = (mi->mi_timeo * hz) / 10;
10837c478bd9Sstevel@tonic-gate } else {
10847c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
10857c478bd9Sstevel@tonic-gate timeo = CLNT_SETTIMERS(client,
10867c478bd9Sstevel@tonic-gate &(mi->mi_timers[mi->mi_timer_type[which]]),
10877c478bd9Sstevel@tonic-gate &(mi->mi_timers[NFS_CALLTYPES]),
10887c478bd9Sstevel@tonic-gate (minimum_timeo[mi->mi_call_type[which]]*hz)>>3,
10897c478bd9Sstevel@tonic-gate (void (*)())NULL, (caddr_t)mi, 0);
10907c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
10917c478bd9Sstevel@tonic-gate }
10927c478bd9Sstevel@tonic-gate
10937c478bd9Sstevel@tonic-gate /*
10947c478bd9Sstevel@tonic-gate * If hard mounted fs, retry call forever unless hard error occurs.
10957c478bd9Sstevel@tonic-gate */
10967c478bd9Sstevel@tonic-gate do {
10977c478bd9Sstevel@tonic-gate tryagain = FALSE;
10987c478bd9Sstevel@tonic-gate
10997c478bd9Sstevel@tonic-gate if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
11007c478bd9Sstevel@tonic-gate status = RPC_FAILED;
11017c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_FAILED;
11027c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO;
11037c478bd9Sstevel@tonic-gate break;
11047c478bd9Sstevel@tonic-gate }
11057c478bd9Sstevel@tonic-gate
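		/*
		 * Convert the timeout, kept in clock ticks so backoff()
		 * can scale it between retries, into the struct timeval
		 * that CLNT_CALL() expects.
		 */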
11067c478bd9Sstevel@tonic-gate TICK_TO_TIMEVAL(timeo, &wait);
11077c478bd9Sstevel@tonic-gate
11087c478bd9Sstevel@tonic-gate /*
11097c478bd9Sstevel@tonic-gate * Mask out all signals except SIGHUP, SIGINT, SIGQUIT
11107c478bd9Sstevel@tonic-gate * and SIGTERM. (Preserving the existing masks).
11117c478bd9Sstevel@tonic-gate * Mask out SIGINT if mount option nointr is specified.
11127c478bd9Sstevel@tonic-gate */
11137c478bd9Sstevel@tonic-gate sigintr(&smask, (int)mi->mi_flags & MI_INT);
11147c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_INT))
11157c478bd9Sstevel@tonic-gate client->cl_nosignal = TRUE;
11167c478bd9Sstevel@tonic-gate
11177c478bd9Sstevel@tonic-gate /*
11187c478bd9Sstevel@tonic-gate * If there is a current signal, then don't bother
11197c478bd9Sstevel@tonic-gate * even trying to send out the request because we
11207c478bd9Sstevel@tonic-gate * won't be able to block waiting for the response.
11217c478bd9Sstevel@tonic-gate * Simply assume RPC_INTR and get on with it.
11227c478bd9Sstevel@tonic-gate */
11237c478bd9Sstevel@tonic-gate if (ttolwp(curthread) != NULL && ISSIG(curthread, JUSTLOOKING))
11247c478bd9Sstevel@tonic-gate status = RPC_INTR;
11257c478bd9Sstevel@tonic-gate else {
11267c478bd9Sstevel@tonic-gate status = CLNT_CALL(client, which, xdrargs, argsp,
11277c478bd9Sstevel@tonic-gate xdrres, resp, wait);
11287c478bd9Sstevel@tonic-gate }
11297c478bd9Sstevel@tonic-gate
11307c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_INT))
11317c478bd9Sstevel@tonic-gate client->cl_nosignal = FALSE;
11327c478bd9Sstevel@tonic-gate /*
11337c478bd9Sstevel@tonic-gate * restore original signal mask
11347c478bd9Sstevel@tonic-gate */
11357c478bd9Sstevel@tonic-gate sigunintr(&smask);
11367c478bd9Sstevel@tonic-gate
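		/*
		 * Decide what to do based on the RPC status: success may
		 * feed the dynamic transfer-size machinery, an interrupt
		 * is fatal, and most other failures fall through to the
		 * retry and backoff logic below.
		 */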
11377c478bd9Sstevel@tonic-gate switch (status) {
11387c478bd9Sstevel@tonic-gate case RPC_SUCCESS:
11397c478bd9Sstevel@tonic-gate if ((mi->mi_flags & MI_DYNAMIC) &&
11407c478bd9Sstevel@tonic-gate mi->mi_timer_type[which] != 0 &&
11417c478bd9Sstevel@tonic-gate (mi->mi_curread != my_rsize ||
11427c478bd9Sstevel@tonic-gate mi->mi_curwrite != my_wsize))
11437c478bd9Sstevel@tonic-gate (void) nfs_feedback(FEEDBACK_OK, which, mi);
11447c478bd9Sstevel@tonic-gate break;
11457c478bd9Sstevel@tonic-gate
11467c478bd9Sstevel@tonic-gate case RPC_INTR:
11477c478bd9Sstevel@tonic-gate /*
11487c478bd9Sstevel@tonic-gate * There is no way to recover from this error,
11497c478bd9Sstevel@tonic-gate * even if mount option nointr is specified.
11507c478bd9Sstevel@tonic-gate * SIGKILL, for example, cannot be blocked.
11517c478bd9Sstevel@tonic-gate */
11527c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_INTR;
11537c478bd9Sstevel@tonic-gate rpcerr.re_errno = EINTR;
11547c478bd9Sstevel@tonic-gate break;
11557c478bd9Sstevel@tonic-gate
11567c478bd9Sstevel@tonic-gate case RPC_UDERROR:
11577c478bd9Sstevel@tonic-gate /*
11587c478bd9Sstevel@tonic-gate * If the NFS server is local (vold) and
11597c478bd9Sstevel@tonic-gate 			 * it goes away, then we get RPC_UDERROR.
11607c478bd9Sstevel@tonic-gate 			 * This is a retryable error, so we would
11617c478bd9Sstevel@tonic-gate 			 * normally loop; check whether the specific
11627c478bd9Sstevel@tonic-gate 			 * error was ECONNRESET, indicating that the
11637c478bd9Sstevel@tonic-gate 			 * target did not exist at all. If so,
11647c478bd9Sstevel@tonic-gate * return with RPC_PROGUNAVAIL and
11657c478bd9Sstevel@tonic-gate * ECONNRESET to indicate why.
11667c478bd9Sstevel@tonic-gate */
11677c478bd9Sstevel@tonic-gate CLNT_GETERR(client, &rpcerr);
11687c478bd9Sstevel@tonic-gate if (rpcerr.re_errno == ECONNRESET) {
11697c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_PROGUNAVAIL;
11707c478bd9Sstevel@tonic-gate rpcerr.re_errno = ECONNRESET;
11717c478bd9Sstevel@tonic-gate break;
11727c478bd9Sstevel@tonic-gate }
11737c478bd9Sstevel@tonic-gate /*FALLTHROUGH*/
11747c478bd9Sstevel@tonic-gate
11757c478bd9Sstevel@tonic-gate default: /* probably RPC_TIMEDOUT */
11767c478bd9Sstevel@tonic-gate if (IS_UNRECOVERABLE_RPC(status))
11777c478bd9Sstevel@tonic-gate break;
11787c478bd9Sstevel@tonic-gate
11797c478bd9Sstevel@tonic-gate /*
11807c478bd9Sstevel@tonic-gate * increment server not responding count
11817c478bd9Sstevel@tonic-gate */
11827c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
11837c478bd9Sstevel@tonic-gate mi->mi_noresponse++;
11847c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
11857c478bd9Sstevel@tonic-gate #ifdef DEBUG
11867c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.noresponse.value.ui64++;
11877c478bd9Sstevel@tonic-gate #endif
11887c478bd9Sstevel@tonic-gate
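			/*
			 * Soft mounts give up at this point.  The exception
			 * is a semi-soft mount for which this operation is
			 * marked in mi_ss_call_type[]; such requests keep
			 * retrying as if the filesystem were hard mounted.
			 */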
11897c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_HARD)) {
11907c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_SEMISOFT) ||
11917c478bd9Sstevel@tonic-gate (mi->mi_ss_call_type[which] == 0))
11927c478bd9Sstevel@tonic-gate break;
11937c478bd9Sstevel@tonic-gate }
11947c478bd9Sstevel@tonic-gate
11957c478bd9Sstevel@tonic-gate /*
11967c478bd9Sstevel@tonic-gate * The call is in progress (over COTS).
11977c478bd9Sstevel@tonic-gate * Try the CLNT_CALL again, but don't
11987c478bd9Sstevel@tonic-gate * print a noisy error message.
11997c478bd9Sstevel@tonic-gate */
12007c478bd9Sstevel@tonic-gate if (status == RPC_INPROGRESS) {
12017c478bd9Sstevel@tonic-gate tryagain = TRUE;
12027c478bd9Sstevel@tonic-gate break;
12037c478bd9Sstevel@tonic-gate }
12047c478bd9Sstevel@tonic-gate
12057c478bd9Sstevel@tonic-gate if (flags & RFSCALL_SOFT)
12067c478bd9Sstevel@tonic-gate break;
12077c478bd9Sstevel@tonic-gate
12087c478bd9Sstevel@tonic-gate /*
12097c478bd9Sstevel@tonic-gate * On zone shutdown, just move on.
12107c478bd9Sstevel@tonic-gate */
12117c478bd9Sstevel@tonic-gate if (zone_status_get(curproc->p_zone) >=
12127c478bd9Sstevel@tonic-gate ZONE_IS_SHUTTING_DOWN) {
12137c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_FAILED;
12147c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO;
12157c478bd9Sstevel@tonic-gate break;
12167c478bd9Sstevel@tonic-gate }
12177c478bd9Sstevel@tonic-gate
12187c478bd9Sstevel@tonic-gate /*
12197c478bd9Sstevel@tonic-gate * NFS client failover support
12207c478bd9Sstevel@tonic-gate *
12217c478bd9Sstevel@tonic-gate * If the current server just failed us, we'll
12227c478bd9Sstevel@tonic-gate * start the process of finding a new server.
12237c478bd9Sstevel@tonic-gate * After that, we can just retry.
12247c478bd9Sstevel@tonic-gate */
12257c478bd9Sstevel@tonic-gate if (FAILOVER_MOUNT(mi) && failover_safe(fi)) {
12267c478bd9Sstevel@tonic-gate if (svp == mi->mi_curr_serv)
12277c478bd9Sstevel@tonic-gate failover_newserver(mi);
12287c478bd9Sstevel@tonic-gate clfree_impl(client, ch, nfscl);
12297c478bd9Sstevel@tonic-gate goto failoverretry;
12307c478bd9Sstevel@tonic-gate }
12317c478bd9Sstevel@tonic-gate
12327c478bd9Sstevel@tonic-gate tryagain = TRUE;
12337c478bd9Sstevel@tonic-gate timeo = backoff(timeo);
1234e280ed37SDai Ngo
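			/*
			 * Choose the console message: if the request could
			 * not be sent because the transport ran out of
			 * buffer space (RPC_CANTSEND with ENOBUFS), report
			 * the server's queue as full; otherwise report the
			 * server as not responding.
			 */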
1235e280ed37SDai Ngo CLNT_GETERR(client, &rpcerr_tmp);
1236e280ed37SDai Ngo if ((status == RPC_CANTSEND) &&
1237e280ed37SDai Ngo (rpcerr_tmp.re_errno == ENOBUFS))
1238e280ed37SDai Ngo msg = SRV_QFULL_MSG;
1239e280ed37SDai Ngo else
1240e280ed37SDai Ngo msg = SRV_NOTRESP_MSG;
1241e280ed37SDai Ngo
12427c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
12437c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_PRINTED)) {
12447c478bd9Sstevel@tonic-gate mi->mi_flags |= MI_PRINTED;
12457c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
12467c478bd9Sstevel@tonic-gate #ifdef DEBUG
1247e280ed37SDai Ngo zprintf(zoneid, msg, mi->mi_vers,
12487c478bd9Sstevel@tonic-gate svp->sv_hostname);
1249e280ed37SDai Ngo #else
1250e280ed37SDai Ngo zprintf(zoneid, msg, svp->sv_hostname);
12517c478bd9Sstevel@tonic-gate #endif
12527c478bd9Sstevel@tonic-gate } else
12537c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
12549acbbeafSnn35248 if (*douprintf && nfs_has_ctty()) {
12557c478bd9Sstevel@tonic-gate *douprintf = 0;
12567c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT))
12577c478bd9Sstevel@tonic-gate #ifdef DEBUG
1258e280ed37SDai Ngo uprintf(msg, mi->mi_vers,
12597c478bd9Sstevel@tonic-gate svp->sv_hostname);
1260e280ed37SDai Ngo #else
1261e280ed37SDai Ngo uprintf(msg, svp->sv_hostname);
12627c478bd9Sstevel@tonic-gate #endif
12637c478bd9Sstevel@tonic-gate }
12647c478bd9Sstevel@tonic-gate
12657c478bd9Sstevel@tonic-gate /*
12667c478bd9Sstevel@tonic-gate * If doing dynamic adjustment of transfer
12677c478bd9Sstevel@tonic-gate * size and if it's a read or write call
12687c478bd9Sstevel@tonic-gate * and if the transfer size changed while
12697c478bd9Sstevel@tonic-gate * retransmitting or if the feedback routine
12707c478bd9Sstevel@tonic-gate * changed the transfer size,
12717c478bd9Sstevel@tonic-gate * then exit rfscall so that the transfer
12727c478bd9Sstevel@tonic-gate * size can be adjusted at the vnops level.
12737c478bd9Sstevel@tonic-gate */
12747c478bd9Sstevel@tonic-gate if ((mi->mi_flags & MI_DYNAMIC) &&
12757c478bd9Sstevel@tonic-gate mi->mi_timer_type[which] != 0 &&
12767c478bd9Sstevel@tonic-gate (mi->mi_curread != my_rsize ||
12777c478bd9Sstevel@tonic-gate mi->mi_curwrite != my_wsize ||
12787c478bd9Sstevel@tonic-gate nfs_feedback(FEEDBACK_REXMIT1, which, mi))) {
12797c478bd9Sstevel@tonic-gate /*
12807c478bd9Sstevel@tonic-gate * On read or write calls, return
12817c478bd9Sstevel@tonic-gate * back to the vnode ops level if
12827c478bd9Sstevel@tonic-gate * the transfer size changed.
12837c478bd9Sstevel@tonic-gate */
12847c478bd9Sstevel@tonic-gate clfree_impl(client, ch, nfscl);
128545916cd2Sjpk if (cred_cloned)
128645916cd2Sjpk crfree(cr);
12877c478bd9Sstevel@tonic-gate return (ENFS_TRYAGAIN);
12887c478bd9Sstevel@tonic-gate }
12897c478bd9Sstevel@tonic-gate }
12907c478bd9Sstevel@tonic-gate } while (tryagain);
12917c478bd9Sstevel@tonic-gate
12927c478bd9Sstevel@tonic-gate if (status != RPC_SUCCESS) {
12937c478bd9Sstevel@tonic-gate /*
12947c478bd9Sstevel@tonic-gate * Let soft mounts use the timed out message.
12957c478bd9Sstevel@tonic-gate */
12967c478bd9Sstevel@tonic-gate if (status == RPC_INPROGRESS)
12977c478bd9Sstevel@tonic-gate status = RPC_TIMEDOUT;
12987c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.badcalls.value.ui64++;
12997c478bd9Sstevel@tonic-gate if (status != RPC_INTR) {
13007c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
13017c478bd9Sstevel@tonic-gate mi->mi_flags |= MI_DOWN;
13027c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
13037c478bd9Sstevel@tonic-gate CLNT_GETERR(client, &rpcerr);
13047c478bd9Sstevel@tonic-gate #ifdef DEBUG
13057c478bd9Sstevel@tonic-gate bufp = clnt_sperror(client, svp->sv_hostname);
13067c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS%d %s failed for %s\n",
13077c478bd9Sstevel@tonic-gate mi->mi_vers, mi->mi_rfsnames[which], bufp);
13089acbbeafSnn35248 if (nfs_has_ctty()) {
13097c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT)) {
13107c478bd9Sstevel@tonic-gate uprintf("NFS%d %s failed for %s\n",
13117c478bd9Sstevel@tonic-gate mi->mi_vers, mi->mi_rfsnames[which],
13127c478bd9Sstevel@tonic-gate bufp);
13137c478bd9Sstevel@tonic-gate }
13147c478bd9Sstevel@tonic-gate }
13157c478bd9Sstevel@tonic-gate kmem_free(bufp, MAXPATHLEN);
13167c478bd9Sstevel@tonic-gate #else
13177c478bd9Sstevel@tonic-gate zprintf(zoneid,
13187c478bd9Sstevel@tonic-gate "NFS %s failed for server %s: error %d (%s)\n",
13197c478bd9Sstevel@tonic-gate mi->mi_rfsnames[which], svp->sv_hostname,
13207c478bd9Sstevel@tonic-gate status, clnt_sperrno(status));
13219acbbeafSnn35248 if (nfs_has_ctty()) {
13227c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT)) {
13237c478bd9Sstevel@tonic-gate uprintf(
13247c478bd9Sstevel@tonic-gate "NFS %s failed for server %s: error %d (%s)\n",
13257c478bd9Sstevel@tonic-gate mi->mi_rfsnames[which],
13267c478bd9Sstevel@tonic-gate svp->sv_hostname, status,
13277c478bd9Sstevel@tonic-gate clnt_sperrno(status));
13287c478bd9Sstevel@tonic-gate }
13297c478bd9Sstevel@tonic-gate }
13307c478bd9Sstevel@tonic-gate #endif
13317c478bd9Sstevel@tonic-gate /*
13327c478bd9Sstevel@tonic-gate * when CLNT_CALL() fails with RPC_AUTHERROR,
13337c478bd9Sstevel@tonic-gate * re_errno is set appropriately depending on
13347c478bd9Sstevel@tonic-gate * the authentication error
13357c478bd9Sstevel@tonic-gate */
13367c478bd9Sstevel@tonic-gate if (status == RPC_VERSMISMATCH ||
13377c478bd9Sstevel@tonic-gate status == RPC_PROGVERSMISMATCH)
13387c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO;
13397c478bd9Sstevel@tonic-gate }
13407c478bd9Sstevel@tonic-gate } else {
13417c478bd9Sstevel@tonic-gate /*
13427c478bd9Sstevel@tonic-gate * Test the value of mi_down and mi_printed without
13437c478bd9Sstevel@tonic-gate * holding the mi_lock mutex. If they are both zero,
13447c478bd9Sstevel@tonic-gate * then it is okay to skip the down and printed
13457c478bd9Sstevel@tonic-gate 		 * processing. This saves a mutex_enter and
13467c478bd9Sstevel@tonic-gate 		 * mutex_exit pair for a normal, successful RPC,
13477c478bd9Sstevel@tonic-gate 		 * locking that would otherwise be pure overhead.
13487c478bd9Sstevel@tonic-gate */
13497c478bd9Sstevel@tonic-gate if (mi->mi_flags & (MI_DOWN | MI_PRINTED)) {
13507c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
13517c478bd9Sstevel@tonic-gate mi->mi_flags &= ~MI_DOWN;
13527c478bd9Sstevel@tonic-gate if (mi->mi_flags & MI_PRINTED) {
13537c478bd9Sstevel@tonic-gate mi->mi_flags &= ~MI_PRINTED;
13547c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
13557c478bd9Sstevel@tonic-gate #ifdef DEBUG
13567c478bd9Sstevel@tonic-gate if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
13577c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS%d server %s ok\n",
13587c478bd9Sstevel@tonic-gate mi->mi_vers, svp->sv_hostname);
13597c478bd9Sstevel@tonic-gate #else
13607c478bd9Sstevel@tonic-gate if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
13617c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS server %s ok\n",
13627c478bd9Sstevel@tonic-gate svp->sv_hostname);
13637c478bd9Sstevel@tonic-gate #endif
13647c478bd9Sstevel@tonic-gate } else
13657c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
13667c478bd9Sstevel@tonic-gate }
13677c478bd9Sstevel@tonic-gate
13687c478bd9Sstevel@tonic-gate if (*douprintf == 0) {
13697c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT))
13707c478bd9Sstevel@tonic-gate #ifdef DEBUG
13717c478bd9Sstevel@tonic-gate if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
13727c478bd9Sstevel@tonic-gate uprintf("NFS%d server %s ok\n",
13737c478bd9Sstevel@tonic-gate mi->mi_vers, svp->sv_hostname);
13747c478bd9Sstevel@tonic-gate #else
13757c478bd9Sstevel@tonic-gate if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
13767c478bd9Sstevel@tonic-gate uprintf("NFS server %s ok\n", svp->sv_hostname);
13777c478bd9Sstevel@tonic-gate #endif
13787c478bd9Sstevel@tonic-gate *douprintf = 1;
13797c478bd9Sstevel@tonic-gate }
13807c478bd9Sstevel@tonic-gate }
13817c478bd9Sstevel@tonic-gate
13827c478bd9Sstevel@tonic-gate clfree_impl(client, ch, nfscl);
138345916cd2Sjpk if (cred_cloned)
138445916cd2Sjpk crfree(cr);
13857c478bd9Sstevel@tonic-gate
13867c478bd9Sstevel@tonic-gate ASSERT(rpcerr.re_status == RPC_SUCCESS || rpcerr.re_errno != 0);
13877c478bd9Sstevel@tonic-gate
13887c478bd9Sstevel@tonic-gate if (rpc_status != NULL)
13897c478bd9Sstevel@tonic-gate *rpc_status = rpcerr.re_status;
13907c478bd9Sstevel@tonic-gate
13917c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_NFS, TR_RFSCALL_END, "rfscall_end:errno %d",
13927c478bd9Sstevel@tonic-gate rpcerr.re_errno);
13937c478bd9Sstevel@tonic-gate
13947c478bd9Sstevel@tonic-gate return (rpcerr.re_errno);
13957c478bd9Sstevel@tonic-gate }
13967c478bd9Sstevel@tonic-gate
13977c478bd9Sstevel@tonic-gate #ifdef DEBUG
13987c478bd9Sstevel@tonic-gate static int acl2call_hits = 0;
13997c478bd9Sstevel@tonic-gate static int acl2call_misses = 0;
14007c478bd9Sstevel@tonic-gate #endif
14017c478bd9Sstevel@tonic-gate
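/*
 * Call an NFS_ACL version 2 procedure.  If the call succeeds at the RPC
 * level but the server answers NFSERR_ACCES, retry it once with a
 * credential adjusted by crnetadjust() (see the comments there); the
 * DEBUG counters above record how often that retry is attempted and how
 * often it still fails.
 */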
14027c478bd9Sstevel@tonic-gate int
14037c478bd9Sstevel@tonic-gate acl2call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
14047c478bd9Sstevel@tonic-gate xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
14057c478bd9Sstevel@tonic-gate enum nfsstat *statusp, int flags, failinfo_t *fi)
14067c478bd9Sstevel@tonic-gate {
14077c478bd9Sstevel@tonic-gate int rpcerror;
14087c478bd9Sstevel@tonic-gate
14097c478bd9Sstevel@tonic-gate rpcerror = aclcall(mi, which, xdrargs, argsp, xdrres, resp,
14107c478bd9Sstevel@tonic-gate cr, douprintf, flags, fi);
14117c478bd9Sstevel@tonic-gate if (!rpcerror) {
14127c478bd9Sstevel@tonic-gate /*
14137c478bd9Sstevel@tonic-gate * See comments with crnetadjust().
14147c478bd9Sstevel@tonic-gate */
14157c478bd9Sstevel@tonic-gate if (*statusp == NFSERR_ACCES &&
14167c478bd9Sstevel@tonic-gate (cr = crnetadjust(cr)) != NULL) {
14177c478bd9Sstevel@tonic-gate #ifdef DEBUG
14187c478bd9Sstevel@tonic-gate acl2call_hits++;
14197c478bd9Sstevel@tonic-gate #endif
14207c478bd9Sstevel@tonic-gate rpcerror = aclcall(mi, which, xdrargs, argsp, xdrres,
14217c478bd9Sstevel@tonic-gate resp, cr, douprintf, flags, fi);
14227c478bd9Sstevel@tonic-gate crfree(cr);
14237c478bd9Sstevel@tonic-gate #ifdef DEBUG
14247c478bd9Sstevel@tonic-gate if (*statusp == NFSERR_ACCES)
14257c478bd9Sstevel@tonic-gate acl2call_misses++;
14267c478bd9Sstevel@tonic-gate #endif
14277c478bd9Sstevel@tonic-gate }
14287c478bd9Sstevel@tonic-gate }
14297c478bd9Sstevel@tonic-gate
14307c478bd9Sstevel@tonic-gate return (rpcerror);
14317c478bd9Sstevel@tonic-gate }
14327c478bd9Sstevel@tonic-gate
14337c478bd9Sstevel@tonic-gate #ifdef DEBUG
14347c478bd9Sstevel@tonic-gate static int acl3call_hits = 0;
14357c478bd9Sstevel@tonic-gate static int acl3call_misses = 0;
14367c478bd9Sstevel@tonic-gate #endif
14377c478bd9Sstevel@tonic-gate
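/*
 * Call an NFS_ACL version 3 procedure.  NFS3ERR_JUKEBOX replies are
 * retried after sleeping for nfs3_jukebox_delay ticks, with a single
 * message to the user; NFS3ERR_ACCES is retried once with a credential
 * adjusted by crnetadjust().
 */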
14387c478bd9Sstevel@tonic-gate int
14397c478bd9Sstevel@tonic-gate acl3call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
14407c478bd9Sstevel@tonic-gate xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
14417c478bd9Sstevel@tonic-gate nfsstat3 *statusp, int flags, failinfo_t *fi)
14427c478bd9Sstevel@tonic-gate {
14437c478bd9Sstevel@tonic-gate int rpcerror;
14447c478bd9Sstevel@tonic-gate int user_informed;
14457c478bd9Sstevel@tonic-gate
14467c478bd9Sstevel@tonic-gate user_informed = 0;
14477c478bd9Sstevel@tonic-gate
14487c478bd9Sstevel@tonic-gate do {
14497c478bd9Sstevel@tonic-gate rpcerror = aclcall(mi, which, xdrargs, argsp, xdrres, resp,
14507c478bd9Sstevel@tonic-gate cr, douprintf, flags, fi);
14517c478bd9Sstevel@tonic-gate if (!rpcerror) {
14527c478bd9Sstevel@tonic-gate cred_t *crr;
14537c478bd9Sstevel@tonic-gate if (*statusp == NFS3ERR_JUKEBOX) {
14547c478bd9Sstevel@tonic-gate if (!user_informed) {
14557c478bd9Sstevel@tonic-gate user_informed = 1;
14567c478bd9Sstevel@tonic-gate uprintf(
14577c478bd9Sstevel@tonic-gate "file temporarily unavailable on the server, retrying...\n");
14587c478bd9Sstevel@tonic-gate }
14597c478bd9Sstevel@tonic-gate delay(nfs3_jukebox_delay);
14607c478bd9Sstevel@tonic-gate }
14617c478bd9Sstevel@tonic-gate /*
14627c478bd9Sstevel@tonic-gate * See crnetadjust() for comments.
14637c478bd9Sstevel@tonic-gate */
14647c478bd9Sstevel@tonic-gate else if (*statusp == NFS3ERR_ACCES &&
14657c478bd9Sstevel@tonic-gate (crr = crnetadjust(cr)) != NULL) {
14667c478bd9Sstevel@tonic-gate #ifdef DEBUG
14677c478bd9Sstevel@tonic-gate acl3call_hits++;
14687c478bd9Sstevel@tonic-gate #endif
14697c478bd9Sstevel@tonic-gate rpcerror = aclcall(mi, which, xdrargs, argsp,
14707c478bd9Sstevel@tonic-gate xdrres, resp, crr, douprintf, flags, fi);
14717c478bd9Sstevel@tonic-gate
14727c478bd9Sstevel@tonic-gate crfree(crr);
14737c478bd9Sstevel@tonic-gate #ifdef DEBUG
14747c478bd9Sstevel@tonic-gate if (*statusp == NFS3ERR_ACCES)
14757c478bd9Sstevel@tonic-gate acl3call_misses++;
14767c478bd9Sstevel@tonic-gate #endif
14777c478bd9Sstevel@tonic-gate }
14787c478bd9Sstevel@tonic-gate }
14797c478bd9Sstevel@tonic-gate } while (!rpcerror && *statusp == NFS3ERR_JUKEBOX);
14807c478bd9Sstevel@tonic-gate
14817c478bd9Sstevel@tonic-gate return (rpcerror);
14827c478bd9Sstevel@tonic-gate }
14837c478bd9Sstevel@tonic-gate
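/*
 * Common RPC engine for the NFS_ACL protocol, closely paralleling
 * rfscall() above: it handles failover, TSOL credential cloning,
 * retry/backoff and the "not responding" console messages.  In addition,
 * RPC errors that indicate a server without NFS_ACL (or extended
 * attribute) support cause the MI_ACL and/or MI_EXTATTR mount flags to
 * be cleared so the client stops asking.
 */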
14847c478bd9Sstevel@tonic-gate static int
14857c478bd9Sstevel@tonic-gate aclcall(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
148645916cd2Sjpk xdrproc_t xdrres, caddr_t resp, cred_t *icr, int *douprintf,
14877c478bd9Sstevel@tonic-gate int flags, failinfo_t *fi)
14887c478bd9Sstevel@tonic-gate {
14897c478bd9Sstevel@tonic-gate CLIENT *client;
14907c478bd9Sstevel@tonic-gate struct chtab *ch;
149145916cd2Sjpk cred_t *cr = icr;
149245916cd2Sjpk bool_t cred_cloned = FALSE;
14937c478bd9Sstevel@tonic-gate enum clnt_stat status;
14947c478bd9Sstevel@tonic-gate struct rpc_err rpcerr;
14957c478bd9Sstevel@tonic-gate struct timeval wait;
14967c478bd9Sstevel@tonic-gate int timeo; /* in units of hz */
14977c478bd9Sstevel@tonic-gate #if 0 /* notyet */
14987c478bd9Sstevel@tonic-gate int my_rsize, my_wsize;
14997c478bd9Sstevel@tonic-gate #endif
15007c478bd9Sstevel@tonic-gate bool_t tryagain;
15017c478bd9Sstevel@tonic-gate k_sigset_t smask;
15027c478bd9Sstevel@tonic-gate servinfo_t *svp;
15037c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl;
15047c478bd9Sstevel@tonic-gate zoneid_t zoneid = getzoneid();
15057c478bd9Sstevel@tonic-gate #ifdef DEBUG
15067c478bd9Sstevel@tonic-gate char *bufp;
15077c478bd9Sstevel@tonic-gate #endif
15087c478bd9Sstevel@tonic-gate
15097c478bd9Sstevel@tonic-gate #if 0 /* notyet */
15107c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_NFS, TR_RFSCALL_START,
15117c478bd9Sstevel@tonic-gate "rfscall_start:which %d mi %p", which, mi);
15127c478bd9Sstevel@tonic-gate #endif
15137c478bd9Sstevel@tonic-gate
1514108322fbScarlsonj nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone());
15157c478bd9Sstevel@tonic-gate ASSERT(nfscl != NULL);
15167c478bd9Sstevel@tonic-gate
15177c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.calls.value.ui64++;
15187c478bd9Sstevel@tonic-gate mi->mi_aclreqs[which].value.ui64++;
15197c478bd9Sstevel@tonic-gate
15207c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_SUCCESS;
15217c478bd9Sstevel@tonic-gate
15227c478bd9Sstevel@tonic-gate if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
15237c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_FAILED;
15247c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO;
15257c478bd9Sstevel@tonic-gate return (rpcerr.re_errno);
15267c478bd9Sstevel@tonic-gate }
15277c478bd9Sstevel@tonic-gate
15287c478bd9Sstevel@tonic-gate #if 0 /* notyet */
15297c478bd9Sstevel@tonic-gate /*
15307c478bd9Sstevel@tonic-gate * Remember the transfer sizes in case
15317c478bd9Sstevel@tonic-gate * nfs_feedback changes them underneath us.
15327c478bd9Sstevel@tonic-gate */
15337c478bd9Sstevel@tonic-gate my_rsize = mi->mi_curread;
15347c478bd9Sstevel@tonic-gate my_wsize = mi->mi_curwrite;
15357c478bd9Sstevel@tonic-gate #endif
15367c478bd9Sstevel@tonic-gate
15377c478bd9Sstevel@tonic-gate /*
15387c478bd9Sstevel@tonic-gate * NFS client failover support
15397c478bd9Sstevel@tonic-gate *
15407c478bd9Sstevel@tonic-gate * If this rnode is not in sync with the current server (VALID_FH),
15417c478bd9Sstevel@tonic-gate * we'd like to do a remap to get in sync. We can be interrupted
15427c478bd9Sstevel@tonic-gate * in failover_remap(), and if so we'll bail. Otherwise, we'll
15437c478bd9Sstevel@tonic-gate * use the best info we have to try the RPC. Part of that is
15447c478bd9Sstevel@tonic-gate * unconditionally updating the filehandle copy kept for V3.
15457c478bd9Sstevel@tonic-gate *
15467c478bd9Sstevel@tonic-gate  * Locking: INC_READERS/DEC_READERS is a poor man's interruptible
15477c478bd9Sstevel@tonic-gate * rw_enter(); we're trying to keep the current server from being
15487c478bd9Sstevel@tonic-gate * changed on us until we're done with the remapping and have a
15497c478bd9Sstevel@tonic-gate  * matching client handle. We don't want to send a filehandle
15507c478bd9Sstevel@tonic-gate * to the wrong host.
15517c478bd9Sstevel@tonic-gate */
15527c478bd9Sstevel@tonic-gate failoverretry:
15537c478bd9Sstevel@tonic-gate if (FAILOVER_MOUNT(mi)) {
15547c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
15557c478bd9Sstevel@tonic-gate if (!(flags & RFSCALL_SOFT) && failover_safe(fi)) {
15567c478bd9Sstevel@tonic-gate if (failover_wait(mi)) {
15577c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
15587c478bd9Sstevel@tonic-gate return (EINTR);
15597c478bd9Sstevel@tonic-gate }
15607c478bd9Sstevel@tonic-gate }
15617c478bd9Sstevel@tonic-gate INC_READERS(mi);
15627c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
15637c478bd9Sstevel@tonic-gate if (fi) {
15647c478bd9Sstevel@tonic-gate if (!VALID_FH(fi) &&
15657c478bd9Sstevel@tonic-gate !(flags & RFSCALL_SOFT) && failover_safe(fi)) {
15667c478bd9Sstevel@tonic-gate int remaperr;
15677c478bd9Sstevel@tonic-gate
15687c478bd9Sstevel@tonic-gate svp = mi->mi_curr_serv;
15697c478bd9Sstevel@tonic-gate remaperr = failover_remap(fi);
15707c478bd9Sstevel@tonic-gate if (remaperr != 0) {
15717c478bd9Sstevel@tonic-gate #ifdef DEBUG
15727c478bd9Sstevel@tonic-gate if (remaperr != EINTR)
15737c478bd9Sstevel@tonic-gate nfs_cmn_err(remaperr, CE_WARN,
15747c478bd9Sstevel@tonic-gate "aclcall couldn't failover: %m");
15757c478bd9Sstevel@tonic-gate #endif
15767c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
15777c478bd9Sstevel@tonic-gate DEC_READERS(mi);
15787c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
15797c478bd9Sstevel@tonic-gate
15807c478bd9Sstevel@tonic-gate /*
15817c478bd9Sstevel@tonic-gate  * If failover_remap returns a recoverable
15827c478bd9Sstevel@tonic-gate  * error, such as ETIMEDOUT, and the
15837c478bd9Sstevel@tonic-gate  * filesystem is hard mounted, we have to
15847c478bd9Sstevel@tonic-gate  * retry the call with a new server.
15857c478bd9Sstevel@tonic-gate */
15867c478bd9Sstevel@tonic-gate if ((mi->mi_flags & MI_HARD) &&
15877c478bd9Sstevel@tonic-gate IS_RECOVERABLE_ERROR(remaperr)) {
15887c478bd9Sstevel@tonic-gate if (svp == mi->mi_curr_serv)
15897c478bd9Sstevel@tonic-gate failover_newserver(mi);
15907c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_SUCCESS;
15917c478bd9Sstevel@tonic-gate goto failoverretry;
15927c478bd9Sstevel@tonic-gate }
15937c478bd9Sstevel@tonic-gate return (remaperr);
15947c478bd9Sstevel@tonic-gate }
15957c478bd9Sstevel@tonic-gate }
15967c478bd9Sstevel@tonic-gate if (fi->fhp && fi->copyproc)
15977c478bd9Sstevel@tonic-gate (*fi->copyproc)(fi->fhp, fi->vp);
15987c478bd9Sstevel@tonic-gate }
15997c478bd9Sstevel@tonic-gate }
16007c478bd9Sstevel@tonic-gate
160145916cd2Sjpk 	/* For TSOL, use a new cred which has the net_mac_aware flag set */
160245916cd2Sjpk if (!cred_cloned && is_system_labeled()) {
160345916cd2Sjpk cred_cloned = TRUE;
160445916cd2Sjpk cr = crdup(icr);
160545916cd2Sjpk (void) setpflags(NET_MAC_AWARE, 1, cr);
160645916cd2Sjpk }
160745916cd2Sjpk
16087c478bd9Sstevel@tonic-gate /*
16097c478bd9Sstevel@tonic-gate * acl_clget() calls clnt_tli_kinit() which clears the xid, so we
16107c478bd9Sstevel@tonic-gate * are guaranteed to reprocess the retry as a new request.
16117c478bd9Sstevel@tonic-gate */
16127c478bd9Sstevel@tonic-gate svp = mi->mi_curr_serv;
16137c478bd9Sstevel@tonic-gate rpcerr.re_errno = acl_clget(mi, svp, cr, &client, &ch, nfscl);
16147c478bd9Sstevel@tonic-gate if (FAILOVER_MOUNT(mi)) {
16157c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
16167c478bd9Sstevel@tonic-gate DEC_READERS(mi);
16177c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
16187c478bd9Sstevel@tonic-gate
16197c478bd9Sstevel@tonic-gate if ((rpcerr.re_errno == ETIMEDOUT ||
16207c478bd9Sstevel@tonic-gate rpcerr.re_errno == ECONNRESET) &&
16217c478bd9Sstevel@tonic-gate failover_safe(fi)) {
16227c478bd9Sstevel@tonic-gate if (svp == mi->mi_curr_serv)
16237c478bd9Sstevel@tonic-gate failover_newserver(mi);
16247c478bd9Sstevel@tonic-gate goto failoverretry;
16257c478bd9Sstevel@tonic-gate }
16267c478bd9Sstevel@tonic-gate }
162745916cd2Sjpk if (rpcerr.re_errno != 0) {
162845916cd2Sjpk if (cred_cloned)
162945916cd2Sjpk crfree(cr);
16307c478bd9Sstevel@tonic-gate return (rpcerr.re_errno);
163145916cd2Sjpk }
16327c478bd9Sstevel@tonic-gate
16337c478bd9Sstevel@tonic-gate if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD ||
16347c478bd9Sstevel@tonic-gate svp->sv_knconf->knc_semantics == NC_TPI_COTS) {
16357c478bd9Sstevel@tonic-gate timeo = (mi->mi_timeo * hz) / 10;
16367c478bd9Sstevel@tonic-gate } else {
16377c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
16387c478bd9Sstevel@tonic-gate timeo = CLNT_SETTIMERS(client,
16397c478bd9Sstevel@tonic-gate &(mi->mi_timers[mi->mi_acl_timer_type[which]]),
16407c478bd9Sstevel@tonic-gate &(mi->mi_timers[NFS_CALLTYPES]),
16417c478bd9Sstevel@tonic-gate (minimum_timeo[mi->mi_acl_call_type[which]]*hz)>>3,
16427c478bd9Sstevel@tonic-gate (void (*)()) 0, (caddr_t)mi, 0);
16437c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
16447c478bd9Sstevel@tonic-gate }
16457c478bd9Sstevel@tonic-gate
16467c478bd9Sstevel@tonic-gate /*
16477c478bd9Sstevel@tonic-gate * If hard mounted fs, retry call forever unless hard error occurs.
16487c478bd9Sstevel@tonic-gate */
16497c478bd9Sstevel@tonic-gate do {
16507c478bd9Sstevel@tonic-gate tryagain = FALSE;
16517c478bd9Sstevel@tonic-gate
16527c478bd9Sstevel@tonic-gate if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
16537c478bd9Sstevel@tonic-gate status = RPC_FAILED;
16547c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_FAILED;
16557c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO;
16567c478bd9Sstevel@tonic-gate break;
16577c478bd9Sstevel@tonic-gate }
16587c478bd9Sstevel@tonic-gate
16597c478bd9Sstevel@tonic-gate TICK_TO_TIMEVAL(timeo, &wait);
16607c478bd9Sstevel@tonic-gate
16617c478bd9Sstevel@tonic-gate /*
16627c478bd9Sstevel@tonic-gate * Mask out all signals except SIGHUP, SIGINT, SIGQUIT
16637c478bd9Sstevel@tonic-gate * and SIGTERM. (Preserving the existing masks).
16647c478bd9Sstevel@tonic-gate * Mask out SIGINT if mount option nointr is specified.
16657c478bd9Sstevel@tonic-gate */
16667c478bd9Sstevel@tonic-gate sigintr(&smask, (int)mi->mi_flags & MI_INT);
16677c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_INT))
16687c478bd9Sstevel@tonic-gate client->cl_nosignal = TRUE;
16697c478bd9Sstevel@tonic-gate
16707c478bd9Sstevel@tonic-gate /*
16717c478bd9Sstevel@tonic-gate * If there is a current signal, then don't bother
16727c478bd9Sstevel@tonic-gate * even trying to send out the request because we
16737c478bd9Sstevel@tonic-gate * won't be able to block waiting for the response.
16747c478bd9Sstevel@tonic-gate * Simply assume RPC_INTR and get on with it.
16757c478bd9Sstevel@tonic-gate */
16767c478bd9Sstevel@tonic-gate if (ttolwp(curthread) != NULL && ISSIG(curthread, JUSTLOOKING))
16777c478bd9Sstevel@tonic-gate status = RPC_INTR;
16787c478bd9Sstevel@tonic-gate else {
16797c478bd9Sstevel@tonic-gate status = CLNT_CALL(client, which, xdrargs, argsp,
16807c478bd9Sstevel@tonic-gate xdrres, resp, wait);
16817c478bd9Sstevel@tonic-gate }
16827c478bd9Sstevel@tonic-gate
16837c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_INT))
16847c478bd9Sstevel@tonic-gate client->cl_nosignal = FALSE;
16857c478bd9Sstevel@tonic-gate /*
16867c478bd9Sstevel@tonic-gate * restore original signal mask
16877c478bd9Sstevel@tonic-gate */
16887c478bd9Sstevel@tonic-gate sigunintr(&smask);
16897c478bd9Sstevel@tonic-gate
16907c478bd9Sstevel@tonic-gate switch (status) {
16917c478bd9Sstevel@tonic-gate case RPC_SUCCESS:
16927c478bd9Sstevel@tonic-gate #if 0 /* notyet */
16937c478bd9Sstevel@tonic-gate if ((mi->mi_flags & MI_DYNAMIC) &&
16947c478bd9Sstevel@tonic-gate mi->mi_timer_type[which] != 0 &&
16957c478bd9Sstevel@tonic-gate (mi->mi_curread != my_rsize ||
16967c478bd9Sstevel@tonic-gate mi->mi_curwrite != my_wsize))
16977c478bd9Sstevel@tonic-gate (void) nfs_feedback(FEEDBACK_OK, which, mi);
16987c478bd9Sstevel@tonic-gate #endif
16997c478bd9Sstevel@tonic-gate break;
17007c478bd9Sstevel@tonic-gate
17017c478bd9Sstevel@tonic-gate /*
17027c478bd9Sstevel@tonic-gate * Unfortunately, there are servers in the world which
17037c478bd9Sstevel@tonic-gate * are not coded correctly. They are not prepared to
17047c478bd9Sstevel@tonic-gate * handle RPC requests to the NFS port which are not
17057c478bd9Sstevel@tonic-gate * NFS requests. Thus, they may try to process the
17067c478bd9Sstevel@tonic-gate * NFS_ACL request as if it were an NFS request. This
17077c478bd9Sstevel@tonic-gate * does not work. Generally, an error will be generated
17087c478bd9Sstevel@tonic-gate * on the client because it will not be able to decode
17097c478bd9Sstevel@tonic-gate * the response from the server. However, it seems
17107c478bd9Sstevel@tonic-gate * possible that the server may not be able to decode
17117c478bd9Sstevel@tonic-gate 		 * the arguments. Thus, the criterion for deciding
17127c478bd9Sstevel@tonic-gate 		 * whether the server supports NFS_ACL is whether any
17137c478bd9Sstevel@tonic-gate 		 * of the following RPC errors are returned from CLNT_CALL.
17147c478bd9Sstevel@tonic-gate */
17157c478bd9Sstevel@tonic-gate case RPC_CANTDECODERES:
17167c478bd9Sstevel@tonic-gate case RPC_PROGUNAVAIL:
17177c478bd9Sstevel@tonic-gate case RPC_CANTDECODEARGS:
17187c478bd9Sstevel@tonic-gate case RPC_PROGVERSMISMATCH:
17197c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
17207c478bd9Sstevel@tonic-gate mi->mi_flags &= ~(MI_ACL | MI_EXTATTR);
17217c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
17227c478bd9Sstevel@tonic-gate break;
17237c478bd9Sstevel@tonic-gate
17247c478bd9Sstevel@tonic-gate /*
17257c478bd9Sstevel@tonic-gate * If the server supports NFS_ACL but not the new ops
17267c478bd9Sstevel@tonic-gate * for extended attributes, make sure we don't retry.
17277c478bd9Sstevel@tonic-gate */
17287c478bd9Sstevel@tonic-gate case RPC_PROCUNAVAIL:
17297c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
17307c478bd9Sstevel@tonic-gate mi->mi_flags &= ~MI_EXTATTR;
17317c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
17327c478bd9Sstevel@tonic-gate break;
17337c478bd9Sstevel@tonic-gate
17347c478bd9Sstevel@tonic-gate case RPC_INTR:
17357c478bd9Sstevel@tonic-gate /*
17367c478bd9Sstevel@tonic-gate * There is no way to recover from this error,
17377c478bd9Sstevel@tonic-gate * even if mount option nointr is specified.
17387c478bd9Sstevel@tonic-gate * SIGKILL, for example, cannot be blocked.
17397c478bd9Sstevel@tonic-gate */
17407c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_INTR;
17417c478bd9Sstevel@tonic-gate rpcerr.re_errno = EINTR;
17427c478bd9Sstevel@tonic-gate break;
17437c478bd9Sstevel@tonic-gate
17447c478bd9Sstevel@tonic-gate case RPC_UDERROR:
17457c478bd9Sstevel@tonic-gate /*
17467c478bd9Sstevel@tonic-gate * If the NFS server is local (vold) and
17477c478bd9Sstevel@tonic-gate 			 * it goes away, then we get RPC_UDERROR.
17487c478bd9Sstevel@tonic-gate 			 * This is a retryable error, so we would
17497c478bd9Sstevel@tonic-gate 			 * normally loop; check whether the specific
17507c478bd9Sstevel@tonic-gate 			 * error was ECONNRESET, indicating that the
17517c478bd9Sstevel@tonic-gate 			 * target did not exist at all. If so,
17527c478bd9Sstevel@tonic-gate * return with RPC_PROGUNAVAIL and
17537c478bd9Sstevel@tonic-gate * ECONNRESET to indicate why.
17547c478bd9Sstevel@tonic-gate */
17557c478bd9Sstevel@tonic-gate CLNT_GETERR(client, &rpcerr);
17567c478bd9Sstevel@tonic-gate if (rpcerr.re_errno == ECONNRESET) {
17577c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_PROGUNAVAIL;
17587c478bd9Sstevel@tonic-gate rpcerr.re_errno = ECONNRESET;
17597c478bd9Sstevel@tonic-gate break;
17607c478bd9Sstevel@tonic-gate }
17617c478bd9Sstevel@tonic-gate /*FALLTHROUGH*/
17627c478bd9Sstevel@tonic-gate
17637c478bd9Sstevel@tonic-gate default: /* probably RPC_TIMEDOUT */
17647c478bd9Sstevel@tonic-gate if (IS_UNRECOVERABLE_RPC(status))
17657c478bd9Sstevel@tonic-gate break;
17667c478bd9Sstevel@tonic-gate
17677c478bd9Sstevel@tonic-gate /*
17687c478bd9Sstevel@tonic-gate * increment server not responding count
17697c478bd9Sstevel@tonic-gate */
17707c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
17717c478bd9Sstevel@tonic-gate mi->mi_noresponse++;
17727c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
17737c478bd9Sstevel@tonic-gate #ifdef DEBUG
17747c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.noresponse.value.ui64++;
17757c478bd9Sstevel@tonic-gate #endif
17767c478bd9Sstevel@tonic-gate
17777c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_HARD)) {
17787c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_SEMISOFT) ||
17797c478bd9Sstevel@tonic-gate (mi->mi_acl_ss_call_type[which] == 0))
17807c478bd9Sstevel@tonic-gate break;
17817c478bd9Sstevel@tonic-gate }
17827c478bd9Sstevel@tonic-gate
17837c478bd9Sstevel@tonic-gate /*
17847c478bd9Sstevel@tonic-gate * The call is in progress (over COTS).
17857c478bd9Sstevel@tonic-gate * Try the CLNT_CALL again, but don't
17867c478bd9Sstevel@tonic-gate * print a noisy error message.
17877c478bd9Sstevel@tonic-gate */
17887c478bd9Sstevel@tonic-gate if (status == RPC_INPROGRESS) {
17897c478bd9Sstevel@tonic-gate tryagain = TRUE;
17907c478bd9Sstevel@tonic-gate break;
17917c478bd9Sstevel@tonic-gate }
17927c478bd9Sstevel@tonic-gate
17937c478bd9Sstevel@tonic-gate if (flags & RFSCALL_SOFT)
17947c478bd9Sstevel@tonic-gate break;
17957c478bd9Sstevel@tonic-gate
17967c478bd9Sstevel@tonic-gate /*
17977c478bd9Sstevel@tonic-gate * On zone shutdown, just move on.
17987c478bd9Sstevel@tonic-gate */
17997c478bd9Sstevel@tonic-gate if (zone_status_get(curproc->p_zone) >=
18007c478bd9Sstevel@tonic-gate ZONE_IS_SHUTTING_DOWN) {
18017c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_FAILED;
18027c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO;
18037c478bd9Sstevel@tonic-gate break;
18047c478bd9Sstevel@tonic-gate }
18057c478bd9Sstevel@tonic-gate
18067c478bd9Sstevel@tonic-gate /*
18077c478bd9Sstevel@tonic-gate * NFS client failover support
18087c478bd9Sstevel@tonic-gate *
18097c478bd9Sstevel@tonic-gate * If the current server just failed us, we'll
18107c478bd9Sstevel@tonic-gate * start the process of finding a new server.
18117c478bd9Sstevel@tonic-gate * After that, we can just retry.
18127c478bd9Sstevel@tonic-gate */
18137c478bd9Sstevel@tonic-gate if (FAILOVER_MOUNT(mi) && failover_safe(fi)) {
18147c478bd9Sstevel@tonic-gate if (svp == mi->mi_curr_serv)
18157c478bd9Sstevel@tonic-gate failover_newserver(mi);
18167c478bd9Sstevel@tonic-gate clfree_impl(client, ch, nfscl);
18177c478bd9Sstevel@tonic-gate goto failoverretry;
18187c478bd9Sstevel@tonic-gate }
18197c478bd9Sstevel@tonic-gate
18207c478bd9Sstevel@tonic-gate tryagain = TRUE;
18217c478bd9Sstevel@tonic-gate timeo = backoff(timeo);
18227c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
18237c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_PRINTED)) {
18247c478bd9Sstevel@tonic-gate mi->mi_flags |= MI_PRINTED;
18257c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
18267c478bd9Sstevel@tonic-gate #ifdef DEBUG
18277c478bd9Sstevel@tonic-gate zprintf(zoneid,
18287c478bd9Sstevel@tonic-gate "NFS_ACL%d server %s not responding still trying\n",
18297c478bd9Sstevel@tonic-gate mi->mi_vers, svp->sv_hostname);
18307c478bd9Sstevel@tonic-gate #else
18317c478bd9Sstevel@tonic-gate zprintf(zoneid,
18327c478bd9Sstevel@tonic-gate "NFS server %s not responding still trying\n",
18337c478bd9Sstevel@tonic-gate svp->sv_hostname);
18347c478bd9Sstevel@tonic-gate #endif
18357c478bd9Sstevel@tonic-gate } else
18367c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
18379acbbeafSnn35248 if (*douprintf && nfs_has_ctty()) {
18387c478bd9Sstevel@tonic-gate *douprintf = 0;
18397c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT))
18407c478bd9Sstevel@tonic-gate #ifdef DEBUG
18417c478bd9Sstevel@tonic-gate uprintf(
18427c478bd9Sstevel@tonic-gate "NFS_ACL%d server %s not responding still trying\n",
18437c478bd9Sstevel@tonic-gate mi->mi_vers, svp->sv_hostname);
18447c478bd9Sstevel@tonic-gate #else
18457c478bd9Sstevel@tonic-gate uprintf(
18467c478bd9Sstevel@tonic-gate "NFS server %s not responding still trying\n",
18477c478bd9Sstevel@tonic-gate svp->sv_hostname);
18487c478bd9Sstevel@tonic-gate #endif
18497c478bd9Sstevel@tonic-gate }
18507c478bd9Sstevel@tonic-gate
18517c478bd9Sstevel@tonic-gate #if 0 /* notyet */
18527c478bd9Sstevel@tonic-gate /*
18537c478bd9Sstevel@tonic-gate * If doing dynamic adjustment of transfer
18547c478bd9Sstevel@tonic-gate * size and if it's a read or write call
18557c478bd9Sstevel@tonic-gate * and if the transfer size changed while
18567c478bd9Sstevel@tonic-gate * retransmitting or if the feedback routine
18577c478bd9Sstevel@tonic-gate * changed the transfer size,
18587c478bd9Sstevel@tonic-gate * then exit rfscall so that the transfer
18597c478bd9Sstevel@tonic-gate * size can be adjusted at the vnops level.
18607c478bd9Sstevel@tonic-gate */
18617c478bd9Sstevel@tonic-gate if ((mi->mi_flags & MI_DYNAMIC) &&
18627c478bd9Sstevel@tonic-gate mi->mi_acl_timer_type[which] != 0 &&
18637c478bd9Sstevel@tonic-gate (mi->mi_curread != my_rsize ||
18647c478bd9Sstevel@tonic-gate mi->mi_curwrite != my_wsize ||
18657c478bd9Sstevel@tonic-gate nfs_feedback(FEEDBACK_REXMIT1, which, mi))) {
18667c478bd9Sstevel@tonic-gate /*
18677c478bd9Sstevel@tonic-gate * On read or write calls, return
18687c478bd9Sstevel@tonic-gate * back to the vnode ops level if
18697c478bd9Sstevel@tonic-gate * the transfer size changed.
18707c478bd9Sstevel@tonic-gate */
18717c478bd9Sstevel@tonic-gate clfree_impl(client, ch, nfscl);
187245916cd2Sjpk if (cred_cloned)
187345916cd2Sjpk crfree(cr);
18747c478bd9Sstevel@tonic-gate return (ENFS_TRYAGAIN);
18757c478bd9Sstevel@tonic-gate }
18767c478bd9Sstevel@tonic-gate #endif
18777c478bd9Sstevel@tonic-gate }
18787c478bd9Sstevel@tonic-gate } while (tryagain);
18797c478bd9Sstevel@tonic-gate
18807c478bd9Sstevel@tonic-gate if (status != RPC_SUCCESS) {
18817c478bd9Sstevel@tonic-gate /*
18827c478bd9Sstevel@tonic-gate * Let soft mounts use the timed out message.
18837c478bd9Sstevel@tonic-gate */
18847c478bd9Sstevel@tonic-gate if (status == RPC_INPROGRESS)
18857c478bd9Sstevel@tonic-gate status = RPC_TIMEDOUT;
18867c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.badcalls.value.ui64++;
18877c478bd9Sstevel@tonic-gate if (status == RPC_CANTDECODERES ||
18887c478bd9Sstevel@tonic-gate status == RPC_PROGUNAVAIL ||
18897c478bd9Sstevel@tonic-gate status == RPC_PROCUNAVAIL ||
18907c478bd9Sstevel@tonic-gate status == RPC_CANTDECODEARGS ||
18917c478bd9Sstevel@tonic-gate status == RPC_PROGVERSMISMATCH)
18927c478bd9Sstevel@tonic-gate CLNT_GETERR(client, &rpcerr);
18937c478bd9Sstevel@tonic-gate else if (status != RPC_INTR) {
18947c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
18957c478bd9Sstevel@tonic-gate mi->mi_flags |= MI_DOWN;
18967c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
18977c478bd9Sstevel@tonic-gate CLNT_GETERR(client, &rpcerr);
18987c478bd9Sstevel@tonic-gate #ifdef DEBUG
18997c478bd9Sstevel@tonic-gate bufp = clnt_sperror(client, svp->sv_hostname);
19007c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS_ACL%d %s failed for %s\n",
19017c478bd9Sstevel@tonic-gate mi->mi_vers, mi->mi_aclnames[which], bufp);
19029acbbeafSnn35248 if (nfs_has_ctty()) {
19037c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT)) {
19047c478bd9Sstevel@tonic-gate uprintf("NFS_ACL%d %s failed for %s\n",
19057c478bd9Sstevel@tonic-gate mi->mi_vers, mi->mi_aclnames[which],
19067c478bd9Sstevel@tonic-gate bufp);
19077c478bd9Sstevel@tonic-gate }
19087c478bd9Sstevel@tonic-gate }
19097c478bd9Sstevel@tonic-gate kmem_free(bufp, MAXPATHLEN);
19107c478bd9Sstevel@tonic-gate #else
19117c478bd9Sstevel@tonic-gate zprintf(zoneid,
19127c478bd9Sstevel@tonic-gate "NFS %s failed for server %s: error %d (%s)\n",
19137c478bd9Sstevel@tonic-gate mi->mi_aclnames[which], svp->sv_hostname,
19147c478bd9Sstevel@tonic-gate status, clnt_sperrno(status));
19159acbbeafSnn35248 if (nfs_has_ctty()) {
19167c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT))
19177c478bd9Sstevel@tonic-gate uprintf(
19187c478bd9Sstevel@tonic-gate "NFS %s failed for server %s: error %d (%s)\n",
19197c478bd9Sstevel@tonic-gate mi->mi_aclnames[which],
19207c478bd9Sstevel@tonic-gate svp->sv_hostname, status,
19217c478bd9Sstevel@tonic-gate clnt_sperrno(status));
19227c478bd9Sstevel@tonic-gate }
19237c478bd9Sstevel@tonic-gate #endif
19247c478bd9Sstevel@tonic-gate /*
19257c478bd9Sstevel@tonic-gate * when CLNT_CALL() fails with RPC_AUTHERROR,
19267c478bd9Sstevel@tonic-gate * re_errno is set appropriately depending on
19277c478bd9Sstevel@tonic-gate * the authentication error
19287c478bd9Sstevel@tonic-gate */
19297c478bd9Sstevel@tonic-gate if (status == RPC_VERSMISMATCH ||
19307c478bd9Sstevel@tonic-gate status == RPC_PROGVERSMISMATCH)
19317c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO;
19327c478bd9Sstevel@tonic-gate }
19337c478bd9Sstevel@tonic-gate } else {
19347c478bd9Sstevel@tonic-gate /*
19357c478bd9Sstevel@tonic-gate * Test the value of mi_down and mi_printed without
19367c478bd9Sstevel@tonic-gate * holding the mi_lock mutex. If they are both zero,
19377c478bd9Sstevel@tonic-gate * then it is okay to skip the down and printed
19387c478bd9Sstevel@tonic-gate 		 * processing. This saves a mutex_enter and
19397c478bd9Sstevel@tonic-gate 		 * mutex_exit pair for a normal, successful RPC,
19407c478bd9Sstevel@tonic-gate 		 * locking that would otherwise be pure overhead.
19417c478bd9Sstevel@tonic-gate */
19427c478bd9Sstevel@tonic-gate if (mi->mi_flags & (MI_DOWN | MI_PRINTED)) {
19437c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
19447c478bd9Sstevel@tonic-gate mi->mi_flags &= ~MI_DOWN;
19457c478bd9Sstevel@tonic-gate if (mi->mi_flags & MI_PRINTED) {
19467c478bd9Sstevel@tonic-gate mi->mi_flags &= ~MI_PRINTED;
19477c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
19487c478bd9Sstevel@tonic-gate #ifdef DEBUG
19497c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS_ACL%d server %s ok\n",
19507c478bd9Sstevel@tonic-gate mi->mi_vers, svp->sv_hostname);
19517c478bd9Sstevel@tonic-gate #else
19527c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS server %s ok\n",
19537c478bd9Sstevel@tonic-gate svp->sv_hostname);
19547c478bd9Sstevel@tonic-gate #endif
19557c478bd9Sstevel@tonic-gate } else
19567c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
19577c478bd9Sstevel@tonic-gate }
19587c478bd9Sstevel@tonic-gate
19597c478bd9Sstevel@tonic-gate if (*douprintf == 0) {
19607c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT))
19617c478bd9Sstevel@tonic-gate #ifdef DEBUG
19627c478bd9Sstevel@tonic-gate uprintf("NFS_ACL%d server %s ok\n",
19637c478bd9Sstevel@tonic-gate mi->mi_vers, svp->sv_hostname);
19647c478bd9Sstevel@tonic-gate #else
19657c478bd9Sstevel@tonic-gate uprintf("NFS server %s ok\n", svp->sv_hostname);
19667c478bd9Sstevel@tonic-gate #endif
19677c478bd9Sstevel@tonic-gate *douprintf = 1;
19687c478bd9Sstevel@tonic-gate }
19697c478bd9Sstevel@tonic-gate }
19707c478bd9Sstevel@tonic-gate
19717c478bd9Sstevel@tonic-gate clfree_impl(client, ch, nfscl);
197245916cd2Sjpk if (cred_cloned)
197345916cd2Sjpk crfree(cr);
19747c478bd9Sstevel@tonic-gate
19757c478bd9Sstevel@tonic-gate ASSERT(rpcerr.re_status == RPC_SUCCESS || rpcerr.re_errno != 0);
19767c478bd9Sstevel@tonic-gate
19777c478bd9Sstevel@tonic-gate #if 0 /* notyet */
19787c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_NFS, TR_RFSCALL_END, "rfscall_end:errno %d",
19797c478bd9Sstevel@tonic-gate rpcerr.re_errno);
19807c478bd9Sstevel@tonic-gate #endif
19817c478bd9Sstevel@tonic-gate
19827c478bd9Sstevel@tonic-gate return (rpcerr.re_errno);
19837c478bd9Sstevel@tonic-gate }
19847c478bd9Sstevel@tonic-gate
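/*
 * Convert a vattr into an NFS version 2 sattr.  Attributes that are not
 * being set are encoded as -1, the protocol's "don't change" value;
 * times that cannot be represented in 32 bits cause EOVERFLOW to be
 * returned rather than sending a truncated value.
 */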
19857c478bd9Sstevel@tonic-gate int
19867c478bd9Sstevel@tonic-gate vattr_to_sattr(struct vattr *vap, struct nfssattr *sa)
19877c478bd9Sstevel@tonic-gate {
19887c478bd9Sstevel@tonic-gate uint_t mask = vap->va_mask;
19897c478bd9Sstevel@tonic-gate
19907c478bd9Sstevel@tonic-gate if (!(mask & AT_MODE))
19917c478bd9Sstevel@tonic-gate sa->sa_mode = (uint32_t)-1;
19927c478bd9Sstevel@tonic-gate else
19937c478bd9Sstevel@tonic-gate sa->sa_mode = vap->va_mode;
19947c478bd9Sstevel@tonic-gate if (!(mask & AT_UID))
19957c478bd9Sstevel@tonic-gate sa->sa_uid = (uint32_t)-1;
19967c478bd9Sstevel@tonic-gate else
19977c478bd9Sstevel@tonic-gate sa->sa_uid = (uint32_t)vap->va_uid;
19987c478bd9Sstevel@tonic-gate if (!(mask & AT_GID))
19997c478bd9Sstevel@tonic-gate sa->sa_gid = (uint32_t)-1;
20007c478bd9Sstevel@tonic-gate else
20017c478bd9Sstevel@tonic-gate sa->sa_gid = (uint32_t)vap->va_gid;
20027c478bd9Sstevel@tonic-gate if (!(mask & AT_SIZE))
20037c478bd9Sstevel@tonic-gate sa->sa_size = (uint32_t)-1;
20047c478bd9Sstevel@tonic-gate else
20057c478bd9Sstevel@tonic-gate sa->sa_size = (uint32_t)vap->va_size;
20067c478bd9Sstevel@tonic-gate if (!(mask & AT_ATIME))
20077c478bd9Sstevel@tonic-gate sa->sa_atime.tv_sec = sa->sa_atime.tv_usec = (int32_t)-1;
20087c478bd9Sstevel@tonic-gate else {
20097c478bd9Sstevel@tonic-gate /* check time validity */
20107c478bd9Sstevel@tonic-gate if (! NFS_TIME_T_OK(vap->va_atime.tv_sec)) {
20117c478bd9Sstevel@tonic-gate return (EOVERFLOW);
20127c478bd9Sstevel@tonic-gate }
20137c478bd9Sstevel@tonic-gate sa->sa_atime.tv_sec = vap->va_atime.tv_sec;
20147c478bd9Sstevel@tonic-gate sa->sa_atime.tv_usec = vap->va_atime.tv_nsec / 1000;
20157c478bd9Sstevel@tonic-gate }
20167c478bd9Sstevel@tonic-gate if (!(mask & AT_MTIME))
20177c478bd9Sstevel@tonic-gate sa->sa_mtime.tv_sec = sa->sa_mtime.tv_usec = (int32_t)-1;
20187c478bd9Sstevel@tonic-gate else {
20197c478bd9Sstevel@tonic-gate /* check time validity */
20207c478bd9Sstevel@tonic-gate if (! NFS_TIME_T_OK(vap->va_mtime.tv_sec)) {
20217c478bd9Sstevel@tonic-gate return (EOVERFLOW);
20227c478bd9Sstevel@tonic-gate }
20237c478bd9Sstevel@tonic-gate sa->sa_mtime.tv_sec = vap->va_mtime.tv_sec;
20247c478bd9Sstevel@tonic-gate sa->sa_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;
20257c478bd9Sstevel@tonic-gate }
20267c478bd9Sstevel@tonic-gate return (0);
20277c478bd9Sstevel@tonic-gate }
20287c478bd9Sstevel@tonic-gate
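/*
 * Convert a vattr into an NFS version 3 sattr3.  Each attribute carries
 * an explicit set_it flag; times are sent with SET_TO_CLIENT_TIME at
 * full nanosecond resolution, and times that do not fit the protocol's
 * time type cause EOVERFLOW to be returned.
 */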
20297c478bd9Sstevel@tonic-gate int
20307c478bd9Sstevel@tonic-gate vattr_to_sattr3(struct vattr *vap, sattr3 *sa)
20317c478bd9Sstevel@tonic-gate {
20327c478bd9Sstevel@tonic-gate uint_t mask = vap->va_mask;
20337c478bd9Sstevel@tonic-gate
20347c478bd9Sstevel@tonic-gate if (!(mask & AT_MODE))
20357c478bd9Sstevel@tonic-gate sa->mode.set_it = FALSE;
20367c478bd9Sstevel@tonic-gate else {
20377c478bd9Sstevel@tonic-gate sa->mode.set_it = TRUE;
20387c478bd9Sstevel@tonic-gate sa->mode.mode = (mode3)vap->va_mode;
20397c478bd9Sstevel@tonic-gate }
20407c478bd9Sstevel@tonic-gate if (!(mask & AT_UID))
20417c478bd9Sstevel@tonic-gate sa->uid.set_it = FALSE;
20427c478bd9Sstevel@tonic-gate else {
20437c478bd9Sstevel@tonic-gate sa->uid.set_it = TRUE;
20447c478bd9Sstevel@tonic-gate sa->uid.uid = (uid3)vap->va_uid;
20457c478bd9Sstevel@tonic-gate }
20467c478bd9Sstevel@tonic-gate if (!(mask & AT_GID))
20477c478bd9Sstevel@tonic-gate sa->gid.set_it = FALSE;
20487c478bd9Sstevel@tonic-gate else {
20497c478bd9Sstevel@tonic-gate sa->gid.set_it = TRUE;
20507c478bd9Sstevel@tonic-gate sa->gid.gid = (gid3)vap->va_gid;
20517c478bd9Sstevel@tonic-gate }
20527c478bd9Sstevel@tonic-gate if (!(mask & AT_SIZE))
20537c478bd9Sstevel@tonic-gate sa->size.set_it = FALSE;
20547c478bd9Sstevel@tonic-gate else {
20557c478bd9Sstevel@tonic-gate sa->size.set_it = TRUE;
20567c478bd9Sstevel@tonic-gate sa->size.size = (size3)vap->va_size;
20577c478bd9Sstevel@tonic-gate }
20587c478bd9Sstevel@tonic-gate if (!(mask & AT_ATIME))
20597c478bd9Sstevel@tonic-gate sa->atime.set_it = DONT_CHANGE;
20607c478bd9Sstevel@tonic-gate else {
20617c478bd9Sstevel@tonic-gate /* check time validity */
20627c478bd9Sstevel@tonic-gate if (! NFS_TIME_T_OK(vap->va_atime.tv_sec)) {
20637c478bd9Sstevel@tonic-gate return (EOVERFLOW);
20647c478bd9Sstevel@tonic-gate }
20657c478bd9Sstevel@tonic-gate sa->atime.set_it = SET_TO_CLIENT_TIME;
20667c478bd9Sstevel@tonic-gate sa->atime.atime.seconds = (uint32)vap->va_atime.tv_sec;
20677c478bd9Sstevel@tonic-gate sa->atime.atime.nseconds = (uint32)vap->va_atime.tv_nsec;
20687c478bd9Sstevel@tonic-gate }
20697c478bd9Sstevel@tonic-gate if (!(mask & AT_MTIME))
20707c478bd9Sstevel@tonic-gate sa->mtime.set_it = DONT_CHANGE;
20717c478bd9Sstevel@tonic-gate else {
20727c478bd9Sstevel@tonic-gate /* check time validity */
20737c478bd9Sstevel@tonic-gate if (! NFS_TIME_T_OK(vap->va_mtime.tv_sec)) {
20747c478bd9Sstevel@tonic-gate return (EOVERFLOW);
20757c478bd9Sstevel@tonic-gate }
20767c478bd9Sstevel@tonic-gate sa->mtime.set_it = SET_TO_CLIENT_TIME;
20777c478bd9Sstevel@tonic-gate sa->mtime.mtime.seconds = (uint32)vap->va_mtime.tv_sec;
20787c478bd9Sstevel@tonic-gate sa->mtime.mtime.nseconds = (uint32)vap->va_mtime.tv_nsec;
20797c478bd9Sstevel@tonic-gate }
20807c478bd9Sstevel@tonic-gate return (0);
20817c478bd9Sstevel@tonic-gate }
20827c478bd9Sstevel@tonic-gate
20837c478bd9Sstevel@tonic-gate void
20847c478bd9Sstevel@tonic-gate setdiropargs(struct nfsdiropargs *da, char *nm, vnode_t *dvp)
20857c478bd9Sstevel@tonic-gate {
20867c478bd9Sstevel@tonic-gate
20877c478bd9Sstevel@tonic-gate da->da_fhandle = VTOFH(dvp);
20887c478bd9Sstevel@tonic-gate da->da_name = nm;
20897c478bd9Sstevel@tonic-gate da->da_flags = 0;
20907c478bd9Sstevel@tonic-gate }
20917c478bd9Sstevel@tonic-gate
20927c478bd9Sstevel@tonic-gate void
20937c478bd9Sstevel@tonic-gate setdiropargs3(diropargs3 *da, char *nm, vnode_t *dvp)
20947c478bd9Sstevel@tonic-gate {
20957c478bd9Sstevel@tonic-gate
20967c478bd9Sstevel@tonic-gate da->dirp = VTOFH3(dvp);
20977c478bd9Sstevel@tonic-gate da->name = nm;
20987c478bd9Sstevel@tonic-gate }
20997c478bd9Sstevel@tonic-gate
21007c478bd9Sstevel@tonic-gate int
21017c478bd9Sstevel@tonic-gate setdirgid(vnode_t *dvp, gid_t *gidp, cred_t *cr)
21027c478bd9Sstevel@tonic-gate {
21037c478bd9Sstevel@tonic-gate int error;
21047c478bd9Sstevel@tonic-gate rnode_t *rp;
21057c478bd9Sstevel@tonic-gate struct vattr va;
21067c478bd9Sstevel@tonic-gate
21077c478bd9Sstevel@tonic-gate va.va_mask = AT_MODE | AT_GID;
2108da6c28aaSamw error = VOP_GETATTR(dvp, &va, 0, cr, NULL);
21097c478bd9Sstevel@tonic-gate if (error)
21107c478bd9Sstevel@tonic-gate return (error);
21117c478bd9Sstevel@tonic-gate
21127c478bd9Sstevel@tonic-gate /*
21137c478bd9Sstevel@tonic-gate * To determine the expected group-id of the created file:
21147c478bd9Sstevel@tonic-gate * 1) If the filesystem was not mounted with the Old-BSD-compatible
21157c478bd9Sstevel@tonic-gate * GRPID option, and the directory's set-gid bit is clear,
21167c478bd9Sstevel@tonic-gate * then use the process's gid.
21177c478bd9Sstevel@tonic-gate * 2) Otherwise, set the group-id to the gid of the parent directory.
21187c478bd9Sstevel@tonic-gate */
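	/*
	 * For example: with a parent directory of mode 02775 (set-gid bit
	 * on) and group "staff", the new file is expected to get group
	 * "staff"; with mode 0755 on a mount without the grpid option, it
	 * is expected to get the gid from the supplied credential instead.
	 */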
21197c478bd9Sstevel@tonic-gate rp = VTOR(dvp);
21207c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
21217c478bd9Sstevel@tonic-gate if (!(VTOMI(dvp)->mi_flags & MI_GRPID) && !(va.va_mode & VSGID))
21227c478bd9Sstevel@tonic-gate *gidp = crgetgid(cr);
21237c478bd9Sstevel@tonic-gate else
21247c478bd9Sstevel@tonic-gate *gidp = va.va_gid;
21257c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
21267c478bd9Sstevel@tonic-gate return (0);
21277c478bd9Sstevel@tonic-gate }
21287c478bd9Sstevel@tonic-gate
21297c478bd9Sstevel@tonic-gate int
21307c478bd9Sstevel@tonic-gate setdirmode(vnode_t *dvp, mode_t *omp, cred_t *cr)
21317c478bd9Sstevel@tonic-gate {
21327c478bd9Sstevel@tonic-gate int error;
21337c478bd9Sstevel@tonic-gate struct vattr va;
21347c478bd9Sstevel@tonic-gate
21357c478bd9Sstevel@tonic-gate va.va_mask = AT_MODE;
2136da6c28aaSamw error = VOP_GETATTR(dvp, &va, 0, cr, NULL);
21377c478bd9Sstevel@tonic-gate if (error)
21387c478bd9Sstevel@tonic-gate return (error);
21397c478bd9Sstevel@tonic-gate
21407c478bd9Sstevel@tonic-gate /*
21417c478bd9Sstevel@tonic-gate * Modify the expected mode (om) so that the set-gid bit matches
21427c478bd9Sstevel@tonic-gate * that of the parent directory (dvp).
21437c478bd9Sstevel@tonic-gate */
21447c478bd9Sstevel@tonic-gate if (va.va_mode & VSGID)
21457c478bd9Sstevel@tonic-gate *omp |= VSGID;
21467c478bd9Sstevel@tonic-gate else
21477c478bd9Sstevel@tonic-gate *omp &= ~VSGID;
21487c478bd9Sstevel@tonic-gate return (0);
21497c478bd9Sstevel@tonic-gate }
21507c478bd9Sstevel@tonic-gate
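/*
 * Set or clear VSWAPLIKE on the vnode: a regular file whose sticky bit
 * (VSVTX) is set while its execute bit (VEXEC) is clear is treated as
 * "swap-like"; any other combination clears the flag.
 */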
21517c478bd9Sstevel@tonic-gate void
21527c478bd9Sstevel@tonic-gate nfs_setswaplike(vnode_t *vp, vattr_t *vap)
21537c478bd9Sstevel@tonic-gate {
21547c478bd9Sstevel@tonic-gate
21557c478bd9Sstevel@tonic-gate if (vp->v_type == VREG && (vap->va_mode & (VEXEC | VSVTX)) == VSVTX) {
21567c478bd9Sstevel@tonic-gate if (!(vp->v_flag & VSWAPLIKE)) {
21577c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock);
21587c478bd9Sstevel@tonic-gate vp->v_flag |= VSWAPLIKE;
21597c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock);
21607c478bd9Sstevel@tonic-gate }
21617c478bd9Sstevel@tonic-gate } else {
21627c478bd9Sstevel@tonic-gate if (vp->v_flag & VSWAPLIKE) {
21637c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock);
21647c478bd9Sstevel@tonic-gate vp->v_flag &= ~VSWAPLIKE;
21657c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock);
21667c478bd9Sstevel@tonic-gate }
21677c478bd9Sstevel@tonic-gate }
21687c478bd9Sstevel@tonic-gate }
21697c478bd9Sstevel@tonic-gate
21707c478bd9Sstevel@tonic-gate /*
21717c478bd9Sstevel@tonic-gate * Free the resources associated with an rnode.
21727c478bd9Sstevel@tonic-gate */
21737c478bd9Sstevel@tonic-gate static void
21747c478bd9Sstevel@tonic-gate rinactive(rnode_t *rp, cred_t *cr)
21757c478bd9Sstevel@tonic-gate {
21767c478bd9Sstevel@tonic-gate vnode_t *vp;
21777c478bd9Sstevel@tonic-gate cred_t *cred;
21787c478bd9Sstevel@tonic-gate char *contents;
21797c478bd9Sstevel@tonic-gate int size;
21807c478bd9Sstevel@tonic-gate vsecattr_t *vsp;
21817c478bd9Sstevel@tonic-gate int error;
21827c478bd9Sstevel@tonic-gate nfs3_pathconf_info *info;
21837c478bd9Sstevel@tonic-gate
21847c478bd9Sstevel@tonic-gate /*
21857c478bd9Sstevel@tonic-gate * Before freeing anything, wait until all asynchronous
21867c478bd9Sstevel@tonic-gate * activity is done on this rnode. This will allow all
21877c478bd9Sstevel@tonic-gate * asynchronous read ahead and write behind i/o's to
21887c478bd9Sstevel@tonic-gate * finish.
21897c478bd9Sstevel@tonic-gate */
21907c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
21917c478bd9Sstevel@tonic-gate while (rp->r_count > 0)
21927c478bd9Sstevel@tonic-gate cv_wait(&rp->r_cv, &rp->r_statelock);
21937c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
21947c478bd9Sstevel@tonic-gate
21957c478bd9Sstevel@tonic-gate /*
21967c478bd9Sstevel@tonic-gate * Flush and invalidate all pages associated with the vnode.
21977c478bd9Sstevel@tonic-gate */
21987c478bd9Sstevel@tonic-gate vp = RTOV(rp);
21997c478bd9Sstevel@tonic-gate if (vn_has_cached_data(vp)) {
22007c478bd9Sstevel@tonic-gate ASSERT(vp->v_type != VCHR);
22017c478bd9Sstevel@tonic-gate if ((rp->r_flags & RDIRTY) && !rp->r_error) {
2202da6c28aaSamw error = VOP_PUTPAGE(vp, (u_offset_t)0, 0, 0, cr, NULL);
22037c478bd9Sstevel@tonic-gate if (error && (error == ENOSPC || error == EDQUOT)) {
22047c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
22057c478bd9Sstevel@tonic-gate if (!rp->r_error)
22067c478bd9Sstevel@tonic-gate rp->r_error = error;
22077c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
22087c478bd9Sstevel@tonic-gate }
22097c478bd9Sstevel@tonic-gate }
22107c478bd9Sstevel@tonic-gate nfs_invalidate_pages(vp, (u_offset_t)0, cr);
22117c478bd9Sstevel@tonic-gate }
22127c478bd9Sstevel@tonic-gate
22137c478bd9Sstevel@tonic-gate /*
22147c478bd9Sstevel@tonic-gate * Free any held credentials and caches which may be associated
22157c478bd9Sstevel@tonic-gate * with this rnode.
22167c478bd9Sstevel@tonic-gate */
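	/*
	 * The pointers are captured and cleared while r_statelock is held
	 * so that no other thread can see or reuse them; the actual frees
	 * are done below, after the lock has been dropped.
	 */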
22177c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
22187c478bd9Sstevel@tonic-gate cred = rp->r_cred;
22197c478bd9Sstevel@tonic-gate rp->r_cred = NULL;
22207c478bd9Sstevel@tonic-gate contents = rp->r_symlink.contents;
22217c478bd9Sstevel@tonic-gate size = rp->r_symlink.size;
22227c478bd9Sstevel@tonic-gate rp->r_symlink.contents = NULL;
22237c478bd9Sstevel@tonic-gate vsp = rp->r_secattr;
22247c478bd9Sstevel@tonic-gate rp->r_secattr = NULL;
22257c478bd9Sstevel@tonic-gate info = rp->r_pathconf;
22267c478bd9Sstevel@tonic-gate rp->r_pathconf = NULL;
22277c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
22287c478bd9Sstevel@tonic-gate
22297c478bd9Sstevel@tonic-gate /*
22307c478bd9Sstevel@tonic-gate * Free the held credential.
22317c478bd9Sstevel@tonic-gate */
22327c478bd9Sstevel@tonic-gate if (cred != NULL)
22337c478bd9Sstevel@tonic-gate crfree(cred);
22347c478bd9Sstevel@tonic-gate
22357c478bd9Sstevel@tonic-gate /*
22367c478bd9Sstevel@tonic-gate * Free the access cache entries.
22377c478bd9Sstevel@tonic-gate */
22387c478bd9Sstevel@tonic-gate (void) nfs_access_purge_rp(rp);
22397c478bd9Sstevel@tonic-gate
22407c478bd9Sstevel@tonic-gate /*
22417c478bd9Sstevel@tonic-gate * Free the readdir cache entries.
22427c478bd9Sstevel@tonic-gate */
22437c478bd9Sstevel@tonic-gate if (HAVE_RDDIR_CACHE(rp))
22447c478bd9Sstevel@tonic-gate nfs_purge_rddir_cache(vp);
22457c478bd9Sstevel@tonic-gate
22467c478bd9Sstevel@tonic-gate /*
22477c478bd9Sstevel@tonic-gate * Free the symbolic link cache.
22487c478bd9Sstevel@tonic-gate */
22497c478bd9Sstevel@tonic-gate if (contents != NULL) {
22507c478bd9Sstevel@tonic-gate
22517c478bd9Sstevel@tonic-gate kmem_free((void *)contents, size);
22527c478bd9Sstevel@tonic-gate }
22537c478bd9Sstevel@tonic-gate
22547c478bd9Sstevel@tonic-gate /*
22557c478bd9Sstevel@tonic-gate * Free any cached ACL.
22567c478bd9Sstevel@tonic-gate */
22577c478bd9Sstevel@tonic-gate if (vsp != NULL)
22587c478bd9Sstevel@tonic-gate nfs_acl_free(vsp);
22597c478bd9Sstevel@tonic-gate
22607c478bd9Sstevel@tonic-gate /*
22617c478bd9Sstevel@tonic-gate * Free any cached pathconf information.
22627c478bd9Sstevel@tonic-gate */
22637c478bd9Sstevel@tonic-gate if (info != NULL)
22647c478bd9Sstevel@tonic-gate kmem_free(info, sizeof (*info));
22657c478bd9Sstevel@tonic-gate }
22667c478bd9Sstevel@tonic-gate
22677c478bd9Sstevel@tonic-gate /*
22687c478bd9Sstevel@tonic-gate * Return a vnode for the given NFS Version 2 file handle.
22697c478bd9Sstevel@tonic-gate * If no rnode exists for this fhandle, create one and put it
22707c478bd9Sstevel@tonic-gate * into the hash queues. If the rnode for this fhandle
22717c478bd9Sstevel@tonic-gate * already exists, return it.
22727c478bd9Sstevel@tonic-gate *
22737c478bd9Sstevel@tonic-gate * Note: make_rnode() may upgrade the hash bucket lock to exclusive.
22747c478bd9Sstevel@tonic-gate */
22757c478bd9Sstevel@tonic-gate vnode_t *
22767c478bd9Sstevel@tonic-gate makenfsnode(fhandle_t *fh, struct nfsfattr *attr, struct vfs *vfsp,
22777c478bd9Sstevel@tonic-gate hrtime_t t, cred_t *cr, char *dnm, char *nm)
22787c478bd9Sstevel@tonic-gate {
22797c478bd9Sstevel@tonic-gate int newnode;
22807c478bd9Sstevel@tonic-gate int index;
22817c478bd9Sstevel@tonic-gate vnode_t *vp;
22827c478bd9Sstevel@tonic-gate nfs_fhandle nfh;
22837c478bd9Sstevel@tonic-gate vattr_t va;
22847c478bd9Sstevel@tonic-gate
22857c478bd9Sstevel@tonic-gate nfh.fh_len = NFS_FHSIZE;
22867c478bd9Sstevel@tonic-gate bcopy(fh, nfh.fh_buf, NFS_FHSIZE);
22877c478bd9Sstevel@tonic-gate
22887c478bd9Sstevel@tonic-gate index = rtablehash(&nfh);
22897c478bd9Sstevel@tonic-gate rw_enter(&rtable[index].r_lock, RW_READER);
22907c478bd9Sstevel@tonic-gate
22917c478bd9Sstevel@tonic-gate vp = make_rnode(&nfh, &rtable[index], vfsp, nfs_vnodeops,
22927c478bd9Sstevel@tonic-gate nfs_putapage, nfs_rddir_compar, &newnode, cr, dnm, nm);
22937c478bd9Sstevel@tonic-gate
22947c478bd9Sstevel@tonic-gate if (attr != NULL) {
22957c478bd9Sstevel@tonic-gate if (!newnode) {
22967c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock);
22977c478bd9Sstevel@tonic-gate (void) nfs_cache_fattr(vp, attr, &va, t, cr);
22987c478bd9Sstevel@tonic-gate } else {
22997c478bd9Sstevel@tonic-gate if (attr->na_type < NFNON || attr->na_type > NFSOC)
23007c478bd9Sstevel@tonic-gate vp->v_type = VBAD;
23017c478bd9Sstevel@tonic-gate else
23027c478bd9Sstevel@tonic-gate vp->v_type = n2v_type(attr);
23037c478bd9Sstevel@tonic-gate /*
23047c478bd9Sstevel@tonic-gate * A translation here seems to be necessary
23057c478bd9Sstevel@tonic-gate * because this function can be called
23067c478bd9Sstevel@tonic-gate * with `attr' that has come from the wire,
23077c478bd9Sstevel@tonic-gate * and been operated on by vattr_to_nattr().
23087c478bd9Sstevel@tonic-gate 			 * See nfsrootvp()->VOP_GETATTR()->nfsgetattr()
23097c478bd9Sstevel@tonic-gate * ->nfs_getattr_otw()->rfscall()->vattr_to_nattr()
23107c478bd9Sstevel@tonic-gate * ->makenfsnode().
23117c478bd9Sstevel@tonic-gate */
23127c478bd9Sstevel@tonic-gate if ((attr->na_rdev & 0xffff0000) == 0)
23137c478bd9Sstevel@tonic-gate vp->v_rdev = nfsv2_expdev(attr->na_rdev);
23147c478bd9Sstevel@tonic-gate else
23157c478bd9Sstevel@tonic-gate vp->v_rdev = expldev(n2v_rdev(attr));
23167c478bd9Sstevel@tonic-gate nfs_attrcache(vp, attr, t);
23177c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock);
23187c478bd9Sstevel@tonic-gate }
23197c478bd9Sstevel@tonic-gate } else {
23207c478bd9Sstevel@tonic-gate if (newnode) {
23217c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(vp);
23227c478bd9Sstevel@tonic-gate }
23237c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock);
23247c478bd9Sstevel@tonic-gate }
23257c478bd9Sstevel@tonic-gate
23267c478bd9Sstevel@tonic-gate return (vp);
23277c478bd9Sstevel@tonic-gate }
23287c478bd9Sstevel@tonic-gate
23297c478bd9Sstevel@tonic-gate /*
23307c478bd9Sstevel@tonic-gate * Return a vnode for the given NFS Version 3 file handle.
23317c478bd9Sstevel@tonic-gate * If no rnode exists for this fhandle, create one and put it
23327c478bd9Sstevel@tonic-gate * into the hash queues. If the rnode for this fhandle
23337c478bd9Sstevel@tonic-gate * already exists, return it.
23347c478bd9Sstevel@tonic-gate *
23357c478bd9Sstevel@tonic-gate * Note: make_rnode() may upgrade the hash bucket lock to exclusive.
23367c478bd9Sstevel@tonic-gate */
23377c478bd9Sstevel@tonic-gate vnode_t *
23387c478bd9Sstevel@tonic-gate makenfs3node_va(nfs_fh3 *fh, vattr_t *vap, struct vfs *vfsp, hrtime_t t,
23397c478bd9Sstevel@tonic-gate cred_t *cr, char *dnm, char *nm)
23407c478bd9Sstevel@tonic-gate {
23417c478bd9Sstevel@tonic-gate int newnode;
23427c478bd9Sstevel@tonic-gate int index;
23437c478bd9Sstevel@tonic-gate vnode_t *vp;
23447c478bd9Sstevel@tonic-gate
23457c478bd9Sstevel@tonic-gate index = rtablehash((nfs_fhandle *)fh);
23467c478bd9Sstevel@tonic-gate rw_enter(&rtable[index].r_lock, RW_READER);
23477c478bd9Sstevel@tonic-gate
23487c478bd9Sstevel@tonic-gate vp = make_rnode((nfs_fhandle *)fh, &rtable[index], vfsp,
23497c478bd9Sstevel@tonic-gate nfs3_vnodeops, nfs3_putapage, nfs3_rddir_compar, &newnode, cr,
23507c478bd9Sstevel@tonic-gate dnm, nm);
23517c478bd9Sstevel@tonic-gate
23527c478bd9Sstevel@tonic-gate if (vap == NULL) {
23537c478bd9Sstevel@tonic-gate if (newnode) {
23547c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(vp);
23557c478bd9Sstevel@tonic-gate }
23567c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock);
23577c478bd9Sstevel@tonic-gate return (vp);
23587c478bd9Sstevel@tonic-gate }
23597c478bd9Sstevel@tonic-gate
23607c478bd9Sstevel@tonic-gate if (!newnode) {
23617c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock);
23627c478bd9Sstevel@tonic-gate nfs_attr_cache(vp, vap, t, cr);
23637c478bd9Sstevel@tonic-gate } else {
23647c478bd9Sstevel@tonic-gate rnode_t *rp = VTOR(vp);
23657c478bd9Sstevel@tonic-gate
23667c478bd9Sstevel@tonic-gate vp->v_type = vap->va_type;
23677c478bd9Sstevel@tonic-gate vp->v_rdev = vap->va_rdev;
23687c478bd9Sstevel@tonic-gate
23697c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
23707c478bd9Sstevel@tonic-gate if (rp->r_mtime <= t)
23717c478bd9Sstevel@tonic-gate nfs_attrcache_va(vp, vap);
23727c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
23737c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock);
23747c478bd9Sstevel@tonic-gate }
23757c478bd9Sstevel@tonic-gate
23767c478bd9Sstevel@tonic-gate return (vp);
23777c478bd9Sstevel@tonic-gate }
23787c478bd9Sstevel@tonic-gate
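/*
 * Same as makenfs3node_va() except that the attributes are passed in as
 * the over-the-wire fattr3 and are converted/cached here, rather than as
 * an already-converted vattr_t.
 */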
23797c478bd9Sstevel@tonic-gate vnode_t *
23807c478bd9Sstevel@tonic-gate makenfs3node(nfs_fh3 *fh, fattr3 *attr, struct vfs *vfsp, hrtime_t t,
23817c478bd9Sstevel@tonic-gate cred_t *cr, char *dnm, char *nm)
23827c478bd9Sstevel@tonic-gate {
23837c478bd9Sstevel@tonic-gate int newnode;
23847c478bd9Sstevel@tonic-gate int index;
23857c478bd9Sstevel@tonic-gate vnode_t *vp;
23867c478bd9Sstevel@tonic-gate vattr_t va;
23877c478bd9Sstevel@tonic-gate
23887c478bd9Sstevel@tonic-gate index = rtablehash((nfs_fhandle *)fh);
23897c478bd9Sstevel@tonic-gate rw_enter(&rtable[index].r_lock, RW_READER);
23907c478bd9Sstevel@tonic-gate
23917c478bd9Sstevel@tonic-gate vp = make_rnode((nfs_fhandle *)fh, &rtable[index], vfsp,
23927c478bd9Sstevel@tonic-gate nfs3_vnodeops, nfs3_putapage, nfs3_rddir_compar, &newnode, cr,
23937c478bd9Sstevel@tonic-gate dnm, nm);
23947c478bd9Sstevel@tonic-gate
23957c478bd9Sstevel@tonic-gate if (attr == NULL) {
23967c478bd9Sstevel@tonic-gate if (newnode) {
23977c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(vp);
23987c478bd9Sstevel@tonic-gate }
23997c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock);
24007c478bd9Sstevel@tonic-gate return (vp);
24017c478bd9Sstevel@tonic-gate }
24027c478bd9Sstevel@tonic-gate
24037c478bd9Sstevel@tonic-gate if (!newnode) {
24047c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock);
24057c478bd9Sstevel@tonic-gate (void) nfs3_cache_fattr3(vp, attr, &va, t, cr);
24067c478bd9Sstevel@tonic-gate } else {
24077c478bd9Sstevel@tonic-gate if (attr->type < NF3REG || attr->type > NF3FIFO)
24087c478bd9Sstevel@tonic-gate vp->v_type = VBAD;
24097c478bd9Sstevel@tonic-gate else
24107c478bd9Sstevel@tonic-gate vp->v_type = nf3_to_vt[attr->type];
24117c478bd9Sstevel@tonic-gate vp->v_rdev = makedevice(attr->rdev.specdata1,
24127c478bd9Sstevel@tonic-gate attr->rdev.specdata2);
24137c478bd9Sstevel@tonic-gate nfs3_attrcache(vp, attr, t);
24147c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock);
24157c478bd9Sstevel@tonic-gate }
24167c478bd9Sstevel@tonic-gate
24177c478bd9Sstevel@tonic-gate return (vp);
24187c478bd9Sstevel@tonic-gate }
24197c478bd9Sstevel@tonic-gate
24207c478bd9Sstevel@tonic-gate /*
24217c478bd9Sstevel@tonic-gate * Read this comment before making changes to rtablehash()!
24227c478bd9Sstevel@tonic-gate * This is a hash function in which seemingly obvious and harmless
24237c478bd9Sstevel@tonic-gate  * changes can cause escalations costing millions of dollars!
24247c478bd9Sstevel@tonic-gate * Know what you are doing.
24257c478bd9Sstevel@tonic-gate *
24267c478bd9Sstevel@tonic-gate * rtablehash() implements Jenkins' one-at-a-time hash algorithm. The
24277c478bd9Sstevel@tonic-gate * algorithm is currently detailed here:
24287c478bd9Sstevel@tonic-gate *
24297c478bd9Sstevel@tonic-gate * http://burtleburtle.net/bob/hash/doobs.html
24307c478bd9Sstevel@tonic-gate *
24317c478bd9Sstevel@tonic-gate * Of course, the above link may not be valid by the time you are reading
24327c478bd9Sstevel@tonic-gate * this, but suffice it to say that the one-at-a-time algorithm works well in
24337c478bd9Sstevel@tonic-gate * almost all cases. If you are changing the algorithm be sure to verify that
24347c478bd9Sstevel@tonic-gate * the hash algorithm still provides even distribution in all cases and with
24357c478bd9Sstevel@tonic-gate * any server returning filehandles in whatever order (sequential or random).
24367c478bd9Sstevel@tonic-gate */
24377c478bd9Sstevel@tonic-gate static int
24387c478bd9Sstevel@tonic-gate rtablehash(nfs_fhandle *fh)
24397c478bd9Sstevel@tonic-gate {
24407c478bd9Sstevel@tonic-gate ulong_t hash, len, i;
24417c478bd9Sstevel@tonic-gate char *key;
24427c478bd9Sstevel@tonic-gate
24437c478bd9Sstevel@tonic-gate key = fh->fh_buf;
24447c478bd9Sstevel@tonic-gate len = (ulong_t)fh->fh_len;
24457c478bd9Sstevel@tonic-gate for (hash = 0, i = 0; i < len; i++) {
24467c478bd9Sstevel@tonic-gate hash += key[i];
24477c478bd9Sstevel@tonic-gate hash += (hash << 10);
24487c478bd9Sstevel@tonic-gate hash ^= (hash >> 6);
24497c478bd9Sstevel@tonic-gate }
24507c478bd9Sstevel@tonic-gate hash += (hash << 3);
24517c478bd9Sstevel@tonic-gate hash ^= (hash >> 11);
24527c478bd9Sstevel@tonic-gate hash += (hash << 15);
24537c478bd9Sstevel@tonic-gate return (hash & rtablemask);
24547c478bd9Sstevel@tonic-gate }
24557c478bd9Sstevel@tonic-gate
24567c478bd9Sstevel@tonic-gate static vnode_t *
24577c478bd9Sstevel@tonic-gate make_rnode(nfs_fhandle *fh, rhashq_t *rhtp, struct vfs *vfsp,
24587c478bd9Sstevel@tonic-gate struct vnodeops *vops,
24597c478bd9Sstevel@tonic-gate int (*putapage)(vnode_t *, page_t *, u_offset_t *, size_t *, int, cred_t *),
24607c478bd9Sstevel@tonic-gate int (*compar)(const void *, const void *),
24617c478bd9Sstevel@tonic-gate int *newnode, cred_t *cr, char *dnm, char *nm)
24627c478bd9Sstevel@tonic-gate {
24637c478bd9Sstevel@tonic-gate rnode_t *rp;
24647c478bd9Sstevel@tonic-gate rnode_t *trp;
24657c478bd9Sstevel@tonic-gate vnode_t *vp;
24667c478bd9Sstevel@tonic-gate mntinfo_t *mi;
24677c478bd9Sstevel@tonic-gate
24687c478bd9Sstevel@tonic-gate ASSERT(RW_READ_HELD(&rhtp->r_lock));
24697c478bd9Sstevel@tonic-gate
24707c478bd9Sstevel@tonic-gate mi = VFTOMI(vfsp);
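/*
 * start: is the retry point.  We come back here, with the hash bucket
 * lock re-held as a reader, whenever an rnode taken off the freelist
 * below turns out to still be referenced and has to be given up.
 */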
24717c478bd9Sstevel@tonic-gate start:
24727c478bd9Sstevel@tonic-gate if ((rp = rfind(rhtp, fh, vfsp)) != NULL) {
24737c478bd9Sstevel@tonic-gate vp = RTOV(rp);
24747c478bd9Sstevel@tonic-gate nfs_set_vroot(vp);
24757c478bd9Sstevel@tonic-gate *newnode = 0;
24767c478bd9Sstevel@tonic-gate return (vp);
24777c478bd9Sstevel@tonic-gate }
24787c478bd9Sstevel@tonic-gate rw_exit(&rhtp->r_lock);
24797c478bd9Sstevel@tonic-gate
24807c478bd9Sstevel@tonic-gate mutex_enter(&rpfreelist_lock);
24817c478bd9Sstevel@tonic-gate if (rpfreelist != NULL && rnew >= nrnode) {
24827c478bd9Sstevel@tonic-gate rp = rpfreelist;
24837c478bd9Sstevel@tonic-gate rp_rmfree(rp);
24847c478bd9Sstevel@tonic-gate mutex_exit(&rpfreelist_lock);
24857c478bd9Sstevel@tonic-gate
24867c478bd9Sstevel@tonic-gate vp = RTOV(rp);
24877c478bd9Sstevel@tonic-gate
24887c478bd9Sstevel@tonic-gate if (rp->r_flags & RHASHED) {
24897c478bd9Sstevel@tonic-gate rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
24907c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock);
24917c478bd9Sstevel@tonic-gate if (vp->v_count > 1) {
24927c478bd9Sstevel@tonic-gate vp->v_count--;
24937c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock);
24947c478bd9Sstevel@tonic-gate rw_exit(&rp->r_hashq->r_lock);
24957c478bd9Sstevel@tonic-gate rw_enter(&rhtp->r_lock, RW_READER);
24967c478bd9Sstevel@tonic-gate goto start;
24977c478bd9Sstevel@tonic-gate }
24987c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock);
24997c478bd9Sstevel@tonic-gate rp_rmhash_locked(rp);
25007c478bd9Sstevel@tonic-gate rw_exit(&rp->r_hashq->r_lock);
25017c478bd9Sstevel@tonic-gate }
25027c478bd9Sstevel@tonic-gate
25037c478bd9Sstevel@tonic-gate rinactive(rp, cr);
25047c478bd9Sstevel@tonic-gate
25057c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock);
25067c478bd9Sstevel@tonic-gate if (vp->v_count > 1) {
25077c478bd9Sstevel@tonic-gate vp->v_count--;
25087c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock);
25097c478bd9Sstevel@tonic-gate rw_enter(&rhtp->r_lock, RW_READER);
25107c478bd9Sstevel@tonic-gate goto start;
25117c478bd9Sstevel@tonic-gate }
25127c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock);
25137c478bd9Sstevel@tonic-gate vn_invalid(vp);
25147c478bd9Sstevel@tonic-gate /*
25157c478bd9Sstevel@tonic-gate * destroy old locks before bzero'ing and
25167c478bd9Sstevel@tonic-gate * recreating the locks below.
25177c478bd9Sstevel@tonic-gate */
25187c478bd9Sstevel@tonic-gate nfs_rw_destroy(&rp->r_rwlock);
25197c478bd9Sstevel@tonic-gate nfs_rw_destroy(&rp->r_lkserlock);
25207c478bd9Sstevel@tonic-gate mutex_destroy(&rp->r_statelock);
25217c478bd9Sstevel@tonic-gate cv_destroy(&rp->r_cv);
25227c478bd9Sstevel@tonic-gate cv_destroy(&rp->r_commit.c_cv);
25237c478bd9Sstevel@tonic-gate nfs_free_r_path(rp);
25247c478bd9Sstevel@tonic-gate avl_destroy(&rp->r_dir);
25257c478bd9Sstevel@tonic-gate /*
25267c478bd9Sstevel@tonic-gate * Make sure that if rnode is recycled then
25277c478bd9Sstevel@tonic-gate * VFS count is decremented properly before
25287c478bd9Sstevel@tonic-gate * reuse.
25297c478bd9Sstevel@tonic-gate */
25307c478bd9Sstevel@tonic-gate VFS_RELE(vp->v_vfsp);
25317c478bd9Sstevel@tonic-gate vn_reinit(vp);
25327c478bd9Sstevel@tonic-gate } else {
25337c478bd9Sstevel@tonic-gate vnode_t *new_vp;
25347c478bd9Sstevel@tonic-gate
25357c478bd9Sstevel@tonic-gate mutex_exit(&rpfreelist_lock);
25367c478bd9Sstevel@tonic-gate
25377c478bd9Sstevel@tonic-gate rp = kmem_cache_alloc(rnode_cache, KM_SLEEP);
25387c478bd9Sstevel@tonic-gate new_vp = vn_alloc(KM_SLEEP);
25397c478bd9Sstevel@tonic-gate
25401a5e258fSJosef 'Jeff' Sipek atomic_inc_ulong((ulong_t *)&rnew);
25417c478bd9Sstevel@tonic-gate #ifdef DEBUG
25427c478bd9Sstevel@tonic-gate clstat_debug.nrnode.value.ui64++;
25437c478bd9Sstevel@tonic-gate #endif
25447c478bd9Sstevel@tonic-gate vp = new_vp;
25457c478bd9Sstevel@tonic-gate }
25467c478bd9Sstevel@tonic-gate
25477c478bd9Sstevel@tonic-gate bzero(rp, sizeof (*rp));
25487c478bd9Sstevel@tonic-gate rp->r_vnode = vp;
25497c478bd9Sstevel@tonic-gate nfs_rw_init(&rp->r_rwlock, NULL, RW_DEFAULT, NULL);
25507c478bd9Sstevel@tonic-gate nfs_rw_init(&rp->r_lkserlock, NULL, RW_DEFAULT, NULL);
25517c478bd9Sstevel@tonic-gate mutex_init(&rp->r_statelock, NULL, MUTEX_DEFAULT, NULL);
25527c478bd9Sstevel@tonic-gate cv_init(&rp->r_cv, NULL, CV_DEFAULT, NULL);
25537c478bd9Sstevel@tonic-gate cv_init(&rp->r_commit.c_cv, NULL, CV_DEFAULT, NULL);
25547c478bd9Sstevel@tonic-gate rp->r_fh.fh_len = fh->fh_len;
25557c478bd9Sstevel@tonic-gate bcopy(fh->fh_buf, rp->r_fh.fh_buf, fh->fh_len);
25567c478bd9Sstevel@tonic-gate rp->r_server = mi->mi_curr_serv;
25577c478bd9Sstevel@tonic-gate if (FAILOVER_MOUNT(mi)) {
25587c478bd9Sstevel@tonic-gate /*
25597c478bd9Sstevel@tonic-gate * If replicated servers, stash pathnames
25607c478bd9Sstevel@tonic-gate */
25617c478bd9Sstevel@tonic-gate if (dnm != NULL && nm != NULL) {
25627c478bd9Sstevel@tonic-gate char *s, *p;
25637c478bd9Sstevel@tonic-gate uint_t len;
25647c478bd9Sstevel@tonic-gate
25657c478bd9Sstevel@tonic-gate len = (uint_t)(strlen(dnm) + strlen(nm) + 2);
25667c478bd9Sstevel@tonic-gate rp->r_path = kmem_alloc(len, KM_SLEEP);
25677c478bd9Sstevel@tonic-gate #ifdef DEBUG
25687c478bd9Sstevel@tonic-gate clstat_debug.rpath.value.ui64 += len;
25697c478bd9Sstevel@tonic-gate #endif
25707c478bd9Sstevel@tonic-gate s = rp->r_path;
25717c478bd9Sstevel@tonic-gate for (p = dnm; *p; p++)
25727c478bd9Sstevel@tonic-gate *s++ = *p;
25737c478bd9Sstevel@tonic-gate *s++ = '/';
25747c478bd9Sstevel@tonic-gate for (p = nm; *p; p++)
25757c478bd9Sstevel@tonic-gate *s++ = *p;
25767c478bd9Sstevel@tonic-gate *s = '\0';
25777c478bd9Sstevel@tonic-gate } else {
25787c478bd9Sstevel@tonic-gate /* special case for root */
25797c478bd9Sstevel@tonic-gate rp->r_path = kmem_alloc(2, KM_SLEEP);
25807c478bd9Sstevel@tonic-gate #ifdef DEBUG
25817c478bd9Sstevel@tonic-gate clstat_debug.rpath.value.ui64 += 2;
25827c478bd9Sstevel@tonic-gate #endif
25837c478bd9Sstevel@tonic-gate *rp->r_path = '.';
25847c478bd9Sstevel@tonic-gate *(rp->r_path + 1) = '\0';
25857c478bd9Sstevel@tonic-gate }
25867c478bd9Sstevel@tonic-gate }
25877c478bd9Sstevel@tonic-gate VFS_HOLD(vfsp);
25887c478bd9Sstevel@tonic-gate rp->r_putapage = putapage;
25897c478bd9Sstevel@tonic-gate rp->r_hashq = rhtp;
25907c478bd9Sstevel@tonic-gate rp->r_flags = RREADDIRPLUS;
25917c478bd9Sstevel@tonic-gate avl_create(&rp->r_dir, compar, sizeof (rddir_cache),
25927c478bd9Sstevel@tonic-gate offsetof(rddir_cache, tree));
25937c478bd9Sstevel@tonic-gate vn_setops(vp, vops);
25947c478bd9Sstevel@tonic-gate vp->v_data = (caddr_t)rp;
25957c478bd9Sstevel@tonic-gate vp->v_vfsp = vfsp;
25967c478bd9Sstevel@tonic-gate vp->v_type = VNON;
2597f8bbc571SPavel Filipensky vp->v_flag |= VMODSORT;
25987c478bd9Sstevel@tonic-gate nfs_set_vroot(vp);
25997c478bd9Sstevel@tonic-gate
26007c478bd9Sstevel@tonic-gate /*
26017c478bd9Sstevel@tonic-gate * There is a race condition if someone else
26027c478bd9Sstevel@tonic-gate * alloc's the rnode while no locks are held, so we
26037c478bd9Sstevel@tonic-gate * check again and recover if found.
26047c478bd9Sstevel@tonic-gate */
26057c478bd9Sstevel@tonic-gate rw_enter(&rhtp->r_lock, RW_WRITER);
26067c478bd9Sstevel@tonic-gate if ((trp = rfind(rhtp, fh, vfsp)) != NULL) {
26077c478bd9Sstevel@tonic-gate vp = RTOV(trp);
26087c478bd9Sstevel@tonic-gate nfs_set_vroot(vp);
26097c478bd9Sstevel@tonic-gate *newnode = 0;
26107c478bd9Sstevel@tonic-gate rw_exit(&rhtp->r_lock);
26117c478bd9Sstevel@tonic-gate rp_addfree(rp, cr);
26127c478bd9Sstevel@tonic-gate rw_enter(&rhtp->r_lock, RW_READER);
26137c478bd9Sstevel@tonic-gate return (vp);
26147c478bd9Sstevel@tonic-gate }
26157c478bd9Sstevel@tonic-gate rp_addhash(rp);
26167c478bd9Sstevel@tonic-gate *newnode = 1;
26177c478bd9Sstevel@tonic-gate return (vp);
26187c478bd9Sstevel@tonic-gate }
26197c478bd9Sstevel@tonic-gate
2620f8bbc571SPavel Filipensky /*
2621f8bbc571SPavel Filipensky * Callback function to check if the page should be marked as
2622f8bbc571SPavel Filipensky * modified. In the positive case, p_fsdata is set to C_NOCOMMIT.
2623f8bbc571SPavel Filipensky */
2624f8bbc571SPavel Filipensky int
2625f8bbc571SPavel Filipensky nfs_setmod_check(page_t *pp)
2626f8bbc571SPavel Filipensky {
2627f8bbc571SPavel Filipensky if (pp->p_fsdata != C_NOCOMMIT) {
2628f8bbc571SPavel Filipensky pp->p_fsdata = C_NOCOMMIT;
2629f8bbc571SPavel Filipensky return (1);
2630f8bbc571SPavel Filipensky }
2631f8bbc571SPavel Filipensky return (0);
2632f8bbc571SPavel Filipensky }
2633f8bbc571SPavel Filipensky
26347c478bd9Sstevel@tonic-gate static void
26357c478bd9Sstevel@tonic-gate nfs_set_vroot(vnode_t *vp)
26367c478bd9Sstevel@tonic-gate {
26377c478bd9Sstevel@tonic-gate rnode_t *rp;
26387c478bd9Sstevel@tonic-gate nfs_fhandle *rootfh;
26397c478bd9Sstevel@tonic-gate
26407c478bd9Sstevel@tonic-gate rp = VTOR(vp);
26417c478bd9Sstevel@tonic-gate rootfh = &rp->r_server->sv_fhandle;
26427c478bd9Sstevel@tonic-gate if (rootfh->fh_len == rp->r_fh.fh_len &&
26437c478bd9Sstevel@tonic-gate bcmp(rootfh->fh_buf, rp->r_fh.fh_buf, rp->r_fh.fh_len) == 0) {
26447c478bd9Sstevel@tonic-gate if (!(vp->v_flag & VROOT)) {
26457c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock);
26467c478bd9Sstevel@tonic-gate vp->v_flag |= VROOT;
26477c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock);
26487c478bd9Sstevel@tonic-gate }
26497c478bd9Sstevel@tonic-gate }
26507c478bd9Sstevel@tonic-gate }
26517c478bd9Sstevel@tonic-gate
26527c478bd9Sstevel@tonic-gate static void
26537c478bd9Sstevel@tonic-gate nfs_free_r_path(rnode_t *rp)
26547c478bd9Sstevel@tonic-gate {
26557c478bd9Sstevel@tonic-gate char *path;
26567c478bd9Sstevel@tonic-gate size_t len;
26577c478bd9Sstevel@tonic-gate
26587c478bd9Sstevel@tonic-gate path = rp->r_path;
26597c478bd9Sstevel@tonic-gate if (path) {
26607c478bd9Sstevel@tonic-gate rp->r_path = NULL;
26617c478bd9Sstevel@tonic-gate len = strlen(path) + 1;
26627c478bd9Sstevel@tonic-gate kmem_free(path, len);
26637c478bd9Sstevel@tonic-gate #ifdef DEBUG
26647c478bd9Sstevel@tonic-gate clstat_debug.rpath.value.ui64 -= len;
26657c478bd9Sstevel@tonic-gate #endif
26667c478bd9Sstevel@tonic-gate }
26677c478bd9Sstevel@tonic-gate }
26687c478bd9Sstevel@tonic-gate
26697c478bd9Sstevel@tonic-gate /*
26707c478bd9Sstevel@tonic-gate * Put an rnode on the free list.
26717c478bd9Sstevel@tonic-gate *
26727c478bd9Sstevel@tonic-gate * Rnodes which were allocated above and beyond the normal limit
26737c478bd9Sstevel@tonic-gate * are immediately freed.
26747c478bd9Sstevel@tonic-gate */
26757c478bd9Sstevel@tonic-gate void
26767c478bd9Sstevel@tonic-gate rp_addfree(rnode_t *rp, cred_t *cr)
26777c478bd9Sstevel@tonic-gate {
26787c478bd9Sstevel@tonic-gate vnode_t *vp;
26797c478bd9Sstevel@tonic-gate struct vfs *vfsp;
26807c478bd9Sstevel@tonic-gate
26817c478bd9Sstevel@tonic-gate vp = RTOV(rp);
26827c478bd9Sstevel@tonic-gate ASSERT(vp->v_count >= 1);
26837c478bd9Sstevel@tonic-gate ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL);
26847c478bd9Sstevel@tonic-gate
26857c478bd9Sstevel@tonic-gate /*
26867c478bd9Sstevel@tonic-gate * If we have too many rnodes allocated and there are no
26877c478bd9Sstevel@tonic-gate * references to this rnode, or if the rnode is no longer
26887c478bd9Sstevel@tonic-gate 	 * accessible because it does not reside in the hash queues,
26897c478bd9Sstevel@tonic-gate * or if an i/o error occurred while writing to the file,
26907c478bd9Sstevel@tonic-gate * then just free it instead of putting it on the rnode
26917c478bd9Sstevel@tonic-gate * freelist.
26927c478bd9Sstevel@tonic-gate */
26937c478bd9Sstevel@tonic-gate vfsp = vp->v_vfsp;
26947c478bd9Sstevel@tonic-gate if (((rnew > nrnode || !(rp->r_flags & RHASHED) || rp->r_error ||
26957c478bd9Sstevel@tonic-gate (vfsp->vfs_flag & VFS_UNMOUNTED)) && rp->r_count == 0)) {
26967c478bd9Sstevel@tonic-gate if (rp->r_flags & RHASHED) {
26977c478bd9Sstevel@tonic-gate rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
26987c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock);
26997c478bd9Sstevel@tonic-gate if (vp->v_count > 1) {
27007c478bd9Sstevel@tonic-gate vp->v_count--;
27017c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock);
27027c478bd9Sstevel@tonic-gate rw_exit(&rp->r_hashq->r_lock);
27037c478bd9Sstevel@tonic-gate return;
27047c478bd9Sstevel@tonic-gate }
27057c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock);
27067c478bd9Sstevel@tonic-gate rp_rmhash_locked(rp);
27077c478bd9Sstevel@tonic-gate rw_exit(&rp->r_hashq->r_lock);
27087c478bd9Sstevel@tonic-gate }
27097c478bd9Sstevel@tonic-gate
27107c478bd9Sstevel@tonic-gate rinactive(rp, cr);
27117c478bd9Sstevel@tonic-gate
27127c478bd9Sstevel@tonic-gate /*
27137c478bd9Sstevel@tonic-gate * Recheck the vnode reference count. We need to
27147c478bd9Sstevel@tonic-gate * make sure that another reference has not been
27157c478bd9Sstevel@tonic-gate * acquired while we were not holding v_lock. The
27167c478bd9Sstevel@tonic-gate * rnode is not in the rnode hash queues, so the
27177c478bd9Sstevel@tonic-gate * only way for a reference to have been acquired
27187c478bd9Sstevel@tonic-gate * is for a VOP_PUTPAGE because the rnode was marked
27197c478bd9Sstevel@tonic-gate * with RDIRTY or for a modified page. This
27207c478bd9Sstevel@tonic-gate * reference may have been acquired before our call
27217c478bd9Sstevel@tonic-gate * to rinactive. The i/o may have been completed,
27227c478bd9Sstevel@tonic-gate * thus allowing rinactive to complete, but the
27237c478bd9Sstevel@tonic-gate * reference to the vnode may not have been released
27247c478bd9Sstevel@tonic-gate * yet. In any case, the rnode can not be destroyed
27257c478bd9Sstevel@tonic-gate * until the other references to this vnode have been
27267c478bd9Sstevel@tonic-gate * released. The other references will take care of
27277c478bd9Sstevel@tonic-gate * either destroying the rnode or placing it on the
27287c478bd9Sstevel@tonic-gate * rnode freelist. If there are no other references,
27297c478bd9Sstevel@tonic-gate * then the rnode may be safely destroyed.
27307c478bd9Sstevel@tonic-gate */
27317c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock);
27327c478bd9Sstevel@tonic-gate if (vp->v_count > 1) {
27337c478bd9Sstevel@tonic-gate vp->v_count--;
27347c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock);
27357c478bd9Sstevel@tonic-gate return;
27367c478bd9Sstevel@tonic-gate }
27377c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock);
27387c478bd9Sstevel@tonic-gate
27397c478bd9Sstevel@tonic-gate destroy_rnode(rp);
27407c478bd9Sstevel@tonic-gate return;
27417c478bd9Sstevel@tonic-gate }
27427c478bd9Sstevel@tonic-gate
27437c478bd9Sstevel@tonic-gate /*
27447c478bd9Sstevel@tonic-gate * Lock the hash queue and then recheck the reference count
27457c478bd9Sstevel@tonic-gate 	 * to ensure that no other thread has acquired a reference,
27467c478bd9Sstevel@tonic-gate 	 * which would indicate that the rnode should not be placed on
27477c478bd9Sstevel@tonic-gate 	 * the freelist. If another reference has been acquired, then
27487c478bd9Sstevel@tonic-gate * just release this one and let the other thread complete
27497c478bd9Sstevel@tonic-gate * the processing of adding this rnode to the freelist.
27507c478bd9Sstevel@tonic-gate */
27517c478bd9Sstevel@tonic-gate rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
27527c478bd9Sstevel@tonic-gate
27537c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock);
27547c478bd9Sstevel@tonic-gate if (vp->v_count > 1) {
27557c478bd9Sstevel@tonic-gate vp->v_count--;
27567c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock);
27577c478bd9Sstevel@tonic-gate rw_exit(&rp->r_hashq->r_lock);
27587c478bd9Sstevel@tonic-gate return;
27597c478bd9Sstevel@tonic-gate }
27607c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock);
27617c478bd9Sstevel@tonic-gate
27627c478bd9Sstevel@tonic-gate /*
27637c478bd9Sstevel@tonic-gate * If there is no cached data or metadata for this file, then
27647c478bd9Sstevel@tonic-gate * put the rnode on the front of the freelist so that it will
27657c478bd9Sstevel@tonic-gate * be reused before other rnodes which may have cached data or
27667c478bd9Sstevel@tonic-gate * metadata associated with them.
27677c478bd9Sstevel@tonic-gate */
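	/*
	 * The freelist is a circular, doubly linked list and rnodes are
	 * reclaimed starting at rpfreelist, so making rp the new head below
	 * (when it has nothing cached) is what puts it at the "front".
	 */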
27687c478bd9Sstevel@tonic-gate mutex_enter(&rpfreelist_lock);
27697c478bd9Sstevel@tonic-gate if (rpfreelist == NULL) {
27707c478bd9Sstevel@tonic-gate rp->r_freef = rp;
27717c478bd9Sstevel@tonic-gate rp->r_freeb = rp;
27727c478bd9Sstevel@tonic-gate rpfreelist = rp;
27737c478bd9Sstevel@tonic-gate } else {
27747c478bd9Sstevel@tonic-gate rp->r_freef = rpfreelist;
27757c478bd9Sstevel@tonic-gate rp->r_freeb = rpfreelist->r_freeb;
27767c478bd9Sstevel@tonic-gate rpfreelist->r_freeb->r_freef = rp;
27777c478bd9Sstevel@tonic-gate rpfreelist->r_freeb = rp;
27787c478bd9Sstevel@tonic-gate if (!vn_has_cached_data(vp) &&
27797c478bd9Sstevel@tonic-gate !HAVE_RDDIR_CACHE(rp) &&
27807c478bd9Sstevel@tonic-gate rp->r_symlink.contents == NULL &&
27817c478bd9Sstevel@tonic-gate rp->r_secattr == NULL &&
27827c478bd9Sstevel@tonic-gate rp->r_pathconf == NULL)
27837c478bd9Sstevel@tonic-gate rpfreelist = rp;
27847c478bd9Sstevel@tonic-gate }
27857c478bd9Sstevel@tonic-gate mutex_exit(&rpfreelist_lock);
27867c478bd9Sstevel@tonic-gate
27877c478bd9Sstevel@tonic-gate rw_exit(&rp->r_hashq->r_lock);
27887c478bd9Sstevel@tonic-gate }
27897c478bd9Sstevel@tonic-gate
27907c478bd9Sstevel@tonic-gate /*
27917c478bd9Sstevel@tonic-gate * Remove an rnode from the free list.
27927c478bd9Sstevel@tonic-gate *
27937c478bd9Sstevel@tonic-gate * The caller must be holding rpfreelist_lock and the rnode
27947c478bd9Sstevel@tonic-gate * must be on the freelist.
27957c478bd9Sstevel@tonic-gate */
27967c478bd9Sstevel@tonic-gate static void
27977c478bd9Sstevel@tonic-gate rp_rmfree(rnode_t *rp)
27987c478bd9Sstevel@tonic-gate {
27997c478bd9Sstevel@tonic-gate
28007c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&rpfreelist_lock));
28017c478bd9Sstevel@tonic-gate ASSERT(rp->r_freef != NULL && rp->r_freeb != NULL);
28027c478bd9Sstevel@tonic-gate
28037c478bd9Sstevel@tonic-gate if (rp == rpfreelist) {
28047c478bd9Sstevel@tonic-gate rpfreelist = rp->r_freef;
28057c478bd9Sstevel@tonic-gate if (rp == rpfreelist)
28067c478bd9Sstevel@tonic-gate rpfreelist = NULL;
28077c478bd9Sstevel@tonic-gate }
28087c478bd9Sstevel@tonic-gate
28097c478bd9Sstevel@tonic-gate rp->r_freeb->r_freef = rp->r_freef;
28107c478bd9Sstevel@tonic-gate rp->r_freef->r_freeb = rp->r_freeb;
28117c478bd9Sstevel@tonic-gate
28127c478bd9Sstevel@tonic-gate rp->r_freef = rp->r_freeb = NULL;
28137c478bd9Sstevel@tonic-gate }
28147c478bd9Sstevel@tonic-gate
28157c478bd9Sstevel@tonic-gate /*
28167c478bd9Sstevel@tonic-gate * Put a rnode in the hash table.
28177c478bd9Sstevel@tonic-gate *
28187c478bd9Sstevel@tonic-gate * The caller must be holding the exclusive hash queue lock.
28197c478bd9Sstevel@tonic-gate */
28207c478bd9Sstevel@tonic-gate static void
28217c478bd9Sstevel@tonic-gate rp_addhash(rnode_t *rp)
28227c478bd9Sstevel@tonic-gate {
2823*49e84970SMarcel Telka mntinfo_t *mi;
28247c478bd9Sstevel@tonic-gate
28257c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
28267c478bd9Sstevel@tonic-gate ASSERT(!(rp->r_flags & RHASHED));
28277c478bd9Sstevel@tonic-gate
28287c478bd9Sstevel@tonic-gate rp->r_hashf = rp->r_hashq->r_hashf;
28297c478bd9Sstevel@tonic-gate rp->r_hashq->r_hashf = rp;
28307c478bd9Sstevel@tonic-gate rp->r_hashb = (rnode_t *)rp->r_hashq;
28317c478bd9Sstevel@tonic-gate rp->r_hashf->r_hashb = rp;
28327c478bd9Sstevel@tonic-gate
28337c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
28347c478bd9Sstevel@tonic-gate rp->r_flags |= RHASHED;
28357c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
2836*49e84970SMarcel Telka
2837*49e84970SMarcel Telka mi = VTOMI(RTOV(rp));
2838*49e84970SMarcel Telka mutex_enter(&mi->mi_rnodes_lock);
2839*49e84970SMarcel Telka list_insert_tail(&mi->mi_rnodes, rp);
2840*49e84970SMarcel Telka mutex_exit(&mi->mi_rnodes_lock);
28417c478bd9Sstevel@tonic-gate }
28427c478bd9Sstevel@tonic-gate
28437c478bd9Sstevel@tonic-gate /*
28447c478bd9Sstevel@tonic-gate * Remove a rnode from the hash table.
28457c478bd9Sstevel@tonic-gate *
28467c478bd9Sstevel@tonic-gate * The caller must be holding the hash queue lock.
28477c478bd9Sstevel@tonic-gate */
28487c478bd9Sstevel@tonic-gate static void
28497c478bd9Sstevel@tonic-gate rp_rmhash_locked(rnode_t *rp)
28507c478bd9Sstevel@tonic-gate {
2851*49e84970SMarcel Telka mntinfo_t *mi;
28527c478bd9Sstevel@tonic-gate
28537c478bd9Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
28547c478bd9Sstevel@tonic-gate ASSERT(rp->r_flags & RHASHED);
28557c478bd9Sstevel@tonic-gate
28567c478bd9Sstevel@tonic-gate rp->r_hashb->r_hashf = rp->r_hashf;
28577c478bd9Sstevel@tonic-gate rp->r_hashf->r_hashb = rp->r_hashb;
28587c478bd9Sstevel@tonic-gate
28597c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
28607c478bd9Sstevel@tonic-gate rp->r_flags &= ~RHASHED;
28617c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
2862*49e84970SMarcel Telka
2863*49e84970SMarcel Telka mi = VTOMI(RTOV(rp));
2864*49e84970SMarcel Telka mutex_enter(&mi->mi_rnodes_lock);
2865*49e84970SMarcel Telka if (list_link_active(&rp->r_mi_link))
2866*49e84970SMarcel Telka list_remove(&mi->mi_rnodes, rp);
2867*49e84970SMarcel Telka mutex_exit(&mi->mi_rnodes_lock);
28687c478bd9Sstevel@tonic-gate }
28697c478bd9Sstevel@tonic-gate
28707c478bd9Sstevel@tonic-gate /*
28717c478bd9Sstevel@tonic-gate * Remove a rnode from the hash table.
28727c478bd9Sstevel@tonic-gate *
28737c478bd9Sstevel@tonic-gate * The caller must not be holding the hash queue lock.
28747c478bd9Sstevel@tonic-gate */
28757c478bd9Sstevel@tonic-gate void
28767c478bd9Sstevel@tonic-gate rp_rmhash(rnode_t *rp)
28777c478bd9Sstevel@tonic-gate {
28787c478bd9Sstevel@tonic-gate
28797c478bd9Sstevel@tonic-gate rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
28807c478bd9Sstevel@tonic-gate rp_rmhash_locked(rp);
28817c478bd9Sstevel@tonic-gate rw_exit(&rp->r_hashq->r_lock);
28827c478bd9Sstevel@tonic-gate }
28837c478bd9Sstevel@tonic-gate
28847c478bd9Sstevel@tonic-gate /*
28857c478bd9Sstevel@tonic-gate * Lookup a rnode by fhandle.
28867c478bd9Sstevel@tonic-gate *
28877c478bd9Sstevel@tonic-gate * The caller must be holding the hash queue lock, either shared or exclusive.
28887c478bd9Sstevel@tonic-gate */
28897c478bd9Sstevel@tonic-gate static rnode_t *
28907c478bd9Sstevel@tonic-gate rfind(rhashq_t *rhtp, nfs_fhandle *fh, struct vfs *vfsp)
28917c478bd9Sstevel@tonic-gate {
28927c478bd9Sstevel@tonic-gate rnode_t *rp;
28937c478bd9Sstevel@tonic-gate vnode_t *vp;
28947c478bd9Sstevel@tonic-gate
28957c478bd9Sstevel@tonic-gate ASSERT(RW_LOCK_HELD(&rhtp->r_lock));
28967c478bd9Sstevel@tonic-gate
28977c478bd9Sstevel@tonic-gate for (rp = rhtp->r_hashf; rp != (rnode_t *)rhtp; rp = rp->r_hashf) {
28987c478bd9Sstevel@tonic-gate vp = RTOV(rp);
28997c478bd9Sstevel@tonic-gate if (vp->v_vfsp == vfsp &&
29007c478bd9Sstevel@tonic-gate rp->r_fh.fh_len == fh->fh_len &&
29017c478bd9Sstevel@tonic-gate bcmp(rp->r_fh.fh_buf, fh->fh_buf, fh->fh_len) == 0) {
29027c478bd9Sstevel@tonic-gate /*
29037c478bd9Sstevel@tonic-gate * remove rnode from free list, if necessary.
29047c478bd9Sstevel@tonic-gate */
29057c478bd9Sstevel@tonic-gate if (rp->r_freef != NULL) {
29067c478bd9Sstevel@tonic-gate mutex_enter(&rpfreelist_lock);
29077c478bd9Sstevel@tonic-gate /*
29087c478bd9Sstevel@tonic-gate * If the rnode is on the freelist,
29097c478bd9Sstevel@tonic-gate * then remove it and use that reference
29107c478bd9Sstevel@tonic-gate * as the new reference. Otherwise,
29117c478bd9Sstevel@tonic-gate * need to increment the reference count.
29127c478bd9Sstevel@tonic-gate */
29137c478bd9Sstevel@tonic-gate if (rp->r_freef != NULL) {
29147c478bd9Sstevel@tonic-gate rp_rmfree(rp);
29157c478bd9Sstevel@tonic-gate mutex_exit(&rpfreelist_lock);
29167c478bd9Sstevel@tonic-gate } else {
29177c478bd9Sstevel@tonic-gate mutex_exit(&rpfreelist_lock);
29187c478bd9Sstevel@tonic-gate VN_HOLD(vp);
29197c478bd9Sstevel@tonic-gate }
29207c478bd9Sstevel@tonic-gate } else
29217c478bd9Sstevel@tonic-gate VN_HOLD(vp);
29227c478bd9Sstevel@tonic-gate return (rp);
29237c478bd9Sstevel@tonic-gate }
29247c478bd9Sstevel@tonic-gate }
29257c478bd9Sstevel@tonic-gate return (NULL);
29267c478bd9Sstevel@tonic-gate }
29277c478bd9Sstevel@tonic-gate
29287c478bd9Sstevel@tonic-gate /*
2929*49e84970SMarcel Telka * Return 1 if there is an active vnode belonging to this vfs in the
29307c478bd9Sstevel@tonic-gate * rtable cache.
29317c478bd9Sstevel@tonic-gate *
29327c478bd9Sstevel@tonic-gate * Several of these checks are done without holding the usual
29337c478bd9Sstevel@tonic-gate * locks. This is safe because destroy_rtable(), rp_addfree(),
29347c478bd9Sstevel@tonic-gate * etc. will redo the necessary checks before actually destroying
29357c478bd9Sstevel@tonic-gate * any rnodes.
29367c478bd9Sstevel@tonic-gate */
29377c478bd9Sstevel@tonic-gate int
29387c478bd9Sstevel@tonic-gate check_rtable(struct vfs *vfsp)
29397c478bd9Sstevel@tonic-gate {
29407c478bd9Sstevel@tonic-gate rnode_t *rp;
29417c478bd9Sstevel@tonic-gate vnode_t *vp;
2942*49e84970SMarcel Telka mntinfo_t *mi;
29437c478bd9Sstevel@tonic-gate
2944*49e84970SMarcel Telka ASSERT(vfsp != NULL);
2945*49e84970SMarcel Telka mi = VFTOMI(vfsp);
2946*49e84970SMarcel Telka
2947*49e84970SMarcel Telka mutex_enter(&mi->mi_rnodes_lock);
2948*49e84970SMarcel Telka for (rp = list_head(&mi->mi_rnodes); rp != NULL;
2949*49e84970SMarcel Telka rp = list_next(&mi->mi_rnodes, rp)) {
29507c478bd9Sstevel@tonic-gate vp = RTOV(rp);
2951*49e84970SMarcel Telka
29527c478bd9Sstevel@tonic-gate if (rp->r_freef == NULL ||
2953*49e84970SMarcel Telka (vn_has_cached_data(vp) && (rp->r_flags & RDIRTY)) ||
29547c478bd9Sstevel@tonic-gate rp->r_count > 0) {
2955*49e84970SMarcel Telka mutex_exit(&mi->mi_rnodes_lock);
29567c478bd9Sstevel@tonic-gate return (1);
29577c478bd9Sstevel@tonic-gate }
29587c478bd9Sstevel@tonic-gate }
2959*49e84970SMarcel Telka mutex_exit(&mi->mi_rnodes_lock);
2960*49e84970SMarcel Telka
29617c478bd9Sstevel@tonic-gate return (0);
29627c478bd9Sstevel@tonic-gate }
29637c478bd9Sstevel@tonic-gate
29647c478bd9Sstevel@tonic-gate /*
29657c478bd9Sstevel@tonic-gate * Destroy inactive vnodes from the hash queues which belong to this
29667c478bd9Sstevel@tonic-gate * vfs. It is essential that we destroy all inactive vnodes during a
29677c478bd9Sstevel@tonic-gate * forced unmount as well as during a normal unmount.
29687c478bd9Sstevel@tonic-gate */
29697c478bd9Sstevel@tonic-gate void
29707c478bd9Sstevel@tonic-gate destroy_rtable(struct vfs *vfsp, cred_t *cr)
29717c478bd9Sstevel@tonic-gate {
29727c478bd9Sstevel@tonic-gate rnode_t *rp;
2973*49e84970SMarcel Telka mntinfo_t *mi;
29747c478bd9Sstevel@tonic-gate
2975*49e84970SMarcel Telka ASSERT(vfsp != NULL);
29767c478bd9Sstevel@tonic-gate
2977*49e84970SMarcel Telka mi = VFTOMI(vfsp);
2978*49e84970SMarcel Telka
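	/*
	 * Drain this filesystem's rnodes via the per-mntinfo list.  Both
	 * locks are dropped around the rp_rmhash()/rp_addfree() calls
	 * below, since those routines acquire the hash bucket and freelist
	 * locks themselves, and are re-taken before the next iteration.
	 */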
29797c478bd9Sstevel@tonic-gate mutex_enter(&rpfreelist_lock);
2980*49e84970SMarcel Telka mutex_enter(&mi->mi_rnodes_lock);
2981*49e84970SMarcel Telka while ((rp = list_remove_head(&mi->mi_rnodes)) != NULL) {
2982*49e84970SMarcel Telka /*
2983*49e84970SMarcel Telka * If the rnode is no longer on the freelist it is not
2984*49e84970SMarcel Telka * ours and it will be handled by some other thread, so
2985*49e84970SMarcel Telka * skip it.
2986*49e84970SMarcel Telka */
2987*49e84970SMarcel Telka if (rp->r_freef == NULL)
2988*49e84970SMarcel Telka continue;
2989*49e84970SMarcel Telka mutex_exit(&mi->mi_rnodes_lock);
2990*49e84970SMarcel Telka
29917c478bd9Sstevel@tonic-gate rp_rmfree(rp);
29927c478bd9Sstevel@tonic-gate mutex_exit(&rpfreelist_lock);
29937c478bd9Sstevel@tonic-gate
2994*49e84970SMarcel Telka rp_rmhash(rp);
2995*49e84970SMarcel Telka
29967c478bd9Sstevel@tonic-gate /*
29977c478bd9Sstevel@tonic-gate * This call to rp_addfree will end up destroying the
29987c478bd9Sstevel@tonic-gate * rnode, but in a safe way with the appropriate set
29997c478bd9Sstevel@tonic-gate * of checks done.
30007c478bd9Sstevel@tonic-gate */
30017c478bd9Sstevel@tonic-gate rp_addfree(rp, cr);
30027c478bd9Sstevel@tonic-gate
3003*49e84970SMarcel Telka mutex_enter(&rpfreelist_lock);
3004*49e84970SMarcel Telka mutex_enter(&mi->mi_rnodes_lock);
3005*49e84970SMarcel Telka }
3006*49e84970SMarcel Telka mutex_exit(&mi->mi_rnodes_lock);
3007*49e84970SMarcel Telka mutex_exit(&rpfreelist_lock);
30087c478bd9Sstevel@tonic-gate }
30097c478bd9Sstevel@tonic-gate
30107c478bd9Sstevel@tonic-gate /*
30117c478bd9Sstevel@tonic-gate * This routine destroys all the resources associated with the rnode
30127c478bd9Sstevel@tonic-gate * and then the rnode itself.
30137c478bd9Sstevel@tonic-gate */
30147c478bd9Sstevel@tonic-gate static void
30157c478bd9Sstevel@tonic-gate destroy_rnode(rnode_t *rp)
30167c478bd9Sstevel@tonic-gate {
30177c478bd9Sstevel@tonic-gate vnode_t *vp;
30187c478bd9Sstevel@tonic-gate vfs_t *vfsp;
30197c478bd9Sstevel@tonic-gate
30207c478bd9Sstevel@tonic-gate vp = RTOV(rp);
30217c478bd9Sstevel@tonic-gate vfsp = vp->v_vfsp;
30227c478bd9Sstevel@tonic-gate
30237c478bd9Sstevel@tonic-gate ASSERT(vp->v_count == 1);
30247c478bd9Sstevel@tonic-gate ASSERT(rp->r_count == 0);
30257c478bd9Sstevel@tonic-gate ASSERT(rp->r_lmpl == NULL);
30267c478bd9Sstevel@tonic-gate ASSERT(rp->r_mapcnt == 0);
30277c478bd9Sstevel@tonic-gate ASSERT(!(rp->r_flags & RHASHED));
30287c478bd9Sstevel@tonic-gate ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL);
30291a5e258fSJosef 'Jeff' Sipek atomic_dec_ulong((ulong_t *)&rnew);
30307c478bd9Sstevel@tonic-gate #ifdef DEBUG
30317c478bd9Sstevel@tonic-gate clstat_debug.nrnode.value.ui64--;
30327c478bd9Sstevel@tonic-gate #endif
30337c478bd9Sstevel@tonic-gate nfs_rw_destroy(&rp->r_rwlock);
30347c478bd9Sstevel@tonic-gate nfs_rw_destroy(&rp->r_lkserlock);
30357c478bd9Sstevel@tonic-gate mutex_destroy(&rp->r_statelock);
30367c478bd9Sstevel@tonic-gate cv_destroy(&rp->r_cv);
30377c478bd9Sstevel@tonic-gate cv_destroy(&rp->r_commit.c_cv);
30387c478bd9Sstevel@tonic-gate if (rp->r_flags & RDELMAPLIST)
30397c478bd9Sstevel@tonic-gate list_destroy(&rp->r_indelmap);
30407c478bd9Sstevel@tonic-gate nfs_free_r_path(rp);
30417c478bd9Sstevel@tonic-gate avl_destroy(&rp->r_dir);
30427c478bd9Sstevel@tonic-gate vn_invalid(vp);
30437c478bd9Sstevel@tonic-gate vn_free(vp);
30447c478bd9Sstevel@tonic-gate kmem_cache_free(rnode_cache, rp);
30457c478bd9Sstevel@tonic-gate VFS_RELE(vfsp);
30467c478bd9Sstevel@tonic-gate }
30477c478bd9Sstevel@tonic-gate
30487c478bd9Sstevel@tonic-gate /*
30497c478bd9Sstevel@tonic-gate * Flush all vnodes in this (or every) vfs.
30507c478bd9Sstevel@tonic-gate * Used by nfs_sync and by nfs_unmount.
30517c478bd9Sstevel@tonic-gate */
30527c478bd9Sstevel@tonic-gate void
30537c478bd9Sstevel@tonic-gate rflush(struct vfs *vfsp, cred_t *cr)
30547c478bd9Sstevel@tonic-gate {
30557c478bd9Sstevel@tonic-gate int index;
30567c478bd9Sstevel@tonic-gate rnode_t *rp;
30577c478bd9Sstevel@tonic-gate vnode_t *vp, **vplist;
30587c478bd9Sstevel@tonic-gate long num, cnt;
30597c478bd9Sstevel@tonic-gate
30607c478bd9Sstevel@tonic-gate /*
30617c478bd9Sstevel@tonic-gate * Check to see whether there is anything to do.
30627c478bd9Sstevel@tonic-gate */
30637c478bd9Sstevel@tonic-gate num = rnew;
30647c478bd9Sstevel@tonic-gate if (num == 0)
30657c478bd9Sstevel@tonic-gate return;
30667c478bd9Sstevel@tonic-gate
30677c478bd9Sstevel@tonic-gate /*
30687c478bd9Sstevel@tonic-gate * Allocate a slot for all currently active rnodes on the
30697c478bd9Sstevel@tonic-gate * supposition that they all may need flushing.
30707c478bd9Sstevel@tonic-gate */
30717c478bd9Sstevel@tonic-gate vplist = kmem_alloc(num * sizeof (*vplist), KM_SLEEP);
30727c478bd9Sstevel@tonic-gate cnt = 0;
30737c478bd9Sstevel@tonic-gate
30747c478bd9Sstevel@tonic-gate /*
3075*49e84970SMarcel Telka 	 * If the vfs is known we can take a fast path by iterating over all
3076*49e84970SMarcel Telka 	 * rnodes that belong to this vfs. This is much faster than the
3077*49e84970SMarcel Telka 	 * traditional way of iterating rtable (below) when there are many
3078*49e84970SMarcel Telka 	 * rnodes that do not belong to our vfs.
3079*49e84970SMarcel Telka */
3080*49e84970SMarcel Telka if (vfsp != NULL) {
3081*49e84970SMarcel Telka mntinfo_t *mi = VFTOMI(vfsp);
3082*49e84970SMarcel Telka
3083*49e84970SMarcel Telka mutex_enter(&mi->mi_rnodes_lock);
3084*49e84970SMarcel Telka for (rp = list_head(&mi->mi_rnodes); rp != NULL;
3085*49e84970SMarcel Telka rp = list_next(&mi->mi_rnodes, rp)) {
3086*49e84970SMarcel Telka vp = RTOV(rp);
3087*49e84970SMarcel Telka /*
3088*49e84970SMarcel Telka * Don't bother sync'ing a vp if it
3089*49e84970SMarcel Telka * is part of virtual swap device or
3090*49e84970SMarcel Telka * if VFS is read-only
3091*49e84970SMarcel Telka */
3092*49e84970SMarcel Telka if (IS_SWAPVP(vp) || vn_is_readonly(vp))
3093*49e84970SMarcel Telka continue;
3094*49e84970SMarcel Telka /*
3095*49e84970SMarcel Telka * If the vnode has pages and is marked as either dirty
3096*49e84970SMarcel Telka * or mmap'd, hold and add this vnode to the list of
3097*49e84970SMarcel Telka * vnodes to flush.
3098*49e84970SMarcel Telka */
3099*49e84970SMarcel Telka ASSERT(vp->v_vfsp == vfsp);
3100*49e84970SMarcel Telka if (vn_has_cached_data(vp) &&
3101*49e84970SMarcel Telka ((rp->r_flags & RDIRTY) || rp->r_mapcnt > 0)) {
3102*49e84970SMarcel Telka VN_HOLD(vp);
3103*49e84970SMarcel Telka vplist[cnt++] = vp;
3104*49e84970SMarcel Telka if (cnt == num) {
3105*49e84970SMarcel Telka /*
3106*49e84970SMarcel Telka 					 * The vplist is full because there are
3107*49e84970SMarcel Telka 					 * too many rnodes. We are done for
3108*49e84970SMarcel Telka * now.
3109*49e84970SMarcel Telka */
3110*49e84970SMarcel Telka break;
3111*49e84970SMarcel Telka }
3112*49e84970SMarcel Telka }
3113*49e84970SMarcel Telka }
3114*49e84970SMarcel Telka mutex_exit(&mi->mi_rnodes_lock);
3115*49e84970SMarcel Telka
3116*49e84970SMarcel Telka goto done;
3117*49e84970SMarcel Telka }
3118*49e84970SMarcel Telka
3119*49e84970SMarcel Telka ASSERT(vfsp == NULL);
3120*49e84970SMarcel Telka
3121*49e84970SMarcel Telka /*
31227c478bd9Sstevel@tonic-gate * Walk the hash queues looking for rnodes with page
31237c478bd9Sstevel@tonic-gate * lists associated with them. Make a list of these
31247c478bd9Sstevel@tonic-gate * files.
31257c478bd9Sstevel@tonic-gate */
31267c478bd9Sstevel@tonic-gate for (index = 0; index < rtablesize; index++) {
31277c478bd9Sstevel@tonic-gate rw_enter(&rtable[index].r_lock, RW_READER);
31287c478bd9Sstevel@tonic-gate for (rp = rtable[index].r_hashf;
31297c478bd9Sstevel@tonic-gate rp != (rnode_t *)(&rtable[index]);
31307c478bd9Sstevel@tonic-gate rp = rp->r_hashf) {
31317c478bd9Sstevel@tonic-gate vp = RTOV(rp);
31327c478bd9Sstevel@tonic-gate /*
31337c478bd9Sstevel@tonic-gate * Don't bother sync'ing a vp if it
31347c478bd9Sstevel@tonic-gate * is part of virtual swap device or
31357c478bd9Sstevel@tonic-gate * if VFS is read-only
31367c478bd9Sstevel@tonic-gate */
31377c478bd9Sstevel@tonic-gate if (IS_SWAPVP(vp) || vn_is_readonly(vp))
31387c478bd9Sstevel@tonic-gate continue;
31397c478bd9Sstevel@tonic-gate /*
3140*49e84970SMarcel Telka * If the vnode has pages and is marked as either dirty
3141*49e84970SMarcel Telka * or mmap'd, hold and add this vnode to the list of
31427c478bd9Sstevel@tonic-gate * vnodes to flush.
31437c478bd9Sstevel@tonic-gate */
3144*49e84970SMarcel Telka if (vn_has_cached_data(vp) &&
31457c478bd9Sstevel@tonic-gate ((rp->r_flags & RDIRTY) || rp->r_mapcnt > 0)) {
31467c478bd9Sstevel@tonic-gate VN_HOLD(vp);
31477c478bd9Sstevel@tonic-gate vplist[cnt++] = vp;
31487c478bd9Sstevel@tonic-gate if (cnt == num) {
31497c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock);
3150*49e84970SMarcel Telka /*
3151*49e84970SMarcel Telka * The vplist is full because there are
3152*49e84970SMarcel Telka * too many rnodes. We are done for
3153*49e84970SMarcel Telka * now.
3154*49e84970SMarcel Telka */
3155*49e84970SMarcel Telka goto done;
31567c478bd9Sstevel@tonic-gate }
31577c478bd9Sstevel@tonic-gate }
31587c478bd9Sstevel@tonic-gate }
31597c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock);
31607c478bd9Sstevel@tonic-gate }
3161*49e84970SMarcel Telka
3162*49e84970SMarcel Telka done:
31637c478bd9Sstevel@tonic-gate
31647c478bd9Sstevel@tonic-gate /*
31657c478bd9Sstevel@tonic-gate * Flush and release all of the files on the list.
31667c478bd9Sstevel@tonic-gate */
31677c478bd9Sstevel@tonic-gate while (cnt-- > 0) {
31687c478bd9Sstevel@tonic-gate vp = vplist[cnt];
3169da6c28aaSamw (void) VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_ASYNC, cr, NULL);
31707c478bd9Sstevel@tonic-gate VN_RELE(vp);
31717c478bd9Sstevel@tonic-gate }
31727c478bd9Sstevel@tonic-gate
31737c478bd9Sstevel@tonic-gate /*
31747c478bd9Sstevel@tonic-gate * Free the space allocated to hold the list.
31757c478bd9Sstevel@tonic-gate */
31767c478bd9Sstevel@tonic-gate kmem_free(vplist, num * sizeof (*vplist));
31777c478bd9Sstevel@tonic-gate }
31787c478bd9Sstevel@tonic-gate
31797c478bd9Sstevel@tonic-gate /*
31807c478bd9Sstevel@tonic-gate * This probably needs to be larger than or equal to
31817c478bd9Sstevel@tonic-gate * log2(sizeof (struct rnode)) due to the way that rnodes are
31827c478bd9Sstevel@tonic-gate * allocated.
31837c478bd9Sstevel@tonic-gate */
31847c478bd9Sstevel@tonic-gate #define ACACHE_SHIFT_BITS 9
31857c478bd9Sstevel@tonic-gate
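/*
 * Hash an (rnode, cred) pair into the access cache table.  The rnode
 * pointer is shifted to discard the low-order bits that are identical
 * for all rnodes (see ACACHE_SHIFT_BITS above) and the caller's uid is
 * mixed in so that entries for different users of the same file are
 * spread across buckets.
 */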
31867c478bd9Sstevel@tonic-gate static int
31877c478bd9Sstevel@tonic-gate acachehash(rnode_t *rp, cred_t *cr)
31887c478bd9Sstevel@tonic-gate {
31897c478bd9Sstevel@tonic-gate
31907c478bd9Sstevel@tonic-gate return ((((intptr_t)rp >> ACACHE_SHIFT_BITS) + crgetuid(cr)) &
31917c478bd9Sstevel@tonic-gate acachemask);
31927c478bd9Sstevel@tonic-gate }
31937c478bd9Sstevel@tonic-gate
31947c478bd9Sstevel@tonic-gate #ifdef DEBUG
31957c478bd9Sstevel@tonic-gate static long nfs_access_cache_hits = 0;
31967c478bd9Sstevel@tonic-gate static long nfs_access_cache_misses = 0;
31977c478bd9Sstevel@tonic-gate #endif
31987c478bd9Sstevel@tonic-gate
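/*
 * Look up the cached result of a previous ACCESS check for this
 * rnode/credential pair.  Returns NFS_ACCESS_ALLOWED or
 * NFS_ACCESS_DENIED if a cached entry covers all of the requested
 * access bits, and NFS_ACCESS_UNKNOWN if the attribute cache is not
 * valid or no matching entry is found.
 */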
31997c478bd9Sstevel@tonic-gate nfs_access_type_t
32007c478bd9Sstevel@tonic-gate nfs_access_check(rnode_t *rp, uint32_t acc, cred_t *cr)
32017c478bd9Sstevel@tonic-gate {
32027c478bd9Sstevel@tonic-gate vnode_t *vp;
32037c478bd9Sstevel@tonic-gate acache_t *ap;
32047c478bd9Sstevel@tonic-gate acache_hash_t *hp;
32057c478bd9Sstevel@tonic-gate nfs_access_type_t all;
32067c478bd9Sstevel@tonic-gate
32077c478bd9Sstevel@tonic-gate vp = RTOV(rp);
32087c478bd9Sstevel@tonic-gate if (!ATTRCACHE_VALID(vp) || nfs_waitfor_purge_complete(vp))
32097c478bd9Sstevel@tonic-gate return (NFS_ACCESS_UNKNOWN);
32107c478bd9Sstevel@tonic-gate
32117c478bd9Sstevel@tonic-gate if (rp->r_acache != NULL) {
32127c478bd9Sstevel@tonic-gate hp = &acache[acachehash(rp, cr)];
32137c478bd9Sstevel@tonic-gate rw_enter(&hp->lock, RW_READER);
32147c478bd9Sstevel@tonic-gate ap = hp->next;
32157c478bd9Sstevel@tonic-gate while (ap != (acache_t *)hp) {
32167c478bd9Sstevel@tonic-gate if (crcmp(ap->cred, cr) == 0 && ap->rnode == rp) {
32177c478bd9Sstevel@tonic-gate if ((ap->known & acc) == acc) {
32187c478bd9Sstevel@tonic-gate #ifdef DEBUG
32197c478bd9Sstevel@tonic-gate nfs_access_cache_hits++;
32207c478bd9Sstevel@tonic-gate #endif
32217c478bd9Sstevel@tonic-gate if ((ap->allowed & acc) == acc)
32227c478bd9Sstevel@tonic-gate all = NFS_ACCESS_ALLOWED;
32237c478bd9Sstevel@tonic-gate else
32247c478bd9Sstevel@tonic-gate all = NFS_ACCESS_DENIED;
32257c478bd9Sstevel@tonic-gate } else {
32267c478bd9Sstevel@tonic-gate #ifdef DEBUG
32277c478bd9Sstevel@tonic-gate nfs_access_cache_misses++;
32287c478bd9Sstevel@tonic-gate #endif
32297c478bd9Sstevel@tonic-gate all = NFS_ACCESS_UNKNOWN;
32307c478bd9Sstevel@tonic-gate }
32317c478bd9Sstevel@tonic-gate rw_exit(&hp->lock);
32327c478bd9Sstevel@tonic-gate return (all);
32337c478bd9Sstevel@tonic-gate }
32347c478bd9Sstevel@tonic-gate ap = ap->next;
32357c478bd9Sstevel@tonic-gate }
32367c478bd9Sstevel@tonic-gate rw_exit(&hp->lock);
32377c478bd9Sstevel@tonic-gate }
32387c478bd9Sstevel@tonic-gate
32397c478bd9Sstevel@tonic-gate #ifdef DEBUG
32407c478bd9Sstevel@tonic-gate nfs_access_cache_misses++;
32417c478bd9Sstevel@tonic-gate #endif
32427c478bd9Sstevel@tonic-gate return (NFS_ACCESS_UNKNOWN);
32437c478bd9Sstevel@tonic-gate }
32447c478bd9Sstevel@tonic-gate
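/*
 * Record the result of an ACCESS check in the access cache.  An entry
 * matching this rnode/credential pair is updated in place if one
 * exists; otherwise a new entry (allocated before taking the bucket
 * lock) is linked onto both the hash bucket and the rnode's r_acache
 * list.  If the allocation failed, the result is simply not cached.
 */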
32457c478bd9Sstevel@tonic-gate void
32467c478bd9Sstevel@tonic-gate nfs_access_cache(rnode_t *rp, uint32_t acc, uint32_t resacc, cred_t *cr)
32477c478bd9Sstevel@tonic-gate {
32487c478bd9Sstevel@tonic-gate acache_t *ap;
32497c478bd9Sstevel@tonic-gate acache_t *nap;
32507c478bd9Sstevel@tonic-gate acache_hash_t *hp;
32517c478bd9Sstevel@tonic-gate
32527c478bd9Sstevel@tonic-gate hp = &acache[acachehash(rp, cr)];
32537c478bd9Sstevel@tonic-gate
32547c478bd9Sstevel@tonic-gate /*
32557c478bd9Sstevel@tonic-gate * Allocate now, assuming that an allocation will most likely
32567c478bd9Sstevel@tonic-gate * be required. This allows the allocation to happen without
32577c478bd9Sstevel@tonic-gate * holding the hash bucket locked.
32587c478bd9Sstevel@tonic-gate */
32597c478bd9Sstevel@tonic-gate nap = kmem_cache_alloc(acache_cache, KM_NOSLEEP);
32607c478bd9Sstevel@tonic-gate if (nap != NULL) {
32617c478bd9Sstevel@tonic-gate nap->known = acc;
32627c478bd9Sstevel@tonic-gate nap->allowed = resacc;
32637c478bd9Sstevel@tonic-gate nap->rnode = rp;
32647c478bd9Sstevel@tonic-gate crhold(cr);
32657c478bd9Sstevel@tonic-gate nap->cred = cr;
32667c478bd9Sstevel@tonic-gate nap->hashq = hp;
32677c478bd9Sstevel@tonic-gate }
32687c478bd9Sstevel@tonic-gate
32697c478bd9Sstevel@tonic-gate rw_enter(&hp->lock, RW_WRITER);
32707c478bd9Sstevel@tonic-gate
32717c478bd9Sstevel@tonic-gate if (rp->r_acache != NULL) {
32727c478bd9Sstevel@tonic-gate ap = hp->next;
32737c478bd9Sstevel@tonic-gate while (ap != (acache_t *)hp) {
32747c478bd9Sstevel@tonic-gate if (crcmp(ap->cred, cr) == 0 && ap->rnode == rp) {
32757c478bd9Sstevel@tonic-gate ap->known |= acc;
32767c478bd9Sstevel@tonic-gate ap->allowed &= ~acc;
32777c478bd9Sstevel@tonic-gate ap->allowed |= resacc;
32787c478bd9Sstevel@tonic-gate rw_exit(&hp->lock);
32797c478bd9Sstevel@tonic-gate if (nap != NULL) {
32807c478bd9Sstevel@tonic-gate crfree(nap->cred);
32817c478bd9Sstevel@tonic-gate kmem_cache_free(acache_cache, nap);
32827c478bd9Sstevel@tonic-gate }
32837c478bd9Sstevel@tonic-gate return;
32847c478bd9Sstevel@tonic-gate }
32857c478bd9Sstevel@tonic-gate ap = ap->next;
32867c478bd9Sstevel@tonic-gate }
32877c478bd9Sstevel@tonic-gate }
32887c478bd9Sstevel@tonic-gate
32897c478bd9Sstevel@tonic-gate if (nap != NULL) {
32907c478bd9Sstevel@tonic-gate #ifdef DEBUG
32917c478bd9Sstevel@tonic-gate clstat_debug.access.value.ui64++;
32927c478bd9Sstevel@tonic-gate #endif
32937c478bd9Sstevel@tonic-gate nap->next = hp->next;
32947c478bd9Sstevel@tonic-gate hp->next = nap;
32957c478bd9Sstevel@tonic-gate nap->next->prev = nap;
32967c478bd9Sstevel@tonic-gate nap->prev = (acache_t *)hp;
32977c478bd9Sstevel@tonic-gate
32987c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
32997c478bd9Sstevel@tonic-gate nap->list = rp->r_acache;
33007c478bd9Sstevel@tonic-gate rp->r_acache = nap;
33017c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
33027c478bd9Sstevel@tonic-gate }
33037c478bd9Sstevel@tonic-gate
33047c478bd9Sstevel@tonic-gate rw_exit(&hp->lock);
33057c478bd9Sstevel@tonic-gate }
33067c478bd9Sstevel@tonic-gate
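/*
 * Discard all access cache entries associated with the given rnode.
 * Each entry is unlinked from its hash bucket and freed.  Returns 1 if
 * any entries were freed, 0 if there was nothing to do.
 */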
33077c478bd9Sstevel@tonic-gate int
33087c478bd9Sstevel@tonic-gate nfs_access_purge_rp(rnode_t *rp)
33097c478bd9Sstevel@tonic-gate {
33107c478bd9Sstevel@tonic-gate acache_t *ap;
33117c478bd9Sstevel@tonic-gate acache_t *tmpap;
33127c478bd9Sstevel@tonic-gate acache_t *rplist;
33137c478bd9Sstevel@tonic-gate
33147c478bd9Sstevel@tonic-gate /*
33157c478bd9Sstevel@tonic-gate * If there aren't any cached entries, then there is nothing
33167c478bd9Sstevel@tonic-gate * to free.
33177c478bd9Sstevel@tonic-gate */
33187c478bd9Sstevel@tonic-gate if (rp->r_acache == NULL)
33197c478bd9Sstevel@tonic-gate return (0);
33207c478bd9Sstevel@tonic-gate
33217c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
33227c478bd9Sstevel@tonic-gate rplist = rp->r_acache;
33237c478bd9Sstevel@tonic-gate rp->r_acache = NULL;
33247c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
33257c478bd9Sstevel@tonic-gate
33267c478bd9Sstevel@tonic-gate /*
33277c478bd9Sstevel@tonic-gate * Loop through each entry in the list pointed to in the
33287c478bd9Sstevel@tonic-gate * rnode. Remove each of these entries from the hash
33297c478bd9Sstevel@tonic-gate * queue that it is on and remove it from the list in
33307c478bd9Sstevel@tonic-gate * the rnode.
33317c478bd9Sstevel@tonic-gate */
33327c478bd9Sstevel@tonic-gate for (ap = rplist; ap != NULL; ap = tmpap) {
33337c478bd9Sstevel@tonic-gate rw_enter(&ap->hashq->lock, RW_WRITER);
33347c478bd9Sstevel@tonic-gate ap->prev->next = ap->next;
33357c478bd9Sstevel@tonic-gate ap->next->prev = ap->prev;
33367c478bd9Sstevel@tonic-gate rw_exit(&ap->hashq->lock);
33377c478bd9Sstevel@tonic-gate
33387c478bd9Sstevel@tonic-gate tmpap = ap->list;
33397c478bd9Sstevel@tonic-gate crfree(ap->cred);
33407c478bd9Sstevel@tonic-gate kmem_cache_free(acache_cache, ap);
33417c478bd9Sstevel@tonic-gate #ifdef DEBUG
33427c478bd9Sstevel@tonic-gate clstat_debug.access.value.ui64--;
33437c478bd9Sstevel@tonic-gate #endif
33447c478bd9Sstevel@tonic-gate }
33457c478bd9Sstevel@tonic-gate
33467c478bd9Sstevel@tonic-gate return (1);
33477c478bd9Sstevel@tonic-gate }
33487c478bd9Sstevel@tonic-gate
33497c478bd9Sstevel@tonic-gate static const char prefix[] = ".nfs";
33507c478bd9Sstevel@tonic-gate
33517c478bd9Sstevel@tonic-gate static kmutex_t newnum_lock;
33527c478bd9Sstevel@tonic-gate
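/*
 * Return a small integer used to make the ".nfs" temporary names built
 * below reasonably unique.  The counter is seeded from the low 16 bits
 * of the current time on first use and simply incremented under
 * newnum_lock thereafter.
 */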
33537c478bd9Sstevel@tonic-gate int
33547c478bd9Sstevel@tonic-gate newnum(void)
33557c478bd9Sstevel@tonic-gate {
33567c478bd9Sstevel@tonic-gate static uint_t newnum = 0;
33577c478bd9Sstevel@tonic-gate uint_t id;
33587c478bd9Sstevel@tonic-gate
33597c478bd9Sstevel@tonic-gate mutex_enter(&newnum_lock);
33607c478bd9Sstevel@tonic-gate if (newnum == 0)
33617c478bd9Sstevel@tonic-gate newnum = gethrestime_sec() & 0xffff;
33627c478bd9Sstevel@tonic-gate id = newnum++;
33637c478bd9Sstevel@tonic-gate mutex_exit(&newnum_lock);
33647c478bd9Sstevel@tonic-gate return (id);
33657c478bd9Sstevel@tonic-gate }
33667c478bd9Sstevel@tonic-gate
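/*
 * Construct a temporary file name of the form ".nfsXXXX", where the
 * suffix is a hexadecimal encoding of a newnum() value.  The returned
 * buffer is MAXNAMELEN bytes long and must be freed by the caller.
 */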
33677c478bd9Sstevel@tonic-gate char *
33687c478bd9Sstevel@tonic-gate newname(void)
33697c478bd9Sstevel@tonic-gate {
33707c478bd9Sstevel@tonic-gate char *news;
33717c478bd9Sstevel@tonic-gate char *s;
33727c478bd9Sstevel@tonic-gate const char *p;
33737c478bd9Sstevel@tonic-gate uint_t id;
33747c478bd9Sstevel@tonic-gate
33757c478bd9Sstevel@tonic-gate id = newnum();
33767c478bd9Sstevel@tonic-gate news = kmem_alloc(MAXNAMELEN, KM_SLEEP);
33777c478bd9Sstevel@tonic-gate s = news;
33787c478bd9Sstevel@tonic-gate p = prefix;
33797c478bd9Sstevel@tonic-gate while (*p != '\0')
33807c478bd9Sstevel@tonic-gate *s++ = *p++;
33817c478bd9Sstevel@tonic-gate while (id != 0) {
33827c478bd9Sstevel@tonic-gate *s++ = "0123456789ABCDEF"[id & 0x0f];
33837c478bd9Sstevel@tonic-gate id >>= 4;
33847c478bd9Sstevel@tonic-gate }
33857c478bd9Sstevel@tonic-gate *s = '\0';
33867c478bd9Sstevel@tonic-gate return (news);
33877c478bd9Sstevel@tonic-gate }
33887c478bd9Sstevel@tonic-gate
33897c478bd9Sstevel@tonic-gate /*
33907c478bd9Sstevel@tonic-gate * Snapshot callback for nfs:0:nfs_client as registered with the kstat
33917c478bd9Sstevel@tonic-gate * framework.
33927c478bd9Sstevel@tonic-gate */
33937c478bd9Sstevel@tonic-gate static int
33947c478bd9Sstevel@tonic-gate cl_snapshot(kstat_t *ksp, void *buf, int rw)
33957c478bd9Sstevel@tonic-gate {
33967c478bd9Sstevel@tonic-gate ksp->ks_snaptime = gethrtime();
33977c478bd9Sstevel@tonic-gate if (rw == KSTAT_WRITE) {
33987c478bd9Sstevel@tonic-gate bcopy(buf, ksp->ks_private, sizeof (clstat_tmpl));
33997c478bd9Sstevel@tonic-gate #ifdef DEBUG
34007c478bd9Sstevel@tonic-gate /*
34017c478bd9Sstevel@tonic-gate * Currently only the global zone can write to kstats, but we
34027c478bd9Sstevel@tonic-gate * add the check just for paranoia.
34037c478bd9Sstevel@tonic-gate */
34047c478bd9Sstevel@tonic-gate if (INGLOBALZONE(curproc))
34057c478bd9Sstevel@tonic-gate bcopy((char *)buf + sizeof (clstat_tmpl), &clstat_debug,
34067c478bd9Sstevel@tonic-gate sizeof (clstat_debug));
34077c478bd9Sstevel@tonic-gate #endif
34087c478bd9Sstevel@tonic-gate } else {
34097c478bd9Sstevel@tonic-gate bcopy(ksp->ks_private, buf, sizeof (clstat_tmpl));
34107c478bd9Sstevel@tonic-gate #ifdef DEBUG
34117c478bd9Sstevel@tonic-gate /*
34127c478bd9Sstevel@tonic-gate * If we're displaying the "global" debug kstat values, we
34137c478bd9Sstevel@tonic-gate * display them as-is to all zones since in fact they apply to
34147c478bd9Sstevel@tonic-gate * the system as a whole.
34157c478bd9Sstevel@tonic-gate */
34167c478bd9Sstevel@tonic-gate bcopy(&clstat_debug, (char *)buf + sizeof (clstat_tmpl),
34177c478bd9Sstevel@tonic-gate sizeof (clstat_debug));
34187c478bd9Sstevel@tonic-gate #endif
34197c478bd9Sstevel@tonic-gate }
34207c478bd9Sstevel@tonic-gate return (0);
34217c478bd9Sstevel@tonic-gate }
34227c478bd9Sstevel@tonic-gate
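/*
 * ZSD create callback: allocate and initialize the per-zone NFS client
 * state, install the per-zone "nfs_client" kstat, and add the new
 * structure to the global nfs_clnt_list.
 */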
34237c478bd9Sstevel@tonic-gate static void *
34247c478bd9Sstevel@tonic-gate clinit_zone(zoneid_t zoneid)
34257c478bd9Sstevel@tonic-gate {
34267c478bd9Sstevel@tonic-gate kstat_t *nfs_client_kstat;
34277c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl;
34287c478bd9Sstevel@tonic-gate uint_t ndata;
34297c478bd9Sstevel@tonic-gate
34307c478bd9Sstevel@tonic-gate nfscl = kmem_alloc(sizeof (*nfscl), KM_SLEEP);
34317c478bd9Sstevel@tonic-gate mutex_init(&nfscl->nfscl_chtable_lock, NULL, MUTEX_DEFAULT, NULL);
34327c478bd9Sstevel@tonic-gate nfscl->nfscl_chtable = NULL;
34337c478bd9Sstevel@tonic-gate nfscl->nfscl_zoneid = zoneid;
34347c478bd9Sstevel@tonic-gate
34357c478bd9Sstevel@tonic-gate bcopy(&clstat_tmpl, &nfscl->nfscl_stat, sizeof (clstat_tmpl));
34367c478bd9Sstevel@tonic-gate ndata = sizeof (clstat_tmpl) / sizeof (kstat_named_t);
34377c478bd9Sstevel@tonic-gate #ifdef DEBUG
34387c478bd9Sstevel@tonic-gate ndata += sizeof (clstat_debug) / sizeof (kstat_named_t);
34397c478bd9Sstevel@tonic-gate #endif
34407c478bd9Sstevel@tonic-gate if ((nfs_client_kstat = kstat_create_zone("nfs", 0, "nfs_client",
34417c478bd9Sstevel@tonic-gate "misc", KSTAT_TYPE_NAMED, ndata,
34427c478bd9Sstevel@tonic-gate KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, zoneid)) != NULL) {
34437c478bd9Sstevel@tonic-gate nfs_client_kstat->ks_private = &nfscl->nfscl_stat;
34447c478bd9Sstevel@tonic-gate nfs_client_kstat->ks_snapshot = cl_snapshot;
34457c478bd9Sstevel@tonic-gate kstat_install(nfs_client_kstat);
34467c478bd9Sstevel@tonic-gate }
34477c478bd9Sstevel@tonic-gate mutex_enter(&nfs_clnt_list_lock);
34487c478bd9Sstevel@tonic-gate list_insert_head(&nfs_clnt_list, nfscl);
34497c478bd9Sstevel@tonic-gate mutex_exit(&nfs_clnt_list_lock);
34507c478bd9Sstevel@tonic-gate return (nfscl);
34517c478bd9Sstevel@tonic-gate }
34527c478bd9Sstevel@tonic-gate
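/*
 * ZSD destroy callback: remove the per-zone state from nfs_clnt_list,
 * reclaim any cached client handles, free the client handle table and
 * the kstat, and release the structure itself.
 */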
34537c478bd9Sstevel@tonic-gate /*ARGSUSED*/
34547c478bd9Sstevel@tonic-gate static void
34557c478bd9Sstevel@tonic-gate clfini_zone(zoneid_t zoneid, void *arg)
34567c478bd9Sstevel@tonic-gate {
34577c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl = arg;
34587c478bd9Sstevel@tonic-gate chhead_t *chp, *next;
34597c478bd9Sstevel@tonic-gate
34607c478bd9Sstevel@tonic-gate if (nfscl == NULL)
34617c478bd9Sstevel@tonic-gate return;
34627c478bd9Sstevel@tonic-gate mutex_enter(&nfs_clnt_list_lock);
34637c478bd9Sstevel@tonic-gate list_remove(&nfs_clnt_list, nfscl);
34647c478bd9Sstevel@tonic-gate mutex_exit(&nfs_clnt_list_lock);
34657c478bd9Sstevel@tonic-gate clreclaim_zone(nfscl, 0);
34667c478bd9Sstevel@tonic-gate for (chp = nfscl->nfscl_chtable; chp != NULL; chp = next) {
34677c478bd9Sstevel@tonic-gate ASSERT(chp->ch_list == NULL);
34687c478bd9Sstevel@tonic-gate kmem_free(chp->ch_protofmly, strlen(chp->ch_protofmly) + 1);
34697c478bd9Sstevel@tonic-gate next = chp->ch_next;
34707c478bd9Sstevel@tonic-gate kmem_free(chp, sizeof (*chp));
34717c478bd9Sstevel@tonic-gate }
34727c478bd9Sstevel@tonic-gate kstat_delete_byname_zone("nfs", 0, "nfs_client", zoneid);
34737c478bd9Sstevel@tonic-gate mutex_destroy(&nfscl->nfscl_chtable_lock);
34747c478bd9Sstevel@tonic-gate kmem_free(nfscl, sizeof (*nfscl));
34757c478bd9Sstevel@tonic-gate }
34767c478bd9Sstevel@tonic-gate
34777c478bd9Sstevel@tonic-gate /*
34787c478bd9Sstevel@tonic-gate * Called by endpnt_destructor to make sure the client handles are
34797c478bd9Sstevel@tonic-gate * cleaned up before the RPC endpoints. This becomes a no-op if
34807c478bd9Sstevel@tonic-gate * clfini_zone (above) is called first. This function is needed
34817c478bd9Sstevel@tonic-gate * (rather than relying on clfini_zone to clean up) because the ZSD
34827c478bd9Sstevel@tonic-gate * callbacks have no ordering mechanism, so we have no way to ensure
34837c478bd9Sstevel@tonic-gate * that clfini_zone is called before endpnt_destructor.
34847c478bd9Sstevel@tonic-gate */
34857c478bd9Sstevel@tonic-gate void
34867c478bd9Sstevel@tonic-gate clcleanup_zone(zoneid_t zoneid)
34877c478bd9Sstevel@tonic-gate {
34887c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl;
34897c478bd9Sstevel@tonic-gate
34907c478bd9Sstevel@tonic-gate mutex_enter(&nfs_clnt_list_lock);
34917c478bd9Sstevel@tonic-gate nfscl = list_head(&nfs_clnt_list);
34927c478bd9Sstevel@tonic-gate for (; nfscl != NULL; nfscl = list_next(&nfs_clnt_list, nfscl)) {
34937c478bd9Sstevel@tonic-gate if (nfscl->nfscl_zoneid == zoneid) {
34947c478bd9Sstevel@tonic-gate clreclaim_zone(nfscl, 0);
34957c478bd9Sstevel@tonic-gate break;
34967c478bd9Sstevel@tonic-gate }
34977c478bd9Sstevel@tonic-gate }
34987c478bd9Sstevel@tonic-gate mutex_exit(&nfs_clnt_list_lock);
34997c478bd9Sstevel@tonic-gate }
35007c478bd9Sstevel@tonic-gate
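/*
 * One-time initialization for this module: size and allocate the rnode
 * hash table and the access cache, create the kmem caches for rnodes,
 * access cache entries and client handles, register the per-zone ZSD
 * key, initialize the global locks, and reserve a device major number
 * for NFS mounts.
 */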
35017c478bd9Sstevel@tonic-gate int
35027c478bd9Sstevel@tonic-gate nfs_subrinit(void)
35037c478bd9Sstevel@tonic-gate {
35047c478bd9Sstevel@tonic-gate int i;
35057c478bd9Sstevel@tonic-gate ulong_t nrnode_max;
35067c478bd9Sstevel@tonic-gate
35077c478bd9Sstevel@tonic-gate /*
35087c478bd9Sstevel@tonic-gate * Allocate and initialize the rnode hash queues
35097c478bd9Sstevel@tonic-gate */
35107c478bd9Sstevel@tonic-gate if (nrnode <= 0)
35117c478bd9Sstevel@tonic-gate nrnode = ncsize;
35127c478bd9Sstevel@tonic-gate nrnode_max = (ulong_t)((kmem_maxavail() >> 2) / sizeof (struct rnode));
35137c478bd9Sstevel@tonic-gate if (nrnode > nrnode_max || (nrnode == 0 && ncsize == 0)) {
35147c478bd9Sstevel@tonic-gate zcmn_err(GLOBAL_ZONEID, CE_NOTE,
3515f5654033SAlexander Eremin "!setting nrnode to max value of %ld", nrnode_max);
35167c478bd9Sstevel@tonic-gate nrnode = nrnode_max;
35177c478bd9Sstevel@tonic-gate }
35187c478bd9Sstevel@tonic-gate
35197c478bd9Sstevel@tonic-gate rtablesize = 1 << highbit(nrnode / hashlen);
35207c478bd9Sstevel@tonic-gate rtablemask = rtablesize - 1;
35217c478bd9Sstevel@tonic-gate rtable = kmem_alloc(rtablesize * sizeof (*rtable), KM_SLEEP);
35227c478bd9Sstevel@tonic-gate for (i = 0; i < rtablesize; i++) {
35237c478bd9Sstevel@tonic-gate rtable[i].r_hashf = (rnode_t *)(&rtable[i]);
35247c478bd9Sstevel@tonic-gate rtable[i].r_hashb = (rnode_t *)(&rtable[i]);
35257c478bd9Sstevel@tonic-gate rw_init(&rtable[i].r_lock, NULL, RW_DEFAULT, NULL);
35267c478bd9Sstevel@tonic-gate }
35277c478bd9Sstevel@tonic-gate rnode_cache = kmem_cache_create("rnode_cache", sizeof (rnode_t),
35287c478bd9Sstevel@tonic-gate 0, NULL, NULL, nfs_reclaim, NULL, NULL, 0);
35297c478bd9Sstevel@tonic-gate
35307c478bd9Sstevel@tonic-gate /*
35317c478bd9Sstevel@tonic-gate * Allocate and initialize the access cache
35327c478bd9Sstevel@tonic-gate */
35337c478bd9Sstevel@tonic-gate
35347c478bd9Sstevel@tonic-gate /*
35357c478bd9Sstevel@tonic-gate * The initial guess is one access cache entry per rnode, unless
35367c478bd9Sstevel@tonic-gate * nacache is set to a non-zero value, in which case it is used
35377c478bd9Sstevel@tonic-gate * as the guess at the number of access cache entries.
35387c478bd9Sstevel@tonic-gate */
35397c478bd9Sstevel@tonic-gate if (nacache > 0)
35407c478bd9Sstevel@tonic-gate acachesize = 1 << highbit(nacache / hashlen);
35417c478bd9Sstevel@tonic-gate else
35427c478bd9Sstevel@tonic-gate acachesize = rtablesize;
35437c478bd9Sstevel@tonic-gate acachemask = acachesize - 1;
35447c478bd9Sstevel@tonic-gate acache = kmem_alloc(acachesize * sizeof (*acache), KM_SLEEP);
35457c478bd9Sstevel@tonic-gate for (i = 0; i < acachesize; i++) {
35467c478bd9Sstevel@tonic-gate acache[i].next = (acache_t *)&acache[i];
35477c478bd9Sstevel@tonic-gate acache[i].prev = (acache_t *)&acache[i];
35487c478bd9Sstevel@tonic-gate rw_init(&acache[i].lock, NULL, RW_DEFAULT, NULL);
35497c478bd9Sstevel@tonic-gate }
35507c478bd9Sstevel@tonic-gate acache_cache = kmem_cache_create("nfs_access_cache",
35517c478bd9Sstevel@tonic-gate sizeof (acache_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
35527c478bd9Sstevel@tonic-gate /*
35537c478bd9Sstevel@tonic-gate * Allocate and initialize the client handle cache
35547c478bd9Sstevel@tonic-gate */
35557c478bd9Sstevel@tonic-gate chtab_cache = kmem_cache_create("client_handle_cache",
35567106075aSmarks sizeof (struct chtab), 0, NULL, NULL, clreclaim, NULL, NULL, 0);
35577c478bd9Sstevel@tonic-gate /*
35587c478bd9Sstevel@tonic-gate * Initialize the list of per-zone client handles (and associated data).
35597c478bd9Sstevel@tonic-gate * This needs to be done before we call zone_key_create().
35607c478bd9Sstevel@tonic-gate */
35617c478bd9Sstevel@tonic-gate list_create(&nfs_clnt_list, sizeof (struct nfs_clnt),
35627c478bd9Sstevel@tonic-gate offsetof(struct nfs_clnt, nfscl_node));
35637c478bd9Sstevel@tonic-gate /*
35647c478bd9Sstevel@tonic-gate * Initialize the zone_key for per-zone client handle lists.
35657c478bd9Sstevel@tonic-gate */
35667c478bd9Sstevel@tonic-gate zone_key_create(&nfsclnt_zone_key, clinit_zone, NULL, clfini_zone);
35677c478bd9Sstevel@tonic-gate /*
35687c478bd9Sstevel@tonic-gate * Initialize the various mutexes and reader/writer locks
35697c478bd9Sstevel@tonic-gate */
35707c478bd9Sstevel@tonic-gate mutex_init(&rpfreelist_lock, NULL, MUTEX_DEFAULT, NULL);
35717c478bd9Sstevel@tonic-gate mutex_init(&newnum_lock, NULL, MUTEX_DEFAULT, NULL);
35727c478bd9Sstevel@tonic-gate mutex_init(&nfs_minor_lock, NULL, MUTEX_DEFAULT, NULL);
35737c478bd9Sstevel@tonic-gate
35747c478bd9Sstevel@tonic-gate /*
35757c478bd9Sstevel@tonic-gate * Assign unique major number for all nfs mounts
35767c478bd9Sstevel@tonic-gate */
35777c478bd9Sstevel@tonic-gate if ((nfs_major = getudev()) == -1) {
35787c478bd9Sstevel@tonic-gate zcmn_err(GLOBAL_ZONEID, CE_WARN,
35797c478bd9Sstevel@tonic-gate "nfs: init: can't get unique device number");
35807c478bd9Sstevel@tonic-gate nfs_major = 0;
35817c478bd9Sstevel@tonic-gate }
35827c478bd9Sstevel@tonic-gate nfs_minor = 0;
35837c478bd9Sstevel@tonic-gate
35847c478bd9Sstevel@tonic-gate if (nfs3_jukebox_delay == 0)
35857c478bd9Sstevel@tonic-gate nfs3_jukebox_delay = NFS3_JUKEBOX_DELAY;
35867c478bd9Sstevel@tonic-gate
35877c478bd9Sstevel@tonic-gate return (0);
35887c478bd9Sstevel@tonic-gate }
35897c478bd9Sstevel@tonic-gate
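/*
 * Undo everything done by nfs_subrinit(): destroy the kmem caches, free
 * the hash tables, tear down the global locks and delete the ZSD key.
 */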
35907c478bd9Sstevel@tonic-gate void
35917c478bd9Sstevel@tonic-gate nfs_subrfini(void)
35927c478bd9Sstevel@tonic-gate {
35937c478bd9Sstevel@tonic-gate int i;
35947c478bd9Sstevel@tonic-gate
35957c478bd9Sstevel@tonic-gate /*
35967c478bd9Sstevel@tonic-gate * Deallocate the rnode hash queues
35977c478bd9Sstevel@tonic-gate */
35987c478bd9Sstevel@tonic-gate kmem_cache_destroy(rnode_cache);
35997c478bd9Sstevel@tonic-gate
36007c478bd9Sstevel@tonic-gate for (i = 0; i < rtablesize; i++)
36017c478bd9Sstevel@tonic-gate rw_destroy(&rtable[i].r_lock);
36027c478bd9Sstevel@tonic-gate kmem_free(rtable, rtablesize * sizeof (*rtable));
36037c478bd9Sstevel@tonic-gate
36047c478bd9Sstevel@tonic-gate /*
36057c478bd9Sstevel@tonic-gate * Deallocate the access cache
36067c478bd9Sstevel@tonic-gate */
36077c478bd9Sstevel@tonic-gate kmem_cache_destroy(acache_cache);
36087c478bd9Sstevel@tonic-gate
36097c478bd9Sstevel@tonic-gate for (i = 0; i < acachesize; i++)
36107c478bd9Sstevel@tonic-gate rw_destroy(&acache[i].lock);
36117c478bd9Sstevel@tonic-gate kmem_free(acache, acachesize * sizeof (*acache));
36127c478bd9Sstevel@tonic-gate
36137c478bd9Sstevel@tonic-gate /*
36147c478bd9Sstevel@tonic-gate * Deallocate the client handle cache
36157c478bd9Sstevel@tonic-gate */
36167c478bd9Sstevel@tonic-gate kmem_cache_destroy(chtab_cache);
36177c478bd9Sstevel@tonic-gate
36187c478bd9Sstevel@tonic-gate /*
36197c478bd9Sstevel@tonic-gate * Destroy the various mutexes and reader/writer locks
36207c478bd9Sstevel@tonic-gate */
36217c478bd9Sstevel@tonic-gate mutex_destroy(&rpfreelist_lock);
36227c478bd9Sstevel@tonic-gate mutex_destroy(&newnum_lock);
36237c478bd9Sstevel@tonic-gate mutex_destroy(&nfs_minor_lock);
36247c478bd9Sstevel@tonic-gate (void) zone_key_delete(nfsclnt_zone_key);
36257c478bd9Sstevel@tonic-gate }
36267c478bd9Sstevel@tonic-gate
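/*
 * Map a local errno value to an NFS version 2 status.  Most errno
 * values map directly; only the exceptions are listed explicitly.
 */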
36277c478bd9Sstevel@tonic-gate enum nfsstat
36287c478bd9Sstevel@tonic-gate puterrno(int error)
36297c478bd9Sstevel@tonic-gate {
36307c478bd9Sstevel@tonic-gate
36317c478bd9Sstevel@tonic-gate switch (error) {
36327c478bd9Sstevel@tonic-gate case EOPNOTSUPP:
36337c478bd9Sstevel@tonic-gate return (NFSERR_OPNOTSUPP);
36347c478bd9Sstevel@tonic-gate case ENAMETOOLONG:
36357c478bd9Sstevel@tonic-gate return (NFSERR_NAMETOOLONG);
36367c478bd9Sstevel@tonic-gate case ENOTEMPTY:
36377c478bd9Sstevel@tonic-gate return (NFSERR_NOTEMPTY);
36387c478bd9Sstevel@tonic-gate case EDQUOT:
36397c478bd9Sstevel@tonic-gate return (NFSERR_DQUOT);
36407c478bd9Sstevel@tonic-gate case ESTALE:
36417c478bd9Sstevel@tonic-gate return (NFSERR_STALE);
36427c478bd9Sstevel@tonic-gate case EREMOTE:
36437c478bd9Sstevel@tonic-gate return (NFSERR_REMOTE);
36447c478bd9Sstevel@tonic-gate case ENOSYS:
36457c478bd9Sstevel@tonic-gate return (NFSERR_OPNOTSUPP);
36467c478bd9Sstevel@tonic-gate case EOVERFLOW:
36477c478bd9Sstevel@tonic-gate return (NFSERR_INVAL);
36487c478bd9Sstevel@tonic-gate default:
36497c478bd9Sstevel@tonic-gate return ((enum nfsstat)error);
36507c478bd9Sstevel@tonic-gate }
36517c478bd9Sstevel@tonic-gate /* NOTREACHED */
36527c478bd9Sstevel@tonic-gate }
36537c478bd9Sstevel@tonic-gate
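/*
 * Map an NFS version 2 status to a local errno value (the inverse of
 * puterrno() above).
 */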
36547c478bd9Sstevel@tonic-gate int
36557c478bd9Sstevel@tonic-gate geterrno(enum nfsstat status)
36567c478bd9Sstevel@tonic-gate {
36577c478bd9Sstevel@tonic-gate
36587c478bd9Sstevel@tonic-gate switch (status) {
36597c478bd9Sstevel@tonic-gate case NFSERR_OPNOTSUPP:
36607c478bd9Sstevel@tonic-gate return (EOPNOTSUPP);
36617c478bd9Sstevel@tonic-gate case NFSERR_NAMETOOLONG:
36627c478bd9Sstevel@tonic-gate return (ENAMETOOLONG);
36637c478bd9Sstevel@tonic-gate case NFSERR_NOTEMPTY:
36647c478bd9Sstevel@tonic-gate return (ENOTEMPTY);
36657c478bd9Sstevel@tonic-gate case NFSERR_DQUOT:
36667c478bd9Sstevel@tonic-gate return (EDQUOT);
36677c478bd9Sstevel@tonic-gate case NFSERR_STALE:
36687c478bd9Sstevel@tonic-gate return (ESTALE);
36697c478bd9Sstevel@tonic-gate case NFSERR_REMOTE:
36707c478bd9Sstevel@tonic-gate return (EREMOTE);
36717c478bd9Sstevel@tonic-gate case NFSERR_WFLUSH:
36727c478bd9Sstevel@tonic-gate return (EIO);
36737c478bd9Sstevel@tonic-gate default:
36747c478bd9Sstevel@tonic-gate return ((int)status);
36757c478bd9Sstevel@tonic-gate }
36767c478bd9Sstevel@tonic-gate /* NOTREACHED */
36777c478bd9Sstevel@tonic-gate }
36787c478bd9Sstevel@tonic-gate
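/*
 * Map a local errno value to an NFS version 3 status.  The DEBUG
 * variant checks the full set of expected errno values and warns about
 * anything unexpected; the non-DEBUG variant only translates the values
 * that do not map directly.
 */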
36797c478bd9Sstevel@tonic-gate enum nfsstat3
36807c478bd9Sstevel@tonic-gate puterrno3(int error)
36817c478bd9Sstevel@tonic-gate {
36827c478bd9Sstevel@tonic-gate
36837c478bd9Sstevel@tonic-gate #ifdef DEBUG
36847c478bd9Sstevel@tonic-gate switch (error) {
36857c478bd9Sstevel@tonic-gate case 0:
36867c478bd9Sstevel@tonic-gate return (NFS3_OK);
36877c478bd9Sstevel@tonic-gate case EPERM:
36887c478bd9Sstevel@tonic-gate return (NFS3ERR_PERM);
36897c478bd9Sstevel@tonic-gate case ENOENT:
36907c478bd9Sstevel@tonic-gate return (NFS3ERR_NOENT);
36917c478bd9Sstevel@tonic-gate case EIO:
36927c478bd9Sstevel@tonic-gate return (NFS3ERR_IO);
36937c478bd9Sstevel@tonic-gate case ENXIO:
36947c478bd9Sstevel@tonic-gate return (NFS3ERR_NXIO);
36957c478bd9Sstevel@tonic-gate case EACCES:
36967c478bd9Sstevel@tonic-gate return (NFS3ERR_ACCES);
36977c478bd9Sstevel@tonic-gate case EEXIST:
36987c478bd9Sstevel@tonic-gate return (NFS3ERR_EXIST);
36997c478bd9Sstevel@tonic-gate case EXDEV:
37007c478bd9Sstevel@tonic-gate return (NFS3ERR_XDEV);
37017c478bd9Sstevel@tonic-gate case ENODEV:
37027c478bd9Sstevel@tonic-gate return (NFS3ERR_NODEV);
37037c478bd9Sstevel@tonic-gate case ENOTDIR:
37047c478bd9Sstevel@tonic-gate return (NFS3ERR_NOTDIR);
37057c478bd9Sstevel@tonic-gate case EISDIR:
37067c478bd9Sstevel@tonic-gate return (NFS3ERR_ISDIR);
37077c478bd9Sstevel@tonic-gate case EINVAL:
37087c478bd9Sstevel@tonic-gate return (NFS3ERR_INVAL);
37097c478bd9Sstevel@tonic-gate case EFBIG:
37107c478bd9Sstevel@tonic-gate return (NFS3ERR_FBIG);
37117c478bd9Sstevel@tonic-gate case ENOSPC:
37127c478bd9Sstevel@tonic-gate return (NFS3ERR_NOSPC);
37137c478bd9Sstevel@tonic-gate case EROFS:
37147c478bd9Sstevel@tonic-gate return (NFS3ERR_ROFS);
37157c478bd9Sstevel@tonic-gate case EMLINK:
37167c478bd9Sstevel@tonic-gate return (NFS3ERR_MLINK);
37177c478bd9Sstevel@tonic-gate case ENAMETOOLONG:
37187c478bd9Sstevel@tonic-gate return (NFS3ERR_NAMETOOLONG);
37197c478bd9Sstevel@tonic-gate case ENOTEMPTY:
37207c478bd9Sstevel@tonic-gate return (NFS3ERR_NOTEMPTY);
37217c478bd9Sstevel@tonic-gate case EDQUOT:
37227c478bd9Sstevel@tonic-gate return (NFS3ERR_DQUOT);
37237c478bd9Sstevel@tonic-gate case ESTALE:
37247c478bd9Sstevel@tonic-gate return (NFS3ERR_STALE);
37257c478bd9Sstevel@tonic-gate case EREMOTE:
37267c478bd9Sstevel@tonic-gate return (NFS3ERR_REMOTE);
37277106075aSmarks case ENOSYS:
37287c478bd9Sstevel@tonic-gate case EOPNOTSUPP:
37297c478bd9Sstevel@tonic-gate return (NFS3ERR_NOTSUPP);
37307c478bd9Sstevel@tonic-gate case EOVERFLOW:
37317c478bd9Sstevel@tonic-gate return (NFS3ERR_INVAL);
37327c478bd9Sstevel@tonic-gate default:
37337c478bd9Sstevel@tonic-gate zcmn_err(getzoneid(), CE_WARN,
37347c478bd9Sstevel@tonic-gate "puterrno3: got error %d", error);
37357c478bd9Sstevel@tonic-gate return ((enum nfsstat3)error);
37367c478bd9Sstevel@tonic-gate }
37377c478bd9Sstevel@tonic-gate #else
37387c478bd9Sstevel@tonic-gate switch (error) {
37397c478bd9Sstevel@tonic-gate case ENAMETOOLONG:
37407c478bd9Sstevel@tonic-gate return (NFS3ERR_NAMETOOLONG);
37417c478bd9Sstevel@tonic-gate case ENOTEMPTY:
37427c478bd9Sstevel@tonic-gate return (NFS3ERR_NOTEMPTY);
37437c478bd9Sstevel@tonic-gate case EDQUOT:
37447c478bd9Sstevel@tonic-gate return (NFS3ERR_DQUOT);
37457c478bd9Sstevel@tonic-gate case ESTALE:
37467c478bd9Sstevel@tonic-gate return (NFS3ERR_STALE);
37477106075aSmarks case ENOSYS:
37487c478bd9Sstevel@tonic-gate case EOPNOTSUPP:
37497c478bd9Sstevel@tonic-gate return (NFS3ERR_NOTSUPP);
37507c478bd9Sstevel@tonic-gate case EREMOTE:
37517c478bd9Sstevel@tonic-gate return (NFS3ERR_REMOTE);
37527c478bd9Sstevel@tonic-gate case EOVERFLOW:
37537c478bd9Sstevel@tonic-gate return (NFS3ERR_INVAL);
37547c478bd9Sstevel@tonic-gate default:
37557c478bd9Sstevel@tonic-gate return ((enum nfsstat3)error);
37567c478bd9Sstevel@tonic-gate }
37577c478bd9Sstevel@tonic-gate #endif
37587c478bd9Sstevel@tonic-gate }
37597c478bd9Sstevel@tonic-gate
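/*
 * Map an NFS version 3 status to a local errno value (the inverse of
 * puterrno3() above), again with extra checking in the DEBUG case.
 */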
37607c478bd9Sstevel@tonic-gate int
37617c478bd9Sstevel@tonic-gate geterrno3(enum nfsstat3 status)
37627c478bd9Sstevel@tonic-gate {
37637c478bd9Sstevel@tonic-gate
37647c478bd9Sstevel@tonic-gate #ifdef DEBUG
37657c478bd9Sstevel@tonic-gate switch (status) {
37667c478bd9Sstevel@tonic-gate case NFS3_OK:
37677c478bd9Sstevel@tonic-gate return (0);
37687c478bd9Sstevel@tonic-gate case NFS3ERR_PERM:
37697c478bd9Sstevel@tonic-gate return (EPERM);
37707c478bd9Sstevel@tonic-gate case NFS3ERR_NOENT:
37717c478bd9Sstevel@tonic-gate return (ENOENT);
37727c478bd9Sstevel@tonic-gate case NFS3ERR_IO:
37737c478bd9Sstevel@tonic-gate return (EIO);
37747c478bd9Sstevel@tonic-gate case NFS3ERR_NXIO:
37757c478bd9Sstevel@tonic-gate return (ENXIO);
37767c478bd9Sstevel@tonic-gate case NFS3ERR_ACCES:
37777c478bd9Sstevel@tonic-gate return (EACCES);
37787c478bd9Sstevel@tonic-gate case NFS3ERR_EXIST:
37797c478bd9Sstevel@tonic-gate return (EEXIST);
37807c478bd9Sstevel@tonic-gate case NFS3ERR_XDEV:
37817c478bd9Sstevel@tonic-gate return (EXDEV);
37827c478bd9Sstevel@tonic-gate case NFS3ERR_NODEV:
37837c478bd9Sstevel@tonic-gate return (ENODEV);
37847c478bd9Sstevel@tonic-gate case NFS3ERR_NOTDIR:
37857c478bd9Sstevel@tonic-gate return (ENOTDIR);
37867c478bd9Sstevel@tonic-gate case NFS3ERR_ISDIR:
37877c478bd9Sstevel@tonic-gate return (EISDIR);
37887c478bd9Sstevel@tonic-gate case NFS3ERR_INVAL:
37897c478bd9Sstevel@tonic-gate return (EINVAL);
37907c478bd9Sstevel@tonic-gate case NFS3ERR_FBIG:
37917c478bd9Sstevel@tonic-gate return (EFBIG);
37927c478bd9Sstevel@tonic-gate case NFS3ERR_NOSPC:
37937c478bd9Sstevel@tonic-gate return (ENOSPC);
37947c478bd9Sstevel@tonic-gate case NFS3ERR_ROFS:
37957c478bd9Sstevel@tonic-gate return (EROFS);
37967c478bd9Sstevel@tonic-gate case NFS3ERR_MLINK:
37977c478bd9Sstevel@tonic-gate return (EMLINK);
37987c478bd9Sstevel@tonic-gate case NFS3ERR_NAMETOOLONG:
37997c478bd9Sstevel@tonic-gate return (ENAMETOOLONG);
38007c478bd9Sstevel@tonic-gate case NFS3ERR_NOTEMPTY:
38017c478bd9Sstevel@tonic-gate return (ENOTEMPTY);
38027c478bd9Sstevel@tonic-gate case NFS3ERR_DQUOT:
38037c478bd9Sstevel@tonic-gate return (EDQUOT);
38047c478bd9Sstevel@tonic-gate case NFS3ERR_STALE:
38057c478bd9Sstevel@tonic-gate return (ESTALE);
38067c478bd9Sstevel@tonic-gate case NFS3ERR_REMOTE:
38077c478bd9Sstevel@tonic-gate return (EREMOTE);
38087c478bd9Sstevel@tonic-gate case NFS3ERR_BADHANDLE:
38097c478bd9Sstevel@tonic-gate return (ESTALE);
38107c478bd9Sstevel@tonic-gate case NFS3ERR_NOT_SYNC:
38117c478bd9Sstevel@tonic-gate return (EINVAL);
38127c478bd9Sstevel@tonic-gate case NFS3ERR_BAD_COOKIE:
38137c478bd9Sstevel@tonic-gate return (ENOENT);
38147c478bd9Sstevel@tonic-gate case NFS3ERR_NOTSUPP:
38157c478bd9Sstevel@tonic-gate return (EOPNOTSUPP);
38167c478bd9Sstevel@tonic-gate case NFS3ERR_TOOSMALL:
38177c478bd9Sstevel@tonic-gate return (EINVAL);
38187c478bd9Sstevel@tonic-gate case NFS3ERR_SERVERFAULT:
38197c478bd9Sstevel@tonic-gate return (EIO);
38207c478bd9Sstevel@tonic-gate case NFS3ERR_BADTYPE:
38217c478bd9Sstevel@tonic-gate return (EINVAL);
38227c478bd9Sstevel@tonic-gate case NFS3ERR_JUKEBOX:
38237c478bd9Sstevel@tonic-gate return (ENXIO);
38247c478bd9Sstevel@tonic-gate default:
38257c478bd9Sstevel@tonic-gate zcmn_err(getzoneid(), CE_WARN,
38267c478bd9Sstevel@tonic-gate "geterrno3: got status %d", status);
38277c478bd9Sstevel@tonic-gate return ((int)status);
38287c478bd9Sstevel@tonic-gate }
38297c478bd9Sstevel@tonic-gate #else
38307c478bd9Sstevel@tonic-gate switch (status) {
38317c478bd9Sstevel@tonic-gate case NFS3ERR_NAMETOOLONG:
38327c478bd9Sstevel@tonic-gate return (ENAMETOOLONG);
38337c478bd9Sstevel@tonic-gate case NFS3ERR_NOTEMPTY:
38347c478bd9Sstevel@tonic-gate return (ENOTEMPTY);
38357c478bd9Sstevel@tonic-gate case NFS3ERR_DQUOT:
38367c478bd9Sstevel@tonic-gate return (EDQUOT);
38377c478bd9Sstevel@tonic-gate case NFS3ERR_STALE:
38387c478bd9Sstevel@tonic-gate case NFS3ERR_BADHANDLE:
38397c478bd9Sstevel@tonic-gate return (ESTALE);
38407c478bd9Sstevel@tonic-gate case NFS3ERR_NOTSUPP:
38417c478bd9Sstevel@tonic-gate return (EOPNOTSUPP);
38427c478bd9Sstevel@tonic-gate case NFS3ERR_REMOTE:
38437c478bd9Sstevel@tonic-gate return (EREMOTE);
38447c478bd9Sstevel@tonic-gate case NFS3ERR_NOT_SYNC:
38457c478bd9Sstevel@tonic-gate case NFS3ERR_TOOSMALL:
38467c478bd9Sstevel@tonic-gate case NFS3ERR_BADTYPE:
38477c478bd9Sstevel@tonic-gate return (EINVAL);
38487c478bd9Sstevel@tonic-gate case NFS3ERR_BAD_COOKIE:
38497c478bd9Sstevel@tonic-gate return (ENOENT);
38507c478bd9Sstevel@tonic-gate case NFS3ERR_SERVERFAULT:
38517c478bd9Sstevel@tonic-gate return (EIO);
38527c478bd9Sstevel@tonic-gate case NFS3ERR_JUKEBOX:
38537c478bd9Sstevel@tonic-gate return (ENXIO);
38547c478bd9Sstevel@tonic-gate default:
38557c478bd9Sstevel@tonic-gate return ((int)status);
38567c478bd9Sstevel@tonic-gate }
38577c478bd9Sstevel@tonic-gate #endif
38587c478bd9Sstevel@tonic-gate }
38597c478bd9Sstevel@tonic-gate
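/*
 * Allocate a reference-counted readdir cache entry.  The entry is
 * created with a count of one; rddir_cache_hold() and
 * rddir_cache_rele() below manage the count, and the entry is freed
 * when the last reference is released.
 */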
38607c478bd9Sstevel@tonic-gate rddir_cache *
38617c478bd9Sstevel@tonic-gate rddir_cache_alloc(int flags)
38627c478bd9Sstevel@tonic-gate {
38637c478bd9Sstevel@tonic-gate rddir_cache *rc;
38647c478bd9Sstevel@tonic-gate
38657c478bd9Sstevel@tonic-gate rc = kmem_alloc(sizeof (*rc), flags);
38667c478bd9Sstevel@tonic-gate if (rc != NULL) {
38677c478bd9Sstevel@tonic-gate rc->entries = NULL;
38687c478bd9Sstevel@tonic-gate rc->flags = RDDIR;
38697c478bd9Sstevel@tonic-gate cv_init(&rc->cv, NULL, CV_DEFAULT, NULL);
38707c478bd9Sstevel@tonic-gate mutex_init(&rc->lock, NULL, MUTEX_DEFAULT, NULL);
38717c478bd9Sstevel@tonic-gate rc->count = 1;
38727c478bd9Sstevel@tonic-gate #ifdef DEBUG
38731a5e258fSJosef 'Jeff' Sipek atomic_inc_64(&clstat_debug.dirent.value.ui64);
38747c478bd9Sstevel@tonic-gate #endif
38757c478bd9Sstevel@tonic-gate }
38767c478bd9Sstevel@tonic-gate return (rc);
38777c478bd9Sstevel@tonic-gate }
38787c478bd9Sstevel@tonic-gate
38797c478bd9Sstevel@tonic-gate static void
38807c478bd9Sstevel@tonic-gate rddir_cache_free(rddir_cache *rc)
38817c478bd9Sstevel@tonic-gate {
38827c478bd9Sstevel@tonic-gate
38837c478bd9Sstevel@tonic-gate #ifdef DEBUG
38841a5e258fSJosef 'Jeff' Sipek atomic_dec_64(&clstat_debug.dirent.value.ui64);
38857c478bd9Sstevel@tonic-gate #endif
38867c478bd9Sstevel@tonic-gate if (rc->entries != NULL) {
38877c478bd9Sstevel@tonic-gate #ifdef DEBUG
38887c478bd9Sstevel@tonic-gate rddir_cache_buf_free(rc->entries, rc->buflen);
38897c478bd9Sstevel@tonic-gate #else
38907c478bd9Sstevel@tonic-gate kmem_free(rc->entries, rc->buflen);
38917c478bd9Sstevel@tonic-gate #endif
38927c478bd9Sstevel@tonic-gate }
38937c478bd9Sstevel@tonic-gate cv_destroy(&rc->cv);
38947c478bd9Sstevel@tonic-gate mutex_destroy(&rc->lock);
38957c478bd9Sstevel@tonic-gate kmem_free(rc, sizeof (*rc));
38967c478bd9Sstevel@tonic-gate }
38977c478bd9Sstevel@tonic-gate
38987c478bd9Sstevel@tonic-gate void
38997c478bd9Sstevel@tonic-gate rddir_cache_hold(rddir_cache *rc)
39007c478bd9Sstevel@tonic-gate {
39017c478bd9Sstevel@tonic-gate
39027c478bd9Sstevel@tonic-gate mutex_enter(&rc->lock);
39037c478bd9Sstevel@tonic-gate rc->count++;
39047c478bd9Sstevel@tonic-gate mutex_exit(&rc->lock);
39057c478bd9Sstevel@tonic-gate }
39067c478bd9Sstevel@tonic-gate
39077c478bd9Sstevel@tonic-gate void
39087c478bd9Sstevel@tonic-gate rddir_cache_rele(rddir_cache *rc)
39097c478bd9Sstevel@tonic-gate {
39107c478bd9Sstevel@tonic-gate
39117c478bd9Sstevel@tonic-gate mutex_enter(&rc->lock);
39127c478bd9Sstevel@tonic-gate ASSERT(rc->count > 0);
39137c478bd9Sstevel@tonic-gate if (--rc->count == 0) {
39147c478bd9Sstevel@tonic-gate mutex_exit(&rc->lock);
39157c478bd9Sstevel@tonic-gate rddir_cache_free(rc);
39167c478bd9Sstevel@tonic-gate } else
39177c478bd9Sstevel@tonic-gate mutex_exit(&rc->lock);
39187c478bd9Sstevel@tonic-gate }
39197c478bd9Sstevel@tonic-gate
39207c478bd9Sstevel@tonic-gate #ifdef DEBUG
39217c478bd9Sstevel@tonic-gate char *
39227c478bd9Sstevel@tonic-gate rddir_cache_buf_alloc(size_t size, int flags)
39237c478bd9Sstevel@tonic-gate {
39247c478bd9Sstevel@tonic-gate char *rc;
39257c478bd9Sstevel@tonic-gate
39267c478bd9Sstevel@tonic-gate rc = kmem_alloc(size, flags);
39277c478bd9Sstevel@tonic-gate if (rc != NULL)
39287c478bd9Sstevel@tonic-gate atomic_add_64(&clstat_debug.dirents.value.ui64, size);
39297c478bd9Sstevel@tonic-gate return (rc);
39307c478bd9Sstevel@tonic-gate }
39317c478bd9Sstevel@tonic-gate
39327c478bd9Sstevel@tonic-gate void
39337c478bd9Sstevel@tonic-gate rddir_cache_buf_free(void *addr, size_t size)
39347c478bd9Sstevel@tonic-gate {
39357c478bd9Sstevel@tonic-gate
39367c478bd9Sstevel@tonic-gate atomic_add_64(&clstat_debug.dirents.value.ui64, -(int64_t)size);
39377c478bd9Sstevel@tonic-gate kmem_free(addr, size);
39387c478bd9Sstevel@tonic-gate }
39397c478bd9Sstevel@tonic-gate #endif
39407c478bd9Sstevel@tonic-gate
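/*
 * The routines below implement kmem cache reclaim for the rnode cache.
 * nfs_free_data_reclaim() releases the credential, symlink, ACL,
 * pathconf, access cache and readdir cache data held by an rnode on the
 * free list; it returns non-zero if anything was freed.
 */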
39417c478bd9Sstevel@tonic-gate static int
39427c478bd9Sstevel@tonic-gate nfs_free_data_reclaim(rnode_t *rp)
39437c478bd9Sstevel@tonic-gate {
39447c478bd9Sstevel@tonic-gate char *contents;
39457c478bd9Sstevel@tonic-gate int size;
39467c478bd9Sstevel@tonic-gate vsecattr_t *vsp;
39477c478bd9Sstevel@tonic-gate nfs3_pathconf_info *info;
39487c478bd9Sstevel@tonic-gate int freed;
39497c478bd9Sstevel@tonic-gate cred_t *cred;
39507c478bd9Sstevel@tonic-gate
39517c478bd9Sstevel@tonic-gate /*
39527c478bd9Sstevel@tonic-gate * Free any held credentials and caches which
39537c478bd9Sstevel@tonic-gate * may be associated with this rnode.
39547c478bd9Sstevel@tonic-gate */
39557c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
39567c478bd9Sstevel@tonic-gate cred = rp->r_cred;
39577c478bd9Sstevel@tonic-gate rp->r_cred = NULL;
39587c478bd9Sstevel@tonic-gate contents = rp->r_symlink.contents;
39597c478bd9Sstevel@tonic-gate size = rp->r_symlink.size;
39607c478bd9Sstevel@tonic-gate rp->r_symlink.contents = NULL;
39617c478bd9Sstevel@tonic-gate vsp = rp->r_secattr;
39627c478bd9Sstevel@tonic-gate rp->r_secattr = NULL;
39637c478bd9Sstevel@tonic-gate info = rp->r_pathconf;
39647c478bd9Sstevel@tonic-gate rp->r_pathconf = NULL;
39657c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
39667c478bd9Sstevel@tonic-gate
39677c478bd9Sstevel@tonic-gate if (cred != NULL)
39687c478bd9Sstevel@tonic-gate crfree(cred);
39697c478bd9Sstevel@tonic-gate
39707c478bd9Sstevel@tonic-gate /*
39717c478bd9Sstevel@tonic-gate * Free the access cache entries.
39727c478bd9Sstevel@tonic-gate */
39737c478bd9Sstevel@tonic-gate freed = nfs_access_purge_rp(rp);
39747c478bd9Sstevel@tonic-gate
39757c478bd9Sstevel@tonic-gate if (!HAVE_RDDIR_CACHE(rp) &&
39767c478bd9Sstevel@tonic-gate contents == NULL &&
39777c478bd9Sstevel@tonic-gate vsp == NULL &&
39787c478bd9Sstevel@tonic-gate info == NULL)
39797c478bd9Sstevel@tonic-gate return (freed);
39807c478bd9Sstevel@tonic-gate
39817c478bd9Sstevel@tonic-gate /*
39827c478bd9Sstevel@tonic-gate * Free the readdir cache entries
39837c478bd9Sstevel@tonic-gate */
39847c478bd9Sstevel@tonic-gate if (HAVE_RDDIR_CACHE(rp))
39857c478bd9Sstevel@tonic-gate nfs_purge_rddir_cache(RTOV(rp));
39867c478bd9Sstevel@tonic-gate
39877c478bd9Sstevel@tonic-gate /*
39887c478bd9Sstevel@tonic-gate * Free the symbolic link cache.
39897c478bd9Sstevel@tonic-gate */
39907c478bd9Sstevel@tonic-gate if (contents != NULL) {
39917c478bd9Sstevel@tonic-gate
39927c478bd9Sstevel@tonic-gate kmem_free((void *)contents, size);
39937c478bd9Sstevel@tonic-gate }
39947c478bd9Sstevel@tonic-gate
39957c478bd9Sstevel@tonic-gate /*
39967c478bd9Sstevel@tonic-gate * Free any cached ACL.
39977c478bd9Sstevel@tonic-gate */
39987c478bd9Sstevel@tonic-gate if (vsp != NULL)
39997c478bd9Sstevel@tonic-gate nfs_acl_free(vsp);
40007c478bd9Sstevel@tonic-gate
40017c478bd9Sstevel@tonic-gate /*
40027c478bd9Sstevel@tonic-gate * Free any cached pathconf information.
40037c478bd9Sstevel@tonic-gate */
40047c478bd9Sstevel@tonic-gate if (info != NULL)
40057c478bd9Sstevel@tonic-gate kmem_free(info, sizeof (*info));
40067c478bd9Sstevel@tonic-gate
40077c478bd9Sstevel@tonic-gate return (1);
40087c478bd9Sstevel@tonic-gate }
40097c478bd9Sstevel@tonic-gate
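/*
 * Like nfs_free_data_reclaim(), but for rnodes that may still be in
 * active use: the credential is left alone and r_statelock is only
 * tried, not waited for, so the walk never blocks.
 */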
40107c478bd9Sstevel@tonic-gate static int
40117c478bd9Sstevel@tonic-gate nfs_active_data_reclaim(rnode_t *rp)
40127c478bd9Sstevel@tonic-gate {
40137c478bd9Sstevel@tonic-gate char *contents;
40147c478bd9Sstevel@tonic-gate int size;
40157c478bd9Sstevel@tonic-gate vsecattr_t *vsp;
40167c478bd9Sstevel@tonic-gate nfs3_pathconf_info *info;
40177c478bd9Sstevel@tonic-gate int freed;
40187c478bd9Sstevel@tonic-gate
40197c478bd9Sstevel@tonic-gate /*
40207c478bd9Sstevel@tonic-gate * Free any held credentials and caches which
40217c478bd9Sstevel@tonic-gate * may be associated with this rnode.
40227c478bd9Sstevel@tonic-gate */
40237c478bd9Sstevel@tonic-gate if (!mutex_tryenter(&rp->r_statelock))
40247c478bd9Sstevel@tonic-gate return (0);
40257c478bd9Sstevel@tonic-gate contents = rp->r_symlink.contents;
40267c478bd9Sstevel@tonic-gate size = rp->r_symlink.size;
40277c478bd9Sstevel@tonic-gate rp->r_symlink.contents = NULL;
40287c478bd9Sstevel@tonic-gate vsp = rp->r_secattr;
40297c478bd9Sstevel@tonic-gate rp->r_secattr = NULL;
40307c478bd9Sstevel@tonic-gate info = rp->r_pathconf;
40317c478bd9Sstevel@tonic-gate rp->r_pathconf = NULL;
40327c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
40337c478bd9Sstevel@tonic-gate
40347c478bd9Sstevel@tonic-gate /*
40357c478bd9Sstevel@tonic-gate * Free the access cache entries.
40367c478bd9Sstevel@tonic-gate */
40377c478bd9Sstevel@tonic-gate freed = nfs_access_purge_rp(rp);
40387c478bd9Sstevel@tonic-gate
40397c478bd9Sstevel@tonic-gate if (!HAVE_RDDIR_CACHE(rp) &&
40407c478bd9Sstevel@tonic-gate contents == NULL &&
40417c478bd9Sstevel@tonic-gate vsp == NULL &&
40427c478bd9Sstevel@tonic-gate info == NULL)
40437c478bd9Sstevel@tonic-gate return (freed);
40447c478bd9Sstevel@tonic-gate
40457c478bd9Sstevel@tonic-gate /*
40467c478bd9Sstevel@tonic-gate * Free the readdir cache entries
40477c478bd9Sstevel@tonic-gate */
40487c478bd9Sstevel@tonic-gate if (HAVE_RDDIR_CACHE(rp))
40497c478bd9Sstevel@tonic-gate nfs_purge_rddir_cache(RTOV(rp));
40507c478bd9Sstevel@tonic-gate
40517c478bd9Sstevel@tonic-gate /*
40527c478bd9Sstevel@tonic-gate * Free the symbolic link cache.
40537c478bd9Sstevel@tonic-gate */
40547c478bd9Sstevel@tonic-gate if (contents != NULL) {
40557c478bd9Sstevel@tonic-gate
40567c478bd9Sstevel@tonic-gate kmem_free((void *)contents, size);
40577c478bd9Sstevel@tonic-gate }
40587c478bd9Sstevel@tonic-gate
40597c478bd9Sstevel@tonic-gate /*
40607c478bd9Sstevel@tonic-gate * Free any cached ACL.
40617c478bd9Sstevel@tonic-gate */
40627c478bd9Sstevel@tonic-gate if (vsp != NULL)
40637c478bd9Sstevel@tonic-gate nfs_acl_free(vsp);
40647c478bd9Sstevel@tonic-gate
40657c478bd9Sstevel@tonic-gate /*
40667c478bd9Sstevel@tonic-gate * Free any cached pathconf information.
40677c478bd9Sstevel@tonic-gate */
40687c478bd9Sstevel@tonic-gate if (info != NULL)
40697c478bd9Sstevel@tonic-gate kmem_free(info, sizeof (*info));
40707c478bd9Sstevel@tonic-gate
40717c478bd9Sstevel@tonic-gate return (1);
40727c478bd9Sstevel@tonic-gate }
40737c478bd9Sstevel@tonic-gate
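/*
 * Walk the free list and release the auxiliary data held by each rnode
 * on it.  Returns non-zero if anything was freed.
 */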
40747c478bd9Sstevel@tonic-gate static int
40757c478bd9Sstevel@tonic-gate nfs_free_reclaim(void)
40767c478bd9Sstevel@tonic-gate {
40777c478bd9Sstevel@tonic-gate int freed;
40787c478bd9Sstevel@tonic-gate rnode_t *rp;
40797c478bd9Sstevel@tonic-gate
40807c478bd9Sstevel@tonic-gate #ifdef DEBUG
40817c478bd9Sstevel@tonic-gate clstat_debug.f_reclaim.value.ui64++;
40827c478bd9Sstevel@tonic-gate #endif
40837c478bd9Sstevel@tonic-gate freed = 0;
40847c478bd9Sstevel@tonic-gate mutex_enter(&rpfreelist_lock);
40857c478bd9Sstevel@tonic-gate rp = rpfreelist;
40867c478bd9Sstevel@tonic-gate if (rp != NULL) {
40877c478bd9Sstevel@tonic-gate do {
40887c478bd9Sstevel@tonic-gate if (nfs_free_data_reclaim(rp))
40897c478bd9Sstevel@tonic-gate freed = 1;
40907c478bd9Sstevel@tonic-gate } while ((rp = rp->r_freef) != rpfreelist);
40917c478bd9Sstevel@tonic-gate }
40927c478bd9Sstevel@tonic-gate mutex_exit(&rpfreelist_lock);
40937c478bd9Sstevel@tonic-gate return (freed);
40947c478bd9Sstevel@tonic-gate }
40957c478bd9Sstevel@tonic-gate
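/*
 * Walk the entire rnode hash table and release whatever auxiliary data
 * can be freed without blocking.  Returns non-zero if anything was
 * freed.
 */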
40967c478bd9Sstevel@tonic-gate static int
40977c478bd9Sstevel@tonic-gate nfs_active_reclaim(void)
40987c478bd9Sstevel@tonic-gate {
40997c478bd9Sstevel@tonic-gate int freed;
41007c478bd9Sstevel@tonic-gate int index;
41017c478bd9Sstevel@tonic-gate rnode_t *rp;
41027c478bd9Sstevel@tonic-gate
41037c478bd9Sstevel@tonic-gate #ifdef DEBUG
41047c478bd9Sstevel@tonic-gate clstat_debug.a_reclaim.value.ui64++;
41057c478bd9Sstevel@tonic-gate #endif
41067c478bd9Sstevel@tonic-gate freed = 0;
41077c478bd9Sstevel@tonic-gate for (index = 0; index < rtablesize; index++) {
41087c478bd9Sstevel@tonic-gate rw_enter(&rtable[index].r_lock, RW_READER);
41097c478bd9Sstevel@tonic-gate for (rp = rtable[index].r_hashf;
41107c478bd9Sstevel@tonic-gate rp != (rnode_t *)(&rtable[index]);
41117c478bd9Sstevel@tonic-gate rp = rp->r_hashf) {
41127c478bd9Sstevel@tonic-gate if (nfs_active_data_reclaim(rp))
41137c478bd9Sstevel@tonic-gate freed = 1;
41147c478bd9Sstevel@tonic-gate }
41157c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock);
41167c478bd9Sstevel@tonic-gate }
41177c478bd9Sstevel@tonic-gate return (freed);
41187c478bd9Sstevel@tonic-gate }
41197c478bd9Sstevel@tonic-gate
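/*
 * Last resort: pull rnodes off the free list, remove them from the hash
 * table, and let rp_addfree() destroy them outright so that their
 * memory can be returned to the system.
 */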
41207c478bd9Sstevel@tonic-gate static int
41217c478bd9Sstevel@tonic-gate nfs_rnode_reclaim(void)
41227c478bd9Sstevel@tonic-gate {
41237c478bd9Sstevel@tonic-gate int freed;
41247c478bd9Sstevel@tonic-gate rnode_t *rp;
41257c478bd9Sstevel@tonic-gate vnode_t *vp;
41267c478bd9Sstevel@tonic-gate
41277c478bd9Sstevel@tonic-gate #ifdef DEBUG
41287c478bd9Sstevel@tonic-gate clstat_debug.r_reclaim.value.ui64++;
41297c478bd9Sstevel@tonic-gate #endif
41307c478bd9Sstevel@tonic-gate freed = 0;
41317c478bd9Sstevel@tonic-gate mutex_enter(&rpfreelist_lock);
41327c478bd9Sstevel@tonic-gate while ((rp = rpfreelist) != NULL) {
41337c478bd9Sstevel@tonic-gate rp_rmfree(rp);
41347c478bd9Sstevel@tonic-gate mutex_exit(&rpfreelist_lock);
41357c478bd9Sstevel@tonic-gate if (rp->r_flags & RHASHED) {
41367c478bd9Sstevel@tonic-gate vp = RTOV(rp);
41377c478bd9Sstevel@tonic-gate rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
41387c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock);
41397c478bd9Sstevel@tonic-gate if (vp->v_count > 1) {
41407c478bd9Sstevel@tonic-gate vp->v_count--;
41417c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock);
41427c478bd9Sstevel@tonic-gate rw_exit(&rp->r_hashq->r_lock);
41437c478bd9Sstevel@tonic-gate mutex_enter(&rpfreelist_lock);
41447c478bd9Sstevel@tonic-gate continue;
41457c478bd9Sstevel@tonic-gate }
41467c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock);
41477c478bd9Sstevel@tonic-gate rp_rmhash_locked(rp);
41487c478bd9Sstevel@tonic-gate rw_exit(&rp->r_hashq->r_lock);
41497c478bd9Sstevel@tonic-gate }
41507c478bd9Sstevel@tonic-gate /*
41517c478bd9Sstevel@tonic-gate * This call to rp_addfree will end up destroying the
41527c478bd9Sstevel@tonic-gate * rnode, but in a safe way with the appropriate set
41537c478bd9Sstevel@tonic-gate * of checks done.
41547c478bd9Sstevel@tonic-gate */
41557c478bd9Sstevel@tonic-gate rp_addfree(rp, CRED());
41567c478bd9Sstevel@tonic-gate mutex_enter(&rpfreelist_lock);
41577c478bd9Sstevel@tonic-gate }
41587c478bd9Sstevel@tonic-gate mutex_exit(&rpfreelist_lock);
41597c478bd9Sstevel@tonic-gate return (freed);
41607c478bd9Sstevel@tonic-gate }
41617c478bd9Sstevel@tonic-gate
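/*
 * kmem reclaim callback for the rnode cache: try the cheapest form of
 * reclaim first and fall through to progressively more aggressive ones
 * only if nothing was freed.
 */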
41627c478bd9Sstevel@tonic-gate /*ARGSUSED*/
41637c478bd9Sstevel@tonic-gate static void
41647c478bd9Sstevel@tonic-gate nfs_reclaim(void *cdrarg)
41657c478bd9Sstevel@tonic-gate {
41667c478bd9Sstevel@tonic-gate
41677c478bd9Sstevel@tonic-gate #ifdef DEBUG
41687c478bd9Sstevel@tonic-gate clstat_debug.reclaim.value.ui64++;
41697c478bd9Sstevel@tonic-gate #endif
41707c478bd9Sstevel@tonic-gate if (nfs_free_reclaim())
41717c478bd9Sstevel@tonic-gate return;
41727c478bd9Sstevel@tonic-gate
41737c478bd9Sstevel@tonic-gate if (nfs_active_reclaim())
41747c478bd9Sstevel@tonic-gate return;
41757c478bd9Sstevel@tonic-gate
41767c478bd9Sstevel@tonic-gate (void) nfs_rnode_reclaim();
41777c478bd9Sstevel@tonic-gate }
41787c478bd9Sstevel@tonic-gate
41797c478bd9Sstevel@tonic-gate /*
41807c478bd9Sstevel@tonic-gate * NFS client failover support
41817c478bd9Sstevel@tonic-gate *
41827c478bd9Sstevel@tonic-gate * Routines to copy filehandles
41837c478bd9Sstevel@tonic-gate */
41847c478bd9Sstevel@tonic-gate void
41857c478bd9Sstevel@tonic-gate nfscopyfh(caddr_t fhp, vnode_t *vp)
41867c478bd9Sstevel@tonic-gate {
41877c478bd9Sstevel@tonic-gate fhandle_t *dest = (fhandle_t *)fhp;
41887c478bd9Sstevel@tonic-gate
41897c478bd9Sstevel@tonic-gate if (dest != NULL)
41907c478bd9Sstevel@tonic-gate *dest = *VTOFH(vp);
41917c478bd9Sstevel@tonic-gate }
41927c478bd9Sstevel@tonic-gate
41937c478bd9Sstevel@tonic-gate void
41947c478bd9Sstevel@tonic-gate nfs3copyfh(caddr_t fhp, vnode_t *vp)
41957c478bd9Sstevel@tonic-gate {
41967c478bd9Sstevel@tonic-gate nfs_fh3 *dest = (nfs_fh3 *)fhp;
41977c478bd9Sstevel@tonic-gate
41987c478bd9Sstevel@tonic-gate if (dest != NULL)
41997c478bd9Sstevel@tonic-gate *dest = *VTOFH3(vp);
42007c478bd9Sstevel@tonic-gate }
42017c478bd9Sstevel@tonic-gate
42027c478bd9Sstevel@tonic-gate /*
42037c478bd9Sstevel@tonic-gate * NFS client failover support
42047c478bd9Sstevel@tonic-gate *
42057c478bd9Sstevel@tonic-gate * failover_safe() will test various conditions to ensure that
42067c478bd9Sstevel@tonic-gate * failover is permitted for this vnode. It will be denied
42077c478bd9Sstevel@tonic-gate * if:
42087c478bd9Sstevel@tonic-gate * 1) the operation in progress does not support failover (NULL fi)
42097c478bd9Sstevel@tonic-gate * 2) there are no available replicas (NULL mi_servers->sv_next)
42107c478bd9Sstevel@tonic-gate * 3) any locks are outstanding on this file
42117c478bd9Sstevel@tonic-gate */
42127c478bd9Sstevel@tonic-gate static int
42137c478bd9Sstevel@tonic-gate failover_safe(failinfo_t *fi)
42147c478bd9Sstevel@tonic-gate {
42157c478bd9Sstevel@tonic-gate
42167c478bd9Sstevel@tonic-gate /*
42177c478bd9Sstevel@tonic-gate * Does this op permit failover?
42187c478bd9Sstevel@tonic-gate */
42197c478bd9Sstevel@tonic-gate if (fi == NULL || fi->vp == NULL)
42207c478bd9Sstevel@tonic-gate return (0);
42217c478bd9Sstevel@tonic-gate
42227c478bd9Sstevel@tonic-gate /*
42237c478bd9Sstevel@tonic-gate * Are there any alternates to failover to?
42247c478bd9Sstevel@tonic-gate */
42257c478bd9Sstevel@tonic-gate if (VTOMI(fi->vp)->mi_servers->sv_next == NULL)
42267c478bd9Sstevel@tonic-gate return (0);
42277c478bd9Sstevel@tonic-gate
42287c478bd9Sstevel@tonic-gate /*
42297c478bd9Sstevel@tonic-gate * Disable check; we've forced local locking
42307c478bd9Sstevel@tonic-gate *
42317c478bd9Sstevel@tonic-gate * if (flk_has_remote_locks(fi->vp))
42327c478bd9Sstevel@tonic-gate * return (0);
42337c478bd9Sstevel@tonic-gate */
42347c478bd9Sstevel@tonic-gate
42357c478bd9Sstevel@tonic-gate /*
42367c478bd9Sstevel@tonic-gate * If we have no partial path, we can't do anything
42377c478bd9Sstevel@tonic-gate */
42387c478bd9Sstevel@tonic-gate if (VTOR(fi->vp)->r_path == NULL)
42397c478bd9Sstevel@tonic-gate return (0);
42407c478bd9Sstevel@tonic-gate
42417c478bd9Sstevel@tonic-gate return (1);
42427c478bd9Sstevel@tonic-gate }
42437c478bd9Sstevel@tonic-gate
42447c478bd9Sstevel@tonic-gate #include <sys/thread.h>
42457c478bd9Sstevel@tonic-gate
42467c478bd9Sstevel@tonic-gate /*
42477c478bd9Sstevel@tonic-gate * NFS client failover support
42487c478bd9Sstevel@tonic-gate *
42497c478bd9Sstevel@tonic-gate * failover_newserver() will start a search for a new server,
42507c478bd9Sstevel@tonic-gate * preferably by starting an async thread to do the work. If
42517c478bd9Sstevel@tonic-gate * someone is already doing this (recognizable by MI_BINDINPROG
42527c478bd9Sstevel@tonic-gate * being set), it will simply return and the calling thread
42537c478bd9Sstevel@tonic-gate * will queue on the mi_failover_cv condition variable.
42547c478bd9Sstevel@tonic-gate */
42557c478bd9Sstevel@tonic-gate static void
42567c478bd9Sstevel@tonic-gate failover_newserver(mntinfo_t *mi)
42577c478bd9Sstevel@tonic-gate {
42587c478bd9Sstevel@tonic-gate /*
42597c478bd9Sstevel@tonic-gate * Check if someone else is doing this already
42607c478bd9Sstevel@tonic-gate */
42617c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
42627c478bd9Sstevel@tonic-gate if (mi->mi_flags & MI_BINDINPROG) {
42637c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
42647c478bd9Sstevel@tonic-gate return;
42657c478bd9Sstevel@tonic-gate }
42667c478bd9Sstevel@tonic-gate mi->mi_flags |= MI_BINDINPROG;
42677c478bd9Sstevel@tonic-gate
42687c478bd9Sstevel@tonic-gate /*
42697c478bd9Sstevel@tonic-gate * Need to hold the vfs struct so that it can't be released
42707c478bd9Sstevel@tonic-gate * while the failover thread is selecting a new server.
42717c478bd9Sstevel@tonic-gate */
42727c478bd9Sstevel@tonic-gate VFS_HOLD(mi->mi_vfsp);
42737c478bd9Sstevel@tonic-gate
42747c478bd9Sstevel@tonic-gate /*
42757c478bd9Sstevel@tonic-gate * Start a thread to do the real searching.
42767c478bd9Sstevel@tonic-gate */
42777c478bd9Sstevel@tonic-gate (void) zthread_create(NULL, 0, failover_thread, mi, 0, minclsyspri);
42787c478bd9Sstevel@tonic-gate
42797c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
42807c478bd9Sstevel@tonic-gate }
42817c478bd9Sstevel@tonic-gate
42827c478bd9Sstevel@tonic-gate /*
42837c478bd9Sstevel@tonic-gate * NFS client failover support
42847c478bd9Sstevel@tonic-gate *
42857c478bd9Sstevel@tonic-gate * failover_thread() will find a new server to replace the one
42867c478bd9Sstevel@tonic-gate * currently in use, wake up other threads waiting on this mount
42877c478bd9Sstevel@tonic-gate * point, and die. It will start at the head of the server list
42887c478bd9Sstevel@tonic-gate * and poll servers until it finds one whose NFS server is
42897c478bd9Sstevel@tonic-gate * registered and responds to a NULL procedure ping.
42907c478bd9Sstevel@tonic-gate *
42917c478bd9Sstevel@tonic-gate * XXX failover_thread is unsafe within the scope of the
42927c478bd9Sstevel@tonic-gate * present model defined for cpr to suspend the system.
42937c478bd9Sstevel@tonic-gate * Specifically, over-the-wire calls made by the thread
42947c478bd9Sstevel@tonic-gate * are unsafe. The thread needs to be reevaluated in case of
42957c478bd9Sstevel@tonic-gate * future updates to the cpr suspend model.
42967c478bd9Sstevel@tonic-gate */
42977c478bd9Sstevel@tonic-gate static void
42987c478bd9Sstevel@tonic-gate failover_thread(mntinfo_t *mi)
42997c478bd9Sstevel@tonic-gate {
43007c478bd9Sstevel@tonic-gate servinfo_t *svp = NULL;
43017c478bd9Sstevel@tonic-gate CLIENT *cl;
43027c478bd9Sstevel@tonic-gate enum clnt_stat status;
43037c478bd9Sstevel@tonic-gate struct timeval tv;
43047c478bd9Sstevel@tonic-gate int error;
43057c478bd9Sstevel@tonic-gate int oncethru = 0;
43067c478bd9Sstevel@tonic-gate callb_cpr_t cprinfo;
43077c478bd9Sstevel@tonic-gate rnode_t *rp;
43087c478bd9Sstevel@tonic-gate int index;
43097c478bd9Sstevel@tonic-gate char *srvnames;
43107c478bd9Sstevel@tonic-gate size_t srvnames_len;
43117c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl = NULL;
43127c478bd9Sstevel@tonic-gate zoneid_t zoneid = getzoneid();
43137c478bd9Sstevel@tonic-gate
43147c478bd9Sstevel@tonic-gate #ifdef DEBUG
43157c478bd9Sstevel@tonic-gate /*
43167c478bd9Sstevel@tonic-gate * This is currently only needed to access counters which exist on
43177c478bd9Sstevel@tonic-gate * DEBUG kernels, hence we don't want to pay the penalty of the lookup
43187c478bd9Sstevel@tonic-gate * on non-DEBUG kernels.
43197c478bd9Sstevel@tonic-gate */
4320108322fbScarlsonj nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone());
43217c478bd9Sstevel@tonic-gate ASSERT(nfscl != NULL);
43227c478bd9Sstevel@tonic-gate #endif
43237c478bd9Sstevel@tonic-gate
43247c478bd9Sstevel@tonic-gate /*
43257c478bd9Sstevel@tonic-gate * It's safe to piggyback on the mi_lock since the failover_newserver()
43267c478bd9Sstevel@tonic-gate * code guarantees that there will be only one failover thread
43277c478bd9Sstevel@tonic-gate * per mountinfo at any given time.
43287c478bd9Sstevel@tonic-gate */
43297c478bd9Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &mi->mi_lock, callb_generic_cpr,
43307c478bd9Sstevel@tonic-gate "failover_thread");
43317c478bd9Sstevel@tonic-gate
43327c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
43337c478bd9Sstevel@tonic-gate while (mi->mi_readers) {
43347c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo);
43357c478bd9Sstevel@tonic-gate cv_wait(&mi->mi_failover_cv, &mi->mi_lock);
43367c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &mi->mi_lock);
43377c478bd9Sstevel@tonic-gate }
43387c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
43397c478bd9Sstevel@tonic-gate
43407c478bd9Sstevel@tonic-gate tv.tv_sec = 2;
43417c478bd9Sstevel@tonic-gate tv.tv_usec = 0;
43427c478bd9Sstevel@tonic-gate
43437c478bd9Sstevel@tonic-gate /*
43447c478bd9Sstevel@tonic-gate * Ping the null NFS procedure of every server in
43457c478bd9Sstevel@tonic-gate * the list until one responds. We always start
43467c478bd9Sstevel@tonic-gate * at the head of the list and always skip the one
43477c478bd9Sstevel@tonic-gate * that is current, since it's caused us a problem.
43487c478bd9Sstevel@tonic-gate */
43497c478bd9Sstevel@tonic-gate while (svp == NULL) {
43507c478bd9Sstevel@tonic-gate for (svp = mi->mi_servers; svp; svp = svp->sv_next) {
43517c478bd9Sstevel@tonic-gate if (!oncethru && svp == mi->mi_curr_serv)
43527c478bd9Sstevel@tonic-gate continue;
43537c478bd9Sstevel@tonic-gate
43547c478bd9Sstevel@tonic-gate /*
43557c478bd9Sstevel@tonic-gate * If the file system was forcibly umounted
43567c478bd9Sstevel@tonic-gate * while trying to do a failover, then just
43577c478bd9Sstevel@tonic-gate * give up on the failover. It won't matter
43587c478bd9Sstevel@tonic-gate * what the server is.
43597c478bd9Sstevel@tonic-gate */
43607c478bd9Sstevel@tonic-gate if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
43617c478bd9Sstevel@tonic-gate svp = NULL;
43627c478bd9Sstevel@tonic-gate goto done;
43637c478bd9Sstevel@tonic-gate }
43647c478bd9Sstevel@tonic-gate
43657c478bd9Sstevel@tonic-gate error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr,
43667c478bd9Sstevel@tonic-gate NFS_PROGRAM, NFS_VERSION, 0, 1, CRED(), &cl);
43677c478bd9Sstevel@tonic-gate if (error)
43687c478bd9Sstevel@tonic-gate continue;
43697c478bd9Sstevel@tonic-gate
43707c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_INT))
43717c478bd9Sstevel@tonic-gate cl->cl_nosignal = TRUE;
43727c478bd9Sstevel@tonic-gate status = CLNT_CALL(cl, RFS_NULL, xdr_void, NULL,
43737c478bd9Sstevel@tonic-gate xdr_void, NULL, tv);
43747c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_INT))
43757c478bd9Sstevel@tonic-gate cl->cl_nosignal = FALSE;
43767c478bd9Sstevel@tonic-gate AUTH_DESTROY(cl->cl_auth);
43777c478bd9Sstevel@tonic-gate CLNT_DESTROY(cl);
43787c478bd9Sstevel@tonic-gate if (status == RPC_SUCCESS) {
43797c478bd9Sstevel@tonic-gate if (svp == mi->mi_curr_serv) {
43807c478bd9Sstevel@tonic-gate #ifdef DEBUG
43817c478bd9Sstevel@tonic-gate zcmn_err(zoneid, CE_NOTE,
43827c478bd9Sstevel@tonic-gate "NFS%d: failing over: selecting original server %s",
43837c478bd9Sstevel@tonic-gate mi->mi_vers, svp->sv_hostname);
43847c478bd9Sstevel@tonic-gate #else
43857c478bd9Sstevel@tonic-gate zcmn_err(zoneid, CE_NOTE,
43867c478bd9Sstevel@tonic-gate "NFS: failing over: selecting original server %s",
43877c478bd9Sstevel@tonic-gate svp->sv_hostname);
43887c478bd9Sstevel@tonic-gate #endif
43897c478bd9Sstevel@tonic-gate } else {
43907c478bd9Sstevel@tonic-gate #ifdef DEBUG
43917c478bd9Sstevel@tonic-gate zcmn_err(zoneid, CE_NOTE,
43927c478bd9Sstevel@tonic-gate "NFS%d: failing over from %s to %s",
43937c478bd9Sstevel@tonic-gate mi->mi_vers,
43947c478bd9Sstevel@tonic-gate mi->mi_curr_serv->sv_hostname,
43957c478bd9Sstevel@tonic-gate svp->sv_hostname);
43967c478bd9Sstevel@tonic-gate #else
43977c478bd9Sstevel@tonic-gate zcmn_err(zoneid, CE_NOTE,
43987c478bd9Sstevel@tonic-gate "NFS: failing over from %s to %s",
43997c478bd9Sstevel@tonic-gate mi->mi_curr_serv->sv_hostname,
44007c478bd9Sstevel@tonic-gate svp->sv_hostname);
44017c478bd9Sstevel@tonic-gate #endif
44027c478bd9Sstevel@tonic-gate }
44037c478bd9Sstevel@tonic-gate break;
44047c478bd9Sstevel@tonic-gate }
44057c478bd9Sstevel@tonic-gate }
44067c478bd9Sstevel@tonic-gate
44077c478bd9Sstevel@tonic-gate if (svp == NULL) {
44087c478bd9Sstevel@tonic-gate if (!oncethru) {
44097c478bd9Sstevel@tonic-gate srvnames = nfs_getsrvnames(mi, &srvnames_len);
44107c478bd9Sstevel@tonic-gate #ifdef DEBUG
44117c478bd9Sstevel@tonic-gate zprintf(zoneid,
44127c478bd9Sstevel@tonic-gate "NFS%d servers %s not responding "
44137c478bd9Sstevel@tonic-gate "still trying\n", mi->mi_vers, srvnames);
44147c478bd9Sstevel@tonic-gate #else
44157c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS servers %s not responding "
44167c478bd9Sstevel@tonic-gate "still trying\n", srvnames);
44177c478bd9Sstevel@tonic-gate #endif
44187c478bd9Sstevel@tonic-gate oncethru = 1;
44197c478bd9Sstevel@tonic-gate }
44207c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
44217c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo);
44227c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
44237c478bd9Sstevel@tonic-gate delay(hz);
44247c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
44257c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &mi->mi_lock);
44267c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
44277c478bd9Sstevel@tonic-gate }
44287c478bd9Sstevel@tonic-gate }
44297c478bd9Sstevel@tonic-gate
44307c478bd9Sstevel@tonic-gate if (oncethru) {
44317c478bd9Sstevel@tonic-gate #ifdef DEBUG
44327c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS%d servers %s ok\n", mi->mi_vers, srvnames);
44337c478bd9Sstevel@tonic-gate #else
44347c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS servers %s ok\n", srvnames);
44357c478bd9Sstevel@tonic-gate #endif
44367c478bd9Sstevel@tonic-gate }
44377c478bd9Sstevel@tonic-gate
44387c478bd9Sstevel@tonic-gate if (svp != mi->mi_curr_serv) {
44397c478bd9Sstevel@tonic-gate (void) dnlc_purge_vfsp(mi->mi_vfsp, 0);
44407c478bd9Sstevel@tonic-gate index = rtablehash(&mi->mi_curr_serv->sv_fhandle);
44417c478bd9Sstevel@tonic-gate rw_enter(&rtable[index].r_lock, RW_WRITER);
44427c478bd9Sstevel@tonic-gate rp = rfind(&rtable[index], &mi->mi_curr_serv->sv_fhandle,
44437c478bd9Sstevel@tonic-gate mi->mi_vfsp);
44447c478bd9Sstevel@tonic-gate if (rp != NULL) {
44457c478bd9Sstevel@tonic-gate if (rp->r_flags & RHASHED)
44467c478bd9Sstevel@tonic-gate rp_rmhash_locked(rp);
44477c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock);
44487c478bd9Sstevel@tonic-gate rp->r_server = svp;
44497c478bd9Sstevel@tonic-gate rp->r_fh = svp->sv_fhandle;
44507c478bd9Sstevel@tonic-gate (void) nfs_free_data_reclaim(rp);
44517c478bd9Sstevel@tonic-gate index = rtablehash(&rp->r_fh);
44527c478bd9Sstevel@tonic-gate rp->r_hashq = &rtable[index];
44537c478bd9Sstevel@tonic-gate rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
44547c478bd9Sstevel@tonic-gate vn_exists(RTOV(rp));
44557c478bd9Sstevel@tonic-gate rp_addhash(rp);
44567c478bd9Sstevel@tonic-gate rw_exit(&rp->r_hashq->r_lock);
44577c478bd9Sstevel@tonic-gate VN_RELE(RTOV(rp));
44587c478bd9Sstevel@tonic-gate } else
44597c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock);
44607c478bd9Sstevel@tonic-gate }
44617c478bd9Sstevel@tonic-gate
44627c478bd9Sstevel@tonic-gate done:
44637c478bd9Sstevel@tonic-gate if (oncethru)
44647c478bd9Sstevel@tonic-gate kmem_free(srvnames, srvnames_len);
44657c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
44667c478bd9Sstevel@tonic-gate mi->mi_flags &= ~MI_BINDINPROG;
44677c478bd9Sstevel@tonic-gate if (svp != NULL) {
44687c478bd9Sstevel@tonic-gate mi->mi_curr_serv = svp;
44697c478bd9Sstevel@tonic-gate mi->mi_failover++;
44707c478bd9Sstevel@tonic-gate #ifdef DEBUG
44717c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.failover.value.ui64++;
44727c478bd9Sstevel@tonic-gate #endif
44737c478bd9Sstevel@tonic-gate }
44747c478bd9Sstevel@tonic-gate cv_broadcast(&mi->mi_failover_cv);
44757c478bd9Sstevel@tonic-gate CALLB_CPR_EXIT(&cprinfo);
44767c478bd9Sstevel@tonic-gate VFS_RELE(mi->mi_vfsp);
44777c478bd9Sstevel@tonic-gate zthread_exit();
44787c478bd9Sstevel@tonic-gate /* NOTREACHED */
44797c478bd9Sstevel@tonic-gate }
44807c478bd9Sstevel@tonic-gate
44817c478bd9Sstevel@tonic-gate /*
44827c478bd9Sstevel@tonic-gate * NFS client failover support
44837c478bd9Sstevel@tonic-gate *
44847c478bd9Sstevel@tonic-gate * failover_wait() will put the thread to sleep until MI_BINDINPROG
44857c478bd9Sstevel@tonic-gate * is cleared, meaning that failover is complete. Called with
44867c478bd9Sstevel@tonic-gate * mi_lock mutex held.
44877c478bd9Sstevel@tonic-gate */
44887c478bd9Sstevel@tonic-gate static int
44897c478bd9Sstevel@tonic-gate failover_wait(mntinfo_t *mi)
44907c478bd9Sstevel@tonic-gate {
44917c478bd9Sstevel@tonic-gate k_sigset_t smask;
44927c478bd9Sstevel@tonic-gate
44937c478bd9Sstevel@tonic-gate /*
44947c478bd9Sstevel@tonic-gate * If someone else is hunting for a living server,
44957c478bd9Sstevel@tonic-gate * sleep until it's done. After our sleep, we may
44967c478bd9Sstevel@tonic-gate * be bound to the right server and get off cheaply.
44977c478bd9Sstevel@tonic-gate */
44987c478bd9Sstevel@tonic-gate while (mi->mi_flags & MI_BINDINPROG) {
44997c478bd9Sstevel@tonic-gate /*
45007c478bd9Sstevel@tonic-gate * Mask out all signals except SIGHUP, SIGINT, SIGQUIT
45017c478bd9Sstevel@tonic-gate * and SIGTERM. (Preserving the existing masks).
45027c478bd9Sstevel@tonic-gate * Mask out SIGINT if mount option nointr is specified.
45037c478bd9Sstevel@tonic-gate */
45047c478bd9Sstevel@tonic-gate sigintr(&smask, (int)mi->mi_flags & MI_INT);
45057c478bd9Sstevel@tonic-gate if (!cv_wait_sig(&mi->mi_failover_cv, &mi->mi_lock)) {
45067c478bd9Sstevel@tonic-gate /*
45077c478bd9Sstevel@tonic-gate * restore original signal mask
45087c478bd9Sstevel@tonic-gate */
45097c478bd9Sstevel@tonic-gate sigunintr(&smask);
45107c478bd9Sstevel@tonic-gate return (EINTR);
45117c478bd9Sstevel@tonic-gate }
45127c478bd9Sstevel@tonic-gate /*
45137c478bd9Sstevel@tonic-gate * restore original signal mask
45147c478bd9Sstevel@tonic-gate */
45157c478bd9Sstevel@tonic-gate sigunintr(&smask);
45167c478bd9Sstevel@tonic-gate }
45177c478bd9Sstevel@tonic-gate return (0);
45187c478bd9Sstevel@tonic-gate }
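/*
 * A minimal sketch of how a caller is expected to combine failover_wait()
 * and failover_newserver().  The real RPC dispatch loop elsewhere in the
 * client is more involved; this only illustrates the intended
 * MI_BINDINPROG protocol:
 *
 *	mutex_enter(&mi->mi_lock);
 *	if (mi->mi_flags & MI_BINDINPROG) {
 *		if (failover_wait(mi)) {	   interrupted by a signal
 *			mutex_exit(&mi->mi_lock);
 *			return (EINTR);
 *		}
 *		mutex_exit(&mi->mi_lock);
 *	} else {
 *		mutex_exit(&mi->mi_lock);
 *		failover_newserver(mi);		   starts failover_thread()
 *	}
 */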
45197c478bd9Sstevel@tonic-gate
45207c478bd9Sstevel@tonic-gate /*
45217c478bd9Sstevel@tonic-gate * NFS client failover support
45227c478bd9Sstevel@tonic-gate *
45237c478bd9Sstevel@tonic-gate * failover_remap() will do a partial pathname lookup and find the
45247c478bd9Sstevel@tonic-gate * desired vnode on the current server. The interim vnode will be
45257c478bd9Sstevel@tonic-gate * discarded after we pilfer the new filehandle.
45267c478bd9Sstevel@tonic-gate *
45277c478bd9Sstevel@tonic-gate * Side effects:
45287c478bd9Sstevel@tonic-gate * - This routine will also update the filehandle in the args structure
45297c478bd9Sstevel@tonic-gate * pointed to by the fi->fhp pointer if it is non-NULL.
45307c478bd9Sstevel@tonic-gate */
45317c478bd9Sstevel@tonic-gate
45327c478bd9Sstevel@tonic-gate static int
45337c478bd9Sstevel@tonic-gate failover_remap(failinfo_t *fi)
45347c478bd9Sstevel@tonic-gate {
45357c478bd9Sstevel@tonic-gate vnode_t *vp, *nvp, *rootvp;
45367c478bd9Sstevel@tonic-gate rnode_t *rp, *nrp;
45377c478bd9Sstevel@tonic-gate mntinfo_t *mi;
45387c478bd9Sstevel@tonic-gate int error;
45397c478bd9Sstevel@tonic-gate #ifdef DEBUG
45407c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl;
45417c478bd9Sstevel@tonic-gate
4542108322fbScarlsonj nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone());
45437c478bd9Sstevel@tonic-gate ASSERT(nfscl != NULL);
45447c478bd9Sstevel@tonic-gate #endif
45457c478bd9Sstevel@tonic-gate /*
45467c478bd9Sstevel@tonic-gate * Sanity check
45477c478bd9Sstevel@tonic-gate */
45487c478bd9Sstevel@tonic-gate if (fi == NULL || fi->vp == NULL || fi->lookupproc == NULL)
45497c478bd9Sstevel@tonic-gate return (EINVAL);
45507c478bd9Sstevel@tonic-gate vp = fi->vp;
45517c478bd9Sstevel@tonic-gate rp = VTOR(vp);
45527c478bd9Sstevel@tonic-gate mi = VTOMI(vp);
45537c478bd9Sstevel@tonic-gate
45547c478bd9Sstevel@tonic-gate if (!(vp->v_flag & VROOT)) {
45557c478bd9Sstevel@tonic-gate /*
45567c478bd9Sstevel@tonic-gate * Given the root fh, use the path stored in
45577c478bd9Sstevel@tonic-gate * the rnode to find the fh for the new server.
45587c478bd9Sstevel@tonic-gate */
45597c478bd9Sstevel@tonic-gate error = VFS_ROOT(mi->mi_vfsp, &rootvp);
45607c478bd9Sstevel@tonic-gate if (error)
45617c478bd9Sstevel@tonic-gate return (error);
45627c478bd9Sstevel@tonic-gate
45637c478bd9Sstevel@tonic-gate error = failover_lookup(rp->r_path, rootvp,
45647c478bd9Sstevel@tonic-gate fi->lookupproc, fi->xattrdirproc, &nvp);
45657c478bd9Sstevel@tonic-gate
45667c478bd9Sstevel@tonic-gate VN_RELE(rootvp);
45677c478bd9Sstevel@tonic-gate
45687c478bd9Sstevel@tonic-gate if (error)
45697c478bd9Sstevel@tonic-gate return (error);
45707c478bd9Sstevel@tonic-gate
45717c478bd9Sstevel@tonic-gate /*
45727c478bd9Sstevel@tonic-gate * If we found the same rnode, we're done now
45737c478bd9Sstevel@tonic-gate */
45747c478bd9Sstevel@tonic-gate if (nvp == vp) {
45757c478bd9Sstevel@tonic-gate /*
45767c478bd9Sstevel@tonic-gate * We failed over, but the new server may physically be the
45777c478bd9Sstevel@tonic-gate * same machine, or may share the same disk subsystem. In that
45787c478bd9Sstevel@tonic-gate * case the file handle for a particular file path is not going
45797c478bd9Sstevel@tonic-gate * to change, so a lookup of the same filehandle will
45807c478bd9Sstevel@tonic-gate * always locate the same rnode as the existing one.
45817c478bd9Sstevel@tonic-gate * All we might need to do is update r_server
45827c478bd9Sstevel@tonic-gate * with the current servinfo.
45837c478bd9Sstevel@tonic-gate */
45847c478bd9Sstevel@tonic-gate if (!VALID_FH(fi)) {
45857c478bd9Sstevel@tonic-gate rp->r_server = mi->mi_curr_serv;
45867c478bd9Sstevel@tonic-gate }
45877c478bd9Sstevel@tonic-gate VN_RELE(nvp);
45887c478bd9Sstevel@tonic-gate return (0);
45897c478bd9Sstevel@tonic-gate }
45907c478bd9Sstevel@tonic-gate
45917c478bd9Sstevel@tonic-gate /*
45927c478bd9Sstevel@tonic-gate * Try to make it so that no one else will find this
45937c478bd9Sstevel@tonic-gate * vnode because it is just a temporary to hold the
45947c478bd9Sstevel@tonic-gate * new file handle until that file handle can be
45957c478bd9Sstevel@tonic-gate * copied to the original vnode/rnode.
45967c478bd9Sstevel@tonic-gate */
45977c478bd9Sstevel@tonic-gate nrp = VTOR(nvp);
4598e8dc3b7dSvv149972 mutex_enter(&mi->mi_remap_lock);
4599e8dc3b7dSvv149972 /*
4600e8dc3b7dSvv149972 * Some other thread could have raced in here and already
4601e8dc3b7dSvv149972 * done the remap for this particular rnode before this
4602e8dc3b7dSvv149972 * thread got here. Check whether rp->r_server and
4603e8dc3b7dSvv149972 * mi->mi_curr_serv are the same and return if so.
4604e8dc3b7dSvv149972 */
4605e8dc3b7dSvv149972 if (VALID_FH(fi)) {
4606e8dc3b7dSvv149972 mutex_exit(&mi->mi_remap_lock);
4607e8dc3b7dSvv149972 VN_RELE(nvp);
4608e8dc3b7dSvv149972 return (0);
4609e8dc3b7dSvv149972 }
4610e8dc3b7dSvv149972
46117c478bd9Sstevel@tonic-gate if (nrp->r_flags & RHASHED)
46127c478bd9Sstevel@tonic-gate rp_rmhash(nrp);
46137c478bd9Sstevel@tonic-gate
46147c478bd9Sstevel@tonic-gate /*
46157c478bd9Sstevel@tonic-gate * As a heuristic check on the validity of the new
46167c478bd9Sstevel@tonic-gate * file, check that the size and type match against
46177c478bd9Sstevel@tonic-gate * what we remember from the old version.
46187c478bd9Sstevel@tonic-gate */
46197c478bd9Sstevel@tonic-gate if (rp->r_size != nrp->r_size || vp->v_type != nvp->v_type) {
4620e8dc3b7dSvv149972 mutex_exit(&mi->mi_remap_lock);
46217c478bd9Sstevel@tonic-gate zcmn_err(mi->mi_zone->zone_id, CE_WARN,
46227c478bd9Sstevel@tonic-gate "NFS replicas %s and %s: file %s not same.",
46237c478bd9Sstevel@tonic-gate rp->r_server->sv_hostname,
46247c478bd9Sstevel@tonic-gate nrp->r_server->sv_hostname, rp->r_path);
46257c478bd9Sstevel@tonic-gate VN_RELE(nvp);
46267c478bd9Sstevel@tonic-gate return (EINVAL);
46277c478bd9Sstevel@tonic-gate }
46287c478bd9Sstevel@tonic-gate
46297c478bd9Sstevel@tonic-gate /*
46307c478bd9Sstevel@tonic-gate * snarf the filehandle from the new rnode
46317c478bd9Sstevel@tonic-gate * then release it, again while updating the
46327c478bd9Sstevel@tonic-gate * hash queues for the rnode.
46337c478bd9Sstevel@tonic-gate */
46347c478bd9Sstevel@tonic-gate if (rp->r_flags & RHASHED)
46357c478bd9Sstevel@tonic-gate rp_rmhash(rp);
46367c478bd9Sstevel@tonic-gate rp->r_server = mi->mi_curr_serv;
46377c478bd9Sstevel@tonic-gate rp->r_fh = nrp->r_fh;
4638e8dc3b7dSvv149972 rp->r_hashq = nrp->r_hashq;
46397c478bd9Sstevel@tonic-gate /*
46407c478bd9Sstevel@tonic-gate * Copy the attributes from the new rnode to the old
46417c478bd9Sstevel@tonic-gate * rnode. This will help to reduce unnecessary page
46427c478bd9Sstevel@tonic-gate * cache flushes.
46437c478bd9Sstevel@tonic-gate */
46447c478bd9Sstevel@tonic-gate rp->r_attr = nrp->r_attr;
46457c478bd9Sstevel@tonic-gate rp->r_attrtime = nrp->r_attrtime;
46467c478bd9Sstevel@tonic-gate rp->r_mtime = nrp->r_mtime;
46477c478bd9Sstevel@tonic-gate (void) nfs_free_data_reclaim(rp);
46487c478bd9Sstevel@tonic-gate nfs_setswaplike(vp, &rp->r_attr);
46497c478bd9Sstevel@tonic-gate rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
46507c478bd9Sstevel@tonic-gate rp_addhash(rp);
46517c478bd9Sstevel@tonic-gate rw_exit(&rp->r_hashq->r_lock);
4652e8dc3b7dSvv149972 mutex_exit(&mi->mi_remap_lock);
46537c478bd9Sstevel@tonic-gate VN_RELE(nvp);
46547c478bd9Sstevel@tonic-gate }
46557c478bd9Sstevel@tonic-gate
46567c478bd9Sstevel@tonic-gate /*
46577c478bd9Sstevel@tonic-gate * Update successful failover remap count
46587c478bd9Sstevel@tonic-gate */
46597c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
46607c478bd9Sstevel@tonic-gate mi->mi_remap++;
46617c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
46627c478bd9Sstevel@tonic-gate #ifdef DEBUG
46637c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.remap.value.ui64++;
46647c478bd9Sstevel@tonic-gate #endif
46657c478bd9Sstevel@tonic-gate
46667c478bd9Sstevel@tonic-gate /*
46677c478bd9Sstevel@tonic-gate * If we have a copied filehandle to update, do it now.
46687c478bd9Sstevel@tonic-gate */
46697c478bd9Sstevel@tonic-gate if (fi->fhp != NULL && fi->copyproc != NULL)
46707c478bd9Sstevel@tonic-gate (*fi->copyproc)(fi->fhp, vp);
46717c478bd9Sstevel@tonic-gate
46727c478bd9Sstevel@tonic-gate return (0);
46737c478bd9Sstevel@tonic-gate }
46747c478bd9Sstevel@tonic-gate
46757c478bd9Sstevel@tonic-gate /*
46767c478bd9Sstevel@tonic-gate * NFS client failover support
46777c478bd9Sstevel@tonic-gate *
46787c478bd9Sstevel@tonic-gate * We want a simple pathname lookup routine to parse the pieces
46797c478bd9Sstevel@tonic-gate * of path in rp->r_path. We know that the path was created
46807c478bd9Sstevel@tonic-gate * as rnodes were made, so we know we have only to deal with
46817c478bd9Sstevel@tonic-gate * paths that look like:
46827c478bd9Sstevel@tonic-gate * dir1/dir2/dir3/file
46837c478bd9Sstevel@tonic-gate * Any evidence of anything like "..", symlinks, or ENOTDIR
46847c478bd9Sstevel@tonic-gate * is a hard error, because it means something in this filesystem
46857c478bd9Sstevel@tonic-gate * is different from the one we came from, or has changed under
46867c478bd9Sstevel@tonic-gate * us in some way. If this is true, we want the failure.
46877c478bd9Sstevel@tonic-gate *
46887c478bd9Sstevel@tonic-gate * Extended attributes: if the filesystem is mounted with extended
46897c478bd9Sstevel@tonic-gate * attributes enabled (-o xattr), the attribute directory will be
46907c478bd9Sstevel@tonic-gate * represented in the r_path as the magic name XATTR_RPATH. So if
46917c478bd9Sstevel@tonic-gate * we see that name in the pathname, it must be because this node
46927c478bd9Sstevel@tonic-gate * is an extended attribute. Therefore, look it up that way.
46937c478bd9Sstevel@tonic-gate */
46947c478bd9Sstevel@tonic-gate static int
46957c478bd9Sstevel@tonic-gate failover_lookup(char *path, vnode_t *root,
46967c478bd9Sstevel@tonic-gate int (*lookupproc)(vnode_t *, char *, vnode_t **, struct pathname *, int,
46977c478bd9Sstevel@tonic-gate vnode_t *, cred_t *, int),
46987c478bd9Sstevel@tonic-gate int (*xattrdirproc)(vnode_t *, vnode_t **, bool_t, cred_t *, int),
46997c478bd9Sstevel@tonic-gate vnode_t **new)
47007c478bd9Sstevel@tonic-gate {
47017c478bd9Sstevel@tonic-gate vnode_t *dvp, *nvp;
47027c478bd9Sstevel@tonic-gate int error = EINVAL;
47037c478bd9Sstevel@tonic-gate char *s, *p, *tmppath;
47047c478bd9Sstevel@tonic-gate size_t len;
47057c478bd9Sstevel@tonic-gate mntinfo_t *mi;
47067c478bd9Sstevel@tonic-gate bool_t xattr;
47077c478bd9Sstevel@tonic-gate
47087c478bd9Sstevel@tonic-gate /* Make local copy of path */
47097c478bd9Sstevel@tonic-gate len = strlen(path) + 1;
47107c478bd9Sstevel@tonic-gate tmppath = kmem_alloc(len, KM_SLEEP);
47117c478bd9Sstevel@tonic-gate (void) strcpy(tmppath, path);
47127c478bd9Sstevel@tonic-gate s = tmppath;
47137c478bd9Sstevel@tonic-gate
47147c478bd9Sstevel@tonic-gate dvp = root;
47157c478bd9Sstevel@tonic-gate VN_HOLD(dvp);
47167c478bd9Sstevel@tonic-gate mi = VTOMI(root);
47177c478bd9Sstevel@tonic-gate xattr = mi->mi_flags & MI_EXTATTR;
47187c478bd9Sstevel@tonic-gate
47197c478bd9Sstevel@tonic-gate do {
47207c478bd9Sstevel@tonic-gate p = strchr(s, '/');
47217c478bd9Sstevel@tonic-gate if (p != NULL)
47227c478bd9Sstevel@tonic-gate *p = '\0';
47237c478bd9Sstevel@tonic-gate if (xattr && strcmp(s, XATTR_RPATH) == 0) {
47247c478bd9Sstevel@tonic-gate error = (*xattrdirproc)(dvp, &nvp, FALSE, CRED(),
47257c478bd9Sstevel@tonic-gate RFSCALL_SOFT);
47267c478bd9Sstevel@tonic-gate } else {
47277c478bd9Sstevel@tonic-gate error = (*lookupproc)(dvp, s, &nvp, NULL, 0, NULL,
47287c478bd9Sstevel@tonic-gate CRED(), RFSCALL_SOFT);
47297c478bd9Sstevel@tonic-gate }
47307c478bd9Sstevel@tonic-gate if (p != NULL)
47317c478bd9Sstevel@tonic-gate *p++ = '/';
47327c478bd9Sstevel@tonic-gate if (error) {
47337c478bd9Sstevel@tonic-gate VN_RELE(dvp);
47347c478bd9Sstevel@tonic-gate kmem_free(tmppath, len);
47357c478bd9Sstevel@tonic-gate return (error);
47367c478bd9Sstevel@tonic-gate }
47377c478bd9Sstevel@tonic-gate s = p;
47387c478bd9Sstevel@tonic-gate VN_RELE(dvp);
47397c478bd9Sstevel@tonic-gate dvp = nvp;
47407c478bd9Sstevel@tonic-gate } while (p != NULL);
47417c478bd9Sstevel@tonic-gate
47427c478bd9Sstevel@tonic-gate if (nvp != NULL && new != NULL)
47437c478bd9Sstevel@tonic-gate *new = nvp;
47447c478bd9Sstevel@tonic-gate kmem_free(tmppath, len);
47457c478bd9Sstevel@tonic-gate return (0);
47467c478bd9Sstevel@tonic-gate }
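/*
 * Worked example of the parsing above: for an r_path of "dir1/dir2/file"
 * the loop issues three lookups, each relative to the vnode produced by
 * the previous one:
 *
 *	(*lookupproc)(rootvp, "dir1", &nvp, ...)
 *	(*lookupproc)(dir1vp, "dir2", &nvp, ...)
 *	(*lookupproc)(dir2vp, "file", &nvp, ...)
 *
 * and returns the final vnode through *new.  If the filesystem was
 * mounted with extended attributes and a component equals XATTR_RPATH,
 * that step uses (*xattrdirproc)() instead.
 */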
47477c478bd9Sstevel@tonic-gate
47487c478bd9Sstevel@tonic-gate /*
47497c478bd9Sstevel@tonic-gate * NFS client failover support
47507c478bd9Sstevel@tonic-gate *
47517c478bd9Sstevel@tonic-gate * sv_free() frees the malloc'd portion of a "servinfo_t".
47527c478bd9Sstevel@tonic-gate */
47537c478bd9Sstevel@tonic-gate void
47547c478bd9Sstevel@tonic-gate sv_free(servinfo_t *svp)
47557c478bd9Sstevel@tonic-gate {
47567c478bd9Sstevel@tonic-gate servinfo_t *next;
47577c478bd9Sstevel@tonic-gate struct knetconfig *knconf;
47587c478bd9Sstevel@tonic-gate
47597c478bd9Sstevel@tonic-gate while (svp != NULL) {
47607c478bd9Sstevel@tonic-gate next = svp->sv_next;
47617c478bd9Sstevel@tonic-gate if (svp->sv_secdata)
47627c478bd9Sstevel@tonic-gate sec_clnt_freeinfo(svp->sv_secdata);
47637c478bd9Sstevel@tonic-gate if (svp->sv_hostname && svp->sv_hostnamelen > 0)
47647c478bd9Sstevel@tonic-gate kmem_free(svp->sv_hostname, svp->sv_hostnamelen);
47657c478bd9Sstevel@tonic-gate knconf = svp->sv_knconf;
47667c478bd9Sstevel@tonic-gate if (knconf != NULL) {
47677c478bd9Sstevel@tonic-gate if (knconf->knc_protofmly != NULL)
47687c478bd9Sstevel@tonic-gate kmem_free(knconf->knc_protofmly, KNC_STRSIZE);
47697c478bd9Sstevel@tonic-gate if (knconf->knc_proto != NULL)
47707c478bd9Sstevel@tonic-gate kmem_free(knconf->knc_proto, KNC_STRSIZE);
47717c478bd9Sstevel@tonic-gate kmem_free(knconf, sizeof (*knconf));
47727c478bd9Sstevel@tonic-gate }
47737c478bd9Sstevel@tonic-gate knconf = svp->sv_origknconf;
47747c478bd9Sstevel@tonic-gate if (knconf != NULL) {
47757c478bd9Sstevel@tonic-gate if (knconf->knc_protofmly != NULL)
47767c478bd9Sstevel@tonic-gate kmem_free(knconf->knc_protofmly, KNC_STRSIZE);
47777c478bd9Sstevel@tonic-gate if (knconf->knc_proto != NULL)
47787c478bd9Sstevel@tonic-gate kmem_free(knconf->knc_proto, KNC_STRSIZE);
47797c478bd9Sstevel@tonic-gate kmem_free(knconf, sizeof (*knconf));
47807c478bd9Sstevel@tonic-gate }
47817c478bd9Sstevel@tonic-gate if (svp->sv_addr.buf != NULL && svp->sv_addr.maxlen != 0)
47827c478bd9Sstevel@tonic-gate kmem_free(svp->sv_addr.buf, svp->sv_addr.maxlen);
47837c478bd9Sstevel@tonic-gate mutex_destroy(&svp->sv_lock);
47847c478bd9Sstevel@tonic-gate kmem_free(svp, sizeof (*svp));
47857c478bd9Sstevel@tonic-gate svp = next;
47867c478bd9Sstevel@tonic-gate }
47877c478bd9Sstevel@tonic-gate }
47887c478bd9Sstevel@tonic-gate
47897c478bd9Sstevel@tonic-gate /*
47907c478bd9Sstevel@tonic-gate * Only can return non-zero if intr != 0.
47917c478bd9Sstevel@tonic-gate */
47927c478bd9Sstevel@tonic-gate int
47937c478bd9Sstevel@tonic-gate nfs_rw_enter_sig(nfs_rwlock_t *l, krw_t rw, int intr)
47947c478bd9Sstevel@tonic-gate {
47957c478bd9Sstevel@tonic-gate
47967c478bd9Sstevel@tonic-gate mutex_enter(&l->lock);
47977c478bd9Sstevel@tonic-gate
47987c478bd9Sstevel@tonic-gate /*
47997c478bd9Sstevel@tonic-gate * If this is a nested enter, then allow it. There
48007c478bd9Sstevel@tonic-gate * must be as many exits as enters through.
48017c478bd9Sstevel@tonic-gate * must be as many exits as enters, though.
48027c478bd9Sstevel@tonic-gate if (l->owner == curthread) {
48037c478bd9Sstevel@tonic-gate /* lock is held for writing by current thread */
48047c478bd9Sstevel@tonic-gate ASSERT(rw == RW_READER || rw == RW_WRITER);
48057c478bd9Sstevel@tonic-gate l->count--;
48067c478bd9Sstevel@tonic-gate } else if (rw == RW_READER) {
48077c478bd9Sstevel@tonic-gate /*
48087c478bd9Sstevel@tonic-gate * While there is a writer active or writers waiting,
48097c478bd9Sstevel@tonic-gate * then wait for them to finish up and move on. Then,
48107c478bd9Sstevel@tonic-gate * increment the count to indicate that a reader is
48117c478bd9Sstevel@tonic-gate * active.
48127c478bd9Sstevel@tonic-gate */
48137c478bd9Sstevel@tonic-gate while (l->count < 0 || l->waiters > 0) {
48147c478bd9Sstevel@tonic-gate if (intr) {
48157c478bd9Sstevel@tonic-gate klwp_t *lwp = ttolwp(curthread);
48167c478bd9Sstevel@tonic-gate
48177c478bd9Sstevel@tonic-gate if (lwp != NULL)
48187c478bd9Sstevel@tonic-gate lwp->lwp_nostop++;
4819c31e9bbdSMarcel Telka if (cv_wait_sig(&l->cv_rd, &l->lock) == 0) {
48207c478bd9Sstevel@tonic-gate if (lwp != NULL)
48217c478bd9Sstevel@tonic-gate lwp->lwp_nostop--;
48227c478bd9Sstevel@tonic-gate mutex_exit(&l->lock);
48237c478bd9Sstevel@tonic-gate return (EINTR);
48247c478bd9Sstevel@tonic-gate }
48257c478bd9Sstevel@tonic-gate if (lwp != NULL)
48267c478bd9Sstevel@tonic-gate lwp->lwp_nostop--;
48277c478bd9Sstevel@tonic-gate } else
4828c31e9bbdSMarcel Telka cv_wait(&l->cv_rd, &l->lock);
48297c478bd9Sstevel@tonic-gate }
48307c478bd9Sstevel@tonic-gate ASSERT(l->count < INT_MAX);
48317c478bd9Sstevel@tonic-gate #ifdef DEBUG
48327c478bd9Sstevel@tonic-gate if ((l->count % 10000) == 9999)
48337c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "nfs_rw_enter_sig: count %d on "
48347c478bd9Sstevel@tonic-gate "rwlock @ %p\n", l->count, (void *)l);
48357c478bd9Sstevel@tonic-gate #endif
48367c478bd9Sstevel@tonic-gate l->count++;
48377c478bd9Sstevel@tonic-gate } else {
48387c478bd9Sstevel@tonic-gate ASSERT(rw == RW_WRITER);
48397c478bd9Sstevel@tonic-gate /*
48407c478bd9Sstevel@tonic-gate * While there are readers active or a writer
48417c478bd9Sstevel@tonic-gate * active, then wait for all of the readers
48427c478bd9Sstevel@tonic-gate * to finish or for the writer to finish.
48437c478bd9Sstevel@tonic-gate * Then, set the owner field to curthread and
48447c478bd9Sstevel@tonic-gate * decrement count to indicate that a writer
48457c478bd9Sstevel@tonic-gate * is active.
48467c478bd9Sstevel@tonic-gate */
48476792c3a7SMarcel Telka while (l->count != 0) {
48487c478bd9Sstevel@tonic-gate l->waiters++;
48497c478bd9Sstevel@tonic-gate if (intr) {
48507c478bd9Sstevel@tonic-gate klwp_t *lwp = ttolwp(curthread);
48517c478bd9Sstevel@tonic-gate
48527c478bd9Sstevel@tonic-gate if (lwp != NULL)
48537c478bd9Sstevel@tonic-gate lwp->lwp_nostop++;
48546792c3a7SMarcel Telka if (cv_wait_sig(&l->cv, &l->lock) == 0) {
48557c478bd9Sstevel@tonic-gate if (lwp != NULL)
48567c478bd9Sstevel@tonic-gate lwp->lwp_nostop--;
48577c478bd9Sstevel@tonic-gate l->waiters--;
48586792c3a7SMarcel Telka /*
48596792c3a7SMarcel Telka * If there are readers active and no
4860c31e9bbdSMarcel Telka * writers waiting then wake up all of
4861c31e9bbdSMarcel Telka * the waiting readers (if any).
48626792c3a7SMarcel Telka */
48636792c3a7SMarcel Telka if (l->count > 0 && l->waiters == 0)
4864c31e9bbdSMarcel Telka cv_broadcast(&l->cv_rd);
48657c478bd9Sstevel@tonic-gate mutex_exit(&l->lock);
48667c478bd9Sstevel@tonic-gate return (EINTR);
48677c478bd9Sstevel@tonic-gate }
48687c478bd9Sstevel@tonic-gate if (lwp != NULL)
48697c478bd9Sstevel@tonic-gate lwp->lwp_nostop--;
48707c478bd9Sstevel@tonic-gate } else
48717c478bd9Sstevel@tonic-gate cv_wait(&l->cv, &l->lock);
48727c478bd9Sstevel@tonic-gate l->waiters--;
48737c478bd9Sstevel@tonic-gate }
48746792c3a7SMarcel Telka ASSERT(l->owner == NULL);
48757c478bd9Sstevel@tonic-gate l->owner = curthread;
48767c478bd9Sstevel@tonic-gate l->count--;
48777c478bd9Sstevel@tonic-gate }
48787c478bd9Sstevel@tonic-gate
48797c478bd9Sstevel@tonic-gate mutex_exit(&l->lock);
48807c478bd9Sstevel@tonic-gate
48817c478bd9Sstevel@tonic-gate return (0);
48827c478bd9Sstevel@tonic-gate }
48837c478bd9Sstevel@tonic-gate
48847c478bd9Sstevel@tonic-gate /*
48857c478bd9Sstevel@tonic-gate * If the lock is available, obtain it and return non-zero. If there is
48867c478bd9Sstevel@tonic-gate * already a conflicting lock, return 0 immediately.
48877c478bd9Sstevel@tonic-gate */
48887c478bd9Sstevel@tonic-gate
48897c478bd9Sstevel@tonic-gate int
48907c478bd9Sstevel@tonic-gate nfs_rw_tryenter(nfs_rwlock_t *l, krw_t rw)
48917c478bd9Sstevel@tonic-gate {
48927c478bd9Sstevel@tonic-gate mutex_enter(&l->lock);
48937c478bd9Sstevel@tonic-gate
48947c478bd9Sstevel@tonic-gate /*
48957c478bd9Sstevel@tonic-gate * If this is a nested enter, then allow it. There
48967c478bd9Sstevel@tonic-gate * must be as many exits as enters through.
48977c478bd9Sstevel@tonic-gate * must be as many exits as enters, though.
48987c478bd9Sstevel@tonic-gate if (l->owner == curthread) {
48997c478bd9Sstevel@tonic-gate /* lock is held for writing by current thread */
49007c478bd9Sstevel@tonic-gate ASSERT(rw == RW_READER || rw == RW_WRITER);
49017c478bd9Sstevel@tonic-gate l->count--;
49027c478bd9Sstevel@tonic-gate } else if (rw == RW_READER) {
49037c478bd9Sstevel@tonic-gate /*
49047c478bd9Sstevel@tonic-gate * If there is a writer active or writers waiting, deny the
49057c478bd9Sstevel@tonic-gate * lock. Otherwise, bump the count of readers.
49067c478bd9Sstevel@tonic-gate */
49077c478bd9Sstevel@tonic-gate if (l->count < 0 || l->waiters > 0) {
49087c478bd9Sstevel@tonic-gate mutex_exit(&l->lock);
49097c478bd9Sstevel@tonic-gate return (0);
49107c478bd9Sstevel@tonic-gate }
49117c478bd9Sstevel@tonic-gate l->count++;
49127c478bd9Sstevel@tonic-gate } else {
49137c478bd9Sstevel@tonic-gate ASSERT(rw == RW_WRITER);
49147c478bd9Sstevel@tonic-gate /*
49157c478bd9Sstevel@tonic-gate * If there are readers active or a writer active, deny the
49167c478bd9Sstevel@tonic-gate * lock. Otherwise, set the owner field to curthread and
49177c478bd9Sstevel@tonic-gate * decrement count to indicate that a writer is active.
49187c478bd9Sstevel@tonic-gate */
49196792c3a7SMarcel Telka if (l->count != 0) {
49207c478bd9Sstevel@tonic-gate mutex_exit(&l->lock);
49217c478bd9Sstevel@tonic-gate return (0);
49227c478bd9Sstevel@tonic-gate }
49236792c3a7SMarcel Telka ASSERT(l->owner == NULL);
49247c478bd9Sstevel@tonic-gate l->owner = curthread;
49257c478bd9Sstevel@tonic-gate l->count--;
49267c478bd9Sstevel@tonic-gate }
49277c478bd9Sstevel@tonic-gate
49287c478bd9Sstevel@tonic-gate mutex_exit(&l->lock);
49297c478bd9Sstevel@tonic-gate
49307c478bd9Sstevel@tonic-gate return (1);
49317c478bd9Sstevel@tonic-gate }
49327c478bd9Sstevel@tonic-gate
49337c478bd9Sstevel@tonic-gate void
49347c478bd9Sstevel@tonic-gate nfs_rw_exit(nfs_rwlock_t *l)
49357c478bd9Sstevel@tonic-gate {
49367c478bd9Sstevel@tonic-gate
49377c478bd9Sstevel@tonic-gate mutex_enter(&l->lock);
4938c31e9bbdSMarcel Telka
49397c478bd9Sstevel@tonic-gate if (l->owner != NULL) {
49407c478bd9Sstevel@tonic-gate ASSERT(l->owner == curthread);
4941c31e9bbdSMarcel Telka
4942c31e9bbdSMarcel Telka /*
4943c31e9bbdSMarcel Telka * To release a writer lock increment count to indicate that
4944c31e9bbdSMarcel Telka * there is one less writer active. If this was the last of
4945c31e9bbdSMarcel Telka * possibly nested writer locks, then clear the owner field as
4946c31e9bbdSMarcel Telka * well to indicate that there is no writer active.
4947c31e9bbdSMarcel Telka */
4948c31e9bbdSMarcel Telka ASSERT(l->count < 0);
49497c478bd9Sstevel@tonic-gate l->count++;
49507c478bd9Sstevel@tonic-gate if (l->count == 0) {
49517c478bd9Sstevel@tonic-gate l->owner = NULL;
4952c31e9bbdSMarcel Telka
4953c31e9bbdSMarcel Telka /*
4954c31e9bbdSMarcel Telka * If there are no writers waiting then wakeup all of
4955c31e9bbdSMarcel Telka * the waiting readers (if any).
4956c31e9bbdSMarcel Telka */
4957c31e9bbdSMarcel Telka if (l->waiters == 0)
4958c31e9bbdSMarcel Telka cv_broadcast(&l->cv_rd);
49597c478bd9Sstevel@tonic-gate }
49607c478bd9Sstevel@tonic-gate } else {
4961c31e9bbdSMarcel Telka /*
4962c31e9bbdSMarcel Telka * To release a reader lock just decrement count to indicate
4963c31e9bbdSMarcel Telka * that there is one less reader active.
4964c31e9bbdSMarcel Telka */
49657c478bd9Sstevel@tonic-gate ASSERT(l->count > 0);
49667c478bd9Sstevel@tonic-gate l->count--;
4967c31e9bbdSMarcel Telka }
4968c31e9bbdSMarcel Telka
4969c31e9bbdSMarcel Telka /*
4970c31e9bbdSMarcel Telka * If there are no readers active nor a writer active and there is a
4971c31e9bbdSMarcel Telka * writer waiting we need to wake up it.
4972c31e9bbdSMarcel Telka * writer waiting, we need to wake it up.
49737c478bd9Sstevel@tonic-gate if (l->count == 0 && l->waiters > 0)
49746792c3a7SMarcel Telka cv_signal(&l->cv);
49757c478bd9Sstevel@tonic-gate mutex_exit(&l->lock);
49767c478bd9Sstevel@tonic-gate }
49777c478bd9Sstevel@tonic-gate
49787c478bd9Sstevel@tonic-gate int
49797c478bd9Sstevel@tonic-gate nfs_rw_lock_held(nfs_rwlock_t *l, krw_t rw)
49807c478bd9Sstevel@tonic-gate {
49817c478bd9Sstevel@tonic-gate
49827c478bd9Sstevel@tonic-gate if (rw == RW_READER)
49837c478bd9Sstevel@tonic-gate return (l->count > 0);
49847c478bd9Sstevel@tonic-gate ASSERT(rw == RW_WRITER);
49857c478bd9Sstevel@tonic-gate return (l->count < 0);
49867c478bd9Sstevel@tonic-gate }
49877c478bd9Sstevel@tonic-gate
49887c478bd9Sstevel@tonic-gate /* ARGSUSED */
49897c478bd9Sstevel@tonic-gate void
49907c478bd9Sstevel@tonic-gate nfs_rw_init(nfs_rwlock_t *l, char *name, krw_type_t type, void *arg)
49917c478bd9Sstevel@tonic-gate {
49927c478bd9Sstevel@tonic-gate
49937c478bd9Sstevel@tonic-gate l->count = 0;
49947c478bd9Sstevel@tonic-gate l->waiters = 0;
49957c478bd9Sstevel@tonic-gate l->owner = NULL;
49967c478bd9Sstevel@tonic-gate mutex_init(&l->lock, NULL, MUTEX_DEFAULT, NULL);
49977c478bd9Sstevel@tonic-gate cv_init(&l->cv, NULL, CV_DEFAULT, NULL);
4998c31e9bbdSMarcel Telka cv_init(&l->cv_rd, NULL, CV_DEFAULT, NULL);
49997c478bd9Sstevel@tonic-gate }
50007c478bd9Sstevel@tonic-gate
50017c478bd9Sstevel@tonic-gate void
50027c478bd9Sstevel@tonic-gate nfs_rw_destroy(nfs_rwlock_t *l)
50037c478bd9Sstevel@tonic-gate {
50047c478bd9Sstevel@tonic-gate
50057c478bd9Sstevel@tonic-gate mutex_destroy(&l->lock);
50067c478bd9Sstevel@tonic-gate cv_destroy(&l->cv);
5007c31e9bbdSMarcel Telka cv_destroy(&l->cv_rd);
50087c478bd9Sstevel@tonic-gate }
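/*
 * A minimal usage sketch for the nfs_rwlock_t primitives above, assuming
 * the common pattern of protecting an rnode with its r_rwlock and using
 * the INTR() mount flag to decide interruptibility (the rnode field and
 * INTR() macro are assumptions here, shown only for illustration):
 *
 *	nfs_rw_init(&rp->r_rwlock, NULL, RW_DEFAULT, NULL);
 *
 *	if (nfs_rw_enter_sig(&rp->r_rwlock, RW_READER, INTR(vp)))
 *		return (EINTR);		   only possible when intr != 0
 *	... read-side work ...
 *	nfs_rw_exit(&rp->r_rwlock);
 *
 *	if (nfs_rw_tryenter(&rp->r_rwlock, RW_WRITER)) {
 *		... write-side work ...
 *		nfs_rw_exit(&rp->r_rwlock);
 *	}
 *
 *	nfs_rw_destroy(&rp->r_rwlock);
 */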
50097c478bd9Sstevel@tonic-gate
50107c478bd9Sstevel@tonic-gate int
50117c478bd9Sstevel@tonic-gate nfs3_rddir_compar(const void *x, const void *y)
50127c478bd9Sstevel@tonic-gate {
50137c478bd9Sstevel@tonic-gate rddir_cache *a = (rddir_cache *)x;
50147c478bd9Sstevel@tonic-gate rddir_cache *b = (rddir_cache *)y;
50157c478bd9Sstevel@tonic-gate
50167c478bd9Sstevel@tonic-gate if (a->nfs3_cookie == b->nfs3_cookie) {
50177c478bd9Sstevel@tonic-gate if (a->buflen == b->buflen)
50187c478bd9Sstevel@tonic-gate return (0);
50197c478bd9Sstevel@tonic-gate if (a->buflen < b->buflen)
50207c478bd9Sstevel@tonic-gate return (-1);
50217c478bd9Sstevel@tonic-gate return (1);
50227c478bd9Sstevel@tonic-gate }
50237c478bd9Sstevel@tonic-gate
50247c478bd9Sstevel@tonic-gate if (a->nfs3_cookie < b->nfs3_cookie)
50257c478bd9Sstevel@tonic-gate return (-1);
50267c478bd9Sstevel@tonic-gate
50277c478bd9Sstevel@tonic-gate return (1);
50287c478bd9Sstevel@tonic-gate }
50297c478bd9Sstevel@tonic-gate
50307c478bd9Sstevel@tonic-gate int
50317c478bd9Sstevel@tonic-gate nfs_rddir_compar(const void *x, const void *y)
50327c478bd9Sstevel@tonic-gate {
50337c478bd9Sstevel@tonic-gate rddir_cache *a = (rddir_cache *)x;
50347c478bd9Sstevel@tonic-gate rddir_cache *b = (rddir_cache *)y;
50357c478bd9Sstevel@tonic-gate
50367c478bd9Sstevel@tonic-gate if (a->nfs_cookie == b->nfs_cookie) {
50377c478bd9Sstevel@tonic-gate if (a->buflen == b->buflen)
50387c478bd9Sstevel@tonic-gate return (0);
50397c478bd9Sstevel@tonic-gate if (a->buflen < b->buflen)
50407c478bd9Sstevel@tonic-gate return (-1);
50417c478bd9Sstevel@tonic-gate return (1);
50427c478bd9Sstevel@tonic-gate }
50437c478bd9Sstevel@tonic-gate
50447c478bd9Sstevel@tonic-gate if (a->nfs_cookie < b->nfs_cookie)
50457c478bd9Sstevel@tonic-gate return (-1);
50467c478bd9Sstevel@tonic-gate
50477c478bd9Sstevel@tonic-gate return (1);
50487c478bd9Sstevel@tonic-gate }
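/*
 * These comparators order rddir_cache entries first by directory cookie
 * and then by buffer length.  A sketch of how an AVL tree of readdir
 * cache entries might be created with them (the r_dir field and the name
 * of the AVL linkage member are assumptions here):
 *
 *	avl_create(&rp->r_dir, nfs3_rddir_compar, sizeof (rddir_cache),
 *	    offsetof(rddir_cache, tree));
 *
 * after which avl_find()/avl_insert() locate or add the entry for a
 * given (cookie, buflen) pair.
 */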
50497c478bd9Sstevel@tonic-gate
50507c478bd9Sstevel@tonic-gate static char *
50517c478bd9Sstevel@tonic-gate nfs_getsrvnames(mntinfo_t *mi, size_t *len)
50527c478bd9Sstevel@tonic-gate {
50537c478bd9Sstevel@tonic-gate servinfo_t *s;
50547c478bd9Sstevel@tonic-gate char *srvnames;
50557c478bd9Sstevel@tonic-gate char *namep;
50567c478bd9Sstevel@tonic-gate size_t length;
50577c478bd9Sstevel@tonic-gate
50587c478bd9Sstevel@tonic-gate /*
50597c478bd9Sstevel@tonic-gate * Calculate the length of the string required to hold all
50607c478bd9Sstevel@tonic-gate * of the server names plus either a comma or a null
50617c478bd9Sstevel@tonic-gate * character following each individual one.
50627c478bd9Sstevel@tonic-gate */
50637c478bd9Sstevel@tonic-gate length = 0;
50647c478bd9Sstevel@tonic-gate for (s = mi->mi_servers; s != NULL; s = s->sv_next)
50657c478bd9Sstevel@tonic-gate length += s->sv_hostnamelen;
50667c478bd9Sstevel@tonic-gate
50677c478bd9Sstevel@tonic-gate srvnames = kmem_alloc(length, KM_SLEEP);
50687c478bd9Sstevel@tonic-gate
50697c478bd9Sstevel@tonic-gate namep = srvnames;
50707c478bd9Sstevel@tonic-gate for (s = mi->mi_servers; s != NULL; s = s->sv_next) {
50717c478bd9Sstevel@tonic-gate (void) strcpy(namep, s->sv_hostname);
50727c478bd9Sstevel@tonic-gate namep += s->sv_hostnamelen - 1;
50737c478bd9Sstevel@tonic-gate *namep++ = ',';
50747c478bd9Sstevel@tonic-gate }
50757c478bd9Sstevel@tonic-gate *--namep = '\0';
50767c478bd9Sstevel@tonic-gate
50777c478bd9Sstevel@tonic-gate *len = length;
50787c478bd9Sstevel@tonic-gate
50797c478bd9Sstevel@tonic-gate return (srvnames);
50807c478bd9Sstevel@tonic-gate }
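/*
 * Worked example for nfs_getsrvnames(): with two servers whose
 * sv_hostname/sv_hostnamelen pairs are ("alpha", 6) and ("beta", 5)
 * (lengths include the terminating NUL), length is 11, the returned
 * buffer holds the string "alpha,beta", and *len is set to 11 so the
 * caller can later kmem_free() the full allocation.
 */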
5081108322fbScarlsonj
5082108322fbScarlsonj /*
5083108322fbScarlsonj * These two functions are temporary and designed for the upgrade-workaround
5084108322fbScarlsonj * only. They cannot be used for general zone-crossing NFS client support, and
5085108322fbScarlsonj * will be removed shortly.
5086108322fbScarlsonj *
5087108322fbScarlsonj * When the workaround is enabled, all NFS traffic is forced into the global
5088108322fbScarlsonj * zone. These functions are called when the code needs to refer to the state
5089108322fbScarlsonj * of the underlying network connection. They're not called when the function
5090108322fbScarlsonj * needs to refer to the state of the process that invoked the system call.
5091108322fbScarlsonj * (E.g., when checking whether the zone is shutting down during the mount()
5092108322fbScarlsonj * call.)
5093108322fbScarlsonj */
5094108322fbScarlsonj
5095108322fbScarlsonj struct zone *
5096108322fbScarlsonj nfs_zone(void)
5097108322fbScarlsonj {
5098108322fbScarlsonj return (nfs_global_client_only != 0 ? global_zone : curproc->p_zone);
5099108322fbScarlsonj }
5100108322fbScarlsonj
5101108322fbScarlsonj zoneid_t
5102108322fbScarlsonj nfs_zoneid(void)
5103108322fbScarlsonj {
5104108322fbScarlsonj return (nfs_global_client_only != 0 ? GLOBAL_ZONEID : getzoneid());
5105108322fbScarlsonj }
510645916cd2Sjpk
510745916cd2Sjpk /*
510845916cd2Sjpk * nfs_mount_label_policy:
510945916cd2Sjpk * Determine whether the mount is allowed according to MAC check,
511045916cd2Sjpk * by comparing (where appropriate) label of the remote server
511145916cd2Sjpk * against the label of the zone being mounted into.
511245916cd2Sjpk *
511345916cd2Sjpk * Returns:
511445916cd2Sjpk * 0 : access allowed
511545916cd2Sjpk * -1 : read-only access allowed (i.e., read-down)
511645916cd2Sjpk * >0 : error code, such as EACCES
511745916cd2Sjpk */
511845916cd2Sjpk int
511945916cd2Sjpk nfs_mount_label_policy(vfs_t *vfsp, struct netbuf *addr,
512045916cd2Sjpk struct knetconfig *knconf, cred_t *cr)
512145916cd2Sjpk {
512245916cd2Sjpk int addr_type;
512345916cd2Sjpk void *ipaddr;
512445916cd2Sjpk bslabel_t *server_sl, *mntlabel;
512545916cd2Sjpk zone_t *mntzone = NULL;
512645916cd2Sjpk ts_label_t *zlabel;
512745916cd2Sjpk tsol_tpc_t *tp;
512845916cd2Sjpk ts_label_t *tsl = NULL;
512945916cd2Sjpk int retv;
513045916cd2Sjpk
513145916cd2Sjpk /*
513245916cd2Sjpk * Get the zone's label. Each zone on a labeled system has a label.
513345916cd2Sjpk */
513445916cd2Sjpk mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE);
513545916cd2Sjpk zlabel = mntzone->zone_slabel;
513645916cd2Sjpk ASSERT(zlabel != NULL);
513745916cd2Sjpk label_hold(zlabel);
513845916cd2Sjpk
513945916cd2Sjpk if (strcmp(knconf->knc_protofmly, NC_INET) == 0) {
514045916cd2Sjpk addr_type = IPV4_VERSION;
514145916cd2Sjpk ipaddr = &((struct sockaddr_in *)addr->buf)->sin_addr;
514245916cd2Sjpk } else if (strcmp(knconf->knc_protofmly, NC_INET6) == 0) {
514345916cd2Sjpk addr_type = IPV6_VERSION;
514445916cd2Sjpk ipaddr = &((struct sockaddr_in6 *)addr->buf)->sin6_addr;
514545916cd2Sjpk } else {
514645916cd2Sjpk retv = 0;
514745916cd2Sjpk goto out;
514845916cd2Sjpk }
514945916cd2Sjpk
515045916cd2Sjpk retv = EACCES; /* assume the worst */
515145916cd2Sjpk
515245916cd2Sjpk /*
515345916cd2Sjpk * Next, get the assigned label of the remote server.
515445916cd2Sjpk */
515545916cd2Sjpk tp = find_tpc(ipaddr, addr_type, B_FALSE);
515645916cd2Sjpk if (tp == NULL)
515745916cd2Sjpk goto out; /* error getting host entry */
515845916cd2Sjpk
515945916cd2Sjpk if (tp->tpc_tp.tp_doi != zlabel->tsl_doi)
516045916cd2Sjpk goto rel_tpc; /* invalid domain */
516145916cd2Sjpk if ((tp->tpc_tp.host_type != SUN_CIPSO) &&
516245916cd2Sjpk (tp->tpc_tp.host_type != UNLABELED))
516345916cd2Sjpk goto rel_tpc; /* invalid hosttype */
516445916cd2Sjpk
516545916cd2Sjpk if (tp->tpc_tp.host_type == SUN_CIPSO) {
516645916cd2Sjpk tsl = getflabel_cipso(vfsp);
516745916cd2Sjpk if (tsl == NULL)
516845916cd2Sjpk goto rel_tpc; /* error getting server lbl */
516945916cd2Sjpk
517045916cd2Sjpk server_sl = label2bslabel(tsl);
517145916cd2Sjpk } else { /* UNLABELED */
517245916cd2Sjpk server_sl = &tp->tpc_tp.tp_def_label;
517345916cd2Sjpk }
517445916cd2Sjpk
517545916cd2Sjpk mntlabel = label2bslabel(zlabel);
517645916cd2Sjpk
517745916cd2Sjpk /*
517845916cd2Sjpk * Now compare labels to complete the MAC check. If the labels
517945916cd2Sjpk * are equal or if the requestor is in the global zone and has
518045916cd2Sjpk * NET_MAC_AWARE, then allow read-write access. (Except for
518145916cd2Sjpk * mounts into the global zone itself; restrict these to
518245916cd2Sjpk * read-only.)
518345916cd2Sjpk *
518445916cd2Sjpk * If the requestor is in some other zone, but his label
518545916cd2Sjpk * dominates the server, then allow read-down.
518645916cd2Sjpk *
518745916cd2Sjpk * Otherwise, access is denied.
518845916cd2Sjpk */
518945916cd2Sjpk if (blequal(mntlabel, server_sl) ||
519045916cd2Sjpk (crgetzoneid(cr) == GLOBAL_ZONEID &&
519145916cd2Sjpk getpflags(NET_MAC_AWARE, cr) != 0)) {
519245916cd2Sjpk if ((mntzone == global_zone) ||
519345916cd2Sjpk !blequal(mntlabel, server_sl))
519445916cd2Sjpk retv = -1; /* read-only */
519545916cd2Sjpk else
519645916cd2Sjpk retv = 0; /* access OK */
519745916cd2Sjpk } else if (bldominates(mntlabel, server_sl)) {
519845916cd2Sjpk retv = -1; /* read-only */
519945916cd2Sjpk } else {
520045916cd2Sjpk retv = EACCES;
520145916cd2Sjpk }
520245916cd2Sjpk
520345916cd2Sjpk if (tsl != NULL)
520445916cd2Sjpk label_rele(tsl);
520545916cd2Sjpk
520645916cd2Sjpk rel_tpc:
520745916cd2Sjpk TPC_RELE(tp);
520845916cd2Sjpk out:
520945916cd2Sjpk if (mntzone)
521045916cd2Sjpk zone_rele(mntzone);
521145916cd2Sjpk label_rele(zlabel);
521245916cd2Sjpk return (retv);
521345916cd2Sjpk }
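/*
 * Summary of the label comparison above (assuming the address family and
 * template lookups succeeded), with the zone's mount label compared
 * against the server's label:
 *
 *	labels equal, mount into a non-global zone	-> 0      read-write
 *	labels equal, mount into the global zone	-> -1     read-only
 *	labels unequal, global-zone requestor with
 *	    NET_MAC_AWARE				-> -1     read-only
 *	zone label dominates the server label		-> -1     read-only
 *	anything else					-> EACCES
 */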
52149acbbeafSnn35248
52159acbbeafSnn35248 boolean_t
52169acbbeafSnn35248 nfs_has_ctty(void)
52179acbbeafSnn35248 {
52189acbbeafSnn35248 boolean_t rv;
52199acbbeafSnn35248 mutex_enter(&curproc->p_splock);
52209acbbeafSnn35248 rv = (curproc->p_sessp->s_vp != NULL);
52219acbbeafSnn35248 mutex_exit(&curproc->p_splock);
52229acbbeafSnn35248 return (rv);
52239acbbeafSnn35248 }
522403986916Sjarrett
522503986916Sjarrett /*
522693aeed83Smarks * Scan the xattr directory to see if it has any generic user attributes
522793aeed83Smarks */
522893aeed83Smarks int
522993aeed83Smarks do_xattr_exists_check(vnode_t *vp, ulong_t *valp, cred_t *cr)
523093aeed83Smarks {
523193aeed83Smarks struct uio uio;
523293aeed83Smarks struct iovec iov;
523393aeed83Smarks char *dbuf;
523493aeed83Smarks struct dirent64 *dp;
523593aeed83Smarks size_t dlen = 8 * 1024;
523693aeed83Smarks size_t dbuflen;
523793aeed83Smarks int eof = 0;
523893aeed83Smarks int error;
523993aeed83Smarks
524093aeed83Smarks *valp = 0;
524193aeed83Smarks dbuf = kmem_alloc(dlen, KM_SLEEP);
524293aeed83Smarks uio.uio_iov = &iov;
524393aeed83Smarks uio.uio_iovcnt = 1;
524493aeed83Smarks uio.uio_segflg = UIO_SYSSPACE;
524593aeed83Smarks uio.uio_fmode = 0;
524693aeed83Smarks uio.uio_extflg = UIO_COPY_CACHED;
524793aeed83Smarks uio.uio_loffset = 0;
524893aeed83Smarks uio.uio_resid = dlen;
524993aeed83Smarks iov.iov_base = dbuf;
525093aeed83Smarks iov.iov_len = dlen;
525193aeed83Smarks (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
525293aeed83Smarks error = VOP_READDIR(vp, &uio, cr, &eof, NULL, 0);
525393aeed83Smarks VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
525493aeed83Smarks
525593aeed83Smarks dbuflen = dlen - uio.uio_resid;
525693aeed83Smarks
525793aeed83Smarks if (error || dbuflen == 0) {
525893aeed83Smarks kmem_free(dbuf, dlen);
525993aeed83Smarks return (error);
526093aeed83Smarks }
526193aeed83Smarks
526293aeed83Smarks dp = (dirent64_t *)dbuf;
526393aeed83Smarks
526493aeed83Smarks while ((intptr_t)dp < (intptr_t)dbuf + dbuflen) {
526593aeed83Smarks if (strcmp(dp->d_name, ".") == 0 ||
526693aeed83Smarks strcmp(dp->d_name, "..") == 0 || strcmp(dp->d_name,
526793aeed83Smarks VIEW_READWRITE) == 0 || strcmp(dp->d_name,
526893aeed83Smarks VIEW_READONLY) == 0) {
526993aeed83Smarks dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen);
527093aeed83Smarks continue;
527193aeed83Smarks }
527293aeed83Smarks
527393aeed83Smarks *valp = 1;
527493aeed83Smarks break;
527593aeed83Smarks }
527693aeed83Smarks kmem_free(dbuf, dlen);
527793aeed83Smarks return (0);
527893aeed83Smarks }
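/*
 * A usage sketch for do_xattr_exists_check(): given the vnode of a file's
 * extended attribute directory, a pathconf-style caller could answer an
 * "are there any user-visible xattrs?" query roughly as follows (the
 * surrounding lookup of the xattr directory vnode is assumed and not
 * shown):
 *
 *	ulong_t xattr_exists;
 *
 *	error = do_xattr_exists_check(xattr_dvp, &xattr_exists, cr);
 *	if (error == 0)
 *		*valp = xattr_exists;	   1 if anything beyond ".", "..",
 *					   and the system-attribute views
 */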
5279