xref: /freebsd/sys/fs/nfsserver/nfs_nfsdport.c (revision cfbe7a62dc62e8a5d7520cb5eb8ad7c4a9418e26)
19ec7b004SRick Macklem /*-
251369649SPedro F. Giffuni  * SPDX-License-Identifier: BSD-3-Clause
351369649SPedro F. Giffuni  *
49ec7b004SRick Macklem  * Copyright (c) 1989, 1993
59ec7b004SRick Macklem  *	The Regents of the University of California.  All rights reserved.
69ec7b004SRick Macklem  *
79ec7b004SRick Macklem  * This code is derived from software contributed to Berkeley by
89ec7b004SRick Macklem  * Rick Macklem at The University of Guelph.
99ec7b004SRick Macklem  *
109ec7b004SRick Macklem  * Redistribution and use in source and binary forms, with or without
119ec7b004SRick Macklem  * modification, are permitted provided that the following conditions
129ec7b004SRick Macklem  * are met:
139ec7b004SRick Macklem  * 1. Redistributions of source code must retain the above copyright
149ec7b004SRick Macklem  *    notice, this list of conditions and the following disclaimer.
159ec7b004SRick Macklem  * 2. Redistributions in binary form must reproduce the above copyright
169ec7b004SRick Macklem  *    notice, this list of conditions and the following disclaimer in the
179ec7b004SRick Macklem  *    documentation and/or other materials provided with the distribution.
18fbbd9655SWarner Losh  * 3. Neither the name of the University nor the names of its contributors
199ec7b004SRick Macklem  *    may be used to endorse or promote products derived from this software
209ec7b004SRick Macklem  *    without specific prior written permission.
219ec7b004SRick Macklem  *
229ec7b004SRick Macklem  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
239ec7b004SRick Macklem  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
249ec7b004SRick Macklem  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
259ec7b004SRick Macklem  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
269ec7b004SRick Macklem  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
279ec7b004SRick Macklem  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
289ec7b004SRick Macklem  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
299ec7b004SRick Macklem  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
309ec7b004SRick Macklem  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
319ec7b004SRick Macklem  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
329ec7b004SRick Macklem  * SUCH DAMAGE.
339ec7b004SRick Macklem  *
349ec7b004SRick Macklem  */
359ec7b004SRick Macklem 
364a144410SRobert Watson #include <sys/capsicum.h>
3790d2dfabSRick Macklem #include <sys/extattr.h>
38a9d2f8d8SRobert Watson 
399ec7b004SRick Macklem /*
409ec7b004SRick Macklem  * Functions that perform the vfs operations required by the routines in
419ec7b004SRick Macklem  * nfsd_serv.c. It is hoped that this change will make the server more
429ec7b004SRick Macklem  * portable.
439ec7b004SRick Macklem  */
449ec7b004SRick Macklem 
459ec7b004SRick Macklem #include <fs/nfs/nfsport.h>
46c057a378SRick Macklem #include <security/mac/mac_framework.h>
473455c738SAlexander Motin #include <sys/callout.h>
48c057a378SRick Macklem #include <sys/filio.h>
49377c50f6SRick Macklem #include <sys/hash.h>
507e44856eSRick Macklem #include <sys/osd.h>
519ec7b004SRick Macklem #include <sys/sysctl.h>
52b839e625SRick Macklem #include <nlm/nlm_prot.h>
53b839e625SRick Macklem #include <nlm/nlm.h>
54b068bb09SKonstantin Belousov #include <vm/vm_param.h>
55b068bb09SKonstantin Belousov #include <vm/vnode_pager.h>
569ec7b004SRick Macklem 
57de5b1952SAlexander Leidinger FEATURE(nfsd, "NFSv4 server");
58de5b1952SAlexander Leidinger 
599ec7b004SRick Macklem extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
609ec7b004SRick Macklem extern int nfsrv_useacl;
619ec7b004SRick Macklem extern int newnfs_numnfsd;
621f54e596SRick Macklem extern int nfsrv_sessionhashsize;
6390d2dfabSRick Macklem extern struct nfslayouthash *nfslayouthash;
6490d2dfabSRick Macklem extern int nfsrv_layouthashsize;
6590d2dfabSRick Macklem extern struct mtx nfsrv_dslock_mtx;
6690d2dfabSRick Macklem extern int nfs_pnfsiothreads;
6790d2dfabSRick Macklem extern volatile int nfsrv_devidcnt;
6890d2dfabSRick Macklem extern int nfsrv_maxpnfsmirror;
69ee29e6f3SRick Macklem extern uint32_t nfs_srvmaxio;
70ee29e6f3SRick Macklem extern int nfs_bufpackets;
71ee29e6f3SRick Macklem extern u_long sb_max_adj;
7267284d32SRick Macklem extern struct nfsv4lock nfsv4rootfs_lock;
737e44856eSRick Macklem 
747e44856eSRick Macklem NFSD_VNET_DECLARE(int, nfsrv_numnfsd);
757e44856eSRick Macklem NFSD_VNET_DECLARE(struct nfsrv_stablefirst, nfsrv_stablefirst);
767e44856eSRick Macklem NFSD_VNET_DECLARE(SVCPOOL *, nfsrvd_pool);
777e44856eSRick Macklem NFSD_VNET_DECLARE(struct nfsclienthashhead *, nfsclienthash);
787e44856eSRick Macklem NFSD_VNET_DECLARE(struct nfslockhashhead *, nfslockhash);
797e44856eSRick Macklem NFSD_VNET_DECLARE(struct nfssessionhash *, nfssessionhash);
807e44856eSRick Macklem NFSD_VNET_DECLARE(struct nfsv4lock, nfsd_suspend_lock);
819d329bbcSRick Macklem NFSD_VNET_DECLARE(struct nfsstatsv1 *, nfsstatsv1_p);
827e44856eSRick Macklem 
839ec7b004SRick Macklem NFSDLOCKMUTEX;
8490d2dfabSRick Macklem NFSSTATESPINLOCK;
8593c5875bSRick Macklem struct mtx nfsrc_udpmtx;
869ec7b004SRick Macklem struct mtx nfs_v4root_mutex;
8790d2dfabSRick Macklem struct mtx nfsrv_dontlistlock_mtx;
8890d2dfabSRick Macklem struct mtx nfsrv_recalllock_mtx;
897e44856eSRick Macklem struct nfsrvfh nfs_pubfh;
907e44856eSRick Macklem int nfs_pubfhset = 0;
91c59e4cc3SRick Macklem int nfsd_debuglevel = 0;
925f73287aSRick Macklem static pid_t nfsd_master_pid = (pid_t)-1;
935f73287aSRick Macklem static char nfsd_master_comm[MAXCOMLEN + 1];
945f73287aSRick Macklem static struct timeval nfsd_master_start;
95b839e625SRick Macklem static uint32_t nfsv4_sysid = 0;
9690d2dfabSRick Macklem static fhandle_t zerofh;
977e44856eSRick Macklem 
987e44856eSRick Macklem NFSD_VNET_DEFINE(struct proc *, nfsd_master_proc) = NULL;
997e44856eSRick Macklem NFSD_VNET_DEFINE(struct nfsrvhashhead *, nfsrvudphashtbl);
1007e44856eSRick Macklem NFSD_VNET_DEFINE(struct nfsrchash_bucket *, nfsrchash_table);
1017e44856eSRick Macklem NFSD_VNET_DEFINE(struct nfsrchash_bucket *, nfsrcahash_table);
1027e44856eSRick Macklem NFSD_VNET_DEFINE(struct nfsrvfh, nfs_rootfh);
1037e44856eSRick Macklem NFSD_VNET_DEFINE(int, nfs_rootfhset) = 0;
1047e44856eSRick Macklem NFSD_VNET_DEFINE(struct callout, nfsd_callout);
1057e44856eSRick Macklem NFSD_VNET_DEFINE_STATIC(struct mount *, nfsv4root_mnt);
1067e44856eSRick Macklem NFSD_VNET_DEFINE_STATIC(struct vfsoptlist, nfsv4root_opt);
1077e44856eSRick Macklem NFSD_VNET_DEFINE_STATIC(struct vfsoptlist, nfsv4root_newopt);
1087e44856eSRick Macklem NFSD_VNET_DEFINE_STATIC(bool, nfsrv_suspend_nfsd) = false;
1097e44856eSRick Macklem NFSD_VNET_DEFINE_STATIC(bool, nfsrv_mntinited) = false;
1107e44856eSRick Macklem 
11157d1e464SRick Macklem static int nfssvc_srvcall(struct thread *, struct nfssvc_args *,
11257d1e464SRick Macklem     struct ucred *);
113ae781657SRick Macklem static void nfsvno_updateds(struct vnode *, struct ucred *, struct thread *);
1149ec7b004SRick Macklem 
11507c0c166SRick Macklem int nfsrv_enable_crossmntpt = 1;
1169ec7b004SRick Macklem static int nfs_commit_blks;
1179ec7b004SRick Macklem static int nfs_commit_miss;
1189ec7b004SRick Macklem extern int nfsrv_issuedelegs;
11957d1e464SRick Macklem extern int nfsrv_dolocallocks;
12090d2dfabSRick Macklem extern struct nfsdevicehead nfsrv_devidhead;
12190d2dfabSRick Macklem 
122cd5edc7dSRick Macklem /* Map d_type to vnode type. */
123cd5edc7dSRick Macklem static uint8_t dtype_to_vnode[DT_WHT + 1] = { VNON, VFIFO, VCHR, VNON, VDIR,
124cd5edc7dSRick Macklem     VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON };
125cd5edc7dSRick Macklem #define	NFS_DTYPETOVTYPE(t)	((t) <= DT_WHT ? dtype_to_vnode[(t)] : VNON)
126cd5edc7dSRick Macklem 
127c057a378SRick Macklem static int nfsrv_createiovec(int, struct mbuf **, struct mbuf **,
128c057a378SRick Macklem     struct iovec **);
129cb889ce6SRick Macklem static int nfsrv_createiovec_extpgs(int, int, struct mbuf **,
130cb889ce6SRick Macklem     struct mbuf **, struct iovec **);
131c057a378SRick Macklem static int nfsrv_createiovecw(int, struct mbuf *, char *, struct iovec **,
132c057a378SRick Macklem     int *);
13390d2dfabSRick Macklem static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *,
13490d2dfabSRick Macklem     NFSPROC_T *);
13590d2dfabSRick Macklem static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode **,
13690d2dfabSRick Macklem     int *, char *, fhandle_t *);
13790d2dfabSRick Macklem static void nfsrv_pnfsremove(struct vnode **, int, char *, fhandle_t *,
13890d2dfabSRick Macklem     NFSPROC_T *);
1394ce21f37SRick Macklem static int nfsrv_proxyds(struct vnode *, off_t, int, struct ucred *,
1404ce21f37SRick Macklem     struct thread *, int, struct mbuf **, char *, struct mbuf **,
141c057a378SRick Macklem     struct nfsvattr *, struct acl *, off_t *, int, bool *);
14290d2dfabSRick Macklem static int nfsrv_setextattr(struct vnode *, struct nfsvattr *, NFSPROC_T *);
14390d2dfabSRick Macklem static int nfsrv_readdsrpc(fhandle_t *, off_t, int, struct ucred *,
14490d2dfabSRick Macklem     NFSPROC_T *, struct nfsmount *, struct mbuf **, struct mbuf **);
14590d2dfabSRick Macklem static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *,
14690d2dfabSRick Macklem     NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct mbuf **,
14790d2dfabSRick Macklem     char *, int *);
148c057a378SRick Macklem static int nfsrv_allocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *,
149c057a378SRick Macklem     NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *);
150bb958dcfSRick Macklem static int nfsrv_deallocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *,
151bb958dcfSRick Macklem     NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *);
15290d2dfabSRick Macklem static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
15390d2dfabSRick Macklem     struct vnode *, struct nfsmount **, int, struct acl *, int *);
15490d2dfabSRick Macklem static int nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
15590d2dfabSRick Macklem     struct vnode *, struct nfsmount **, int, struct nfsvattr *, int *);
15690d2dfabSRick Macklem static int nfsrv_getattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
15790d2dfabSRick Macklem     struct vnode *, struct nfsmount *, struct nfsvattr *);
158c057a378SRick Macklem static int nfsrv_seekdsrpc(fhandle_t *, off_t *, int, bool *, struct ucred *,
159c057a378SRick Macklem     NFSPROC_T *, struct nfsmount *);
16090d2dfabSRick Macklem static int nfsrv_putfhname(fhandle_t *, char *);
16190d2dfabSRick Macklem static int nfsrv_pnfslookupds(struct vnode *, struct vnode *,
16290d2dfabSRick Macklem     struct pnfsdsfile *, struct vnode **, NFSPROC_T *);
1633e5ba2e1SRick Macklem static void nfsrv_pnfssetfh(struct vnode *, struct pnfsdsfile *, char *, char *,
16490d2dfabSRick Macklem     struct vnode *, NFSPROC_T *);
16590d2dfabSRick Macklem static int nfsrv_dsremove(struct vnode *, char *, struct ucred *, NFSPROC_T *);
16690d2dfabSRick Macklem static int nfsrv_dssetacl(struct vnode *, struct acl *, struct ucred *,
16790d2dfabSRick Macklem     NFSPROC_T *);
168910ccc77SRick Macklem static int nfsrv_pnfsstatfs(struct statfs *, struct mount *);
16990d2dfabSRick Macklem 
17090d2dfabSRick Macklem int nfs_pnfsio(task_fn_t *, void *);
17157d1e464SRick Macklem 
1727029da5cSPawel Biernacki SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
1737029da5cSPawel Biernacki     "NFS server");
174a0c2c369SRick Macklem SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW,
17507c0c166SRick Macklem     &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points");
176a0c2c369SRick Macklem SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks,
17757d1e464SRick Macklem     0, "");
178a0c2c369SRick Macklem SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss,
17957d1e464SRick Macklem     0, "");
180a0c2c369SRick Macklem SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW,
18157d1e464SRick Macklem     &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations");
182c59e4cc3SRick Macklem SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel,
18350a220c6SEdward Tomasz Napierala     0, "Debug level for NFS server");
1847e44856eSRick Macklem NFSD_VNET_DECLARE(int, nfsd_enable_stringtouid);
1857e44856eSRick Macklem SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid,
1867e44856eSRick Macklem     CTLFLAG_NFSD_VNET | CTLFLAG_RW, &NFSD_VNET_NAME(nfsd_enable_stringtouid),
1877e44856eSRick Macklem     0, "Enable nfsd to accept numeric owner_names");
18890d2dfabSRick Macklem static int nfsrv_pnfsgetdsattr = 1;
18990d2dfabSRick Macklem SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW,
19090d2dfabSRick Macklem     &nfsrv_pnfsgetdsattr, 0, "When set getattr gets DS attributes via RPC");
19190d2dfabSRick Macklem 
19290d2dfabSRick Macklem /*
19390d2dfabSRick Macklem  * nfsrv_dsdirsize can only be increased and only when the nfsd threads are
19490d2dfabSRick Macklem  * not running.
19590d2dfabSRick Macklem  * The dsN subdirectories for the increased values must have been created
19690d2dfabSRick Macklem  * on all DS servers before this increase is done.
19790d2dfabSRick Macklem  */
19890d2dfabSRick Macklem u_int	nfsrv_dsdirsize = 20;
19990d2dfabSRick Macklem static int
sysctl_dsdirsize(SYSCTL_HANDLER_ARGS)20090d2dfabSRick Macklem sysctl_dsdirsize(SYSCTL_HANDLER_ARGS)
20190d2dfabSRick Macklem {
20290d2dfabSRick Macklem 	int error, newdsdirsize;
20390d2dfabSRick Macklem 
20490d2dfabSRick Macklem 	newdsdirsize = nfsrv_dsdirsize;
20590d2dfabSRick Macklem 	error = sysctl_handle_int(oidp, &newdsdirsize, 0, req);
20690d2dfabSRick Macklem 	if (error != 0 || req->newptr == NULL)
20790d2dfabSRick Macklem 		return (error);
20890d2dfabSRick Macklem 	if (newdsdirsize <= nfsrv_dsdirsize || newdsdirsize > 10000 ||
20990d2dfabSRick Macklem 	    newnfs_numnfsd != 0)
21090d2dfabSRick Macklem 		return (EINVAL);
21190d2dfabSRick Macklem 	nfsrv_dsdirsize = newdsdirsize;
21290d2dfabSRick Macklem 	return (0);
21390d2dfabSRick Macklem }
2147493134eSMateusz Guzik SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize,
2157493134eSMateusz Guzik     CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(nfsrv_dsdirsize),
2167493134eSMateusz Guzik     sysctl_dsdirsize, "IU", "Number of dsN subdirs on the DS servers");
2179ec7b004SRick Macklem 
218ee29e6f3SRick Macklem /*
219ee29e6f3SRick Macklem  * nfs_srvmaxio can only be increased and only when the nfsd threads are
220ee29e6f3SRick Macklem  * not running.  The setting must be a power of 2, with the current limit of
221ee29e6f3SRick Macklem  * 1Mbyte.
222ee29e6f3SRick Macklem  */
223ee29e6f3SRick Macklem static int
sysctl_srvmaxio(SYSCTL_HANDLER_ARGS)224ee29e6f3SRick Macklem sysctl_srvmaxio(SYSCTL_HANDLER_ARGS)
225ee29e6f3SRick Macklem {
226ee29e6f3SRick Macklem 	int error;
227ee29e6f3SRick Macklem 	u_int newsrvmaxio;
228ee29e6f3SRick Macklem 	uint64_t tval;
229ee29e6f3SRick Macklem 
230ee29e6f3SRick Macklem 	newsrvmaxio = nfs_srvmaxio;
231ee29e6f3SRick Macklem 	error = sysctl_handle_int(oidp, &newsrvmaxio, 0, req);
232ee29e6f3SRick Macklem 	if (error != 0 || req->newptr == NULL)
233ee29e6f3SRick Macklem 		return (error);
234ee29e6f3SRick Macklem 	if (newsrvmaxio == nfs_srvmaxio)
235ee29e6f3SRick Macklem 		return (0);
236ee29e6f3SRick Macklem 	if (newsrvmaxio < nfs_srvmaxio) {
237ee29e6f3SRick Macklem 		printf("nfsd: vfs.nfsd.srvmaxio can only be increased\n");
238ee29e6f3SRick Macklem 		return (EINVAL);
239ee29e6f3SRick Macklem 	}
240ee29e6f3SRick Macklem 	if (newsrvmaxio > 1048576) {
241ee29e6f3SRick Macklem 		printf("nfsd: vfs.nfsd.srvmaxio cannot be > 1Mbyte\n");
242ee29e6f3SRick Macklem 		return (EINVAL);
243ee29e6f3SRick Macklem 	}
244ee29e6f3SRick Macklem 	if ((newsrvmaxio & (newsrvmaxio - 1)) != 0) {
245ee29e6f3SRick Macklem 		printf("nfsd: vfs.nfsd.srvmaxio must be a power of 2\n");
246ee29e6f3SRick Macklem 		return (EINVAL);
247ee29e6f3SRick Macklem 	}
248ee29e6f3SRick Macklem 
249ee29e6f3SRick Macklem 	/*
250ee29e6f3SRick Macklem 	 * Check that kern.ipc.maxsockbuf is large enough for
251ee29e6f3SRick Macklem 	 * newsrviomax, given the setting of vfs.nfs.bufpackets.
252ee29e6f3SRick Macklem 	 */
253ee29e6f3SRick Macklem 	if ((newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets >
254ee29e6f3SRick Macklem 	    sb_max_adj) {
255ee29e6f3SRick Macklem 		/*
256ee29e6f3SRick Macklem 		 * Suggest vfs.nfs.bufpackets * maximum RPC message for
257ee29e6f3SRick Macklem 		 * sb_max_adj.
258ee29e6f3SRick Macklem 		 */
259ee29e6f3SRick Macklem 		tval = (newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets;
260ee29e6f3SRick Macklem 
261ee29e6f3SRick Macklem 		/*
262ee29e6f3SRick Macklem 		 * Convert suggested sb_max_adj value to a suggested
263ee29e6f3SRick Macklem 		 * sb_max value, which is what is set via kern.ipc.maxsockbuf.
264ee29e6f3SRick Macklem 		 * Perform the inverse calculation of (from uipc_sockbuf.c):
265ee29e6f3SRick Macklem 		 * sb_max_adj = (u_quad_t)sb_max * MCLBYTES /
266ee29e6f3SRick Macklem 		 *     (MSIZE + MCLBYTES);
267ee29e6f3SRick Macklem 		 * XXX If the calculation of sb_max_adj from sb_max changes,
268ee29e6f3SRick Macklem 		 *     this calculation must be changed as well.
269ee29e6f3SRick Macklem 		 */
270ee29e6f3SRick Macklem 		tval *= (MSIZE + MCLBYTES);  /* Brackets for readability. */
271ee29e6f3SRick Macklem 		tval += MCLBYTES - 1;        /* Round up divide. */
272ee29e6f3SRick Macklem 		tval /= MCLBYTES;
273ee29e6f3SRick Macklem 		printf("nfsd: set kern.ipc.maxsockbuf to a minimum of "
274ee29e6f3SRick Macklem 		    "%ju to support %ubyte NFS I/O\n", (uintmax_t)tval,
275ee29e6f3SRick Macklem 		    newsrvmaxio);
276ee29e6f3SRick Macklem 		return (EINVAL);
277ee29e6f3SRick Macklem 	}
278ee29e6f3SRick Macklem 
279ee29e6f3SRick Macklem 	NFSD_LOCK();
280ee29e6f3SRick Macklem 	if (newnfs_numnfsd != 0) {
281ee29e6f3SRick Macklem 		NFSD_UNLOCK();
282ee29e6f3SRick Macklem 		printf("nfsd: cannot set vfs.nfsd.srvmaxio when nfsd "
283ee29e6f3SRick Macklem 		    "threads are running\n");
284ee29e6f3SRick Macklem 		return (EINVAL);
285ee29e6f3SRick Macklem 	}
286ee29e6f3SRick Macklem 
287ee29e6f3SRick Macklem 
288ee29e6f3SRick Macklem 	nfs_srvmaxio = newsrvmaxio;
289ee29e6f3SRick Macklem 	NFSD_UNLOCK();
290ee29e6f3SRick Macklem 	return (0);
291ee29e6f3SRick Macklem }
292ee29e6f3SRick Macklem SYSCTL_PROC(_vfs_nfsd, OID_AUTO, srvmaxio,
293ee29e6f3SRick Macklem     CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
294ee29e6f3SRick Macklem     sysctl_srvmaxio, "IU", "Maximum I/O size in bytes");
295ee29e6f3SRick Macklem 
29667284d32SRick Macklem static int
sysctl_dolocallocks(SYSCTL_HANDLER_ARGS)29767284d32SRick Macklem sysctl_dolocallocks(SYSCTL_HANDLER_ARGS)
29867284d32SRick Macklem {
29967284d32SRick Macklem 	int error, igotlock, newdolocallocks;
30067284d32SRick Macklem 
30167284d32SRick Macklem 	newdolocallocks = nfsrv_dolocallocks;
30267284d32SRick Macklem 	error = sysctl_handle_int(oidp, &newdolocallocks, 0, req);
30367284d32SRick Macklem 	if (error != 0 || req->newptr == NULL)
30467284d32SRick Macklem 		return (error);
30567284d32SRick Macklem 	if (newdolocallocks == nfsrv_dolocallocks)
30667284d32SRick Macklem 		return (0);
30767284d32SRick Macklem 	if (jailed(curthread->td_ucred))
30867284d32SRick Macklem 		return (EINVAL);
30967284d32SRick Macklem 
31067284d32SRick Macklem 	NFSLOCKV4ROOTMUTEX();
31167284d32SRick Macklem 	do {
31267284d32SRick Macklem 		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
31367284d32SRick Macklem 		    NFSV4ROOTLOCKMUTEXPTR, NULL);
31467284d32SRick Macklem 	} while (!igotlock);
31567284d32SRick Macklem 	NFSUNLOCKV4ROOTMUTEX();
31667284d32SRick Macklem 
31767284d32SRick Macklem 	nfsrv_dolocallocks = newdolocallocks;
31867284d32SRick Macklem 
31967284d32SRick Macklem 	NFSLOCKV4ROOTMUTEX();
32067284d32SRick Macklem 	nfsv4_unlock(&nfsv4rootfs_lock, 0);
32167284d32SRick Macklem 	NFSUNLOCKV4ROOTMUTEX();
32267284d32SRick Macklem 	return (0);
32367284d32SRick Macklem }
32467284d32SRick Macklem SYSCTL_PROC(_vfs_nfsd, OID_AUTO, enable_locallocks,
32567284d32SRick Macklem     CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
32667284d32SRick Macklem     sysctl_dolocallocks, "IU", "Enable nfsd to acquire local locks on files");
32767284d32SRick Macklem 
328574862c8SJohn Baldwin #define	MAX_REORDERED_RPC	16
329574862c8SJohn Baldwin #define	NUM_HEURISTIC		1031
3309ec7b004SRick Macklem #define	NHUSE_INIT		64
3319ec7b004SRick Macklem #define	NHUSE_INC		16
3329ec7b004SRick Macklem #define	NHUSE_MAX		2048
3339ec7b004SRick Macklem 
3349ec7b004SRick Macklem static struct nfsheur {
3359ec7b004SRick Macklem 	struct vnode *nh_vp;	/* vp to match (unreferenced pointer) */
336574862c8SJohn Baldwin 	off_t nh_nextoff;	/* next offset for sequential detection */
3379ec7b004SRick Macklem 	int nh_use;		/* use count for selection */
3389ec7b004SRick Macklem 	int nh_seqcount;	/* heuristic */
3399ec7b004SRick Macklem } nfsheur[NUM_HEURISTIC];
3409ec7b004SRick Macklem 
3419ec7b004SRick Macklem /*
342574862c8SJohn Baldwin  * Heuristic to detect sequential operation.
343574862c8SJohn Baldwin  */
344574862c8SJohn Baldwin static struct nfsheur *
nfsrv_sequential_heuristic(struct uio * uio,struct vnode * vp)345574862c8SJohn Baldwin nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp)
346574862c8SJohn Baldwin {
347574862c8SJohn Baldwin 	struct nfsheur *nh;
348574862c8SJohn Baldwin 	int hi, try;
349574862c8SJohn Baldwin 
350574862c8SJohn Baldwin 	/* Locate best candidate. */
351574862c8SJohn Baldwin 	try = 32;
352574862c8SJohn Baldwin 	hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
353574862c8SJohn Baldwin 	nh = &nfsheur[hi];
354574862c8SJohn Baldwin 	while (try--) {
355574862c8SJohn Baldwin 		if (nfsheur[hi].nh_vp == vp) {
356574862c8SJohn Baldwin 			nh = &nfsheur[hi];
357574862c8SJohn Baldwin 			break;
358574862c8SJohn Baldwin 		}
359574862c8SJohn Baldwin 		if (nfsheur[hi].nh_use > 0)
360574862c8SJohn Baldwin 			--nfsheur[hi].nh_use;
361574862c8SJohn Baldwin 		hi = (hi + 1) % NUM_HEURISTIC;
362574862c8SJohn Baldwin 		if (nfsheur[hi].nh_use < nh->nh_use)
363574862c8SJohn Baldwin 			nh = &nfsheur[hi];
364574862c8SJohn Baldwin 	}
365574862c8SJohn Baldwin 
366574862c8SJohn Baldwin 	/* Initialize hint if this is a new file. */
367574862c8SJohn Baldwin 	if (nh->nh_vp != vp) {
368574862c8SJohn Baldwin 		nh->nh_vp = vp;
369574862c8SJohn Baldwin 		nh->nh_nextoff = uio->uio_offset;
370574862c8SJohn Baldwin 		nh->nh_use = NHUSE_INIT;
371574862c8SJohn Baldwin 		if (uio->uio_offset == 0)
372574862c8SJohn Baldwin 			nh->nh_seqcount = 4;
373574862c8SJohn Baldwin 		else
374574862c8SJohn Baldwin 			nh->nh_seqcount = 1;
375574862c8SJohn Baldwin 	}
376574862c8SJohn Baldwin 
377574862c8SJohn Baldwin 	/* Calculate heuristic. */
378574862c8SJohn Baldwin 	if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) ||
379574862c8SJohn Baldwin 	    uio->uio_offset == nh->nh_nextoff) {
380574862c8SJohn Baldwin 		/* See comments in vfs_vnops.c:sequential_heuristic(). */
381574862c8SJohn Baldwin 		nh->nh_seqcount += howmany(uio->uio_resid, 16384);
382574862c8SJohn Baldwin 		if (nh->nh_seqcount > IO_SEQMAX)
383574862c8SJohn Baldwin 			nh->nh_seqcount = IO_SEQMAX;
384574862c8SJohn Baldwin 	} else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC *
385574862c8SJohn Baldwin 	    imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) {
386574862c8SJohn Baldwin 		/* Probably a reordered RPC, leave seqcount alone. */
387574862c8SJohn Baldwin 	} else if (nh->nh_seqcount > 1) {
388574862c8SJohn Baldwin 		nh->nh_seqcount /= 2;
389574862c8SJohn Baldwin 	} else {
390574862c8SJohn Baldwin 		nh->nh_seqcount = 0;
391574862c8SJohn Baldwin 	}
392574862c8SJohn Baldwin 	nh->nh_use += NHUSE_INC;
393574862c8SJohn Baldwin 	if (nh->nh_use > NHUSE_MAX)
394574862c8SJohn Baldwin 		nh->nh_use = NHUSE_MAX;
395574862c8SJohn Baldwin 	return (nh);
396574862c8SJohn Baldwin }
397574862c8SJohn Baldwin 
398574862c8SJohn Baldwin /*
3999ec7b004SRick Macklem  * Get attributes into nfsvattr structure.
4009ec7b004SRick Macklem  */
4019ec7b004SRick Macklem int
nfsvno_getattr(struct vnode * vp,struct nfsvattr * nvap,struct nfsrv_descript * nd,struct thread * p,int vpislocked,nfsattrbit_t * attrbitp)40290d2dfabSRick Macklem nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap,
40390d2dfabSRick Macklem     struct nfsrv_descript *nd, struct thread *p, int vpislocked,
40490d2dfabSRick Macklem     nfsattrbit_t *attrbitp)
4059ec7b004SRick Macklem {
40690d2dfabSRick Macklem 	int error, gotattr, lockedit = 0;
40790d2dfabSRick Macklem 	struct nfsvattr na;
4089ec7b004SRick Macklem 
4090cf42b62SRick Macklem 	if (vpislocked == 0) {
4100cf42b62SRick Macklem 		/*
4110cf42b62SRick Macklem 		 * When vpislocked == 0, the vnode is either exclusively
4120cf42b62SRick Macklem 		 * locked by this thread or not locked by this thread.
4130cf42b62SRick Macklem 		 * As such, shared lock it, if not exclusively locked.
4140cf42b62SRick Macklem 		 */
41568347a92SZack Kirsch 		if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
4169ec7b004SRick Macklem 			lockedit = 1;
41798f234f3SZack Kirsch 			NFSVOPLOCK(vp, LK_SHARED | LK_RETRY);
4180cf42b62SRick Macklem 		}
4199ec7b004SRick Macklem 	}
42090d2dfabSRick Macklem 
42190d2dfabSRick Macklem 	/*
42214eff785SRick Macklem 	 * Acquire the Change, Size, TimeAccess, TimeModify and SpaceUsed
42314eff785SRick Macklem 	 * attributes, as required.
42490d2dfabSRick Macklem 	 * This needs to be done for regular files if:
42590d2dfabSRick Macklem 	 * - non-NFSv4 RPCs or
42690d2dfabSRick Macklem 	 * - when attrbitp == NULL or
42790d2dfabSRick Macklem 	 * - an NFSv4 RPC with any of the above attributes in attrbitp.
42890d2dfabSRick Macklem 	 * A return of 0 for nfsrv_proxyds() indicates that it has acquired
42990d2dfabSRick Macklem 	 * these attributes.  nfsrv_proxyds() will return an error if the
43090d2dfabSRick Macklem 	 * server is not a pNFS one.
43190d2dfabSRick Macklem 	 */
43290d2dfabSRick Macklem 	gotattr = 0;
43390d2dfabSRick Macklem 	if (vp->v_type == VREG && nfsrv_devidcnt > 0 && (attrbitp == NULL ||
43490d2dfabSRick Macklem 	    (nd->nd_flag & ND_NFSV4) == 0 ||
43590d2dfabSRick Macklem 	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_CHANGE) ||
43690d2dfabSRick Macklem 	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE) ||
43790d2dfabSRick Macklem 	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS) ||
43814eff785SRick Macklem 	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFY) ||
43914eff785SRick Macklem 	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEUSED))) {
4404ce21f37SRick Macklem 		error = nfsrv_proxyds(vp, 0, 0, nd->nd_cred, p,
441c057a378SRick Macklem 		    NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL, NULL, 0,
442c057a378SRick Macklem 		    NULL);
44390d2dfabSRick Macklem 		if (error == 0)
44490d2dfabSRick Macklem 			gotattr = 1;
44590d2dfabSRick Macklem 	}
44690d2dfabSRick Macklem 
44790d2dfabSRick Macklem 	error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred);
4480cf42b62SRick Macklem 	if (lockedit != 0)
449b249ce48SMateusz Guzik 		NFSVOPUNLOCK(vp);
450a9285ae5SZack Kirsch 
45190d2dfabSRick Macklem 	/*
45290d2dfabSRick Macklem 	 * If we got the Change, Size and Modify Time from the DS,
45390d2dfabSRick Macklem 	 * replace them.
45490d2dfabSRick Macklem 	 */
45590d2dfabSRick Macklem 	if (gotattr != 0) {
45690d2dfabSRick Macklem 		nvap->na_atime = na.na_atime;
45790d2dfabSRick Macklem 		nvap->na_mtime = na.na_mtime;
45890d2dfabSRick Macklem 		nvap->na_filerev = na.na_filerev;
45990d2dfabSRick Macklem 		nvap->na_size = na.na_size;
46014eff785SRick Macklem 		nvap->na_bytes = na.na_bytes;
46190d2dfabSRick Macklem 	}
46290d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsvno_getattr: gotattr=%d err=%d chg=%ju\n", gotattr,
46390d2dfabSRick Macklem 	    error, (uintmax_t)na.na_filerev);
46490d2dfabSRick Macklem 
465a9285ae5SZack Kirsch 	NFSEXITCODE(error);
4669ec7b004SRick Macklem 	return (error);
4679ec7b004SRick Macklem }
4689ec7b004SRick Macklem 
4699ec7b004SRick Macklem /*
4709ec7b004SRick Macklem  * Get a file handle for a vnode.
4719ec7b004SRick Macklem  */
4729ec7b004SRick Macklem int
nfsvno_getfh(struct vnode * vp,fhandle_t * fhp,struct thread * p)4739ec7b004SRick Macklem nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p)
4749ec7b004SRick Macklem {
4759ec7b004SRick Macklem 	int error;
4769ec7b004SRick Macklem 
4779ec7b004SRick Macklem 	NFSBZERO((caddr_t)fhp, sizeof(fhandle_t));
4789ec7b004SRick Macklem 	fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
4799ec7b004SRick Macklem 	error = VOP_VPTOFH(vp, &fhp->fh_fid);
480a9285ae5SZack Kirsch 
481a9285ae5SZack Kirsch 	NFSEXITCODE(error);
4829ec7b004SRick Macklem 	return (error);
4839ec7b004SRick Macklem }
4849ec7b004SRick Macklem 
4859ec7b004SRick Macklem /*
4869ec7b004SRick Macklem  * Perform access checking for vnodes obtained from file handles that would
4879ec7b004SRick Macklem  * refer to files already opened by a Unix client. You cannot just use
4888da45f2cSRick Macklem  * vn_writechk() and VOP_ACCESSX() for two reasons.
4898da45f2cSRick Macklem  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
4908da45f2cSRick Macklem  *     case.
4919ec7b004SRick Macklem  * 2 - The owner is to be given access irrespective of mode bits for some
4929ec7b004SRick Macklem  *     operations, so that processes that chmod after opening a file don't
4939ec7b004SRick Macklem  *     break.
4949ec7b004SRick Macklem  */
4959ec7b004SRick Macklem int
nfsvno_accchk(struct vnode * vp,accmode_t accmode,struct ucred * cred,struct nfsexstuff * exp,struct thread * p,int override,int vpislocked,u_int32_t * supportedtypep)4968da45f2cSRick Macklem nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred,
4978da45f2cSRick Macklem     struct nfsexstuff *exp, struct thread *p, int override, int vpislocked,
4988da45f2cSRick Macklem     u_int32_t *supportedtypep)
4999ec7b004SRick Macklem {
5009ec7b004SRick Macklem 	struct vattr vattr;
5019ec7b004SRick Macklem 	int error = 0, getret = 0;
5029ec7b004SRick Macklem 
503629fa50eSRick Macklem 	if (vpislocked == 0) {
504a9285ae5SZack Kirsch 		if (NFSVOPLOCK(vp, LK_SHARED) != 0) {
505a9285ae5SZack Kirsch 			error = EPERM;
506a9285ae5SZack Kirsch 			goto out;
507a9285ae5SZack Kirsch 		}
508629fa50eSRick Macklem 	}
5099ec7b004SRick Macklem 	if (accmode & VWRITE) {
5109ec7b004SRick Macklem 		/* Just vn_writechk() changed to check rdonly */
5119ec7b004SRick Macklem 		/*
5129ec7b004SRick Macklem 		 * Disallow write attempts on read-only file systems;
5139ec7b004SRick Macklem 		 * unless the file is a socket or a block or character
5149ec7b004SRick Macklem 		 * device resident on the file system.
5159ec7b004SRick Macklem 		 */
5169ec7b004SRick Macklem 		if (NFSVNO_EXRDONLY(exp) ||
5179ec7b004SRick Macklem 		    (vp->v_mount->mnt_flag & MNT_RDONLY)) {
5189ec7b004SRick Macklem 			switch (vp->v_type) {
5199ec7b004SRick Macklem 			case VREG:
5209ec7b004SRick Macklem 			case VDIR:
5219ec7b004SRick Macklem 			case VLNK:
522629fa50eSRick Macklem 				error = EROFS;
5239ec7b004SRick Macklem 			default:
5249ec7b004SRick Macklem 				break;
5259ec7b004SRick Macklem 			}
5269ec7b004SRick Macklem 		}
5279ec7b004SRick Macklem 		/*
5289ec7b004SRick Macklem 		 * If there's shared text associated with
5299ec7b004SRick Macklem 		 * the inode, try to free it up once.  If
5309ec7b004SRick Macklem 		 * we fail, we can't allow writing.
5319ec7b004SRick Macklem 		 */
532877d24acSKonstantin Belousov 		if (VOP_IS_TEXT(vp) && error == 0)
533629fa50eSRick Macklem 			error = ETXTBSY;
5349ec7b004SRick Macklem 	}
535629fa50eSRick Macklem 	if (error != 0) {
5369ec7b004SRick Macklem 		if (vpislocked == 0)
537b249ce48SMateusz Guzik 			NFSVOPUNLOCK(vp);
538a9285ae5SZack Kirsch 		goto out;
539629fa50eSRick Macklem 	}
5409ec7b004SRick Macklem 
5419ec7b004SRick Macklem 	/*
5429ec7b004SRick Macklem 	 * Should the override still be applied when ACLs are enabled?
5439ec7b004SRick Macklem 	 */
5448da45f2cSRick Macklem 	error = VOP_ACCESSX(vp, accmode, cred, p);
5458da45f2cSRick Macklem 	if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) {
5468da45f2cSRick Macklem 		/*
5478da45f2cSRick Macklem 		 * Try again with VEXPLICIT_DENY, to see if the test for
5488da45f2cSRick Macklem 		 * deletion is supported.
5498da45f2cSRick Macklem 		 */
5508da45f2cSRick Macklem 		error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p);
5518da45f2cSRick Macklem 		if (error == 0) {
5528da45f2cSRick Macklem 			if (vp->v_type == VDIR) {
5538da45f2cSRick Macklem 				accmode &= ~(VDELETE | VDELETE_CHILD);
5548da45f2cSRick Macklem 				accmode |= VWRITE;
5558da45f2cSRick Macklem 				error = VOP_ACCESSX(vp, accmode, cred, p);
5568da45f2cSRick Macklem 			} else if (supportedtypep != NULL) {
5578da45f2cSRick Macklem 				*supportedtypep &= ~NFSACCESS_DELETE;
5588da45f2cSRick Macklem 			}
5598da45f2cSRick Macklem 		}
5608da45f2cSRick Macklem 	}
5619ec7b004SRick Macklem 
5629ec7b004SRick Macklem 	/*
5639ec7b004SRick Macklem 	 * Allow certain operations for the owner (reads and writes
5649ec7b004SRick Macklem 	 * on files that are already open).
5659ec7b004SRick Macklem 	 */
5669ec7b004SRick Macklem 	if (override != NFSACCCHK_NOOVERRIDE &&
5679ec7b004SRick Macklem 	    (error == EPERM || error == EACCES)) {
5689ec7b004SRick Macklem 		if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT))
5699ec7b004SRick Macklem 			error = 0;
5709ec7b004SRick Macklem 		else if (override & NFSACCCHK_ALLOWOWNER) {
5719ec7b004SRick Macklem 			getret = VOP_GETATTR(vp, &vattr, cred);
5729ec7b004SRick Macklem 			if (getret == 0 && cred->cr_uid == vattr.va_uid)
5739ec7b004SRick Macklem 				error = 0;
5749ec7b004SRick Macklem 		}
5759ec7b004SRick Macklem 	}
5769ec7b004SRick Macklem 	if (vpislocked == 0)
577b249ce48SMateusz Guzik 		NFSVOPUNLOCK(vp);
578a9285ae5SZack Kirsch 
579a9285ae5SZack Kirsch out:
580a9285ae5SZack Kirsch 	NFSEXITCODE(error);
5819ec7b004SRick Macklem 	return (error);
5829ec7b004SRick Macklem }
5839ec7b004SRick Macklem 
5849ec7b004SRick Macklem /*
5859ec7b004SRick Macklem  * Set attribute(s) vnop.
5869ec7b004SRick Macklem  */
5879ec7b004SRick Macklem int
nfsvno_setattr(struct vnode * vp,struct nfsvattr * nvap,struct ucred * cred,struct thread * p,struct nfsexstuff * exp)5889ec7b004SRick Macklem nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
5899ec7b004SRick Macklem     struct thread *p, struct nfsexstuff *exp)
5909ec7b004SRick Macklem {
5919fbb0fafSRick Macklem 	u_quad_t savsize = 0;
5929fbb0fafSRick Macklem 	int error, savedit;
5932de592f6SRick Macklem 	time_t savbtime;
5949ec7b004SRick Macklem 
5959fbb0fafSRick Macklem 	/*
5969fbb0fafSRick Macklem 	 * If this is an exported file system and a pNFS service is running,
5979fbb0fafSRick Macklem 	 * don't VOP_SETATTR() of size for the MDS file system.
5989fbb0fafSRick Macklem 	 */
5999fbb0fafSRick Macklem 	savedit = 0;
6009fbb0fafSRick Macklem 	error = 0;
6019fbb0fafSRick Macklem 	if (vp->v_type == VREG && (vp->v_mount->mnt_flag & MNT_EXPORTED) != 0 &&
6029fbb0fafSRick Macklem 	    nfsrv_devidcnt != 0 && nvap->na_vattr.va_size != VNOVAL &&
6039fbb0fafSRick Macklem 	    nvap->na_vattr.va_size > 0) {
6049fbb0fafSRick Macklem 		savsize = nvap->na_vattr.va_size;
6059fbb0fafSRick Macklem 		nvap->na_vattr.va_size = VNOVAL;
6069fbb0fafSRick Macklem 		if (nvap->na_vattr.va_uid != (uid_t)VNOVAL ||
6079fbb0fafSRick Macklem 		    nvap->na_vattr.va_gid != (gid_t)VNOVAL ||
6089fbb0fafSRick Macklem 		    nvap->na_vattr.va_mode != (mode_t)VNOVAL ||
6099fbb0fafSRick Macklem 		    nvap->na_vattr.va_atime.tv_sec != VNOVAL ||
6109fbb0fafSRick Macklem 		    nvap->na_vattr.va_mtime.tv_sec != VNOVAL)
6119fbb0fafSRick Macklem 			savedit = 1;
6129fbb0fafSRick Macklem 		else
6139fbb0fafSRick Macklem 			savedit = 2;
6149fbb0fafSRick Macklem 	}
6159fbb0fafSRick Macklem 	if (savedit != 2)
6169ec7b004SRick Macklem 		error = VOP_SETATTR(vp, &nvap->na_vattr, cred);
6179fbb0fafSRick Macklem 	if (savedit != 0)
6189fbb0fafSRick Macklem 		nvap->na_vattr.va_size = savsize;
61990d2dfabSRick Macklem 	if (error == 0 && (nvap->na_vattr.va_uid != (uid_t)VNOVAL ||
62090d2dfabSRick Macklem 	    nvap->na_vattr.va_gid != (gid_t)VNOVAL ||
62190d2dfabSRick Macklem 	    nvap->na_vattr.va_size != VNOVAL ||
62290d2dfabSRick Macklem 	    nvap->na_vattr.va_mode != (mode_t)VNOVAL ||
62390d2dfabSRick Macklem 	    nvap->na_vattr.va_atime.tv_sec != VNOVAL ||
62490d2dfabSRick Macklem 	    nvap->na_vattr.va_mtime.tv_sec != VNOVAL)) {
6252de592f6SRick Macklem 		/* Never modify birthtime on a DS file. */
6262de592f6SRick Macklem 		savbtime = nvap->na_vattr.va_birthtime.tv_sec;
6272de592f6SRick Macklem 		nvap->na_vattr.va_birthtime.tv_sec = VNOVAL;
62890d2dfabSRick Macklem 		/* For a pNFS server, set the attributes on the DS file. */
6294ce21f37SRick Macklem 		error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETATTR,
630c057a378SRick Macklem 		    NULL, NULL, NULL, nvap, NULL, NULL, 0, NULL);
6312de592f6SRick Macklem 		nvap->na_vattr.va_birthtime.tv_sec = savbtime;
63290d2dfabSRick Macklem 		if (error == ENOENT)
63390d2dfabSRick Macklem 			error = 0;
63490d2dfabSRick Macklem 	}
635a9285ae5SZack Kirsch 	NFSEXITCODE(error);
6369ec7b004SRick Macklem 	return (error);
6379ec7b004SRick Macklem }
6389ec7b004SRick Macklem 
6399ec7b004SRick Macklem /*
64013b2772fSRick Macklem  * Set up nameidata for a lookup() call and do it.
6419ec7b004SRick Macklem  */
6429ec7b004SRick Macklem int
nfsvno_namei(struct nfsrv_descript * nd,struct nameidata * ndp,struct vnode * dp,int islocked,struct nfsexstuff * exp,struct vnode ** retdirp)6439ec7b004SRick Macklem nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp,
644ef7d2c1fSMateusz Guzik     struct vnode *dp, int islocked, struct nfsexstuff *exp,
6459ec7b004SRick Macklem     struct vnode **retdirp)
6469ec7b004SRick Macklem {
6479ec7b004SRick Macklem 	struct componentname *cnp = &ndp->ni_cnd;
6489ec7b004SRick Macklem 	int i;
6499ec7b004SRick Macklem 	struct iovec aiov;
6509ec7b004SRick Macklem 	struct uio auio;
6519ec7b004SRick Macklem 	int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen;
652151ba793SAlexander Kabaev 	int error = 0;
6539ec7b004SRick Macklem 	char *cp;
6549ec7b004SRick Macklem 
6559ec7b004SRick Macklem 	*retdirp = NULL;
6569ec7b004SRick Macklem 	cnp->cn_nameptr = cnp->cn_pnbuf;
6577359fdcfSKonstantin Belousov 	ndp->ni_lcf = 0;
6589ec7b004SRick Macklem 	/*
6599ec7b004SRick Macklem 	 * Extract and set starting directory.
6609ec7b004SRick Macklem 	 */
6619ec7b004SRick Macklem 	if (dp->v_type != VDIR) {
6629ec7b004SRick Macklem 		if (islocked)
6639ec7b004SRick Macklem 			vput(dp);
6649ec7b004SRick Macklem 		else
6659ec7b004SRick Macklem 			vrele(dp);
6669ec7b004SRick Macklem 		nfsvno_relpathbuf(ndp);
667a9285ae5SZack Kirsch 		error = ENOTDIR;
668a9285ae5SZack Kirsch 		goto out1;
6699ec7b004SRick Macklem 	}
6709ec7b004SRick Macklem 	if (islocked)
671b249ce48SMateusz Guzik 		NFSVOPUNLOCK(dp);
6729ec7b004SRick Macklem 	VREF(dp);
6739ec7b004SRick Macklem 	*retdirp = dp;
6749ec7b004SRick Macklem 	if (NFSVNO_EXRDONLY(exp))
6759ec7b004SRick Macklem 		cnp->cn_flags |= RDONLY;
6769ec7b004SRick Macklem 	ndp->ni_segflg = UIO_SYSSPACE;
6779ec7b004SRick Macklem 
6789ec7b004SRick Macklem 	if (nd->nd_flag & ND_PUBLOOKUP) {
6799ec7b004SRick Macklem 		ndp->ni_loopcnt = 0;
6809ec7b004SRick Macklem 		if (cnp->cn_pnbuf[0] == '/') {
6819ec7b004SRick Macklem 			vrele(dp);
6829ec7b004SRick Macklem 			/*
6839ec7b004SRick Macklem 			 * Check for degenerate pathnames here, since lookup()
6849ec7b004SRick Macklem 			 * panics on them.
6859ec7b004SRick Macklem 			 */
6869ec7b004SRick Macklem 			for (i = 1; i < ndp->ni_pathlen; i++)
6879ec7b004SRick Macklem 				if (cnp->cn_pnbuf[i] != '/')
6889ec7b004SRick Macklem 					break;
6899ec7b004SRick Macklem 			if (i == ndp->ni_pathlen) {
6909ec7b004SRick Macklem 				error = NFSERR_ACCES;
6919ec7b004SRick Macklem 				goto out;
6929ec7b004SRick Macklem 			}
6939ec7b004SRick Macklem 			dp = rootvnode;
6949ec7b004SRick Macklem 			VREF(dp);
6959ec7b004SRick Macklem 		}
69607c0c166SRick Macklem 	} else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) ||
6979ec7b004SRick Macklem 	    (nd->nd_flag & ND_NFSV4) == 0) {
6989ec7b004SRick Macklem 		/*
6999ec7b004SRick Macklem 		 * Only cross mount points for NFSv4 when doing a
7009ec7b004SRick Macklem 		 * mount while traversing the file system above
70107c0c166SRick Macklem 		 * the mount point, unless nfsrv_enable_crossmntpt is set.
7029ec7b004SRick Macklem 		 */
7039ec7b004SRick Macklem 		cnp->cn_flags |= NOCROSSMOUNT;
7049ec7b004SRick Macklem 	}
7059ec7b004SRick Macklem 
7069ec7b004SRick Macklem 	/*
7079ec7b004SRick Macklem 	 * Initialize for scan, set ni_startdir and bump ref on dp again
70872ccd4ccSDag-Erling Smørgrav 	 * because lookup() will dereference ni_startdir.
7099ec7b004SRick Macklem 	 */
7109ec7b004SRick Macklem 
7119ec7b004SRick Macklem 	ndp->ni_startdir = dp;
7129ec7b004SRick Macklem 	ndp->ni_rootdir = rootvnode;
713b76ec2dbSRick Macklem 	ndp->ni_topdir = NULL;
7149ec7b004SRick Macklem 
7159ec7b004SRick Macklem 	if (!lockleaf)
7169ec7b004SRick Macklem 		cnp->cn_flags |= LOCKLEAF;
7179ec7b004SRick Macklem 	for (;;) {
7189ec7b004SRick Macklem 		cnp->cn_nameptr = cnp->cn_pnbuf;
7199ec7b004SRick Macklem 		/*
7209ec7b004SRick Macklem 		 * Call lookup() to do the real work.  If an error occurs,
7219ec7b004SRick Macklem 		 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
7229ec7b004SRick Macklem 		 * we do not have to dereference anything before returning.
7239ec7b004SRick Macklem 		 * In either case ni_startdir will be dereferenced and NULLed
7249ec7b004SRick Macklem 		 * out.
7259ec7b004SRick Macklem 		 */
7260134bbe5SMateusz Guzik 		error = vfs_lookup(ndp);
7279ec7b004SRick Macklem 		if (error)
7289ec7b004SRick Macklem 			break;
7299ec7b004SRick Macklem 
7309ec7b004SRick Macklem 		/*
7319ec7b004SRick Macklem 		 * Check for encountering a symbolic link.  Trivial
7329ec7b004SRick Macklem 		 * termination occurs if no symlink encountered.
7339ec7b004SRick Macklem 		 */
7349ec7b004SRick Macklem 		if ((cnp->cn_flags & ISSYMLINK) == 0) {
7359ec7b004SRick Macklem 			if (ndp->ni_vp && !lockleaf)
736b249ce48SMateusz Guzik 				NFSVOPUNLOCK(ndp->ni_vp);
7379ec7b004SRick Macklem 			break;
7389ec7b004SRick Macklem 		}
7399ec7b004SRick Macklem 
7409ec7b004SRick Macklem 		/*
7419ec7b004SRick Macklem 		 * Validate symlink
7429ec7b004SRick Macklem 		 */
7439ec7b004SRick Macklem 		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
744b249ce48SMateusz Guzik 			NFSVOPUNLOCK(ndp->ni_dvp);
7459ec7b004SRick Macklem 		if (!(nd->nd_flag & ND_PUBLOOKUP)) {
7469ec7b004SRick Macklem 			error = EINVAL;
7479ec7b004SRick Macklem 			goto badlink2;
7489ec7b004SRick Macklem 		}
7499ec7b004SRick Macklem 
7509ec7b004SRick Macklem 		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
7519ec7b004SRick Macklem 			error = ELOOP;
7529ec7b004SRick Macklem 			goto badlink2;
7539ec7b004SRick Macklem 		}
7549ec7b004SRick Macklem 		if (ndp->ni_pathlen > 1)
7559ec7b004SRick Macklem 			cp = uma_zalloc(namei_zone, M_WAITOK);
7569ec7b004SRick Macklem 		else
7579ec7b004SRick Macklem 			cp = cnp->cn_pnbuf;
7589ec7b004SRick Macklem 		aiov.iov_base = cp;
7599ec7b004SRick Macklem 		aiov.iov_len = MAXPATHLEN;
7609ec7b004SRick Macklem 		auio.uio_iov = &aiov;
7619ec7b004SRick Macklem 		auio.uio_iovcnt = 1;
7629ec7b004SRick Macklem 		auio.uio_offset = 0;
7639ec7b004SRick Macklem 		auio.uio_rw = UIO_READ;
7649ec7b004SRick Macklem 		auio.uio_segflg = UIO_SYSSPACE;
7659ec7b004SRick Macklem 		auio.uio_td = NULL;
7669ec7b004SRick Macklem 		auio.uio_resid = MAXPATHLEN;
7679ec7b004SRick Macklem 		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
7689ec7b004SRick Macklem 		if (error) {
7699ec7b004SRick Macklem 		badlink1:
7709ec7b004SRick Macklem 			if (ndp->ni_pathlen > 1)
7719ec7b004SRick Macklem 				uma_zfree(namei_zone, cp);
7729ec7b004SRick Macklem 		badlink2:
7739ec7b004SRick Macklem 			vrele(ndp->ni_dvp);
7749ec7b004SRick Macklem 			vput(ndp->ni_vp);
7759ec7b004SRick Macklem 			break;
7769ec7b004SRick Macklem 		}
7779ec7b004SRick Macklem 		linklen = MAXPATHLEN - auio.uio_resid;
7789ec7b004SRick Macklem 		if (linklen == 0) {
7799ec7b004SRick Macklem 			error = ENOENT;
7809ec7b004SRick Macklem 			goto badlink1;
7819ec7b004SRick Macklem 		}
7829ec7b004SRick Macklem 		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
7839ec7b004SRick Macklem 			error = ENAMETOOLONG;
7849ec7b004SRick Macklem 			goto badlink1;
7859ec7b004SRick Macklem 		}
7869ec7b004SRick Macklem 
7879ec7b004SRick Macklem 		/*
7889ec7b004SRick Macklem 		 * Adjust or replace path
7899ec7b004SRick Macklem 		 */
7909ec7b004SRick Macklem 		if (ndp->ni_pathlen > 1) {
7919ec7b004SRick Macklem 			NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
7929ec7b004SRick Macklem 			uma_zfree(namei_zone, cnp->cn_pnbuf);
7939ec7b004SRick Macklem 			cnp->cn_pnbuf = cp;
7949ec7b004SRick Macklem 		} else
7959ec7b004SRick Macklem 			cnp->cn_pnbuf[linklen] = '\0';
7969ec7b004SRick Macklem 		ndp->ni_pathlen += linklen;
7979ec7b004SRick Macklem 
7989ec7b004SRick Macklem 		/*
7999ec7b004SRick Macklem 		 * Cleanup refs for next loop and check if root directory
8009ec7b004SRick Macklem 		 * should replace current directory.  Normally ni_dvp
8019ec7b004SRick Macklem 		 * becomes the new base directory and is cleaned up when
8029ec7b004SRick Macklem 		 * we loop.  Explicitly null pointers after invalidation
8039ec7b004SRick Macklem 		 * to clarify operation.
8049ec7b004SRick Macklem 		 */
8059ec7b004SRick Macklem 		vput(ndp->ni_vp);
8069ec7b004SRick Macklem 		ndp->ni_vp = NULL;
8079ec7b004SRick Macklem 
8089ec7b004SRick Macklem 		if (cnp->cn_pnbuf[0] == '/') {
8099ec7b004SRick Macklem 			vrele(ndp->ni_dvp);
8109ec7b004SRick Macklem 			ndp->ni_dvp = ndp->ni_rootdir;
8119ec7b004SRick Macklem 			VREF(ndp->ni_dvp);
8129ec7b004SRick Macklem 		}
8139ec7b004SRick Macklem 		ndp->ni_startdir = ndp->ni_dvp;
8149ec7b004SRick Macklem 		ndp->ni_dvp = NULL;
8159ec7b004SRick Macklem 	}
8169ec7b004SRick Macklem 	if (!lockleaf)
8179ec7b004SRick Macklem 		cnp->cn_flags &= ~LOCKLEAF;
8189ec7b004SRick Macklem 
8199ec7b004SRick Macklem out:
8209ec7b004SRick Macklem 	if (error) {
8213676a0d8SJohn W. De Boskey 		nfsvno_relpathbuf(ndp);
8229ec7b004SRick Macklem 		ndp->ni_vp = NULL;
8239ec7b004SRick Macklem 		ndp->ni_dvp = NULL;
8249ec7b004SRick Macklem 		ndp->ni_startdir = NULL;
8259ec7b004SRick Macklem 	} else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
8269ec7b004SRick Macklem 		ndp->ni_dvp = NULL;
8279ec7b004SRick Macklem 	}
828a9285ae5SZack Kirsch 
829a9285ae5SZack Kirsch out1:
830a9285ae5SZack Kirsch 	NFSEXITCODE2(error, nd);
8319ec7b004SRick Macklem 	return (error);
8329ec7b004SRick Macklem }
8339ec7b004SRick Macklem 
8349ec7b004SRick Macklem /*
8359ec7b004SRick Macklem  * Set up a pathname buffer and return a pointer to it and, optionally
8369ec7b004SRick Macklem  * set a hash pointer.
8379ec7b004SRick Macklem  */
8389ec7b004SRick Macklem void
nfsvno_setpathbuf(struct nameidata * ndp,char ** bufpp,u_long ** hashpp)8399ec7b004SRick Macklem nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp)
8409ec7b004SRick Macklem {
8419ec7b004SRick Macklem 	struct componentname *cnp = &ndp->ni_cnd;
8429ec7b004SRick Macklem 
8435b5b7e2cSMateusz Guzik 	cnp->cn_flags |= (NOMACCHECK);
8449ec7b004SRick Macklem 	cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
8459ec7b004SRick Macklem 	if (hashpp != NULL)
8469ec7b004SRick Macklem 		*hashpp = NULL;
8479ec7b004SRick Macklem 	*bufpp = cnp->cn_pnbuf;
8489ec7b004SRick Macklem }
8499ec7b004SRick Macklem 
8509ec7b004SRick Macklem /*
8519ec7b004SRick Macklem  * Release the above path buffer, if not released by nfsvno_namei().
8529ec7b004SRick Macklem  */
8539ec7b004SRick Macklem void
nfsvno_relpathbuf(struct nameidata * ndp)8549ec7b004SRick Macklem nfsvno_relpathbuf(struct nameidata *ndp)
8559ec7b004SRick Macklem {
8569ec7b004SRick Macklem 
8579ec7b004SRick Macklem 	uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
8585b5b7e2cSMateusz Guzik 	ndp->ni_cnd.cn_pnbuf = NULL;
8599ec7b004SRick Macklem }
8609ec7b004SRick Macklem 
8619ec7b004SRick Macklem /*
8629ec7b004SRick Macklem  * Readlink vnode op into an mbuf list.
8639ec7b004SRick Macklem  */
8649ec7b004SRick Macklem int
nfsvno_readlink(struct vnode * vp,struct ucred * cred,int maxextsiz,struct thread * p,struct mbuf ** mpp,struct mbuf ** mpendp,int * lenp)865cb889ce6SRick Macklem nfsvno_readlink(struct vnode *vp, struct ucred *cred, int maxextsiz,
866cb889ce6SRick Macklem     struct thread *p, struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
8679ec7b004SRick Macklem {
868c057a378SRick Macklem 	struct iovec *iv;
8699ec7b004SRick Macklem 	struct uio io, *uiop = &io;
870c057a378SRick Macklem 	struct mbuf *mp, *mp3;
871c057a378SRick Macklem 	int len, tlen, error = 0;
8729ec7b004SRick Macklem 
8739ec7b004SRick Macklem 	len = NFS_MAXPATHLEN;
874cb889ce6SRick Macklem 	if (maxextsiz > 0)
875cb889ce6SRick Macklem 		uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz,
876cb889ce6SRick Macklem 		    &mp3, &mp, &iv);
877cb889ce6SRick Macklem 	else
878c057a378SRick Macklem 		uiop->uio_iovcnt = nfsrv_createiovec(len, &mp3, &mp, &iv);
8799ec7b004SRick Macklem 	uiop->uio_iov = iv;
8809ec7b004SRick Macklem 	uiop->uio_offset = 0;
8819ec7b004SRick Macklem 	uiop->uio_resid = len;
8829ec7b004SRick Macklem 	uiop->uio_rw = UIO_READ;
8839ec7b004SRick Macklem 	uiop->uio_segflg = UIO_SYSSPACE;
8849ec7b004SRick Macklem 	uiop->uio_td = NULL;
8859ec7b004SRick Macklem 	error = VOP_READLINK(vp, uiop, cred);
886c057a378SRick Macklem 	free(iv, M_TEMP);
8879ec7b004SRick Macklem 	if (error) {
8889ec7b004SRick Macklem 		m_freem(mp3);
8899ec7b004SRick Macklem 		*lenp = 0;
890a9285ae5SZack Kirsch 		goto out;
8919ec7b004SRick Macklem 	}
8929ec7b004SRick Macklem 	if (uiop->uio_resid > 0) {
8939ec7b004SRick Macklem 		len -= uiop->uio_resid;
8949ec7b004SRick Macklem 		tlen = NFSM_RNDUP(len);
89518a48314SRick Macklem 		if (tlen == 0) {
89618a48314SRick Macklem 			m_freem(mp3);
89718a48314SRick Macklem 			mp3 = mp = NULL;
89818a48314SRick Macklem 		} else if (tlen != NFS_MAXPATHLEN || tlen != len)
89918a48314SRick Macklem 			mp = nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen,
90018a48314SRick Macklem 			    tlen - len);
9019ec7b004SRick Macklem 	}
9029ec7b004SRick Macklem 	*lenp = len;
9039ec7b004SRick Macklem 	*mpp = mp3;
9049ec7b004SRick Macklem 	*mpendp = mp;
905a9285ae5SZack Kirsch 
906a9285ae5SZack Kirsch out:
907a9285ae5SZack Kirsch 	NFSEXITCODE(error);
908a9285ae5SZack Kirsch 	return (error);
9099ec7b004SRick Macklem }
9109ec7b004SRick Macklem 
9119ec7b004SRick Macklem /*
912c057a378SRick Macklem  * Create an mbuf chain and an associated iovec that can be used to Read
913c057a378SRick Macklem  * or Getextattr of data.
914c057a378SRick Macklem  * Upon success, return pointers to the first and last mbufs in the chain
915c057a378SRick Macklem  * plus the malloc'd iovec and its iovlen.
9169ec7b004SRick Macklem  */
917c057a378SRick Macklem static int
nfsrv_createiovec(int len,struct mbuf ** mpp,struct mbuf ** mpendp,struct iovec ** ivp)918c057a378SRick Macklem nfsrv_createiovec(int len, struct mbuf **mpp, struct mbuf **mpendp,
919c057a378SRick Macklem     struct iovec **ivp)
9209ec7b004SRick Macklem {
921c057a378SRick Macklem 	struct mbuf *m, *m2 = NULL, *m3;
9229ec7b004SRick Macklem 	struct iovec *iv;
923c057a378SRick Macklem 	int i, left, siz;
9249ec7b004SRick Macklem 
925c057a378SRick Macklem 	left = len;
9269ec7b004SRick Macklem 	m3 = NULL;
9279ec7b004SRick Macklem 	/*
9289ec7b004SRick Macklem 	 * Generate the mbuf list with the uio_iov ref. to it.
9299ec7b004SRick Macklem 	 */
9309ec7b004SRick Macklem 	i = 0;
9319ec7b004SRick Macklem 	while (left > 0) {
9329ec7b004SRick Macklem 		NFSMGET(m);
933eb1b1807SGleb Smirnoff 		MCLGET(m, M_WAITOK);
9349ec7b004SRick Macklem 		m->m_len = 0;
9359ec7b004SRick Macklem 		siz = min(M_TRAILINGSPACE(m), left);
9369ec7b004SRick Macklem 		left -= siz;
9379ec7b004SRick Macklem 		i++;
9389ec7b004SRick Macklem 		if (m3)
9399ec7b004SRick Macklem 			m2->m_next = m;
9409ec7b004SRick Macklem 		else
9419ec7b004SRick Macklem 			m3 = m;
9429ec7b004SRick Macklem 		m2 = m;
9439ec7b004SRick Macklem 	}
944c057a378SRick Macklem 	*ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK);
9459ec7b004SRick Macklem 	m = m3;
9469ec7b004SRick Macklem 	left = len;
9479ec7b004SRick Macklem 	i = 0;
9489ec7b004SRick Macklem 	while (left > 0) {
9499ec7b004SRick Macklem 		if (m == NULL)
950cb889ce6SRick Macklem 			panic("nfsrv_createiovec iov");
9519ec7b004SRick Macklem 		siz = min(M_TRAILINGSPACE(m), left);
9529ec7b004SRick Macklem 		if (siz > 0) {
9539ec7b004SRick Macklem 			iv->iov_base = mtod(m, caddr_t) + m->m_len;
9549ec7b004SRick Macklem 			iv->iov_len = siz;
9559ec7b004SRick Macklem 			m->m_len += siz;
9569ec7b004SRick Macklem 			left -= siz;
9579ec7b004SRick Macklem 			iv++;
9589ec7b004SRick Macklem 			i++;
9599ec7b004SRick Macklem 		}
9609ec7b004SRick Macklem 		m = m->m_next;
9619ec7b004SRick Macklem 	}
962c057a378SRick Macklem 	*mpp = m3;
963c057a378SRick Macklem 	*mpendp = m2;
964c057a378SRick Macklem 	return (i);
965c057a378SRick Macklem }
966c057a378SRick Macklem 
967c057a378SRick Macklem /*
968cb889ce6SRick Macklem  * Create an mbuf chain and an associated iovec that can be used to Read
969cb889ce6SRick Macklem  * or Getextattr of data.
970cb889ce6SRick Macklem  * Upon success, return pointers to the first and last mbufs in the chain
971cb889ce6SRick Macklem  * plus the malloc'd iovec and its iovlen.
972cb889ce6SRick Macklem  * Same as above, but creates ext_pgs mbuf(s).
973cb889ce6SRick Macklem  */
974cb889ce6SRick Macklem static int
nfsrv_createiovec_extpgs(int len,int maxextsiz,struct mbuf ** mpp,struct mbuf ** mpendp,struct iovec ** ivp)975cb889ce6SRick Macklem nfsrv_createiovec_extpgs(int len, int maxextsiz, struct mbuf **mpp,
976cb889ce6SRick Macklem     struct mbuf **mpendp, struct iovec **ivp)
977cb889ce6SRick Macklem {
978cb889ce6SRick Macklem 	struct mbuf *m, *m2 = NULL, *m3;
979cb889ce6SRick Macklem 	struct iovec *iv;
980cb889ce6SRick Macklem 	int i, left, pgno, siz;
981cb889ce6SRick Macklem 
982cb889ce6SRick Macklem 	left = len;
983cb889ce6SRick Macklem 	m3 = NULL;
984cb889ce6SRick Macklem 	/*
985cb889ce6SRick Macklem 	 * Generate the mbuf list with the uio_iov ref. to it.
986cb889ce6SRick Macklem 	 */
987cb889ce6SRick Macklem 	i = 0;
988cb889ce6SRick Macklem 	while (left > 0) {
989cb889ce6SRick Macklem 		siz = min(left, maxextsiz);
990cb889ce6SRick Macklem 		m = mb_alloc_ext_plus_pages(siz, M_WAITOK);
991cb889ce6SRick Macklem 		left -= siz;
992cb889ce6SRick Macklem 		i += m->m_epg_npgs;
993cb889ce6SRick Macklem 		if (m3 != NULL)
994cb889ce6SRick Macklem 			m2->m_next = m;
995cb889ce6SRick Macklem 		else
996cb889ce6SRick Macklem 			m3 = m;
997cb889ce6SRick Macklem 		m2 = m;
998cb889ce6SRick Macklem 	}
999cb889ce6SRick Macklem 	*ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK);
1000cb889ce6SRick Macklem 	m = m3;
1001cb889ce6SRick Macklem 	left = len;
1002cb889ce6SRick Macklem 	i = 0;
1003cb889ce6SRick Macklem 	pgno = 0;
1004cb889ce6SRick Macklem 	while (left > 0) {
1005cb889ce6SRick Macklem 		if (m == NULL)
1006cb889ce6SRick Macklem 			panic("nfsvno_createiovec_extpgs iov");
1007cb889ce6SRick Macklem 		siz = min(PAGE_SIZE, left);
1008cb889ce6SRick Macklem 		if (siz > 0) {
1009cb889ce6SRick Macklem 			iv->iov_base = (void *)PHYS_TO_DMAP(m->m_epg_pa[pgno]);
1010cb889ce6SRick Macklem 			iv->iov_len = siz;
1011cb889ce6SRick Macklem 			m->m_len += siz;
1012cb889ce6SRick Macklem 			if (pgno == m->m_epg_npgs - 1)
1013cb889ce6SRick Macklem 				m->m_epg_last_len = siz;
1014cb889ce6SRick Macklem 			left -= siz;
1015cb889ce6SRick Macklem 			iv++;
1016cb889ce6SRick Macklem 			i++;
1017cb889ce6SRick Macklem 			pgno++;
1018cb889ce6SRick Macklem 		}
1019cb889ce6SRick Macklem 		if (pgno == m->m_epg_npgs && left > 0) {
1020cb889ce6SRick Macklem 			m = m->m_next;
1021cb889ce6SRick Macklem 			if (m == NULL)
1022cb889ce6SRick Macklem 				panic("nfsvno_createiovec_extpgs iov");
1023cb889ce6SRick Macklem 			pgno = 0;
1024cb889ce6SRick Macklem 		}
1025cb889ce6SRick Macklem 	}
1026cb889ce6SRick Macklem 	*mpp = m3;
1027cb889ce6SRick Macklem 	*mpendp = m2;
1028cb889ce6SRick Macklem 	return (i);
1029cb889ce6SRick Macklem }
1030cb889ce6SRick Macklem 
1031cb889ce6SRick Macklem /*
1032c057a378SRick Macklem  * Read vnode op call into mbuf list.
1033c057a378SRick Macklem  */
1034c057a378SRick Macklem int
nfsvno_read(struct vnode * vp,off_t off,int cnt,struct ucred * cred,int maxextsiz,struct thread * p,struct mbuf ** mpp,struct mbuf ** mpendp)1035c057a378SRick Macklem nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
1036cb889ce6SRick Macklem     int maxextsiz, struct thread *p, struct mbuf **mpp,
1037cb889ce6SRick Macklem     struct mbuf **mpendp)
1038c057a378SRick Macklem {
1039c057a378SRick Macklem 	struct mbuf *m;
1040c057a378SRick Macklem 	struct iovec *iv;
1041c057a378SRick Macklem 	int error = 0, len, tlen, ioflag = 0;
1042c057a378SRick Macklem 	struct mbuf *m3;
1043c057a378SRick Macklem 	struct uio io, *uiop = &io;
1044c057a378SRick Macklem 	struct nfsheur *nh;
1045c057a378SRick Macklem 
1046c057a378SRick Macklem 	/*
1047c057a378SRick Macklem 	 * Attempt to read from a DS file. A return of ENOENT implies
1048c057a378SRick Macklem 	 * there is no DS file to read.
1049c057a378SRick Macklem 	 */
1050c057a378SRick Macklem 	error = nfsrv_proxyds(vp, off, cnt, cred, p, NFSPROC_READDS, mpp,
1051c057a378SRick Macklem 	    NULL, mpendp, NULL, NULL, NULL, 0, NULL);
1052c057a378SRick Macklem 	if (error != ENOENT)
1053c057a378SRick Macklem 		return (error);
1054c057a378SRick Macklem 
1055c057a378SRick Macklem 	len = NFSM_RNDUP(cnt);
1056cb889ce6SRick Macklem 	if (maxextsiz > 0)
1057cb889ce6SRick Macklem 		uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz,
1058cb889ce6SRick Macklem 		    &m3, &m, &iv);
1059cb889ce6SRick Macklem 	else
1060c057a378SRick Macklem 		uiop->uio_iovcnt = nfsrv_createiovec(len, &m3, &m, &iv);
1061c057a378SRick Macklem 	uiop->uio_iov = iv;
10629ec7b004SRick Macklem 	uiop->uio_offset = off;
10639ec7b004SRick Macklem 	uiop->uio_resid = len;
10649ec7b004SRick Macklem 	uiop->uio_rw = UIO_READ;
10659ec7b004SRick Macklem 	uiop->uio_segflg = UIO_SYSSPACE;
10663c53f923SRick Macklem 	uiop->uio_td = NULL;
1067574862c8SJohn Baldwin 	nh = nfsrv_sequential_heuristic(uiop, vp);
1068574862c8SJohn Baldwin 	ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
10691b819cf2SRick Macklem 	/* XXX KDM make this more systematic? */
1070b039ca07SRick Macklem 	NFSD_VNET(nfsstatsv1_p)->srvbytes[NFSV4OP_READ] += uiop->uio_resid;
10719ec7b004SRick Macklem 	error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
1072c057a378SRick Macklem 	free(iv, M_TEMP);
10739ec7b004SRick Macklem 	if (error) {
10749ec7b004SRick Macklem 		m_freem(m3);
10759ec7b004SRick Macklem 		*mpp = NULL;
1076a9285ae5SZack Kirsch 		goto out;
10779ec7b004SRick Macklem 	}
1078574862c8SJohn Baldwin 	nh->nh_nextoff = uiop->uio_offset;
10799ec7b004SRick Macklem 	tlen = len - uiop->uio_resid;
10809ec7b004SRick Macklem 	cnt = cnt < tlen ? cnt : tlen;
10819ec7b004SRick Macklem 	tlen = NFSM_RNDUP(cnt);
10829ec7b004SRick Macklem 	if (tlen == 0) {
10839ec7b004SRick Macklem 		m_freem(m3);
108418a48314SRick Macklem 		m3 = m = NULL;
10859ec7b004SRick Macklem 	} else if (len != tlen || tlen != cnt)
108618a48314SRick Macklem 		m = nfsrv_adj(m3, len - tlen, tlen - cnt);
10879ec7b004SRick Macklem 	*mpp = m3;
1088c057a378SRick Macklem 	*mpendp = m;
1089a9285ae5SZack Kirsch 
1090a9285ae5SZack Kirsch out:
1091a9285ae5SZack Kirsch 	NFSEXITCODE(error);
1092a9285ae5SZack Kirsch 	return (error);
10939ec7b004SRick Macklem }
10949ec7b004SRick Macklem 
10959ec7b004SRick Macklem /*
1096c057a378SRick Macklem  * Create the iovec for the mbuf chain passed in as an argument.
1097c057a378SRick Macklem  * The "cp" argument is where the data starts within the first mbuf in
1098c057a378SRick Macklem  * the chain. It returns the iovec and the iovcnt.
10999ec7b004SRick Macklem  */
1100c057a378SRick Macklem static int
nfsrv_createiovecw(int retlen,struct mbuf * m,char * cp,struct iovec ** ivpp,int * iovcntp)1101c057a378SRick Macklem nfsrv_createiovecw(int retlen, struct mbuf *m, char *cp, struct iovec **ivpp,
1102c057a378SRick Macklem     int *iovcntp)
11039ec7b004SRick Macklem {
1104c057a378SRick Macklem 	struct mbuf *mp;
11059ec7b004SRick Macklem 	struct iovec *ivp;
1106c057a378SRick Macklem 	int cnt, i, len;
11079ec7b004SRick Macklem 
110890d2dfabSRick Macklem 	/*
1109c057a378SRick Macklem 	 * Loop through the mbuf chain, counting how many mbufs are a
1110c057a378SRick Macklem 	 * part of this write operation, so the iovec size is known.
111190d2dfabSRick Macklem 	 */
1112c057a378SRick Macklem 	cnt = 0;
1113c057a378SRick Macklem 	len = retlen;
1114c057a378SRick Macklem 	mp = m;
11159f6624d3SRick Macklem 	i = mtod(mp, caddr_t) + mp->m_len - cp;
1116c057a378SRick Macklem 	while (len > 0) {
1117c057a378SRick Macklem 		if (i > 0) {
1118c057a378SRick Macklem 			len -= i;
1119c057a378SRick Macklem 			cnt++;
1120c057a378SRick Macklem 		}
11219f6624d3SRick Macklem 		mp = mp->m_next;
1122c057a378SRick Macklem 		if (!mp) {
1123c057a378SRick Macklem 			if (len > 0)
1124c057a378SRick Macklem 				return (EBADRPC);
1125c057a378SRick Macklem 		} else
11269f6624d3SRick Macklem 			i = mp->m_len;
112790d2dfabSRick Macklem 	}
112890d2dfabSRick Macklem 
1129c057a378SRick Macklem 	/* Now, create the iovec. */
1130c057a378SRick Macklem 	mp = m;
1131c057a378SRick Macklem 	*ivpp = ivp = malloc(cnt * sizeof (struct iovec), M_TEMP,
11329ec7b004SRick Macklem 	    M_WAITOK);
1133c057a378SRick Macklem 	*iovcntp = cnt;
11349ec7b004SRick Macklem 	i = mtod(mp, caddr_t) + mp->m_len - cp;
11359ec7b004SRick Macklem 	len = retlen;
11369ec7b004SRick Macklem 	while (len > 0) {
11379ec7b004SRick Macklem 		if (mp == NULL)
1138cb889ce6SRick Macklem 			panic("nfsrv_createiovecw");
11399ec7b004SRick Macklem 		if (i > 0) {
11409ec7b004SRick Macklem 			i = min(i, len);
11419ec7b004SRick Macklem 			ivp->iov_base = cp;
11429ec7b004SRick Macklem 			ivp->iov_len = i;
11439ec7b004SRick Macklem 			ivp++;
11449ec7b004SRick Macklem 			len -= i;
11459ec7b004SRick Macklem 		}
11469ec7b004SRick Macklem 		mp = mp->m_next;
11479ec7b004SRick Macklem 		if (mp) {
11489ec7b004SRick Macklem 			i = mp->m_len;
11499ec7b004SRick Macklem 			cp = mtod(mp, caddr_t);
11509ec7b004SRick Macklem 		}
11519ec7b004SRick Macklem 	}
1152c057a378SRick Macklem 	return (0);
1153c057a378SRick Macklem }
1154c057a378SRick Macklem 
1155c057a378SRick Macklem /*
1156c057a378SRick Macklem  * Write vnode op from an mbuf list.
 *
 * The write is first offered to nfsrv_proxyds() (NFSPROC_WRITEDS).  Any
 * result other than ENOENT, success included, is returned as is after
 * *stable has been set to NFSWRITE_FILESYNC.  On ENOENT the data is
 * written to vp itself: nfsrv_createiovecw() builds an iovec array for
 * the retlen bytes reached through mp/cp and VOP_WRITE() is issued at
 * offset off with cred.
 *
 * *stable is read to choose the I/O flags: NFSWRITE_UNSTABLE yields
 * IO_NODELOCKED only, any other value adds IO_SYNC.  It is not modified
 * on the local write path.
 *
 * Returns 0 or the error from nfsrv_proxyds(), nfsrv_createiovecw() or
 * VOP_WRITE().  The two early returns do not pass through NFSEXITCODE().
1157c057a378SRick Macklem  */
1158c057a378SRick Macklem int
nfsvno_write(struct vnode * vp,off_t off,int retlen,int * stable,struct mbuf * mp,char * cp,struct ucred * cred,struct thread * p)1159c057a378SRick Macklem nfsvno_write(struct vnode *vp, off_t off, int retlen, int *stable,
1160c057a378SRick Macklem     struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p)
1161c057a378SRick Macklem {
1162c057a378SRick Macklem 	struct iovec *iv;
1163c057a378SRick Macklem 	int cnt, ioflags, error;
1164c057a378SRick Macklem 	struct uio io, *uiop = &io;
1165c057a378SRick Macklem 	struct nfsheur *nh;
1166c057a378SRick Macklem 
1167c057a378SRick Macklem 	/*
1168c057a378SRick Macklem 	 * Attempt to write to a DS file. A return of ENOENT implies
1169c057a378SRick Macklem 	 * there is no DS file to write.
1170c057a378SRick Macklem 	 */
1171c057a378SRick Macklem 	error = nfsrv_proxyds(vp, off, retlen, cred, p, NFSPROC_WRITEDS,
1172c057a378SRick Macklem 	    &mp, cp, NULL, NULL, NULL, NULL, 0, NULL);
1173c057a378SRick Macklem 	if (error != ENOENT) {
1174c057a378SRick Macklem 		*stable = NFSWRITE_FILESYNC;
1175c057a378SRick Macklem 		return (error);
1176c057a378SRick Macklem 	}
1177c057a378SRick Macklem 
117890d2dfabSRick Macklem 	if (*stable == NFSWRITE_UNSTABLE)
11799ec7b004SRick Macklem 		ioflags = IO_NODELOCKED;
11809ec7b004SRick Macklem 	else
11819ec7b004SRick Macklem 		ioflags = (IO_SYNC | IO_NODELOCKED);
1182c057a378SRick Macklem 	error = nfsrv_createiovecw(retlen, mp, cp, &iv, &cnt);
1183c057a378SRick Macklem 	if (error != 0)
1184c057a378SRick Macklem 		return (error);
	/* Describe the kernel-space write of retlen bytes at offset off. */
1185c057a378SRick Macklem 	uiop->uio_iov = iv;
1186c057a378SRick Macklem 	uiop->uio_iovcnt = cnt;
11879ec7b004SRick Macklem 	uiop->uio_resid = retlen;
11889ec7b004SRick Macklem 	uiop->uio_rw = UIO_WRITE;
11899ec7b004SRick Macklem 	uiop->uio_segflg = UIO_SYSSPACE;
11909ec7b004SRick Macklem 	NFSUIOPROC(uiop, p);
11919ec7b004SRick Macklem 	uiop->uio_offset = off;
	/* Fold the heuristic's sequential-access count into the I/O flags. */
1192574862c8SJohn Baldwin 	nh = nfsrv_sequential_heuristic(uiop, vp);
1193574862c8SJohn Baldwin 	ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
11941b819cf2SRick Macklem 	/* XXX KDM make this more systematic? */
	/* Added before VOP_WRITE() runs, so this counts the requested bytes. */
1195b039ca07SRick Macklem 	NFSD_VNET(nfsstatsv1_p)->srvbytes[NFSV4OP_WRITE] += uiop->uio_resid;
11969ec7b004SRick Macklem 	error = VOP_WRITE(vp, uiop, ioflags, cred);
	/* On success record where this write ended in the heuristic entry. */
1197574862c8SJohn Baldwin 	if (error == 0)
1198574862c8SJohn Baldwin 		nh->nh_nextoff = uiop->uio_offset;
	/* iv was obtained from nfsrv_createiovecw() above. */
1199222daa42SConrad Meyer 	free(iv, M_TEMP);
1200a9285ae5SZack Kirsch 
1201a9285ae5SZack Kirsch 	NFSEXITCODE(error);
12029ec7b004SRick Macklem 	return (error);
12039ec7b004SRick Macklem }
12049ec7b004SRick Macklem 
12059ec7b004SRick Macklem /*
12069ec7b004SRick Macklem  * Common code for creating a regular file (plus special files for V2).
 *
 * Starts from nd->nd_repstat.  When that is 0 and the lookup recorded in
 * ndp found no object (ni_vp == NULL), a new one is created according to
 * nvap->na_type:
 *   VREG, VSOCK       - VOP_CREATE(); for VREG nfsrv_pnfscreate() is
 *                       called afterwards.  If *exclusive_flagp is set it
 *                       is cleared and cverf[0]/cverf[1] are stored in the
 *                       new file's atime through VOP_SETATTR(); a failure
 *                       there vput()s the vnode, clears ni_vp and yields
 *                       NFSERR_NOTSUPP.
 *   VCHR, VBLK, VFIFO - VOP_MKNOD() with na_rdev = rdev.  VCHR with an
 *                       rdev of 0xffffffff is turned into VFIFO; the
 *                       remaining device types require
 *                       PRIV_VFS_MKNOD_DEV.
 *   anything else     - ENXIO.
 * Otherwise (error already set and/or the object exists) the lookup is
 * cleaned up and, iff there is no error and nvap->na_size is set, the
 * existing file is resized with nfsvno_setattr() after a VWRITE access
 * check.
 *
 * *vpp receives ndp->ni_vp except on the paths that jump straight to
 * "out".  On every path the pathname buffer is released with
 * nfsvno_relpathbuf() and ni_dvp is passed to VOP_VPUT_PAIR(), vput() or
 * vrele().
 *
 * Returns 0 or an error.
12079ec7b004SRick Macklem  */
12089ec7b004SRick Macklem int
nfsvno_createsub(struct nfsrv_descript * nd,struct nameidata * ndp,struct vnode ** vpp,struct nfsvattr * nvap,int * exclusive_flagp,int32_t * cverf,NFSDEV_T rdev,struct nfsexstuff * exp)12099ec7b004SRick Macklem nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp,
12109ec7b004SRick Macklem     struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp,
1211127152feSEdward Tomasz Napierala     int32_t *cverf, NFSDEV_T rdev, struct nfsexstuff *exp)
12129ec7b004SRick Macklem {
12139ec7b004SRick Macklem 	u_quad_t tempsize;
12149ec7b004SRick Macklem 	int error;
1215127152feSEdward Tomasz Napierala 	struct thread *p = curthread;
12169ec7b004SRick Macklem 
12179ec7b004SRick Macklem 	error = nd->nd_repstat;
12189ec7b004SRick Macklem 	if (!error && ndp->ni_vp == NULL) {
12199ec7b004SRick Macklem 		if (nvap->na_type == VREG || nvap->na_type == VSOCK) {
12209ec7b004SRick Macklem 			error = VOP_CREATE(ndp->ni_dvp,
12219ec7b004SRick Macklem 			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
122290d2dfabSRick Macklem 			/* For a pNFS server, create the data file on a DS. */
122390d2dfabSRick Macklem 			if (error == 0 && nvap->na_type == VREG) {
122490d2dfabSRick Macklem 				/*
122590d2dfabSRick Macklem 				 * Create a data file on a DS for a pNFS server.
122690d2dfabSRick Macklem 				 * This function just returns if not
122790d2dfabSRick Macklem 				 * running a pNFS DS or the creation fails.
122890d2dfabSRick Macklem 				 */
122990d2dfabSRick Macklem 				nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr,
123090d2dfabSRick Macklem 				    nd->nd_cred, p);
123190d2dfabSRick Macklem 			}
			/* ni_vp is only handed over when the create succeeded. */
12324a21bcb2SKonstantin Belousov 			VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp :
12334a21bcb2SKonstantin Belousov 			    NULL, false);
12349ec7b004SRick Macklem 			nfsvno_relpathbuf(ndp);
12359ec7b004SRick Macklem 			if (!error) {
				/*
				 * Exclusive create: the two verifier words are
				 * stored in the atime of the new file.
				 */
12369ec7b004SRick Macklem 				if (*exclusive_flagp) {
12379ec7b004SRick Macklem 					*exclusive_flagp = 0;
12389ec7b004SRick Macklem 					NFSVNO_ATTRINIT(nvap);
1239086f6e0cSRick Macklem 					nvap->na_atime.tv_sec = cverf[0];
1240086f6e0cSRick Macklem 					nvap->na_atime.tv_nsec = cverf[1];
12419ec7b004SRick Macklem 					error = VOP_SETATTR(ndp->ni_vp,
12429ec7b004SRick Macklem 					    &nvap->na_vattr, nd->nd_cred);
124313c581fcSRick Macklem 					if (error != 0) {
124413c581fcSRick Macklem 						vput(ndp->ni_vp);
124513c581fcSRick Macklem 						ndp->ni_vp = NULL;
124613c581fcSRick Macklem 						error = NFSERR_NOTSUPP;
124713c581fcSRick Macklem 					}
12489ec7b004SRick Macklem 				}
12499ec7b004SRick Macklem 			}
12509ec7b004SRick Macklem 		/*
12519ec7b004SRick Macklem 		 * NFS V2 Only. nfsrvd_mknod() does this for V3.
12529ec7b004SRick Macklem 		 * (This implies, just get out on an error.)
12539ec7b004SRick Macklem 		 */
12549ec7b004SRick Macklem 		} else if (nvap->na_type == VCHR || nvap->na_type == VBLK ||
12559ec7b004SRick Macklem 			nvap->na_type == VFIFO) {
			/* A VCHR request with rdev 0xffffffff is treated as a FIFO. */
12569ec7b004SRick Macklem 			if (nvap->na_type == VCHR && rdev == 0xffffffff)
12579ec7b004SRick Macklem 				nvap->na_type = VFIFO;
12589ec7b004SRick Macklem                         if (nvap->na_type != VFIFO &&
1259cc426dd3SMateusz Guzik 			    (error = priv_check_cred(nd->nd_cred, PRIV_VFS_MKNOD_DEV))) {
12609ec7b004SRick Macklem 				nfsvno_relpathbuf(ndp);
12619ec7b004SRick Macklem 				vput(ndp->ni_dvp);
1262a9285ae5SZack Kirsch 				goto out;
12639ec7b004SRick Macklem 			}
12649ec7b004SRick Macklem 			nvap->na_rdev = rdev;
12659ec7b004SRick Macklem 			error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
12669ec7b004SRick Macklem 			    &ndp->ni_cnd, &nvap->na_vattr);
12674a21bcb2SKonstantin Belousov 			VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp :
12684a21bcb2SKonstantin Belousov 			    NULL, false);
12699ec7b004SRick Macklem 			nfsvno_relpathbuf(ndp);
1270f659876fSRick Macklem 			if (error)
1271a9285ae5SZack Kirsch 				goto out;
12729ec7b004SRick Macklem 		} else {
12739ec7b004SRick Macklem 			nfsvno_relpathbuf(ndp);
12749ec7b004SRick Macklem 			vput(ndp->ni_dvp);
1275a9285ae5SZack Kirsch 			error = ENXIO;
1276a9285ae5SZack Kirsch 			goto out;
12779ec7b004SRick Macklem 		}
12789ec7b004SRick Macklem 		*vpp = ndp->ni_vp;
12799ec7b004SRick Macklem 	} else {
12809ec7b004SRick Macklem 		/*
12819ec7b004SRick Macklem 		 * Handle cases where error is already set and/or
12829ec7b004SRick Macklem 		 * the file exists.
12839ec7b004SRick Macklem 		 * 1 - clean up the lookup
12849ec7b004SRick Macklem 		 * 2 - iff !error and na_size set, truncate it
12859ec7b004SRick Macklem 		 */
12869ec7b004SRick Macklem 		nfsvno_relpathbuf(ndp);
12879ec7b004SRick Macklem 		*vpp = ndp->ni_vp;
12889ec7b004SRick Macklem 		if (ndp->ni_dvp == *vpp)
12899ec7b004SRick Macklem 			vrele(ndp->ni_dvp);
12909ec7b004SRick Macklem 		else
12919ec7b004SRick Macklem 			vput(ndp->ni_dvp);
12929ec7b004SRick Macklem 		if (!error && nvap->na_size != VNOVAL) {
12938da45f2cSRick Macklem 			error = nfsvno_accchk(*vpp, VWRITE,
12949ec7b004SRick Macklem 			    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
12958da45f2cSRick Macklem 			    NFSACCCHK_VPISLOCKED, NULL);
12969ec7b004SRick Macklem 			if (!error) {
				/* Reset the attributes so that only the size is applied. */
12979ec7b004SRick Macklem 				tempsize = nvap->na_size;
12989ec7b004SRick Macklem 				NFSVNO_ATTRINIT(nvap);
12999ec7b004SRick Macklem 				nvap->na_size = tempsize;
1300f32bf50dSRick Macklem 				error = nfsvno_setattr(*vpp, nvap,
1301f32bf50dSRick Macklem 				    nd->nd_cred, p, exp);
13029ec7b004SRick Macklem 			}
13039ec7b004SRick Macklem 		}
		/*
		 * NOTE(review): this vput() assumes ni_vp is non-NULL whenever
		 * an error was carried in through nd_repstat -- confirm
		 * against the callers.
		 */
13049ec7b004SRick Macklem 		if (error)
13059ec7b004SRick Macklem 			vput(*vpp);
13069ec7b004SRick Macklem 	}
1307a9285ae5SZack Kirsch 
1308a9285ae5SZack Kirsch out:
1309a9285ae5SZack Kirsch 	NFSEXITCODE(error);
13109ec7b004SRick Macklem 	return (error);
13119ec7b004SRick Macklem }
13129ec7b004SRick Macklem 
13139ec7b004SRick Macklem /*
13149ec7b004SRick Macklem  * Do a mknod vnode op.
 *
 * If the lookup recorded in ndp found an existing object (ni_vp != NULL)
 * EEXIST is returned.  nvap->na_type must be VCHR, VBLK, VSOCK or VFIFO,
 * otherwise NFSERR_BADTYPE is returned.  VSOCK is created with
 * VOP_CREATE(), the other types with VOP_MKNOD(); on the VOP_MKNOD() path
 * every type except VFIFO must first pass
 * priv_check_cred(cred, PRIV_VFS_MKNOD_DEV).
 *
 * On every path the pathname buffer is released with nfsvno_relpathbuf()
 * and ni_dvp is passed to vput() or VOP_VPUT_PAIR().  p is not referenced
 * in this function.
 *
 * Returns 0 or an error.
13159ec7b004SRick Macklem  */
13169ec7b004SRick Macklem int
nfsvno_mknod(struct nameidata * ndp,struct nfsvattr * nvap,struct ucred * cred,struct thread * p)13179ec7b004SRick Macklem nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred,
13189ec7b004SRick Macklem     struct thread *p)
13199ec7b004SRick Macklem {
13209ec7b004SRick Macklem 	int error = 0;
1321ba8cc6d7SMateusz Guzik 	__enum_uint8(vtype) vtyp;
13229ec7b004SRick Macklem 
13239ec7b004SRick Macklem 	vtyp = nvap->na_type;
13249ec7b004SRick Macklem 	/*
13259ec7b004SRick Macklem 	 * Iff doesn't exist, create it.
13269ec7b004SRick Macklem 	 */
13279ec7b004SRick Macklem 	if (ndp->ni_vp) {
		/*
		 * NOTE(review): nfsvno_mkdir() and nfsvno_symlink() use
		 * vrele() instead of vput() on ni_dvp when it equals ni_vp;
		 * this path always uses vput().  Confirm whether
		 * ni_dvp == ni_vp can occur here.
		 */
13289ec7b004SRick Macklem 		nfsvno_relpathbuf(ndp);
13299ec7b004SRick Macklem 		vput(ndp->ni_dvp);
13309ec7b004SRick Macklem 		vrele(ndp->ni_vp);
1331a9285ae5SZack Kirsch 		error = EEXIST;
1332a9285ae5SZack Kirsch 		goto out;
13339ec7b004SRick Macklem 	}
13349ec7b004SRick Macklem 	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
13359ec7b004SRick Macklem 		nfsvno_relpathbuf(ndp);
13369ec7b004SRick Macklem 		vput(ndp->ni_dvp);
1337a9285ae5SZack Kirsch 		error = NFSERR_BADTYPE;
1338a9285ae5SZack Kirsch 		goto out;
13399ec7b004SRick Macklem 	}
13409ec7b004SRick Macklem 	if (vtyp == VSOCK) {
13419ec7b004SRick Macklem 		error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
13429ec7b004SRick Macklem 		    &ndp->ni_cnd, &nvap->na_vattr);
		/* ni_vp is only handed over when the create succeeded. */
13434a21bcb2SKonstantin Belousov 		VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL,
13444a21bcb2SKonstantin Belousov 		    false);
13459ec7b004SRick Macklem 		nfsvno_relpathbuf(ndp);
13469ec7b004SRick Macklem 	} else {
		/* Device nodes need the mknod privilege; FIFOs do not. */
13479ec7b004SRick Macklem 		if (nvap->na_type != VFIFO &&
1348cc426dd3SMateusz Guzik 		    (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV))) {
13499ec7b004SRick Macklem 			nfsvno_relpathbuf(ndp);
13509ec7b004SRick Macklem 			vput(ndp->ni_dvp);
1351a9285ae5SZack Kirsch 			goto out;
13529ec7b004SRick Macklem 		}
13539ec7b004SRick Macklem 		error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
13549ec7b004SRick Macklem 		    &ndp->ni_cnd, &nvap->na_vattr);
13554a21bcb2SKonstantin Belousov 		VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL,
13564a21bcb2SKonstantin Belousov 		    false);
13579ec7b004SRick Macklem 		nfsvno_relpathbuf(ndp);
13589ec7b004SRick Macklem 		/*
13599ec7b004SRick Macklem 		 * Since VOP_MKNOD returns the ni_vp, I can't
13609ec7b004SRick Macklem 		 * see any reason to do the lookup.
13619ec7b004SRick Macklem 		 */
13629ec7b004SRick Macklem 	}
1363a9285ae5SZack Kirsch 
1364a9285ae5SZack Kirsch out:
1365a9285ae5SZack Kirsch 	NFSEXITCODE(error);
13669ec7b004SRick Macklem 	return (error);
13679ec7b004SRick Macklem }
13689ec7b004SRick Macklem 
13699ec7b004SRick Macklem /*
13709ec7b004SRick Macklem  * Mkdir vnode op.
 *
 * If the lookup recorded in ndp found an existing object (ni_vp != NULL),
 * the directory vnode is released (vrele() when it is the same vnode as
 * ni_vp, vput() otherwise), ni_vp is vrele()'d and EEXIST is returned.
 * Otherwise VOP_MKDIR() is called with the attributes in nvap->na_vattr
 * and ni_dvp is handed to VOP_VPUT_PAIR(), together with ni_vp when the
 * mkdir succeeded.  The pathname buffer is released on both paths.
 *
 * saved_uid, cred, p and exp are not referenced in this function.
 *
 * Returns 0 or an error.
13719ec7b004SRick Macklem  */
13729ec7b004SRick Macklem int
nfsvno_mkdir(struct nameidata * ndp,struct nfsvattr * nvap,uid_t saved_uid,struct ucred * cred,struct thread * p,struct nfsexstuff * exp)13739ec7b004SRick Macklem nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid,
13749ec7b004SRick Macklem     struct ucred *cred, struct thread *p, struct nfsexstuff *exp)
13759ec7b004SRick Macklem {
13769ec7b004SRick Macklem 	int error = 0;
13779ec7b004SRick Macklem 
	/* The name already exists: drop what the lookup returned. */
13789ec7b004SRick Macklem 	if (ndp->ni_vp != NULL) {
13799ec7b004SRick Macklem 		if (ndp->ni_dvp == ndp->ni_vp)
13809ec7b004SRick Macklem 			vrele(ndp->ni_dvp);
13819ec7b004SRick Macklem 		else
13829ec7b004SRick Macklem 			vput(ndp->ni_dvp);
13839ec7b004SRick Macklem 		vrele(ndp->ni_vp);
138415b28cb8SRick Macklem 		nfsvno_relpathbuf(ndp);
1385a9285ae5SZack Kirsch 		error = EEXIST;
1386a9285ae5SZack Kirsch 		goto out;
13879ec7b004SRick Macklem 	}
13889ec7b004SRick Macklem 	error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
13899ec7b004SRick Macklem 	    &nvap->na_vattr);
13904a21bcb2SKonstantin Belousov 	VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL, false);
13919ec7b004SRick Macklem 	nfsvno_relpathbuf(ndp);
1392a9285ae5SZack Kirsch 
1393a9285ae5SZack Kirsch out:
1394a9285ae5SZack Kirsch 	NFSEXITCODE(error);
13959ec7b004SRick Macklem 	return (error);
13969ec7b004SRick Macklem }
13979ec7b004SRick Macklem 
13989ec7b004SRick Macklem /*
13999ec7b004SRick Macklem  * symlink vnode op.
 *
 * If the lookup recorded in ndp found an existing object (ni_vp != NULL),
 * the pathname buffer and both vnodes are released and EEXIST is
 * returned.  Otherwise VOP_SYMLINK() creates the link with pathcp as its
 * target and nvap->na_vattr as its attributes.
 *
 * not_v2 is zero for an NFSv2 request; only then, and only on success, is
 * true passed as the last argument of VOP_VPUT_PAIR().
 *
 * pathlen, saved_uid, cred, p and exp are not referenced in this
 * function.
 *
 * Returns 0 or an error.
14009ec7b004SRick Macklem  */
14019ec7b004SRick Macklem int
nfsvno_symlink(struct nameidata * ndp,struct nfsvattr * nvap,char * pathcp,int pathlen,int not_v2,uid_t saved_uid,struct ucred * cred,struct thread * p,struct nfsexstuff * exp)14029ec7b004SRick Macklem nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp,
14039ec7b004SRick Macklem     int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p,
14049ec7b004SRick Macklem     struct nfsexstuff *exp)
14059ec7b004SRick Macklem {
14069ec7b004SRick Macklem 	int error = 0;
14079ec7b004SRick Macklem 
	/* The name already exists: drop what the lookup returned. */
14089ec7b004SRick Macklem 	if (ndp->ni_vp) {
14099ec7b004SRick Macklem 		nfsvno_relpathbuf(ndp);
14109ec7b004SRick Macklem 		if (ndp->ni_dvp == ndp->ni_vp)
14119ec7b004SRick Macklem 			vrele(ndp->ni_dvp);
14129ec7b004SRick Macklem 		else
14139ec7b004SRick Macklem 			vput(ndp->ni_dvp);
14149ec7b004SRick Macklem 		vrele(ndp->ni_vp);
1415a9285ae5SZack Kirsch 		error = EEXIST;
1416a9285ae5SZack Kirsch 		goto out;
14179ec7b004SRick Macklem 	}
14189ec7b004SRick Macklem 
14199ec7b004SRick Macklem 	error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
14209ec7b004SRick Macklem 	    &nvap->na_vattr, pathcp);
14219ec7b004SRick Macklem 	/*
14229ec7b004SRick Macklem 	 * Although FreeBSD still had the lookup code in
14239ec7b004SRick Macklem 	 * it for 7/current, there doesn't seem to be any
14249ec7b004SRick Macklem 	 * point, since VOP_SYMLINK() returns the ni_vp.
14259ec7b004SRick Macklem 	 * Just vput it for v2.
14269ec7b004SRick Macklem 	 */
	/*
	 * NOTE(review): unlike nfsvno_mkdir(), &ndp->ni_vp is passed here
	 * even when VOP_SYMLINK() failed -- confirm that ni_vp is still NULL
	 * in that case.
	 */
14274a21bcb2SKonstantin Belousov 	VOP_VPUT_PAIR(ndp->ni_dvp, &ndp->ni_vp, !not_v2 && error == 0);
14284a21bcb2SKonstantin Belousov 	nfsvno_relpathbuf(ndp);
1429a9285ae5SZack Kirsch 
1430a9285ae5SZack Kirsch out:
1431a9285ae5SZack Kirsch 	NFSEXITCODE(error);
14329ec7b004SRick Macklem 	return (error);
14339ec7b004SRick Macklem }
14349ec7b004SRick Macklem 
14359ec7b004SRick Macklem /*
14369ec7b004SRick Macklem  * Parse symbolic link arguments.
1437222daa42SConrad Meyer  * This function has an ugly side effect. It will malloc() an area for
14389ec7b004SRick Macklem  * the symlink and set *pathcpp to point to it, only if it succeeds.
14399ec7b004SRick Macklem  * So, if it returns with *pathcpp != NULL, that must
14409ec7b004SRick Macklem  * be free()'d (M_TEMP) later.
 *
 * *pathcpp and *lenp are set to NULL and 0 on entry and are only updated
 * on success, when they receive the buffer and the path length taken
 * from the request.  For NFSv3 the attributes are parsed into nvap with
 * nfsrv_sattr() before the path; for NFSv2 an nfsv2_sattr follows the
 * path and only its mode is stored in nvap->na_mode.  A length that is
 * <= 0 or larger than NFS_MAXPATHLEN is rejected with EBADRPC.
 *
 * Returns 0 or an error; on error the buffer, if already allocated, is
 * freed here.
14419ec7b004SRick Macklem  */
14429ec7b004SRick Macklem int
nfsvno_getsymlink(struct nfsrv_descript * nd,struct nfsvattr * nvap,struct thread * p,char ** pathcpp,int * lenp)14439ec7b004SRick Macklem nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap,
14449ec7b004SRick Macklem     struct thread *p, char **pathcpp, int *lenp)
14459ec7b004SRick Macklem {
14469ec7b004SRick Macklem 	u_int32_t *tl;
14479ec7b004SRick Macklem 	char *pathcp = NULL;
14489ec7b004SRick Macklem 	int error = 0, len;
14499ec7b004SRick Macklem 	struct nfsv2_sattr *sp;
14509ec7b004SRick Macklem 
14519ec7b004SRick Macklem 	*pathcpp = NULL;
14529ec7b004SRick Macklem 	*lenp = 0;
14539ec7b004SRick Macklem 	if ((nd->nd_flag & ND_NFSV3) &&
1454d8a5961fSMarcelo Araujo 	    (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p)))
14559ec7b004SRick Macklem 		goto nfsmout;
	/*
	 * NFSM_DISSECT() is defined elsewhere; presumably it sets error and
	 * jumps to nfsmout when the request is too short -- TODO confirm.
	 */
14569ec7b004SRick Macklem 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
14579ec7b004SRick Macklem 	len = fxdr_unsigned(int, *tl);
14589ec7b004SRick Macklem 	if (len > NFS_MAXPATHLEN || len <= 0) {
14599ec7b004SRick Macklem 		error = EBADRPC;
14609ec7b004SRick Macklem 		goto nfsmout;
14619ec7b004SRick Macklem 	}
	/* One byte more than the path; presumably for a terminating NUL. */
1462222daa42SConrad Meyer 	pathcp = malloc(len + 1, M_TEMP, M_WAITOK);
14639ec7b004SRick Macklem 	error = nfsrv_mtostr(nd, pathcp, len);
14649ec7b004SRick Macklem 	if (error)
14659ec7b004SRick Macklem 		goto nfsmout;
14669ec7b004SRick Macklem 	if (nd->nd_flag & ND_NFSV2) {
14679ec7b004SRick Macklem 		NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
14689ec7b004SRick Macklem 		nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode);
14699ec7b004SRick Macklem 	}
14709ec7b004SRick Macklem 	*pathcpp = pathcp;
14719ec7b004SRick Macklem 	*lenp = len;
1472a9285ae5SZack Kirsch 	NFSEXITCODE2(0, nd);
14739ec7b004SRick Macklem 	return (0);
14749ec7b004SRick Macklem nfsmout:
	/* Error exit: *pathcpp is still NULL, so the buffer is dropped here. */
14759ec7b004SRick Macklem 	if (pathcp)
14769ec7b004SRick Macklem 		free(pathcp, M_TEMP);
1477a9285ae5SZack Kirsch 	NFSEXITCODE2(error, nd);
14789ec7b004SRick Macklem 	return (error);
14799ec7b004SRick Macklem }
14809ec7b004SRick Macklem 
14819ec7b004SRick Macklem /*
14829ec7b004SRick Macklem  * Remove a non-directory object.
 *
 * ndp->ni_vp is the object to remove and ndp->ni_dvp its directory.  A
 * directory is refused with NFSERR_ISDIR.  When is_v4 is non-zero,
 * nfsrv_checkremove() must succeed first.  nfsrv_pnfsremovesetup() is
 * called before VOP_REMOVE(); if it filled in dsdvp[0] and the remove
 * succeeded, nfsrv_pnfsremove() is called afterwards.
 *
 * Both vnodes and the pathname buffer are released on every path.  cred
 * and exp are not referenced in this function.
 *
 * Returns 0 or an error.
14839ec7b004SRick Macklem  */
14849ec7b004SRick Macklem int
nfsvno_removesub(struct nameidata * ndp,int is_v4,struct ucred * cred,struct thread * p,struct nfsexstuff * exp)14859ec7b004SRick Macklem nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred,
14869ec7b004SRick Macklem     struct thread *p, struct nfsexstuff *exp)
14879ec7b004SRick Macklem {
148890d2dfabSRick Macklem 	struct vnode *vp, *dsdvp[NFSDEV_MAXMIRRORS];
148990d2dfabSRick Macklem 	int error = 0, mirrorcnt;
149090d2dfabSRick Macklem 	char fname[PNFS_FILENAME_LEN + 1];
149190d2dfabSRick Macklem 	fhandle_t fh;
14929ec7b004SRick Macklem 
14939ec7b004SRick Macklem 	vp = ndp->ni_vp;
	/* dsdvp[0] stays NULL unless nfsrv_pnfsremovesetup() sets it. */
149490d2dfabSRick Macklem 	dsdvp[0] = NULL;
14959ec7b004SRick Macklem 	if (vp->v_type == VDIR)
14969ec7b004SRick Macklem 		error = NFSERR_ISDIR;
14979ec7b004SRick Macklem 	else if (is_v4)
1498c057a378SRick Macklem 		error = nfsrv_checkremove(vp, 1, NULL, (nfsquad_t)((u_quad_t)0),
1499c057a378SRick Macklem 		    p);
150090d2dfabSRick Macklem 	if (error == 0)
150190d2dfabSRick Macklem 		nfsrv_pnfsremovesetup(vp, p, dsdvp, &mirrorcnt, fname, &fh);
15029ec7b004SRick Macklem 	if (!error)
15039ec7b004SRick Macklem 		error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd);
	/* mirrorcnt, fname and fh are only read when the setup set dsdvp[0]. */
150490d2dfabSRick Macklem 	if (error == 0 && dsdvp[0] != NULL)
150590d2dfabSRick Macklem 		nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p);
15069ec7b004SRick Macklem 	if (ndp->ni_dvp == vp)
15079ec7b004SRick Macklem 		vrele(ndp->ni_dvp);
15089ec7b004SRick Macklem 	else
15099ec7b004SRick Macklem 		vput(ndp->ni_dvp);
15109ec7b004SRick Macklem 	vput(vp);
1511a607cc6dSRick Macklem 	nfsvno_relpathbuf(ndp);
1512a9285ae5SZack Kirsch 	NFSEXITCODE(error);
15139ec7b004SRick Macklem 	return (error);
15149ec7b004SRick Macklem }
15159ec7b004SRick Macklem 
15169ec7b004SRick Macklem /*
15179ec7b004SRick Macklem  * Remove a directory.
 *
 * ndp->ni_vp is the directory to remove and ndp->ni_dvp its parent.  The
 * request is refused with ENOTDIR when ni_vp is not a directory, EINVAL
 * when it is the same vnode as ni_dvp and EBUSY when it has VV_ROOT set;
 * otherwise VOP_RMDIR() is called.
 *
 * Both vnodes and the pathname buffer are released on every path.  is_v4,
 * cred, p and exp are not referenced in this function.
 *
 * Returns 0 or an error.
15189ec7b004SRick Macklem  */
15199ec7b004SRick Macklem int
nfsvno_rmdirsub(struct nameidata * ndp,int is_v4,struct ucred * cred,struct thread * p,struct nfsexstuff * exp)15209ec7b004SRick Macklem nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred,
15219ec7b004SRick Macklem     struct thread *p, struct nfsexstuff *exp)
15229ec7b004SRick Macklem {
15239ec7b004SRick Macklem 	struct vnode *vp;
15249ec7b004SRick Macklem 	int error = 0;
15259ec7b004SRick Macklem 
15269ec7b004SRick Macklem 	vp = ndp->ni_vp;
15279ec7b004SRick Macklem 	if (vp->v_type != VDIR) {
15289ec7b004SRick Macklem 		error = ENOTDIR;
15299ec7b004SRick Macklem 		goto out;
15309ec7b004SRick Macklem 	}
15319ec7b004SRick Macklem 	/*
15329ec7b004SRick Macklem 	 * No rmdir "." please.
15339ec7b004SRick Macklem 	 */
15349ec7b004SRick Macklem 	if (ndp->ni_dvp == vp) {
15359ec7b004SRick Macklem 		error = EINVAL;
15369ec7b004SRick Macklem 		goto out;
15379ec7b004SRick Macklem 	}
15389ec7b004SRick Macklem 	/*
15399ec7b004SRick Macklem 	 * The root of a mounted filesystem cannot be deleted.
15409ec7b004SRick Macklem 	 */
15419ec7b004SRick Macklem 	if (vp->v_vflag & VV_ROOT)
15429ec7b004SRick Macklem 		error = EBUSY;
15439ec7b004SRick Macklem out:
	/* Reached with error set by a failed check, or 0 to do the rmdir. */
15449ec7b004SRick Macklem 	if (!error)
15459ec7b004SRick Macklem 		error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd);
15469ec7b004SRick Macklem 	if (ndp->ni_dvp == vp)
15479ec7b004SRick Macklem 		vrele(ndp->ni_dvp);
15489ec7b004SRick Macklem 	else
15499ec7b004SRick Macklem 		vput(ndp->ni_dvp);
15509ec7b004SRick Macklem 	vput(vp);
1551a607cc6dSRick Macklem 	nfsvno_relpathbuf(ndp);
1552a9285ae5SZack Kirsch 	NFSEXITCODE(error);
15539ec7b004SRick Macklem 	return (error);
15549ec7b004SRick Macklem }
15559ec7b004SRick Macklem 
15569ec7b004SRick Macklem /*
15579ec7b004SRick Macklem  * Rename vnode op.
 *
 * fromndp and tondp hold the lookup results for the source and the
 * target.  A non-zero ndstat is an error carried in by the caller: the
 * source directory and source vnode are vrele()'d, only fromndp's
 * pathname buffer is released and ndstat is returned.
 *
 * Otherwise the request is checked (type mismatch between source and an
 * existing target, mount points, a target name of "." or "..", different
 * mounts, source equal to the target directory).  ndflag selects the
 * error number reported for a failed check (ND_NFSV2) and whether
 * nfsrv_checkremove() (ND_NFSV4) or nfsd_recalldelegation() is used.
 * When source and target are the same vnode, error is set to -1
 * internally; VOP_RENAME() is skipped and 0 is returned.
 *
 * If a target exists, nfsrv_pnfsremovesetup() is called for it before the
 * rename and, if that set dsdvp[0] and the rename succeeded,
 * nfsrv_pnfsremove() afterwards.
 *
 * cred is not referenced in this function.
 *
 * Returns 0 or an error.
15589ec7b004SRick Macklem  */
15599ec7b004SRick Macklem int
nfsvno_rename(struct nameidata * fromndp,struct nameidata * tondp,u_int32_t ndstat,u_int32_t ndflag,struct ucred * cred,struct thread * p)15609ec7b004SRick Macklem nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
15619ec7b004SRick Macklem     u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p)
15629ec7b004SRick Macklem {
156390d2dfabSRick Macklem 	struct vnode *fvp, *tvp, *tdvp, *dsdvp[NFSDEV_MAXMIRRORS];
156490d2dfabSRick Macklem 	int error = 0, mirrorcnt;
156590d2dfabSRick Macklem 	char fname[PNFS_FILENAME_LEN + 1];
156690d2dfabSRick Macklem 	fhandle_t fh;
15679ec7b004SRick Macklem 
	/* dsdvp[0] stays NULL unless nfsrv_pnfsremovesetup() sets it. */
156890d2dfabSRick Macklem 	dsdvp[0] = NULL;
15699ec7b004SRick Macklem 	fvp = fromndp->ni_vp;
15709ec7b004SRick Macklem 	if (ndstat) {
		/* tondp is not touched on this path. */
15719ec7b004SRick Macklem 		vrele(fromndp->ni_dvp);
15729ec7b004SRick Macklem 		vrele(fvp);
15739ec7b004SRick Macklem 		error = ndstat;
15749ec7b004SRick Macklem 		goto out1;
15759ec7b004SRick Macklem 	}
15769ec7b004SRick Macklem 	tdvp = tondp->ni_dvp;
15779ec7b004SRick Macklem 	tvp = tondp->ni_vp;
15789ec7b004SRick Macklem 	if (tvp != NULL) {
		/* Source and existing target must both be, or both not be, directories. */
15799ec7b004SRick Macklem 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
15809ec7b004SRick Macklem 			error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST;
15819ec7b004SRick Macklem 			goto out;
15829ec7b004SRick Macklem 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
15839ec7b004SRick Macklem 			error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST;
15849ec7b004SRick Macklem 			goto out;
15859ec7b004SRick Macklem 		}
15869ec7b004SRick Macklem 		if (tvp->v_type == VDIR && tvp->v_mountedhere) {
15879ec7b004SRick Macklem 			error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
15889ec7b004SRick Macklem 			goto out;
15899ec7b004SRick Macklem 		}
15909ec7b004SRick Macklem 
15919ec7b004SRick Macklem 		/*
15929ec7b004SRick Macklem 		 * A rename to '.' or '..' results in a prematurely
15939ec7b004SRick Macklem 		 * unlocked vnode on FreeBSD5, so I'm just going to fail that
15949ec7b004SRick Macklem 		 * here.
15959ec7b004SRick Macklem 		 */
15969ec7b004SRick Macklem 		if ((tondp->ni_cnd.cn_namelen == 1 &&
15979ec7b004SRick Macklem 		     tondp->ni_cnd.cn_nameptr[0] == '.') ||
15989ec7b004SRick Macklem 		    (tondp->ni_cnd.cn_namelen == 2 &&
15999ec7b004SRick Macklem 		     tondp->ni_cnd.cn_nameptr[0] == '.' &&
16009ec7b004SRick Macklem 		     tondp->ni_cnd.cn_nameptr[1] == '.')) {
16019ec7b004SRick Macklem 			error = EINVAL;
16029ec7b004SRick Macklem 			goto out;
16039ec7b004SRick Macklem 		}
16049ec7b004SRick Macklem 	}
16059ec7b004SRick Macklem 	if (fvp->v_type == VDIR && fvp->v_mountedhere) {
16069ec7b004SRick Macklem 		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
16079ec7b004SRick Macklem 		goto out;
16089ec7b004SRick Macklem 	}
16099ec7b004SRick Macklem 	if (fvp->v_mount != tdvp->v_mount) {
16109ec7b004SRick Macklem 		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
16119ec7b004SRick Macklem 		goto out;
16129ec7b004SRick Macklem 	}
16139ec7b004SRick Macklem 	if (fvp == tdvp) {
16149ec7b004SRick Macklem 		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL;
16159ec7b004SRick Macklem 		goto out;
16169ec7b004SRick Macklem 	}
16179ec7b004SRick Macklem 	if (fvp == tvp) {
16189ec7b004SRick Macklem 		/*
16199ec7b004SRick Macklem 		 * If source and destination are the same, there is nothing to
16209ec7b004SRick Macklem 		 * do. Set error to -1 to indicate this.
16219ec7b004SRick Macklem 		 */
16229ec7b004SRick Macklem 		error = -1;
16239ec7b004SRick Macklem 		goto out;
16249ec7b004SRick Macklem 	}
16259ec7b004SRick Macklem 	if (ndflag & ND_NFSV4) {
		/* fvp is locked only around the check; a failed lock gives EPERM. */
162698f234f3SZack Kirsch 		if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) {
1627c057a378SRick Macklem 			error = nfsrv_checkremove(fvp, 0, NULL,
1628c057a378SRick Macklem 			    (nfsquad_t)((u_quad_t)0), p);
1629b249ce48SMateusz Guzik 			NFSVOPUNLOCK(fvp);
1630629fa50eSRick Macklem 		} else
1631629fa50eSRick Macklem 			error = EPERM;
16329ec7b004SRick Macklem 		if (tvp && !error)
1633c057a378SRick Macklem 			error = nfsrv_checkremove(tvp, 1, NULL,
1634c057a378SRick Macklem 			    (nfsquad_t)((u_quad_t)0), p);
16359ec7b004SRick Macklem 	} else {
16369ec7b004SRick Macklem 		/*
16379ec7b004SRick Macklem 		 * For NFSv2 and NFSv3, try to get rid of the delegation, so
16389ec7b004SRick Macklem 		 * that the NFSv4 client won't be confused by the rename.
16399ec7b004SRick Macklem 		 * Since nfsd_recalldelegation() can only be called on an
16409ec7b004SRick Macklem 		 * unlocked vnode at this point and fvp is the file that will
16419ec7b004SRick Macklem 		 * still exist after the rename, just do fvp.
16429ec7b004SRick Macklem 		 */
16439ec7b004SRick Macklem 		nfsd_recalldelegation(fvp, p);
16449ec7b004SRick Macklem 	}
164590d2dfabSRick Macklem 	if (error == 0 && tvp != NULL) {
164690d2dfabSRick Macklem 		nfsrv_pnfsremovesetup(tvp, p, dsdvp, &mirrorcnt, fname, &fh);
164790d2dfabSRick Macklem 		NFSD_DEBUG(4, "nfsvno_rename: pnfsremovesetup"
164890d2dfabSRick Macklem 		    " dsdvp=%p\n", dsdvp[0]);
164990d2dfabSRick Macklem 	}
16509ec7b004SRick Macklem out:
	/*
	 * No vnode is released here after VOP_RENAME(); presumably it
	 * disposes of all four vnodes itself -- see VOP_RENAME(9).  On the
	 * error path they are released by hand and the -1 marker becomes 0.
	 */
16519ec7b004SRick Macklem 	if (!error) {
16529ec7b004SRick Macklem 		error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp,
16539ec7b004SRick Macklem 		    &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp,
16549ec7b004SRick Macklem 		    &tondp->ni_cnd);
16559ec7b004SRick Macklem 	} else {
16569ec7b004SRick Macklem 		if (tdvp == tvp)
16579ec7b004SRick Macklem 			vrele(tdvp);
16589ec7b004SRick Macklem 		else
16599ec7b004SRick Macklem 			vput(tdvp);
16609ec7b004SRick Macklem 		if (tvp)
16619ec7b004SRick Macklem 			vput(tvp);
16629ec7b004SRick Macklem 		vrele(fromndp->ni_dvp);
16639ec7b004SRick Macklem 		vrele(fvp);
16649ec7b004SRick Macklem 		if (error == -1)
16659ec7b004SRick Macklem 			error = 0;
16669ec7b004SRick Macklem 	}
166790d2dfabSRick Macklem 
166890d2dfabSRick Macklem 	/*
166990d2dfabSRick Macklem 	 * If dsdvp[0] != NULL, it was set up by nfsrv_pnfsremovesetup() and
167090d2dfabSRick Macklem 	 * if the rename succeeded, the DS file for the tvp needs to be
167190d2dfabSRick Macklem 	 * removed.
167290d2dfabSRick Macklem 	 */
167390d2dfabSRick Macklem 	if (error == 0 && dsdvp[0] != NULL) {
167490d2dfabSRick Macklem 		nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p);
167590d2dfabSRick Macklem 		NFSD_DEBUG(4, "nfsvno_rename: pnfsremove\n");
167690d2dfabSRick Macklem 	}
167790d2dfabSRick Macklem 
16789ec7b004SRick Macklem 	nfsvno_relpathbuf(tondp);
16799ec7b004SRick Macklem out1:
16809ec7b004SRick Macklem 	nfsvno_relpathbuf(fromndp);
1681a9285ae5SZack Kirsch 	NFSEXITCODE(error);
16829ec7b004SRick Macklem 	return (error);
16839ec7b004SRick Macklem }
16849ec7b004SRick Macklem 
16859ec7b004SRick Macklem /*
16869ec7b004SRick Macklem  * Link vnode op.
 *
 * vp is the existing object and ndp holds the lookup result for the new
 * name.  EEXIST is returned when the name already exists (ni_vp != NULL)
 * and EXDEV when vp and the target directory are on different mounts.
 * Otherwise vp is locked exclusively; if it is doomed EPERM is returned,
 * else nfsrv_checkremove() is called with clientid and, when that
 * succeeds, VOP_LINK().
 *
 * The directory vnode and the pathname buffer are released on every
 * path.  cred and exp are not referenced in this function.
 *
 * Returns 0 or an error.
16879ec7b004SRick Macklem  */
16889ec7b004SRick Macklem int
nfsvno_link(struct nameidata * ndp,struct vnode * vp,nfsquad_t clientid,struct ucred * cred,struct thread * p,struct nfsexstuff * exp)16893f65000bSRick Macklem nfsvno_link(struct nameidata *ndp, struct vnode *vp, nfsquad_t clientid,
16903f65000bSRick Macklem     struct ucred *cred, struct thread *p, struct nfsexstuff *exp)
16919ec7b004SRick Macklem {
16929ec7b004SRick Macklem 	struct vnode *xp;
16939ec7b004SRick Macklem 	int error = 0;
16949ec7b004SRick Macklem 
	/* xp is scratch: first the existing target, then the directory. */
16959ec7b004SRick Macklem 	xp = ndp->ni_vp;
16969ec7b004SRick Macklem 	if (xp != NULL) {
16979ec7b004SRick Macklem 		error = EEXIST;
16989ec7b004SRick Macklem 	} else {
16999ec7b004SRick Macklem 		xp = ndp->ni_dvp;
17009ec7b004SRick Macklem 		if (vp->v_mount != xp->v_mount)
17019ec7b004SRick Macklem 			error = EXDEV;
17029ec7b004SRick Macklem 	}
17039ec7b004SRick Macklem 	if (!error) {
170498f234f3SZack Kirsch 		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
17053f65000bSRick Macklem 		if (!VN_IS_DOOMED(vp)) {
17063f65000bSRick Macklem 			error = nfsrv_checkremove(vp, 0, NULL, clientid, p);
17073f65000bSRick Macklem 			if (error == 0)
17089ec7b004SRick Macklem 				error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd);
17093f65000bSRick Macklem 		} else
1710629fa50eSRick Macklem 			error = EPERM;
		/*
		 * Undo the lock taken above and release the directory.  In
		 * the second case an extra reference is taken on vp before it
		 * is handed to VOP_VPUT_PAIR(); presumably that call drops
		 * one reference along with the lock -- TODO confirm.
		 */
17114a21bcb2SKonstantin Belousov 		if (ndp->ni_dvp == vp) {
17129ec7b004SRick Macklem 			vrele(ndp->ni_dvp);
1713b249ce48SMateusz Guzik 			NFSVOPUNLOCK(vp);
17149ec7b004SRick Macklem 		} else {
17154a21bcb2SKonstantin Belousov 			vref(vp);
17164a21bcb2SKonstantin Belousov 			VOP_VPUT_PAIR(ndp->ni_dvp, &vp, true);
17174a21bcb2SKonstantin Belousov 		}
17184a21bcb2SKonstantin Belousov 	} else {
		/* Failed before vp was locked: only drop the lookup results. */
17199ec7b004SRick Macklem 		if (ndp->ni_dvp == ndp->ni_vp)
17209ec7b004SRick Macklem 			vrele(ndp->ni_dvp);
17219ec7b004SRick Macklem 		else
17229ec7b004SRick Macklem 			vput(ndp->ni_dvp);
17239ec7b004SRick Macklem 		if (ndp->ni_vp)
17249ec7b004SRick Macklem 			vrele(ndp->ni_vp);
17259ec7b004SRick Macklem 	}
17269ec7b004SRick Macklem 	nfsvno_relpathbuf(ndp);
1727a9285ae5SZack Kirsch 	NFSEXITCODE(error);
17289ec7b004SRick Macklem 	return (error);
17299ec7b004SRick Macklem }
17309ec7b004SRick Macklem 
17319ec7b004SRick Macklem /*
17329ec7b004SRick Macklem  * Do the fsync() appropriate for the commit.
17339ec7b004SRick Macklem  */
17349ec7b004SRick Macklem int
nfsvno_fsync(struct vnode * vp,u_int64_t off,int cnt,struct ucred * cred,struct thread * td)17359ec7b004SRick Macklem nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred,
17369ec7b004SRick Macklem     struct thread *td)
17379ec7b004SRick Macklem {
17389ec7b004SRick Macklem 	int error = 0;
17399ec7b004SRick Macklem 
174022ea9f58SRick Macklem 	/*
174122ea9f58SRick Macklem 	 * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of
174222ea9f58SRick Macklem 	 * file is done.  At this time VOP_FSYNC does not accept offset and
174322ea9f58SRick Macklem 	 * byte count parameters so call VOP_FSYNC the whole file for now.
174422ea9f58SRick Macklem 	 * The same is true for NFSv4: RFC 3530 Sec. 14.2.3.
1745dda11d4aSRick Macklem 	 * File systems that do not use the buffer cache (as indicated
1746dda11d4aSRick Macklem 	 * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC().
174722ea9f58SRick Macklem 	 */
1748dda11d4aSRick Macklem 	if (cnt == 0 || cnt > MAX_COMMIT_COUNT ||
1749dda11d4aSRick Macklem 	    (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) {
17509ec7b004SRick Macklem 		/*
17519ec7b004SRick Macklem 		 * Give up and do the whole thing
17529ec7b004SRick Macklem 		 */
1753b068bb09SKonstantin Belousov 		vnode_pager_clean_sync(vp);
17549ec7b004SRick Macklem 		error = VOP_FSYNC(vp, MNT_WAIT, td);
17559ec7b004SRick Macklem 	} else {
17569ec7b004SRick Macklem 		/*
17579ec7b004SRick Macklem 		 * Locate and synchronously write any buffers that fall
17589ec7b004SRick Macklem 		 * into the requested range.  Note:  we are assuming that
17599ec7b004SRick Macklem 		 * f_iosize is a power of 2.
17609ec7b004SRick Macklem 		 */
17619ec7b004SRick Macklem 		int iosize = vp->v_mount->mnt_stat.f_iosize;
17629ec7b004SRick Macklem 		int iomask = iosize - 1;
17639ec7b004SRick Macklem 		struct bufobj *bo;
17649ec7b004SRick Macklem 		daddr_t lblkno;
17659ec7b004SRick Macklem 
17669ec7b004SRick Macklem 		/*
1767a96c9b30SPedro F. Giffuni 		 * Align to iosize boundary, super-align to page boundary.
17689ec7b004SRick Macklem 		 */
17699ec7b004SRick Macklem 		if (off & iomask) {
17709ec7b004SRick Macklem 			cnt += off & iomask;
17719ec7b004SRick Macklem 			off &= ~(u_quad_t)iomask;
17729ec7b004SRick Macklem 		}
17739ec7b004SRick Macklem 		if (off & PAGE_MASK) {
17749ec7b004SRick Macklem 			cnt += off & PAGE_MASK;
17759ec7b004SRick Macklem 			off &= ~(u_quad_t)PAGE_MASK;
17769ec7b004SRick Macklem 		}
17779ec7b004SRick Macklem 		lblkno = off / iosize;
17789ec7b004SRick Macklem 
177967d0e293SJeff Roberson 		if (vp->v_object && vm_object_mightbedirty(vp->v_object)) {
178089f6b863SAttilio Rao 			VM_OBJECT_WLOCK(vp->v_object);
178117f3095dSAlan Cox 			vm_object_page_clean(vp->v_object, off, off + cnt,
178217f3095dSAlan Cox 			    OBJPC_SYNC);
178389f6b863SAttilio Rao 			VM_OBJECT_WUNLOCK(vp->v_object);
17849ec7b004SRick Macklem 		}
17859ec7b004SRick Macklem 
17869ec7b004SRick Macklem 		bo = &vp->v_bufobj;
17879ec7b004SRick Macklem 		BO_LOCK(bo);
17889ec7b004SRick Macklem 		while (cnt > 0) {
17899ec7b004SRick Macklem 			struct buf *bp;
17909ec7b004SRick Macklem 
17919ec7b004SRick Macklem 			/*
17929ec7b004SRick Macklem 			 * If we have a buffer and it is marked B_DELWRI we
17939ec7b004SRick Macklem 			 * have to lock and write it.  Otherwise the prior
17949ec7b004SRick Macklem 			 * write is assumed to have already been committed.
17959ec7b004SRick Macklem 			 *
17969ec7b004SRick Macklem 			 * gbincore() can return invalid buffers now so we
17979ec7b004SRick Macklem 			 * have to check that bit as well (though B_DELWRI
17989ec7b004SRick Macklem 			 * should not be set if B_INVAL is set there could be
17999ec7b004SRick Macklem 			 * a race here since we haven't locked the buffer).
18009ec7b004SRick Macklem 			 */
18019ec7b004SRick Macklem 			if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
18029ec7b004SRick Macklem 				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
180322a72260SJeff Roberson 				    LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) {
18049ec7b004SRick Macklem 					BO_LOCK(bo);
18059ec7b004SRick Macklem 					continue; /* retry */
18069ec7b004SRick Macklem 				}
18079ec7b004SRick Macklem 			    	if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
18089ec7b004SRick Macklem 				    B_DELWRI) {
18099ec7b004SRick Macklem 					bremfree(bp);
18109ec7b004SRick Macklem 					bp->b_flags &= ~B_ASYNC;
18119ec7b004SRick Macklem 					bwrite(bp);
18129ec7b004SRick Macklem 					++nfs_commit_miss;
18139ec7b004SRick Macklem 				} else
18149ec7b004SRick Macklem 					BUF_UNLOCK(bp);
18159ec7b004SRick Macklem 				BO_LOCK(bo);
18169ec7b004SRick Macklem 			}
18179ec7b004SRick Macklem 			++nfs_commit_blks;
18189ec7b004SRick Macklem 			if (cnt < iosize)
18199ec7b004SRick Macklem 				break;
18209ec7b004SRick Macklem 			cnt -= iosize;
18219ec7b004SRick Macklem 			++lblkno;
18229ec7b004SRick Macklem 		}
18239ec7b004SRick Macklem 		BO_UNLOCK(bo);
18249ec7b004SRick Macklem 	}
1825a9285ae5SZack Kirsch 	NFSEXITCODE(error);
18269ec7b004SRick Macklem 	return (error);
18279ec7b004SRick Macklem }
18289ec7b004SRick Macklem 
18299ec7b004SRick Macklem /*
18309ec7b004SRick Macklem  * Statfs vnode op.
18319ec7b004SRick Macklem  */
18329ec7b004SRick Macklem int
nfsvno_statfs(struct vnode * vp,struct statfs * sf)1833dfd233edSAttilio Rao nfsvno_statfs(struct vnode *vp, struct statfs *sf)
18349ec7b004SRick Macklem {
183590d2dfabSRick Macklem 	struct statfs *tsf;
183678e4b1f8SRick Macklem 	int error;
18379ec7b004SRick Macklem 
183890d2dfabSRick Macklem 	tsf = NULL;
183990d2dfabSRick Macklem 	if (nfsrv_devidcnt > 0) {
184090d2dfabSRick Macklem 		/* For a pNFS service, get the DS numbers. */
184190d2dfabSRick Macklem 		tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK | M_ZERO);
1842910ccc77SRick Macklem 		error = nfsrv_pnfsstatfs(tsf, vp->v_mount);
184390d2dfabSRick Macklem 		if (error != 0) {
184490d2dfabSRick Macklem 			free(tsf, M_TEMP);
184590d2dfabSRick Macklem 			tsf = NULL;
184690d2dfabSRick Macklem 		}
184790d2dfabSRick Macklem 	}
184878e4b1f8SRick Macklem 	error = VFS_STATFS(vp->v_mount, sf);
184978e4b1f8SRick Macklem 	if (error == 0) {
185090d2dfabSRick Macklem 		if (tsf != NULL) {
185190d2dfabSRick Macklem 			sf->f_blocks = tsf->f_blocks;
185290d2dfabSRick Macklem 			sf->f_bavail = tsf->f_bavail;
185390d2dfabSRick Macklem 			sf->f_bfree = tsf->f_bfree;
185490d2dfabSRick Macklem 			sf->f_bsize = tsf->f_bsize;
185590d2dfabSRick Macklem 		}
185678e4b1f8SRick Macklem 		/*
185778e4b1f8SRick Macklem 		 * Since NFS handles these values as unsigned on the
185878e4b1f8SRick Macklem 		 * wire, there is no way to represent negative values,
185978e4b1f8SRick Macklem 		 * so set them to 0. Without this, they will appear
186078e4b1f8SRick Macklem 		 * to be very large positive values for clients like
186178e4b1f8SRick Macklem 		 * Solaris10.
186278e4b1f8SRick Macklem 		 */
186378e4b1f8SRick Macklem 		if (sf->f_bavail < 0)
186478e4b1f8SRick Macklem 			sf->f_bavail = 0;
186578e4b1f8SRick Macklem 		if (sf->f_ffree < 0)
186678e4b1f8SRick Macklem 			sf->f_ffree = 0;
186778e4b1f8SRick Macklem 	}
186890d2dfabSRick Macklem 	free(tsf, M_TEMP);
1869a9285ae5SZack Kirsch 	NFSEXITCODE(error);
187078e4b1f8SRick Macklem 	return (error);
18719ec7b004SRick Macklem }
18729ec7b004SRick Macklem 
18739ec7b004SRick Macklem /*
18749ec7b004SRick Macklem  * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but
18759ec7b004SRick Macklem  * must handle nfsrv_opencheck() calls after any other access checks.
18769ec7b004SRick Macklem  */
18779ec7b004SRick Macklem void
nfsvno_open(struct nfsrv_descript * nd,struct nameidata * ndp,nfsquad_t clientid,nfsv4stateid_t * stateidp,struct nfsstate * stp,int * exclusive_flagp,struct nfsvattr * nvap,int32_t * cverf,int create,NFSACL_T * aclp,nfsattrbit_t * attrbitp,struct ucred * cred,bool done_namei,struct nfsexstuff * exp,struct vnode ** vpp)18789ec7b004SRick Macklem nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp,
18799ec7b004SRick Macklem     nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp,
1880086f6e0cSRick Macklem     int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create,
1881dcfa3ee4SRick Macklem     NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, bool done_namei,
18829ec7b004SRick Macklem     struct nfsexstuff *exp, struct vnode **vpp)
18839ec7b004SRick Macklem {
18849ec7b004SRick Macklem 	struct vnode *vp = NULL;
18859ec7b004SRick Macklem 	u_quad_t tempsize;
18869ec7b004SRick Macklem 	struct nfsexstuff nes;
188701c27978SEdward Tomasz Napierala 	struct thread *p = curthread;
18883e230e0cSRick Macklem 	uint32_t oldrepstat;
18899ec7b004SRick Macklem 
18903e230e0cSRick Macklem 	if (ndp->ni_vp == NULL) {
18913e230e0cSRick Macklem 		/*
18923e230e0cSRick Macklem 		 * If nfsrv_opencheck() sets nd_repstat, done_namei needs to be
18933e230e0cSRick Macklem 		 * set true, since cleanup after nfsvno_namei() is needed.
18943e230e0cSRick Macklem 		 */
18953e230e0cSRick Macklem 		oldrepstat = nd->nd_repstat;
18969ec7b004SRick Macklem 		nd->nd_repstat = nfsrv_opencheck(clientid,
18979ec7b004SRick Macklem 		    stateidp, stp, NULL, nd, p, nd->nd_repstat);
18983e230e0cSRick Macklem 		if (nd->nd_repstat != 0 && oldrepstat == 0)
18993e230e0cSRick Macklem 			done_namei = true;
19003e230e0cSRick Macklem 	}
19019ec7b004SRick Macklem 	if (!nd->nd_repstat) {
19029ec7b004SRick Macklem 		if (ndp->ni_vp == NULL) {
19039ec7b004SRick Macklem 			nd->nd_repstat = VOP_CREATE(ndp->ni_dvp,
19049ec7b004SRick Macklem 			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
190590d2dfabSRick Macklem 			/* For a pNFS server, create the data file on a DS. */
190690d2dfabSRick Macklem 			if (nd->nd_repstat == 0) {
190790d2dfabSRick Macklem 				/*
190890d2dfabSRick Macklem 				 * Create a data file on a DS for a pNFS server.
190990d2dfabSRick Macklem 				 * This function just returns if not
191090d2dfabSRick Macklem 				 * running a pNFS DS or the creation fails.
191190d2dfabSRick Macklem 				 */
191290d2dfabSRick Macklem 				nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr,
191390d2dfabSRick Macklem 				    cred, p);
191490d2dfabSRick Macklem 			}
19154a21bcb2SKonstantin Belousov 			VOP_VPUT_PAIR(ndp->ni_dvp, nd->nd_repstat == 0 ?
19164a21bcb2SKonstantin Belousov 			    &ndp->ni_vp : NULL, false);
19179ec7b004SRick Macklem 			nfsvno_relpathbuf(ndp);
19189ec7b004SRick Macklem 			if (!nd->nd_repstat) {
19199ec7b004SRick Macklem 				if (*exclusive_flagp) {
19209ec7b004SRick Macklem 					*exclusive_flagp = 0;
19219ec7b004SRick Macklem 					NFSVNO_ATTRINIT(nvap);
1922086f6e0cSRick Macklem 					nvap->na_atime.tv_sec = cverf[0];
1923086f6e0cSRick Macklem 					nvap->na_atime.tv_nsec = cverf[1];
19249ec7b004SRick Macklem 					nd->nd_repstat = VOP_SETATTR(ndp->ni_vp,
19259ec7b004SRick Macklem 					    &nvap->na_vattr, cred);
192613c581fcSRick Macklem 					if (nd->nd_repstat != 0) {
192713c581fcSRick Macklem 						vput(ndp->ni_vp);
192813c581fcSRick Macklem 						ndp->ni_vp = NULL;
192913c581fcSRick Macklem 						nd->nd_repstat = NFSERR_NOTSUPP;
1930dedec68cSRick Macklem 					} else
1931dedec68cSRick Macklem 						NFSSETBIT_ATTRBIT(attrbitp,
1932dedec68cSRick Macklem 						    NFSATTRBIT_TIMEACCESS);
19339ec7b004SRick Macklem 				} else {
19349ec7b004SRick Macklem 					nfsrv_fixattr(nd, ndp->ni_vp, nvap,
19359ec7b004SRick Macklem 					    aclp, p, attrbitp, exp);
19369ec7b004SRick Macklem 				}
19379ec7b004SRick Macklem 			}
19389ec7b004SRick Macklem 			vp = ndp->ni_vp;
19399ec7b004SRick Macklem 		} else {
19409ec7b004SRick Macklem 			nfsvno_relpathbuf(ndp);
19419ec7b004SRick Macklem 			vp = ndp->ni_vp;
19429ec7b004SRick Macklem 			if (create == NFSV4OPEN_CREATE) {
19439ec7b004SRick Macklem 				if (ndp->ni_dvp == vp)
19449ec7b004SRick Macklem 					vrele(ndp->ni_dvp);
19459ec7b004SRick Macklem 				else
19469ec7b004SRick Macklem 					vput(ndp->ni_dvp);
19479ec7b004SRick Macklem 			}
19489ec7b004SRick Macklem 			if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) {
19499ec7b004SRick Macklem 				if (ndp->ni_cnd.cn_flags & RDONLY)
19509ec7b004SRick Macklem 					NFSVNO_SETEXRDONLY(&nes);
19519ec7b004SRick Macklem 				else
19529ec7b004SRick Macklem 					NFSVNO_EXINIT(&nes);
19539ec7b004SRick Macklem 				nd->nd_repstat = nfsvno_accchk(vp,
19548da45f2cSRick Macklem 				    VWRITE, cred, &nes, p,
19558da45f2cSRick Macklem 				    NFSACCCHK_NOOVERRIDE,
19568da45f2cSRick Macklem 				    NFSACCCHK_VPISLOCKED, NULL);
19579ec7b004SRick Macklem 				nd->nd_repstat = nfsrv_opencheck(clientid,
19589ec7b004SRick Macklem 				    stateidp, stp, vp, nd, p, nd->nd_repstat);
19599ec7b004SRick Macklem 				if (!nd->nd_repstat) {
19609ec7b004SRick Macklem 					tempsize = nvap->na_size;
19619ec7b004SRick Macklem 					NFSVNO_ATTRINIT(nvap);
19629ec7b004SRick Macklem 					nvap->na_size = tempsize;
1963f32bf50dSRick Macklem 					nd->nd_repstat = nfsvno_setattr(vp,
1964f32bf50dSRick Macklem 					    nvap, cred, p, exp);
19659ec7b004SRick Macklem 				}
19669ec7b004SRick Macklem 			} else if (vp->v_type == VREG) {
19679ec7b004SRick Macklem 				nd->nd_repstat = nfsrv_opencheck(clientid,
19689ec7b004SRick Macklem 				    stateidp, stp, vp, nd, p, nd->nd_repstat);
19699ec7b004SRick Macklem 			}
19709ec7b004SRick Macklem 		}
1971ded5f295SRick Macklem 	} else if (done_namei) {
19725fd0916cSRick Macklem 		KASSERT(create == NFSV4OPEN_CREATE,
19735fd0916cSRick Macklem 		    ("nfsvno_open: not create"));
1974ded5f295SRick Macklem 		/*
1975ded5f295SRick Macklem 		 * done_namei is set when nfsvno_namei() has completed
1976ded5f295SRick Macklem 		 * successfully, but a subsequent error was set in
1977ded5f295SRick Macklem 		 * nd_repstat.  As such, cleanup of the nfsvno_namei()
1978ded5f295SRick Macklem 		 * results is required.
1979ded5f295SRick Macklem 		 */
19809ec7b004SRick Macklem 		nfsvno_relpathbuf(ndp);
19819ec7b004SRick Macklem 		if (ndp->ni_dvp == ndp->ni_vp)
19829ec7b004SRick Macklem 			vrele(ndp->ni_dvp);
19839ec7b004SRick Macklem 		else
19849ec7b004SRick Macklem 			vput(ndp->ni_dvp);
19859ec7b004SRick Macklem 		if (ndp->ni_vp)
19869ec7b004SRick Macklem 			vput(ndp->ni_vp);
19879ec7b004SRick Macklem 	}
19889ec7b004SRick Macklem 	*vpp = vp;
1989a9285ae5SZack Kirsch 
1990a9285ae5SZack Kirsch 	NFSEXITCODE2(0, nd);
19919ec7b004SRick Macklem }
19929ec7b004SRick Macklem 
19939ec7b004SRick Macklem /*
19949ec7b004SRick Macklem  * Updates the file rev and sets the mtime and ctime
19959ec7b004SRick Macklem  * to the current clock time, returning the va_filerev and va_Xtime
19969ec7b004SRick Macklem  * values.
1997abd80ddbSMateusz Guzik  * Return ESTALE to indicate the vnode is VIRF_DOOMED.
19989ec7b004SRick Macklem  */
199943a213bbSRick Macklem int
nfsvno_updfilerev(struct vnode * vp,struct nfsvattr * nvap,struct nfsrv_descript * nd,struct thread * p)20009ec7b004SRick Macklem nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap,
200190d2dfabSRick Macklem     struct nfsrv_descript *nd, struct thread *p)
20029ec7b004SRick Macklem {
20039ec7b004SRick Macklem 	struct vattr va;
20049ec7b004SRick Macklem 
20059ec7b004SRick Macklem 	VATTR_NULL(&va);
2006d177f14dSJohn Baldwin 	vfs_timestamp(&va.va_mtime);
200743a213bbSRick Macklem 	if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
200843a213bbSRick Macklem 		NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
2009abd80ddbSMateusz Guzik 		if (VN_IS_DOOMED(vp))
201043a213bbSRick Macklem 			return (ESTALE);
201143a213bbSRick Macklem 	}
201290d2dfabSRick Macklem 	(void) VOP_SETATTR(vp, &va, nd->nd_cred);
201390d2dfabSRick Macklem 	(void) nfsvno_getattr(vp, nvap, nd, p, 1, NULL);
201443a213bbSRick Macklem 	return (0);
20159ec7b004SRick Macklem }
20169ec7b004SRick Macklem 
20179ec7b004SRick Macklem /*
20189ec7b004SRick Macklem  * Glue routine to nfsv4_fillattr().
20199ec7b004SRick Macklem  */
20209ec7b004SRick Macklem int
nfsvno_fillattr(struct nfsrv_descript * nd,struct mount * mp,struct vnode * vp,struct nfsvattr * nvap,fhandle_t * fhp,int rderror,nfsattrbit_t * attrbitp,struct ucred * cred,struct thread * p,int isdgram,int reterr,int supports_nfsv4acls,int at_root,uint64_t mounted_on_fileno)202107c0c166SRick Macklem nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp,
20229ec7b004SRick Macklem     struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp,
2023a09001a8SRick Macklem     struct ucred *cred, struct thread *p, int isdgram, int reterr,
2024a09001a8SRick Macklem     int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno)
20259ec7b004SRick Macklem {
202690d2dfabSRick Macklem 	struct statfs *sf;
20279ec7b004SRick Macklem 	int error;
20289ec7b004SRick Macklem 
202990d2dfabSRick Macklem 	sf = NULL;
203090d2dfabSRick Macklem 	if (nfsrv_devidcnt > 0 &&
203190d2dfabSRick Macklem 	    (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEAVAIL) ||
203290d2dfabSRick Macklem 	     NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEFREE) ||
203390d2dfabSRick Macklem 	     NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACETOTAL))) {
203490d2dfabSRick Macklem 		sf = malloc(sizeof(*sf), M_TEMP, M_WAITOK | M_ZERO);
2035910ccc77SRick Macklem 		error = nfsrv_pnfsstatfs(sf, mp);
203690d2dfabSRick Macklem 		if (error != 0) {
203790d2dfabSRick Macklem 			free(sf, M_TEMP);
203890d2dfabSRick Macklem 			sf = NULL;
203990d2dfabSRick Macklem 		}
204090d2dfabSRick Macklem 	}
204107c0c166SRick Macklem 	error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror,
2042a09001a8SRick Macklem 	    attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root,
204390d2dfabSRick Macklem 	    mounted_on_fileno, sf);
204490d2dfabSRick Macklem 	free(sf, M_TEMP);
2045a9285ae5SZack Kirsch 	NFSEXITCODE2(0, nd);
20469ec7b004SRick Macklem 	return (error);
20479ec7b004SRick Macklem }
20489ec7b004SRick Macklem 
20499ec7b004SRick Macklem /* Since the Readdir vnode ops vary, put the entire functions in here. */
20509ec7b004SRick Macklem /*
20519ec7b004SRick Macklem  * nfs readdir service
20529ec7b004SRick Macklem  * - mallocs what it thinks is enough to read
20539ec7b004SRick Macklem  *	count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
205454bde1faSRick Macklem  * - calls VOP_READDIR()
20559ec7b004SRick Macklem  * - loops around building the reply
20569ec7b004SRick Macklem  *	if the output generated exceeds count break out of loop
20579ec7b004SRick Macklem  *	The NFSM_CLGET macro is used here so that the reply will be packed
20589ec7b004SRick Macklem  *	tightly in mbuf clusters.
20599ec7b004SRick Macklem  * - it trims out records with d_fileno == 0
20609ec7b004SRick Macklem  *	this doesn't matter for Unix clients, but they might confuse clients
20619ec7b004SRick Macklem  *	for other os'.
20629ec7b004SRick Macklem  * - it trims out records with d_type == DT_WHT
20639ec7b004SRick Macklem  *	these cannot be seen through NFS (unless we extend the protocol)
20649ec7b004SRick Macklem  *     The alternate call nfsrvd_readdirplus() does lookups as well.
20659ec7b004SRick Macklem  * PS: The NFS protocol spec. does not clarify what the "count" byte
20669ec7b004SRick Macklem  *	argument is a count of.. just name strings and file id's or the
20679ec7b004SRick Macklem  *	entire reply rpc or ...
20689ec7b004SRick Macklem  *	I tried just file name and id sizes and it confused the Sun client,
20699ec7b004SRick Macklem  *	so I am using the full rpc size now. The "paranoia.." comment refers
20709ec7b004SRick Macklem  *	to including the status longwords that are not a part of the dir.
20719ec7b004SRick Macklem  *	"entry" structures, but are in the rpc.
20729ec7b004SRick Macklem  */
/*
 * Arguments, as used by the code below:
 *	nd	- request/reply descriptor: the arguments are dissected from
 *		  it, the reply is built into it and nd_repstat carries the
 *		  NFS status.
 *	isdgram	- not referenced in this function.
 *	vp	- the directory vnode; it is released with vput() on every
 *		  path taken after the initial nd_repstat check.
 *	exp	- export information handed to nfsvno_accchk().
 * Returns 0 through the "out" label (any NFS error is left in
 * nd->nd_repstat) or "error" through the "nfsmout" label.
 */
20739ec7b004SRick Macklem int
nfsrvd_readdir(struct nfsrv_descript * nd,int isdgram,struct vnode * vp,struct nfsexstuff * exp)20749ec7b004SRick Macklem nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram,
2075af444b18SEdward Tomasz Napierala     struct vnode *vp, struct nfsexstuff *exp)
20769ec7b004SRick Macklem {
20779ec7b004SRick Macklem 	struct dirent *dp;
20789ec7b004SRick Macklem 	u_int32_t *tl;
20799ec7b004SRick Macklem 	int dirlen;
20809ec7b004SRick Macklem 	char *cpos, *cend, *rbuf;
20819ec7b004SRick Macklem 	struct nfsvattr at;
20829ec7b004SRick Macklem 	int nlen, error = 0, getret = 1;
20839ec7b004SRick Macklem 	int siz, cnt, fullsiz, eofflag, ncookies;
20848014c971SRick Macklem 	u_int64_t off, toff, verf __unused;
2085b214fcceSAlan Somers 	uint64_t *cookies = NULL, *cookiep;
20869ec7b004SRick Macklem 	struct uio io;
20879ec7b004SRick Macklem 	struct iovec iv;
20884fc0f18cSBryan Drewery 	int is_ufs;
2089af444b18SEdward Tomasz Napierala 	struct thread *p = curthread;
20909ec7b004SRick Macklem 
	/* A status set before this call: just add the post-op attributes. */
20919ec7b004SRick Macklem 	if (nd->nd_repstat) {
20929ec7b004SRick Macklem 		nfsrv_postopattr(nd, getret, &at);
2093a9285ae5SZack Kirsch 		goto out;
20949ec7b004SRick Macklem 	}
	/*
	 * Dissect the arguments: NFSv2 carries a 32 bit offset followed by
	 * the count, the other versions a 64 bit offset, a 64 bit verifier
	 * and then the count.
	 * NOTE(review): no goto to nfsmout is visible in this function;
	 * presumably NFSM_DISSECT() sets "error" and jumps there when the
	 * request is too short - confirm against the macro definition.
	 */
20959ec7b004SRick Macklem 	if (nd->nd_flag & ND_NFSV2) {
20969ec7b004SRick Macklem 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
20979ec7b004SRick Macklem 		off = fxdr_unsigned(u_quad_t, *tl++);
20989ec7b004SRick Macklem 	} else {
20999ec7b004SRick Macklem 		NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
21009ec7b004SRick Macklem 		off = fxdr_hyper(tl);
21019ec7b004SRick Macklem 		tl += 2;
21029ec7b004SRick Macklem 		verf = fxdr_hyper(tl);
21039ec7b004SRick Macklem 		tl += 2;
21049ec7b004SRick Macklem 	}
	/* toff remembers the requested cookie for the skip loop below. */
21059ec7b004SRick Macklem 	toff = off;
21069ec7b004SRick Macklem 	cnt = fxdr_unsigned(int, *tl);
	/* A negative or oversized count is replaced by the server maximum. */
210754bde1faSRick Macklem 	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
21089ec7b004SRick Macklem 		cnt = NFS_SRVMAXDATA(nd);
	/*
	 * Round the read size up to a multiple of DIRBLKSIZ (the mask
	 * arithmetic assumes DIRBLKSIZ is a power of 2).  fullsiz keeps
	 * the value so that siz can be restored for a retry.
	 */
21099ec7b004SRick Macklem 	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
21109ec7b004SRick Macklem 	fullsiz = siz;
21119ec7b004SRick Macklem 	if (nd->nd_flag & ND_NFSV3) {
211290d2dfabSRick Macklem 		nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1,
211390d2dfabSRick Macklem 		    NULL);
21149ec7b004SRick Macklem #if 0
21159ec7b004SRick Macklem 		/*
21169ec7b004SRick Macklem 		 * va_filerev is not sufficient as a cookie verifier,
21179ec7b004SRick Macklem 		 * since it is not supposed to change when entries are
21189ec7b004SRick Macklem 		 * removed/added unless that offset cookies returned to
21199ec7b004SRick Macklem 		 * the client are no longer valid.
21209ec7b004SRick Macklem 		 */
21219ec7b004SRick Macklem 		if (!nd->nd_repstat && toff && verf != at.na_filerev)
21229ec7b004SRick Macklem 			nd->nd_repstat = NFSERR_BAD_COOKIE;
21239ec7b004SRick Macklem #endif
21249ec7b004SRick Macklem 	}
2125c93c82f4SDag-Erling Smørgrav 	if (!nd->nd_repstat && vp->v_type != VDIR)
2126c93c82f4SDag-Erling Smørgrav 		nd->nd_repstat = NFSERR_NOTDIR;
	/* A count of 0 leaves no room for any reply data. */
212754bde1faSRick Macklem 	if (nd->nd_repstat == 0 && cnt == 0) {
212854bde1faSRick Macklem 		if (nd->nd_flag & ND_NFSV2)
212954bde1faSRick Macklem 			/* NFSv2 does not have NFSERR_TOOSMALL */
213054bde1faSRick Macklem 			nd->nd_repstat = EPERM;
213154bde1faSRick Macklem 		else
213254bde1faSRick Macklem 			nd->nd_repstat = NFSERR_TOOSMALL;
213354bde1faSRick Macklem 	}
21349ec7b004SRick Macklem 	if (!nd->nd_repstat)
21358da45f2cSRick Macklem 		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
21369ec7b004SRick Macklem 		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
21378da45f2cSRick Macklem 		    NFSACCCHK_VPISLOCKED, NULL);
	/* Any of the checks above failed: release vp and reply now. */
21389ec7b004SRick Macklem 	if (nd->nd_repstat) {
21399ec7b004SRick Macklem 		vput(vp);
21409ec7b004SRick Macklem 		if (nd->nd_flag & ND_NFSV3)
21419ec7b004SRick Macklem 			nfsrv_postopattr(nd, getret, &at);
2142a9285ae5SZack Kirsch 		goto out;
21439ec7b004SRick Macklem 	}
	/* is_ufs enables the cookie based skipping in the loop below. */
21444fc0f18cSBryan Drewery 	is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0;
2145222daa42SConrad Meyer 	rbuf = malloc(siz, M_TEMP, M_WAITOK);
	/*
	 * Re-entered from below when a directory block held no usable
	 * record; cookies from the previous pass are freed first.
	 */
21469ec7b004SRick Macklem again:
21479ec7b004SRick Macklem 	eofflag = 0;
21489ec7b004SRick Macklem 	if (cookies) {
2149222daa42SConrad Meyer 		free(cookies, M_TEMP);
21509ec7b004SRick Macklem 		cookies = NULL;
21519ec7b004SRick Macklem 	}
21529ec7b004SRick Macklem 
	/* Describe rbuf as a single kernel-space read buffer of siz bytes. */
21539ec7b004SRick Macklem 	iv.iov_base = rbuf;
21549ec7b004SRick Macklem 	iv.iov_len = siz;
21559ec7b004SRick Macklem 	io.uio_iov = &iv;
21569ec7b004SRick Macklem 	io.uio_iovcnt = 1;
21579ec7b004SRick Macklem 	io.uio_offset = (off_t)off;
21589ec7b004SRick Macklem 	io.uio_resid = siz;
21599ec7b004SRick Macklem 	io.uio_segflg = UIO_SYSSPACE;
21609ec7b004SRick Macklem 	io.uio_rw = UIO_READ;
21619ec7b004SRick Macklem 	io.uio_td = NULL;
21629ec7b004SRick Macklem 	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
21639ec7b004SRick Macklem 	    &cookies);
	/*
	 * off now holds the offset left in the uio by the read and siz is
	 * reduced by the part of the buffer that was not filled.
	 */
21649ec7b004SRick Macklem 	off = (u_int64_t)io.uio_offset;
21659ec7b004SRick Macklem 	if (io.uio_resid)
21669ec7b004SRick Macklem 		siz -= io.uio_resid;
21679ec7b004SRick Macklem 
	/* A successful read that returned no cookie array is an error. */
21689ec7b004SRick Macklem 	if (!cookies && !nd->nd_repstat)
21699ec7b004SRick Macklem 		nd->nd_repstat = NFSERR_PERM;
21709ec7b004SRick Macklem 	if (nd->nd_flag & ND_NFSV3) {
217190d2dfabSRick Macklem 		getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
21729ec7b004SRick Macklem 		if (!nd->nd_repstat)
21739ec7b004SRick Macklem 			nd->nd_repstat = getret;
21749ec7b004SRick Macklem 	}
21759ec7b004SRick Macklem 
21769ec7b004SRick Macklem 	/*
21779ec7b004SRick Macklem 	 * Handles the failed cases. nd->nd_repstat == 0 past here.
21789ec7b004SRick Macklem 	 */
21799ec7b004SRick Macklem 	if (nd->nd_repstat) {
2180a852f40bSRick Macklem 		vput(vp);
2181222daa42SConrad Meyer 		free(rbuf, M_TEMP);
21829ec7b004SRick Macklem 		if (cookies)
2183222daa42SConrad Meyer 			free(cookies, M_TEMP);
21849ec7b004SRick Macklem 		if (nd->nd_flag & ND_NFSV3)
21859ec7b004SRick Macklem 			nfsrv_postopattr(nd, getret, &at);
2186a9285ae5SZack Kirsch 		goto out;
21879ec7b004SRick Macklem 	}
21889ec7b004SRick Macklem 	/*
21899ec7b004SRick Macklem 	 * If nothing read, return eof
21909ec7b004SRick Macklem 	 * rpc reply
21919ec7b004SRick Macklem 	 */
21929ec7b004SRick Macklem 	if (siz == 0) {
2193a852f40bSRick Macklem 		vput(vp);
21949ec7b004SRick Macklem 		if (nd->nd_flag & ND_NFSV2) {
21959ec7b004SRick Macklem 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
21969ec7b004SRick Macklem 		} else {
21979ec7b004SRick Macklem 			nfsrv_postopattr(nd, getret, &at);
21989ec7b004SRick Macklem 			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
21999ec7b004SRick Macklem 			txdr_hyper(at.na_filerev, tl);
22009ec7b004SRick Macklem 			tl += 2;
22019ec7b004SRick Macklem 		}
		/* No entry follows, and the eof flag is true. */
22029ec7b004SRick Macklem 		*tl++ = newnfs_false;
22039ec7b004SRick Macklem 		*tl = newnfs_true;
2204222daa42SConrad Meyer 		free(rbuf, M_TEMP);
2205222daa42SConrad Meyer 		free(cookies, M_TEMP);
2206a9285ae5SZack Kirsch 		goto out;
22079ec7b004SRick Macklem 	}
22089ec7b004SRick Macklem 
22099ec7b004SRick Macklem 	/*
22109ec7b004SRick Macklem 	 * Check for degenerate cases of nothing useful read.
22119ec7b004SRick Macklem 	 * If so go try again
22129ec7b004SRick Macklem 	 */
22139ec7b004SRick Macklem 	cpos = rbuf;
22149ec7b004SRick Macklem 	cend = rbuf + siz;
22159ec7b004SRick Macklem 	dp = (struct dirent *)cpos;
22169ec7b004SRick Macklem 	cookiep = cookies;
22179ec7b004SRick Macklem 
22189ec7b004SRick Macklem 	/*
22199ec7b004SRick Macklem 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
22209ec7b004SRick Macklem 	 * directory offset up to a block boundary, so it is necessary to
22219ec7b004SRick Macklem 	 * skip over the records that precede the requested offset. This
22229ec7b004SRick Macklem 	 * requires the assumption that file offset cookies monotonically
22239ec7b004SRick Macklem 	 * increase.
22249ec7b004SRick Macklem 	 */
22259ec7b004SRick Macklem 	while (cpos < cend && ncookies > 0 &&
22269ec7b004SRick Macklem 	    (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
22274fc0f18cSBryan Drewery 	     (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) {
22289ec7b004SRick Macklem 		cpos += dp->d_reclen;
22299ec7b004SRick Macklem 		dp = (struct dirent *)cpos;
22309ec7b004SRick Macklem 		cookiep++;
22319ec7b004SRick Macklem 		ncookies--;
22329ec7b004SRick Macklem 	}
	/*
	 * Every record of this read was skipped: read again starting at
	 * the new offset with the full buffer size.
	 */
22339ec7b004SRick Macklem 	if (cpos >= cend || ncookies == 0) {
22349ec7b004SRick Macklem 		siz = fullsiz;
22359ec7b004SRick Macklem 		toff = off;
22369ec7b004SRick Macklem 		goto again;
22379ec7b004SRick Macklem 	}
	/* vp is not used past this point; the reply is built from rbuf. */
2238a852f40bSRick Macklem 	vput(vp);
22399ec7b004SRick Macklem 
22409ec7b004SRick Macklem 	/*
2241ea83d07eSRick Macklem 	 * If cnt > MCLBYTES and the reply will not be saved, use
2242ea83d07eSRick Macklem 	 * ext_pgs mbufs for TLS.
2243ea83d07eSRick Macklem 	 * For NFSv4.0, we do not know for sure if the reply will
2244ea83d07eSRick Macklem 	 * be saved, so do not use ext_pgs mbufs for NFSv4.0.
2245ea83d07eSRick Macklem 	 */
2246ea83d07eSRick Macklem 	if (cnt > MCLBYTES && siz > MCLBYTES &&
2247ea83d07eSRick Macklem 	    (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS &&
2248ea83d07eSRick Macklem 	    (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4)
2249ea83d07eSRick Macklem 		nd->nd_flag |= ND_EXTPG;
2250ea83d07eSRick Macklem 
2251ea83d07eSRick Macklem 	/*
22529ec7b004SRick Macklem 	 * dirlen is the size of the reply, including all XDR and must
22539ec7b004SRick Macklem 	 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate
22549ec7b004SRick Macklem 	 * if the XDR should be included in "count", but to be safe, we do.
22559ec7b004SRick Macklem 	 * (Include the two booleans at the end of the reply in dirlen now.)
22569ec7b004SRick Macklem 	 */
22579ec7b004SRick Macklem 	if (nd->nd_flag & ND_NFSV3) {
22589ec7b004SRick Macklem 		nfsrv_postopattr(nd, getret, &at);
22599ec7b004SRick Macklem 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
22609ec7b004SRick Macklem 		txdr_hyper(at.na_filerev, tl);
22619ec7b004SRick Macklem 		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
22629ec7b004SRick Macklem 	} else {
22639ec7b004SRick Macklem 		dirlen = 2 * NFSX_UNSIGNED;
22649ec7b004SRick Macklem 	}
22659ec7b004SRick Macklem 
22669ec7b004SRick Macklem 	/* Loop through the records and build reply */
22679ec7b004SRick Macklem 	while (cpos < cend && ncookies > 0) {
22689ec7b004SRick Macklem 		nlen = dp->d_namlen;
		/*
		 * Records with d_fileno == 0, whiteouts and names longer
		 * than NFS_MAXNAMLEN are stepped over without a reply entry.
		 */
22699ec7b004SRick Macklem 		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
22709ec7b004SRick Macklem 			nlen <= NFS_MAXNAMLEN) {
22719ec7b004SRick Macklem 			if (nd->nd_flag & ND_NFSV3)
22729ec7b004SRick Macklem 				dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
22739ec7b004SRick Macklem 			else
22749ec7b004SRick Macklem 				dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
			/* This entry would push the reply past cnt: stop, not eof. */
22759ec7b004SRick Macklem 			if (dirlen > cnt) {
22769ec7b004SRick Macklem 				eofflag = 0;
22779ec7b004SRick Macklem 				break;
22789ec7b004SRick Macklem 			}
22799ec7b004SRick Macklem 
22809ec7b004SRick Macklem 			/*
22819ec7b004SRick Macklem 			 * Build the directory record xdr from
22829ec7b004SRick Macklem 			 * the dirent entry.
22839ec7b004SRick Macklem 			 */
22849ec7b004SRick Macklem 			if (nd->nd_flag & ND_NFSV3) {
22859ec7b004SRick Macklem 				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
22869ec7b004SRick Macklem 				*tl++ = newnfs_true;
2287648a208eSRick Macklem 				txdr_hyper(dp->d_fileno, tl);
22889ec7b004SRick Macklem 			} else {
22899ec7b004SRick Macklem 				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
22909ec7b004SRick Macklem 				*tl++ = newnfs_true;
22919ec7b004SRick Macklem 				*tl = txdr_unsigned(dp->d_fileno);
2292648a208eSRick Macklem 			}
22939ec7b004SRick Macklem 			(void) nfsm_strtom(nd, dp->d_name, nlen);
			/* The cookie is 64 bits wide for NFSv3, 32 bits otherwise. */
22949ec7b004SRick Macklem 			if (nd->nd_flag & ND_NFSV3) {
22959ec7b004SRick Macklem 				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
229632fbc5d8SAlan Somers 				txdr_hyper(*cookiep, tl);
229732fbc5d8SAlan Somers 			} else {
22989ec7b004SRick Macklem 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
22999ec7b004SRick Macklem 				*tl = txdr_unsigned(*cookiep);
23009ec7b004SRick Macklem 			}
230132fbc5d8SAlan Somers 		}
23029ec7b004SRick Macklem 		cpos += dp->d_reclen;
23039ec7b004SRick Macklem 		dp = (struct dirent *)cpos;
23049ec7b004SRick Macklem 		cookiep++;
23059ec7b004SRick Macklem 		ncookies--;
23069ec7b004SRick Macklem 	}
	/* Records left unprocessed in rbuf mean this is not the end. */
23079ec7b004SRick Macklem 	if (cpos < cend)
23089ec7b004SRick Macklem 		eofflag = 0;
	/* Trailer: "no further entry" marker followed by the eof flag. */
23099ec7b004SRick Macklem 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
23109ec7b004SRick Macklem 	*tl++ = newnfs_false;
23119ec7b004SRick Macklem 	if (eofflag)
23129ec7b004SRick Macklem 		*tl = newnfs_true;
23139ec7b004SRick Macklem 	else
23149ec7b004SRick Macklem 		*tl = newnfs_false;
2315222daa42SConrad Meyer 	free(rbuf, M_TEMP);
2316222daa42SConrad Meyer 	free(cookies, M_TEMP);
2317a9285ae5SZack Kirsch 
2318a9285ae5SZack Kirsch out:
2319a9285ae5SZack Kirsch 	NFSEXITCODE2(0, nd);
23209ec7b004SRick Macklem 	return (0);
	/* Error exit that releases vp and returns "error" to the caller. */
23219ec7b004SRick Macklem nfsmout:
23229ec7b004SRick Macklem 	vput(vp);
2323a9285ae5SZack Kirsch 	NFSEXITCODE2(error, nd);
23249ec7b004SRick Macklem 	return (error);
23259ec7b004SRick Macklem }
23269ec7b004SRick Macklem 
23279ec7b004SRick Macklem /*
23289ec7b004SRick Macklem  * Readdirplus for V3 and Readdir for V4.
 *
 * Decodes the request (directory offset cookie, cookie verifier, the
 * "siz" hint and the "cnt" reply limit; for NFSv4 also the requested
 * attribute bits), reads a batch of entries from vp with VOP_READDIR()
 * and appends one reply record per usable entry until the records run
 * out, the reply length (dirlen) reaches cnt, or an error stops the loop.
 * NFSv3 records carry post-op attributes and a file handle; NFSv4
 * records carry the attributes produced by nfsvno_fillattr() or
 * nfsrv_putreferralattr().
 *
 * nd      - request/reply descriptor; the NFS status ends up in
 *           nd->nd_repstat.
 * isdgram - only passed through to nfsvno_fillattr().
 * vp      - vnode to read; anything other than VDIR yields
 *           NFSERR_NOTDIR.  It appears to arrive locked and referenced
 *           (NFSACCCHK_VPISLOCKED is passed and the early error paths
 *           use vput()); confirm against the caller.  Apart from the
 *           early return taken when nd->nd_repstat is already set,
 *           every path in this function releases it.
 * exp     - only passed through to nfsvno_accchk().
 *
 * Returns 0 via "out" (success or failure is reported in
 * nd->nd_repstat), or the error from nfsrv_getattrbits() via "nfsmout".
 * NFSM_DISSECT() presumably also branches to "nfsmout" on a short
 * request; its definition is not visible here.
23299ec7b004SRick Macklem  */
23309ec7b004SRick Macklem int
nfsrvd_readdirplus(struct nfsrv_descript * nd,int isdgram,struct vnode * vp,struct nfsexstuff * exp)23319ec7b004SRick Macklem nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
2332af444b18SEdward Tomasz Napierala     struct vnode *vp, struct nfsexstuff *exp)
23339ec7b004SRick Macklem {
23349ec7b004SRick Macklem 	struct dirent *dp;
23359ec7b004SRick Macklem 	u_int32_t *tl;
23369ec7b004SRick Macklem 	int dirlen;
23379ec7b004SRick Macklem 	char *cpos, *cend, *rbuf;
23389ec7b004SRick Macklem 	struct vnode *nvp;
23399ec7b004SRick Macklem 	fhandle_t nfh;
23409ec7b004SRick Macklem 	struct nfsvattr nva, at, *nvap = &nva;
23419ec7b004SRick Macklem 	struct mbuf *mb0, *mb1;
23429ec7b004SRick Macklem 	struct nfsreferral *refp;
234338e3ea69SRick Macklem 	int nlen, r, error = 0, getret = 1, usevget = 1;
23449ec7b004SRick Macklem 	int siz, cnt, fullsiz, eofflag, ncookies, entrycnt;
23459ec7b004SRick Macklem 	caddr_t bpos0, bpos1;
2346638b90a1SRick Macklem 	u_int64_t off, toff, verf __unused;
2347b214fcceSAlan Somers 	uint64_t *cookies = NULL, *cookiep;
2348cd5edc7dSRick Macklem 	nfsattrbit_t attrbits, rderrbits, savbits, refbits;
23499ec7b004SRick Macklem 	struct uio io;
23509ec7b004SRick Macklem 	struct iovec iv;
235138e3ea69SRick Macklem 	struct componentname cn;
23524fc0f18cSBryan Drewery 	int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls;
235307c0c166SRick Macklem 	struct mount *mp, *new_mp;
235407c0c166SRick Macklem 	uint64_t mounted_on_fileno;
2355af444b18SEdward Tomasz Napierala 	struct thread *p = curthread;
2356ea83d07eSRick Macklem 	int bextpg0, bextpg1, bextpgsiz0, bextpgsiz1;
23579ec7b004SRick Macklem 
	/*
	 * nd_repstat was already set before this function ran.  getret is
	 * still 1 and "at" has not been filled in at this point.  Unlike
	 * the later error paths, this nfsrv_postopattr() call is not
	 * guarded by ND_NFSV3, and vp is not touched.
	 */
23589ec7b004SRick Macklem 	if (nd->nd_repstat) {
23599ec7b004SRick Macklem 		nfsrv_postopattr(nd, getret, &at);
2360a9285ae5SZack Kirsch 		goto out;
23619ec7b004SRick Macklem 	}
	/*
	 * Six 32-bit words of arguments: a 64-bit directory offset cookie,
	 * a 64-bit cookie verifier, then siz and cnt.
	 */
23629ec7b004SRick Macklem 	NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
23639ec7b004SRick Macklem 	off = fxdr_hyper(tl);
23649ec7b004SRick Macklem 	toff = off;
23659ec7b004SRick Macklem 	tl += 2;
23669ec7b004SRick Macklem 	verf = fxdr_hyper(tl);
23679ec7b004SRick Macklem 	tl += 2;
23689ec7b004SRick Macklem 	siz = fxdr_unsigned(int, *tl++);
23699ec7b004SRick Macklem 	cnt = fxdr_unsigned(int, *tl);
23709ec7b004SRick Macklem 
23719ec7b004SRick Macklem 	/*
23729ec7b004SRick Macklem 	 * Use the server's maximum data transfer size as the upper bound
23739ec7b004SRick Macklem 	 * on reply datalen.
23749ec7b004SRick Macklem 	 */
237554bde1faSRick Macklem 	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
23769ec7b004SRick Macklem 		cnt = NFS_SRVMAXDATA(nd);
23779ec7b004SRick Macklem 
23789ec7b004SRick Macklem 	/*
23799ec7b004SRick Macklem 	 * siz is a "hint" of how much directory information (name, fileid,
23809ec7b004SRick Macklem 	 * cookie) should be in the reply. At least one client "hints" 0,
23819ec7b004SRick Macklem 	 * so I set it to cnt for that case. I also round it up to the
23829ec7b004SRick Macklem 	 * next multiple of DIRBLKSIZ.
238375772b69SRick Macklem 	 * Since the size of a Readdirplus directory entry reply will always
238475772b69SRick Macklem 	 * be greater than a directory entry returned by VOP_READDIR(), it
238575772b69SRick Macklem 	 * does not make sense to read more than NFS_SRVMAXDATA() via
238675772b69SRick Macklem 	 * VOP_READDIR().
23879ec7b004SRick Macklem 	 */
238854bde1faSRick Macklem 	if (siz <= 0)
23899ec7b004SRick Macklem 		siz = cnt;
239075772b69SRick Macklem 	else if (siz > NFS_SRVMAXDATA(nd))
239175772b69SRick Macklem 		siz = NFS_SRVMAXDATA(nd);
23929ec7b004SRick Macklem 	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
23939ec7b004SRick Macklem 
23949ec7b004SRick Macklem 	if (nd->nd_flag & ND_NFSV4) {
23959ec7b004SRick Macklem 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
23969ec7b004SRick Macklem 		if (error)
23979ec7b004SRick Macklem 			goto nfsmout;
		/*
		 * Keep two copies of the requested bits as received:
		 * savbits decides below whether a vnode is needed, refbits
		 * is what referral entries are answered with.
		 */
23989ec7b004SRick Macklem 		NFSSET_ATTRBIT(&savbits, &attrbits);
2399cd5edc7dSRick Macklem 		NFSSET_ATTRBIT(&refbits, &attrbits);
2400ea5776ecSRick Macklem 		NFSCLRNOTFILLABLE_ATTRBIT(&attrbits, nd);
		/* rderrbits has only RDATTRERROR set. */
24019ec7b004SRick Macklem 		NFSZERO_ATTRBIT(&rderrbits);
24029ec7b004SRick Macklem 		NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR);
2403cd5edc7dSRick Macklem 		/*
2404cd5edc7dSRick Macklem 		 * If these 4 bits are the only attributes requested by the
2405cd5edc7dSRick Macklem 		 * client, they can be satisfied without acquiring the vnode
2406cd5edc7dSRick Macklem 		 * for the file object unless it is a directory.
2407cd5edc7dSRick Macklem 		 * This will be indicated by savbits being all 0s.
2408cd5edc7dSRick Macklem 		 */
2409cd5edc7dSRick Macklem 		NFSCLRBIT_ATTRBIT(&savbits, NFSATTRBIT_TYPE);
2410cd5edc7dSRick Macklem 		NFSCLRBIT_ATTRBIT(&savbits, NFSATTRBIT_FILEID);
2411cd5edc7dSRick Macklem 		NFSCLRBIT_ATTRBIT(&savbits, NFSATTRBIT_MOUNTEDONFILEID);
2412cd5edc7dSRick Macklem 		NFSCLRBIT_ATTRBIT(&savbits, NFSATTRBIT_RDATTRERROR);
24139ec7b004SRick Macklem 	} else {
24149ec7b004SRick Macklem 		NFSZERO_ATTRBIT(&attrbits);
24159ec7b004SRick Macklem 	}
	/*
	 * Remember the rounded read size: siz is reduced by the unread
	 * residual after each VOP_READDIR() and restored from fullsiz
	 * before a retry.
	 */
24169ec7b004SRick Macklem 	fullsiz = siz;
241790d2dfabSRick Macklem 	nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
24188361de25SRick Macklem #if 0
24199ec7b004SRick Macklem 	if (!nd->nd_repstat) {
24209ec7b004SRick Macklem 	    if (off && verf != at.na_filerev) {
24219ec7b004SRick Macklem 		/*
24229ec7b004SRick Macklem 		 * va_filerev is not sufficient as a cookie verifier,
24239ec7b004SRick Macklem 		 * since it is not supposed to change when entries are
24249ec7b004SRick Macklem 		 * removed/added unless that offset cookies returned to
24259ec7b004SRick Macklem 		 * the client are no longer valid.
24269ec7b004SRick Macklem 		 */
24279ec7b004SRick Macklem 		if (nd->nd_flag & ND_NFSV4) {
24289ec7b004SRick Macklem 			nd->nd_repstat = NFSERR_NOTSAME;
24299ec7b004SRick Macklem 		} else {
24309ec7b004SRick Macklem 			nd->nd_repstat = NFSERR_BAD_COOKIE;
24319ec7b004SRick Macklem 		}
24328361de25SRick Macklem 	    }
24338361de25SRick Macklem 	}
24349ec7b004SRick Macklem #endif
24359ec7b004SRick Macklem 	if (!nd->nd_repstat && vp->v_type != VDIR)
24369ec7b004SRick Macklem 		nd->nd_repstat = NFSERR_NOTDIR;
24379ec7b004SRick Macklem 	if (!nd->nd_repstat && cnt == 0)
24389ec7b004SRick Macklem 		nd->nd_repstat = NFSERR_TOOSMALL;
24399ec7b004SRick Macklem 	if (!nd->nd_repstat)
24408da45f2cSRick Macklem 		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
24419ec7b004SRick Macklem 		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
24428da45f2cSRick Macklem 		    NFSACCCHK_VPISLOCKED, NULL);
24439ec7b004SRick Macklem 	if (nd->nd_repstat) {
24449ec7b004SRick Macklem 		vput(vp);
24459ec7b004SRick Macklem 		if (nd->nd_flag & ND_NFSV3)
24469ec7b004SRick Macklem 			nfsrv_postopattr(nd, getret, &at);
2447a9285ae5SZack Kirsch 		goto out;
24489ec7b004SRick Macklem 	}
	/*
	 * The file system type selects workarounds below: is_ufs enables
	 * skipping of cookies at or before the requested offset; is_zfs
	 * enables the VFS_VGET() probe and the snapshot mount detection.
	 */
24494fc0f18cSBryan Drewery 	is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0;
24504fc0f18cSBryan Drewery 	is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0;
24519ec7b004SRick Macklem 
2452222daa42SConrad Meyer 	rbuf = malloc(siz, M_TEMP, M_WAITOK);
	/*
	 * Each pass reads up to siz bytes of directory records starting at
	 * "off" into rbuf; a cookie array left over from a previous pass is
	 * released first.
	 */
24539ec7b004SRick Macklem again:
24549ec7b004SRick Macklem 	eofflag = 0;
24559ec7b004SRick Macklem 	if (cookies) {
2456222daa42SConrad Meyer 		free(cookies, M_TEMP);
24579ec7b004SRick Macklem 		cookies = NULL;
24589ec7b004SRick Macklem 	}
24599ec7b004SRick Macklem 
24609ec7b004SRick Macklem 	iv.iov_base = rbuf;
24619ec7b004SRick Macklem 	iv.iov_len = siz;
24629ec7b004SRick Macklem 	io.uio_iov = &iv;
24639ec7b004SRick Macklem 	io.uio_iovcnt = 1;
24649ec7b004SRick Macklem 	io.uio_offset = (off_t)off;
24659ec7b004SRick Macklem 	io.uio_resid = siz;
24669ec7b004SRick Macklem 	io.uio_segflg = UIO_SYSSPACE;
24679ec7b004SRick Macklem 	io.uio_rw = UIO_READ;
24689ec7b004SRick Macklem 	io.uio_td = NULL;
24699ec7b004SRick Macklem 	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
24709ec7b004SRick Macklem 	    &cookies);
	/* Continue from where the read stopped; siz becomes the bytes read. */
24719ec7b004SRick Macklem 	off = (u_int64_t)io.uio_offset;
24729ec7b004SRick Macklem 	if (io.uio_resid)
24739ec7b004SRick Macklem 		siz -= io.uio_resid;
24749ec7b004SRick Macklem 
247590d2dfabSRick Macklem 	getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
24769ec7b004SRick Macklem 
	/*
	 * A successful VOP_READDIR() that returned no cookie array is
	 * reported as NFSERR_PERM; otherwise a getattr failure becomes
	 * the reply status.
	 */
24779ec7b004SRick Macklem 	if (!cookies && !nd->nd_repstat)
24789ec7b004SRick Macklem 		nd->nd_repstat = NFSERR_PERM;
24799ec7b004SRick Macklem 	if (!nd->nd_repstat)
24809ec7b004SRick Macklem 		nd->nd_repstat = getret;
24819ec7b004SRick Macklem 	if (nd->nd_repstat) {
248238e3ea69SRick Macklem 		vput(vp);
24839ec7b004SRick Macklem 		if (cookies)
2484222daa42SConrad Meyer 			free(cookies, M_TEMP);
2485222daa42SConrad Meyer 		free(rbuf, M_TEMP);
24869ec7b004SRick Macklem 		if (nd->nd_flag & ND_NFSV3)
24879ec7b004SRick Macklem 			nfsrv_postopattr(nd, getret, &at);
2488a9285ae5SZack Kirsch 		goto out;
24899ec7b004SRick Macklem 	}
24909ec7b004SRick Macklem 	/*
24919ec7b004SRick Macklem 	 * If nothing read, return eof
24929ec7b004SRick Macklem 	 * rpc reply
24939ec7b004SRick Macklem 	 */
24949ec7b004SRick Macklem 	if (siz == 0) {
249538e3ea69SRick Macklem 		vput(vp);
24969ec7b004SRick Macklem 		if (nd->nd_flag & ND_NFSV3)
24979ec7b004SRick Macklem 			nfsrv_postopattr(nd, getret, &at);
		/* Verifier (na_filerev), "no entry follows", then eof = true. */
24989ec7b004SRick Macklem 		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
24999ec7b004SRick Macklem 		txdr_hyper(at.na_filerev, tl);
25009ec7b004SRick Macklem 		tl += 2;
25019ec7b004SRick Macklem 		*tl++ = newnfs_false;
25029ec7b004SRick Macklem 		*tl = newnfs_true;
2503222daa42SConrad Meyer 		free(cookies, M_TEMP);
2504222daa42SConrad Meyer 		free(rbuf, M_TEMP);
2505a9285ae5SZack Kirsch 		goto out;
25069ec7b004SRick Macklem 	}
25079ec7b004SRick Macklem 
25089ec7b004SRick Macklem 	/*
25099ec7b004SRick Macklem 	 * Check for degenerate cases of nothing useful read.
25109ec7b004SRick Macklem 	 * If so go try again
25119ec7b004SRick Macklem 	 */
25129ec7b004SRick Macklem 	cpos = rbuf;
25139ec7b004SRick Macklem 	cend = rbuf + siz;
25149ec7b004SRick Macklem 	dp = (struct dirent *)cpos;
25159ec7b004SRick Macklem 	cookiep = cookies;
25169ec7b004SRick Macklem 
25179ec7b004SRick Macklem 	/*
25189ec7b004SRick Macklem 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
25199ec7b004SRick Macklem 	 * directory offset up to a block boundary, so it is necessary to
25209ec7b004SRick Macklem 	 * skip over the records that precede the requested offset. This
25219ec7b004SRick Macklem 	 * requires the assumption that file offset cookies monotonically
25229ec7b004SRick Macklem 	 * increase.
25239ec7b004SRick Macklem 	 */
	/*
	 * Besides those UFS records, this also skips leading records with
	 * a zero fileno, whiteouts and, for NFSv4, "." and "..".
	 */
25249ec7b004SRick Macklem 	while (cpos < cend && ncookies > 0 &&
25259ec7b004SRick Macklem 	  (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
25264fc0f18cSBryan Drewery 	   (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) ||
25279ec7b004SRick Macklem 	   ((nd->nd_flag & ND_NFSV4) &&
25289ec7b004SRick Macklem 	    ((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
25299ec7b004SRick Macklem 	     (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) {
25309ec7b004SRick Macklem 		cpos += dp->d_reclen;
25319ec7b004SRick Macklem 		dp = (struct dirent *)cpos;
25329ec7b004SRick Macklem 		cookiep++;
25339ec7b004SRick Macklem 		ncookies--;
25349ec7b004SRick Macklem 	}
	/*
	 * Every record of this batch was skipped: restore the full read
	 * size, move the skip threshold (toff) up to the new offset and
	 * read the next batch.
	 */
25359ec7b004SRick Macklem 	if (cpos >= cend || ncookies == 0) {
25369ec7b004SRick Macklem 		siz = fullsiz;
25379ec7b004SRick Macklem 		toff = off;
25389ec7b004SRick Macklem 		goto again;
25399ec7b004SRick Macklem 	}
2540f9266eb1SRick Macklem 
2541f9266eb1SRick Macklem 	/*
2542f9266eb1SRick Macklem 	 * Busy the file system so that the mount point won't go away
2543f9266eb1SRick Macklem 	 * and, as such, VFS_VGET() can be used safely.
2544f9266eb1SRick Macklem 	 */
	/*
	 * vp is unlocked here (mp is referenced across the vfs_busy()
	 * call); the remaining exit paths drop vp with vrele() instead
	 * of vput().
	 */
2545f9266eb1SRick Macklem 	mp = vp->v_mount;
2546f9266eb1SRick Macklem 	vfs_ref(mp);
2547b249ce48SMateusz Guzik 	NFSVOPUNLOCK(vp);
2548f9266eb1SRick Macklem 	nd->nd_repstat = vfs_busy(mp, 0);
2549f9266eb1SRick Macklem 	vfs_rel(mp);
2550f9266eb1SRick Macklem 	if (nd->nd_repstat != 0) {
2551f9266eb1SRick Macklem 		vrele(vp);
2552f9266eb1SRick Macklem 		free(cookies, M_TEMP);
2553f9266eb1SRick Macklem 		free(rbuf, M_TEMP);
2554f9266eb1SRick Macklem 		if (nd->nd_flag & ND_NFSV3)
2555f9266eb1SRick Macklem 			nfsrv_postopattr(nd, getret, &at);
2556a9285ae5SZack Kirsch 		goto out;
2557f9266eb1SRick Macklem 	}
25589ec7b004SRick Macklem 
25599ec7b004SRick Macklem 	/*
256069921123SKonstantin Belousov 	 * Check to see if entries in this directory can be safely acquired
256169921123SKonstantin Belousov 	 * via VFS_VGET() or if a switch to VOP_LOOKUP() is required.
256269921123SKonstantin Belousov 	 * ZFS snapshot directories need VOP_LOOKUP(), so that any
256369921123SKonstantin Belousov 	 * automount of the snapshot directory that is required will
256469921123SKonstantin Belousov 	 * be done.
256569921123SKonstantin Belousov 	 * This needs to be done here for NFSv4, since NFSv4 never does
256669921123SKonstantin Belousov 	 * a VFS_VGET() for "." or "..".
25670c695afbSRick Macklem 	 */
	/*
	 * The probe uses the directory's own fileid.  When the switch is
	 * made, the componentname fields that stay the same for every
	 * entry are filled in once here.
	 */
256869921123SKonstantin Belousov 	if (is_zfs == 1) {
256969921123SKonstantin Belousov 		r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp);
257069921123SKonstantin Belousov 		if (r == EOPNOTSUPP) {
25710c695afbSRick Macklem 			usevget = 0;
25720c695afbSRick Macklem 			cn.cn_nameiop = LOOKUP;
25730c695afbSRick Macklem 			cn.cn_lkflags = LK_SHARED | LK_RETRY;
25740c695afbSRick Macklem 			cn.cn_cred = nd->nd_cred;
257569921123SKonstantin Belousov 		} else if (r == 0)
257669921123SKonstantin Belousov 			vput(nvp);
257769921123SKonstantin Belousov 	}
25780c695afbSRick Macklem 
25790c695afbSRick Macklem 	/*
2580ea83d07eSRick Macklem 	 * If the reply is likely to exceed MCLBYTES and the reply will
2581ea83d07eSRick Macklem 	 * not be saved, use ext_pgs mbufs for TLS.
2582ea83d07eSRick Macklem 	 * It is difficult to predict how large each entry will be and
2583ea83d07eSRick Macklem 	 * how many entries have been read, so just assume the directory
2584ea83d07eSRick Macklem 	 * entries grow by a factor of 4 when attributes are included.
2585ea83d07eSRick Macklem 	 * For NFSv4.0, we do not know for sure if the reply will
2586ea83d07eSRick Macklem 	 * be saved, so do not use ext_pgs mbufs for NFSv4.0.
2587ea83d07eSRick Macklem 	 */
2588ea83d07eSRick Macklem 	if (cnt > MCLBYTES && siz > MCLBYTES / 4 &&
2589ea83d07eSRick Macklem 	    (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS &&
2590ea83d07eSRick Macklem 	    (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4)
2591ea83d07eSRick Macklem 		nd->nd_flag |= ND_EXTPG;
2592ea83d07eSRick Macklem 
2593ea83d07eSRick Macklem 	/*
25949ec7b004SRick Macklem 	 * Save this position, in case there is an error before one entry
25959ec7b004SRick Macklem 	 * is created.
25969ec7b004SRick Macklem 	 */
25979ec7b004SRick Macklem 	mb0 = nd->nd_mb;
25989ec7b004SRick Macklem 	bpos0 = nd->nd_bpos;
2599ea83d07eSRick Macklem 	bextpg0 = nd->nd_bextpg;
2600ea83d07eSRick Macklem 	bextpgsiz0 = nd->nd_bextpgsiz;
26019ec7b004SRick Macklem 
26029ec7b004SRick Macklem 	/*
26039ec7b004SRick Macklem 	 * Fill in the first part of the reply.
26049ec7b004SRick Macklem 	 * dirlen is the reply length in bytes and cannot exceed cnt.
26059ec7b004SRick Macklem 	 * (Include the two booleans at the end of the reply in dirlen now,
26069ec7b004SRick Macklem 	 *  so we recognize when we have exceeded cnt.)
26079ec7b004SRick Macklem 	 */
26089ec7b004SRick Macklem 	if (nd->nd_flag & ND_NFSV3) {
26099ec7b004SRick Macklem 		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
26109ec7b004SRick Macklem 		nfsrv_postopattr(nd, getret, &at);
26119ec7b004SRick Macklem 	} else {
26129ec7b004SRick Macklem 		dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED;
26139ec7b004SRick Macklem 	}
26149ec7b004SRick Macklem 	NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
26159ec7b004SRick Macklem 	txdr_hyper(at.na_filerev, tl);
26169ec7b004SRick Macklem 
26179ec7b004SRick Macklem 	/*
26189ec7b004SRick Macklem 	 * Save this position, in case there is an empty reply needed.
26199ec7b004SRick Macklem 	 */
26209ec7b004SRick Macklem 	mb1 = nd->nd_mb;
26219ec7b004SRick Macklem 	bpos1 = nd->nd_bpos;
2622ea83d07eSRick Macklem 	bextpg1 = nd->nd_bextpg;
2623ea83d07eSRick Macklem 	bextpgsiz1 = nd->nd_bextpgsiz;
26249ec7b004SRick Macklem 
26259ec7b004SRick Macklem 	/* Loop through the records and build reply */
26269ec7b004SRick Macklem 	entrycnt = 0;
26279ec7b004SRick Macklem 	while (cpos < cend && ncookies > 0 && dirlen < cnt) {
26289ec7b004SRick Macklem 		nlen = dp->d_namlen;
		/*
		 * Only records with a nonzero fileno, that are not
		 * whiteouts and whose name fits in NFS_MAXNAMLEN are
		 * emitted.  For anything other than NFSv3, "." and ".."
		 * (and zero-length names) are left out as well.
		 */
26299ec7b004SRick Macklem 		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
26309ec7b004SRick Macklem 		    nlen <= NFS_MAXNAMLEN &&
26319ec7b004SRick Macklem 		    ((nd->nd_flag & ND_NFSV3) || nlen > 2 ||
26329ec7b004SRick Macklem 		     (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.'))
26339ec7b004SRick Macklem 		      || (nlen == 1 && dp->d_name[0] != '.'))) {
26349ec7b004SRick Macklem 			/*
26359ec7b004SRick Macklem 			 * Save the current position in the reply, in case
26369ec7b004SRick Macklem 			 * this entry exceeds cnt.
26379ec7b004SRick Macklem 			 */
26389ec7b004SRick Macklem 			mb1 = nd->nd_mb;
26399ec7b004SRick Macklem 			bpos1 = nd->nd_bpos;
2640ea83d07eSRick Macklem 			bextpg1 = nd->nd_bextpg;
2641ea83d07eSRick Macklem 			bextpgsiz1 = nd->nd_bextpgsiz;
26429ec7b004SRick Macklem 
26439ec7b004SRick Macklem 			/*
26449ec7b004SRick Macklem 			 * For readdir_and_lookup get the vnode using
26459ec7b004SRick Macklem 			 * the file number.
26469ec7b004SRick Macklem 			 */
26479ec7b004SRick Macklem 			nvp = NULL;
26489ec7b004SRick Macklem 			refp = NULL;
26499ec7b004SRick Macklem 			r = 0;
265007c0c166SRick Macklem 			at_root = 0;
265107c0c166SRick Macklem 			needs_unbusy = 0;
265207c0c166SRick Macklem 			new_mp = mp;
265307c0c166SRick Macklem 			mounted_on_fileno = (uint64_t)dp->d_fileno;
			/*
			 * The entry's vnode is needed for NFSv3, when
			 * attributes other than the four cleared from
			 * savbits were requested, when the record type is
			 * unknown, or for directories while mount point
			 * crossing is enabled.
			 */
26549ec7b004SRick Macklem 			if ((nd->nd_flag & ND_NFSV3) ||
2655cd5edc7dSRick Macklem 			    NFSNONZERO_ATTRBIT(&savbits) ||
2656cd5edc7dSRick Macklem 			    dp->d_type == DT_UNKNOWN ||
2657cd5edc7dSRick Macklem 			    (dp->d_type == DT_DIR &&
2658cd5edc7dSRick Macklem 			     nfsrv_enable_crossmntpt != 0)) {
26599ec7b004SRick Macklem 				if (nd->nd_flag & ND_NFSV4)
26609ec7b004SRick Macklem 					refp = nfsv4root_getreferral(NULL,
26619ec7b004SRick Macklem 					    vp, dp->d_fileno);
266238e3ea69SRick Macklem 				if (refp == NULL) {
266338e3ea69SRick Macklem 					if (usevget)
2664f9266eb1SRick Macklem 						r = VFS_VGET(mp, dp->d_fileno,
2665f9266eb1SRick Macklem 						    LK_SHARED, &nvp);
266638e3ea69SRick Macklem 					else
266738e3ea69SRick Macklem 						r = EOPNOTSUPP;
					/*
					 * VFS_VGET() is unsupported (now or
					 * from an earlier entry): look the
					 * name up in vp instead.
					 */
266838e3ea69SRick Macklem 					if (r == EOPNOTSUPP) {
266969921123SKonstantin Belousov 						if (usevget) {
267038e3ea69SRick Macklem 							usevget = 0;
267169921123SKonstantin Belousov 							cn.cn_nameiop = LOOKUP;
267269921123SKonstantin Belousov 							cn.cn_lkflags =
267369921123SKonstantin Belousov 							    LK_SHARED |
267469921123SKonstantin Belousov 							    LK_RETRY;
267569921123SKonstantin Belousov 							cn.cn_cred =
267669921123SKonstantin Belousov 							    nd->nd_cred;
267769921123SKonstantin Belousov 						}
267838e3ea69SRick Macklem 						cn.cn_nameptr = dp->d_name;
267938e3ea69SRick Macklem 						cn.cn_namelen = nlen;
268038e3ea69SRick Macklem 						cn.cn_flags = ISLASTCN |
26815050aa86SKonstantin Belousov 						    NOFOLLOW | LOCKLEAF;
268238e3ea69SRick Macklem 						if (nlen == 2 &&
268338e3ea69SRick Macklem 						    dp->d_name[0] == '.' &&
268438e3ea69SRick Macklem 						    dp->d_name[1] == '.')
268538e3ea69SRick Macklem 							cn.cn_flags |=
268638e3ea69SRick Macklem 							    ISDOTDOT;
268798f234f3SZack Kirsch 						if (NFSVOPLOCK(vp, LK_SHARED)
2688a852f40bSRick Macklem 						    != 0) {
2689a852f40bSRick Macklem 							nd->nd_repstat = EPERM;
2690a852f40bSRick Macklem 							break;
2691a852f40bSRick Macklem 						}
						/*
						 * ".." at the root of the file
						 * system: use vp itself, with an
						 * extra reference, instead of
						 * calling VOP_LOOKUP().  vp stays
						 * locked and is handed on as nvp.
						 */
26928a1b5adeSRick Macklem 						if ((vp->v_vflag & VV_ROOT) != 0
26938a1b5adeSRick Macklem 						    && (cn.cn_flags & ISDOTDOT)
26948a1b5adeSRick Macklem 						    != 0) {
26958a1b5adeSRick Macklem 							vref(vp);
26968a1b5adeSRick Macklem 							nvp = vp;
26978a1b5adeSRick Macklem 							r = 0;
26988d2f180eSRick Macklem 						} else {
26998a1b5adeSRick Macklem 							r = VOP_LOOKUP(vp, &nvp,
27008a1b5adeSRick Macklem 							    &cn);
27018d2f180eSRick Macklem 							if (vp != nvp)
2702b249ce48SMateusz Guzik 								NFSVOPUNLOCK(vp);
27038d2f180eSRick Macklem 						}
270438e3ea69SRick Macklem 					}
270507c0c166SRick Macklem 
270607c0c166SRick Macklem 					/*
270707c0c166SRick Macklem 					 * For NFSv4, check to see if nvp is
270807c0c166SRick Macklem 					 * a mount point and get the mount
270907c0c166SRick Macklem 					 * point vnode, as required.
271007c0c166SRick Macklem 					 */
271107c0c166SRick Macklem 					if (r == 0 &&
271207c0c166SRick Macklem 					    nfsrv_enable_crossmntpt != 0 &&
271307c0c166SRick Macklem 					    (nd->nd_flag & ND_NFSV4) != 0 &&
271407c0c166SRick Macklem 					    nvp->v_type == VDIR &&
271507c0c166SRick Macklem 					    nvp->v_mountedhere != NULL) {
271607c0c166SRick Macklem 						new_mp = nvp->v_mountedhere;
271707c0c166SRick Macklem 						r = vfs_busy(new_mp, 0);
271807c0c166SRick Macklem 						vput(nvp);
271907c0c166SRick Macklem 						nvp = NULL;
						/*
						 * needs_unbusy is set only once
						 * vfs_busy() has succeeded, even
						 * if VFS_ROOT() then fails.
						 */
272007c0c166SRick Macklem 						if (r == 0) {
272107c0c166SRick Macklem 							r = VFS_ROOT(new_mp,
272207c0c166SRick Macklem 							    LK_SHARED, &nvp);
272307c0c166SRick Macklem 							needs_unbusy = 1;
272407c0c166SRick Macklem 							if (r == 0)
272507c0c166SRick Macklem 								at_root = 1;
272607c0c166SRick Macklem 						}
272707c0c166SRick Macklem 					}
272838e3ea69SRick Macklem 				}
2729ca8f3d1cSAndriy Gapon 
2730ca8f3d1cSAndriy Gapon 				/*
2731ca8f3d1cSAndriy Gapon 				 * If we failed to look up the entry, then it
2732ca8f3d1cSAndriy Gapon 				 * has become invalid, most likely removed.
2733ca8f3d1cSAndriy Gapon 				 */
2734ca8f3d1cSAndriy Gapon 				if (r != 0) {
2735ca8f3d1cSAndriy Gapon 					if (needs_unbusy)
2736ca8f3d1cSAndriy Gapon 						vfs_unbusy(new_mp);
2737ca8f3d1cSAndriy Gapon 					goto invalid;
2738ca8f3d1cSAndriy Gapon 				}
2739ca8f3d1cSAndriy Gapon 				KASSERT(refp != NULL || nvp != NULL,
2740ca8f3d1cSAndriy Gapon 				    ("%s: undetected lookup error", __func__));
2741ca8f3d1cSAndriy Gapon 
				/*
				 * Fetch the file handle and attributes,
				 * except for referrals and for NFSv4 requests
				 * with no fillable attribute bits.
				 */
27429ec7b004SRick Macklem 				if (refp == NULL &&
27439ec7b004SRick Macklem 				    ((nd->nd_flag & ND_NFSV3) ||
27449ec7b004SRick Macklem 				     NFSNONZERO_ATTRBIT(&attrbits))) {
27459ec7b004SRick Macklem 					r = nfsvno_getfh(nvp, &nfh, p);
27469ec7b004SRick Macklem 					if (!r)
274790d2dfabSRick Macklem 					    r = nfsvno_getattr(nvp, nvap, nd, p,
274890d2dfabSRick Macklem 						1, &attrbits);
27494fc0f18cSBryan Drewery 					if (r == 0 && is_zfs == 1 &&
27500c695afbSRick Macklem 					    nfsrv_enable_crossmntpt != 0 &&
27510c695afbSRick Macklem 					    (nd->nd_flag & ND_NFSV4) != 0 &&
27520c695afbSRick Macklem 					    nvp->v_type == VDIR &&
27530c695afbSRick Macklem 					    vp->v_mount != nvp->v_mount) {
27540c695afbSRick Macklem 					    /*
27550c695afbSRick Macklem 					     * For a ZFS snapshot, there is a
27560c695afbSRick Macklem 					     * pseudo mount that does not set
27570c695afbSRick Macklem 					     * v_mountedhere, so it needs to
27580c695afbSRick Macklem 					     * be detected via a different
27590c695afbSRick Macklem 					     * mount structure.
27600c695afbSRick Macklem 					     */
27610c695afbSRick Macklem 					    at_root = 1;
27620c695afbSRick Macklem 					    if (new_mp == mp)
27630c695afbSRick Macklem 						new_mp = nvp->v_mount;
27640c695afbSRick Macklem 					}
27659ec7b004SRick Macklem 				}
2766ca8f3d1cSAndriy Gapon 
2767ca8f3d1cSAndriy Gapon 				/*
2768ca8f3d1cSAndriy Gapon 				 * If we failed to get attributes of the entry,
2769ca8f3d1cSAndriy Gapon 				 * then just skip it for NFSv3 (the traditional
2770ca8f3d1cSAndriy Gapon 				 * behavior in the old NFS server).
2771ca8f3d1cSAndriy Gapon 				 * For NFSv4 the behavior is controlled by
2772ca8f3d1cSAndriy Gapon 				 * RDATTRERROR: we either ignore the error or
2773ca8f3d1cSAndriy Gapon 				 * fail the request.
27746fd6a0e3SRick Macklem 				 * The exception is EOPNOTSUPP, which can be
27756fd6a0e3SRick Macklem 				 * returned by nfsvno_getfh() for certain
27766fd6a0e3SRick Macklem 				 * file systems, such as devfs.  This indicates
27776fd6a0e3SRick Macklem 				 * that the file system cannot be exported,
27786fd6a0e3SRick Macklem 				 * so just skip over the entry.
2779ca8f3d1cSAndriy Gapon 				 * Note that RDATTRERROR is never set for NFSv3.
2780ca8f3d1cSAndriy Gapon 				 */
2781ca8f3d1cSAndriy Gapon 				if (r != 0) {
27829ec7b004SRick Macklem 					if (!NFSISSET_ATTRBIT(&attrbits,
27836fd6a0e3SRick Macklem 					    NFSATTRBIT_RDATTRERROR) ||
27846fd6a0e3SRick Macklem 					    r == EOPNOTSUPP) {
27859ec7b004SRick Macklem 						vput(nvp);
278607c0c166SRick Macklem 						if (needs_unbusy != 0)
278707c0c166SRick Macklem 							vfs_unbusy(new_mp);
27886fd6a0e3SRick Macklem 						if ((nd->nd_flag & ND_NFSV3) ||
27896fd6a0e3SRick Macklem 						    r == EOPNOTSUPP)
2790ca8f3d1cSAndriy Gapon 							goto invalid;
27919ec7b004SRick Macklem 						nd->nd_repstat = r;
27929ec7b004SRick Macklem 						break;
27939ec7b004SRick Macklem 					}
27949ec7b004SRick Macklem 				}
2795cd5edc7dSRick Macklem 			} else if (NFSNONZERO_ATTRBIT(&attrbits)) {
2796cd5edc7dSRick Macklem 				/* Only need Type and/or Fileid. */
2797cd5edc7dSRick Macklem 				VATTR_NULL(&nvap->na_vattr);
2798cd5edc7dSRick Macklem 				nvap->na_fileid = dp->d_fileno;
2799cd5edc7dSRick Macklem 				nvap->na_type = NFS_DTYPETOVTYPE(dp->d_type);
28009ec7b004SRick Macklem 			}
28019ec7b004SRick Macklem 
28029ec7b004SRick Macklem 			/*
28039ec7b004SRick Macklem 			 * Build the directory record xdr
28049ec7b004SRick Macklem 			 */
28059ec7b004SRick Macklem 			if (nd->nd_flag & ND_NFSV3) {
				/*
				 * NFSv3 record: "entry follows", fileid, name,
				 * cookie, post-op attributes, file handle.
				 */
28069ec7b004SRick Macklem 				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
28079ec7b004SRick Macklem 				*tl++ = newnfs_true;
2808648a208eSRick Macklem 				txdr_hyper(dp->d_fileno, tl);
28099ec7b004SRick Macklem 				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
28109ec7b004SRick Macklem 				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
281132fbc5d8SAlan Somers 				txdr_hyper(*cookiep, tl);
28129ec7b004SRick Macklem 				nfsrv_postopattr(nd, 0, nvap);
2813896516e5SRick Macklem 				dirlen += nfsm_fhtom(NULL, nd, (u_int8_t *)&nfh,
2814896516e5SRick Macklem 				    0, 1);
28159ec7b004SRick Macklem 				dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR);
28169ec7b004SRick Macklem 				if (nvp != NULL)
28179ec7b004SRick Macklem 					vput(nvp);
28189ec7b004SRick Macklem 			} else {
				/*
				 * NFSv4 record: "entry follows", cookie, name,
				 * then the attributes.  nvp is unlocked before
				 * the attributes are built and released with
				 * vrele() afterwards.
				 */
28199ec7b004SRick Macklem 				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
28209ec7b004SRick Macklem 				*tl++ = newnfs_true;
282132fbc5d8SAlan Somers 				txdr_hyper(*cookiep, tl);
28229ec7b004SRick Macklem 				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
2823a09001a8SRick Macklem 				if (nvp != NULL) {
2824a09001a8SRick Macklem 					supports_nfsv4acls =
2825a09001a8SRick Macklem 					    nfs_supportsnfsv4acls(nvp);
2826b249ce48SMateusz Guzik 					NFSVOPUNLOCK(nvp);
2827a09001a8SRick Macklem 				} else
2828a09001a8SRick Macklem 					supports_nfsv4acls = 0;
28299ec7b004SRick Macklem 				if (refp != NULL) {
28309ec7b004SRick Macklem 					dirlen += nfsrv_putreferralattr(nd,
2831cd5edc7dSRick Macklem 					    &refbits, refp, 0,
28329ec7b004SRick Macklem 					    &nd->nd_repstat);
28339ec7b004SRick Macklem 					if (nd->nd_repstat) {
28349ec7b004SRick Macklem 						if (nvp != NULL)
28359ec7b004SRick Macklem 							vrele(nvp);
283607c0c166SRick Macklem 						if (needs_unbusy != 0)
283707c0c166SRick Macklem 							vfs_unbusy(new_mp);
28389ec7b004SRick Macklem 						break;
28399ec7b004SRick Macklem 					}
28409ec7b004SRick Macklem 				} else if (r) {
					/*
					 * r is still nonzero here only when
					 * RDATTRERROR was requested: fill in
					 * with rderrbits (just that bit) and
					 * pass r through.
					 */
284107c0c166SRick Macklem 					dirlen += nfsvno_fillattr(nd, new_mp,
284207c0c166SRick Macklem 					    nvp, nvap, &nfh, r, &rderrbits,
2843a09001a8SRick Macklem 					    nd->nd_cred, p, isdgram, 0,
2844a09001a8SRick Macklem 					    supports_nfsv4acls, at_root,
284507c0c166SRick Macklem 					    mounted_on_fileno);
28469ec7b004SRick Macklem 				} else {
284707c0c166SRick Macklem 					dirlen += nfsvno_fillattr(nd, new_mp,
284807c0c166SRick Macklem 					    nvp, nvap, &nfh, r, &attrbits,
2849a09001a8SRick Macklem 					    nd->nd_cred, p, isdgram, 0,
2850a09001a8SRick Macklem 					    supports_nfsv4acls, at_root,
285107c0c166SRick Macklem 					    mounted_on_fileno);
28529ec7b004SRick Macklem 				}
28539ec7b004SRick Macklem 				if (nvp != NULL)
28549ec7b004SRick Macklem 					vrele(nvp);
28559ec7b004SRick Macklem 				dirlen += (3 * NFSX_UNSIGNED);
28569ec7b004SRick Macklem 			}
285707c0c166SRick Macklem 			if (needs_unbusy != 0)
285807c0c166SRick Macklem 				vfs_unbusy(new_mp);
			/*
			 * Count the entry only if it still fits in cnt; an
			 * entry that overflows is trimmed after the loop.
			 */
28599ec7b004SRick Macklem 			if (dirlen <= cnt)
28609ec7b004SRick Macklem 				entrycnt++;
28619ec7b004SRick Macklem 		}
2862ca8f3d1cSAndriy Gapon invalid:
28639ec7b004SRick Macklem 		cpos += dp->d_reclen;
28649ec7b004SRick Macklem 		dp = (struct dirent *)cpos;
28659ec7b004SRick Macklem 		cookiep++;
28669ec7b004SRick Macklem 		ncookies--;
28679ec7b004SRick Macklem 	}
28689ec7b004SRick Macklem 	vrele(vp);
2869f9266eb1SRick Macklem 	vfs_unbusy(mp);
28709ec7b004SRick Macklem 
28719ec7b004SRick Macklem 	/*
28729ec7b004SRick Macklem 	 * If dirlen > cnt, we must strip off the last entry. If that
28739ec7b004SRick Macklem 	 * results in an empty reply, report NFSERR_TOOSMALL.
28749ec7b004SRick Macklem 	 */
	/*
	 * On error the reply is rewound to the mb0 position saved before
	 * any of this reply was built; otherwise only to the mb1 position
	 * saved at the start of the last entry.  eofflag is cleared in
	 * both cases, and also when unprocessed records remain in rbuf.
	 */
28759ec7b004SRick Macklem 	if (dirlen > cnt || nd->nd_repstat) {
28769ec7b004SRick Macklem 		if (!nd->nd_repstat && entrycnt == 0)
28779ec7b004SRick Macklem 			nd->nd_repstat = NFSERR_TOOSMALL;
28786c7d2293SRick Macklem 		if (nd->nd_repstat) {
2879ea83d07eSRick Macklem 			nfsm_trimtrailing(nd, mb0, bpos0, bextpg0, bextpgsiz0);
28806c7d2293SRick Macklem 			if (nd->nd_flag & ND_NFSV3)
28816c7d2293SRick Macklem 				nfsrv_postopattr(nd, getret, &at);
28826c7d2293SRick Macklem 		} else
2883ea83d07eSRick Macklem 			nfsm_trimtrailing(nd, mb1, bpos1, bextpg1, bextpgsiz1);
28849ec7b004SRick Macklem 		eofflag = 0;
28859ec7b004SRick Macklem 	} else if (cpos < cend)
28869ec7b004SRick Macklem 		eofflag = 0;
	/* Terminate the list: "no entry follows", then the eof boolean. */
28879ec7b004SRick Macklem 	if (!nd->nd_repstat) {
28889ec7b004SRick Macklem 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
28899ec7b004SRick Macklem 		*tl++ = newnfs_false;
28909ec7b004SRick Macklem 		if (eofflag)
28919ec7b004SRick Macklem 			*tl = newnfs_true;
28929ec7b004SRick Macklem 		else
28939ec7b004SRick Macklem 			*tl = newnfs_false;
28949ec7b004SRick Macklem 	}
2895222daa42SConrad Meyer 	free(cookies, M_TEMP);
2896222daa42SConrad Meyer 	free(rbuf, M_TEMP);
2897a9285ae5SZack Kirsch 
2898a9285ae5SZack Kirsch out:
2899a9285ae5SZack Kirsch 	NFSEXITCODE2(0, nd);
29009ec7b004SRick Macklem 	return (0);
29019ec7b004SRick Macklem nfsmout:
29029ec7b004SRick Macklem 	vput(vp);
2903a9285ae5SZack Kirsch 	NFSEXITCODE2(error, nd);
29049ec7b004SRick Macklem 	return (error);
29059ec7b004SRick Macklem }
29069ec7b004SRick Macklem 
29079ec7b004SRick Macklem /*
29089ec7b004SRick Macklem  * Get the settable attributes out of the mbuf list.
29099ec7b004SRick Macklem  * (Return 0 or EBADRPC)
29109ec7b004SRick Macklem  */
29119ec7b004SRick Macklem int
nfsrv_sattr(struct nfsrv_descript * nd,vnode_t vp,struct nfsvattr * nvap,nfsattrbit_t * attrbitp,NFSACL_T * aclp,struct thread * p)2912d8a5961fSMarcelo Araujo nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
29139ec7b004SRick Macklem     nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
29149ec7b004SRick Macklem {
29159ec7b004SRick Macklem 	u_int32_t *tl;
29169ec7b004SRick Macklem 	struct nfsv2_sattr *sp;
29179ec7b004SRick Macklem 	int error = 0, toclient = 0;
29189ec7b004SRick Macklem 
29199ec7b004SRick Macklem 	switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) {
29209ec7b004SRick Macklem 	case ND_NFSV2:
29219ec7b004SRick Macklem 		NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
29229ec7b004SRick Macklem 		/*
29239ec7b004SRick Macklem 		 * Some old clients didn't fill in the high order 16bits.
29249ec7b004SRick Macklem 		 * --> check the low order 2 bytes for 0xffff
29259ec7b004SRick Macklem 		 */
29269ec7b004SRick Macklem 		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
29279ec7b004SRick Macklem 			nvap->na_mode = nfstov_mode(sp->sa_mode);
29289ec7b004SRick Macklem 		if (sp->sa_uid != newnfs_xdrneg1)
29299ec7b004SRick Macklem 			nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid);
29309ec7b004SRick Macklem 		if (sp->sa_gid != newnfs_xdrneg1)
29319ec7b004SRick Macklem 			nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid);
29329ec7b004SRick Macklem 		if (sp->sa_size != newnfs_xdrneg1)
29339ec7b004SRick Macklem 			nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size);
29349ec7b004SRick Macklem 		if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) {
29359ec7b004SRick Macklem #ifdef notyet
29369ec7b004SRick Macklem 			fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime);
29379ec7b004SRick Macklem #else
29389ec7b004SRick Macklem 			nvap->na_atime.tv_sec =
29399ec7b004SRick Macklem 				fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec);
29409ec7b004SRick Macklem 			nvap->na_atime.tv_nsec = 0;
29419ec7b004SRick Macklem #endif
29429ec7b004SRick Macklem 		}
29439ec7b004SRick Macklem 		if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1)
29449ec7b004SRick Macklem 			fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime);
29459ec7b004SRick Macklem 		break;
29469ec7b004SRick Macklem 	case ND_NFSV3:
29479ec7b004SRick Macklem 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
29489ec7b004SRick Macklem 		if (*tl == newnfs_true) {
29499ec7b004SRick Macklem 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
29509ec7b004SRick Macklem 			nvap->na_mode = nfstov_mode(*tl);
29519ec7b004SRick Macklem 		}
29529ec7b004SRick Macklem 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
29539ec7b004SRick Macklem 		if (*tl == newnfs_true) {
29549ec7b004SRick Macklem 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
29559ec7b004SRick Macklem 			nvap->na_uid = fxdr_unsigned(uid_t, *tl);
29569ec7b004SRick Macklem 		}
29579ec7b004SRick Macklem 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
29589ec7b004SRick Macklem 		if (*tl == newnfs_true) {
29599ec7b004SRick Macklem 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
29609ec7b004SRick Macklem 			nvap->na_gid = fxdr_unsigned(gid_t, *tl);
29619ec7b004SRick Macklem 		}
29629ec7b004SRick Macklem 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
29639ec7b004SRick Macklem 		if (*tl == newnfs_true) {
29649ec7b004SRick Macklem 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
29659ec7b004SRick Macklem 			nvap->na_size = fxdr_hyper(tl);
29669ec7b004SRick Macklem 		}
29679ec7b004SRick Macklem 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
29689ec7b004SRick Macklem 		switch (fxdr_unsigned(int, *tl)) {
29699ec7b004SRick Macklem 		case NFSV3SATTRTIME_TOCLIENT:
29709ec7b004SRick Macklem 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
29719ec7b004SRick Macklem 			fxdr_nfsv3time(tl, &nvap->na_atime);
29729ec7b004SRick Macklem 			toclient = 1;
29739ec7b004SRick Macklem 			break;
29749ec7b004SRick Macklem 		case NFSV3SATTRTIME_TOSERVER:
2975d177f14dSJohn Baldwin 			vfs_timestamp(&nvap->na_atime);
29769ec7b004SRick Macklem 			nvap->na_vaflags |= VA_UTIMES_NULL;
29779ec7b004SRick Macklem 			break;
297874b8d63dSPedro F. Giffuni 		}
29799ec7b004SRick Macklem 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
29809ec7b004SRick Macklem 		switch (fxdr_unsigned(int, *tl)) {
29819ec7b004SRick Macklem 		case NFSV3SATTRTIME_TOCLIENT:
29829ec7b004SRick Macklem 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
29839ec7b004SRick Macklem 			fxdr_nfsv3time(tl, &nvap->na_mtime);
29849ec7b004SRick Macklem 			nvap->na_vaflags &= ~VA_UTIMES_NULL;
29859ec7b004SRick Macklem 			break;
29869ec7b004SRick Macklem 		case NFSV3SATTRTIME_TOSERVER:
2987d177f14dSJohn Baldwin 			vfs_timestamp(&nvap->na_mtime);
29889ec7b004SRick Macklem 			if (!toclient)
29899ec7b004SRick Macklem 				nvap->na_vaflags |= VA_UTIMES_NULL;
29909ec7b004SRick Macklem 			break;
299174b8d63dSPedro F. Giffuni 		}
29929ec7b004SRick Macklem 		break;
29939ec7b004SRick Macklem 	case ND_NFSV4:
2994d8a5961fSMarcelo Araujo 		error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, p);
299574b8d63dSPedro F. Giffuni 	}
29969ec7b004SRick Macklem nfsmout:
2997a9285ae5SZack Kirsch 	NFSEXITCODE2(error, nd);
29989ec7b004SRick Macklem 	return (error);
29999ec7b004SRick Macklem }
30009ec7b004SRick Macklem 
30019ec7b004SRick Macklem /*
30029ec7b004SRick Macklem  * Handle the setable attributes for V4.
30039ec7b004SRick Macklem  * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise.
30049ec7b004SRick Macklem  */
30059ec7b004SRick Macklem int
nfsv4_sattr(struct nfsrv_descript * nd,vnode_t vp,struct nfsvattr * nvap,nfsattrbit_t * attrbitp,NFSACL_T * aclp,struct thread * p)3006d8a5961fSMarcelo Araujo nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
30079ec7b004SRick Macklem     nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
30089ec7b004SRick Macklem {
30099ec7b004SRick Macklem 	u_int32_t *tl;
30109ec7b004SRick Macklem 	int attrsum = 0;
30119ec7b004SRick Macklem 	int i, j;
30129ec7b004SRick Macklem 	int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0;
3013b4372164SRick Macklem 	int moderet, toclient = 0;
30149ec7b004SRick Macklem 	u_char *cp, namestr[NFSV4_SMALLSTR + 1];
30159ec7b004SRick Macklem 	uid_t uid;
30169ec7b004SRick Macklem 	gid_t gid;
3017b4372164SRick Macklem 	u_short mode, mask;		/* Same type as va_mode. */
3018b4372164SRick Macklem 	struct vattr va;
30199ec7b004SRick Macklem 
30209ec7b004SRick Macklem 	error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup);
30219ec7b004SRick Macklem 	if (error)
3022a9285ae5SZack Kirsch 		goto nfsmout;
30239ec7b004SRick Macklem 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
30249ec7b004SRick Macklem 	attrsize = fxdr_unsigned(int, *tl);
30259ec7b004SRick Macklem 
30269ec7b004SRick Macklem 	/*
30279ec7b004SRick Macklem 	 * Loop around getting the setable attributes. If an unsupported
30289ec7b004SRick Macklem 	 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return.
30295037c639SRick Macklem 	 * Once nd_repstat != 0, do not set the attribute value, but keep
30305037c639SRick Macklem 	 * parsing the attribute(s).
30319ec7b004SRick Macklem 	 */
30329ec7b004SRick Macklem 	if (retnotsup) {
30339ec7b004SRick Macklem 		nd->nd_repstat = NFSERR_ATTRNOTSUPP;
30349ec7b004SRick Macklem 		bitpos = NFSATTRBIT_MAX;
30359ec7b004SRick Macklem 	} else {
30369ec7b004SRick Macklem 		bitpos = 0;
30379ec7b004SRick Macklem 	}
3038b4372164SRick Macklem 	moderet = 0;
30399ec7b004SRick Macklem 	for (; bitpos < NFSATTRBIT_MAX; bitpos++) {
30409ec7b004SRick Macklem 	    if (attrsum > attrsize) {
30419ec7b004SRick Macklem 		error = NFSERR_BADXDR;
30429ec7b004SRick Macklem 		goto nfsmout;
30439ec7b004SRick Macklem 	    }
30449ec7b004SRick Macklem 	    if (NFSISSET_ATTRBIT(attrbitp, bitpos))
30459ec7b004SRick Macklem 		switch (bitpos) {
30469ec7b004SRick Macklem 		case NFSATTRBIT_SIZE:
30479ec7b004SRick Macklem 			NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
30485037c639SRick Macklem 			if (!nd->nd_repstat) {
30495037c639SRick Macklem 				if (vp != NULL && vp->v_type != VREG)
30505037c639SRick Macklem 					nd->nd_repstat = (vp->v_type == VDIR) ?
30515037c639SRick Macklem 					    NFSERR_ISDIR : NFSERR_INVAL;
30525037c639SRick Macklem 				else
30539ec7b004SRick Macklem 					nvap->na_size = fxdr_hyper(tl);
30545037c639SRick Macklem 			}
30559ec7b004SRick Macklem 			attrsum += NFSX_HYPER;
30569ec7b004SRick Macklem 			break;
30579ec7b004SRick Macklem 		case NFSATTRBIT_ACL:
3058a91a5784SRick Macklem 			error = nfsrv_dissectacl(nd, aclp, true, &aceerr,
3059a91a5784SRick Macklem 			    &aclsize, p);
30609ec7b004SRick Macklem 			if (error)
30619ec7b004SRick Macklem 				goto nfsmout;
30629ec7b004SRick Macklem 			if (aceerr && !nd->nd_repstat)
3063b008a72cSZack Kirsch 				nd->nd_repstat = aceerr;
30649ec7b004SRick Macklem 			attrsum += aclsize;
30659ec7b004SRick Macklem 			break;
30669ec7b004SRick Macklem 		case NFSATTRBIT_ARCHIVE:
30679ec7b004SRick Macklem 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
30689ec7b004SRick Macklem 			if (!nd->nd_repstat)
30699ec7b004SRick Macklem 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
30709ec7b004SRick Macklem 			attrsum += NFSX_UNSIGNED;
30719ec7b004SRick Macklem 			break;
30729ec7b004SRick Macklem 		case NFSATTRBIT_HIDDEN:
30739ec7b004SRick Macklem 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
30749ec7b004SRick Macklem 			if (!nd->nd_repstat)
30759ec7b004SRick Macklem 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
30769ec7b004SRick Macklem 			attrsum += NFSX_UNSIGNED;
30779ec7b004SRick Macklem 			break;
30789ec7b004SRick Macklem 		case NFSATTRBIT_MIMETYPE:
30799ec7b004SRick Macklem 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
30809ec7b004SRick Macklem 			i = fxdr_unsigned(int, *tl);
30819ec7b004SRick Macklem 			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
30829ec7b004SRick Macklem 			if (error)
30839ec7b004SRick Macklem 				goto nfsmout;
30849ec7b004SRick Macklem 			if (!nd->nd_repstat)
30859ec7b004SRick Macklem 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
30869ec7b004SRick Macklem 			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i));
30879ec7b004SRick Macklem 			break;
30889ec7b004SRick Macklem 		case NFSATTRBIT_MODE:
3089b4372164SRick Macklem 			moderet = NFSERR_INVAL;	/* Can't do MODESETMASKED. */
30909ec7b004SRick Macklem 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
30915037c639SRick Macklem 			if (!nd->nd_repstat)
30929ec7b004SRick Macklem 				nvap->na_mode = nfstov_mode(*tl);
30939ec7b004SRick Macklem 			attrsum += NFSX_UNSIGNED;
30949ec7b004SRick Macklem 			break;
30959ec7b004SRick Macklem 		case NFSATTRBIT_OWNER:
30969ec7b004SRick Macklem 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
30979ec7b004SRick Macklem 			j = fxdr_unsigned(int, *tl);
3098a9285ae5SZack Kirsch 			if (j < 0) {
3099a9285ae5SZack Kirsch 				error = NFSERR_BADXDR;
3100a9285ae5SZack Kirsch 				goto nfsmout;
3101a9285ae5SZack Kirsch 			}
31029ec7b004SRick Macklem 			if (j > NFSV4_SMALLSTR)
31039ec7b004SRick Macklem 				cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
31049ec7b004SRick Macklem 			else
31059ec7b004SRick Macklem 				cp = namestr;
31069ec7b004SRick Macklem 			error = nfsrv_mtostr(nd, cp, j);
31079ec7b004SRick Macklem 			if (error) {
31089ec7b004SRick Macklem 				if (j > NFSV4_SMALLSTR)
31099ec7b004SRick Macklem 					free(cp, M_NFSSTRING);
3110a9285ae5SZack Kirsch 				goto nfsmout;
31119ec7b004SRick Macklem 			}
31129ec7b004SRick Macklem 			if (!nd->nd_repstat) {
31130658ac39SEdward Tomasz Napierala 				nd->nd_repstat = nfsv4_strtouid(nd, cp, j,
31140658ac39SEdward Tomasz Napierala 				    &uid);
31159ec7b004SRick Macklem 				if (!nd->nd_repstat)
31169ec7b004SRick Macklem 					nvap->na_uid = uid;
31179ec7b004SRick Macklem 			}
31189ec7b004SRick Macklem 			if (j > NFSV4_SMALLSTR)
31199ec7b004SRick Macklem 				free(cp, M_NFSSTRING);
31209ec7b004SRick Macklem 			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
31219ec7b004SRick Macklem 			break;
31229ec7b004SRick Macklem 		case NFSATTRBIT_OWNERGROUP:
31239ec7b004SRick Macklem 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
31249ec7b004SRick Macklem 			j = fxdr_unsigned(int, *tl);
3125a9285ae5SZack Kirsch 			if (j < 0) {
3126a9285ae5SZack Kirsch 				error = NFSERR_BADXDR;
3127a9285ae5SZack Kirsch 				goto nfsmout;
3128a9285ae5SZack Kirsch 			}
31299ec7b004SRick Macklem 			if (j > NFSV4_SMALLSTR)
31309ec7b004SRick Macklem 				cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
31319ec7b004SRick Macklem 			else
31329ec7b004SRick Macklem 				cp = namestr;
31339ec7b004SRick Macklem 			error = nfsrv_mtostr(nd, cp, j);
31349ec7b004SRick Macklem 			if (error) {
31359ec7b004SRick Macklem 				if (j > NFSV4_SMALLSTR)
31369ec7b004SRick Macklem 					free(cp, M_NFSSTRING);
3137a9285ae5SZack Kirsch 				goto nfsmout;
31389ec7b004SRick Macklem 			}
31399ec7b004SRick Macklem 			if (!nd->nd_repstat) {
31402df8bd90SEdward Tomasz Napierala 				nd->nd_repstat = nfsv4_strtogid(nd, cp, j,
31412df8bd90SEdward Tomasz Napierala 				    &gid);
31429ec7b004SRick Macklem 				if (!nd->nd_repstat)
31439ec7b004SRick Macklem 					nvap->na_gid = gid;
31449ec7b004SRick Macklem 			}
31459ec7b004SRick Macklem 			if (j > NFSV4_SMALLSTR)
31469ec7b004SRick Macklem 				free(cp, M_NFSSTRING);
31479ec7b004SRick Macklem 			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
31489ec7b004SRick Macklem 			break;
31499ec7b004SRick Macklem 		case NFSATTRBIT_SYSTEM:
31509ec7b004SRick Macklem 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
31519ec7b004SRick Macklem 			if (!nd->nd_repstat)
31529ec7b004SRick Macklem 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
31539ec7b004SRick Macklem 			attrsum += NFSX_UNSIGNED;
31549ec7b004SRick Macklem 			break;
31559ec7b004SRick Macklem 		case NFSATTRBIT_TIMEACCESSSET:
31569ec7b004SRick Macklem 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
31579ec7b004SRick Macklem 			attrsum += NFSX_UNSIGNED;
31589ec7b004SRick Macklem 			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
31599ec7b004SRick Macklem 			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
31605037c639SRick Macklem 			    if (!nd->nd_repstat)
31619ec7b004SRick Macklem 				fxdr_nfsv4time(tl, &nvap->na_atime);
31629ec7b004SRick Macklem 			    toclient = 1;
31639ec7b004SRick Macklem 			    attrsum += NFSX_V4TIME;
31645037c639SRick Macklem 			} else if (!nd->nd_repstat) {
3165d177f14dSJohn Baldwin 			    vfs_timestamp(&nvap->na_atime);
31669ec7b004SRick Macklem 			    nvap->na_vaflags |= VA_UTIMES_NULL;
31679ec7b004SRick Macklem 			}
31689ec7b004SRick Macklem 			break;
31699ec7b004SRick Macklem 		case NFSATTRBIT_TIMEBACKUP:
31709ec7b004SRick Macklem 			NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
31719ec7b004SRick Macklem 			if (!nd->nd_repstat)
31729ec7b004SRick Macklem 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
31739ec7b004SRick Macklem 			attrsum += NFSX_V4TIME;
31749ec7b004SRick Macklem 			break;
31759ec7b004SRick Macklem 		case NFSATTRBIT_TIMECREATE:
31769ec7b004SRick Macklem 			NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
31775037c639SRick Macklem 			if (!nd->nd_repstat)
31782de592f6SRick Macklem 				fxdr_nfsv4time(tl, &nvap->na_btime);
31799ec7b004SRick Macklem 			attrsum += NFSX_V4TIME;
31809ec7b004SRick Macklem 			break;
31819ec7b004SRick Macklem 		case NFSATTRBIT_TIMEMODIFYSET:
31829ec7b004SRick Macklem 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
31839ec7b004SRick Macklem 			attrsum += NFSX_UNSIGNED;
31849ec7b004SRick Macklem 			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
31859ec7b004SRick Macklem 			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
31865037c639SRick Macklem 			    if (!nd->nd_repstat)
31879ec7b004SRick Macklem 				fxdr_nfsv4time(tl, &nvap->na_mtime);
31889ec7b004SRick Macklem 			    nvap->na_vaflags &= ~VA_UTIMES_NULL;
31899ec7b004SRick Macklem 			    attrsum += NFSX_V4TIME;
31905037c639SRick Macklem 			} else if (!nd->nd_repstat) {
3191d177f14dSJohn Baldwin 			    vfs_timestamp(&nvap->na_mtime);
31929ec7b004SRick Macklem 			    if (!toclient)
31939ec7b004SRick Macklem 				nvap->na_vaflags |= VA_UTIMES_NULL;
31949ec7b004SRick Macklem 			}
31959ec7b004SRick Macklem 			break;
3196b4372164SRick Macklem 		case NFSATTRBIT_MODESETMASKED:
3197b4372164SRick Macklem 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
3198b4372164SRick Macklem 			mode = fxdr_unsigned(u_short, *tl++);
3199b4372164SRick Macklem 			mask = fxdr_unsigned(u_short, *tl);
3200b4372164SRick Macklem 			/*
3201b4372164SRick Macklem 			 * vp == NULL implies an Open/Create operation.
3202b4372164SRick Macklem 			 * This attribute can only be used for Setattr and
3203b4372164SRick Macklem 			 * only for NFSv4.1 or higher.
3204b4372164SRick Macklem 			 * If moderet != 0, a mode attribute has also been
3205b4372164SRick Macklem 			 * specified and this attribute cannot be done in the
3206b4372164SRick Macklem 			 * same Setattr operation.
3207b4372164SRick Macklem 			 */
32085037c639SRick Macklem 			if (!nd->nd_repstat) {
3209b4372164SRick Macklem 				if ((nd->nd_flag & ND_NFSV41) == 0)
3210b4372164SRick Macklem 					nd->nd_repstat = NFSERR_ATTRNOTSUPP;
32115037c639SRick Macklem 				else if ((mode & ~07777) != 0 ||
32125037c639SRick Macklem 				    (mask & ~07777) != 0 || vp == NULL)
3213b4372164SRick Macklem 					nd->nd_repstat = NFSERR_INVAL;
3214b4372164SRick Macklem 				else if (moderet == 0)
32155037c639SRick Macklem 					moderet = VOP_GETATTR(vp, &va,
32165037c639SRick Macklem 					    nd->nd_cred);
3217b4372164SRick Macklem 				if (moderet == 0)
3218b4372164SRick Macklem 					nvap->na_mode = (mode & mask) |
3219b4372164SRick Macklem 					    (va.va_mode & ~mask);
3220b4372164SRick Macklem 				else
3221b4372164SRick Macklem 					nd->nd_repstat = moderet;
32225037c639SRick Macklem 			}
3223b4372164SRick Macklem 			attrsum += 2 * NFSX_UNSIGNED;
3224b4372164SRick Macklem 			break;
32252477e88bSRick Macklem 		case NFSATTRBIT_MODEUMASK:
32262477e88bSRick Macklem 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
32272477e88bSRick Macklem 			mode = fxdr_unsigned(u_short, *tl++);
32282477e88bSRick Macklem 			mask = fxdr_unsigned(u_short, *tl);
32292477e88bSRick Macklem 			/*
32302477e88bSRick Macklem 			 * If moderet != 0, mode has already been done.
32312477e88bSRick Macklem 			 * If vp != NULL, this is not a file object creation.
32322477e88bSRick Macklem 			 */
32335037c639SRick Macklem 			if (!nd->nd_repstat) {
32342477e88bSRick Macklem 				if ((nd->nd_flag & ND_NFSV42) == 0)
32352477e88bSRick Macklem 					nd->nd_repstat = NFSERR_ATTRNOTSUPP;
32362477e88bSRick Macklem 				else if ((mask & ~0777) != 0 || vp != NULL ||
32372477e88bSRick Macklem 				    moderet != 0)
32382477e88bSRick Macklem 					nd->nd_repstat = NFSERR_INVAL;
32392477e88bSRick Macklem 				else
32402477e88bSRick Macklem 					nvap->na_mode = (mode & ~mask);
32415037c639SRick Macklem 			}
32422477e88bSRick Macklem 			attrsum += 2 * NFSX_UNSIGNED;
32432477e88bSRick Macklem 			break;
32449ec7b004SRick Macklem 		default:
32459ec7b004SRick Macklem 			nd->nd_repstat = NFSERR_ATTRNOTSUPP;
32469ec7b004SRick Macklem 			/*
32479ec7b004SRick Macklem 			 * set bitpos so we drop out of the loop.
32489ec7b004SRick Macklem 			 */
32499ec7b004SRick Macklem 			bitpos = NFSATTRBIT_MAX;
32509ec7b004SRick Macklem 			break;
325174b8d63dSPedro F. Giffuni 		}
32529ec7b004SRick Macklem 	}
32539ec7b004SRick Macklem 
32549ec7b004SRick Macklem 	/*
32559ec7b004SRick Macklem 	 * some clients pad the attrlist, so we need to skip over the
32565037c639SRick Macklem 	 * padding.  This also skips over unparsed non-supported attributes.
32579ec7b004SRick Macklem 	 */
32589ec7b004SRick Macklem 	if (attrsum > attrsize) {
32599ec7b004SRick Macklem 		error = NFSERR_BADXDR;
32609ec7b004SRick Macklem 	} else {
32619ec7b004SRick Macklem 		attrsize = NFSM_RNDUP(attrsize);
32629ec7b004SRick Macklem 		if (attrsum < attrsize)
32639ec7b004SRick Macklem 			error = nfsm_advance(nd, attrsize - attrsum, -1);
32649ec7b004SRick Macklem 	}
32659ec7b004SRick Macklem nfsmout:
3266a9285ae5SZack Kirsch 	NFSEXITCODE2(error, nd);
32679ec7b004SRick Macklem 	return (error);
32689ec7b004SRick Macklem }
32699ec7b004SRick Macklem 
32709ec7b004SRick Macklem /*
32719ec7b004SRick Macklem  * Check/setup export credentials.
32729ec7b004SRick Macklem  */
32739ec7b004SRick Macklem int
nfsd_excred(struct nfsrv_descript * nd,struct nfsexstuff * exp,struct ucred * credanon,bool testsec)32749ec7b004SRick Macklem nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp,
3275a5df139eSRick Macklem     struct ucred *credanon, bool testsec)
32769ec7b004SRick Macklem {
3277a5df139eSRick Macklem 	int error;
32789ec7b004SRick Macklem 
32799ec7b004SRick Macklem 	/*
32809ec7b004SRick Macklem 	 * Check/setup credentials.
32819ec7b004SRick Macklem 	 */
32829ec7b004SRick Macklem 	if (nd->nd_flag & ND_GSS)
328398ad4453SRick Macklem 		exp->nes_exflag &= ~MNT_EXPORTANON;
32849ec7b004SRick Macklem 
32859ec7b004SRick Macklem 	/*
328698ad4453SRick Macklem 	 * Check to see if the operation is allowed for this security flavor.
32879ec7b004SRick Macklem 	 */
3288a5df139eSRick Macklem 	error = 0;
3289a5df139eSRick Macklem 	if (testsec) {
3290a5df139eSRick Macklem 		error = nfsvno_testexp(nd, exp);
3291a5df139eSRick Macklem 		if (error != 0)
3292a9285ae5SZack Kirsch 			goto out;
32939ec7b004SRick Macklem 	}
32949ec7b004SRick Macklem 
32959ec7b004SRick Macklem 	/*
32969ec7b004SRick Macklem 	 * Check to see if the file system is exported V4 only.
32979ec7b004SRick Macklem 	 */
3298a9285ae5SZack Kirsch 	if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) {
3299a9285ae5SZack Kirsch 		error = NFSERR_PROGNOTV4;
3300a9285ae5SZack Kirsch 		goto out;
3301a9285ae5SZack Kirsch 	}
33029ec7b004SRick Macklem 
33039ec7b004SRick Macklem 	/*
33049ec7b004SRick Macklem 	 * Now, map the user credentials.
33059ec7b004SRick Macklem 	 * (Note that ND_AUTHNONE will only be set for an NFSv3
33069ec7b004SRick Macklem 	 *  Fsinfo RPC. If set for anything else, this code might need
33079ec7b004SRick Macklem 	 *  to change.)
33089ec7b004SRick Macklem 	 */
330984be7e09SRick Macklem 	if (NFSVNO_EXPORTED(exp)) {
331084be7e09SRick Macklem 		if (((nd->nd_flag & ND_GSS) == 0 && nd->nd_cred->cr_uid == 0) ||
33119ec7b004SRick Macklem 		     NFSVNO_EXPORTANON(exp) ||
331284be7e09SRick Macklem 		     (nd->nd_flag & ND_AUTHNONE) != 0) {
33139ec7b004SRick Macklem 			nd->nd_cred->cr_uid = credanon->cr_uid;
3314*cfbe7a62SOlivier Certner 			/*
3315*cfbe7a62SOlivier Certner 			 * 'credanon' is already a 'struct ucred' that was built
3316*cfbe7a62SOlivier Certner 			 * internally with calls to crsetgroups_fallback(), so
3317*cfbe7a62SOlivier Certner 			 * we don't need a fallback here.
3318*cfbe7a62SOlivier Certner 			 */
3319838d9858SBrooks Davis 			crsetgroups(nd->nd_cred, credanon->cr_ngroups,
3320838d9858SBrooks Davis 			    credanon->cr_groups);
332184be7e09SRick Macklem 		} else if ((nd->nd_flag & ND_GSS) == 0) {
332284be7e09SRick Macklem 			/*
332384be7e09SRick Macklem 			 * If using AUTH_SYS, call nfsrv_getgrpscred() to see
332484be7e09SRick Macklem 			 * if there is a replacement credential with a group
332584be7e09SRick Macklem 			 * list set up by "nfsuserd -manage-gids".
332684be7e09SRick Macklem 			 * If there is no replacement, nfsrv_getgrpscred()
332784be7e09SRick Macklem 			 * simply returns its argument.
332884be7e09SRick Macklem 			 */
332984be7e09SRick Macklem 			nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred);
333084be7e09SRick Macklem 		}
33319ec7b004SRick Macklem 	}
3332a9285ae5SZack Kirsch 
3333a9285ae5SZack Kirsch out:
3334a9285ae5SZack Kirsch 	NFSEXITCODE2(error, nd);
3335a9285ae5SZack Kirsch 	return (error);
33369ec7b004SRick Macklem }
33379ec7b004SRick Macklem 
33389ec7b004SRick Macklem /*
33399ec7b004SRick Macklem  * Check exports.
33409ec7b004SRick Macklem  */
33419ec7b004SRick Macklem int
nfsvno_checkexp(struct mount * mp,struct sockaddr * nam,struct nfsexstuff * exp,struct ucred ** credp)33429ec7b004SRick Macklem nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp,
33439ec7b004SRick Macklem     struct ucred **credp)
33449ec7b004SRick Macklem {
33451f7104d7SRick Macklem 	int error;
33469ec7b004SRick Macklem 
334788175af8SRick Macklem 	error = 0;
334888175af8SRick Macklem 	*credp = NULL;
334988175af8SRick Macklem 	MNT_ILOCK(mp);
335088175af8SRick Macklem 	if (mp->mnt_exjail == NULL ||
335188175af8SRick Macklem 	    mp->mnt_exjail->cr_prison != curthread->td_ucred->cr_prison)
335288175af8SRick Macklem 		error = EACCES;
335388175af8SRick Macklem 	MNT_IUNLOCK(mp);
335488175af8SRick Macklem 	if (error == 0)
33559ec7b004SRick Macklem 		error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
33561f7104d7SRick Macklem 		    &exp->nes_numsecflavor, exp->nes_secflavors);
335798ad4453SRick Macklem 	if (error) {
33587e44856eSRick Macklem 		if (NFSD_VNET(nfs_rootfhset)) {
33599ec7b004SRick Macklem 			exp->nes_exflag = 0;
336098ad4453SRick Macklem 			exp->nes_numsecflavor = 0;
33619ec7b004SRick Macklem 			error = 0;
33629ec7b004SRick Macklem 		}
336382164bddSRick Macklem 	} else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor >
336482164bddSRick Macklem 	    MAXSECFLAVORS) {
336582164bddSRick Macklem 		printf("nfsvno_checkexp: numsecflavors out of range\n");
336682164bddSRick Macklem 		exp->nes_numsecflavor = 0;
336782164bddSRick Macklem 		error = EACCES;
336898ad4453SRick Macklem 	}
3369a9285ae5SZack Kirsch 	NFSEXITCODE(error);
33709ec7b004SRick Macklem 	return (error);
33719ec7b004SRick Macklem }
33729ec7b004SRick Macklem 
33739ec7b004SRick Macklem /*
33749ec7b004SRick Macklem  * Get a vnode for a file handle and export stuff.
33759ec7b004SRick Macklem  */
33769ec7b004SRick Macklem int
nfsvno_fhtovp(struct mount * mp,fhandle_t * fhp,struct sockaddr * nam,int lktype,struct vnode ** vpp,struct nfsexstuff * exp,struct ucred ** credp)33779ec7b004SRick Macklem nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam,
337817891d00SRick Macklem     int lktype, struct vnode **vpp, struct nfsexstuff *exp,
337917891d00SRick Macklem     struct ucred **credp)
33809ec7b004SRick Macklem {
33811f7104d7SRick Macklem 	int error;
33829ec7b004SRick Macklem 
338370839889SRick Macklem 	*credp = NULL;
338498ad4453SRick Macklem 	exp->nes_numsecflavor = 0;
33856854d648SRick Macklem 	error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp);
33867482701cSRick Macklem 	if (error != 0)
33877482701cSRick Macklem 		/* Make sure the server replies ESTALE to the client. */
33887482701cSRick Macklem 		error = ESTALE;
33899ec7b004SRick Macklem 	if (nam && !error) {
339088175af8SRick Macklem 		MNT_ILOCK(mp);
339188175af8SRick Macklem 		if (mp->mnt_exjail == NULL ||
339288175af8SRick Macklem 		    mp->mnt_exjail->cr_prison != curthread->td_ucred->cr_prison)
339388175af8SRick Macklem 			error = EACCES;
339488175af8SRick Macklem 		MNT_IUNLOCK(mp);
339588175af8SRick Macklem 		if (error == 0)
33969ec7b004SRick Macklem 			error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
33971f7104d7SRick Macklem 			    &exp->nes_numsecflavor, exp->nes_secflavors);
33989ec7b004SRick Macklem 		if (error) {
33997e44856eSRick Macklem 			if (NFSD_VNET(nfs_rootfhset)) {
34009ec7b004SRick Macklem 				exp->nes_exflag = 0;
340198ad4453SRick Macklem 				exp->nes_numsecflavor = 0;
34029ec7b004SRick Macklem 				error = 0;
34039ec7b004SRick Macklem 			} else {
34049ec7b004SRick Macklem 				vput(*vpp);
34059ec7b004SRick Macklem 			}
340682164bddSRick Macklem 		} else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor >
340782164bddSRick Macklem 		    MAXSECFLAVORS) {
340882164bddSRick Macklem 			printf("nfsvno_fhtovp: numsecflavors out of range\n");
340982164bddSRick Macklem 			exp->nes_numsecflavor = 0;
341082164bddSRick Macklem 			error = EACCES;
341182164bddSRick Macklem 			vput(*vpp);
34129ec7b004SRick Macklem 		}
34139ec7b004SRick Macklem 	}
3414a9285ae5SZack Kirsch 	NFSEXITCODE(error);
34159ec7b004SRick Macklem 	return (error);
34169ec7b004SRick Macklem }
34179ec7b004SRick Macklem 
34189ec7b004SRick Macklem /*
34199ec7b004SRick Macklem  * nfsd_fhtovp() - convert a fh to a vnode ptr
34209ec7b004SRick Macklem  * 	- look up fsid in mount list (if not found ret error)
34219ec7b004SRick Macklem  *	- get vp and export rights by calling nfsvno_fhtovp()
34229ec7b004SRick Macklem  *	- if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
34239ec7b004SRick Macklem  *	  for AUTH_SYS
34248974bc2fSRick Macklem  *	- if mpp != NULL, return the mount point so that it can
34258974bc2fSRick Macklem  *	  be used for vn_finished_write() by the caller
34269ec7b004SRick Macklem  */
34279ec7b004SRick Macklem void
nfsd_fhtovp(struct nfsrv_descript * nd,struct nfsrvfh * nfp,int lktype,struct vnode ** vpp,struct nfsexstuff * exp,struct mount ** mpp,int startwrite,int nextop)342817891d00SRick Macklem nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype,
34299ec7b004SRick Macklem     struct vnode **vpp, struct nfsexstuff *exp,
3430a5df139eSRick Macklem     struct mount **mpp, int startwrite, int nextop)
34319ec7b004SRick Macklem {
343251a9b978SKonstantin Belousov 	struct mount *mp, *mpw;
34339ec7b004SRick Macklem 	struct ucred *credanon;
34349ec7b004SRick Macklem 	fhandle_t *fhp;
343551a9b978SKonstantin Belousov 	int error;
34369ec7b004SRick Macklem 
34378974bc2fSRick Macklem 	if (mpp != NULL)
343851a9b978SKonstantin Belousov 		*mpp = NULL;
34399ec7b004SRick Macklem 	*vpp = NULL;
344051a9b978SKonstantin Belousov 	fhp = (fhandle_t *)nfp->nfsrvfh_data;
344151a9b978SKonstantin Belousov 	mp = vfs_busyfs(&fhp->fh_fsid);
344251a9b978SKonstantin Belousov 	if (mp == NULL) {
34439ec7b004SRick Macklem 		nd->nd_repstat = ESTALE;
3444a9285ae5SZack Kirsch 		goto out;
34459ec7b004SRick Macklem 	}
34469ec7b004SRick Macklem 
3447d96b98a3SKenneth D. Merry 	if (startwrite) {
344851a9b978SKonstantin Belousov 		mpw = mp;
344951a9b978SKonstantin Belousov 		error = vn_start_write(NULL, &mpw, V_WAIT);
345051a9b978SKonstantin Belousov 		if (error != 0) {
345151a9b978SKonstantin Belousov 			mpw = NULL;
345251a9b978SKonstantin Belousov 			vfs_unbusy(mp);
345351a9b978SKonstantin Belousov 			nd->nd_repstat = ESTALE;
345451a9b978SKonstantin Belousov 			goto out;
345551a9b978SKonstantin Belousov 		}
3456d96b98a3SKenneth D. Merry 		if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp)))
3457d96b98a3SKenneth D. Merry 			lktype = LK_EXCLUSIVE;
345851a9b978SKonstantin Belousov 	} else
345951a9b978SKonstantin Belousov 		mpw = NULL;
346051a9b978SKonstantin Belousov 
346117891d00SRick Macklem 	nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp,
34629ec7b004SRick Macklem 	    &credanon);
346347524363SRick Macklem 	vfs_unbusy(mp);
34649ec7b004SRick Macklem 
34659ec7b004SRick Macklem 	/*
34669ec7b004SRick Macklem 	 * For NFSv4 without a pseudo root fs, unexported file handles
34679ec7b004SRick Macklem 	 * can be returned, so that Lookup works everywhere.
34689ec7b004SRick Macklem 	 */
34699ec7b004SRick Macklem 	if (!nd->nd_repstat && exp->nes_exflag == 0 &&
34709ec7b004SRick Macklem 	    !(nd->nd_flag & ND_NFSV4)) {
34719ec7b004SRick Macklem 		vput(*vpp);
347251a9b978SKonstantin Belousov 		*vpp = NULL;
34739ec7b004SRick Macklem 		nd->nd_repstat = EACCES;
34749ec7b004SRick Macklem 	}
34759ec7b004SRick Macklem 
34769ec7b004SRick Macklem 	/*
34779ec7b004SRick Macklem 	 * Personally, I've never seen any point in requiring a
34789ec7b004SRick Macklem 	 * reserved port#, since only in the rare case where the
3479a96c9b30SPedro F. Giffuni 	 * clients are all boxes with secure system privileges,
34809ec7b004SRick Macklem 	 * does it provide any enhanced security, but... some people
34819ec7b004SRick Macklem 	 * believe it to be useful and keep putting this code back in.
34829ec7b004SRick Macklem 	 * (There is also some "security checker" out there that
34839ec7b004SRick Macklem 	 *  complains if the nfs server doesn't enforce this.)
34849ec7b004SRick Macklem 	 * However, note the following:
34859ec7b004SRick Macklem 	 * RFC3530 (NFSv4) specifies that a reserved port# not be
34869ec7b004SRick Macklem 	 *	required.
34879ec7b004SRick Macklem 	 * RFC2623 recommends that, if a reserved port# is checked for,
34889ec7b004SRick Macklem 	 *	that there be a way to turn that off--> ifdef'd.
34899ec7b004SRick Macklem 	 */
34909ec7b004SRick Macklem #ifdef NFS_REQRSVPORT
34919ec7b004SRick Macklem 	if (!nd->nd_repstat) {
34929ec7b004SRick Macklem 		struct sockaddr_in *saddr;
34939ec7b004SRick Macklem 		struct sockaddr_in6 *saddr6;
349498ad4453SRick Macklem 
34959ec7b004SRick Macklem 		saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
34969ec7b004SRick Macklem 		saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *);
34979ec7b004SRick Macklem 		if (!(nd->nd_flag & ND_NFSV4) &&
34989ec7b004SRick Macklem 		    ((saddr->sin_family == AF_INET &&
34999ec7b004SRick Macklem 		      ntohs(saddr->sin_port) >= IPPORT_RESERVED) ||
35009ec7b004SRick Macklem 		     (saddr6->sin6_family == AF_INET6 &&
35019ec7b004SRick Macklem 		      ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) {
35029ec7b004SRick Macklem 			vput(*vpp);
35039ec7b004SRick Macklem 			nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
35049ec7b004SRick Macklem 		}
35059ec7b004SRick Macklem 	}
35069ec7b004SRick Macklem #endif	/* NFS_REQRSVPORT */
35079ec7b004SRick Macklem 
35089ec7b004SRick Macklem 	/*
35099ec7b004SRick Macklem 	 * Check/setup credentials.
35109ec7b004SRick Macklem 	 */
35119ec7b004SRick Macklem 	if (!nd->nd_repstat) {
35129ec7b004SRick Macklem 		nd->nd_saveduid = nd->nd_cred->cr_uid;
3513a5df139eSRick Macklem 		nd->nd_repstat = nfsd_excred(nd, exp, credanon,
3514a5df139eSRick Macklem 		    nfsrv_checkwrongsec(nd, nextop, (*vpp)->v_type));
35159ec7b004SRick Macklem 		if (nd->nd_repstat)
35169ec7b004SRick Macklem 			vput(*vpp);
35179ec7b004SRick Macklem 	}
35185679fe19SAlexander Kabaev 	if (credanon != NULL)
35195679fe19SAlexander Kabaev 		crfree(credanon);
35209ec7b004SRick Macklem 	if (nd->nd_repstat) {
352151a9b978SKonstantin Belousov 		vn_finished_write(mpw);
35229ec7b004SRick Macklem 		*vpp = NULL;
352351a9b978SKonstantin Belousov 	} else if (mpp != NULL) {
352451a9b978SKonstantin Belousov 		*mpp = mpw;
35259ec7b004SRick Macklem 	}
3526a9285ae5SZack Kirsch 
3527a9285ae5SZack Kirsch out:
3528a9285ae5SZack Kirsch 	NFSEXITCODE2(0, nd);
35299ec7b004SRick Macklem }
35309ec7b004SRick Macklem 
35319ec7b004SRick Macklem /*
35329ec7b004SRick Macklem  * glue for fp.
35339ec7b004SRick Macklem  */
35342609222aSPawel Jakub Dawidek static int
fp_getfvp(struct thread * p,int fd,struct file ** fpp,struct vnode ** vpp)35359ec7b004SRick Macklem fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp)
35369ec7b004SRick Macklem {
35379ec7b004SRick Macklem 	struct filedesc *fdp;
35389ec7b004SRick Macklem 	struct file *fp;
3539a9285ae5SZack Kirsch 	int error = 0;
35409ec7b004SRick Macklem 
35419ec7b004SRick Macklem 	fdp = p->td_proc->p_fd;
35422609222aSPawel Jakub Dawidek 	if (fd < 0 || fd >= fdp->fd_nfiles ||
35432609222aSPawel Jakub Dawidek 	    (fp = fdp->fd_ofiles[fd].fde_file) == NULL) {
3544a9285ae5SZack Kirsch 		error = EBADF;
3545a9285ae5SZack Kirsch 		goto out;
3546a9285ae5SZack Kirsch 	}
35479ec7b004SRick Macklem 	*fpp = fp;
3548a9285ae5SZack Kirsch 
3549a9285ae5SZack Kirsch out:
3550a9285ae5SZack Kirsch 	NFSEXITCODE(error);
3551a9285ae5SZack Kirsch 	return (error);
35529ec7b004SRick Macklem }
35539ec7b004SRick Macklem 
35549ec7b004SRick Macklem /*
355598ad4453SRick Macklem  * Called from nfssvc() to update the exports list. Just call
35569ec7b004SRick Macklem  * vfs_export(). This has to be done, since the v4 root fake fs isn't
35579ec7b004SRick Macklem  * in the mount list.
35589ec7b004SRick Macklem  */
35599ec7b004SRick Macklem int
nfsrv_v4rootexport(void * argp,struct ucred * cred,struct thread * p)35609ec7b004SRick Macklem nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p)
35619ec7b004SRick Macklem {
35629ec7b004SRick Macklem 	struct nfsex_args *nfsexargp = (struct nfsex_args *)argp;
3563a9285ae5SZack Kirsch 	int error = 0;
35649ec7b004SRick Macklem 	struct nameidata nd;
35659ec7b004SRick Macklem 	fhandle_t fh;
35669ec7b004SRick Macklem 
356788175af8SRick Macklem 	error = vfs_export(NFSD_VNET(nfsv4root_mnt), &nfsexargp->export, false);
3568c9aad40fSRick Macklem 	if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0)
35697e44856eSRick Macklem 		NFSD_VNET(nfs_rootfhset) = 0;
3570c9aad40fSRick Macklem 	else if (error == 0) {
3571a9285ae5SZack Kirsch 		if (nfsexargp->fspec == NULL) {
3572a9285ae5SZack Kirsch 			error = EPERM;
3573a9285ae5SZack Kirsch 			goto out;
3574a9285ae5SZack Kirsch 		}
35759ec7b004SRick Macklem 		/*
35769ec7b004SRick Macklem 		 * If fspec != NULL, this is the v4root path.
35779ec7b004SRick Macklem 		 */
35787e1d3eefSMateusz Guzik 		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, nfsexargp->fspec);
35799ec7b004SRick Macklem 		if ((error = namei(&nd)) != 0)
3580a9285ae5SZack Kirsch 			goto out;
35819ec7b004SRick Macklem 		error = nfsvno_getfh(nd.ni_vp, &fh, p);
35829ec7b004SRick Macklem 		vrele(nd.ni_vp);
35839ec7b004SRick Macklem 		if (!error) {
35847e44856eSRick Macklem 			NFSD_VNET(nfs_rootfh).nfsrvfh_len = NFSX_MYFH;
35859ec7b004SRick Macklem 			NFSBCOPY((caddr_t)&fh,
35867e44856eSRick Macklem 			    NFSD_VNET(nfs_rootfh).nfsrvfh_data,
35879ec7b004SRick Macklem 			    sizeof (fhandle_t));
35887e44856eSRick Macklem 			NFSD_VNET(nfs_rootfhset) = 1;
35899ec7b004SRick Macklem 		}
35909ec7b004SRick Macklem 	}
3591a9285ae5SZack Kirsch 
3592a9285ae5SZack Kirsch out:
3593a9285ae5SZack Kirsch 	NFSEXITCODE(error);
35949ec7b004SRick Macklem 	return (error);
35959ec7b004SRick Macklem }
35969ec7b004SRick Macklem 
/*
 * This function is meant to test whether the system is near its limit
 * for memory allocation via malloc() or mget() and return True iff
 * either of these resources is near its limit.
 * XXX (For now, this is just a stub.)
 *
 * The stub only does something when nfsrv_testmalloclimit is non-zero:
 * every 1000th call made while it is set returns 1, and a message is
 * printed for the first such return and for every 100th one after that.
 * All other calls return 0.
 */
int nfsrv_testmalloclimit = 0;
int
nfsrv_mallocmget_limit(void)
{
	static int nreports = 0;
	static int ncalls = 1;

	/* Testing knob is off: never claim to be near the limit. */
	if (nfsrv_testmalloclimit == 0)
		return (0);

	/* Only one call out of every 1000 reports "near limit". */
	if ((ncalls++ % 1000) != 0)
		return (0);

	/* Rate limit the console message to one per 100 reports. */
	if ((nreports++ % 100) == 0)
		printf("nfsd: malloc/mget near limit\n");
	return (1);
}
36179ec7b004SRick Macklem 
36189ec7b004SRick Macklem /*
36199ec7b004SRick Macklem  * BSD specific initialization of a mount point.
36209ec7b004SRick Macklem  */
36219ec7b004SRick Macklem void
nfsd_mntinit(void)36229ec7b004SRick Macklem nfsd_mntinit(void)
36239ec7b004SRick Macklem {
36249ec7b004SRick Macklem 
36257e44856eSRick Macklem 	NFSD_LOCK();
36267e44856eSRick Macklem 	if (NFSD_VNET(nfsrv_mntinited)) {
36277e44856eSRick Macklem 		NFSD_UNLOCK();
36289ec7b004SRick Macklem 		return;
36297e44856eSRick Macklem 	}
36307e44856eSRick Macklem 	NFSD_VNET(nfsrv_mntinited) = true;
3631fcfdb76eSRick Macklem 	nfsrvd_init(0);
36327e44856eSRick Macklem 	NFSD_UNLOCK();
36337e44856eSRick Macklem 
36347e44856eSRick Macklem 	NFSD_VNET(nfsv4root_mnt) = malloc(sizeof(struct mount), M_TEMP,
36357e44856eSRick Macklem 	    M_WAITOK | M_ZERO);
36367e44856eSRick Macklem 	NFSD_VNET(nfsv4root_mnt)->mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
36377e44856eSRick Macklem 	mtx_init(&NFSD_VNET(nfsv4root_mnt)->mnt_mtx, "nfs4mnt", NULL, MTX_DEF);
36387e44856eSRick Macklem 	lockinit(&NFSD_VNET(nfsv4root_mnt)->mnt_explock, PVFS, "explock", 0, 0);
36397e44856eSRick Macklem 	TAILQ_INIT(&NFSD_VNET(nfsv4root_mnt)->mnt_nvnodelist);
36407e44856eSRick Macklem 	TAILQ_INIT(&NFSD_VNET(nfsv4root_mnt)->mnt_lazyvnodelist);
36417e44856eSRick Macklem 	NFSD_VNET(nfsv4root_mnt)->mnt_export = NULL;
36427e44856eSRick Macklem 	TAILQ_INIT(&NFSD_VNET(nfsv4root_opt));
36437e44856eSRick Macklem 	TAILQ_INIT(&NFSD_VNET(nfsv4root_newopt));
36447e44856eSRick Macklem 	NFSD_VNET(nfsv4root_mnt)->mnt_opt = &NFSD_VNET(nfsv4root_opt);
36457e44856eSRick Macklem 	NFSD_VNET(nfsv4root_mnt)->mnt_optnew = &NFSD_VNET(nfsv4root_newopt);
36467e44856eSRick Macklem 	NFSD_VNET(nfsv4root_mnt)->mnt_nvnodelistsize = 0;
36477e44856eSRick Macklem 	NFSD_VNET(nfsv4root_mnt)->mnt_lazyvnodelistsize = 0;
36487e44856eSRick Macklem 	callout_init(&NFSD_VNET(nfsd_callout), 1);
36497e44856eSRick Macklem 
36507e44856eSRick Macklem 	nfsrvd_initcache();
36517e44856eSRick Macklem 	nfsd_init();
36529ec7b004SRick Macklem }
36539ec7b004SRick Macklem 
36543455c738SAlexander Motin static void
nfsd_timer(void * arg)36553455c738SAlexander Motin nfsd_timer(void *arg)
36563455c738SAlexander Motin {
36577e44856eSRick Macklem 	struct vnet *vnetp;
36583455c738SAlexander Motin 
36597e44856eSRick Macklem 	vnetp = (struct vnet *)arg;
36607e44856eSRick Macklem 	NFSD_CURVNET_SET_QUIET(vnetp);
36617e44856eSRick Macklem 	nfsrv_servertimer(vnetp);
36627e44856eSRick Macklem 	callout_reset_sbt(&NFSD_VNET(nfsd_callout), SBT_1S, SBT_1S, nfsd_timer,
36637e44856eSRick Macklem 	    arg, 0);
36647e44856eSRick Macklem 	NFSD_CURVNET_RESTORE();
36653455c738SAlexander Motin }
36663455c738SAlexander Motin 
36679ec7b004SRick Macklem /*
36689ec7b004SRick Macklem  * Get a vnode for a file handle, without checking exports, etc.
36699ec7b004SRick Macklem  */
36709ec7b004SRick Macklem struct vnode *
nfsvno_getvp(fhandle_t * fhp)36719ec7b004SRick Macklem nfsvno_getvp(fhandle_t *fhp)
36729ec7b004SRick Macklem {
36739ec7b004SRick Macklem 	struct mount *mp;
36749ec7b004SRick Macklem 	struct vnode *vp;
36759ec7b004SRick Macklem 	int error;
36769ec7b004SRick Macklem 
367747524363SRick Macklem 	mp = vfs_busyfs(&fhp->fh_fsid);
36789ec7b004SRick Macklem 	if (mp == NULL)
36799ec7b004SRick Macklem 		return (NULL);
3680694a586aSRick Macklem 	error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp);
368147524363SRick Macklem 	vfs_unbusy(mp);
36829ec7b004SRick Macklem 	if (error)
36839ec7b004SRick Macklem 		return (NULL);
36849ec7b004SRick Macklem 	return (vp);
36859ec7b004SRick Macklem }
36869ec7b004SRick Macklem 
36879ec7b004SRick Macklem /*
36889ec7b004SRick Macklem  * Do a local VOP_ADVLOCK().
36899ec7b004SRick Macklem  */
36909ec7b004SRick Macklem int
nfsvno_advlock(struct vnode * vp,int ftype,u_int64_t first,u_int64_t end,struct thread * td)36919ec7b004SRick Macklem nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first,
36922c1b26b9SRick Macklem     u_int64_t end, struct thread *td)
36939ec7b004SRick Macklem {
3694a9285ae5SZack Kirsch 	int error = 0;
36959ec7b004SRick Macklem 	struct flock fl;
36969ec7b004SRick Macklem 	u_int64_t tlen;
36979ec7b004SRick Macklem 
3698c7aafc24SRick Macklem 	if (nfsrv_dolocallocks == 0)
3699a9285ae5SZack Kirsch 		goto out;
370052f1bb38SRick Macklem 	ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked");
3701629fa50eSRick Macklem 
37029ec7b004SRick Macklem 	fl.l_whence = SEEK_SET;
37039ec7b004SRick Macklem 	fl.l_type = ftype;
37049ec7b004SRick Macklem 	fl.l_start = (off_t)first;
37059ec7b004SRick Macklem 	if (end == NFS64BITSSET) {
37069ec7b004SRick Macklem 		fl.l_len = 0;
37079ec7b004SRick Macklem 	} else {
37089ec7b004SRick Macklem 		tlen = end - first;
37099ec7b004SRick Macklem 		fl.l_len = (off_t)tlen;
37109ec7b004SRick Macklem 	}
37119ec7b004SRick Macklem 	/*
37122c1b26b9SRick Macklem 	 * For FreeBSD8, the l_pid and l_sysid must be set to the same
37132c1b26b9SRick Macklem 	 * values for all calls, so that all locks will be held by the
37142c1b26b9SRick Macklem 	 * nfsd server. (The nfsd server handles conflicts between the
37152c1b26b9SRick Macklem 	 * various clients.)
37162c1b26b9SRick Macklem 	 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024
37172c1b26b9SRick Macklem 	 * bytes, so it can't be put in l_sysid.
37189ec7b004SRick Macklem 	 */
3719b839e625SRick Macklem 	if (nfsv4_sysid == 0)
3720b839e625SRick Macklem 		nfsv4_sysid = nlm_acquire_next_sysid();
37212c1b26b9SRick Macklem 	fl.l_pid = (pid_t)0;
3722b839e625SRick Macklem 	fl.l_sysid = (int)nfsv4_sysid;
37232c1b26b9SRick Macklem 
3724c7aafc24SRick Macklem 	if (ftype == F_UNLCK)
3725c7aafc24SRick Macklem 		error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl,
3726c7aafc24SRick Macklem 		    (F_POSIX | F_REMOTE));
3727c7aafc24SRick Macklem 	else
37282c1b26b9SRick Macklem 		error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl,
37292c1b26b9SRick Macklem 		    (F_POSIX | F_REMOTE));
3730a9285ae5SZack Kirsch 
3731a9285ae5SZack Kirsch out:
3732a9285ae5SZack Kirsch 	NFSEXITCODE(error);
37339ec7b004SRick Macklem 	return (error);
37349ec7b004SRick Macklem }
37359ec7b004SRick Macklem 
37369ec7b004SRick Macklem /*
37379ec7b004SRick Macklem  * Check the nfsv4 root exports.
37389ec7b004SRick Macklem  */
37399ec7b004SRick Macklem int
nfsvno_v4rootexport(struct nfsrv_descript * nd)37409ec7b004SRick Macklem nfsvno_v4rootexport(struct nfsrv_descript *nd)
37419ec7b004SRick Macklem {
37429ec7b004SRick Macklem 	struct ucred *credanon;
37431f7104d7SRick Macklem 	int error = 0, numsecflavor, secflavors[MAXSECFLAVORS], i;
37441f7104d7SRick Macklem 	uint64_t exflags;
37459ec7b004SRick Macklem 
37467e44856eSRick Macklem 	error = vfs_stdcheckexp(NFSD_VNET(nfsv4root_mnt), nd->nd_nam, &exflags,
37471f7104d7SRick Macklem 	    &credanon, &numsecflavor, secflavors);
3748a9285ae5SZack Kirsch 	if (error) {
3749a9285ae5SZack Kirsch 		error = NFSERR_PROGUNAVAIL;
3750a9285ae5SZack Kirsch 		goto out;
3751a9285ae5SZack Kirsch 	}
37525679fe19SAlexander Kabaev 	if (credanon != NULL)
37535679fe19SAlexander Kabaev 		crfree(credanon);
375498ad4453SRick Macklem 	for (i = 0; i < numsecflavor; i++) {
375598ad4453SRick Macklem 		if (secflavors[i] == AUTH_SYS)
375698ad4453SRick Macklem 			nd->nd_flag |= ND_EXAUTHSYS;
375798ad4453SRick Macklem 		else if (secflavors[i] == RPCSEC_GSS_KRB5)
375898ad4453SRick Macklem 			nd->nd_flag |= ND_EXGSS;
375998ad4453SRick Macklem 		else if (secflavors[i] == RPCSEC_GSS_KRB5I)
376098ad4453SRick Macklem 			nd->nd_flag |= ND_EXGSSINTEGRITY;
376198ad4453SRick Macklem 		else if (secflavors[i] == RPCSEC_GSS_KRB5P)
376298ad4453SRick Macklem 			nd->nd_flag |= ND_EXGSSPRIVACY;
376398ad4453SRick Macklem 	}
3764a9285ae5SZack Kirsch 
37656e4b6ff8SRick Macklem 	/* And set ND_EXxx flags for TLS. */
37666e4b6ff8SRick Macklem 	if ((exflags & MNT_EXTLS) != 0) {
37676e4b6ff8SRick Macklem 		nd->nd_flag |= ND_EXTLS;
37686e4b6ff8SRick Macklem 		if ((exflags & MNT_EXTLSCERT) != 0)
37696e4b6ff8SRick Macklem 			nd->nd_flag |= ND_EXTLSCERT;
37706e4b6ff8SRick Macklem 		if ((exflags & MNT_EXTLSCERTUSER) != 0)
37716e4b6ff8SRick Macklem 			nd->nd_flag |= ND_EXTLSCERTUSER;
37726e4b6ff8SRick Macklem 	}
37736e4b6ff8SRick Macklem 
3774a9285ae5SZack Kirsch out:
3775a9285ae5SZack Kirsch 	NFSEXITCODE(error);
3776a9285ae5SZack Kirsch 	return (error);
37779ec7b004SRick Macklem }
37789ec7b004SRick Macklem 
37799ec7b004SRick Macklem /*
3780a96c9b30SPedro F. Giffuni  * Nfs server pseudo system call for the nfsd's
37819ec7b004SRick Macklem  */
37829ec7b004SRick Macklem /*
37839ec7b004SRick Macklem  * MPSAFE
37849ec7b004SRick Macklem  */
37859ec7b004SRick Macklem static int
nfssvc_nfsd(struct thread * td,struct nfssvc_args * uap)37869ec7b004SRick Macklem nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap)
37879ec7b004SRick Macklem {
37889ec7b004SRick Macklem 	struct file *fp;
37897e745519SRick Macklem 	struct nfsd_addsock_args sockarg;
37907e745519SRick Macklem 	struct nfsd_nfsd_args nfsdarg;
379190d2dfabSRick Macklem 	struct nfsd_nfsd_oargs onfsdarg;
379290d2dfabSRick Macklem 	struct nfsd_pnfsd_args pnfsdarg;
379390d2dfabSRick Macklem 	struct vnode *vp, *nvp, *curdvp;
379490d2dfabSRick Macklem 	struct pnfsdsfile *pf;
379590d2dfabSRick Macklem 	struct nfsdevice *ds, *fds;
37967008be5bSPawel Jakub Dawidek 	cap_rights_t rights;
379790d2dfabSRick Macklem 	int buflen, error, ret;
379890d2dfabSRick Macklem 	char *buf, *cp, *cp2, *cp3;
379990d2dfabSRick Macklem 	char fname[PNFS_FILENAME_LEN + 1];
38009ec7b004SRick Macklem 
38017e44856eSRick Macklem 	NFSD_CURVNET_SET(NFSD_TD_TO_VNET(td));
38029ec7b004SRick Macklem 	if (uap->flag & NFSSVC_NFSDADDSOCK) {
38037e745519SRick Macklem 		error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg));
38049ec7b004SRick Macklem 		if (error)
3805a9285ae5SZack Kirsch 			goto out;
3806a9d2f8d8SRobert Watson 		/*
3807a9d2f8d8SRobert Watson 		 * Since we don't know what rights might be required,
3808a9d2f8d8SRobert Watson 		 * pretend that we need them all. It is better to be too
3809a9d2f8d8SRobert Watson 		 * careful than too reckless.
3810a9d2f8d8SRobert Watson 		 */
38117008be5bSPawel Jakub Dawidek 		error = fget(td, sockarg.sock,
38126b3a9a0fSMateusz Guzik 		    cap_rights_init_one(&rights, CAP_SOCK_SERVER), &fp);
38137008be5bSPawel Jakub Dawidek 		if (error != 0)
3814a9285ae5SZack Kirsch 			goto out;
38159ec7b004SRick Macklem 		if (fp->f_type != DTYPE_SOCKET) {
38169ec7b004SRick Macklem 			fdrop(fp, td);
3817a9285ae5SZack Kirsch 			error = EPERM;
3818a9285ae5SZack Kirsch 			goto out;
38199ec7b004SRick Macklem 		}
38209ec7b004SRick Macklem 		error = nfsrvd_addsock(fp);
38219ec7b004SRick Macklem 		fdrop(fp, td);
38229ec7b004SRick Macklem 	} else if (uap->flag & NFSSVC_NFSDNFSD) {
3823a9285ae5SZack Kirsch 		if (uap->argp == NULL) {
3824a9285ae5SZack Kirsch 			error = EINVAL;
3825a9285ae5SZack Kirsch 			goto out;
3826a9285ae5SZack Kirsch 		}
382790d2dfabSRick Macklem 		if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) {
382890d2dfabSRick Macklem 			error = copyin(uap->argp, &onfsdarg, sizeof(onfsdarg));
382990d2dfabSRick Macklem 			if (error == 0) {
383090d2dfabSRick Macklem 				nfsdarg.principal = onfsdarg.principal;
383190d2dfabSRick Macklem 				nfsdarg.minthreads = onfsdarg.minthreads;
383290d2dfabSRick Macklem 				nfsdarg.maxthreads = onfsdarg.maxthreads;
383390d2dfabSRick Macklem 				nfsdarg.version = 1;
383490d2dfabSRick Macklem 				nfsdarg.addr = NULL;
383590d2dfabSRick Macklem 				nfsdarg.addrlen = 0;
383690d2dfabSRick Macklem 				nfsdarg.dnshost = NULL;
383790d2dfabSRick Macklem 				nfsdarg.dnshostlen = 0;
38382f32675cSRick Macklem 				nfsdarg.dspath = NULL;
38392f32675cSRick Macklem 				nfsdarg.dspathlen = 0;
38402f32675cSRick Macklem 				nfsdarg.mdspath = NULL;
38412f32675cSRick Macklem 				nfsdarg.mdspathlen = 0;
384290d2dfabSRick Macklem 				nfsdarg.mirrorcnt = 1;
384390d2dfabSRick Macklem 			}
384490d2dfabSRick Macklem 		} else
384590d2dfabSRick Macklem 			error = copyin(uap->argp, &nfsdarg, sizeof(nfsdarg));
38467e745519SRick Macklem 		if (error)
3847a9285ae5SZack Kirsch 			goto out;
384890d2dfabSRick Macklem 		if (nfsdarg.addrlen > 0 && nfsdarg.addrlen < 10000 &&
384990d2dfabSRick Macklem 		    nfsdarg.dnshostlen > 0 && nfsdarg.dnshostlen < 10000 &&
385090d2dfabSRick Macklem 		    nfsdarg.dspathlen > 0 && nfsdarg.dspathlen < 10000 &&
38512f32675cSRick Macklem 		    nfsdarg.mdspathlen > 0 && nfsdarg.mdspathlen < 10000 &&
385290d2dfabSRick Macklem 		    nfsdarg.mirrorcnt >= 1 &&
385390d2dfabSRick Macklem 		    nfsdarg.mirrorcnt <= NFSDEV_MAXMIRRORS &&
385490d2dfabSRick Macklem 		    nfsdarg.addr != NULL && nfsdarg.dnshost != NULL &&
38552f32675cSRick Macklem 		    nfsdarg.dspath != NULL && nfsdarg.mdspath != NULL) {
385690d2dfabSRick Macklem 			NFSD_DEBUG(1, "addrlen=%d dspathlen=%d dnslen=%d"
38572f32675cSRick Macklem 			    " mdspathlen=%d mirrorcnt=%d\n", nfsdarg.addrlen,
385890d2dfabSRick Macklem 			    nfsdarg.dspathlen, nfsdarg.dnshostlen,
38592f32675cSRick Macklem 			    nfsdarg.mdspathlen, nfsdarg.mirrorcnt);
386090d2dfabSRick Macklem 			cp = malloc(nfsdarg.addrlen + 1, M_TEMP, M_WAITOK);
386190d2dfabSRick Macklem 			error = copyin(nfsdarg.addr, cp, nfsdarg.addrlen);
386290d2dfabSRick Macklem 			if (error != 0) {
386390d2dfabSRick Macklem 				free(cp, M_TEMP);
386490d2dfabSRick Macklem 				goto out;
386590d2dfabSRick Macklem 			}
386690d2dfabSRick Macklem 			cp[nfsdarg.addrlen] = '\0';	/* Ensure nul term. */
386790d2dfabSRick Macklem 			nfsdarg.addr = cp;
386890d2dfabSRick Macklem 			cp = malloc(nfsdarg.dnshostlen + 1, M_TEMP, M_WAITOK);
386990d2dfabSRick Macklem 			error = copyin(nfsdarg.dnshost, cp, nfsdarg.dnshostlen);
387090d2dfabSRick Macklem 			if (error != 0) {
387190d2dfabSRick Macklem 				free(nfsdarg.addr, M_TEMP);
387290d2dfabSRick Macklem 				free(cp, M_TEMP);
387390d2dfabSRick Macklem 				goto out;
387490d2dfabSRick Macklem 			}
387590d2dfabSRick Macklem 			cp[nfsdarg.dnshostlen] = '\0';	/* Ensure nul term. */
387690d2dfabSRick Macklem 			nfsdarg.dnshost = cp;
387790d2dfabSRick Macklem 			cp = malloc(nfsdarg.dspathlen + 1, M_TEMP, M_WAITOK);
387890d2dfabSRick Macklem 			error = copyin(nfsdarg.dspath, cp, nfsdarg.dspathlen);
387990d2dfabSRick Macklem 			if (error != 0) {
388090d2dfabSRick Macklem 				free(nfsdarg.addr, M_TEMP);
388190d2dfabSRick Macklem 				free(nfsdarg.dnshost, M_TEMP);
388290d2dfabSRick Macklem 				free(cp, M_TEMP);
388390d2dfabSRick Macklem 				goto out;
388490d2dfabSRick Macklem 			}
388590d2dfabSRick Macklem 			cp[nfsdarg.dspathlen] = '\0';	/* Ensure nul term. */
388690d2dfabSRick Macklem 			nfsdarg.dspath = cp;
38872f32675cSRick Macklem 			cp = malloc(nfsdarg.mdspathlen + 1, M_TEMP, M_WAITOK);
38882f32675cSRick Macklem 			error = copyin(nfsdarg.mdspath, cp, nfsdarg.mdspathlen);
38892f32675cSRick Macklem 			if (error != 0) {
38902f32675cSRick Macklem 				free(nfsdarg.addr, M_TEMP);
38912f32675cSRick Macklem 				free(nfsdarg.dnshost, M_TEMP);
38922f32675cSRick Macklem 				free(nfsdarg.dspath, M_TEMP);
38932f32675cSRick Macklem 				free(cp, M_TEMP);
38942f32675cSRick Macklem 				goto out;
38952f32675cSRick Macklem 			}
38962f32675cSRick Macklem 			cp[nfsdarg.mdspathlen] = '\0';	/* Ensure nul term. */
38972f32675cSRick Macklem 			nfsdarg.mdspath = cp;
389890d2dfabSRick Macklem 		} else {
389990d2dfabSRick Macklem 			nfsdarg.addr = NULL;
390090d2dfabSRick Macklem 			nfsdarg.addrlen = 0;
390190d2dfabSRick Macklem 			nfsdarg.dnshost = NULL;
390290d2dfabSRick Macklem 			nfsdarg.dnshostlen = 0;
390390d2dfabSRick Macklem 			nfsdarg.dspath = NULL;
390490d2dfabSRick Macklem 			nfsdarg.dspathlen = 0;
39052f32675cSRick Macklem 			nfsdarg.mdspath = NULL;
39062f32675cSRick Macklem 			nfsdarg.mdspathlen = 0;
390790d2dfabSRick Macklem 			nfsdarg.mirrorcnt = 1;
390890d2dfabSRick Macklem 		}
39097e44856eSRick Macklem 		nfsd_timer(NFSD_TD_TO_VNET(td));
39107e745519SRick Macklem 		error = nfsrvd_nfsd(td, &nfsdarg);
39117e44856eSRick Macklem 		callout_drain(&NFSD_VNET(nfsd_callout));
391290d2dfabSRick Macklem 		free(nfsdarg.addr, M_TEMP);
391390d2dfabSRick Macklem 		free(nfsdarg.dnshost, M_TEMP);
391490d2dfabSRick Macklem 		free(nfsdarg.dspath, M_TEMP);
39152f32675cSRick Macklem 		free(nfsdarg.mdspath, M_TEMP);
391690d2dfabSRick Macklem 	} else if (uap->flag & NFSSVC_PNFSDS) {
391790d2dfabSRick Macklem 		error = copyin(uap->argp, &pnfsdarg, sizeof(pnfsdarg));
3918de9a1a70SRick Macklem 		if (error == 0 && (pnfsdarg.op == PNFSDOP_DELDSSERVER ||
3919de9a1a70SRick Macklem 		    pnfsdarg.op == PNFSDOP_FORCEDELDS)) {
392090d2dfabSRick Macklem 			cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
392190d2dfabSRick Macklem 			error = copyinstr(pnfsdarg.dspath, cp, PATH_MAX + 1,
392290d2dfabSRick Macklem 			    NULL);
392390d2dfabSRick Macklem 			if (error == 0)
3924de9a1a70SRick Macklem 				error = nfsrv_deldsserver(pnfsdarg.op, cp, td);
392590d2dfabSRick Macklem 			free(cp, M_TEMP);
392690d2dfabSRick Macklem 		} else if (error == 0 && pnfsdarg.op == PNFSDOP_COPYMR) {
392790d2dfabSRick Macklem 			cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
392890d2dfabSRick Macklem 			buflen = sizeof(*pf) * NFSDEV_MAXMIRRORS;
392990d2dfabSRick Macklem 			buf = malloc(buflen, M_TEMP, M_WAITOK);
393090d2dfabSRick Macklem 			error = copyinstr(pnfsdarg.mdspath, cp, PATH_MAX + 1,
393190d2dfabSRick Macklem 			    NULL);
393290d2dfabSRick Macklem 			NFSD_DEBUG(4, "pnfsdcopymr cp mdspath=%d\n", error);
393390d2dfabSRick Macklem 			if (error == 0 && pnfsdarg.dspath != NULL) {
393490d2dfabSRick Macklem 				cp2 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
393590d2dfabSRick Macklem 				error = copyinstr(pnfsdarg.dspath, cp2,
393690d2dfabSRick Macklem 				    PATH_MAX + 1, NULL);
393790d2dfabSRick Macklem 				NFSD_DEBUG(4, "pnfsdcopymr cp dspath=%d\n",
393890d2dfabSRick Macklem 				    error);
393990d2dfabSRick Macklem 			} else
394090d2dfabSRick Macklem 				cp2 = NULL;
394190d2dfabSRick Macklem 			if (error == 0 && pnfsdarg.curdspath != NULL) {
394290d2dfabSRick Macklem 				cp3 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
394390d2dfabSRick Macklem 				error = copyinstr(pnfsdarg.curdspath, cp3,
394490d2dfabSRick Macklem 				    PATH_MAX + 1, NULL);
394590d2dfabSRick Macklem 				NFSD_DEBUG(4, "pnfsdcopymr cp curdspath=%d\n",
394690d2dfabSRick Macklem 				    error);
394790d2dfabSRick Macklem 			} else
394890d2dfabSRick Macklem 				cp3 = NULL;
394990d2dfabSRick Macklem 			curdvp = NULL;
395090d2dfabSRick Macklem 			fds = NULL;
395190d2dfabSRick Macklem 			if (error == 0)
395290d2dfabSRick Macklem 				error = nfsrv_mdscopymr(cp, cp2, cp3, buf,
395390d2dfabSRick Macklem 				    &buflen, fname, td, &vp, &nvp, &pf, &ds,
395490d2dfabSRick Macklem 				    &fds);
395590d2dfabSRick Macklem 			NFSD_DEBUG(4, "nfsrv_mdscopymr=%d\n", error);
395690d2dfabSRick Macklem 			if (error == 0) {
395790d2dfabSRick Macklem 				if (pf->dsf_dir >= nfsrv_dsdirsize) {
395890d2dfabSRick Macklem 					printf("copymr: dsdir out of range\n");
395990d2dfabSRick Macklem 					pf->dsf_dir = 0;
396090d2dfabSRick Macklem 				}
396190d2dfabSRick Macklem 				NFSD_DEBUG(4, "copymr: buflen=%d\n", buflen);
396290d2dfabSRick Macklem 				error = nfsrv_copymr(vp, nvp,
396390d2dfabSRick Macklem 				    ds->nfsdev_dsdir[pf->dsf_dir], ds, pf,
396490d2dfabSRick Macklem 				    (struct pnfsdsfile *)buf,
396590d2dfabSRick Macklem 				    buflen / sizeof(*pf), td->td_ucred, td);
396690d2dfabSRick Macklem 				vput(vp);
396790d2dfabSRick Macklem 				vput(nvp);
396890d2dfabSRick Macklem 				if (fds != NULL && error == 0) {
396990d2dfabSRick Macklem 					curdvp = fds->nfsdev_dsdir[pf->dsf_dir];
397090d2dfabSRick Macklem 					ret = vn_lock(curdvp, LK_EXCLUSIVE);
397190d2dfabSRick Macklem 					if (ret == 0) {
397290d2dfabSRick Macklem 						nfsrv_dsremove(curdvp, fname,
397390d2dfabSRick Macklem 						    td->td_ucred, td);
3974b249ce48SMateusz Guzik 						NFSVOPUNLOCK(curdvp);
397590d2dfabSRick Macklem 					}
397690d2dfabSRick Macklem 				}
397790d2dfabSRick Macklem 				NFSD_DEBUG(4, "nfsrv_copymr=%d\n", error);
397890d2dfabSRick Macklem 			}
397990d2dfabSRick Macklem 			free(cp, M_TEMP);
398090d2dfabSRick Macklem 			free(cp2, M_TEMP);
398190d2dfabSRick Macklem 			free(cp3, M_TEMP);
398290d2dfabSRick Macklem 			free(buf, M_TEMP);
398390d2dfabSRick Macklem 		}
39849ec7b004SRick Macklem 	} else {
39859ec7b004SRick Macklem 		error = nfssvc_srvcall(td, uap, td->td_ucred);
39869ec7b004SRick Macklem 	}
3987a9285ae5SZack Kirsch 
3988a9285ae5SZack Kirsch out:
39897e44856eSRick Macklem 	NFSD_CURVNET_RESTORE();
3990a9285ae5SZack Kirsch 	NFSEXITCODE(error);
39919ec7b004SRick Macklem 	return (error);
39929ec7b004SRick Macklem }
39939ec7b004SRick Macklem 
39949ec7b004SRick Macklem static int
nfssvc_srvcall(struct thread * p,struct nfssvc_args * uap,struct ucred * cred)39959ec7b004SRick Macklem nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred)
39969ec7b004SRick Macklem {
39979ec7b004SRick Macklem 	struct nfsex_args export;
39981f7104d7SRick Macklem 	struct nfsex_oldargs oexp;
39999ec7b004SRick Macklem 	struct file *fp = NULL;
40001f7104d7SRick Macklem 	int stablefd, i, len;
40019ec7b004SRick Macklem 	struct nfsd_clid adminrevoke;
40029ec7b004SRick Macklem 	struct nfsd_dumplist dumplist;
40039ec7b004SRick Macklem 	struct nfsd_dumpclients *dumpclients;
40049ec7b004SRick Macklem 	struct nfsd_dumplocklist dumplocklist;
40059ec7b004SRick Macklem 	struct nfsd_dumplocks *dumplocks;
40069ec7b004SRick Macklem 	struct nameidata nd;
40079ec7b004SRick Macklem 	vnode_t vp;
40086001db29SRick Macklem 	int error = EINVAL, igotlock;
40095f73287aSRick Macklem 	struct proc *procp;
40101f7104d7SRick Macklem 	gid_t *grps;
40119ec7b004SRick Macklem 
40129ec7b004SRick Macklem 	if (uap->flag & NFSSVC_PUBLICFH) {
40139ec7b004SRick Macklem 		NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data,
40149ec7b004SRick Macklem 		    sizeof (fhandle_t));
40159ec7b004SRick Macklem 		error = copyin(uap->argp,
40169ec7b004SRick Macklem 		    &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t));
40179ec7b004SRick Macklem 		if (!error)
40189ec7b004SRick Macklem 			nfs_pubfhset = 1;
40191f7104d7SRick Macklem 	} else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) ==
40201f7104d7SRick Macklem 	    (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) {
40219ec7b004SRick Macklem 		error = copyin(uap->argp,(caddr_t)&export,
40229ec7b004SRick Macklem 		    sizeof (struct nfsex_args));
40231f7104d7SRick Macklem 		if (!error) {
40241f7104d7SRick Macklem 			grps = NULL;
40251f7104d7SRick Macklem 			if (export.export.ex_ngroups > NGROUPS_MAX ||
40261f7104d7SRick Macklem 			    export.export.ex_ngroups < 0)
40271f7104d7SRick Macklem 				error = EINVAL;
40281f7104d7SRick Macklem 			else if (export.export.ex_ngroups > 0) {
40291f7104d7SRick Macklem 				grps = malloc(export.export.ex_ngroups *
40301f7104d7SRick Macklem 				    sizeof(gid_t), M_TEMP, M_WAITOK);
40311f7104d7SRick Macklem 				error = copyin(export.export.ex_groups, grps,
40321f7104d7SRick Macklem 				    export.export.ex_ngroups * sizeof(gid_t));
40331f7104d7SRick Macklem 				export.export.ex_groups = grps;
40341f7104d7SRick Macklem 			} else
40351f7104d7SRick Macklem 				export.export.ex_groups = NULL;
40369ec7b004SRick Macklem 			if (!error)
40379ec7b004SRick Macklem 				error = nfsrv_v4rootexport(&export, cred, p);
40381f7104d7SRick Macklem 			free(grps, M_TEMP);
40391f7104d7SRick Macklem 		}
40401f7104d7SRick Macklem 	} else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) ==
40411f7104d7SRick Macklem 	    NFSSVC_V4ROOTEXPORT) {
40421f7104d7SRick Macklem 		error = copyin(uap->argp,(caddr_t)&oexp,
40431f7104d7SRick Macklem 		    sizeof (struct nfsex_oldargs));
40441f7104d7SRick Macklem 		if (!error) {
40451f7104d7SRick Macklem 			memset(&export.export, 0, sizeof(export.export));
40461f7104d7SRick Macklem 			export.export.ex_flags = (uint64_t)oexp.export.ex_flags;
40471f7104d7SRick Macklem 			export.export.ex_root = oexp.export.ex_root;
40481f7104d7SRick Macklem 			export.export.ex_uid = oexp.export.ex_anon.cr_uid;
40491f7104d7SRick Macklem 			export.export.ex_ngroups =
40501f7104d7SRick Macklem 			    oexp.export.ex_anon.cr_ngroups;
40511f7104d7SRick Macklem 			export.export.ex_groups = NULL;
40521f7104d7SRick Macklem 			if (export.export.ex_ngroups > XU_NGROUPS ||
40531f7104d7SRick Macklem 			    export.export.ex_ngroups < 0)
40541f7104d7SRick Macklem 				error = EINVAL;
40551f7104d7SRick Macklem 			else if (export.export.ex_ngroups > 0) {
40561f7104d7SRick Macklem 				export.export.ex_groups = malloc(
40571f7104d7SRick Macklem 				    export.export.ex_ngroups * sizeof(gid_t),
40581f7104d7SRick Macklem 				    M_TEMP, M_WAITOK);
40591f7104d7SRick Macklem 				for (i = 0; i < export.export.ex_ngroups; i++)
40601f7104d7SRick Macklem 					export.export.ex_groups[i] =
40611f7104d7SRick Macklem 					    oexp.export.ex_anon.cr_groups[i];
40621f7104d7SRick Macklem 			}
40631f7104d7SRick Macklem 			export.export.ex_addr = oexp.export.ex_addr;
40641f7104d7SRick Macklem 			export.export.ex_addrlen = oexp.export.ex_addrlen;
40651f7104d7SRick Macklem 			export.export.ex_mask = oexp.export.ex_mask;
40661f7104d7SRick Macklem 			export.export.ex_masklen = oexp.export.ex_masklen;
40671f7104d7SRick Macklem 			export.export.ex_indexfile = oexp.export.ex_indexfile;
40681f7104d7SRick Macklem 			export.export.ex_numsecflavors =
40691f7104d7SRick Macklem 			    oexp.export.ex_numsecflavors;
40701f7104d7SRick Macklem 			if (export.export.ex_numsecflavors >= MAXSECFLAVORS ||
40711f7104d7SRick Macklem 			    export.export.ex_numsecflavors < 0)
40721f7104d7SRick Macklem 				error = EINVAL;
40731f7104d7SRick Macklem 			else {
40741f7104d7SRick Macklem 				for (i = 0; i < export.export.ex_numsecflavors;
40751f7104d7SRick Macklem 				    i++)
40761f7104d7SRick Macklem 					export.export.ex_secflavors[i] =
40771f7104d7SRick Macklem 					    oexp.export.ex_secflavors[i];
40781f7104d7SRick Macklem 			}
40791f7104d7SRick Macklem 			export.fspec = oexp.fspec;
40801f7104d7SRick Macklem 			if (error == 0)
40811f7104d7SRick Macklem 				error = nfsrv_v4rootexport(&export, cred, p);
40821f7104d7SRick Macklem 			free(export.export.ex_groups, M_TEMP);
40831f7104d7SRick Macklem 		}
40849ec7b004SRick Macklem 	} else if (uap->flag & NFSSVC_NOPUBLICFH) {
40859ec7b004SRick Macklem 		nfs_pubfhset = 0;
40869ec7b004SRick Macklem 		error = 0;
40879ec7b004SRick Macklem 	} else if (uap->flag & NFSSVC_STABLERESTART) {
40889ec7b004SRick Macklem 		error = copyin(uap->argp, (caddr_t)&stablefd,
40899ec7b004SRick Macklem 		    sizeof (int));
40909ec7b004SRick Macklem 		if (!error)
40919ec7b004SRick Macklem 			error = fp_getfvp(p, stablefd, &fp, &vp);
40929ec7b004SRick Macklem 		if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE))
40939ec7b004SRick Macklem 			error = EBADF;
40947e44856eSRick Macklem 		if (!error && NFSD_VNET(nfsrv_numnfsd) != 0)
409510dff9daSRick Macklem 			error = ENXIO;
40969ec7b004SRick Macklem 		if (!error) {
40977e44856eSRick Macklem 			NFSD_VNET(nfsrv_stablefirst).nsf_fp = fp;
40989ec7b004SRick Macklem 			nfsrv_setupstable(p);
40999ec7b004SRick Macklem 		}
41009ec7b004SRick Macklem 	} else if (uap->flag & NFSSVC_ADMINREVOKE) {
41019ec7b004SRick Macklem 		error = copyin(uap->argp, (caddr_t)&adminrevoke,
41029ec7b004SRick Macklem 		    sizeof (struct nfsd_clid));
41039ec7b004SRick Macklem 		if (!error)
41049ec7b004SRick Macklem 			error = nfsrv_adminrevoke(&adminrevoke, p);
41059ec7b004SRick Macklem 	} else if (uap->flag & NFSSVC_DUMPCLIENTS) {
41069ec7b004SRick Macklem 		error = copyin(uap->argp, (caddr_t)&dumplist,
41079ec7b004SRick Macklem 		    sizeof (struct nfsd_dumplist));
41089ec7b004SRick Macklem 		if (!error && (dumplist.ndl_size < 1 ||
41099ec7b004SRick Macklem 			dumplist.ndl_size > NFSRV_MAXDUMPLIST))
41109ec7b004SRick Macklem 			error = EPERM;
41119ec7b004SRick Macklem 		if (!error) {
41129ec7b004SRick Macklem 		    len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size;
4113d9463dd4SMark Johnston 		    dumpclients = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
41149ec7b004SRick Macklem 		    nfsrv_dumpclients(dumpclients, dumplist.ndl_size);
4115e4a458bbSRick Macklem 		    error = copyout(dumpclients, dumplist.ndl_list, len);
4116222daa42SConrad Meyer 		    free(dumpclients, M_TEMP);
41179ec7b004SRick Macklem 		}
41189ec7b004SRick Macklem 	} else if (uap->flag & NFSSVC_DUMPLOCKS) {
41199ec7b004SRick Macklem 		error = copyin(uap->argp, (caddr_t)&dumplocklist,
41209ec7b004SRick Macklem 		    sizeof (struct nfsd_dumplocklist));
41219ec7b004SRick Macklem 		if (!error && (dumplocklist.ndllck_size < 1 ||
41229ec7b004SRick Macklem 			dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST))
41239ec7b004SRick Macklem 			error = EPERM;
41249ec7b004SRick Macklem 		if (!error)
41259ec7b004SRick Macklem 			error = nfsrv_lookupfilename(&nd,
41269ec7b004SRick Macklem 				dumplocklist.ndllck_fname, p);
41279ec7b004SRick Macklem 		if (!error) {
41289ec7b004SRick Macklem 			len = sizeof (struct nfsd_dumplocks) *
41299ec7b004SRick Macklem 				dumplocklist.ndllck_size;
4130d9463dd4SMark Johnston 			dumplocks = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
41319ec7b004SRick Macklem 			nfsrv_dumplocks(nd.ni_vp, dumplocks,
41329ec7b004SRick Macklem 			    dumplocklist.ndllck_size, p);
41339ec7b004SRick Macklem 			vput(nd.ni_vp);
4134e4a458bbSRick Macklem 			error = copyout(dumplocks, dumplocklist.ndllck_list,
4135e4a458bbSRick Macklem 			    len);
4136222daa42SConrad Meyer 			free(dumplocks, M_TEMP);
41379ec7b004SRick Macklem 		}
41385f73287aSRick Macklem 	} else if (uap->flag & NFSSVC_BACKUPSTABLE) {
41395f73287aSRick Macklem 		procp = p->td_proc;
41405f73287aSRick Macklem 		PROC_LOCK(procp);
41415f73287aSRick Macklem 		nfsd_master_pid = procp->p_pid;
41425f73287aSRick Macklem 		bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1);
41435f73287aSRick Macklem 		nfsd_master_start = procp->p_stats->p_start;
41447e44856eSRick Macklem 		NFSD_VNET(nfsd_master_proc) = procp;
41455f73287aSRick Macklem 		PROC_UNLOCK(procp);
41466001db29SRick Macklem 	} else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) {
41476001db29SRick Macklem 		NFSLOCKV4ROOTMUTEX();
41487e44856eSRick Macklem 		if (!NFSD_VNET(nfsrv_suspend_nfsd)) {
41496001db29SRick Macklem 			/* Lock out all nfsd threads */
41506001db29SRick Macklem 			do {
41517e44856eSRick Macklem 				igotlock = nfsv4_lock(
41527e44856eSRick Macklem 				    &NFSD_VNET(nfsd_suspend_lock), 1, NULL,
41537e44856eSRick Macklem 				    NFSV4ROOTLOCKMUTEXPTR, NULL);
41547e44856eSRick Macklem 			} while (igotlock == 0 &&
41557e44856eSRick Macklem 			    !NFSD_VNET(nfsrv_suspend_nfsd));
41567e44856eSRick Macklem 			NFSD_VNET(nfsrv_suspend_nfsd) = true;
41576001db29SRick Macklem 		}
41586001db29SRick Macklem 		NFSUNLOCKV4ROOTMUTEX();
41596001db29SRick Macklem 		error = 0;
41606001db29SRick Macklem 	} else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) {
41616001db29SRick Macklem 		NFSLOCKV4ROOTMUTEX();
41627e44856eSRick Macklem 		if (NFSD_VNET(nfsrv_suspend_nfsd)) {
41637e44856eSRick Macklem 			nfsv4_unlock(&NFSD_VNET(nfsd_suspend_lock), 0);
41647e44856eSRick Macklem 			NFSD_VNET(nfsrv_suspend_nfsd) = false;
41656001db29SRick Macklem 		}
41666001db29SRick Macklem 		NFSUNLOCKV4ROOTMUTEX();
41676001db29SRick Macklem 		error = 0;
41689ec7b004SRick Macklem 	}
4169a9285ae5SZack Kirsch 
4170a9285ae5SZack Kirsch 	NFSEXITCODE(error);
41719ec7b004SRick Macklem 	return (error);
41729ec7b004SRick Macklem }
41739ec7b004SRick Macklem 
417498ad4453SRick Macklem /*
417598ad4453SRick Macklem  * Check exports.
417698ad4453SRick Macklem  * Returns 0 if ok, 1 otherwise.
417798ad4453SRick Macklem  */
417898ad4453SRick Macklem int
nfsvno_testexp(struct nfsrv_descript * nd,struct nfsexstuff * exp)417998ad4453SRick Macklem nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp)
418098ad4453SRick Macklem {
418198ad4453SRick Macklem 	int i;
418298ad4453SRick Macklem 
41833fc3fe90SRick Macklem 	if ((NFSVNO_EXTLS(exp) && (nd->nd_flag & ND_TLS) == 0) ||
4184a5df139eSRick Macklem 	    (NFSVNO_EXTLSCERT(exp) &&
4185a5df139eSRick Macklem 	     (nd->nd_flag & ND_TLSCERT) == 0) ||
4186a5df139eSRick Macklem 	    (NFSVNO_EXTLSCERTUSER(exp) &&
41873fc3fe90SRick Macklem 	     (nd->nd_flag & ND_TLSCERTUSER) == 0)) {
4188a5df139eSRick Macklem 		if ((nd->nd_flag & ND_NFSV4) != 0)
4189a5df139eSRick Macklem 			return (NFSERR_WRONGSEC);
4190744c2dc7SRick Macklem #ifdef notnow
4191744c2dc7SRick Macklem 		/* There is currently no auth_stat for this. */
4192a5df139eSRick Macklem 		else if ((nd->nd_flag & ND_TLS) == 0)
4193a5df139eSRick Macklem 			return (NFSERR_AUTHERR | AUTH_NEEDS_TLS);
4194a5df139eSRick Macklem 		else
4195a5df139eSRick Macklem 			return (NFSERR_AUTHERR | AUTH_NEEDS_TLS_MUTUAL_HOST);
4196744c2dc7SRick Macklem #endif
4197744c2dc7SRick Macklem 		else
4198744c2dc7SRick Macklem 			return (NFSERR_AUTHERR | AUTH_TOOWEAK);
4199a5df139eSRick Macklem 	}
4200a5df139eSRick Macklem 
4201a5df139eSRick Macklem 	/*
42023fc3fe90SRick Macklem 	 * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to use
42033fc3fe90SRick Macklem 	 * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS.
42043fc3fe90SRick Macklem 	 */
42053fc3fe90SRick Macklem 	if ((nd->nd_flag & ND_NFSV3) != 0 && nd->nd_procnum == NFSPROC_FSINFO)
42063fc3fe90SRick Macklem 		return (0);
42073fc3fe90SRick Macklem 
42083fc3fe90SRick Macklem 	/*
420998ad4453SRick Macklem 	 * This seems odd, but allow the case where the security flavor
421098ad4453SRick Macklem 	 * list is empty. This happens when NFSv4 is traversing non-exported
421198ad4453SRick Macklem 	 * file systems. Exported file systems should always have a non-empty
421298ad4453SRick Macklem 	 * security flavor list.
421398ad4453SRick Macklem 	 */
421498ad4453SRick Macklem 	if (exp->nes_numsecflavor == 0)
421598ad4453SRick Macklem 		return (0);
421698ad4453SRick Macklem 
421798ad4453SRick Macklem 	for (i = 0; i < exp->nes_numsecflavor; i++) {
421898ad4453SRick Macklem 		/*
421998ad4453SRick Macklem 		 * The tests for privacy and integrity must be first,
422098ad4453SRick Macklem 		 * since ND_GSS is set for everything but AUTH_SYS.
422198ad4453SRick Macklem 		 */
422298ad4453SRick Macklem 		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P &&
422398ad4453SRick Macklem 		    (nd->nd_flag & ND_GSSPRIVACY))
422498ad4453SRick Macklem 			return (0);
422598ad4453SRick Macklem 		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I &&
422698ad4453SRick Macklem 		    (nd->nd_flag & ND_GSSINTEGRITY))
422798ad4453SRick Macklem 			return (0);
422898ad4453SRick Macklem 		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 &&
422998ad4453SRick Macklem 		    (nd->nd_flag & ND_GSS))
423098ad4453SRick Macklem 			return (0);
423198ad4453SRick Macklem 		if (exp->nes_secflavors[i] == AUTH_SYS &&
423298ad4453SRick Macklem 		    (nd->nd_flag & ND_GSS) == 0)
423398ad4453SRick Macklem 			return (0);
423498ad4453SRick Macklem 	}
4235a5df139eSRick Macklem 	if ((nd->nd_flag & ND_NFSV4) != 0)
4236a5df139eSRick Macklem 		return (NFSERR_WRONGSEC);
4237a5df139eSRick Macklem 	return (NFSERR_AUTHERR | AUTH_TOOWEAK);
423898ad4453SRick Macklem }
423998ad4453SRick Macklem 
424091027b4eSRick Macklem /*
424191027b4eSRick Macklem  * Calculate a hash value for the fid in a file handle.
424291027b4eSRick Macklem  */
4243377c50f6SRick Macklem uint32_t
nfsrv_hashfh(fhandle_t * fhp)424491027b4eSRick Macklem nfsrv_hashfh(fhandle_t *fhp)
424591027b4eSRick Macklem {
4246377c50f6SRick Macklem 	uint32_t hashval;
424791027b4eSRick Macklem 
4248377c50f6SRick Macklem 	hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0);
424991027b4eSRick Macklem 	return (hashval);
425091027b4eSRick Macklem }
425191027b4eSRick Macklem 
42525f73287aSRick Macklem /*
4253c59e4cc3SRick Macklem  * Calculate a hash value for the sessionid.
4254c59e4cc3SRick Macklem  */
4255c59e4cc3SRick Macklem uint32_t
nfsrv_hashsessionid(uint8_t * sessionid)4256c59e4cc3SRick Macklem nfsrv_hashsessionid(uint8_t *sessionid)
4257c59e4cc3SRick Macklem {
4258c59e4cc3SRick Macklem 	uint32_t hashval;
4259c59e4cc3SRick Macklem 
4260c59e4cc3SRick Macklem 	hashval = hash32_buf(sessionid, NFSX_V4SESSIONID, 0);
4261c59e4cc3SRick Macklem 	return (hashval);
4262c59e4cc3SRick Macklem }
4263c59e4cc3SRick Macklem 
4264c59e4cc3SRick Macklem /*
42655f73287aSRick Macklem  * Signal the userland master nfsd to backup the stable restart file.
42665f73287aSRick Macklem  */
42675f73287aSRick Macklem void
nfsrv_backupstable(void)42685f73287aSRick Macklem nfsrv_backupstable(void)
42695f73287aSRick Macklem {
42705f73287aSRick Macklem 	struct proc *procp;
42715f73287aSRick Macklem 
42727e44856eSRick Macklem 	if (NFSD_VNET(nfsd_master_proc) != NULL) {
42735f73287aSRick Macklem 		procp = pfind(nfsd_master_pid);
42745f73287aSRick Macklem 		/* Try to make sure it is the correct process. */
42757e44856eSRick Macklem 		if (procp == NFSD_VNET(nfsd_master_proc) &&
42765f73287aSRick Macklem 		    procp->p_stats->p_start.tv_sec ==
42775f73287aSRick Macklem 		    nfsd_master_start.tv_sec &&
42785f73287aSRick Macklem 		    procp->p_stats->p_start.tv_usec ==
42795f73287aSRick Macklem 		    nfsd_master_start.tv_usec &&
42805f73287aSRick Macklem 		    strcmp(procp->p_comm, nfsd_master_comm) == 0)
42818451d0ddSKip Macy 			kern_psignal(procp, SIGUSR2);
42825f73287aSRick Macklem 		else
42837e44856eSRick Macklem 			NFSD_VNET(nfsd_master_proc) = NULL;
42845f73287aSRick Macklem 
42855f73287aSRick Macklem 		if (procp != NULL)
42865f73287aSRick Macklem 			PROC_UNLOCK(procp);
42875f73287aSRick Macklem 	}
42885f73287aSRick Macklem }
42895f73287aSRick Macklem 
429090d2dfabSRick Macklem /*
429190d2dfabSRick Macklem  * Create a DS data file for nfsrv_pnfscreate(). Called for each mirror.
429290d2dfabSRick Macklem  * The arguments are in a structure, so that they can be passed through
429390d2dfabSRick Macklem  * taskqueue for a kernel process to execute this function.
429490d2dfabSRick Macklem  */
/*
 * One of these is filled in by nfsrv_pnfscreate() for each additional
 * mirror and handed to start_dscreate(), which passes the fields on to
 * nfsrv_dscreate().
 */
429590d2dfabSRick Macklem struct nfsrvdscreate {
429690d2dfabSRick Macklem 	int			done;	/* set to 1 by start_dscreate() when finished */
429790d2dfabSRick Macklem 	int			inprog;	/* waiter only sleeps while non-zero; presumably set by nfs_pnfsio() - confirm */
429890d2dfabSRick Macklem 	struct task		tsk;	/* address is the tsleep() channel; presumably the queued task - confirm */
429990d2dfabSRick Macklem 	struct ucred		*tcred;	/* credentials passed to nfsrv_dscreate() */
430090d2dfabSRick Macklem 	struct vnode		*dvp;	/* DS directory vnode to create the file in */
430190d2dfabSRick Macklem 	NFSPROC_T		*p;	/* passed through to nfsrv_dscreate() */
430290d2dfabSRick Macklem 	struct pnfsdsfile	*pf;	/* entry filled in by nfsrv_dscreate() */
430390d2dfabSRick Macklem 	int			err;	/* return value of nfsrv_dscreate() */
430490d2dfabSRick Macklem 	fhandle_t		fh;	/* copy of the MDS file handle */
430590d2dfabSRick Macklem 	struct vattr		va;	/* attributes applied with VOP_SETATTR() */
430690d2dfabSRick Macklem 	struct vattr		createva;	/* attributes passed to VOP_CREATE() */
430790d2dfabSRick Macklem };
430890d2dfabSRick Macklem 
430990d2dfabSRick Macklem int
nfsrv_dscreate(struct vnode * dvp,struct vattr * vap,struct vattr * nvap,fhandle_t * fhp,struct pnfsdsfile * pf,struct pnfsdsattr * dsa,char * fnamep,struct ucred * tcred,NFSPROC_T * p,struct vnode ** nvpp)431090d2dfabSRick Macklem nfsrv_dscreate(struct vnode *dvp, struct vattr *vap, struct vattr *nvap,
431190d2dfabSRick Macklem     fhandle_t *fhp, struct pnfsdsfile *pf, struct pnfsdsattr *dsa,
431290d2dfabSRick Macklem     char *fnamep, struct ucred *tcred, NFSPROC_T *p, struct vnode **nvpp)
431390d2dfabSRick Macklem {
431490d2dfabSRick Macklem 	struct vnode *nvp;
431590d2dfabSRick Macklem 	struct nameidata named;
431690d2dfabSRick Macklem 	struct vattr va;
431790d2dfabSRick Macklem 	char *bufp;
431890d2dfabSRick Macklem 	u_long *hashp;
431990d2dfabSRick Macklem 	struct nfsnode *np;
432090d2dfabSRick Macklem 	struct nfsmount *nmp;
432190d2dfabSRick Macklem 	int error;
432290d2dfabSRick Macklem 
432390d2dfabSRick Macklem 	NFSNAMEICNDSET(&named.ni_cnd, tcred, CREATE,
432465127e98SMateusz Guzik 	    LOCKPARENT | LOCKLEAF | NOCACHE);
432590d2dfabSRick Macklem 	nfsvno_setpathbuf(&named, &bufp, &hashp);
432690d2dfabSRick Macklem 	named.ni_cnd.cn_lkflags = LK_EXCLUSIVE;
432790d2dfabSRick Macklem 	named.ni_cnd.cn_nameptr = bufp;
432890d2dfabSRick Macklem 	if (fnamep != NULL) {
432990d2dfabSRick Macklem 		strlcpy(bufp, fnamep, PNFS_FILENAME_LEN + 1);
433090d2dfabSRick Macklem 		named.ni_cnd.cn_namelen = strlen(bufp);
433190d2dfabSRick Macklem 	} else
433290d2dfabSRick Macklem 		named.ni_cnd.cn_namelen = nfsrv_putfhname(fhp, bufp);
433390d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_dscreate: dvp=%p fname=%s\n", dvp, bufp);
433490d2dfabSRick Macklem 
433590d2dfabSRick Macklem 	/* Create the date file in the DS mount. */
433690d2dfabSRick Macklem 	error = NFSVOPLOCK(dvp, LK_EXCLUSIVE);
433790d2dfabSRick Macklem 	if (error == 0) {
433890d2dfabSRick Macklem 		error = VOP_CREATE(dvp, &nvp, &named.ni_cnd, vap);
43394a21bcb2SKonstantin Belousov 		vref(dvp);
43404a21bcb2SKonstantin Belousov 		VOP_VPUT_PAIR(dvp, error == 0 ? &nvp : NULL, false);
434190d2dfabSRick Macklem 		if (error == 0) {
434290d2dfabSRick Macklem 			/* Set the ownership of the file. */
434390d2dfabSRick Macklem 			error = VOP_SETATTR(nvp, nvap, tcred);
434490d2dfabSRick Macklem 			NFSD_DEBUG(4, "nfsrv_dscreate:"
434590d2dfabSRick Macklem 			    " setattr-uid=%d\n", error);
434690d2dfabSRick Macklem 			if (error != 0)
434790d2dfabSRick Macklem 				vput(nvp);
434890d2dfabSRick Macklem 		}
434990d2dfabSRick Macklem 		if (error != 0)
435090d2dfabSRick Macklem 			printf("pNFS: pnfscreate failed=%d\n", error);
435190d2dfabSRick Macklem 	} else
435290d2dfabSRick Macklem 		printf("pNFS: pnfscreate vnlock=%d\n", error);
435390d2dfabSRick Macklem 	if (error == 0) {
435490d2dfabSRick Macklem 		np = VTONFS(nvp);
435590d2dfabSRick Macklem 		nmp = VFSTONFS(nvp->v_mount);
435690d2dfabSRick Macklem 		if (strcmp(nvp->v_mount->mnt_vfc->vfc_name, "nfs")
435790d2dfabSRick Macklem 		    != 0 || nmp->nm_nam->sa_len > sizeof(
435890d2dfabSRick Macklem 		    struct sockaddr_in6) ||
435990d2dfabSRick Macklem 		    np->n_fhp->nfh_len != NFSX_MYFH) {
436090d2dfabSRick Macklem 			printf("Bad DS file: fstype=%s salen=%d"
436190d2dfabSRick Macklem 			    " fhlen=%d\n",
436290d2dfabSRick Macklem 			    nvp->v_mount->mnt_vfc->vfc_name,
436390d2dfabSRick Macklem 			    nmp->nm_nam->sa_len, np->n_fhp->nfh_len);
436490d2dfabSRick Macklem 			error = ENOENT;
436590d2dfabSRick Macklem 		}
436690d2dfabSRick Macklem 
436790d2dfabSRick Macklem 		/* Set extattrs for the DS on the MDS file. */
436890d2dfabSRick Macklem 		if (error == 0) {
436990d2dfabSRick Macklem 			if (dsa != NULL) {
437090d2dfabSRick Macklem 				error = VOP_GETATTR(nvp, &va, tcred);
437190d2dfabSRick Macklem 				if (error == 0) {
437290d2dfabSRick Macklem 					dsa->dsa_filerev = va.va_filerev;
437390d2dfabSRick Macklem 					dsa->dsa_size = va.va_size;
437490d2dfabSRick Macklem 					dsa->dsa_atime = va.va_atime;
437590d2dfabSRick Macklem 					dsa->dsa_mtime = va.va_mtime;
437614eff785SRick Macklem 					dsa->dsa_bytes = va.va_bytes;
437790d2dfabSRick Macklem 				}
437890d2dfabSRick Macklem 			}
437990d2dfabSRick Macklem 			if (error == 0) {
438090d2dfabSRick Macklem 				NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh,
438190d2dfabSRick Macklem 				    NFSX_MYFH);
438290d2dfabSRick Macklem 				NFSBCOPY(nmp->nm_nam, &pf->dsf_sin,
438390d2dfabSRick Macklem 				    nmp->nm_nam->sa_len);
438490d2dfabSRick Macklem 				NFSBCOPY(named.ni_cnd.cn_nameptr,
438590d2dfabSRick Macklem 				    pf->dsf_filename,
438690d2dfabSRick Macklem 				    sizeof(pf->dsf_filename));
438790d2dfabSRick Macklem 			}
438890d2dfabSRick Macklem 		} else
438990d2dfabSRick Macklem 			printf("pNFS: pnfscreate can't get DS"
439090d2dfabSRick Macklem 			    " attr=%d\n", error);
439190d2dfabSRick Macklem 		if (nvpp != NULL && error == 0)
439290d2dfabSRick Macklem 			*nvpp = nvp;
439390d2dfabSRick Macklem 		else
439490d2dfabSRick Macklem 			vput(nvp);
439590d2dfabSRick Macklem 	}
439690d2dfabSRick Macklem 	nfsvno_relpathbuf(&named);
439790d2dfabSRick Macklem 	return (error);
439890d2dfabSRick Macklem }
439990d2dfabSRick Macklem 
440090d2dfabSRick Macklem /*
440190d2dfabSRick Macklem  * Start up the thread that will execute nfsrv_dscreate().
440290d2dfabSRick Macklem  */
440390d2dfabSRick Macklem static void
start_dscreate(void * arg,int pending)440490d2dfabSRick Macklem start_dscreate(void *arg, int pending)
440590d2dfabSRick Macklem {
440690d2dfabSRick Macklem 	struct nfsrvdscreate *dsc;
440790d2dfabSRick Macklem 
440890d2dfabSRick Macklem 	dsc = (struct nfsrvdscreate *)arg;
440990d2dfabSRick Macklem 	dsc->err = nfsrv_dscreate(dsc->dvp, &dsc->createva, &dsc->va, &dsc->fh,
441090d2dfabSRick Macklem 	    dsc->pf, NULL, NULL, dsc->tcred, dsc->p, NULL);
441190d2dfabSRick Macklem 	dsc->done = 1;
441290d2dfabSRick Macklem 	NFSD_DEBUG(4, "start_dscreate: err=%d\n", dsc->err);
441390d2dfabSRick Macklem }
441490d2dfabSRick Macklem 
441590d2dfabSRick Macklem /*
441690d2dfabSRick Macklem  * Create a pNFS data file on the Data Server(s).
441790d2dfabSRick Macklem  */
441890d2dfabSRick Macklem static void
nfsrv_pnfscreate(struct vnode * vp,struct vattr * vap,struct ucred * cred,NFSPROC_T * p)441990d2dfabSRick Macklem nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred,
442090d2dfabSRick Macklem     NFSPROC_T *p)
442190d2dfabSRick Macklem {
442295bf2e52SRick Macklem 	struct nfsrvdscreate *dsc, *tdsc = NULL;
4423ed66a76bSRick Macklem 	struct nfsdevice *ds, *tds, *fds;
442490d2dfabSRick Macklem 	struct mount *mp;
442590d2dfabSRick Macklem 	struct pnfsdsfile *pf, *tpf;
442690d2dfabSRick Macklem 	struct pnfsdsattr dsattr;
442790d2dfabSRick Macklem 	struct vattr va;
442890d2dfabSRick Macklem 	struct vnode *dvp[NFSDEV_MAXMIRRORS];
442990d2dfabSRick Macklem 	struct nfsmount *nmp;
443090d2dfabSRick Macklem 	fhandle_t fh;
443190d2dfabSRick Macklem 	uid_t vauid;
443290d2dfabSRick Macklem 	gid_t vagid;
443390d2dfabSRick Macklem 	u_short vamode;
443490d2dfabSRick Macklem 	struct ucred *tcred;
443590d2dfabSRick Macklem 	int dsdir[NFSDEV_MAXMIRRORS], error, i, mirrorcnt, ret;
443690d2dfabSRick Macklem 	int failpos, timo;
443790d2dfabSRick Macklem 
443890d2dfabSRick Macklem 	/* Get a DS server directory in a round-robin order. */
443990d2dfabSRick Macklem 	mirrorcnt = 1;
44402f32675cSRick Macklem 	mp = vp->v_mount;
4441ed66a76bSRick Macklem 	ds = fds = NULL;
444290d2dfabSRick Macklem 	NFSDDSLOCK();
4443ed66a76bSRick Macklem 	/*
4444ed66a76bSRick Macklem 	 * Search for the first entry that handles this MDS fs, but use the
4445ed66a76bSRick Macklem 	 * first entry for all MDS fs's otherwise.
4446ed66a76bSRick Macklem 	 */
4447ed66a76bSRick Macklem 	TAILQ_FOREACH(tds, &nfsrv_devidhead, nfsdev_list) {
4448ed66a76bSRick Macklem 		if (tds->nfsdev_nmp != NULL) {
4449ed66a76bSRick Macklem 			if (tds->nfsdev_mdsisset == 0 && ds == NULL)
4450ed66a76bSRick Macklem 				ds = tds;
4451245bfd34SRyan Moeller 			else if (tds->nfsdev_mdsisset != 0 && fsidcmp(
4452245bfd34SRyan Moeller 			    &mp->mnt_stat.f_fsid, &tds->nfsdev_mdsfsid) == 0) {
4453ed66a76bSRick Macklem 				ds = fds = tds;
445490d2dfabSRick Macklem 				break;
445590d2dfabSRick Macklem 			}
4456ed66a76bSRick Macklem 		}
4457ed66a76bSRick Macklem 	}
445890d2dfabSRick Macklem 	if (ds == NULL) {
445990d2dfabSRick Macklem 		NFSDDSUNLOCK();
446090d2dfabSRick Macklem 		NFSD_DEBUG(4, "nfsrv_pnfscreate: no srv\n");
446190d2dfabSRick Macklem 		return;
446290d2dfabSRick Macklem 	}
446390d2dfabSRick Macklem 	i = dsdir[0] = ds->nfsdev_nextdir;
446490d2dfabSRick Macklem 	ds->nfsdev_nextdir = (ds->nfsdev_nextdir + 1) % nfsrv_dsdirsize;
446590d2dfabSRick Macklem 	dvp[0] = ds->nfsdev_dsdir[i];
4466ed66a76bSRick Macklem 	tds = TAILQ_NEXT(ds, nfsdev_list);
4467ed66a76bSRick Macklem 	if (nfsrv_maxpnfsmirror > 1 && tds != NULL) {
4468ed66a76bSRick Macklem 		TAILQ_FOREACH_FROM(tds, &nfsrv_devidhead, nfsdev_list) {
4469ed66a76bSRick Macklem 			if (tds->nfsdev_nmp != NULL &&
4470ed66a76bSRick Macklem 			    ((tds->nfsdev_mdsisset == 0 && fds == NULL) ||
4471ed66a76bSRick Macklem 			     (tds->nfsdev_mdsisset != 0 && fds != NULL &&
4472245bfd34SRyan Moeller 			      fsidcmp(&mp->mnt_stat.f_fsid,
4473245bfd34SRyan Moeller 			      &tds->nfsdev_mdsfsid) == 0))) {
447490d2dfabSRick Macklem 				dsdir[mirrorcnt] = i;
4475ed66a76bSRick Macklem 				dvp[mirrorcnt] = tds->nfsdev_dsdir[i];
447690d2dfabSRick Macklem 				mirrorcnt++;
447790d2dfabSRick Macklem 				if (mirrorcnt >= nfsrv_maxpnfsmirror)
447890d2dfabSRick Macklem 					break;
447990d2dfabSRick Macklem 			}
448090d2dfabSRick Macklem 		}
448190d2dfabSRick Macklem 	}
448290d2dfabSRick Macklem 	/* Put at end of list to implement round-robin usage. */
448390d2dfabSRick Macklem 	TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list);
448490d2dfabSRick Macklem 	TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list);
448590d2dfabSRick Macklem 	NFSDDSUNLOCK();
448690d2dfabSRick Macklem 	dsc = NULL;
448790d2dfabSRick Macklem 	if (mirrorcnt > 1)
448890d2dfabSRick Macklem 		tdsc = dsc = malloc(sizeof(*dsc) * (mirrorcnt - 1), M_TEMP,
448990d2dfabSRick Macklem 		    M_WAITOK | M_ZERO);
44901aabf3fdSRick Macklem 	tpf = pf = malloc(sizeof(*pf) * nfsrv_maxpnfsmirror, M_TEMP, M_WAITOK |
44911aabf3fdSRick Macklem 	    M_ZERO);
449290d2dfabSRick Macklem 
449390d2dfabSRick Macklem 	error = nfsvno_getfh(vp, &fh, p);
449490d2dfabSRick Macklem 	if (error == 0)
449590d2dfabSRick Macklem 		error = VOP_GETATTR(vp, &va, cred);
449690d2dfabSRick Macklem 	if (error == 0) {
449790d2dfabSRick Macklem 		/* Set the attributes for "vp" to Setattr the DS vp. */
449890d2dfabSRick Macklem 		vauid = va.va_uid;
449990d2dfabSRick Macklem 		vagid = va.va_gid;
450090d2dfabSRick Macklem 		vamode = va.va_mode;
450190d2dfabSRick Macklem 		VATTR_NULL(&va);
450290d2dfabSRick Macklem 		va.va_uid = vauid;
450390d2dfabSRick Macklem 		va.va_gid = vagid;
450490d2dfabSRick Macklem 		va.va_mode = vamode;
450590d2dfabSRick Macklem 		va.va_size = 0;
450690d2dfabSRick Macklem 	} else
450790d2dfabSRick Macklem 		printf("pNFS: pnfscreate getfh+attr=%d\n", error);
450890d2dfabSRick Macklem 
450990d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_pnfscreate: cruid=%d crgid=%d\n", cred->cr_uid,
451090d2dfabSRick Macklem 	    cred->cr_gid);
451190d2dfabSRick Macklem 	/* Make data file name based on FH. */
451290d2dfabSRick Macklem 	tcred = newnfs_getcred();
451390d2dfabSRick Macklem 
451490d2dfabSRick Macklem 	/*
451590d2dfabSRick Macklem 	 * Create the file on each DS mirror, using kernel process(es) for the
451690d2dfabSRick Macklem 	 * additional mirrors.
451790d2dfabSRick Macklem 	 */
451890d2dfabSRick Macklem 	failpos = -1;
451990d2dfabSRick Macklem 	for (i = 0; i < mirrorcnt - 1 && error == 0; i++, tpf++, tdsc++) {
452090d2dfabSRick Macklem 		tpf->dsf_dir = dsdir[i];
452190d2dfabSRick Macklem 		tdsc->tcred = tcred;
452290d2dfabSRick Macklem 		tdsc->p = p;
452390d2dfabSRick Macklem 		tdsc->pf = tpf;
452490d2dfabSRick Macklem 		tdsc->createva = *vap;
452525705dd5SRick Macklem 		NFSBCOPY(&fh, &tdsc->fh, sizeof(fh));
452690d2dfabSRick Macklem 		tdsc->va = va;
452790d2dfabSRick Macklem 		tdsc->dvp = dvp[i];
452890d2dfabSRick Macklem 		tdsc->done = 0;
452990d2dfabSRick Macklem 		tdsc->inprog = 0;
453090d2dfabSRick Macklem 		tdsc->err = 0;
453190d2dfabSRick Macklem 		ret = EIO;
453290d2dfabSRick Macklem 		if (nfs_pnfsiothreads != 0) {
453390d2dfabSRick Macklem 			ret = nfs_pnfsio(start_dscreate, tdsc);
453490d2dfabSRick Macklem 			NFSD_DEBUG(4, "nfsrv_pnfscreate: nfs_pnfsio=%d\n", ret);
453590d2dfabSRick Macklem 		}
453690d2dfabSRick Macklem 		if (ret != 0) {
453790d2dfabSRick Macklem 			ret = nfsrv_dscreate(dvp[i], vap, &va, &fh, tpf, NULL,
453890d2dfabSRick Macklem 			    NULL, tcred, p, NULL);
453990d2dfabSRick Macklem 			if (ret != 0) {
454090d2dfabSRick Macklem 				KASSERT(error == 0, ("nfsrv_dscreate err=%d",
454190d2dfabSRick Macklem 				    error));
454290d2dfabSRick Macklem 				if (failpos == -1 && nfsds_failerr(ret))
454390d2dfabSRick Macklem 					failpos = i;
454490d2dfabSRick Macklem 				else
454590d2dfabSRick Macklem 					error = ret;
454690d2dfabSRick Macklem 			}
454790d2dfabSRick Macklem 		}
454890d2dfabSRick Macklem 	}
454990d2dfabSRick Macklem 	if (error == 0) {
455090d2dfabSRick Macklem 		tpf->dsf_dir = dsdir[mirrorcnt - 1];
455190d2dfabSRick Macklem 		error = nfsrv_dscreate(dvp[mirrorcnt - 1], vap, &va, &fh, tpf,
455290d2dfabSRick Macklem 		    &dsattr, NULL, tcred, p, NULL);
455390d2dfabSRick Macklem 		if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(error)) {
455490d2dfabSRick Macklem 			failpos = mirrorcnt - 1;
455590d2dfabSRick Macklem 			error = 0;
455690d2dfabSRick Macklem 		}
455790d2dfabSRick Macklem 	}
455890d2dfabSRick Macklem 	timo = hz / 50;		/* Wait for 20msec. */
455990d2dfabSRick Macklem 	if (timo < 1)
456090d2dfabSRick Macklem 		timo = 1;
456190d2dfabSRick Macklem 	/* Wait for kernel task(s) to complete. */
456290d2dfabSRick Macklem 	for (tdsc = dsc, i = 0; i < mirrorcnt - 1; i++, tdsc++) {
456390d2dfabSRick Macklem 		while (tdsc->inprog != 0 && tdsc->done == 0)
456490d2dfabSRick Macklem 			tsleep(&tdsc->tsk, PVFS, "srvdcr", timo);
456590d2dfabSRick Macklem 		if (tdsc->err != 0) {
456690d2dfabSRick Macklem 			if (failpos == -1 && nfsds_failerr(tdsc->err))
456790d2dfabSRick Macklem 				failpos = i;
456890d2dfabSRick Macklem 			else if (error == 0)
456990d2dfabSRick Macklem 				error = tdsc->err;
457090d2dfabSRick Macklem 		}
457190d2dfabSRick Macklem 	}
457290d2dfabSRick Macklem 
457390d2dfabSRick Macklem 	/*
457490d2dfabSRick Macklem 	 * If failpos has been set, that mirror has failed, so it needs
457590d2dfabSRick Macklem 	 * to be disabled.
457690d2dfabSRick Macklem 	 */
457790d2dfabSRick Macklem 	if (failpos >= 0) {
457890d2dfabSRick Macklem 		nmp = VFSTONFS(dvp[failpos]->v_mount);
457990d2dfabSRick Macklem 		NFSLOCKMNT(nmp);
458090d2dfabSRick Macklem 		if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
458190d2dfabSRick Macklem 		     NFSMNTP_CANCELRPCS)) == 0) {
458290d2dfabSRick Macklem 			nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
458390d2dfabSRick Macklem 			NFSUNLOCKMNT(nmp);
4584de9a1a70SRick Macklem 			ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p);
458590d2dfabSRick Macklem 			NFSD_DEBUG(4, "dscreatfail fail=%d ds=%p\n", failpos,
458690d2dfabSRick Macklem 			    ds);
458790d2dfabSRick Macklem 			if (ds != NULL)
458890d2dfabSRick Macklem 				nfsrv_killrpcs(nmp);
458990d2dfabSRick Macklem 			NFSLOCKMNT(nmp);
459090d2dfabSRick Macklem 			nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
459190d2dfabSRick Macklem 			wakeup(nmp);
459290d2dfabSRick Macklem 		}
459390d2dfabSRick Macklem 		NFSUNLOCKMNT(nmp);
459490d2dfabSRick Macklem 	}
459590d2dfabSRick Macklem 
459690d2dfabSRick Macklem 	NFSFREECRED(tcred);
459790d2dfabSRick Macklem 	if (error == 0) {
459890d2dfabSRick Macklem 		ASSERT_VOP_ELOCKED(vp, "nfsrv_pnfscreate vp");
45991aabf3fdSRick Macklem 
46001aabf3fdSRick Macklem 		NFSD_DEBUG(4, "nfsrv_pnfscreate: mirrorcnt=%d maxmirror=%d\n",
46011aabf3fdSRick Macklem 		    mirrorcnt, nfsrv_maxpnfsmirror);
46021aabf3fdSRick Macklem 		/*
46031aabf3fdSRick Macklem 		 * For all mirrors that couldn't be created, fill in the
46041aabf3fdSRick Macklem 		 * *pf structure, but with an IP address == 0.0.0.0.
46051aabf3fdSRick Macklem 		 */
46061aabf3fdSRick Macklem 		tpf = pf + mirrorcnt;
46071aabf3fdSRick Macklem 		for (i = mirrorcnt; i < nfsrv_maxpnfsmirror; i++, tpf++) {
46081aabf3fdSRick Macklem 			*tpf = *pf;
46091aabf3fdSRick Macklem 			tpf->dsf_sin.sin_family = AF_INET;
46101aabf3fdSRick Macklem 			tpf->dsf_sin.sin_len = sizeof(struct sockaddr_in);
46111aabf3fdSRick Macklem 			tpf->dsf_sin.sin_addr.s_addr = 0;
46121aabf3fdSRick Macklem 			tpf->dsf_sin.sin_port = 0;
46131aabf3fdSRick Macklem 		}
46141aabf3fdSRick Macklem 
461590d2dfabSRick Macklem 		error = vn_extattr_set(vp, IO_NODELOCKED,
461690d2dfabSRick Macklem 		    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
46171aabf3fdSRick Macklem 		    sizeof(*pf) * nfsrv_maxpnfsmirror, (char *)pf, p);
461890d2dfabSRick Macklem 		if (error == 0)
461990d2dfabSRick Macklem 			error = vn_extattr_set(vp, IO_NODELOCKED,
462090d2dfabSRick Macklem 			    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr",
462190d2dfabSRick Macklem 			    sizeof(dsattr), (char *)&dsattr, p);
462290d2dfabSRick Macklem 		if (error != 0)
462390d2dfabSRick Macklem 			printf("pNFS: pnfscreate setextattr=%d\n",
462490d2dfabSRick Macklem 			    error);
462590d2dfabSRick Macklem 	} else
462690d2dfabSRick Macklem 		printf("pNFS: pnfscreate=%d\n", error);
462790d2dfabSRick Macklem 	free(pf, M_TEMP);
462890d2dfabSRick Macklem 	free(dsc, M_TEMP);
462990d2dfabSRick Macklem }
463090d2dfabSRick Macklem 
463190d2dfabSRick Macklem /*
463290d2dfabSRick Macklem  * Get the information needed to remove the pNFS Data Server file from the
463390d2dfabSRick Macklem  * Metadata file.  Upon success, ddvp is set non-NULL to the locked
463490d2dfabSRick Macklem  * DS directory vnode.  The caller must unlock *ddvp when done with it.
463590d2dfabSRick Macklem  */
463690d2dfabSRick Macklem static void
nfsrv_pnfsremovesetup(struct vnode * vp,NFSPROC_T * p,struct vnode ** dvpp,int * mirrorcntp,char * fname,fhandle_t * fhp)463790d2dfabSRick Macklem nfsrv_pnfsremovesetup(struct vnode *vp, NFSPROC_T *p, struct vnode **dvpp,
463890d2dfabSRick Macklem     int *mirrorcntp, char *fname, fhandle_t *fhp)
463990d2dfabSRick Macklem {
464090d2dfabSRick Macklem 	struct vattr va;
464190d2dfabSRick Macklem 	struct ucred *tcred;
464290d2dfabSRick Macklem 	char *buf;
464390d2dfabSRick Macklem 	int buflen, error;
464490d2dfabSRick Macklem 
464590d2dfabSRick Macklem 	dvpp[0] = NULL;
464690d2dfabSRick Macklem 	/* If not an exported regular file or not a pNFS server, just return. */
464790d2dfabSRick Macklem 	if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 ||
464890d2dfabSRick Macklem 	    nfsrv_devidcnt == 0)
464990d2dfabSRick Macklem 		return;
465090d2dfabSRick Macklem 
465190d2dfabSRick Macklem 	/* Check to see if this is the last hard link. */
465290d2dfabSRick Macklem 	tcred = newnfs_getcred();
465390d2dfabSRick Macklem 	error = VOP_GETATTR(vp, &va, tcred);
465490d2dfabSRick Macklem 	NFSFREECRED(tcred);
465590d2dfabSRick Macklem 	if (error != 0) {
465690d2dfabSRick Macklem 		printf("pNFS: nfsrv_pnfsremovesetup getattr=%d\n", error);
465790d2dfabSRick Macklem 		return;
465890d2dfabSRick Macklem 	}
465990d2dfabSRick Macklem 	if (va.va_nlink > 1)
466090d2dfabSRick Macklem 		return;
466190d2dfabSRick Macklem 
466290d2dfabSRick Macklem 	error = nfsvno_getfh(vp, fhp, p);
466390d2dfabSRick Macklem 	if (error != 0) {
466490d2dfabSRick Macklem 		printf("pNFS: nfsrv_pnfsremovesetup getfh=%d\n", error);
466590d2dfabSRick Macklem 		return;
466690d2dfabSRick Macklem 	}
466790d2dfabSRick Macklem 
466890d2dfabSRick Macklem 	buflen = 1024;
466990d2dfabSRick Macklem 	buf = malloc(buflen, M_TEMP, M_WAITOK);
467090d2dfabSRick Macklem 	/* Get the directory vnode for the DS mount and the file handle. */
467190d2dfabSRick Macklem 	error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, dvpp,
467290d2dfabSRick Macklem 	    NULL, NULL, fname, NULL, NULL, NULL, NULL, NULL);
467390d2dfabSRick Macklem 	free(buf, M_TEMP);
467490d2dfabSRick Macklem 	if (error != 0)
467590d2dfabSRick Macklem 		printf("pNFS: nfsrv_pnfsremovesetup getsockmnt=%d\n", error);
467690d2dfabSRick Macklem }
467790d2dfabSRick Macklem 
467890d2dfabSRick Macklem /*
467990d2dfabSRick Macklem  * Remove a DS data file for nfsrv_pnfsremove(). Called for each mirror.
468090d2dfabSRick Macklem  * The arguments are in a structure, so that they can be passed through
468190d2dfabSRick Macklem  * taskqueue for a kernel process to execute this function.
468290d2dfabSRick Macklem  */
/*
 * NOTE(review): the code that fills in and consumes this structure is
 * outside the reviewed range; the field notes below are inferred from the
 * parameters of nfsrv_dsremove() and should be confirmed.
 */
468390d2dfabSRick Macklem struct nfsrvdsremove {
468490d2dfabSRick Macklem 	int			done;	/* presumably set when the task has finished - confirm */
468590d2dfabSRick Macklem 	int			inprog;	/* presumably non-zero while the task is queued or running - confirm */
468690d2dfabSRick Macklem 	struct task		tsk;	/* presumably the taskqueue task - confirm */
468790d2dfabSRick Macklem 	struct ucred		*tcred;	/* presumably the tcred argument of nfsrv_dsremove() */
468890d2dfabSRick Macklem 	struct vnode		*dvp;	/* presumably the dvp argument of nfsrv_dsremove() */
468990d2dfabSRick Macklem 	NFSPROC_T		*p;	/* presumably the p argument of nfsrv_dsremove() */
469090d2dfabSRick Macklem 	int			err;	/* presumably the return value of nfsrv_dsremove() */
469190d2dfabSRick Macklem 	char			fname[PNFS_FILENAME_LEN + 1];	/* NUL-terminated DS file name */
469290d2dfabSRick Macklem };
469390d2dfabSRick Macklem 
469490d2dfabSRick Macklem static int
nfsrv_dsremove(struct vnode * dvp,char * fname,struct ucred * tcred,NFSPROC_T * p)469590d2dfabSRick Macklem nfsrv_dsremove(struct vnode *dvp, char *fname, struct ucred *tcred,
469690d2dfabSRick Macklem     NFSPROC_T *p)
469790d2dfabSRick Macklem {
469890d2dfabSRick Macklem 	struct nameidata named;
469990d2dfabSRick Macklem 	struct vnode *nvp;
470090d2dfabSRick Macklem 	char *bufp;
470190d2dfabSRick Macklem 	u_long *hashp;
470290d2dfabSRick Macklem 	int error;
470390d2dfabSRick Macklem 
470490d2dfabSRick Macklem 	error = NFSVOPLOCK(dvp, LK_EXCLUSIVE);
470590d2dfabSRick Macklem 	if (error != 0)
470690d2dfabSRick Macklem 		return (error);
470790d2dfabSRick Macklem 	named.ni_cnd.cn_nameiop = DELETE;
470890d2dfabSRick Macklem 	named.ni_cnd.cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
470990d2dfabSRick Macklem 	named.ni_cnd.cn_cred = tcred;
47105b5b7e2cSMateusz Guzik 	named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF;
471190d2dfabSRick Macklem 	nfsvno_setpathbuf(&named, &bufp, &hashp);
471290d2dfabSRick Macklem 	named.ni_cnd.cn_nameptr = bufp;
471390d2dfabSRick Macklem 	named.ni_cnd.cn_namelen = strlen(fname);
471490d2dfabSRick Macklem 	strlcpy(bufp, fname, NAME_MAX);
471590d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_pnfsremove: filename=%s\n", bufp);
471690d2dfabSRick Macklem 	error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd);
471790d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_pnfsremove: aft LOOKUP=%d\n", error);
471890d2dfabSRick Macklem 	if (error == 0) {
471990d2dfabSRick Macklem 		error = VOP_REMOVE(dvp, nvp, &named.ni_cnd);
472090d2dfabSRick Macklem 		vput(nvp);
472190d2dfabSRick Macklem 	}
4722b249ce48SMateusz Guzik 	NFSVOPUNLOCK(dvp);
472390d2dfabSRick Macklem 	nfsvno_relpathbuf(&named);
472490d2dfabSRick Macklem 	if (error != 0)
472590d2dfabSRick Macklem 		printf("pNFS: nfsrv_pnfsremove failed=%d\n", error);
472690d2dfabSRick Macklem 	return (error);
472790d2dfabSRick Macklem }
472890d2dfabSRick Macklem 
472990d2dfabSRick Macklem /*
473090d2dfabSRick Macklem  * Start up the thread that will execute nfsrv_dsremove().
473190d2dfabSRick Macklem  */
473290d2dfabSRick Macklem static void
start_dsremove(void * arg,int pending)473390d2dfabSRick Macklem start_dsremove(void *arg, int pending)
473490d2dfabSRick Macklem {
473590d2dfabSRick Macklem 	struct nfsrvdsremove *dsrm;
473690d2dfabSRick Macklem 
473790d2dfabSRick Macklem 	dsrm = (struct nfsrvdsremove *)arg;
473890d2dfabSRick Macklem 	dsrm->err = nfsrv_dsremove(dsrm->dvp, dsrm->fname, dsrm->tcred,
473990d2dfabSRick Macklem 	    dsrm->p);
474090d2dfabSRick Macklem 	dsrm->done = 1;
474190d2dfabSRick Macklem 	NFSD_DEBUG(4, "start_dsremove: err=%d\n", dsrm->err);
474290d2dfabSRick Macklem }
474390d2dfabSRick Macklem 
474490d2dfabSRick Macklem /*
474590d2dfabSRick Macklem  * Remove a pNFS data file from a Data Server.
474690d2dfabSRick Macklem  * nfsrv_pnfsremovesetup() must have been called before the MDS file was
474790d2dfabSRick Macklem  * removed to set up the dvp and fill in the FH.
474890d2dfabSRick Macklem  */
474990d2dfabSRick Macklem static void
nfsrv_pnfsremove(struct vnode ** dvp,int mirrorcnt,char * fname,fhandle_t * fhp,NFSPROC_T * p)475090d2dfabSRick Macklem nfsrv_pnfsremove(struct vnode **dvp, int mirrorcnt, char *fname, fhandle_t *fhp,
475190d2dfabSRick Macklem     NFSPROC_T *p)
475290d2dfabSRick Macklem {
475390d2dfabSRick Macklem 	struct ucred *tcred;
475490d2dfabSRick Macklem 	struct nfsrvdsremove *dsrm, *tdsrm;
475590d2dfabSRick Macklem 	struct nfsdevice *ds;
475690d2dfabSRick Macklem 	struct nfsmount *nmp;
475790d2dfabSRick Macklem 	int failpos, i, ret, timo;
475890d2dfabSRick Macklem 
475990d2dfabSRick Macklem 	tcred = newnfs_getcred();
476090d2dfabSRick Macklem 	dsrm = NULL;
476190d2dfabSRick Macklem 	if (mirrorcnt > 1)
476290d2dfabSRick Macklem 		dsrm = malloc(sizeof(*dsrm) * mirrorcnt - 1, M_TEMP, M_WAITOK);
476390d2dfabSRick Macklem 	/*
476490d2dfabSRick Macklem 	 * Remove the file on each DS mirror, using kernel process(es) for the
476590d2dfabSRick Macklem 	 * additional mirrors.
476690d2dfabSRick Macklem 	 */
476790d2dfabSRick Macklem 	failpos = -1;
476890d2dfabSRick Macklem 	for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) {
476990d2dfabSRick Macklem 		tdsrm->tcred = tcred;
477090d2dfabSRick Macklem 		tdsrm->p = p;
477190d2dfabSRick Macklem 		tdsrm->dvp = dvp[i];
477290d2dfabSRick Macklem 		strlcpy(tdsrm->fname, fname, PNFS_FILENAME_LEN + 1);
477390d2dfabSRick Macklem 		tdsrm->inprog = 0;
477490d2dfabSRick Macklem 		tdsrm->done = 0;
477590d2dfabSRick Macklem 		tdsrm->err = 0;
477690d2dfabSRick Macklem 		ret = EIO;
477790d2dfabSRick Macklem 		if (nfs_pnfsiothreads != 0) {
477890d2dfabSRick Macklem 			ret = nfs_pnfsio(start_dsremove, tdsrm);
477990d2dfabSRick Macklem 			NFSD_DEBUG(4, "nfsrv_pnfsremove: nfs_pnfsio=%d\n", ret);
478090d2dfabSRick Macklem 		}
478190d2dfabSRick Macklem 		if (ret != 0) {
478290d2dfabSRick Macklem 			ret = nfsrv_dsremove(dvp[i], fname, tcred, p);
478390d2dfabSRick Macklem 			if (failpos == -1 && nfsds_failerr(ret))
478490d2dfabSRick Macklem 				failpos = i;
478590d2dfabSRick Macklem 		}
478690d2dfabSRick Macklem 	}
478790d2dfabSRick Macklem 	ret = nfsrv_dsremove(dvp[mirrorcnt - 1], fname, tcred, p);
478890d2dfabSRick Macklem 	if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(ret))
478990d2dfabSRick Macklem 		failpos = mirrorcnt - 1;
479090d2dfabSRick Macklem 	timo = hz / 50;		/* Wait for 20msec. */
479190d2dfabSRick Macklem 	if (timo < 1)
479290d2dfabSRick Macklem 		timo = 1;
479390d2dfabSRick Macklem 	/* Wait for kernel task(s) to complete. */
479490d2dfabSRick Macklem 	for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) {
479590d2dfabSRick Macklem 		while (tdsrm->inprog != 0 && tdsrm->done == 0)
479690d2dfabSRick Macklem 			tsleep(&tdsrm->tsk, PVFS, "srvdsrm", timo);
479790d2dfabSRick Macklem 		if (failpos == -1 && nfsds_failerr(tdsrm->err))
479890d2dfabSRick Macklem 			failpos = i;
479990d2dfabSRick Macklem 	}
480090d2dfabSRick Macklem 
480190d2dfabSRick Macklem 	/*
480290d2dfabSRick Macklem 	 * If failpos has been set, that mirror has failed, so it needs
480390d2dfabSRick Macklem 	 * to be disabled.
480490d2dfabSRick Macklem 	 */
480590d2dfabSRick Macklem 	if (failpos >= 0) {
480690d2dfabSRick Macklem 		nmp = VFSTONFS(dvp[failpos]->v_mount);
480790d2dfabSRick Macklem 		NFSLOCKMNT(nmp);
480890d2dfabSRick Macklem 		if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
480990d2dfabSRick Macklem 		     NFSMNTP_CANCELRPCS)) == 0) {
481090d2dfabSRick Macklem 			nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
481190d2dfabSRick Macklem 			NFSUNLOCKMNT(nmp);
4812de9a1a70SRick Macklem 			ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p);
481390d2dfabSRick Macklem 			NFSD_DEBUG(4, "dsremovefail fail=%d ds=%p\n", failpos,
481490d2dfabSRick Macklem 			    ds);
481590d2dfabSRick Macklem 			if (ds != NULL)
481690d2dfabSRick Macklem 				nfsrv_killrpcs(nmp);
481790d2dfabSRick Macklem 			NFSLOCKMNT(nmp);
481890d2dfabSRick Macklem 			nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
481990d2dfabSRick Macklem 			wakeup(nmp);
482090d2dfabSRick Macklem 		}
482190d2dfabSRick Macklem 		NFSUNLOCKMNT(nmp);
482290d2dfabSRick Macklem 	}
482390d2dfabSRick Macklem 
482490d2dfabSRick Macklem 	/* Get rid all layouts for the file. */
482590d2dfabSRick Macklem 	nfsrv_freefilelayouts(fhp);
482690d2dfabSRick Macklem 
482790d2dfabSRick Macklem 	NFSFREECRED(tcred);
482890d2dfabSRick Macklem 	free(dsrm, M_TEMP);
482990d2dfabSRick Macklem }
483090d2dfabSRick Macklem 
483190d2dfabSRick Macklem /*
483290d2dfabSRick Macklem  * Generate a file name based on the file handle and put it in *bufp.
483390d2dfabSRick Macklem  * Return the number of bytes generated.
483490d2dfabSRick Macklem  */
483590d2dfabSRick Macklem static int
nfsrv_putfhname(fhandle_t * fhp,char * bufp)483690d2dfabSRick Macklem nfsrv_putfhname(fhandle_t *fhp, char *bufp)
483790d2dfabSRick Macklem {
483890d2dfabSRick Macklem 	int i;
483990d2dfabSRick Macklem 	uint8_t *cp;
484090d2dfabSRick Macklem 	const uint8_t *hexdigits = "0123456789abcdef";
484190d2dfabSRick Macklem 
484290d2dfabSRick Macklem 	cp = (uint8_t *)fhp;
484390d2dfabSRick Macklem 	for (i = 0; i < sizeof(*fhp); i++) {
484490d2dfabSRick Macklem 		bufp[2 * i] = hexdigits[(*cp >> 4) & 0xf];
484590d2dfabSRick Macklem 		bufp[2 * i + 1] = hexdigits[*cp++ & 0xf];
484690d2dfabSRick Macklem 	}
484790d2dfabSRick Macklem 	bufp[2 * i] = '\0';
484890d2dfabSRick Macklem 	return (2 * i);
484990d2dfabSRick Macklem }
485090d2dfabSRick Macklem 
485190d2dfabSRick Macklem /*
485290d2dfabSRick Macklem  * Update the Metadata file's attributes from the DS file when a Read/Write
485390d2dfabSRick Macklem  * layout is returned.
485490d2dfabSRick Macklem  * Basically just call nfsrv_proxyds() with procedure == NFSPROC_LAYOUTRETURN
485590d2dfabSRick Macklem  * so that it does a nfsrv_getattrdsrpc() and nfsrv_setextattr() on the DS file.
485690d2dfabSRick Macklem  */
485790d2dfabSRick Macklem int
nfsrv_updatemdsattr(struct vnode * vp,struct nfsvattr * nap,NFSPROC_T * p)485890d2dfabSRick Macklem nfsrv_updatemdsattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p)
485990d2dfabSRick Macklem {
486090d2dfabSRick Macklem 	struct ucred *tcred;
486190d2dfabSRick Macklem 	int error;
486290d2dfabSRick Macklem 
486390d2dfabSRick Macklem 	/* Do this as root so that it won't fail with EACCES. */
486490d2dfabSRick Macklem 	tcred = newnfs_getcred();
48654ce21f37SRick Macklem 	error = nfsrv_proxyds(vp, 0, 0, tcred, p, NFSPROC_LAYOUTRETURN,
4866c057a378SRick Macklem 	    NULL, NULL, NULL, nap, NULL, NULL, 0, NULL);
486790d2dfabSRick Macklem 	NFSFREECRED(tcred);
486890d2dfabSRick Macklem 	return (error);
486990d2dfabSRick Macklem }
487090d2dfabSRick Macklem 
487190d2dfabSRick Macklem /*
487290d2dfabSRick Macklem  * Set the NFSv4 ACL on the DS file to the same ACL as the MDS file.
487390d2dfabSRick Macklem  */
487490d2dfabSRick Macklem static int
nfsrv_dssetacl(struct vnode * vp,struct acl * aclp,struct ucred * cred,NFSPROC_T * p)487590d2dfabSRick Macklem nfsrv_dssetacl(struct vnode *vp, struct acl *aclp, struct ucred *cred,
487690d2dfabSRick Macklem     NFSPROC_T *p)
487790d2dfabSRick Macklem {
487890d2dfabSRick Macklem 	int error;
487990d2dfabSRick Macklem 
48804ce21f37SRick Macklem 	error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETACL,
4881c057a378SRick Macklem 	    NULL, NULL, NULL, NULL, aclp, NULL, 0, NULL);
488290d2dfabSRick Macklem 	return (error);
488390d2dfabSRick Macklem }
488490d2dfabSRick Macklem 
488590d2dfabSRick Macklem static int
nfsrv_proxyds(struct vnode * vp,off_t off,int cnt,struct ucred * cred,struct thread * p,int ioproc,struct mbuf ** mpp,char * cp,struct mbuf ** mpp2,struct nfsvattr * nap,struct acl * aclp,off_t * offp,int content,bool * eofp)48864ce21f37SRick Macklem nfsrv_proxyds(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
48874ce21f37SRick Macklem     struct thread *p, int ioproc, struct mbuf **mpp, char *cp,
4888c057a378SRick Macklem     struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp,
4889c057a378SRick Macklem     off_t *offp, int content, bool *eofp)
489090d2dfabSRick Macklem {
489190d2dfabSRick Macklem 	struct nfsmount *nmp[NFSDEV_MAXMIRRORS], *failnmp;
489290d2dfabSRick Macklem 	fhandle_t fh[NFSDEV_MAXMIRRORS];
489390d2dfabSRick Macklem 	struct vnode *dvp[NFSDEV_MAXMIRRORS];
489490d2dfabSRick Macklem 	struct nfsdevice *ds;
489590d2dfabSRick Macklem 	struct pnfsdsattr dsattr;
489614eff785SRick Macklem 	struct opnfsdsattr odsattr;
489790d2dfabSRick Macklem 	char *buf;
489890d2dfabSRick Macklem 	int buflen, error, failpos, i, mirrorcnt, origmircnt, trycnt;
489990d2dfabSRick Macklem 
490090d2dfabSRick Macklem 	NFSD_DEBUG(4, "in nfsrv_proxyds\n");
490190d2dfabSRick Macklem 	/*
490290d2dfabSRick Macklem 	 * If not a regular file, not exported or not a pNFS server,
490390d2dfabSRick Macklem 	 * just return ENOENT.
490490d2dfabSRick Macklem 	 */
490590d2dfabSRick Macklem 	if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 ||
490690d2dfabSRick Macklem 	    nfsrv_devidcnt == 0)
490790d2dfabSRick Macklem 		return (ENOENT);
490890d2dfabSRick Macklem 
490990d2dfabSRick Macklem 	buflen = 1024;
491090d2dfabSRick Macklem 	buf = malloc(buflen, M_TEMP, M_WAITOK);
491190d2dfabSRick Macklem 	error = 0;
491290d2dfabSRick Macklem 
491390d2dfabSRick Macklem 	/*
491490d2dfabSRick Macklem 	 * For Getattr, get the Change attribute (va_filerev) and size (va_size)
491590d2dfabSRick Macklem 	 * from the MetaData file's extended attribute.
491690d2dfabSRick Macklem 	 */
491790d2dfabSRick Macklem 	if (ioproc == NFSPROC_GETATTR) {
491890d2dfabSRick Macklem 		error = vn_extattr_get(vp, IO_NODELOCKED,
491990d2dfabSRick Macklem 		    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", &buflen, buf,
492090d2dfabSRick Macklem 		    p);
492190d2dfabSRick Macklem 		if (error == 0) {
492214eff785SRick Macklem 			if (buflen == sizeof(odsattr)) {
492314eff785SRick Macklem 				NFSBCOPY(buf, &odsattr, buflen);
492414eff785SRick Macklem 				nap->na_filerev = odsattr.dsa_filerev;
492514eff785SRick Macklem 				nap->na_size = odsattr.dsa_size;
492614eff785SRick Macklem 				nap->na_atime = odsattr.dsa_atime;
492714eff785SRick Macklem 				nap->na_mtime = odsattr.dsa_mtime;
492814eff785SRick Macklem 				/*
492914eff785SRick Macklem 				 * Fake na_bytes by rounding up na_size.
493014eff785SRick Macklem 				 * Since we don't know the block size, just
493114eff785SRick Macklem 				 * use BLKDEV_IOSIZE.
493214eff785SRick Macklem 				 */
493314eff785SRick Macklem 				nap->na_bytes = (odsattr.dsa_size +
493414eff785SRick Macklem 				    BLKDEV_IOSIZE - 1) & ~(BLKDEV_IOSIZE - 1);
493514eff785SRick Macklem 			} else if (buflen == sizeof(dsattr)) {
493690d2dfabSRick Macklem 				NFSBCOPY(buf, &dsattr, buflen);
493790d2dfabSRick Macklem 				nap->na_filerev = dsattr.dsa_filerev;
493890d2dfabSRick Macklem 				nap->na_size = dsattr.dsa_size;
493990d2dfabSRick Macklem 				nap->na_atime = dsattr.dsa_atime;
494090d2dfabSRick Macklem 				nap->na_mtime = dsattr.dsa_mtime;
494114eff785SRick Macklem 				nap->na_bytes = dsattr.dsa_bytes;
494214eff785SRick Macklem 			} else
494314eff785SRick Macklem 				error = ENXIO;
494414eff785SRick Macklem 		}
494514eff785SRick Macklem 		if (error == 0) {
494690d2dfabSRick Macklem 			/*
494790d2dfabSRick Macklem 			 * If nfsrv_pnfsgetdsattr is 0 or nfsrv_checkdsattr()
494890d2dfabSRick Macklem 			 * returns 0, just return now.  nfsrv_checkdsattr()
494990d2dfabSRick Macklem 			 * returns 0 if there is no Read/Write layout
495090d2dfabSRick Macklem 			 * plus either an Open/Write_access or Write
495190d2dfabSRick Macklem 			 * delegation issued to a client for the file.
495290d2dfabSRick Macklem 			 */
495390d2dfabSRick Macklem 			if (nfsrv_pnfsgetdsattr == 0 ||
49542e670777SRick Macklem 			    nfsrv_checkdsattr(vp, p) == 0) {
495590d2dfabSRick Macklem 				free(buf, M_TEMP);
495690d2dfabSRick Macklem 				return (error);
495790d2dfabSRick Macklem 			}
495890d2dfabSRick Macklem 		}
495990d2dfabSRick Macklem 
496090d2dfabSRick Macklem 		/*
496190d2dfabSRick Macklem 		 * Clear ENOATTR so the code below will attempt to do a
496290d2dfabSRick Macklem 		 * nfsrv_getattrdsrpc() to get the attributes and (re)create
496390d2dfabSRick Macklem 		 * the extended attribute.
496490d2dfabSRick Macklem 		 */
496590d2dfabSRick Macklem 		if (error == ENOATTR)
496690d2dfabSRick Macklem 			error = 0;
496790d2dfabSRick Macklem 	}
496890d2dfabSRick Macklem 
496990d2dfabSRick Macklem 	origmircnt = -1;
497090d2dfabSRick Macklem 	trycnt = 0;
497190d2dfabSRick Macklem tryagain:
497290d2dfabSRick Macklem 	if (error == 0) {
497390d2dfabSRick Macklem 		buflen = 1024;
49743e5ba2e1SRick Macklem 		if (ioproc == NFSPROC_READDS && NFSVOPISLOCKED(vp) ==
49753e5ba2e1SRick Macklem 		    LK_EXCLUSIVE)
49763e5ba2e1SRick Macklem 			printf("nfsrv_proxyds: Readds vp exclusively locked\n");
497790d2dfabSRick Macklem 		error = nfsrv_dsgetsockmnt(vp, LK_SHARED, buf, &buflen,
497890d2dfabSRick Macklem 		    &mirrorcnt, p, dvp, fh, NULL, NULL, NULL, NULL, NULL,
497990d2dfabSRick Macklem 		    NULL, NULL);
498090d2dfabSRick Macklem 		if (error == 0) {
498190d2dfabSRick Macklem 			for (i = 0; i < mirrorcnt; i++)
498290d2dfabSRick Macklem 				nmp[i] = VFSTONFS(dvp[i]->v_mount);
498390d2dfabSRick Macklem 		} else
498490d2dfabSRick Macklem 			printf("pNFS: proxy getextattr sockaddr=%d\n", error);
498590d2dfabSRick Macklem 	} else
498690d2dfabSRick Macklem 		printf("pNFS: nfsrv_dsgetsockmnt=%d\n", error);
498790d2dfabSRick Macklem 	if (error == 0) {
498890d2dfabSRick Macklem 		failpos = -1;
498990d2dfabSRick Macklem 		if (origmircnt == -1)
499090d2dfabSRick Macklem 			origmircnt = mirrorcnt;
499190d2dfabSRick Macklem 		/*
499290d2dfabSRick Macklem 		 * If failpos is set to a mirror#, then that mirror has
4993c057a378SRick Macklem 		 * failed and will be disabled. For Read, Getattr and Seek, the
499490d2dfabSRick Macklem 		 * function only tries one mirror, so if that mirror has
499590d2dfabSRick Macklem 		 * failed, it will need to be retried. As such, increment
499690d2dfabSRick Macklem 		 * tryitagain for these cases.
499790d2dfabSRick Macklem 		 * For Write, Setattr and Setacl, the function tries all
499890d2dfabSRick Macklem 		 * mirrors and will not return an error for the case where
499990d2dfabSRick Macklem 		 * one mirror has failed. For these cases, the functioning
500090d2dfabSRick Macklem 		 * mirror(s) will have been modified, so a retry isn't
500190d2dfabSRick Macklem 		 * necessary. These functions will set failpos for the
500290d2dfabSRick Macklem 		 * failed mirror#.
500390d2dfabSRick Macklem 		 */
500490d2dfabSRick Macklem 		if (ioproc == NFSPROC_READDS) {
500590d2dfabSRick Macklem 			error = nfsrv_readdsrpc(fh, off, cnt, cred, p, nmp[0],
500690d2dfabSRick Macklem 			    mpp, mpp2);
500790d2dfabSRick Macklem 			if (nfsds_failerr(error) && mirrorcnt > 1) {
500890d2dfabSRick Macklem 				/*
500990d2dfabSRick Macklem 				 * Setting failpos will cause the mirror
501090d2dfabSRick Macklem 				 * to be disabled and then a retry of this
501190d2dfabSRick Macklem 				 * read is required.
501290d2dfabSRick Macklem 				 */
501390d2dfabSRick Macklem 				failpos = 0;
501490d2dfabSRick Macklem 				error = 0;
501590d2dfabSRick Macklem 				trycnt++;
501690d2dfabSRick Macklem 			}
501790d2dfabSRick Macklem 		} else if (ioproc == NFSPROC_WRITEDS)
501890d2dfabSRick Macklem 			error = nfsrv_writedsrpc(fh, off, cnt, cred, p, vp,
501990d2dfabSRick Macklem 			    &nmp[0], mirrorcnt, mpp, cp, &failpos);
502090d2dfabSRick Macklem 		else if (ioproc == NFSPROC_SETATTR)
502190d2dfabSRick Macklem 			error = nfsrv_setattrdsrpc(fh, cred, p, vp, &nmp[0],
502290d2dfabSRick Macklem 			    mirrorcnt, nap, &failpos);
502390d2dfabSRick Macklem 		else if (ioproc == NFSPROC_SETACL)
502490d2dfabSRick Macklem 			error = nfsrv_setacldsrpc(fh, cred, p, vp, &nmp[0],
502590d2dfabSRick Macklem 			    mirrorcnt, aclp, &failpos);
5026c057a378SRick Macklem 		else if (ioproc == NFSPROC_SEEKDS) {
5027c057a378SRick Macklem 			error = nfsrv_seekdsrpc(fh, offp, content, eofp, cred,
5028c057a378SRick Macklem 			    p, nmp[0]);
5029c057a378SRick Macklem 			if (nfsds_failerr(error) && mirrorcnt > 1) {
5030c057a378SRick Macklem 				/*
5031c057a378SRick Macklem 				 * Setting failpos will cause the mirror
5032c057a378SRick Macklem 				 * to be disabled and then a retry of this
5033c057a378SRick Macklem 				 * read is required.
5034c057a378SRick Macklem 				 */
5035c057a378SRick Macklem 				failpos = 0;
5036c057a378SRick Macklem 				error = 0;
5037c057a378SRick Macklem 				trycnt++;
5038c057a378SRick Macklem 			}
5039c057a378SRick Macklem 		} else if (ioproc == NFSPROC_ALLOCATE)
5040c057a378SRick Macklem 			error = nfsrv_allocatedsrpc(fh, off, *offp, cred, p, vp,
5041c057a378SRick Macklem 			    &nmp[0], mirrorcnt, &failpos);
5042bb958dcfSRick Macklem 		else if (ioproc == NFSPROC_DEALLOCATE)
5043bb958dcfSRick Macklem 			error = nfsrv_deallocatedsrpc(fh, off, *offp, cred, p,
5044bb958dcfSRick Macklem 			    vp, &nmp[0], mirrorcnt, &failpos);
504590d2dfabSRick Macklem 		else {
504690d2dfabSRick Macklem 			error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p,
504790d2dfabSRick Macklem 			    vp, nmp[mirrorcnt - 1], nap);
504890d2dfabSRick Macklem 			if (nfsds_failerr(error) && mirrorcnt > 1) {
504990d2dfabSRick Macklem 				/*
505090d2dfabSRick Macklem 				 * Setting failpos will cause the mirror
505190d2dfabSRick Macklem 				 * to be disabled and then a retry of this
505290d2dfabSRick Macklem 				 * getattr is required.
505390d2dfabSRick Macklem 				 */
505490d2dfabSRick Macklem 				failpos = mirrorcnt - 1;
505590d2dfabSRick Macklem 				error = 0;
505690d2dfabSRick Macklem 				trycnt++;
505790d2dfabSRick Macklem 			}
505890d2dfabSRick Macklem 		}
505990d2dfabSRick Macklem 		ds = NULL;
506090d2dfabSRick Macklem 		if (failpos >= 0) {
506190d2dfabSRick Macklem 			failnmp = nmp[failpos];
506290d2dfabSRick Macklem 			NFSLOCKMNT(failnmp);
506390d2dfabSRick Macklem 			if ((failnmp->nm_privflag & (NFSMNTP_FORCEDISM |
506490d2dfabSRick Macklem 			     NFSMNTP_CANCELRPCS)) == 0) {
506590d2dfabSRick Macklem 				failnmp->nm_privflag |= NFSMNTP_CANCELRPCS;
506690d2dfabSRick Macklem 				NFSUNLOCKMNT(failnmp);
5067de9a1a70SRick Macklem 				ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER,
5068de9a1a70SRick Macklem 				    failnmp, p);
506990d2dfabSRick Macklem 				NFSD_DEBUG(4, "dsldsnmp fail=%d ds=%p\n",
507090d2dfabSRick Macklem 				    failpos, ds);
507190d2dfabSRick Macklem 				if (ds != NULL)
507290d2dfabSRick Macklem 					nfsrv_killrpcs(failnmp);
507390d2dfabSRick Macklem 				NFSLOCKMNT(failnmp);
507490d2dfabSRick Macklem 				failnmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
507590d2dfabSRick Macklem 				wakeup(failnmp);
507690d2dfabSRick Macklem 			}
507790d2dfabSRick Macklem 			NFSUNLOCKMNT(failnmp);
507890d2dfabSRick Macklem 		}
507990d2dfabSRick Macklem 		for (i = 0; i < mirrorcnt; i++)
5080b249ce48SMateusz Guzik 			NFSVOPUNLOCK(dvp[i]);
508190d2dfabSRick Macklem 		NFSD_DEBUG(4, "nfsrv_proxyds: aft RPC=%d trya=%d\n", error,
508290d2dfabSRick Macklem 		    trycnt);
508390d2dfabSRick Macklem 		/* Try the Read/Getattr again if a mirror was deleted. */
508490d2dfabSRick Macklem 		if (ds != NULL && trycnt > 0 && trycnt < origmircnt)
508590d2dfabSRick Macklem 			goto tryagain;
508690d2dfabSRick Macklem 	} else {
508790d2dfabSRick Macklem 		/* Return ENOENT for any Extended Attribute error. */
508890d2dfabSRick Macklem 		error = ENOENT;
508990d2dfabSRick Macklem 	}
509090d2dfabSRick Macklem 	free(buf, M_TEMP);
509190d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_proxyds: error=%d\n", error);
509290d2dfabSRick Macklem 	return (error);
509390d2dfabSRick Macklem }
509490d2dfabSRick Macklem 
509590d2dfabSRick Macklem /*
509690d2dfabSRick Macklem  * Get the DS mount point, fh and directory from the "pnfsd.dsfile" extended
509790d2dfabSRick Macklem  * attribute.
509890d2dfabSRick Macklem  * newnmpp - If it points to a non-NULL nmp, that is the destination and needs
509990d2dfabSRick Macklem  *           to be checked.  If it points to a NULL nmp, then it returns
510090d2dfabSRick Macklem  *           a suitable destination.
510190d2dfabSRick Macklem  * curnmp - If non-NULL, it is the source mount for the copy.
510290d2dfabSRick Macklem  */
510390d2dfabSRick Macklem int
nfsrv_dsgetsockmnt(struct vnode * vp,int lktype,char * buf,int * buflenp,int * mirrorcntp,NFSPROC_T * p,struct vnode ** dvpp,fhandle_t * fhp,char * devid,char * fnamep,struct vnode ** nvpp,struct nfsmount ** newnmpp,struct nfsmount * curnmp,int * ippos,int * dsdirp)510490d2dfabSRick Macklem nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char *buf, int *buflenp,
510590d2dfabSRick Macklem     int *mirrorcntp, NFSPROC_T *p, struct vnode **dvpp, fhandle_t *fhp,
510690d2dfabSRick Macklem     char *devid, char *fnamep, struct vnode **nvpp, struct nfsmount **newnmpp,
510790d2dfabSRick Macklem     struct nfsmount *curnmp, int *ippos, int *dsdirp)
510890d2dfabSRick Macklem {
5109f808cf72SRick Macklem 	struct vnode *dvp, *nvp = NULL, **tdvpp;
51102f32675cSRick Macklem 	struct mount *mp;
511190d2dfabSRick Macklem 	struct nfsmount *nmp, *newnmp;
511290d2dfabSRick Macklem 	struct sockaddr *sad;
511390d2dfabSRick Macklem 	struct sockaddr_in *sin;
5114ed66a76bSRick Macklem 	struct nfsdevice *ds, *tds, *fndds;
511590d2dfabSRick Macklem 	struct pnfsdsfile *pf;
511690d2dfabSRick Macklem 	uint32_t dsdir;
511790d2dfabSRick Macklem 	int error, fhiszero, fnd, gotone, i, mirrorcnt;
511890d2dfabSRick Macklem 
511990d2dfabSRick Macklem 	ASSERT_VOP_LOCKED(vp, "nfsrv_dsgetsockmnt vp");
512090d2dfabSRick Macklem 	*mirrorcntp = 1;
512190d2dfabSRick Macklem 	tdvpp = dvpp;
512290d2dfabSRick Macklem 	if (nvpp != NULL)
512390d2dfabSRick Macklem 		*nvpp = NULL;
512490d2dfabSRick Macklem 	if (dvpp != NULL)
512590d2dfabSRick Macklem 		*dvpp = NULL;
512690d2dfabSRick Macklem 	if (ippos != NULL)
512790d2dfabSRick Macklem 		*ippos = -1;
512890d2dfabSRick Macklem 	if (newnmpp != NULL)
512990d2dfabSRick Macklem 		newnmp = *newnmpp;
513090d2dfabSRick Macklem 	else
513190d2dfabSRick Macklem 		newnmp = NULL;
51322f32675cSRick Macklem 	mp = vp->v_mount;
513390d2dfabSRick Macklem 	error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
513490d2dfabSRick Macklem 	    "pnfsd.dsfile", buflenp, buf, p);
513590d2dfabSRick Macklem 	mirrorcnt = *buflenp / sizeof(*pf);
513690d2dfabSRick Macklem 	if (error == 0 && (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS ||
513790d2dfabSRick Macklem 	    *buflenp != sizeof(*pf) * mirrorcnt))
513890d2dfabSRick Macklem 		error = ENOATTR;
513990d2dfabSRick Macklem 
514090d2dfabSRick Macklem 	pf = (struct pnfsdsfile *)buf;
514190d2dfabSRick Macklem 	/* If curnmp != NULL, check for a match in the mirror list. */
514290d2dfabSRick Macklem 	if (curnmp != NULL && error == 0) {
514390d2dfabSRick Macklem 		fnd = 0;
514490d2dfabSRick Macklem 		for (i = 0; i < mirrorcnt; i++, pf++) {
514590d2dfabSRick Macklem 			sad = (struct sockaddr *)&pf->dsf_sin;
514690d2dfabSRick Macklem 			if (nfsaddr2_match(sad, curnmp->nm_nam)) {
514790d2dfabSRick Macklem 				if (ippos != NULL)
514890d2dfabSRick Macklem 					*ippos = i;
514990d2dfabSRick Macklem 				fnd = 1;
515090d2dfabSRick Macklem 				break;
515190d2dfabSRick Macklem 			}
515290d2dfabSRick Macklem 		}
515390d2dfabSRick Macklem 		if (fnd == 0)
515490d2dfabSRick Macklem 			error = ENXIO;
515590d2dfabSRick Macklem 	}
515690d2dfabSRick Macklem 
515790d2dfabSRick Macklem 	gotone = 0;
515890d2dfabSRick Macklem 	pf = (struct pnfsdsfile *)buf;
515990d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_dsgetsockmnt: mirrorcnt=%d err=%d\n", mirrorcnt,
516090d2dfabSRick Macklem 	    error);
516190d2dfabSRick Macklem 	for (i = 0; i < mirrorcnt && error == 0; i++, pf++) {
516290d2dfabSRick Macklem 		fhiszero = 0;
516390d2dfabSRick Macklem 		sad = (struct sockaddr *)&pf->dsf_sin;
516490d2dfabSRick Macklem 		sin = &pf->dsf_sin;
516590d2dfabSRick Macklem 		dsdir = pf->dsf_dir;
516690d2dfabSRick Macklem 		if (dsdir >= nfsrv_dsdirsize) {
516790d2dfabSRick Macklem 			printf("nfsrv_dsgetsockmnt: dsdir=%d\n", dsdir);
516890d2dfabSRick Macklem 			error = ENOATTR;
516990d2dfabSRick Macklem 		} else if (nvpp != NULL && newnmp != NULL &&
517090d2dfabSRick Macklem 		    nfsaddr2_match(sad, newnmp->nm_nam))
517190d2dfabSRick Macklem 			error = EEXIST;
517290d2dfabSRick Macklem 		if (error == 0) {
517390d2dfabSRick Macklem 			if (ippos != NULL && curnmp == NULL &&
517490d2dfabSRick Macklem 			    sad->sa_family == AF_INET &&
517590d2dfabSRick Macklem 			    sin->sin_addr.s_addr == 0)
517690d2dfabSRick Macklem 				*ippos = i;
517790d2dfabSRick Macklem 			if (NFSBCMP(&zerofh, &pf->dsf_fh, sizeof(zerofh)) == 0)
517890d2dfabSRick Macklem 				fhiszero = 1;
517990d2dfabSRick Macklem 			/* Use the socket address to find the mount point. */
518090d2dfabSRick Macklem 			fndds = NULL;
518190d2dfabSRick Macklem 			NFSDDSLOCK();
5182ed66a76bSRick Macklem 			/* Find a match for the IP address. */
518390d2dfabSRick Macklem 			TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
518490d2dfabSRick Macklem 				if (ds->nfsdev_nmp != NULL) {
518590d2dfabSRick Macklem 					dvp = ds->nfsdev_dvp;
518690d2dfabSRick Macklem 					nmp = VFSTONFS(dvp->v_mount);
518790d2dfabSRick Macklem 					if (nmp != ds->nfsdev_nmp)
518890d2dfabSRick Macklem 						printf("different2 nmp %p %p\n",
518990d2dfabSRick Macklem 						    nmp, ds->nfsdev_nmp);
5190ed66a76bSRick Macklem 					if (nfsaddr2_match(sad, nmp->nm_nam)) {
519190d2dfabSRick Macklem 						fndds = ds;
5192ed66a76bSRick Macklem 						break;
5193ed66a76bSRick Macklem 					}
5194ed66a76bSRick Macklem 				}
5195ed66a76bSRick Macklem 			}
5196ed66a76bSRick Macklem 			if (fndds != NULL && newnmpp != NULL &&
5197ed66a76bSRick Macklem 			    newnmp == NULL) {
5198ed66a76bSRick Macklem 				/* Search for a place to make a mirror copy. */
5199ed66a76bSRick Macklem 				TAILQ_FOREACH(tds, &nfsrv_devidhead,
5200ed66a76bSRick Macklem 				    nfsdev_list) {
5201ed66a76bSRick Macklem 					if (tds->nfsdev_nmp != NULL &&
5202ed66a76bSRick Macklem 					    fndds != tds &&
5203ed66a76bSRick Macklem 					    ((tds->nfsdev_mdsisset == 0 &&
5204ed66a76bSRick Macklem 					      fndds->nfsdev_mdsisset == 0) ||
5205ed66a76bSRick Macklem 					     (tds->nfsdev_mdsisset != 0 &&
5206ed66a76bSRick Macklem 					      fndds->nfsdev_mdsisset != 0 &&
5207245bfd34SRyan Moeller 					      fsidcmp(&tds->nfsdev_mdsfsid,
5208245bfd34SRyan Moeller 					      &mp->mnt_stat.f_fsid) == 0))) {
5209ed66a76bSRick Macklem 						*newnmpp = tds->nfsdev_nmp;
5210ed66a76bSRick Macklem 						break;
5211ed66a76bSRick Macklem 					}
5212ed66a76bSRick Macklem 				}
5213ed66a76bSRick Macklem 				if (tds != NULL) {
521490d2dfabSRick Macklem 					/*
5215ed66a76bSRick Macklem 					 * Move this entry to the end of the
5216ed66a76bSRick Macklem 					 * list, so it won't be selected as
5217ed66a76bSRick Macklem 					 * easily the next time.
521890d2dfabSRick Macklem 					 */
5219ed66a76bSRick Macklem 					TAILQ_REMOVE(&nfsrv_devidhead, tds,
5220ed66a76bSRick Macklem 					    nfsdev_list);
5221ed66a76bSRick Macklem 					TAILQ_INSERT_TAIL(&nfsrv_devidhead, tds,
5222ed66a76bSRick Macklem 					    nfsdev_list);
522390d2dfabSRick Macklem 				}
522490d2dfabSRick Macklem 			}
522590d2dfabSRick Macklem 			NFSDDSUNLOCK();
522690d2dfabSRick Macklem 			if (fndds != NULL) {
522790d2dfabSRick Macklem 				dvp = fndds->nfsdev_dsdir[dsdir];
522890d2dfabSRick Macklem 				if (lktype != 0 || fhiszero != 0 ||
522990d2dfabSRick Macklem 				    (nvpp != NULL && *nvpp == NULL)) {
523090d2dfabSRick Macklem 					if (fhiszero != 0)
523190d2dfabSRick Macklem 						error = vn_lock(dvp,
523290d2dfabSRick Macklem 						    LK_EXCLUSIVE);
523390d2dfabSRick Macklem 					else if (lktype != 0)
523490d2dfabSRick Macklem 						error = vn_lock(dvp, lktype);
523590d2dfabSRick Macklem 					else
523690d2dfabSRick Macklem 						error = vn_lock(dvp, LK_SHARED);
523790d2dfabSRick Macklem 					/*
523890d2dfabSRick Macklem 					 * If the file handle is all 0's, try to
523990d2dfabSRick Macklem 					 * do a Lookup against the DS to acquire
524090d2dfabSRick Macklem 					 * it.
524190d2dfabSRick Macklem 					 * If dvpp == NULL or the Lookup fails,
524290d2dfabSRick Macklem 					 * unlock dvp after the call.
524390d2dfabSRick Macklem 					 */
524490d2dfabSRick Macklem 					if (error == 0 && (fhiszero != 0 ||
524590d2dfabSRick Macklem 					    (nvpp != NULL && *nvpp == NULL))) {
524690d2dfabSRick Macklem 						error = nfsrv_pnfslookupds(vp,
524790d2dfabSRick Macklem 						    dvp, pf, &nvp, p);
524890d2dfabSRick Macklem 						if (error == 0) {
524990d2dfabSRick Macklem 							if (fhiszero != 0)
525090d2dfabSRick Macklem 								nfsrv_pnfssetfh(
525190d2dfabSRick Macklem 								    vp, pf,
52523e5ba2e1SRick Macklem 								    devid,
52533e5ba2e1SRick Macklem 								    fnamep,
525490d2dfabSRick Macklem 								    nvp, p);
525590d2dfabSRick Macklem 							if (nvpp != NULL &&
525690d2dfabSRick Macklem 							    *nvpp == NULL) {
525790d2dfabSRick Macklem 								*nvpp = nvp;
525890d2dfabSRick Macklem 								*dsdirp = dsdir;
525990d2dfabSRick Macklem 							} else
526090d2dfabSRick Macklem 								vput(nvp);
526190d2dfabSRick Macklem 						}
526290d2dfabSRick Macklem 						if (error != 0 || lktype == 0)
5263b249ce48SMateusz Guzik 							NFSVOPUNLOCK(dvp);
526490d2dfabSRick Macklem 					}
526590d2dfabSRick Macklem 				}
526690d2dfabSRick Macklem 				if (error == 0) {
526790d2dfabSRick Macklem 					gotone++;
526890d2dfabSRick Macklem 					NFSD_DEBUG(4, "gotone=%d\n", gotone);
526990d2dfabSRick Macklem 					if (devid != NULL) {
527090d2dfabSRick Macklem 						NFSBCOPY(fndds->nfsdev_deviceid,
527190d2dfabSRick Macklem 						    devid, NFSX_V4DEVICEID);
527290d2dfabSRick Macklem 						devid += NFSX_V4DEVICEID;
527390d2dfabSRick Macklem 					}
527490d2dfabSRick Macklem 					if (dvpp != NULL)
527590d2dfabSRick Macklem 						*tdvpp++ = dvp;
527690d2dfabSRick Macklem 					if (fhp != NULL)
527790d2dfabSRick Macklem 						NFSBCOPY(&pf->dsf_fh, fhp++,
527890d2dfabSRick Macklem 						    NFSX_MYFH);
527990d2dfabSRick Macklem 					if (fnamep != NULL && gotone == 1)
528090d2dfabSRick Macklem 						strlcpy(fnamep,
528190d2dfabSRick Macklem 						    pf->dsf_filename,
528290d2dfabSRick Macklem 						    sizeof(pf->dsf_filename));
528390d2dfabSRick Macklem 				} else
528490d2dfabSRick Macklem 					NFSD_DEBUG(4, "nfsrv_dsgetsockmnt "
528590d2dfabSRick Macklem 					    "err=%d\n", error);
528690d2dfabSRick Macklem 			}
528790d2dfabSRick Macklem 		}
528890d2dfabSRick Macklem 	}
528990d2dfabSRick Macklem 	if (error == 0 && gotone == 0)
529090d2dfabSRick Macklem 		error = ENOENT;
529190d2dfabSRick Macklem 
529290d2dfabSRick Macklem 	NFSD_DEBUG(4, "eo nfsrv_dsgetsockmnt: gotone=%d err=%d\n", gotone,
529390d2dfabSRick Macklem 	    error);
529490d2dfabSRick Macklem 	if (error == 0)
529590d2dfabSRick Macklem 		*mirrorcntp = gotone;
529690d2dfabSRick Macklem 	else {
529790d2dfabSRick Macklem 		if (gotone > 0 && dvpp != NULL) {
529890d2dfabSRick Macklem 			/*
529990d2dfabSRick Macklem 			 * If the error didn't occur on the first one and
530090d2dfabSRick Macklem 			 * dvpp != NULL, the one(s) prior to the failure will
530190d2dfabSRick Macklem 			 * have locked dvp's that need to be unlocked.
530290d2dfabSRick Macklem 			 */
530390d2dfabSRick Macklem 			for (i = 0; i < gotone; i++) {
5304b249ce48SMateusz Guzik 				NFSVOPUNLOCK(*dvpp);
530590d2dfabSRick Macklem 				*dvpp++ = NULL;
530690d2dfabSRick Macklem 			}
530790d2dfabSRick Macklem 		}
530890d2dfabSRick Macklem 		/*
530990d2dfabSRick Macklem 		 * If it found the vnode to be copied from before a failure,
531090d2dfabSRick Macklem 		 * it needs to be vput()'d.
531190d2dfabSRick Macklem 		 */
531290d2dfabSRick Macklem 		if (nvpp != NULL && *nvpp != NULL) {
531390d2dfabSRick Macklem 			vput(*nvpp);
531490d2dfabSRick Macklem 			*nvpp = NULL;
531590d2dfabSRick Macklem 		}
531690d2dfabSRick Macklem 	}
531790d2dfabSRick Macklem 	return (error);
531890d2dfabSRick Macklem }
531990d2dfabSRick Macklem 
532090d2dfabSRick Macklem /*
532190d2dfabSRick Macklem  * Set the extended attribute for the Change attribute.
532290d2dfabSRick Macklem  */
532390d2dfabSRick Macklem static int
nfsrv_setextattr(struct vnode * vp,struct nfsvattr * nap,NFSPROC_T * p)532490d2dfabSRick Macklem nfsrv_setextattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p)
532590d2dfabSRick Macklem {
532690d2dfabSRick Macklem 	struct pnfsdsattr dsattr;
532790d2dfabSRick Macklem 	int error;
532890d2dfabSRick Macklem 
532990d2dfabSRick Macklem 	ASSERT_VOP_ELOCKED(vp, "nfsrv_setextattr vp");
533090d2dfabSRick Macklem 	dsattr.dsa_filerev = nap->na_filerev;
533190d2dfabSRick Macklem 	dsattr.dsa_size = nap->na_size;
533290d2dfabSRick Macklem 	dsattr.dsa_atime = nap->na_atime;
533390d2dfabSRick Macklem 	dsattr.dsa_mtime = nap->na_mtime;
533414eff785SRick Macklem 	dsattr.dsa_bytes = nap->na_bytes;
53353e5ba2e1SRick Macklem 	error = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
53363e5ba2e1SRick Macklem 	    "pnfsd.dsattr", sizeof(dsattr), (char *)&dsattr, p);
533790d2dfabSRick Macklem 	if (error != 0)
533890d2dfabSRick Macklem 		printf("pNFS: setextattr=%d\n", error);
533990d2dfabSRick Macklem 	return (error);
534090d2dfabSRick Macklem }
534190d2dfabSRick Macklem 
534290d2dfabSRick Macklem static int
nfsrv_readdsrpc(fhandle_t * fhp,off_t off,int len,struct ucred * cred,NFSPROC_T * p,struct nfsmount * nmp,struct mbuf ** mpp,struct mbuf ** mpendp)534390d2dfabSRick Macklem nfsrv_readdsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred,
534490d2dfabSRick Macklem     NFSPROC_T *p, struct nfsmount *nmp, struct mbuf **mpp, struct mbuf **mpendp)
534590d2dfabSRick Macklem {
534690d2dfabSRick Macklem 	uint32_t *tl;
534790d2dfabSRick Macklem 	struct nfsrv_descript *nd;
534890d2dfabSRick Macklem 	nfsv4stateid_t st;
534990d2dfabSRick Macklem 	struct mbuf *m, *m2;
535090d2dfabSRick Macklem 	int error = 0, retlen, tlen, trimlen;
535190d2dfabSRick Macklem 
535290d2dfabSRick Macklem 	NFSD_DEBUG(4, "in nfsrv_readdsrpc\n");
535390d2dfabSRick Macklem 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
535490d2dfabSRick Macklem 	*mpp = NULL;
535590d2dfabSRick Macklem 	/*
535690d2dfabSRick Macklem 	 * Use a stateid where other is an alternating 01010 pattern and
535790d2dfabSRick Macklem 	 * seqid is 0xffffffff.  This value is not defined as special by
535890d2dfabSRick Macklem 	 * the RFC and is used by the FreeBSD NFS server to indicate an
535990d2dfabSRick Macklem 	 * MDS->DS proxy operation.
536090d2dfabSRick Macklem 	 */
536190d2dfabSRick Macklem 	st.other[0] = 0x55555555;
536290d2dfabSRick Macklem 	st.other[1] = 0x55555555;
536390d2dfabSRick Macklem 	st.other[2] = 0x55555555;
536490d2dfabSRick Macklem 	st.seqid = 0xffffffff;
536590d2dfabSRick Macklem 	nfscl_reqstart(nd, NFSPROC_READDS, nmp, (u_int8_t *)fhp, sizeof(*fhp),
53662b766d5eSRick Macklem 	    NULL, NULL, 0, 0, cred);
536790d2dfabSRick Macklem 	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
536890d2dfabSRick Macklem 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
536990d2dfabSRick Macklem 	txdr_hyper(off, tl);
537090d2dfabSRick Macklem 	*(tl + 2) = txdr_unsigned(len);
537190d2dfabSRick Macklem 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
537290d2dfabSRick Macklem 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
537390d2dfabSRick Macklem 	if (error != 0) {
537490d2dfabSRick Macklem 		free(nd, M_TEMP);
537590d2dfabSRick Macklem 		return (error);
537690d2dfabSRick Macklem 	}
537790d2dfabSRick Macklem 	if (nd->nd_repstat == 0) {
537890d2dfabSRick Macklem 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
537990d2dfabSRick Macklem 		NFSM_STRSIZ(retlen, len);
538090d2dfabSRick Macklem 		if (retlen > 0) {
538190d2dfabSRick Macklem 			/* Trim off the pre-data XDR from the mbuf chain. */
538290d2dfabSRick Macklem 			m = nd->nd_mrep;
538390d2dfabSRick Macklem 			while (m != NULL && m != nd->nd_md) {
538490d2dfabSRick Macklem 				if (m->m_next == nd->nd_md) {
538590d2dfabSRick Macklem 					m->m_next = NULL;
538690d2dfabSRick Macklem 					m_freem(nd->nd_mrep);
538790d2dfabSRick Macklem 					nd->nd_mrep = m = nd->nd_md;
538890d2dfabSRick Macklem 				} else
538990d2dfabSRick Macklem 					m = m->m_next;
539090d2dfabSRick Macklem 			}
539190d2dfabSRick Macklem 			if (m == NULL) {
539290d2dfabSRick Macklem 				printf("nfsrv_readdsrpc: busted mbuf list\n");
539390d2dfabSRick Macklem 				error = ENOENT;
539490d2dfabSRick Macklem 				goto nfsmout;
539590d2dfabSRick Macklem 			}
539690d2dfabSRick Macklem 
539790d2dfabSRick Macklem 			/*
539890d2dfabSRick Macklem 			 * Now, adjust first mbuf so that any XDR before the
539990d2dfabSRick Macklem 			 * read data is skipped over.
540090d2dfabSRick Macklem 			 */
540190d2dfabSRick Macklem 			trimlen = nd->nd_dpos - mtod(m, char *);
540290d2dfabSRick Macklem 			if (trimlen > 0) {
540390d2dfabSRick Macklem 				m->m_len -= trimlen;
540490d2dfabSRick Macklem 				NFSM_DATAP(m, trimlen);
540590d2dfabSRick Macklem 			}
540690d2dfabSRick Macklem 
540790d2dfabSRick Macklem 			/*
540890d2dfabSRick Macklem 			 * Truncate the mbuf chain at retlen bytes of data,
540990d2dfabSRick Macklem 			 * plus XDR padding that brings the length up to a
541090d2dfabSRick Macklem 			 * multiple of 4.
541190d2dfabSRick Macklem 			 */
541290d2dfabSRick Macklem 			tlen = NFSM_RNDUP(retlen);
541390d2dfabSRick Macklem 			do {
541490d2dfabSRick Macklem 				if (m->m_len >= tlen) {
541590d2dfabSRick Macklem 					m->m_len = tlen;
541690d2dfabSRick Macklem 					tlen = 0;
541790d2dfabSRick Macklem 					m2 = m->m_next;
541890d2dfabSRick Macklem 					m->m_next = NULL;
541990d2dfabSRick Macklem 					m_freem(m2);
542090d2dfabSRick Macklem 					break;
542190d2dfabSRick Macklem 				}
542290d2dfabSRick Macklem 				tlen -= m->m_len;
542390d2dfabSRick Macklem 				m = m->m_next;
542490d2dfabSRick Macklem 			} while (m != NULL);
542590d2dfabSRick Macklem 			if (tlen > 0) {
542690d2dfabSRick Macklem 				printf("nfsrv_readdsrpc: busted mbuf list\n");
542790d2dfabSRick Macklem 				error = ENOENT;
542890d2dfabSRick Macklem 				goto nfsmout;
542990d2dfabSRick Macklem 			}
543090d2dfabSRick Macklem 			*mpp = nd->nd_mrep;
543190d2dfabSRick Macklem 			*mpendp = m;
543290d2dfabSRick Macklem 			nd->nd_mrep = NULL;
543390d2dfabSRick Macklem 		}
543490d2dfabSRick Macklem 	} else
543590d2dfabSRick Macklem 		error = nd->nd_repstat;
543690d2dfabSRick Macklem nfsmout:
543790d2dfabSRick Macklem 	/* If nd->nd_mrep is already NULL, this is a no-op. */
543890d2dfabSRick Macklem 	m_freem(nd->nd_mrep);
543990d2dfabSRick Macklem 	free(nd, M_TEMP);
544090d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_readdsrpc error=%d\n", error);
544190d2dfabSRick Macklem 	return (error);
544290d2dfabSRick Macklem }
544390d2dfabSRick Macklem 
544490d2dfabSRick Macklem /*
544590d2dfabSRick Macklem  * Do a write RPC on a DS data file, using this structure for the arguments,
544690d2dfabSRick Macklem  * so that this function can be executed by a separate kernel process.
544790d2dfabSRick Macklem  */
544890d2dfabSRick Macklem struct nfsrvwritedsdorpc {
544990d2dfabSRick Macklem 	int			done;
545090d2dfabSRick Macklem 	int			inprog;
545190d2dfabSRick Macklem 	struct task		tsk;
545290d2dfabSRick Macklem 	fhandle_t		fh;
545390d2dfabSRick Macklem 	off_t			off;
545490d2dfabSRick Macklem 	int			len;
545590d2dfabSRick Macklem 	struct nfsmount		*nmp;
545690d2dfabSRick Macklem 	struct ucred		*cred;
545790d2dfabSRick Macklem 	NFSPROC_T		*p;
545890d2dfabSRick Macklem 	struct mbuf		*m;
545990d2dfabSRick Macklem 	int			err;
546090d2dfabSRick Macklem };
546190d2dfabSRick Macklem 
546290d2dfabSRick Macklem static int
nfsrv_writedsdorpc(struct nfsmount * nmp,fhandle_t * fhp,off_t off,int len,struct nfsvattr * nap,struct mbuf * m,struct ucred * cred,NFSPROC_T * p)546390d2dfabSRick Macklem nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len,
546490d2dfabSRick Macklem     struct nfsvattr *nap, struct mbuf *m, struct ucred *cred, NFSPROC_T *p)
546590d2dfabSRick Macklem {
546690d2dfabSRick Macklem 	uint32_t *tl;
546790d2dfabSRick Macklem 	struct nfsrv_descript *nd;
546890d2dfabSRick Macklem 	nfsattrbit_t attrbits;
546990d2dfabSRick Macklem 	nfsv4stateid_t st;
547090d2dfabSRick Macklem 	int commit, error, retlen;
547190d2dfabSRick Macklem 
547290d2dfabSRick Macklem 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
547390d2dfabSRick Macklem 	nfscl_reqstart(nd, NFSPROC_WRITE, nmp, (u_int8_t *)fhp,
54742b766d5eSRick Macklem 	    sizeof(fhandle_t), NULL, NULL, 0, 0, cred);
547590d2dfabSRick Macklem 
547690d2dfabSRick Macklem 	/*
547790d2dfabSRick Macklem 	 * Use a stateid where other is an alternating 01010 pattern and
547890d2dfabSRick Macklem 	 * seqid is 0xffffffff.  This value is not defined as special by
547990d2dfabSRick Macklem 	 * the RFC and is used by the FreeBSD NFS server to indicate an
548090d2dfabSRick Macklem 	 * MDS->DS proxy operation.
548190d2dfabSRick Macklem 	 */
548290d2dfabSRick Macklem 	st.other[0] = 0x55555555;
548390d2dfabSRick Macklem 	st.other[1] = 0x55555555;
548490d2dfabSRick Macklem 	st.other[2] = 0x55555555;
548590d2dfabSRick Macklem 	st.seqid = 0xffffffff;
548690d2dfabSRick Macklem 	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
548790d2dfabSRick Macklem 	NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
548890d2dfabSRick Macklem 	txdr_hyper(off, tl);
548990d2dfabSRick Macklem 	tl += 2;
549090d2dfabSRick Macklem 	/*
549190d2dfabSRick Macklem 	 * Do all writes FileSync, since the server doesn't hold onto dirty
549290d2dfabSRick Macklem 	 * buffers.  Since clients should be accessing the DS servers directly
549390d2dfabSRick Macklem 	 * using the pNFS layouts, this just needs to work correctly as a
549490d2dfabSRick Macklem 	 * fallback.
549590d2dfabSRick Macklem 	 */
549690d2dfabSRick Macklem 	*tl++ = txdr_unsigned(NFSWRITE_FILESYNC);
549790d2dfabSRick Macklem 	*tl = txdr_unsigned(len);
549890d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_writedsdorpc: len=%d\n", len);
549990d2dfabSRick Macklem 
550090d2dfabSRick Macklem 	/* Put data in mbuf chain. */
550190d2dfabSRick Macklem 	nd->nd_mb->m_next = m;
550290d2dfabSRick Macklem 
550390d2dfabSRick Macklem 	/* Set nd_mb and nd_bpos to end of data. */
550490d2dfabSRick Macklem 	while (m->m_next != NULL)
550590d2dfabSRick Macklem 		m = m->m_next;
550690d2dfabSRick Macklem 	nd->nd_mb = m;
55073d7650f0SRick Macklem 	nfsm_set(nd, m->m_len);
550890d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_writedsdorpc: lastmb len=%d\n", m->m_len);
550990d2dfabSRick Macklem 
551014eff785SRick Macklem 	/* Do a Getattr for the attributes that change upon writing. */
551190d2dfabSRick Macklem 	NFSZERO_ATTRBIT(&attrbits);
551290d2dfabSRick Macklem 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
551390d2dfabSRick Macklem 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
551490d2dfabSRick Macklem 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
551590d2dfabSRick Macklem 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
551614eff785SRick Macklem 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
551790d2dfabSRick Macklem 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
551890d2dfabSRick Macklem 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
551990d2dfabSRick Macklem 	(void) nfsrv_putattrbit(nd, &attrbits);
552090d2dfabSRick Macklem 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
552190d2dfabSRick Macklem 	    cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
552290d2dfabSRick Macklem 	if (error != 0) {
552390d2dfabSRick Macklem 		free(nd, M_TEMP);
552490d2dfabSRick Macklem 		return (error);
552590d2dfabSRick Macklem 	}
552690d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft writerpc=%d\n", nd->nd_repstat);
552790d2dfabSRick Macklem 	/* Get rid of weak cache consistency data for now. */
552890d2dfabSRick Macklem 	if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
552990d2dfabSRick Macklem 	    (ND_NFSV4 | ND_V4WCCATTR)) {
553090d2dfabSRick Macklem 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
553190d2dfabSRick Macklem 		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
553290d2dfabSRick Macklem 		NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error);
553390d2dfabSRick Macklem 		if (error != 0)
553490d2dfabSRick Macklem 			goto nfsmout;
553590d2dfabSRick Macklem 		/*
553690d2dfabSRick Macklem 		 * Get rid of Op# and status for next op.
553790d2dfabSRick Macklem 		 */
553890d2dfabSRick Macklem 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
553990d2dfabSRick Macklem 		if (*++tl != 0)
554090d2dfabSRick Macklem 			nd->nd_flag |= ND_NOMOREDATA;
554190d2dfabSRick Macklem 	}
554290d2dfabSRick Macklem 	if (nd->nd_repstat == 0) {
554390d2dfabSRick Macklem 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
554490d2dfabSRick Macklem 		retlen = fxdr_unsigned(int, *tl++);
554590d2dfabSRick Macklem 		commit = fxdr_unsigned(int, *tl);
554690d2dfabSRick Macklem 		if (commit != NFSWRITE_FILESYNC)
554790d2dfabSRick Macklem 			error = NFSERR_IO;
554890d2dfabSRick Macklem 		NFSD_DEBUG(4, "nfsrv_writedsdorpc:retlen=%d commit=%d err=%d\n",
554990d2dfabSRick Macklem 		    retlen, commit, error);
555090d2dfabSRick Macklem 	} else
555190d2dfabSRick Macklem 		error = nd->nd_repstat;
555290d2dfabSRick Macklem 	/* We have no use for the Write Verifier since we use FileSync. */
555390d2dfabSRick Macklem 
555490d2dfabSRick Macklem 	/*
555590d2dfabSRick Macklem 	 * Get the Change, Size, Access Time and Modify Time attributes and set
555690d2dfabSRick Macklem 	 * on the Metadata file, so its attributes will be what the file's
555790d2dfabSRick Macklem 	 * would be if it had been written.
555890d2dfabSRick Macklem 	 */
555990d2dfabSRick Macklem 	if (error == 0) {
556090d2dfabSRick Macklem 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
556190d2dfabSRick Macklem 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
556290d2dfabSRick Macklem 		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
556390d2dfabSRick Macklem 	}
556490d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error);
556590d2dfabSRick Macklem nfsmout:
556690d2dfabSRick Macklem 	m_freem(nd->nd_mrep);
556790d2dfabSRick Macklem 	free(nd, M_TEMP);
556890d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_writedsdorpc error=%d\n", error);
556990d2dfabSRick Macklem 	return (error);
557090d2dfabSRick Macklem }
557190d2dfabSRick Macklem 
557290d2dfabSRick Macklem /*
557390d2dfabSRick Macklem  * Start up the thread that will execute nfsrv_writedsdorpc().
557490d2dfabSRick Macklem  */
557590d2dfabSRick Macklem static void
start_writedsdorpc(void * arg,int pending)557690d2dfabSRick Macklem start_writedsdorpc(void *arg, int pending)
557790d2dfabSRick Macklem {
557890d2dfabSRick Macklem 	struct nfsrvwritedsdorpc *drpc;
557990d2dfabSRick Macklem 
558090d2dfabSRick Macklem 	drpc = (struct nfsrvwritedsdorpc *)arg;
558190d2dfabSRick Macklem 	drpc->err = nfsrv_writedsdorpc(drpc->nmp, &drpc->fh, drpc->off,
558290d2dfabSRick Macklem 	    drpc->len, NULL, drpc->m, drpc->cred, drpc->p);
558390d2dfabSRick Macklem 	drpc->done = 1;
558490d2dfabSRick Macklem 	NFSD_DEBUG(4, "start_writedsdorpc: err=%d\n", drpc->err);
558590d2dfabSRick Macklem }
558690d2dfabSRick Macklem 
558790d2dfabSRick Macklem static int
nfsrv_writedsrpc(fhandle_t * fhp,off_t off,int len,struct ucred * cred,NFSPROC_T * p,struct vnode * vp,struct nfsmount ** nmpp,int mirrorcnt,struct mbuf ** mpp,char * cp,int * failposp)558890d2dfabSRick Macklem nfsrv_writedsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred,
558990d2dfabSRick Macklem     NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt,
559090d2dfabSRick Macklem     struct mbuf **mpp, char *cp, int *failposp)
559190d2dfabSRick Macklem {
5592bf6ac05aSRick Macklem 	struct nfsrvwritedsdorpc *drpc, *tdrpc = NULL;
559390d2dfabSRick Macklem 	struct nfsvattr na;
559490d2dfabSRick Macklem 	struct mbuf *m;
559590d2dfabSRick Macklem 	int error, i, offs, ret, timo;
559690d2dfabSRick Macklem 
559790d2dfabSRick Macklem 	NFSD_DEBUG(4, "in nfsrv_writedsrpc\n");
559890d2dfabSRick Macklem 	KASSERT(*mpp != NULL, ("nfsrv_writedsrpc: NULL mbuf chain"));
559990d2dfabSRick Macklem 	drpc = NULL;
560090d2dfabSRick Macklem 	if (mirrorcnt > 1)
560190d2dfabSRick Macklem 		tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
560290d2dfabSRick Macklem 		    M_WAITOK);
560390d2dfabSRick Macklem 
560490d2dfabSRick Macklem 	/* Calculate offset in mbuf chain that data starts. */
560590d2dfabSRick Macklem 	offs = cp - mtod(*mpp, char *);
560690d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_writedsrpc: mcopy offs=%d len=%d\n", offs, len);
560790d2dfabSRick Macklem 
560890d2dfabSRick Macklem 	/*
560990d2dfabSRick Macklem 	 * Do the write RPC for every DS, using a separate kernel process
561090d2dfabSRick Macklem 	 * for every DS except the last one.
561190d2dfabSRick Macklem 	 */
561290d2dfabSRick Macklem 	error = 0;
561390d2dfabSRick Macklem 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
561490d2dfabSRick Macklem 		tdrpc->done = 0;
561525705dd5SRick Macklem 		NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
561690d2dfabSRick Macklem 		tdrpc->off = off;
561790d2dfabSRick Macklem 		tdrpc->len = len;
561890d2dfabSRick Macklem 		tdrpc->nmp = *nmpp;
561990d2dfabSRick Macklem 		tdrpc->cred = cred;
562090d2dfabSRick Macklem 		tdrpc->p = p;
562190d2dfabSRick Macklem 		tdrpc->inprog = 0;
562290d2dfabSRick Macklem 		tdrpc->err = 0;
562390d2dfabSRick Macklem 		tdrpc->m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK);
562490d2dfabSRick Macklem 		ret = EIO;
562590d2dfabSRick Macklem 		if (nfs_pnfsiothreads != 0) {
562690d2dfabSRick Macklem 			ret = nfs_pnfsio(start_writedsdorpc, tdrpc);
562790d2dfabSRick Macklem 			NFSD_DEBUG(4, "nfsrv_writedsrpc: nfs_pnfsio=%d\n",
562890d2dfabSRick Macklem 			    ret);
562990d2dfabSRick Macklem 		}
563090d2dfabSRick Macklem 		if (ret != 0) {
563190d2dfabSRick Macklem 			ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, NULL,
563290d2dfabSRick Macklem 			    tdrpc->m, cred, p);
563390d2dfabSRick Macklem 			if (nfsds_failerr(ret) && *failposp == -1)
563490d2dfabSRick Macklem 				*failposp = i;
563590d2dfabSRick Macklem 			else if (error == 0 && ret != 0)
563690d2dfabSRick Macklem 				error = ret;
563790d2dfabSRick Macklem 		}
563890d2dfabSRick Macklem 		nmpp++;
563990d2dfabSRick Macklem 		fhp++;
564090d2dfabSRick Macklem 	}
564190d2dfabSRick Macklem 	m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK);
564290d2dfabSRick Macklem 	ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, &na, m, cred, p);
564390d2dfabSRick Macklem 	if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
564490d2dfabSRick Macklem 		*failposp = mirrorcnt - 1;
564590d2dfabSRick Macklem 	else if (error == 0 && ret != 0)
564690d2dfabSRick Macklem 		error = ret;
564790d2dfabSRick Macklem 	if (error == 0)
564890d2dfabSRick Macklem 		error = nfsrv_setextattr(vp, &na, p);
564990d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_writedsrpc: aft setextat=%d\n", error);
565090d2dfabSRick Macklem 	tdrpc = drpc;
565190d2dfabSRick Macklem 	timo = hz / 50;		/* Wait for 20msec. */
565290d2dfabSRick Macklem 	if (timo < 1)
565390d2dfabSRick Macklem 		timo = 1;
565490d2dfabSRick Macklem 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
565590d2dfabSRick Macklem 		/* Wait for RPCs on separate threads to complete. */
565690d2dfabSRick Macklem 		while (tdrpc->inprog != 0 && tdrpc->done == 0)
565790d2dfabSRick Macklem 			tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo);
565890d2dfabSRick Macklem 		if (nfsds_failerr(tdrpc->err) && *failposp == -1)
565990d2dfabSRick Macklem 			*failposp = i;
566090d2dfabSRick Macklem 		else if (error == 0 && tdrpc->err != 0)
566190d2dfabSRick Macklem 			error = tdrpc->err;
566290d2dfabSRick Macklem 	}
566390d2dfabSRick Macklem 	free(drpc, M_TEMP);
566490d2dfabSRick Macklem 	return (error);
566590d2dfabSRick Macklem }
566690d2dfabSRick Macklem 
5667c057a378SRick Macklem /*
5668c057a378SRick Macklem  * Do a allocate RPC on a DS data file, using this structure for the arguments,
5669c057a378SRick Macklem  * so that this function can be executed by a separate kernel process.
5670c057a378SRick Macklem  */
5671c057a378SRick Macklem struct nfsrvallocatedsdorpc {
5672c057a378SRick Macklem 	int			done;
5673c057a378SRick Macklem 	int			inprog;
5674c057a378SRick Macklem 	struct task		tsk;
5675c057a378SRick Macklem 	fhandle_t		fh;
5676c057a378SRick Macklem 	off_t			off;
5677c057a378SRick Macklem 	off_t			len;
5678c057a378SRick Macklem 	struct nfsmount		*nmp;
5679c057a378SRick Macklem 	struct ucred		*cred;
5680c057a378SRick Macklem 	NFSPROC_T		*p;
5681c057a378SRick Macklem 	int			err;
5682c057a378SRick Macklem };
5683c057a378SRick Macklem 
5684c057a378SRick Macklem static int
nfsrv_allocatedsdorpc(struct nfsmount * nmp,fhandle_t * fhp,off_t off,off_t len,struct nfsvattr * nap,struct ucred * cred,NFSPROC_T * p)5685c057a378SRick Macklem nfsrv_allocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off,
5686c057a378SRick Macklem     off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p)
5687c057a378SRick Macklem {
5688c057a378SRick Macklem 	uint32_t *tl;
5689c057a378SRick Macklem 	struct nfsrv_descript *nd;
5690c057a378SRick Macklem 	nfsattrbit_t attrbits;
5691c057a378SRick Macklem 	nfsv4stateid_t st;
5692c057a378SRick Macklem 	int error;
5693c057a378SRick Macklem 
5694c057a378SRick Macklem 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
5695c057a378SRick Macklem 	nfscl_reqstart(nd, NFSPROC_ALLOCATE, nmp, (u_int8_t *)fhp,
56962b766d5eSRick Macklem 	    sizeof(fhandle_t), NULL, NULL, 0, 0, cred);
5697c057a378SRick Macklem 
5698c057a378SRick Macklem 	/*
5699c057a378SRick Macklem 	 * Use a stateid where other is an alternating 01010 pattern and
5700c057a378SRick Macklem 	 * seqid is 0xffffffff.  This value is not defined as special by
5701c057a378SRick Macklem 	 * the RFC and is used by the FreeBSD NFS server to indicate an
5702c057a378SRick Macklem 	 * MDS->DS proxy operation.
5703c057a378SRick Macklem 	 */
5704c057a378SRick Macklem 	st.other[0] = 0x55555555;
5705c057a378SRick Macklem 	st.other[1] = 0x55555555;
5706c057a378SRick Macklem 	st.other[2] = 0x55555555;
5707c057a378SRick Macklem 	st.seqid = 0xffffffff;
5708c057a378SRick Macklem 	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
5709c057a378SRick Macklem 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
5710c057a378SRick Macklem 	txdr_hyper(off, tl); tl += 2;
5711c057a378SRick Macklem 	txdr_hyper(len, tl); tl += 2;
5712c057a378SRick Macklem 	NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: len=%jd\n", (intmax_t)len);
5713c057a378SRick Macklem 
5714c057a378SRick Macklem 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
5715c057a378SRick Macklem 	NFSGETATTR_ATTRBIT(&attrbits);
5716c057a378SRick Macklem 	nfsrv_putattrbit(nd, &attrbits);
5717c057a378SRick Macklem 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
5718c057a378SRick Macklem 	    cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5719c057a378SRick Macklem 	if (error != 0) {
5720c057a378SRick Macklem 		free(nd, M_TEMP);
5721c057a378SRick Macklem 		return (error);
5722c057a378SRick Macklem 	}
5723c057a378SRick Macklem 	NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft allocaterpc=%d\n",
5724c057a378SRick Macklem 	    nd->nd_repstat);
5725c057a378SRick Macklem 	if (nd->nd_repstat == 0) {
5726c057a378SRick Macklem 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5727c057a378SRick Macklem 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
5728c057a378SRick Macklem 		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
5729c057a378SRick Macklem 	} else
5730c057a378SRick Macklem 		error = nd->nd_repstat;
5731c057a378SRick Macklem 	NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft loadattr=%d\n", error);
5732c057a378SRick Macklem nfsmout:
5733c057a378SRick Macklem 	m_freem(nd->nd_mrep);
5734c057a378SRick Macklem 	free(nd, M_TEMP);
5735c057a378SRick Macklem 	NFSD_DEBUG(4, "nfsrv_allocatedsdorpc error=%d\n", error);
5736c057a378SRick Macklem 	return (error);
5737c057a378SRick Macklem }
5738c057a378SRick Macklem 
5739c057a378SRick Macklem /*
5740c057a378SRick Macklem  * Start up the thread that will execute nfsrv_allocatedsdorpc().
5741c057a378SRick Macklem  */
5742c057a378SRick Macklem static void
start_allocatedsdorpc(void * arg,int pending)5743c057a378SRick Macklem start_allocatedsdorpc(void *arg, int pending)
5744c057a378SRick Macklem {
5745c057a378SRick Macklem 	struct nfsrvallocatedsdorpc *drpc;
5746c057a378SRick Macklem 
5747c057a378SRick Macklem 	drpc = (struct nfsrvallocatedsdorpc *)arg;
5748c057a378SRick Macklem 	drpc->err = nfsrv_allocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off,
5749c057a378SRick Macklem 	    drpc->len, NULL, drpc->cred, drpc->p);
5750c057a378SRick Macklem 	drpc->done = 1;
5751c057a378SRick Macklem 	NFSD_DEBUG(4, "start_allocatedsdorpc: err=%d\n", drpc->err);
5752c057a378SRick Macklem }
5753c057a378SRick Macklem 
5754c057a378SRick Macklem static int
nfsrv_allocatedsrpc(fhandle_t * fhp,off_t off,off_t len,struct ucred * cred,NFSPROC_T * p,struct vnode * vp,struct nfsmount ** nmpp,int mirrorcnt,int * failposp)5755c057a378SRick Macklem nfsrv_allocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred,
5756c057a378SRick Macklem     NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt,
5757c057a378SRick Macklem     int *failposp)
5758c057a378SRick Macklem {
5759bf6ac05aSRick Macklem 	struct nfsrvallocatedsdorpc *drpc, *tdrpc = NULL;
5760c057a378SRick Macklem 	struct nfsvattr na;
5761c057a378SRick Macklem 	int error, i, ret, timo;
5762c057a378SRick Macklem 
5763c057a378SRick Macklem 	NFSD_DEBUG(4, "in nfsrv_allocatedsrpc\n");
5764c057a378SRick Macklem 	drpc = NULL;
5765c057a378SRick Macklem 	if (mirrorcnt > 1)
5766c057a378SRick Macklem 		tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
5767c057a378SRick Macklem 		    M_WAITOK);
5768c057a378SRick Macklem 
5769c057a378SRick Macklem 	/*
5770c057a378SRick Macklem 	 * Do the allocate RPC for every DS, using a separate kernel process
5771c057a378SRick Macklem 	 * for every DS except the last one.
5772c057a378SRick Macklem 	 */
5773c057a378SRick Macklem 	error = 0;
5774c057a378SRick Macklem 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
5775c057a378SRick Macklem 		tdrpc->done = 0;
5776c057a378SRick Macklem 		NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
5777c057a378SRick Macklem 		tdrpc->off = off;
5778c057a378SRick Macklem 		tdrpc->len = len;
5779c057a378SRick Macklem 		tdrpc->nmp = *nmpp;
5780c057a378SRick Macklem 		tdrpc->cred = cred;
5781c057a378SRick Macklem 		tdrpc->p = p;
5782c057a378SRick Macklem 		tdrpc->inprog = 0;
5783c057a378SRick Macklem 		tdrpc->err = 0;
5784c057a378SRick Macklem 		ret = EIO;
5785c057a378SRick Macklem 		if (nfs_pnfsiothreads != 0) {
5786c057a378SRick Macklem 			ret = nfs_pnfsio(start_allocatedsdorpc, tdrpc);
5787c057a378SRick Macklem 			NFSD_DEBUG(4, "nfsrv_allocatedsrpc: nfs_pnfsio=%d\n",
5788c057a378SRick Macklem 			    ret);
5789c057a378SRick Macklem 		}
5790c057a378SRick Macklem 		if (ret != 0) {
5791c057a378SRick Macklem 			ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, NULL,
5792c057a378SRick Macklem 			    cred, p);
5793c057a378SRick Macklem 			if (nfsds_failerr(ret) && *failposp == -1)
5794c057a378SRick Macklem 				*failposp = i;
5795c057a378SRick Macklem 			else if (error == 0 && ret != 0)
5796c057a378SRick Macklem 				error = ret;
5797c057a378SRick Macklem 		}
5798c057a378SRick Macklem 		nmpp++;
5799c057a378SRick Macklem 		fhp++;
5800c057a378SRick Macklem 	}
5801c057a378SRick Macklem 	ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p);
5802c057a378SRick Macklem 	if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
5803c057a378SRick Macklem 		*failposp = mirrorcnt - 1;
5804c057a378SRick Macklem 	else if (error == 0 && ret != 0)
5805c057a378SRick Macklem 		error = ret;
5806c057a378SRick Macklem 	if (error == 0)
5807c057a378SRick Macklem 		error = nfsrv_setextattr(vp, &na, p);
5808c057a378SRick Macklem 	NFSD_DEBUG(4, "nfsrv_allocatedsrpc: aft setextat=%d\n", error);
5809c057a378SRick Macklem 	tdrpc = drpc;
5810c057a378SRick Macklem 	timo = hz / 50;		/* Wait for 20msec. */
5811c057a378SRick Macklem 	if (timo < 1)
5812c057a378SRick Macklem 		timo = 1;
5813c057a378SRick Macklem 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
5814c057a378SRick Macklem 		/* Wait for RPCs on separate threads to complete. */
5815c057a378SRick Macklem 		while (tdrpc->inprog != 0 && tdrpc->done == 0)
5816c057a378SRick Macklem 			tsleep(&tdrpc->tsk, PVFS, "srvalds", timo);
5817c057a378SRick Macklem 		if (nfsds_failerr(tdrpc->err) && *failposp == -1)
5818c057a378SRick Macklem 			*failposp = i;
5819c057a378SRick Macklem 		else if (error == 0 && tdrpc->err != 0)
5820c057a378SRick Macklem 			error = tdrpc->err;
5821c057a378SRick Macklem 	}
5822c057a378SRick Macklem 	free(drpc, M_TEMP);
5823c057a378SRick Macklem 	return (error);
5824c057a378SRick Macklem }
5825c057a378SRick Macklem 
5826bb958dcfSRick Macklem /*
5827bb958dcfSRick Macklem  * Do a deallocate RPC on a DS data file, using this structure for the
5828bb958dcfSRick Macklem  * arguments, so that this function can be executed by a separate kernel
5829bb958dcfSRick Macklem  * process.
5830bb958dcfSRick Macklem  */
5831bb958dcfSRick Macklem struct nfsrvdeallocatedsdorpc {
5832bb958dcfSRick Macklem 	int			done;
5833bb958dcfSRick Macklem 	int			inprog;
5834bb958dcfSRick Macklem 	struct task		tsk;
5835bb958dcfSRick Macklem 	fhandle_t		fh;
5836bb958dcfSRick Macklem 	off_t			off;
5837bb958dcfSRick Macklem 	off_t			len;
5838bb958dcfSRick Macklem 	struct nfsmount		*nmp;
5839bb958dcfSRick Macklem 	struct ucred		*cred;
5840bb958dcfSRick Macklem 	NFSPROC_T		*p;
5841bb958dcfSRick Macklem 	int			err;
5842bb958dcfSRick Macklem };
5843bb958dcfSRick Macklem 
5844bb958dcfSRick Macklem static int
nfsrv_deallocatedsdorpc(struct nfsmount * nmp,fhandle_t * fhp,off_t off,off_t len,struct nfsvattr * nap,struct ucred * cred,NFSPROC_T * p)5845bb958dcfSRick Macklem nfsrv_deallocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off,
5846bb958dcfSRick Macklem     off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p)
5847bb958dcfSRick Macklem {
5848bb958dcfSRick Macklem 	uint32_t *tl;
5849bb958dcfSRick Macklem 	struct nfsrv_descript *nd;
5850bb958dcfSRick Macklem 	nfsattrbit_t attrbits;
5851bb958dcfSRick Macklem 	nfsv4stateid_t st;
5852bb958dcfSRick Macklem 	int error;
5853bb958dcfSRick Macklem 
5854bb958dcfSRick Macklem 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
5855bb958dcfSRick Macklem 	nfscl_reqstart(nd, NFSPROC_DEALLOCATE, nmp, (u_int8_t *)fhp,
58562b766d5eSRick Macklem 	    sizeof(fhandle_t), NULL, NULL, 0, 0, cred);
5857bb958dcfSRick Macklem 
5858bb958dcfSRick Macklem 	/*
5859bb958dcfSRick Macklem 	 * Use a stateid where other is an alternating 01010 pattern and
5860bb958dcfSRick Macklem 	 * seqid is 0xffffffff.  This value is not defined as special by
5861bb958dcfSRick Macklem 	 * the RFC and is used by the FreeBSD NFS server to indicate an
5862bb958dcfSRick Macklem 	 * MDS->DS proxy operation.
5863bb958dcfSRick Macklem 	 */
5864bb958dcfSRick Macklem 	st.other[0] = 0x55555555;
5865bb958dcfSRick Macklem 	st.other[1] = 0x55555555;
5866bb958dcfSRick Macklem 	st.other[2] = 0x55555555;
5867bb958dcfSRick Macklem 	st.seqid = 0xffffffff;
5868bb958dcfSRick Macklem 	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
5869bb958dcfSRick Macklem 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
5870bb958dcfSRick Macklem 	txdr_hyper(off, tl); tl += 2;
5871bb958dcfSRick Macklem 	txdr_hyper(len, tl); tl += 2;
5872bb958dcfSRick Macklem 	NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: len=%jd\n", (intmax_t)len);
5873bb958dcfSRick Macklem 
587493a32050SRick Macklem 	/* Do a Getattr for the attributes that change upon writing. */
587593a32050SRick Macklem 	NFSZERO_ATTRBIT(&attrbits);
587693a32050SRick Macklem 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
587793a32050SRick Macklem 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
587893a32050SRick Macklem 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
587993a32050SRick Macklem 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
588093a32050SRick Macklem 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
5881bb958dcfSRick Macklem 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
5882bb958dcfSRick Macklem 	nfsrv_putattrbit(nd, &attrbits);
5883bb958dcfSRick Macklem 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
5884bb958dcfSRick Macklem 	    cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
5885bb958dcfSRick Macklem 	if (error != 0) {
5886bb958dcfSRick Macklem 		free(nd, M_TEMP);
5887bb958dcfSRick Macklem 		return (error);
5888bb958dcfSRick Macklem 	}
588993a32050SRick Macklem 	NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft deallocaterpc=%d\n",
5890bb958dcfSRick Macklem 	    nd->nd_repstat);
589193a32050SRick Macklem 	/* Get rid of weak cache consistency data for now. */
589293a32050SRick Macklem 	if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
589393a32050SRick Macklem 	    (ND_NFSV4 | ND_V4WCCATTR)) {
589493a32050SRick Macklem 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
589593a32050SRick Macklem 		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
589693a32050SRick Macklem 		NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: wcc attr=%d\n", error);
589793a32050SRick Macklem 		if (error != 0)
589893a32050SRick Macklem 			goto nfsmout;
589993a32050SRick Macklem 		/*
590093a32050SRick Macklem 		 * Get rid of Op# and status for next op.
590193a32050SRick Macklem 		 */
590293a32050SRick Macklem 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
590393a32050SRick Macklem 		if (*++tl != 0)
590493a32050SRick Macklem 			nd->nd_flag |= ND_NOMOREDATA;
590593a32050SRick Macklem 	}
5906bb958dcfSRick Macklem 	if (nd->nd_repstat == 0) {
5907bb958dcfSRick Macklem 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
5908bb958dcfSRick Macklem 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
5909bb958dcfSRick Macklem 		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
5910bb958dcfSRick Macklem 	} else
5911bb958dcfSRick Macklem 		error = nd->nd_repstat;
5912bb958dcfSRick Macklem 	NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft loadattr=%d\n", error);
5913bb958dcfSRick Macklem nfsmout:
5914bb958dcfSRick Macklem 	m_freem(nd->nd_mrep);
5915bb958dcfSRick Macklem 	free(nd, M_TEMP);
5916bb958dcfSRick Macklem 	NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc error=%d\n", error);
5917bb958dcfSRick Macklem 	return (error);
5918bb958dcfSRick Macklem }
5919bb958dcfSRick Macklem 
5920bb958dcfSRick Macklem /*
5921bb958dcfSRick Macklem  * Start up the thread that will execute nfsrv_deallocatedsdorpc().
5922bb958dcfSRick Macklem  */
5923bb958dcfSRick Macklem static void
start_deallocatedsdorpc(void * arg,int pending)5924bb958dcfSRick Macklem start_deallocatedsdorpc(void *arg, int pending)
5925bb958dcfSRick Macklem {
5926bb958dcfSRick Macklem 	struct nfsrvdeallocatedsdorpc *drpc;
5927bb958dcfSRick Macklem 
5928bb958dcfSRick Macklem 	drpc = (struct nfsrvdeallocatedsdorpc *)arg;
5929bb958dcfSRick Macklem 	drpc->err = nfsrv_deallocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off,
5930bb958dcfSRick Macklem 	    drpc->len, NULL, drpc->cred, drpc->p);
5931bb958dcfSRick Macklem 	drpc->done = 1;
5932bb958dcfSRick Macklem 	NFSD_DEBUG(4, "start_deallocatedsdorpc: err=%d\n", drpc->err);
5933bb958dcfSRick Macklem }
5934bb958dcfSRick Macklem 
5935bb958dcfSRick Macklem static int
nfsrv_deallocatedsrpc(fhandle_t * fhp,off_t off,off_t len,struct ucred * cred,NFSPROC_T * p,struct vnode * vp,struct nfsmount ** nmpp,int mirrorcnt,int * failposp)5936bb958dcfSRick Macklem nfsrv_deallocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred,
5937bb958dcfSRick Macklem     NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt,
5938bb958dcfSRick Macklem     int *failposp)
5939bb958dcfSRick Macklem {
5940bb958dcfSRick Macklem 	struct nfsrvdeallocatedsdorpc *drpc, *tdrpc = NULL;
5941bb958dcfSRick Macklem 	struct nfsvattr na;
5942bb958dcfSRick Macklem 	int error, i, ret, timo;
5943bb958dcfSRick Macklem 
5944bb958dcfSRick Macklem 	NFSD_DEBUG(4, "in nfsrv_deallocatedsrpc\n");
5945bb958dcfSRick Macklem 	drpc = NULL;
5946bb958dcfSRick Macklem 	if (mirrorcnt > 1)
5947bb958dcfSRick Macklem 		tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
5948bb958dcfSRick Macklem 		    M_WAITOK);
5949bb958dcfSRick Macklem 
5950bb958dcfSRick Macklem 	/*
5951bb958dcfSRick Macklem 	 * Do the deallocate RPC for every DS, using a separate kernel process
5952bb958dcfSRick Macklem 	 * for every DS except the last one.
5953bb958dcfSRick Macklem 	 */
5954bb958dcfSRick Macklem 	error = 0;
5955bb958dcfSRick Macklem 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
5956bb958dcfSRick Macklem 		tdrpc->done = 0;
5957bb958dcfSRick Macklem 		NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
5958bb958dcfSRick Macklem 		tdrpc->off = off;
5959bb958dcfSRick Macklem 		tdrpc->len = len;
5960bb958dcfSRick Macklem 		tdrpc->nmp = *nmpp;
5961bb958dcfSRick Macklem 		tdrpc->cred = cred;
5962bb958dcfSRick Macklem 		tdrpc->p = p;
5963bb958dcfSRick Macklem 		tdrpc->inprog = 0;
5964bb958dcfSRick Macklem 		tdrpc->err = 0;
5965bb958dcfSRick Macklem 		ret = EIO;
5966bb958dcfSRick Macklem 		if (nfs_pnfsiothreads != 0) {
5967bb958dcfSRick Macklem 			ret = nfs_pnfsio(start_deallocatedsdorpc, tdrpc);
5968bb958dcfSRick Macklem 			NFSD_DEBUG(4, "nfsrv_deallocatedsrpc: nfs_pnfsio=%d\n",
5969bb958dcfSRick Macklem 			    ret);
5970bb958dcfSRick Macklem 		}
5971bb958dcfSRick Macklem 		if (ret != 0) {
5972bb958dcfSRick Macklem 			ret = nfsrv_deallocatedsdorpc(*nmpp, fhp, off, len,
5973bb958dcfSRick Macklem 			    NULL, cred, p);
5974bb958dcfSRick Macklem 			if (nfsds_failerr(ret) && *failposp == -1)
5975bb958dcfSRick Macklem 				*failposp = i;
5976bb958dcfSRick Macklem 			else if (error == 0 && ret != 0)
5977bb958dcfSRick Macklem 				error = ret;
5978bb958dcfSRick Macklem 		}
5979bb958dcfSRick Macklem 		nmpp++;
5980bb958dcfSRick Macklem 		fhp++;
5981bb958dcfSRick Macklem 	}
5982bb958dcfSRick Macklem 	ret = nfsrv_deallocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p);
5983bb958dcfSRick Macklem 	if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
5984bb958dcfSRick Macklem 		*failposp = mirrorcnt - 1;
5985bb958dcfSRick Macklem 	else if (error == 0 && ret != 0)
5986bb958dcfSRick Macklem 		error = ret;
5987bb958dcfSRick Macklem 	if (error == 0)
5988bb958dcfSRick Macklem 		error = nfsrv_setextattr(vp, &na, p);
5989bb958dcfSRick Macklem 	NFSD_DEBUG(4, "nfsrv_deallocatedsrpc: aft setextat=%d\n", error);
5990bb958dcfSRick Macklem 	tdrpc = drpc;
5991bb958dcfSRick Macklem 	timo = hz / 50;		/* Wait for 20msec. */
5992bb958dcfSRick Macklem 	if (timo < 1)
5993bb958dcfSRick Macklem 		timo = 1;
5994bb958dcfSRick Macklem 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
5995bb958dcfSRick Macklem 		/* Wait for RPCs on separate threads to complete. */
5996bb958dcfSRick Macklem 		while (tdrpc->inprog != 0 && tdrpc->done == 0)
5997bb958dcfSRick Macklem 			tsleep(&tdrpc->tsk, PVFS, "srvalds", timo);
5998bb958dcfSRick Macklem 		if (nfsds_failerr(tdrpc->err) && *failposp == -1)
5999bb958dcfSRick Macklem 			*failposp = i;
6000bb958dcfSRick Macklem 		else if (error == 0 && tdrpc->err != 0)
6001bb958dcfSRick Macklem 			error = tdrpc->err;
6002bb958dcfSRick Macklem 	}
6003bb958dcfSRick Macklem 	free(drpc, M_TEMP);
6004bb958dcfSRick Macklem 	return (error);
6005bb958dcfSRick Macklem }
6006bb958dcfSRick Macklem 
600790d2dfabSRick Macklem static int
nfsrv_setattrdsdorpc(fhandle_t * fhp,struct ucred * cred,NFSPROC_T * p,struct vnode * vp,struct nfsmount * nmp,struct nfsvattr * nap,struct nfsvattr * dsnap)600890d2dfabSRick Macklem nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
600990d2dfabSRick Macklem     struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap,
601090d2dfabSRick Macklem     struct nfsvattr *dsnap)
601190d2dfabSRick Macklem {
601290d2dfabSRick Macklem 	uint32_t *tl;
601390d2dfabSRick Macklem 	struct nfsrv_descript *nd;
601490d2dfabSRick Macklem 	nfsv4stateid_t st;
601590d2dfabSRick Macklem 	nfsattrbit_t attrbits;
601690d2dfabSRick Macklem 	int error;
601790d2dfabSRick Macklem 
601890d2dfabSRick Macklem 	NFSD_DEBUG(4, "in nfsrv_setattrdsdorpc\n");
601990d2dfabSRick Macklem 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
602090d2dfabSRick Macklem 	/*
602190d2dfabSRick Macklem 	 * Use a stateid where other is an alternating 01010 pattern and
602290d2dfabSRick Macklem 	 * seqid is 0xffffffff.  This value is not defined as special by
602390d2dfabSRick Macklem 	 * the RFC and is used by the FreeBSD NFS server to indicate an
602490d2dfabSRick Macklem 	 * MDS->DS proxy operation.
602590d2dfabSRick Macklem 	 */
602690d2dfabSRick Macklem 	st.other[0] = 0x55555555;
602790d2dfabSRick Macklem 	st.other[1] = 0x55555555;
602890d2dfabSRick Macklem 	st.other[2] = 0x55555555;
602990d2dfabSRick Macklem 	st.seqid = 0xffffffff;
603090d2dfabSRick Macklem 	nfscl_reqstart(nd, NFSPROC_SETATTR, nmp, (u_int8_t *)fhp, sizeof(*fhp),
60312b766d5eSRick Macklem 	    NULL, NULL, 0, 0, cred);
603290d2dfabSRick Macklem 	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
603390d2dfabSRick Macklem 	nfscl_fillsattr(nd, &nap->na_vattr, vp, NFSSATTR_FULL, 0);
603490d2dfabSRick Macklem 
603514eff785SRick Macklem 	/* Do a Getattr for the attributes that change due to writing. */
603690d2dfabSRick Macklem 	NFSZERO_ATTRBIT(&attrbits);
603790d2dfabSRick Macklem 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
603890d2dfabSRick Macklem 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
603990d2dfabSRick Macklem 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
604090d2dfabSRick Macklem 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
604114eff785SRick Macklem 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
604290d2dfabSRick Macklem 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
604390d2dfabSRick Macklem 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
604490d2dfabSRick Macklem 	(void) nfsrv_putattrbit(nd, &attrbits);
604590d2dfabSRick Macklem 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
604690d2dfabSRick Macklem 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
604790d2dfabSRick Macklem 	if (error != 0) {
604890d2dfabSRick Macklem 		free(nd, M_TEMP);
604990d2dfabSRick Macklem 		return (error);
605090d2dfabSRick Macklem 	}
605190d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattrrpc=%d\n",
605290d2dfabSRick Macklem 	    nd->nd_repstat);
605390d2dfabSRick Macklem 	/* Get rid of weak cache consistency data for now. */
605490d2dfabSRick Macklem 	if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
605590d2dfabSRick Macklem 	    (ND_NFSV4 | ND_V4WCCATTR)) {
605690d2dfabSRick Macklem 		error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL,
605790d2dfabSRick Macklem 		    NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
605890d2dfabSRick Macklem 		NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: wcc attr=%d\n", error);
605990d2dfabSRick Macklem 		if (error != 0)
606090d2dfabSRick Macklem 			goto nfsmout;
606190d2dfabSRick Macklem 		/*
606290d2dfabSRick Macklem 		 * Get rid of Op# and status for next op.
606390d2dfabSRick Macklem 		 */
606490d2dfabSRick Macklem 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
606590d2dfabSRick Macklem 		if (*++tl != 0)
606690d2dfabSRick Macklem 			nd->nd_flag |= ND_NOMOREDATA;
606790d2dfabSRick Macklem 	}
606890d2dfabSRick Macklem 	error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
606990d2dfabSRick Macklem 	if (error != 0)
607090d2dfabSRick Macklem 		goto nfsmout;
607190d2dfabSRick Macklem 	if (nd->nd_repstat != 0)
607290d2dfabSRick Macklem 		error = nd->nd_repstat;
607390d2dfabSRick Macklem 	/*
607490d2dfabSRick Macklem 	 * Get the Change, Size, Access Time and Modify Time attributes and set
607590d2dfabSRick Macklem 	 * on the Metadata file, so its attributes will be what the file's
607690d2dfabSRick Macklem 	 * would be if it had been written.
607790d2dfabSRick Macklem 	 */
607890d2dfabSRick Macklem 	if (error == 0) {
607990d2dfabSRick Macklem 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
608090d2dfabSRick Macklem 		error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL,
608190d2dfabSRick Macklem 		    NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
608290d2dfabSRick Macklem 	}
608390d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattr loadattr=%d\n", error);
608490d2dfabSRick Macklem nfsmout:
608590d2dfabSRick Macklem 	m_freem(nd->nd_mrep);
608690d2dfabSRick Macklem 	free(nd, M_TEMP);
608790d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_setattrdsdorpc error=%d\n", error);
608890d2dfabSRick Macklem 	return (error);
608990d2dfabSRick Macklem }
609090d2dfabSRick Macklem 
609190d2dfabSRick Macklem struct nfsrvsetattrdsdorpc {
609290d2dfabSRick Macklem 	int			done;
609390d2dfabSRick Macklem 	int			inprog;
609490d2dfabSRick Macklem 	struct task		tsk;
609590d2dfabSRick Macklem 	fhandle_t		fh;
609690d2dfabSRick Macklem 	struct nfsmount		*nmp;
609790d2dfabSRick Macklem 	struct vnode		*vp;
609890d2dfabSRick Macklem 	struct ucred		*cred;
609990d2dfabSRick Macklem 	NFSPROC_T		*p;
610090d2dfabSRick Macklem 	struct nfsvattr		na;
610190d2dfabSRick Macklem 	struct nfsvattr		dsna;
610290d2dfabSRick Macklem 	int			err;
610390d2dfabSRick Macklem };
610490d2dfabSRick Macklem 
610590d2dfabSRick Macklem /*
610690d2dfabSRick Macklem  * Start up the thread that will execute nfsrv_setattrdsdorpc().
610790d2dfabSRick Macklem  */
610890d2dfabSRick Macklem static void
start_setattrdsdorpc(void * arg,int pending)610990d2dfabSRick Macklem start_setattrdsdorpc(void *arg, int pending)
611090d2dfabSRick Macklem {
611190d2dfabSRick Macklem 	struct nfsrvsetattrdsdorpc *drpc;
611290d2dfabSRick Macklem 
611390d2dfabSRick Macklem 	drpc = (struct nfsrvsetattrdsdorpc *)arg;
611490d2dfabSRick Macklem 	drpc->err = nfsrv_setattrdsdorpc(&drpc->fh, drpc->cred, drpc->p,
611590d2dfabSRick Macklem 	    drpc->vp, drpc->nmp, &drpc->na, &drpc->dsna);
611690d2dfabSRick Macklem 	drpc->done = 1;
611790d2dfabSRick Macklem }
611890d2dfabSRick Macklem 
611990d2dfabSRick Macklem static int
nfsrv_setattrdsrpc(fhandle_t * fhp,struct ucred * cred,NFSPROC_T * p,struct vnode * vp,struct nfsmount ** nmpp,int mirrorcnt,struct nfsvattr * nap,int * failposp)612090d2dfabSRick Macklem nfsrv_setattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
612190d2dfabSRick Macklem     struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt,
612290d2dfabSRick Macklem     struct nfsvattr *nap, int *failposp)
612390d2dfabSRick Macklem {
6124bf6ac05aSRick Macklem 	struct nfsrvsetattrdsdorpc *drpc, *tdrpc = NULL;
612590d2dfabSRick Macklem 	struct nfsvattr na;
612690d2dfabSRick Macklem 	int error, i, ret, timo;
612790d2dfabSRick Macklem 
612890d2dfabSRick Macklem 	NFSD_DEBUG(4, "in nfsrv_setattrdsrpc\n");
612990d2dfabSRick Macklem 	drpc = NULL;
613090d2dfabSRick Macklem 	if (mirrorcnt > 1)
613190d2dfabSRick Macklem 		tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
613290d2dfabSRick Macklem 		    M_WAITOK);
613390d2dfabSRick Macklem 
613490d2dfabSRick Macklem 	/*
613590d2dfabSRick Macklem 	 * Do the setattr RPC for every DS, using a separate kernel process
613690d2dfabSRick Macklem 	 * for every DS except the last one.
613790d2dfabSRick Macklem 	 */
613890d2dfabSRick Macklem 	error = 0;
613990d2dfabSRick Macklem 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
614090d2dfabSRick Macklem 		tdrpc->done = 0;
614190d2dfabSRick Macklem 		tdrpc->inprog = 0;
614225705dd5SRick Macklem 		NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
614390d2dfabSRick Macklem 		tdrpc->nmp = *nmpp;
614490d2dfabSRick Macklem 		tdrpc->vp = vp;
614590d2dfabSRick Macklem 		tdrpc->cred = cred;
614690d2dfabSRick Macklem 		tdrpc->p = p;
614790d2dfabSRick Macklem 		tdrpc->na = *nap;
614890d2dfabSRick Macklem 		tdrpc->err = 0;
614990d2dfabSRick Macklem 		ret = EIO;
615090d2dfabSRick Macklem 		if (nfs_pnfsiothreads != 0) {
615190d2dfabSRick Macklem 			ret = nfs_pnfsio(start_setattrdsdorpc, tdrpc);
615290d2dfabSRick Macklem 			NFSD_DEBUG(4, "nfsrv_setattrdsrpc: nfs_pnfsio=%d\n",
615390d2dfabSRick Macklem 			    ret);
615490d2dfabSRick Macklem 		}
615590d2dfabSRick Macklem 		if (ret != 0) {
615690d2dfabSRick Macklem 			ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap,
615790d2dfabSRick Macklem 			    &na);
615890d2dfabSRick Macklem 			if (nfsds_failerr(ret) && *failposp == -1)
615990d2dfabSRick Macklem 				*failposp = i;
616090d2dfabSRick Macklem 			else if (error == 0 && ret != 0)
616190d2dfabSRick Macklem 				error = ret;
616290d2dfabSRick Macklem 		}
616390d2dfabSRick Macklem 		nmpp++;
616490d2dfabSRick Macklem 		fhp++;
616590d2dfabSRick Macklem 	}
616690d2dfabSRick Macklem 	ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, &na);
616790d2dfabSRick Macklem 	if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
616890d2dfabSRick Macklem 		*failposp = mirrorcnt - 1;
616990d2dfabSRick Macklem 	else if (error == 0 && ret != 0)
617090d2dfabSRick Macklem 		error = ret;
617190d2dfabSRick Macklem 	if (error == 0)
617290d2dfabSRick Macklem 		error = nfsrv_setextattr(vp, &na, p);
617390d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_setattrdsrpc: aft setextat=%d\n", error);
617490d2dfabSRick Macklem 	tdrpc = drpc;
617590d2dfabSRick Macklem 	timo = hz / 50;		/* Wait for 20msec. */
617690d2dfabSRick Macklem 	if (timo < 1)
617790d2dfabSRick Macklem 		timo = 1;
617890d2dfabSRick Macklem 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
617990d2dfabSRick Macklem 		/* Wait for RPCs on separate threads to complete. */
618090d2dfabSRick Macklem 		while (tdrpc->inprog != 0 && tdrpc->done == 0)
618190d2dfabSRick Macklem 			tsleep(&tdrpc->tsk, PVFS, "srvsads", timo);
618290d2dfabSRick Macklem 		if (nfsds_failerr(tdrpc->err) && *failposp == -1)
618390d2dfabSRick Macklem 			*failposp = i;
618490d2dfabSRick Macklem 		else if (error == 0 && tdrpc->err != 0)
618590d2dfabSRick Macklem 			error = tdrpc->err;
618690d2dfabSRick Macklem 	}
618790d2dfabSRick Macklem 	free(drpc, M_TEMP);
618890d2dfabSRick Macklem 	return (error);
618990d2dfabSRick Macklem }
619090d2dfabSRick Macklem 
619190d2dfabSRick Macklem /*
619290d2dfabSRick Macklem  * Do a Setattr of an NFSv4 ACL on the DS file.
619390d2dfabSRick Macklem  */
619490d2dfabSRick Macklem static int
nfsrv_setacldsdorpc(fhandle_t * fhp,struct ucred * cred,NFSPROC_T * p,struct vnode * vp,struct nfsmount * nmp,struct acl * aclp)619590d2dfabSRick Macklem nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
619690d2dfabSRick Macklem     struct vnode *vp, struct nfsmount *nmp, struct acl *aclp)
619790d2dfabSRick Macklem {
619890d2dfabSRick Macklem 	struct nfsrv_descript *nd;
619990d2dfabSRick Macklem 	nfsv4stateid_t st;
620090d2dfabSRick Macklem 	nfsattrbit_t attrbits;
620190d2dfabSRick Macklem 	int error;
620290d2dfabSRick Macklem 
620390d2dfabSRick Macklem 	NFSD_DEBUG(4, "in nfsrv_setacldsdorpc\n");
620490d2dfabSRick Macklem 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
620590d2dfabSRick Macklem 	/*
620690d2dfabSRick Macklem 	 * Use a stateid where other is an alternating 01010 pattern and
620790d2dfabSRick Macklem 	 * seqid is 0xffffffff.  This value is not defined as special by
620890d2dfabSRick Macklem 	 * the RFC and is used by the FreeBSD NFS server to indicate an
620990d2dfabSRick Macklem 	 * MDS->DS proxy operation.
621090d2dfabSRick Macklem 	 */
621190d2dfabSRick Macklem 	st.other[0] = 0x55555555;
621290d2dfabSRick Macklem 	st.other[1] = 0x55555555;
621390d2dfabSRick Macklem 	st.other[2] = 0x55555555;
621490d2dfabSRick Macklem 	st.seqid = 0xffffffff;
621590d2dfabSRick Macklem 	nfscl_reqstart(nd, NFSPROC_SETACL, nmp, (u_int8_t *)fhp, sizeof(*fhp),
62162b766d5eSRick Macklem 	    NULL, NULL, 0, 0, cred);
621790d2dfabSRick Macklem 	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
621890d2dfabSRick Macklem 	NFSZERO_ATTRBIT(&attrbits);
621990d2dfabSRick Macklem 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
622090d2dfabSRick Macklem 	/*
622190d2dfabSRick Macklem 	 * The "vp" argument to nfsv4_fillattr() is only used for vnode_type(),
622290d2dfabSRick Macklem 	 * so passing in the metadata "vp" will be ok, since it is of
622390d2dfabSRick Macklem 	 * the same type (VREG).
622490d2dfabSRick Macklem 	 */
622590d2dfabSRick Macklem 	nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &attrbits, NULL,
622690d2dfabSRick Macklem 	    NULL, 0, 0, 0, 0, 0, NULL);
622790d2dfabSRick Macklem 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
622890d2dfabSRick Macklem 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
622990d2dfabSRick Macklem 	if (error != 0) {
623090d2dfabSRick Macklem 		free(nd, M_TEMP);
623190d2dfabSRick Macklem 		return (error);
623290d2dfabSRick Macklem 	}
623390d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_setacldsdorpc: aft setaclrpc=%d\n",
623490d2dfabSRick Macklem 	    nd->nd_repstat);
623590d2dfabSRick Macklem 	error = nd->nd_repstat;
623690d2dfabSRick Macklem 	m_freem(nd->nd_mrep);
623790d2dfabSRick Macklem 	free(nd, M_TEMP);
623890d2dfabSRick Macklem 	return (error);
623990d2dfabSRick Macklem }
624090d2dfabSRick Macklem 
624190d2dfabSRick Macklem struct nfsrvsetacldsdorpc {
624290d2dfabSRick Macklem 	int			done;
624390d2dfabSRick Macklem 	int			inprog;
624490d2dfabSRick Macklem 	struct task		tsk;
624590d2dfabSRick Macklem 	fhandle_t		fh;
624690d2dfabSRick Macklem 	struct nfsmount		*nmp;
624790d2dfabSRick Macklem 	struct vnode		*vp;
624890d2dfabSRick Macklem 	struct ucred		*cred;
624990d2dfabSRick Macklem 	NFSPROC_T		*p;
625090d2dfabSRick Macklem 	struct acl		*aclp;
625190d2dfabSRick Macklem 	int			err;
625290d2dfabSRick Macklem };
625390d2dfabSRick Macklem 
625490d2dfabSRick Macklem /*
625590d2dfabSRick Macklem  * Start up the thread that will execute nfsrv_setacldsdorpc().
625690d2dfabSRick Macklem  */
625790d2dfabSRick Macklem static void
start_setacldsdorpc(void * arg,int pending)625890d2dfabSRick Macklem start_setacldsdorpc(void *arg, int pending)
625990d2dfabSRick Macklem {
626090d2dfabSRick Macklem 	struct nfsrvsetacldsdorpc *drpc;
626190d2dfabSRick Macklem 
626290d2dfabSRick Macklem 	drpc = (struct nfsrvsetacldsdorpc *)arg;
626390d2dfabSRick Macklem 	drpc->err = nfsrv_setacldsdorpc(&drpc->fh, drpc->cred, drpc->p,
626490d2dfabSRick Macklem 	    drpc->vp, drpc->nmp, drpc->aclp);
626590d2dfabSRick Macklem 	drpc->done = 1;
626690d2dfabSRick Macklem }
626790d2dfabSRick Macklem 
626890d2dfabSRick Macklem static int
nfsrv_setacldsrpc(fhandle_t * fhp,struct ucred * cred,NFSPROC_T * p,struct vnode * vp,struct nfsmount ** nmpp,int mirrorcnt,struct acl * aclp,int * failposp)626990d2dfabSRick Macklem nfsrv_setacldsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
627090d2dfabSRick Macklem     struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct acl *aclp,
627190d2dfabSRick Macklem     int *failposp)
627290d2dfabSRick Macklem {
6273bf6ac05aSRick Macklem 	struct nfsrvsetacldsdorpc *drpc, *tdrpc = NULL;
627490d2dfabSRick Macklem 	int error, i, ret, timo;
627590d2dfabSRick Macklem 
627690d2dfabSRick Macklem 	NFSD_DEBUG(4, "in nfsrv_setacldsrpc\n");
627790d2dfabSRick Macklem 	drpc = NULL;
627890d2dfabSRick Macklem 	if (mirrorcnt > 1)
627990d2dfabSRick Macklem 		tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
628090d2dfabSRick Macklem 		    M_WAITOK);
628190d2dfabSRick Macklem 
628290d2dfabSRick Macklem 	/*
628390d2dfabSRick Macklem 	 * Do the setattr RPC for every DS, using a separate kernel process
628490d2dfabSRick Macklem 	 * for every DS except the last one.
628590d2dfabSRick Macklem 	 */
628690d2dfabSRick Macklem 	error = 0;
628790d2dfabSRick Macklem 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
628890d2dfabSRick Macklem 		tdrpc->done = 0;
628990d2dfabSRick Macklem 		tdrpc->inprog = 0;
629025705dd5SRick Macklem 		NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
629190d2dfabSRick Macklem 		tdrpc->nmp = *nmpp;
629290d2dfabSRick Macklem 		tdrpc->vp = vp;
629390d2dfabSRick Macklem 		tdrpc->cred = cred;
629490d2dfabSRick Macklem 		tdrpc->p = p;
629590d2dfabSRick Macklem 		tdrpc->aclp = aclp;
629690d2dfabSRick Macklem 		tdrpc->err = 0;
629790d2dfabSRick Macklem 		ret = EIO;
629890d2dfabSRick Macklem 		if (nfs_pnfsiothreads != 0) {
629990d2dfabSRick Macklem 			ret = nfs_pnfsio(start_setacldsdorpc, tdrpc);
630090d2dfabSRick Macklem 			NFSD_DEBUG(4, "nfsrv_setacldsrpc: nfs_pnfsio=%d\n",
630190d2dfabSRick Macklem 			    ret);
630290d2dfabSRick Macklem 		}
630390d2dfabSRick Macklem 		if (ret != 0) {
630490d2dfabSRick Macklem 			ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp,
630590d2dfabSRick Macklem 			    aclp);
630690d2dfabSRick Macklem 			if (nfsds_failerr(ret) && *failposp == -1)
630790d2dfabSRick Macklem 				*failposp = i;
630890d2dfabSRick Macklem 			else if (error == 0 && ret != 0)
630990d2dfabSRick Macklem 				error = ret;
631090d2dfabSRick Macklem 		}
631190d2dfabSRick Macklem 		nmpp++;
631290d2dfabSRick Macklem 		fhp++;
631390d2dfabSRick Macklem 	}
631490d2dfabSRick Macklem 	ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp);
631590d2dfabSRick Macklem 	if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
631690d2dfabSRick Macklem 		*failposp = mirrorcnt - 1;
631790d2dfabSRick Macklem 	else if (error == 0 && ret != 0)
631890d2dfabSRick Macklem 		error = ret;
631990d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_setacldsrpc: aft setextat=%d\n", error);
632090d2dfabSRick Macklem 	tdrpc = drpc;
632190d2dfabSRick Macklem 	timo = hz / 50;		/* Wait for 20msec. */
632290d2dfabSRick Macklem 	if (timo < 1)
632390d2dfabSRick Macklem 		timo = 1;
632490d2dfabSRick Macklem 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
632590d2dfabSRick Macklem 		/* Wait for RPCs on separate threads to complete. */
632690d2dfabSRick Macklem 		while (tdrpc->inprog != 0 && tdrpc->done == 0)
632790d2dfabSRick Macklem 			tsleep(&tdrpc->tsk, PVFS, "srvacds", timo);
632890d2dfabSRick Macklem 		if (nfsds_failerr(tdrpc->err) && *failposp == -1)
632990d2dfabSRick Macklem 			*failposp = i;
633090d2dfabSRick Macklem 		else if (error == 0 && tdrpc->err != 0)
633190d2dfabSRick Macklem 			error = tdrpc->err;
633290d2dfabSRick Macklem 	}
633390d2dfabSRick Macklem 	free(drpc, M_TEMP);
633490d2dfabSRick Macklem 	return (error);
633590d2dfabSRick Macklem }
633690d2dfabSRick Macklem 
633790d2dfabSRick Macklem /*
633814eff785SRick Macklem  * Getattr call to the DS for the attributes that change due to writing.
633990d2dfabSRick Macklem  */
634090d2dfabSRick Macklem static int
nfsrv_getattrdsrpc(fhandle_t * fhp,struct ucred * cred,NFSPROC_T * p,struct vnode * vp,struct nfsmount * nmp,struct nfsvattr * nap)634190d2dfabSRick Macklem nfsrv_getattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
634290d2dfabSRick Macklem     struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap)
634390d2dfabSRick Macklem {
634490d2dfabSRick Macklem 	struct nfsrv_descript *nd;
634590d2dfabSRick Macklem 	int error;
634690d2dfabSRick Macklem 	nfsattrbit_t attrbits;
634790d2dfabSRick Macklem 
634890d2dfabSRick Macklem 	NFSD_DEBUG(4, "in nfsrv_getattrdsrpc\n");
634990d2dfabSRick Macklem 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
635090d2dfabSRick Macklem 	nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, (u_int8_t *)fhp,
63512b766d5eSRick Macklem 	    sizeof(fhandle_t), NULL, NULL, 0, 0, cred);
635290d2dfabSRick Macklem 	NFSZERO_ATTRBIT(&attrbits);
635390d2dfabSRick Macklem 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
635490d2dfabSRick Macklem 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
635590d2dfabSRick Macklem 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
635690d2dfabSRick Macklem 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
635714eff785SRick Macklem 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
635890d2dfabSRick Macklem 	(void) nfsrv_putattrbit(nd, &attrbits);
635990d2dfabSRick Macklem 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
636090d2dfabSRick Macklem 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
636190d2dfabSRick Macklem 	if (error != 0) {
636290d2dfabSRick Macklem 		free(nd, M_TEMP);
636390d2dfabSRick Macklem 		return (error);
636490d2dfabSRick Macklem 	}
636590d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft getattrrpc=%d\n",
636690d2dfabSRick Macklem 	    nd->nd_repstat);
636790d2dfabSRick Macklem 	if (nd->nd_repstat == 0) {
636890d2dfabSRick Macklem 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
636990d2dfabSRick Macklem 		    NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
637090d2dfabSRick Macklem 		    NULL, NULL);
637190d2dfabSRick Macklem 		/*
637290d2dfabSRick Macklem 		 * We can only save the updated values in the extended
637390d2dfabSRick Macklem 		 * attribute if the vp is exclusively locked.
637490d2dfabSRick Macklem 		 * This should happen when any of the following operations
637590d2dfabSRick Macklem 		 * occur on the vnode:
637690d2dfabSRick Macklem 		 *    Close, Delegreturn, LayoutCommit, LayoutReturn
637790d2dfabSRick Macklem 		 * As such, the updated extended attribute should get saved
637890d2dfabSRick Macklem 		 * before nfsrv_checkdsattr() returns 0 and allows the cached
637990d2dfabSRick Macklem 		 * attributes to be returned without calling this function.
638090d2dfabSRick Macklem 		 */
638190d2dfabSRick Macklem 		if (error == 0 && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) {
638290d2dfabSRick Macklem 			error = nfsrv_setextattr(vp, nap, p);
638390d2dfabSRick Macklem 			NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft setextat=%d\n",
638490d2dfabSRick Macklem 			    error);
638590d2dfabSRick Macklem 		}
638690d2dfabSRick Macklem 	} else
638790d2dfabSRick Macklem 		error = nd->nd_repstat;
638890d2dfabSRick Macklem 	m_freem(nd->nd_mrep);
638990d2dfabSRick Macklem 	free(nd, M_TEMP);
639090d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_getattrdsrpc error=%d\n", error);
639190d2dfabSRick Macklem 	return (error);
639290d2dfabSRick Macklem }
639390d2dfabSRick Macklem 
639490d2dfabSRick Macklem /*
6395c057a378SRick Macklem  * Seek call to a DS.
6396c057a378SRick Macklem  */
6397c057a378SRick Macklem static int
nfsrv_seekdsrpc(fhandle_t * fhp,off_t * offp,int content,bool * eofp,struct ucred * cred,NFSPROC_T * p,struct nfsmount * nmp)6398c057a378SRick Macklem nfsrv_seekdsrpc(fhandle_t *fhp, off_t *offp, int content, bool *eofp,
6399c057a378SRick Macklem     struct ucred *cred, NFSPROC_T *p, struct nfsmount *nmp)
6400c057a378SRick Macklem {
6401c057a378SRick Macklem 	uint32_t *tl;
6402c057a378SRick Macklem 	struct nfsrv_descript *nd;
6403c057a378SRick Macklem 	nfsv4stateid_t st;
6404c057a378SRick Macklem 	int error;
6405c057a378SRick Macklem 
6406c057a378SRick Macklem 	NFSD_DEBUG(4, "in nfsrv_seekdsrpc\n");
6407c057a378SRick Macklem 	/*
6408c057a378SRick Macklem 	 * Use a stateid where other is an alternating 01010 pattern and
6409c057a378SRick Macklem 	 * seqid is 0xffffffff.  This value is not defined as special by
6410c057a378SRick Macklem 	 * the RFC and is used by the FreeBSD NFS server to indicate an
6411c057a378SRick Macklem 	 * MDS->DS proxy operation.
6412c057a378SRick Macklem 	 */
6413c057a378SRick Macklem 	st.other[0] = 0x55555555;
6414c057a378SRick Macklem 	st.other[1] = 0x55555555;
6415c057a378SRick Macklem 	st.other[2] = 0x55555555;
6416c057a378SRick Macklem 	st.seqid = 0xffffffff;
6417c057a378SRick Macklem 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
6418c057a378SRick Macklem 	nfscl_reqstart(nd, NFSPROC_SEEKDS, nmp, (u_int8_t *)fhp,
64192b766d5eSRick Macklem 	    sizeof(fhandle_t), NULL, NULL, 0, 0, cred);
6420c057a378SRick Macklem 	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
6421c057a378SRick Macklem 	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
6422c057a378SRick Macklem 	txdr_hyper(*offp, tl); tl += 2;
6423c057a378SRick Macklem 	*tl = txdr_unsigned(content);
6424c057a378SRick Macklem 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
6425c057a378SRick Macklem 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
6426c057a378SRick Macklem 	if (error != 0) {
6427c057a378SRick Macklem 		free(nd, M_TEMP);
6428c057a378SRick Macklem 		return (error);
6429c057a378SRick Macklem 	}
6430c057a378SRick Macklem 	NFSD_DEBUG(4, "nfsrv_seekdsrpc: aft seekrpc=%d\n", nd->nd_repstat);
6431c057a378SRick Macklem 	if (nd->nd_repstat == 0) {
6432c057a378SRick Macklem 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER);
6433c057a378SRick Macklem 		if (*tl++ == newnfs_true)
6434c057a378SRick Macklem 			*eofp = true;
6435c057a378SRick Macklem 		else
6436c057a378SRick Macklem 			*eofp = false;
6437c057a378SRick Macklem 		*offp = fxdr_hyper(tl);
6438c057a378SRick Macklem 	} else
6439c057a378SRick Macklem 		error = nd->nd_repstat;
6440c057a378SRick Macklem nfsmout:
6441c057a378SRick Macklem 	m_freem(nd->nd_mrep);
6442c057a378SRick Macklem 	free(nd, M_TEMP);
6443c057a378SRick Macklem 	NFSD_DEBUG(4, "nfsrv_seekdsrpc error=%d\n", error);
6444c057a378SRick Macklem 	return (error);
6445c057a378SRick Macklem }
6446c057a378SRick Macklem 
6447c057a378SRick Macklem /*
644890d2dfabSRick Macklem  * Get the device id and file handle for a DS file.
644990d2dfabSRick Macklem  */
645090d2dfabSRick Macklem int
nfsrv_dsgetdevandfh(struct vnode * vp,NFSPROC_T * p,int * mirrorcntp,fhandle_t * fhp,char * devid)645190d2dfabSRick Macklem nfsrv_dsgetdevandfh(struct vnode *vp, NFSPROC_T *p, int *mirrorcntp,
645290d2dfabSRick Macklem     fhandle_t *fhp, char *devid)
645390d2dfabSRick Macklem {
645490d2dfabSRick Macklem 	int buflen, error;
645590d2dfabSRick Macklem 	char *buf;
645690d2dfabSRick Macklem 
645790d2dfabSRick Macklem 	buflen = 1024;
645890d2dfabSRick Macklem 	buf = malloc(buflen, M_TEMP, M_WAITOK);
645990d2dfabSRick Macklem 	error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, NULL,
646090d2dfabSRick Macklem 	    fhp, devid, NULL, NULL, NULL, NULL, NULL, NULL);
646190d2dfabSRick Macklem 	free(buf, M_TEMP);
646290d2dfabSRick Macklem 	return (error);
646390d2dfabSRick Macklem }
646490d2dfabSRick Macklem 
646590d2dfabSRick Macklem /*
646690d2dfabSRick Macklem  * Do a Lookup against the DS for the filename.
646790d2dfabSRick Macklem  */
646890d2dfabSRick Macklem static int
nfsrv_pnfslookupds(struct vnode * vp,struct vnode * dvp,struct pnfsdsfile * pf,struct vnode ** nvpp,NFSPROC_T * p)646990d2dfabSRick Macklem nfsrv_pnfslookupds(struct vnode *vp, struct vnode *dvp, struct pnfsdsfile *pf,
647090d2dfabSRick Macklem     struct vnode **nvpp, NFSPROC_T *p)
647190d2dfabSRick Macklem {
647290d2dfabSRick Macklem 	struct nameidata named;
647390d2dfabSRick Macklem 	struct ucred *tcred;
647490d2dfabSRick Macklem 	char *bufp;
647590d2dfabSRick Macklem 	u_long *hashp;
647690d2dfabSRick Macklem 	struct vnode *nvp;
647790d2dfabSRick Macklem 	int error;
647890d2dfabSRick Macklem 
647990d2dfabSRick Macklem 	tcred = newnfs_getcred();
648090d2dfabSRick Macklem 	named.ni_cnd.cn_nameiop = LOOKUP;
648190d2dfabSRick Macklem 	named.ni_cnd.cn_lkflags = LK_SHARED | LK_RETRY;
648290d2dfabSRick Macklem 	named.ni_cnd.cn_cred = tcred;
64835b5b7e2cSMateusz Guzik 	named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF;
648490d2dfabSRick Macklem 	nfsvno_setpathbuf(&named, &bufp, &hashp);
648590d2dfabSRick Macklem 	named.ni_cnd.cn_nameptr = bufp;
648690d2dfabSRick Macklem 	named.ni_cnd.cn_namelen = strlen(pf->dsf_filename);
648790d2dfabSRick Macklem 	strlcpy(bufp, pf->dsf_filename, NAME_MAX);
648890d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_pnfslookupds: filename=%s\n", bufp);
648990d2dfabSRick Macklem 	error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd);
649090d2dfabSRick Macklem 	NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft LOOKUP=%d\n", error);
649190d2dfabSRick Macklem 	NFSFREECRED(tcred);
649290d2dfabSRick Macklem 	nfsvno_relpathbuf(&named);
649390d2dfabSRick Macklem 	if (error == 0)
649490d2dfabSRick Macklem 		*nvpp = nvp;
649590d2dfabSRick Macklem 	NFSD_DEBUG(4, "eo nfsrv_pnfslookupds=%d\n", error);
649690d2dfabSRick Macklem 	return (error);
649790d2dfabSRick Macklem }
649890d2dfabSRick Macklem 
649990d2dfabSRick Macklem /*
650090d2dfabSRick Macklem  * Set the file handle to the correct one.
650190d2dfabSRick Macklem  */
650290d2dfabSRick Macklem static void
nfsrv_pnfssetfh(struct vnode * vp,struct pnfsdsfile * pf,char * devid,char * fnamep,struct vnode * nvp,NFSPROC_T * p)65033e5ba2e1SRick Macklem nfsrv_pnfssetfh(struct vnode *vp, struct pnfsdsfile *pf, char *devid,
65043e5ba2e1SRick Macklem     char *fnamep, struct vnode *nvp, NFSPROC_T *p)
650590d2dfabSRick Macklem {
650690d2dfabSRick Macklem 	struct nfsnode *np;
650795bf2e52SRick Macklem 	int ret = 0;
650890d2dfabSRick Macklem 
650990d2dfabSRick Macklem 	np = VTONFS(nvp);
651090d2dfabSRick Macklem 	NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH);
651190d2dfabSRick Macklem 	/*
65123e5ba2e1SRick Macklem 	 * We can only do a vn_set_extattr() if the vnode is exclusively
65133e5ba2e1SRick Macklem 	 * locked and vn_start_write() has been done.  If devid != NULL or
65143e5ba2e1SRick Macklem 	 * fnamep != NULL or the vnode is shared locked, vn_start_write()
65153e5ba2e1SRick Macklem 	 * may not have been done.
65163e5ba2e1SRick Macklem 	 * If not done now, it will be done on a future call.
651790d2dfabSRick Macklem 	 */
65183e5ba2e1SRick Macklem 	if (devid == NULL && fnamep == NULL && NFSVOPISLOCKED(vp) ==
65193e5ba2e1SRick Macklem 	    LK_EXCLUSIVE)
652090d2dfabSRick Macklem 		ret = vn_extattr_set(vp, IO_NODELOCKED,
65213e5ba2e1SRick Macklem 		    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*pf),
65223e5ba2e1SRick Macklem 		    (char *)pf, p);
652390d2dfabSRick Macklem 	NFSD_DEBUG(4, "eo nfsrv_pnfssetfh=%d\n", ret);
652490d2dfabSRick Macklem }
652590d2dfabSRick Macklem 
/*
 * Make the RPCs waiting on "nmp" fail.  Called for the mount point of a
 * DS once that DS has failed.
 */
void
nfsrv_killrpcs(struct nfsmount *nmp)
{

	/*
	 * newnfs_nmcancelreqs() makes every RPC in progress on the mount
	 * point fail.  A process that is waiting for such an RPC while
	 * holding a vnode lock on the mounted-on vnode (for example "df"
	 * or a non-forced "umount") therefore fails as well, which unlocks
	 * the mounted-on vnode and lets a forced dismount succeed.
	 * The NFSMNTP_CANCELRPCS flag should already be set when this
	 * function is called.
	 */
	newnfs_nmcancelreqs(nmp);
}
654990d2dfabSRick Macklem 
655090d2dfabSRick Macklem /*
655190d2dfabSRick Macklem  * Sum up the statfs info for each of the DSs, so that the client will
655290d2dfabSRick Macklem  * receive the total for all DSs.
655390d2dfabSRick Macklem  */
655490d2dfabSRick Macklem static int
nfsrv_pnfsstatfs(struct statfs * sf,struct mount * mp)6555910ccc77SRick Macklem nfsrv_pnfsstatfs(struct statfs *sf, struct mount *mp)
655690d2dfabSRick Macklem {
655790d2dfabSRick Macklem 	struct statfs *tsf;
655890d2dfabSRick Macklem 	struct nfsdevice *ds;
655990d2dfabSRick Macklem 	struct vnode **dvpp, **tdvpp, *dvp;
656090d2dfabSRick Macklem 	uint64_t tot;
656190d2dfabSRick Macklem 	int cnt, error = 0, i;
656290d2dfabSRick Macklem 
656390d2dfabSRick Macklem 	if (nfsrv_devidcnt <= 0)
656490d2dfabSRick Macklem 		return (ENXIO);
656590d2dfabSRick Macklem 	dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK);
656690d2dfabSRick Macklem 	tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK);
656790d2dfabSRick Macklem 
656890d2dfabSRick Macklem 	/* Get an array of the dvps for the DSs. */
656990d2dfabSRick Macklem 	tdvpp = dvpp;
657090d2dfabSRick Macklem 	i = 0;
657190d2dfabSRick Macklem 	NFSDDSLOCK();
6572910ccc77SRick Macklem 	/* First, search for matches for same file system. */
657390d2dfabSRick Macklem 	TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
6574910ccc77SRick Macklem 		if (ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset != 0 &&
6575245bfd34SRyan Moeller 		    fsidcmp(&ds->nfsdev_mdsfsid, &mp->mnt_stat.f_fsid) == 0) {
657690d2dfabSRick Macklem 			if (++i > nfsrv_devidcnt)
657790d2dfabSRick Macklem 				break;
657890d2dfabSRick Macklem 			*tdvpp++ = ds->nfsdev_dvp;
657990d2dfabSRick Macklem 		}
658090d2dfabSRick Macklem 	}
6581910ccc77SRick Macklem 	/*
6582910ccc77SRick Macklem 	 * If no matches for same file system, total all servers not assigned
6583910ccc77SRick Macklem 	 * to a file system.
6584910ccc77SRick Macklem 	 */
6585910ccc77SRick Macklem 	if (i == 0) {
6586910ccc77SRick Macklem 		TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
6587910ccc77SRick Macklem 			if (ds->nfsdev_nmp != NULL &&
6588910ccc77SRick Macklem 			    ds->nfsdev_mdsisset == 0) {
6589910ccc77SRick Macklem 				if (++i > nfsrv_devidcnt)
6590910ccc77SRick Macklem 					break;
6591910ccc77SRick Macklem 				*tdvpp++ = ds->nfsdev_dvp;
6592910ccc77SRick Macklem 			}
6593910ccc77SRick Macklem 		}
6594910ccc77SRick Macklem 	}
659590d2dfabSRick Macklem 	NFSDDSUNLOCK();
659690d2dfabSRick Macklem 	cnt = i;
659790d2dfabSRick Macklem 
659890d2dfabSRick Macklem 	/* Do a VFS_STATFS() for each of the DSs and sum them up. */
659990d2dfabSRick Macklem 	tdvpp = dvpp;
660090d2dfabSRick Macklem 	for (i = 0; i < cnt && error == 0; i++) {
660190d2dfabSRick Macklem 		dvp = *tdvpp++;
660290d2dfabSRick Macklem 		error = VFS_STATFS(dvp->v_mount, tsf);
660390d2dfabSRick Macklem 		if (error == 0) {
660490d2dfabSRick Macklem 			if (sf->f_bsize == 0) {
660590d2dfabSRick Macklem 				if (tsf->f_bsize > 0)
660690d2dfabSRick Macklem 					sf->f_bsize = tsf->f_bsize;
660790d2dfabSRick Macklem 				else
660890d2dfabSRick Macklem 					sf->f_bsize = 8192;
660990d2dfabSRick Macklem 			}
661090d2dfabSRick Macklem 			if (tsf->f_blocks > 0) {
661190d2dfabSRick Macklem 				if (sf->f_bsize != tsf->f_bsize) {
661290d2dfabSRick Macklem 					tot = tsf->f_blocks * tsf->f_bsize;
661390d2dfabSRick Macklem 					sf->f_blocks += (tot / sf->f_bsize);
661490d2dfabSRick Macklem 				} else
661590d2dfabSRick Macklem 					sf->f_blocks += tsf->f_blocks;
661690d2dfabSRick Macklem 			}
661790d2dfabSRick Macklem 			if (tsf->f_bfree > 0) {
661890d2dfabSRick Macklem 				if (sf->f_bsize != tsf->f_bsize) {
661990d2dfabSRick Macklem 					tot = tsf->f_bfree * tsf->f_bsize;
662090d2dfabSRick Macklem 					sf->f_bfree += (tot / sf->f_bsize);
662190d2dfabSRick Macklem 				} else
662290d2dfabSRick Macklem 					sf->f_bfree += tsf->f_bfree;
662390d2dfabSRick Macklem 			}
662490d2dfabSRick Macklem 			if (tsf->f_bavail > 0) {
662590d2dfabSRick Macklem 				if (sf->f_bsize != tsf->f_bsize) {
662690d2dfabSRick Macklem 					tot = tsf->f_bavail * tsf->f_bsize;
662790d2dfabSRick Macklem 					sf->f_bavail += (tot / sf->f_bsize);
662890d2dfabSRick Macklem 				} else
662990d2dfabSRick Macklem 					sf->f_bavail += tsf->f_bavail;
663090d2dfabSRick Macklem 			}
663190d2dfabSRick Macklem 		}
663290d2dfabSRick Macklem 	}
663390d2dfabSRick Macklem 	free(tsf, M_TEMP);
663490d2dfabSRick Macklem 	free(dvpp, M_TEMP);
663590d2dfabSRick Macklem 	return (error);
663690d2dfabSRick Macklem }
663790d2dfabSRick Macklem 
663890d2dfabSRick Macklem /*
663990d2dfabSRick Macklem  * Set an NFSv4 acl.
664090d2dfabSRick Macklem  */
664190d2dfabSRick Macklem int
nfsrv_setacl(struct vnode * vp,NFSACL_T * aclp,struct ucred * cred,NFSPROC_T * p)664290d2dfabSRick Macklem nfsrv_setacl(struct vnode *vp, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p)
664390d2dfabSRick Macklem {
664490d2dfabSRick Macklem 	int error;
664590d2dfabSRick Macklem 
664690d2dfabSRick Macklem 	if (nfsrv_useacl == 0 || nfs_supportsnfsv4acls(vp) == 0) {
664790d2dfabSRick Macklem 		error = NFSERR_ATTRNOTSUPP;
664890d2dfabSRick Macklem 		goto out;
664990d2dfabSRick Macklem 	}
665090d2dfabSRick Macklem 	/*
665190d2dfabSRick Macklem 	 * With NFSv4 ACLs, chmod(2) may need to add additional entries.
665290d2dfabSRick Macklem 	 * Make sure it has enough room for that - splitting every entry
665390d2dfabSRick Macklem 	 * into two and appending "canonical six" entries at the end.
665490d2dfabSRick Macklem 	 * Cribbed out of kern/vfs_acl.c - Rick M.
665590d2dfabSRick Macklem 	 */
665690d2dfabSRick Macklem 	if (aclp->acl_cnt > (ACL_MAX_ENTRIES - 6) / 2) {
665790d2dfabSRick Macklem 		error = NFSERR_ATTRNOTSUPP;
665890d2dfabSRick Macklem 		goto out;
665990d2dfabSRick Macklem 	}
666090d2dfabSRick Macklem 	error = VOP_SETACL(vp, ACL_TYPE_NFS4, aclp, cred, p);
666190d2dfabSRick Macklem 	if (error == 0) {
666290d2dfabSRick Macklem 		error = nfsrv_dssetacl(vp, aclp, cred, p);
666390d2dfabSRick Macklem 		if (error == ENOENT)
666490d2dfabSRick Macklem 			error = 0;
666590d2dfabSRick Macklem 	}
666690d2dfabSRick Macklem 
666790d2dfabSRick Macklem out:
666890d2dfabSRick Macklem 	NFSEXITCODE(error);
666990d2dfabSRick Macklem 	return (error);
667090d2dfabSRick Macklem }
667190d2dfabSRick Macklem 
6672c057a378SRick Macklem /*
6673c057a378SRick Macklem  * Seek vnode op call (actually it is a VOP_IOCTL()).
6674c057a378SRick Macklem  * This function is called with the vnode locked, but unlocks and vrele()s
6675c057a378SRick Macklem  * the vp before returning.
6676c057a378SRick Macklem  */
6677c057a378SRick Macklem int
nfsvno_seek(struct nfsrv_descript * nd,struct vnode * vp,u_long cmd,off_t * offp,int content,bool * eofp,struct ucred * cred,NFSPROC_T * p)6678c057a378SRick Macklem nfsvno_seek(struct nfsrv_descript *nd, struct vnode *vp, u_long cmd,
6679c057a378SRick Macklem     off_t *offp, int content, bool *eofp, struct ucred *cred, NFSPROC_T *p)
6680c057a378SRick Macklem {
6681c057a378SRick Macklem 	struct nfsvattr at;
6682c057a378SRick Macklem 	int error, ret;
6683c057a378SRick Macklem 
6684c057a378SRick Macklem 	ASSERT_VOP_LOCKED(vp, "nfsvno_seek vp");
6685c057a378SRick Macklem 	/*
6686c057a378SRick Macklem 	 * Attempt to seek on a DS file. A return of ENOENT implies
6687c057a378SRick Macklem 	 * there is no DS file to seek on.
6688c057a378SRick Macklem 	 */
6689c057a378SRick Macklem 	error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SEEKDS, NULL,
6690c057a378SRick Macklem 	    NULL, NULL, NULL, NULL, offp, content, eofp);
6691c057a378SRick Macklem 	if (error != ENOENT) {
6692c057a378SRick Macklem 		vput(vp);
6693c057a378SRick Macklem 		return (error);
6694c057a378SRick Macklem 	}
6695c057a378SRick Macklem 
6696c057a378SRick Macklem 	/*
6697c057a378SRick Macklem 	 * Do the VOP_IOCTL() call.  For the case where *offp == file_size,
6698c057a378SRick Macklem 	 * VOP_IOCTL() will return ENXIO.  However, the correct reply for
6699c057a378SRick Macklem 	 * NFSv4.2 is *eofp == true and error == 0 for this case.
6700c057a378SRick Macklem 	 */
6701b249ce48SMateusz Guzik 	NFSVOPUNLOCK(vp);
6702c057a378SRick Macklem 	error = VOP_IOCTL(vp, cmd, offp, 0, cred, p);
6703c057a378SRick Macklem 	*eofp = false;
6704c057a378SRick Macklem 	if (error == ENXIO || (error == 0 && cmd == FIOSEEKHOLE)) {
6705c057a378SRick Macklem 		/* Handle the cases where we might be at EOF. */
6706c057a378SRick Macklem 		ret = nfsvno_getattr(vp, &at, nd, p, 0, NULL);
6707c057a378SRick Macklem 		if (ret == 0 && *offp == at.na_size) {
6708c057a378SRick Macklem 			*eofp = true;
6709c057a378SRick Macklem 			error = 0;
6710c057a378SRick Macklem 		}
6711c057a378SRick Macklem 		if (ret != 0 && error == 0)
6712c057a378SRick Macklem 			error = ret;
6713c057a378SRick Macklem 	}
6714c057a378SRick Macklem 	vrele(vp);
6715c057a378SRick Macklem 	NFSEXITCODE(error);
6716c057a378SRick Macklem 	return (error);
6717c057a378SRick Macklem }
6718c057a378SRick Macklem 
6719c057a378SRick Macklem /*
6720c057a378SRick Macklem  * Allocate vnode op call.
6721c057a378SRick Macklem  */
6722c057a378SRick Macklem int
nfsvno_allocate(struct vnode * vp,off_t off,off_t len,struct ucred * cred,NFSPROC_T * p)6723c057a378SRick Macklem nfsvno_allocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred,
6724c057a378SRick Macklem     NFSPROC_T *p)
6725c057a378SRick Macklem {
672613914e51SRick Macklem 	int error;
672713914e51SRick Macklem 	off_t olen;
6728c057a378SRick Macklem 
6729c057a378SRick Macklem 	ASSERT_VOP_ELOCKED(vp, "nfsvno_allocate vp");
6730c057a378SRick Macklem 	/*
6731c057a378SRick Macklem 	 * Attempt to allocate on a DS file. A return of ENOENT implies
6732c057a378SRick Macklem 	 * there is no DS file to allocate on.
6733c057a378SRick Macklem 	 */
6734c057a378SRick Macklem 	error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_ALLOCATE, NULL,
6735c057a378SRick Macklem 	    NULL, NULL, NULL, NULL, &len, 0, NULL);
6736c057a378SRick Macklem 	if (error != ENOENT)
6737c057a378SRick Macklem 		return (error);
6738c057a378SRick Macklem 
6739c057a378SRick Macklem 	/*
674013914e51SRick Macklem 	 * Do the actual VOP_ALLOCATE(), looping so long as
674113914e51SRick Macklem 	 * progress is being made, to achieve completion.
6742c057a378SRick Macklem 	 */
674313914e51SRick Macklem 	do {
674413914e51SRick Macklem 		olen = len;
6745f0c9847aSRick Macklem 		error = VOP_ALLOCATE(vp, &off, &len, IO_SYNC, cred);
674613914e51SRick Macklem 		if (error == 0 && len > 0 && olen > len)
674713914e51SRick Macklem 			maybe_yield();
674813914e51SRick Macklem 	} while (error == 0 && len > 0 && olen > len);
6749c057a378SRick Macklem 	if (error == 0 && len > 0)
6750c057a378SRick Macklem 		error = NFSERR_IO;
6751c057a378SRick Macklem 	NFSEXITCODE(error);
6752c057a378SRick Macklem 	return (error);
6753c057a378SRick Macklem }
6754c057a378SRick Macklem 
6755c057a378SRick Macklem /*
6756bb958dcfSRick Macklem  * Deallocate vnode op call.
6757bb958dcfSRick Macklem  */
6758bb958dcfSRick Macklem int
nfsvno_deallocate(struct vnode * vp,off_t off,off_t len,struct ucred * cred,NFSPROC_T * p)6759bb958dcfSRick Macklem nfsvno_deallocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred,
6760bb958dcfSRick Macklem     NFSPROC_T *p)
6761bb958dcfSRick Macklem {
6762bb958dcfSRick Macklem 	int error;
6763bb958dcfSRick Macklem 	off_t olen;
6764bb958dcfSRick Macklem 
6765bb958dcfSRick Macklem 	ASSERT_VOP_ELOCKED(vp, "nfsvno_deallocate vp");
6766bb958dcfSRick Macklem 	/*
6767bb958dcfSRick Macklem 	 * Attempt to deallocate on a DS file. A return of ENOENT implies
6768bb958dcfSRick Macklem 	 * there is no DS file to deallocate on.
6769bb958dcfSRick Macklem 	 */
6770bb958dcfSRick Macklem 	error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_DEALLOCATE, NULL,
6771bb958dcfSRick Macklem 	    NULL, NULL, NULL, NULL, &len, 0, NULL);
6772bb958dcfSRick Macklem 	if (error != ENOENT)
6773bb958dcfSRick Macklem 		return (error);
6774bb958dcfSRick Macklem 
6775bb958dcfSRick Macklem 	/*
6776bb958dcfSRick Macklem 	 * Do the actual VOP_DEALLOCATE(), looping so long as
6777bb958dcfSRick Macklem 	 * progress is being made, to achieve completion.
6778bb958dcfSRick Macklem 	 */
6779bb958dcfSRick Macklem 	do {
6780bb958dcfSRick Macklem 		olen = len;
6781bb958dcfSRick Macklem 		error = VOP_DEALLOCATE(vp, &off, &len, 0, IO_SYNC, cred);
6782bb958dcfSRick Macklem 		if (error == 0 && len > 0 && olen > len)
6783bb958dcfSRick Macklem 			maybe_yield();
6784bb958dcfSRick Macklem 	} while (error == 0 && len > 0 && olen > len);
6785bb958dcfSRick Macklem 	if (error == 0 && len > 0)
6786bb958dcfSRick Macklem 		error = NFSERR_IO;
6787bb958dcfSRick Macklem 	NFSEXITCODE(error);
6788bb958dcfSRick Macklem 	return (error);
6789bb958dcfSRick Macklem }
6790bb958dcfSRick Macklem 
6791bb958dcfSRick Macklem /*
6792c057a378SRick Macklem  * Get Extended Atribute vnode op into an mbuf list.
6793c057a378SRick Macklem  */
6794c057a378SRick Macklem int
nfsvno_getxattr(struct vnode * vp,char * name,uint32_t maxresp,struct ucred * cred,uint64_t flag,int maxextsiz,struct thread * p,struct mbuf ** mpp,struct mbuf ** mpendp,int * lenp)6795c057a378SRick Macklem nfsvno_getxattr(struct vnode *vp, char *name, uint32_t maxresp,
6796cb889ce6SRick Macklem     struct ucred *cred, uint64_t flag, int maxextsiz, struct thread *p,
6797cb889ce6SRick Macklem     struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
6798c057a378SRick Macklem {
6799c057a378SRick Macklem 	struct iovec *iv;
6800c057a378SRick Macklem 	struct uio io, *uiop = &io;
6801c057a378SRick Macklem 	struct mbuf *m, *m2;
6802c057a378SRick Macklem 	int alen, error, len, tlen;
6803c057a378SRick Macklem 	size_t siz;
6804c057a378SRick Macklem 
6805c057a378SRick Macklem 	/* First, find out the size of the extended attribute. */
6806c057a378SRick Macklem 	error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL,
6807c057a378SRick Macklem 	    &siz, cred, p);
6808c057a378SRick Macklem 	if (error != 0)
6809c057a378SRick Macklem 		return (NFSERR_NOXATTR);
6810c057a378SRick Macklem 	if (siz > maxresp - NFS_MAXXDR)
6811c057a378SRick Macklem 		return (NFSERR_XATTR2BIG);
6812c057a378SRick Macklem 	len = siz;
6813c057a378SRick Macklem 	tlen = NFSM_RNDUP(len);
6814fb8ed4c5SRick Macklem 	if (tlen > 0) {
6815cb889ce6SRick Macklem 		/*
6816cb889ce6SRick Macklem 		 * If cnt > MCLBYTES and the reply will not be saved, use
6817cb889ce6SRick Macklem 		 * ext_pgs mbufs for TLS.
6818cb889ce6SRick Macklem 		 * For NFSv4.0, we do not know for sure if the reply will
6819cb889ce6SRick Macklem 		 * be saved, so do not use ext_pgs mbufs for NFSv4.0.
6820cb889ce6SRick Macklem 		 * Always use ext_pgs mbufs if ND_EXTPG is set.
6821cb889ce6SRick Macklem 		 */
6822cb889ce6SRick Macklem 		if ((flag & ND_EXTPG) != 0 || (tlen > MCLBYTES &&
6823cb889ce6SRick Macklem 		    (flag & (ND_TLS | ND_SAVEREPLY)) == ND_TLS &&
6824cb889ce6SRick Macklem 		    (flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4))
6825cb889ce6SRick Macklem 			uiop->uio_iovcnt = nfsrv_createiovec_extpgs(tlen,
6826cb889ce6SRick Macklem 			    maxextsiz, &m, &m2, &iv);
6827cb889ce6SRick Macklem 		else
6828cb889ce6SRick Macklem 			uiop->uio_iovcnt = nfsrv_createiovec(tlen, &m, &m2,
6829cb889ce6SRick Macklem 			    &iv);
6830c057a378SRick Macklem 		uiop->uio_iov = iv;
6831fb8ed4c5SRick Macklem 	} else {
6832fb8ed4c5SRick Macklem 		uiop->uio_iovcnt = 0;
6833fb8ed4c5SRick Macklem 		uiop->uio_iov = iv = NULL;
6834fb8ed4c5SRick Macklem 		m = m2 = NULL;
6835fb8ed4c5SRick Macklem 	}
6836c057a378SRick Macklem 	uiop->uio_offset = 0;
6837c057a378SRick Macklem 	uiop->uio_resid = tlen;
6838c057a378SRick Macklem 	uiop->uio_rw = UIO_READ;
6839c057a378SRick Macklem 	uiop->uio_segflg = UIO_SYSSPACE;
6840c057a378SRick Macklem 	uiop->uio_td = p;
6841c057a378SRick Macklem #ifdef MAC
6842c057a378SRick Macklem 	error = mac_vnode_check_getextattr(cred, vp, EXTATTR_NAMESPACE_USER,
6843c057a378SRick Macklem 	    name);
6844c057a378SRick Macklem 	if (error != 0)
6845c057a378SRick Macklem 		goto out;
6846c057a378SRick Macklem #endif
6847c057a378SRick Macklem 
6848fb8ed4c5SRick Macklem 	if (tlen > 0)
6849fb8ed4c5SRick Macklem 		error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop,
6850fb8ed4c5SRick Macklem 		    NULL, cred, p);
6851c057a378SRick Macklem 	if (error != 0)
6852c057a378SRick Macklem 		goto out;
6853c057a378SRick Macklem 	if (uiop->uio_resid > 0) {
6854c057a378SRick Macklem 		alen = tlen;
6855c057a378SRick Macklem 		len = tlen - uiop->uio_resid;
6856c057a378SRick Macklem 		tlen = NFSM_RNDUP(len);
6857c057a378SRick Macklem 		if (alen != tlen)
6858c057a378SRick Macklem 			printf("nfsvno_getxattr: weird size read\n");
685918a48314SRick Macklem 		if (tlen == 0) {
686018a48314SRick Macklem 			m_freem(m);
686118a48314SRick Macklem 			m = m2 = NULL;
686218a48314SRick Macklem 		} else if (alen != tlen || tlen != len)
686318a48314SRick Macklem 			m2 = nfsrv_adj(m, alen - tlen, tlen - len);
6864c057a378SRick Macklem 	}
6865c057a378SRick Macklem 	*lenp = len;
6866c057a378SRick Macklem 	*mpp = m;
6867c057a378SRick Macklem 	*mpendp = m2;
6868c057a378SRick Macklem 
6869c057a378SRick Macklem out:
6870c057a378SRick Macklem 	if (error != 0) {
6871fb8ed4c5SRick Macklem 		if (m != NULL)
6872c057a378SRick Macklem 			m_freem(m);
6873c057a378SRick Macklem 		*lenp = 0;
6874c057a378SRick Macklem 	}
6875c057a378SRick Macklem 	free(iv, M_TEMP);
6876c057a378SRick Macklem 	NFSEXITCODE(error);
6877c057a378SRick Macklem 	return (error);
6878c057a378SRick Macklem }
6879c057a378SRick Macklem 
6880c057a378SRick Macklem /*
6881c057a378SRick Macklem  * Set Extended attribute vnode op from an mbuf list.
6882c057a378SRick Macklem  */
6883c057a378SRick Macklem int
nfsvno_setxattr(struct vnode * vp,char * name,int len,struct mbuf * m,char * cp,struct ucred * cred,struct thread * p)6884c057a378SRick Macklem nfsvno_setxattr(struct vnode *vp, char *name, int len, struct mbuf *m,
6885c057a378SRick Macklem     char *cp, struct ucred *cred, struct thread *p)
6886c057a378SRick Macklem {
6887c057a378SRick Macklem 	struct iovec *iv;
6888c057a378SRick Macklem 	struct uio uio, *uiop = &uio;
6889c057a378SRick Macklem 	int cnt, error;
6890c057a378SRick Macklem 
689195bf2e52SRick Macklem 	error = 0;
6892c057a378SRick Macklem #ifdef MAC
6893c057a378SRick Macklem 	error = mac_vnode_check_setextattr(cred, vp, EXTATTR_NAMESPACE_USER,
6894c057a378SRick Macklem 	    name);
689595bf2e52SRick Macklem #endif
6896c057a378SRick Macklem 	if (error != 0)
6897c057a378SRick Macklem 		goto out;
6898c057a378SRick Macklem 
6899c057a378SRick Macklem 	uiop->uio_rw = UIO_WRITE;
6900c057a378SRick Macklem 	uiop->uio_segflg = UIO_SYSSPACE;
6901c057a378SRick Macklem 	uiop->uio_td = p;
6902c057a378SRick Macklem 	uiop->uio_offset = 0;
6903c057a378SRick Macklem 	uiop->uio_resid = len;
6904fb8ed4c5SRick Macklem 	if (len > 0) {
6905c057a378SRick Macklem 		error = nfsrv_createiovecw(len, m, cp, &iv, &cnt);
6906c057a378SRick Macklem 		uiop->uio_iov = iv;
6907c057a378SRick Macklem 		uiop->uio_iovcnt = cnt;
6908fb8ed4c5SRick Macklem 	} else {
6909fb8ed4c5SRick Macklem 		uiop->uio_iov = iv = NULL;
6910fb8ed4c5SRick Macklem 		uiop->uio_iovcnt = 0;
6911fb8ed4c5SRick Macklem 	}
6912c057a378SRick Macklem 	if (error == 0) {
6913c057a378SRick Macklem 		error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop,
6914c057a378SRick Macklem 		    cred, p);
6915ae781657SRick Macklem 		if (error == 0) {
6916ae781657SRick Macklem 			if (vp->v_type == VREG && nfsrv_devidcnt != 0)
6917ae781657SRick Macklem 				nfsvno_updateds(vp, cred, p);
69188cee2ebaSCy Schubert 			error = VOP_FSYNC(vp, MNT_WAIT, p);
6919ae781657SRick Macklem 		}
6920c057a378SRick Macklem 		free(iv, M_TEMP);
6921c057a378SRick Macklem 	}
6922c057a378SRick Macklem 
6923c057a378SRick Macklem out:
6924c057a378SRick Macklem 	NFSEXITCODE(error);
6925c057a378SRick Macklem 	return (error);
6926c057a378SRick Macklem }
6927c057a378SRick Macklem 
6928c057a378SRick Macklem /*
6929ae781657SRick Macklem  * For a pNFS server, the DS file's ctime and
6930ae781657SRick Macklem  * va_filerev (TimeMetadata and Change) needs to
6931ae781657SRick Macklem  * be updated.  This is a hack, but works by
6932ae781657SRick Macklem  * flipping the S_ISGID bit in va_mode and then
6933ae781657SRick Macklem  * flipping it back.
6934ae781657SRick Macklem  * It does result in two MDS->DS RPCs, but creating
6935ae781657SRick Macklem  * a custom RPC just to do this seems overkill, since
6936ae781657SRick Macklem  * Setxattr/Rmxattr will not be done that frequently.
6937ae781657SRick Macklem  * If it fails part way through, that is not too
6938ae781657SRick Macklem  * serious, since the DS file is never executed.
6939ae781657SRick Macklem  */
6940ae781657SRick Macklem static void
nfsvno_updateds(struct vnode * vp,struct ucred * cred,NFSPROC_T * p)6941ae781657SRick Macklem nfsvno_updateds(struct vnode *vp, struct ucred *cred, NFSPROC_T *p)
6942ae781657SRick Macklem {
6943ae781657SRick Macklem 	struct nfsvattr nva;
6944ae781657SRick Macklem 	int ret;
6945ae781657SRick Macklem 	u_short tmode;
6946ae781657SRick Macklem 
6947ae781657SRick Macklem 	ret = VOP_GETATTR(vp, &nva.na_vattr, cred);
6948ae781657SRick Macklem 	if (ret == 0) {
6949ae781657SRick Macklem 		tmode = nva.na_mode;
6950ae781657SRick Macklem 		NFSVNO_ATTRINIT(&nva);
6951ae781657SRick Macklem 		tmode ^= S_ISGID;
6952ae781657SRick Macklem 		NFSVNO_SETATTRVAL(&nva, mode, tmode);
6953ae781657SRick Macklem 		ret = nfsrv_proxyds(vp, 0, 0, cred, p,
6954ae781657SRick Macklem 		    NFSPROC_SETATTR, NULL, NULL, NULL, &nva,
6955ae781657SRick Macklem 		    NULL, NULL, 0, NULL);
6956ae781657SRick Macklem 		if (ret == 0) {
6957ae781657SRick Macklem 			tmode ^= S_ISGID;
6958ae781657SRick Macklem 			NFSVNO_SETATTRVAL(&nva, mode, tmode);
6959ae781657SRick Macklem 			ret = nfsrv_proxyds(vp, 0, 0, cred, p,
6960ae781657SRick Macklem 			    NFSPROC_SETATTR, NULL, NULL, NULL,
6961ae781657SRick Macklem 			    &nva, NULL, NULL, 0, NULL);
6962ae781657SRick Macklem 		}
6963ae781657SRick Macklem 	}
6964ae781657SRick Macklem }
6965ae781657SRick Macklem 
6966ae781657SRick Macklem /*
6967c057a378SRick Macklem  * Remove Extended attribute vnode op.
6968c057a378SRick Macklem  */
6969c057a378SRick Macklem int
nfsvno_rmxattr(struct nfsrv_descript * nd,struct vnode * vp,char * name,struct ucred * cred,struct thread * p)6970c057a378SRick Macklem nfsvno_rmxattr(struct nfsrv_descript *nd, struct vnode *vp, char *name,
6971c057a378SRick Macklem     struct ucred *cred, struct thread *p)
6972c057a378SRick Macklem {
6973c057a378SRick Macklem 	int error;
6974c057a378SRick Macklem 
6975c057a378SRick Macklem 	/*
6976c057a378SRick Macklem 	 * Get rid of any delegations.  I am not sure why this is required,
6977c057a378SRick Macklem 	 * but RFC-8276 says so.
6978c057a378SRick Macklem 	 */
6979c057a378SRick Macklem 	error = nfsrv_checkremove(vp, 0, nd, nd->nd_clientid, p);
6980c057a378SRick Macklem 	if (error != 0)
6981c057a378SRick Macklem 		goto out;
6982c057a378SRick Macklem #ifdef MAC
6983c057a378SRick Macklem 	error = mac_vnode_check_deleteextattr(cred, vp, EXTATTR_NAMESPACE_USER,
6984c057a378SRick Macklem 	    name);
6985c057a378SRick Macklem 	if (error != 0)
6986c057a378SRick Macklem 		goto out;
6987c057a378SRick Macklem #endif
6988c057a378SRick Macklem 
6989c057a378SRick Macklem 	error = VOP_DELETEEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, cred, p);
6990c057a378SRick Macklem 	if (error == EOPNOTSUPP)
6991c057a378SRick Macklem 		error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL,
6992c057a378SRick Macklem 		    cred, p);
6993ae781657SRick Macklem 	if (error == 0) {
6994ae781657SRick Macklem 		if (vp->v_type == VREG && nfsrv_devidcnt != 0)
6995ae781657SRick Macklem 			nfsvno_updateds(vp, cred, p);
69968cee2ebaSCy Schubert 		error = VOP_FSYNC(vp, MNT_WAIT, p);
6997ae781657SRick Macklem 	}
6998c057a378SRick Macklem out:
6999c057a378SRick Macklem 	NFSEXITCODE(error);
7000c057a378SRick Macklem 	return (error);
7001c057a378SRick Macklem }
7002c057a378SRick Macklem 
7003c057a378SRick Macklem /*
7004c057a378SRick Macklem  * List Extended Atribute vnode op into an mbuf list.
7005c057a378SRick Macklem  */
7006c057a378SRick Macklem int
nfsvno_listxattr(struct vnode * vp,uint64_t cookie,struct ucred * cred,struct thread * p,u_char ** bufp,uint32_t * lenp,bool * eofp)7007c057a378SRick Macklem nfsvno_listxattr(struct vnode *vp, uint64_t cookie, struct ucred *cred,
7008c057a378SRick Macklem     struct thread *p, u_char **bufp, uint32_t *lenp, bool *eofp)
7009c057a378SRick Macklem {
7010c057a378SRick Macklem 	struct iovec iv;
7011c057a378SRick Macklem 	struct uio io;
7012c057a378SRick Macklem 	int error;
7013c057a378SRick Macklem 	size_t siz;
7014c057a378SRick Macklem 
7015c057a378SRick Macklem 	*bufp = NULL;
7016c057a378SRick Macklem 	/* First, find out the size of the extended attribute. */
7017c057a378SRick Macklem 	error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, NULL, &siz, cred,
7018c057a378SRick Macklem 	    p);
7019c057a378SRick Macklem 	if (error != 0)
7020c057a378SRick Macklem 		return (NFSERR_NOXATTR);
7021c057a378SRick Macklem 	if (siz <= cookie) {
7022c057a378SRick Macklem 		*lenp = 0;
7023c057a378SRick Macklem 		*eofp = true;
7024c057a378SRick Macklem 		goto out;
7025c057a378SRick Macklem 	}
7026c057a378SRick Macklem 	if (siz > cookie + *lenp) {
7027c057a378SRick Macklem 		siz = cookie + *lenp;
7028c057a378SRick Macklem 		*eofp = false;
7029c057a378SRick Macklem 	} else
7030c057a378SRick Macklem 		*eofp = true;
7031c057a378SRick Macklem 	/* Just choose a sanity limit of 10Mbytes for malloc(M_TEMP). */
7032c057a378SRick Macklem 	if (siz > 10 * 1024 * 1024) {
7033c057a378SRick Macklem 		error = NFSERR_XATTR2BIG;
7034c057a378SRick Macklem 		goto out;
7035c057a378SRick Macklem 	}
7036c057a378SRick Macklem 	*bufp = malloc(siz, M_TEMP, M_WAITOK);
7037c057a378SRick Macklem 	iv.iov_base = *bufp;
7038c057a378SRick Macklem 	iv.iov_len = siz;
7039c057a378SRick Macklem 	io.uio_iovcnt = 1;
7040c057a378SRick Macklem 	io.uio_iov = &iv;
7041c057a378SRick Macklem 	io.uio_offset = 0;
7042c057a378SRick Macklem 	io.uio_resid = siz;
7043c057a378SRick Macklem 	io.uio_rw = UIO_READ;
7044c057a378SRick Macklem 	io.uio_segflg = UIO_SYSSPACE;
7045c057a378SRick Macklem 	io.uio_td = p;
7046c057a378SRick Macklem #ifdef MAC
7047c057a378SRick Macklem 	error = mac_vnode_check_listextattr(cred, vp, EXTATTR_NAMESPACE_USER);
7048c057a378SRick Macklem 	if (error != 0)
7049c057a378SRick Macklem 		goto out;
7050c057a378SRick Macklem #endif
7051c057a378SRick Macklem 
7052c057a378SRick Macklem 	error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, &io, NULL, cred,
7053c057a378SRick Macklem 	    p);
7054c057a378SRick Macklem 	if (error != 0)
7055c057a378SRick Macklem 		goto out;
7056c057a378SRick Macklem 	if (io.uio_resid > 0)
7057c057a378SRick Macklem 		siz -= io.uio_resid;
7058c057a378SRick Macklem 	*lenp = siz;
7059c057a378SRick Macklem 
7060c057a378SRick Macklem out:
7061c057a378SRick Macklem 	if (error != 0) {
7062c057a378SRick Macklem 		free(*bufp, M_TEMP);
7063c057a378SRick Macklem 		*bufp = NULL;
7064c057a378SRick Macklem 	}
7065c057a378SRick Macklem 	NFSEXITCODE(error);
7066c057a378SRick Macklem 	return (error);
7067c057a378SRick Macklem }
7068c057a378SRick Macklem 
7069ea83d07eSRick Macklem /*
7070ea83d07eSRick Macklem  * Trim trailing data off the mbuf list being built.
7071ea83d07eSRick Macklem  */
7072774a3685SRick Macklem void
nfsm_trimtrailing(struct nfsrv_descript * nd,struct mbuf * mb,char * bpos,int bextpg,int bextpgsiz)7073ea83d07eSRick Macklem nfsm_trimtrailing(struct nfsrv_descript *nd, struct mbuf *mb, char *bpos,
7074ea83d07eSRick Macklem     int bextpg, int bextpgsiz)
7075ea83d07eSRick Macklem {
7076ea83d07eSRick Macklem 	vm_page_t pg;
7077ea83d07eSRick Macklem 	int fullpgsiz, i;
7078ea83d07eSRick Macklem 
7079ea83d07eSRick Macklem 	if (mb->m_next != NULL) {
7080ea83d07eSRick Macklem 		m_freem(mb->m_next);
7081ea83d07eSRick Macklem 		mb->m_next = NULL;
7082ea83d07eSRick Macklem 	}
7083ea83d07eSRick Macklem 	if ((mb->m_flags & M_EXTPG) != 0) {
7084148a227bSRick Macklem 		KASSERT(bextpg >= 0 && bextpg < mb->m_epg_npgs,
7085148a227bSRick Macklem 		    ("nfsm_trimtrailing: bextpg out of range"));
7086148a227bSRick Macklem 		KASSERT(bpos == (char *)(void *)
7087148a227bSRick Macklem 		    PHYS_TO_DMAP(mb->m_epg_pa[bextpg]) + PAGE_SIZE - bextpgsiz,
7088148a227bSRick Macklem 		    ("nfsm_trimtrailing: bextpgsiz bad!"));
7089148a227bSRick Macklem 
7090ea83d07eSRick Macklem 		/* First, get rid of any pages after this position. */
7091ea83d07eSRick Macklem 		for (i = mb->m_epg_npgs - 1; i > bextpg; i--) {
7092ea83d07eSRick Macklem 			pg = PHYS_TO_VM_PAGE(mb->m_epg_pa[i]);
7093ea83d07eSRick Macklem 			vm_page_unwire_noq(pg);
7094ea83d07eSRick Macklem 			vm_page_free(pg);
7095ea83d07eSRick Macklem 		}
7096ea83d07eSRick Macklem 		mb->m_epg_npgs = bextpg + 1;
7097ea83d07eSRick Macklem 		if (bextpg == 0)
7098ea83d07eSRick Macklem 			fullpgsiz = PAGE_SIZE - mb->m_epg_1st_off;
7099ea83d07eSRick Macklem 		else
7100ea83d07eSRick Macklem 			fullpgsiz = PAGE_SIZE;
7101ea83d07eSRick Macklem 		mb->m_epg_last_len = fullpgsiz - bextpgsiz;
7102ea83d07eSRick Macklem 		mb->m_len = m_epg_pagelen(mb, 0, mb->m_epg_1st_off);
7103ea83d07eSRick Macklem 		for (i = 1; i < mb->m_epg_npgs; i++)
7104ea83d07eSRick Macklem 			mb->m_len += m_epg_pagelen(mb, i, 0);
7105ea83d07eSRick Macklem 		nd->nd_bextpgsiz = bextpgsiz;
7106ea83d07eSRick Macklem 		nd->nd_bextpg = bextpg;
7107ea83d07eSRick Macklem 	} else
7108ea83d07eSRick Macklem 		mb->m_len = bpos - mtod(mb, char *);
7109ea83d07eSRick Macklem 	nd->nd_mb = mb;
7110ea83d07eSRick Macklem 	nd->nd_bpos = bpos;
7111ea83d07eSRick Macklem }
7112ea83d07eSRick Macklem 
7113a5df139eSRick Macklem 
7114a5df139eSRick Macklem /*
7115a5df139eSRick Macklem  * Check to see if a put file handle operation should test for
7116a5df139eSRick Macklem  * NFSERR_WRONGSEC, although NFSv3 actually returns NFSERR_AUTHERR.
7117a5df139eSRick Macklem  * When Open is the next operation, NFSERR_WRONGSEC cannot be
71183fc3fe90SRick Macklem  * replied for the Open cases that use a component.  This can
7119a5df139eSRick Macklem  * be identified by the fact that the file handle's type is VDIR.
7120a5df139eSRick Macklem  */
7121a5df139eSRick Macklem bool
nfsrv_checkwrongsec(struct nfsrv_descript * nd,int nextop,__enum_uint8 (vtype)vtyp)7122ba8cc6d7SMateusz Guzik nfsrv_checkwrongsec(struct nfsrv_descript *nd, int nextop, __enum_uint8(vtype) vtyp)
7123a5df139eSRick Macklem {
7124a5df139eSRick Macklem 
71253fc3fe90SRick Macklem 	if ((nd->nd_flag & ND_NFSV4) == 0)
7126a5df139eSRick Macklem 		return (true);
7127a5df139eSRick Macklem 
7128a5df139eSRick Macklem 	if ((nd->nd_flag & ND_LASTOP) != 0)
7129a5df139eSRick Macklem 		return (false);
7130a5df139eSRick Macklem 
7131a5df139eSRick Macklem 	if (nextop == NFSV4OP_PUTROOTFH || nextop == NFSV4OP_PUTFH ||
7132a5df139eSRick Macklem 	    nextop == NFSV4OP_PUTPUBFH || nextop == NFSV4OP_RESTOREFH ||
7133a5df139eSRick Macklem 	    nextop == NFSV4OP_LOOKUP || nextop == NFSV4OP_LOOKUPP ||
7134a5df139eSRick Macklem 	    nextop == NFSV4OP_SECINFO || nextop == NFSV4OP_SECINFONONAME)
7135a5df139eSRick Macklem 		return (false);
7136a5df139eSRick Macklem 	if (nextop == NFSV4OP_OPEN && vtyp == VDIR)
7137a5df139eSRick Macklem 		return (false);
7138a5df139eSRick Macklem 	return (true);
7139a5df139eSRick Macklem }
7140a5df139eSRick Macklem 
7141f8dc0630SRick Macklem /*
7142f8dc0630SRick Macklem  * Check DSs marked no space.
7143f8dc0630SRick Macklem  */
7144f8dc0630SRick Macklem void
nfsrv_checknospc(void)7145f8dc0630SRick Macklem nfsrv_checknospc(void)
7146f8dc0630SRick Macklem {
7147f8dc0630SRick Macklem 	struct statfs *tsf;
7148f8dc0630SRick Macklem 	struct nfsdevice *ds;
7149f8dc0630SRick Macklem 	struct vnode **dvpp, **tdvpp, *dvp;
7150f8dc0630SRick Macklem 	char *devid, *tdevid;
7151f8dc0630SRick Macklem 	int cnt, error = 0, i;
7152f8dc0630SRick Macklem 
7153f8dc0630SRick Macklem 	if (nfsrv_devidcnt <= 0)
7154f8dc0630SRick Macklem 		return;
7155f8dc0630SRick Macklem 	dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK);
7156f8dc0630SRick Macklem 	devid = malloc(nfsrv_devidcnt * NFSX_V4DEVICEID, M_TEMP, M_WAITOK);
7157f8dc0630SRick Macklem 	tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK);
7158f8dc0630SRick Macklem 
7159f8dc0630SRick Macklem 	/* Get an array of the dvps for the DSs. */
7160f8dc0630SRick Macklem 	tdvpp = dvpp;
7161f8dc0630SRick Macklem 	tdevid = devid;
7162f8dc0630SRick Macklem 	i = 0;
7163f8dc0630SRick Macklem 	NFSDDSLOCK();
7164f8dc0630SRick Macklem 	/* First, search for matches for same file system. */
7165f8dc0630SRick Macklem 	TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
7166f8dc0630SRick Macklem 		if (ds->nfsdev_nmp != NULL && ds->nfsdev_nospc) {
7167f8dc0630SRick Macklem 			if (++i > nfsrv_devidcnt)
7168f8dc0630SRick Macklem 				break;
7169f8dc0630SRick Macklem 			*tdvpp++ = ds->nfsdev_dvp;
7170f8dc0630SRick Macklem 			NFSBCOPY(ds->nfsdev_deviceid, tdevid, NFSX_V4DEVICEID);
7171f8dc0630SRick Macklem 			tdevid += NFSX_V4DEVICEID;
7172f8dc0630SRick Macklem 		}
7173f8dc0630SRick Macklem 	}
7174f8dc0630SRick Macklem 	NFSDDSUNLOCK();
7175f8dc0630SRick Macklem 
7176f8dc0630SRick Macklem 	/* Do a VFS_STATFS() for each of the DSs and clear no space. */
7177f8dc0630SRick Macklem 	cnt = i;
7178f8dc0630SRick Macklem 	tdvpp = dvpp;
7179f8dc0630SRick Macklem 	tdevid = devid;
7180f8dc0630SRick Macklem 	for (i = 0; i < cnt && error == 0; i++) {
7181f8dc0630SRick Macklem 		dvp = *tdvpp++;
7182f8dc0630SRick Macklem 		error = VFS_STATFS(dvp->v_mount, tsf);
7183f8dc0630SRick Macklem 		if (error == 0 && tsf->f_bavail > 0) {
7184f8dc0630SRick Macklem 			NFSD_DEBUG(1, "nfsrv_checknospc: reset nospc\n");
7185f8dc0630SRick Macklem 			nfsrv_marknospc(tdevid, false);
7186f8dc0630SRick Macklem 		}
7187f8dc0630SRick Macklem 		tdevid += NFSX_V4DEVICEID;
7188f8dc0630SRick Macklem 	}
7189f8dc0630SRick Macklem 	free(tsf, M_TEMP);
7190f8dc0630SRick Macklem 	free(dvpp, M_TEMP);
7191f8dc0630SRick Macklem 	free(devid, M_TEMP);
7192f8dc0630SRick Macklem }
7193f8dc0630SRick Macklem 
71947e44856eSRick Macklem /*
71957e44856eSRick Macklem  * Initialize everything that needs to be initialized for a vnet.
71967e44856eSRick Macklem  */
71977e44856eSRick Macklem static void
nfsrv_vnetinit(const void * unused __unused)71987e44856eSRick Macklem nfsrv_vnetinit(const void *unused __unused)
71997e44856eSRick Macklem {
72007e44856eSRick Macklem 
72017e44856eSRick Macklem 	nfsd_mntinit();
72027e44856eSRick Macklem }
7203ed03776cSRick Macklem VNET_SYSINIT(nfsrv_vnetinit, SI_SUB_VNET_DONE, SI_ORDER_ANY,
72047e44856eSRick Macklem     nfsrv_vnetinit, NULL);
72057e44856eSRick Macklem 
72067e44856eSRick Macklem /*
72077e44856eSRick Macklem  * Clean up everything that is in a vnet and needs to be
72087e44856eSRick Macklem  * done when the jail is destroyed or the module unloaded.
72097e44856eSRick Macklem  */
72107e44856eSRick Macklem static void
nfsrv_cleanup(const void * unused __unused)7211ef6fcc5eSRick Macklem nfsrv_cleanup(const void *unused __unused)
72127e44856eSRick Macklem {
72137e44856eSRick Macklem 	int i;
72147e44856eSRick Macklem 
72157e44856eSRick Macklem 	NFSD_LOCK();
72167e44856eSRick Macklem 	if (!NFSD_VNET(nfsrv_mntinited)) {
72177e44856eSRick Macklem 		NFSD_UNLOCK();
72187e44856eSRick Macklem 		return;
72197e44856eSRick Macklem 	}
72207e44856eSRick Macklem 	NFSD_VNET(nfsrv_mntinited) = false;
72217e44856eSRick Macklem 	NFSD_UNLOCK();
72227e44856eSRick Macklem 
72237e44856eSRick Macklem 	/* Clean out all NFSv4 state. */
72247e44856eSRick Macklem 	nfsrv_throwawayallstate(curthread);
72257e44856eSRick Macklem 
72267e44856eSRick Macklem 	/* Clean the NFS server reply cache */
72277e44856eSRick Macklem 	nfsrvd_cleancache();
72287e44856eSRick Macklem 
72297e44856eSRick Macklem 	/* Clean out v4root exports. */
72307e44856eSRick Macklem 	if (NFSD_VNET(nfsv4root_mnt)->mnt_export != NULL) {
72317e44856eSRick Macklem 		vfs_free_addrlist(NFSD_VNET(nfsv4root_mnt)->mnt_export);
72327e44856eSRick Macklem 		free(NFSD_VNET(nfsv4root_mnt)->mnt_export, M_MOUNT);
72337e44856eSRick Macklem 		NFSD_VNET(nfsv4root_mnt)->mnt_export = NULL;
72347e44856eSRick Macklem 	}
72357e44856eSRick Macklem 
72367e44856eSRick Macklem 	/* Free up the krpc server pool. */
72377e44856eSRick Macklem 	if (NFSD_VNET(nfsrvd_pool) != NULL)
72387e44856eSRick Macklem 		svcpool_destroy(NFSD_VNET(nfsrvd_pool));
72397e44856eSRick Macklem 
72407e44856eSRick Macklem 	/* and get rid of the locks */
72417e44856eSRick Macklem 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
72427e44856eSRick Macklem 		mtx_destroy(&NFSD_VNET(nfsrchash_table)[i].mtx);
72437e44856eSRick Macklem 		mtx_destroy(&NFSD_VNET(nfsrcahash_table)[i].mtx);
72447e44856eSRick Macklem 	}
72457e44856eSRick Macklem 	mtx_destroy(&NFSD_VNET(nfsv4root_mnt)->mnt_mtx);
72467e44856eSRick Macklem 	for (i = 0; i < nfsrv_sessionhashsize; i++)
72477e44856eSRick Macklem 		mtx_destroy(&NFSD_VNET(nfssessionhash)[i].mtx);
72487e44856eSRick Macklem 	lockdestroy(&NFSD_VNET(nfsv4root_mnt)->mnt_explock);
72497e44856eSRick Macklem 	free(NFSD_VNET(nfsrvudphashtbl), M_NFSRVCACHE);
72507e44856eSRick Macklem 	free(NFSD_VNET(nfsrchash_table), M_NFSRVCACHE);
72517e44856eSRick Macklem 	free(NFSD_VNET(nfsrcahash_table), M_NFSRVCACHE);
72527e44856eSRick Macklem 	free(NFSD_VNET(nfsclienthash), M_NFSDCLIENT);
72537e44856eSRick Macklem 	free(NFSD_VNET(nfslockhash), M_NFSDLOCKFILE);
72547e44856eSRick Macklem 	free(NFSD_VNET(nfssessionhash), M_NFSDSESSION);
72557e44856eSRick Macklem 	free(NFSD_VNET(nfsv4root_mnt), M_TEMP);
72567e44856eSRick Macklem 	NFSD_VNET(nfsv4root_mnt) = NULL;
72577e44856eSRick Macklem }
7258ef6fcc5eSRick Macklem VNET_SYSUNINIT(nfsrv_cleanup, SI_SUB_VNET_DONE, SI_ORDER_ANY,
7259ef6fcc5eSRick Macklem     nfsrv_cleanup, NULL);
72607e44856eSRick Macklem 
72619ec7b004SRick Macklem extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *);
72629ec7b004SRick Macklem 
72639ec7b004SRick Macklem /*
72649ec7b004SRick Macklem  * Called once to initialize data structures...
72659ec7b004SRick Macklem  */
72669ec7b004SRick Macklem static int
nfsd_modevent(module_t mod,int type,void * data)72679ec7b004SRick Macklem nfsd_modevent(module_t mod, int type, void *data)
72689ec7b004SRick Macklem {
726993c5875bSRick Macklem 	int error = 0, i;
72709ec7b004SRick Macklem 	static int loaded = 0;
72719ec7b004SRick Macklem 
72729ec7b004SRick Macklem 	switch (type) {
72739ec7b004SRick Macklem 	case MOD_LOAD:
72749ec7b004SRick Macklem 		if (loaded)
7275a9285ae5SZack Kirsch 			goto out;
72769ec7b004SRick Macklem 		newnfs_portinit();
7277e7375b6fSKonstantin Belousov 		mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF);
7278e7375b6fSKonstantin Belousov 		mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF);
727990d2dfabSRick Macklem 		mtx_init(&nfsrv_dontlistlock_mtx, "nfs4dnl", NULL, MTX_DEF);
728090d2dfabSRick Macklem 		mtx_init(&nfsrv_recalllock_mtx, "nfs4rec", NULL, MTX_DEF);
72819ec7b004SRick Macklem #ifdef VV_DISABLEDELEG
72829ec7b004SRick Macklem 		vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation;
72839ec7b004SRick Macklem 		vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation;
72849ec7b004SRick Macklem #endif
72859ec7b004SRick Macklem 		nfsd_call_nfsd = nfssvc_nfsd;
72869ec7b004SRick Macklem 		loaded = 1;
72879ec7b004SRick Macklem 		break;
72889ec7b004SRick Macklem 
72899ec7b004SRick Macklem 	case MOD_UNLOAD:
72909ec7b004SRick Macklem 		if (newnfs_numnfsd != 0) {
72919ec7b004SRick Macklem 			error = EBUSY;
72929ec7b004SRick Macklem 			break;
72939ec7b004SRick Macklem 		}
72949ec7b004SRick Macklem 
72959ec7b004SRick Macklem #ifdef VV_DISABLEDELEG
72969ec7b004SRick Macklem 		vn_deleg_ops.vndeleg_recall = NULL;
72979ec7b004SRick Macklem 		vn_deleg_ops.vndeleg_disable = NULL;
72989ec7b004SRick Macklem #endif
72999ec7b004SRick Macklem 		nfsd_call_nfsd = NULL;
730093c5875bSRick Macklem 		mtx_destroy(&nfsrc_udpmtx);
73019ec7b004SRick Macklem 		mtx_destroy(&nfs_v4root_mutex);
730290d2dfabSRick Macklem 		mtx_destroy(&nfsrv_dontlistlock_mtx);
730390d2dfabSRick Macklem 		mtx_destroy(&nfsrv_recalllock_mtx);
730490d2dfabSRick Macklem 		if (nfslayouthash != NULL) {
730590d2dfabSRick Macklem 			for (i = 0; i < nfsrv_layouthashsize; i++)
730690d2dfabSRick Macklem 				mtx_destroy(&nfslayouthash[i].mtx);
730790d2dfabSRick Macklem 			free(nfslayouthash, M_NFSDSESSION);
730890d2dfabSRick Macklem 		}
73099ec7b004SRick Macklem 		loaded = 0;
73109ec7b004SRick Macklem 		break;
73119ec7b004SRick Macklem 	default:
73129ec7b004SRick Macklem 		error = EOPNOTSUPP;
73139ec7b004SRick Macklem 		break;
73149ec7b004SRick Macklem 	}
7315a9285ae5SZack Kirsch 
7316a9285ae5SZack Kirsch out:
7317a9285ae5SZack Kirsch 	NFSEXITCODE(error);
7318a9285ae5SZack Kirsch 	return (error);
73199ec7b004SRick Macklem }
73209ec7b004SRick Macklem static moduledata_t nfsd_mod = {
73219ec7b004SRick Macklem 	"nfsd",
73229ec7b004SRick Macklem 	nfsd_modevent,
73239ec7b004SRick Macklem 	NULL,
73249ec7b004SRick Macklem };
73259ec7b004SRick Macklem DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY);
73269ec7b004SRick Macklem 
73279ec7b004SRick Macklem /* So that loader and kldload(2) can find us, wherever we are.. */
73289ec7b004SRick Macklem MODULE_VERSION(nfsd, 1);
73299ec7b004SRick Macklem MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1);
733092f7f12bSRick Macklem MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1);
7331a8437c97SRick Macklem MODULE_DEPEND(nfsd, krpc, 1, 1, 1);
7332a8437c97SRick Macklem MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1);
7333