xref: /freebsd/sys/fs/nfsserver/nfs_nfsdport.c (revision 4ed925457ab06e83238a5db33e89ccc94b99a713)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 /*
38  * Functions that perform the vfs operations required by the routines in
39  * nfsd_serv.c. It is hoped that this change will make the server more
40  * portable.
41  */
42 
43 #include <fs/nfs/nfsport.h>
44 #include <sys/sysctl.h>
45 #include <nlm/nlm_prot.h>
46 #include <nlm/nlm.h>
47 
/*
 * File-scope state for the NFS server port layer: symbols defined elsewhere
 * that this file refers to, the globals this file defines, and the sysctl
 * knobs published under the _vfs_newnfs node.
 */
48 extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
49 extern int nfsv4root_set;
50 extern int nfsrv_useacl;
51 extern int newnfs_numnfsd;
52 extern struct mount nfsv4root_mnt;
53 extern struct nfsrv_stablefirst nfsrv_stablefirst;
54 extern void (*nfsd_call_servertimer)(void);
55 struct vfsoptlist nfsv4root_opt, nfsv4root_newopt;
56 NFSDLOCKMUTEX;
57 struct mtx nfs_cache_mutex;
58 struct mtx nfs_v4root_mutex;
/* Root and public file handles; the *set flags below start out as 0. */
59 struct nfsrvfh nfs_rootfh, nfs_pubfh;
60 int nfs_pubfhset = 0, nfs_rootfhset = 0;
61 static uint32_t nfsv4_sysid = 0;
62
63 static int nfssvc_srvcall(struct thread *, struct nfssvc_args *,
64     struct ucred *);
65
/*
 * Tunables and counters exported through sysctl below.
 * enable_crossmntpt defaults to 1 and is consulted by nfsvno_namei() when
 * deciding whether to set NOCROSSMOUNT.
 * NOTE(review): nfs_commit_blks and nfs_commit_miss are exported without a
 * description string; they look like commit statistics -- confirm against
 * the code that updates them.
 */
66 static int enable_crossmntpt = 1;
67 static int nfs_commit_blks;
68 static int nfs_commit_miss;
69 extern int nfsrv_issuedelegs;
70 extern int nfsrv_dolocallocks;
71
72 SYSCTL_DECL(_vfs_newnfs);
73 SYSCTL_INT(_vfs_newnfs, OID_AUTO, mirrormnt, CTLFLAG_RW, &enable_crossmntpt,
74     0, "Enable nfsd to cross mount points");
75 SYSCTL_INT(_vfs_newnfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks,
76     0, "");
77 SYSCTL_INT(_vfs_newnfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss,
78     0, "");
79 SYSCTL_INT(_vfs_newnfs, OID_AUTO, issue_delegations, CTLFLAG_RW,
80     &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations");
81 SYSCTL_INT(_vfs_newnfs, OID_AUTO, enable_locallocks, CTLFLAG_RW,
82     &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files");
83 
/*
 * Sequential-read heuristic table used by nfsvno_read().
 *
 * NUM_HEURISTIC is the number of slots in the table.  A slot that is
 * (re)claimed for a vnode starts with a use count of NHUSE_INIT; each read
 * through the slot adds NHUSE_INC, and the count is clamped to NHUSE_MAX.
 */
84 #define	NUM_HEURISTIC		1017
85 #define	NHUSE_INIT		64
86 #define	NHUSE_INC		16
87 #define	NHUSE_MAX		2048
88
/*
 * One slot per tracked vnode.  nfsvno_read() picks a slot from the vnode
 * address, probes a bounded number of neighbours and recycles the slot
 * with the lowest use count when no match is found.
 */
89 static struct nfsheur {
90 	struct vnode *nh_vp;	/* vp to match (unreferenced pointer) */
91 	off_t nh_nextr;		/* next offset for sequential detection */
92 	int nh_use;		/* use count for selection */
93 	int nh_seqcount;	/* heuristic */
94 } nfsheur[NUM_HEURISTIC];
95 
96 
97 /*
98  * Get attributes into nfsvattr structure.
99  */
100 int
101 nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
102     struct thread *p)
103 {
104 	int error, lockedit = 0;
105 
106 	/* Since FreeBSD insists the vnode be locked... */
107 	if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
108 		lockedit = 1;
109 		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, p);
110 	}
111 	error = VOP_GETATTR(vp, &nvap->na_vattr, cred);
112 	if (lockedit)
113 		NFSVOPUNLOCK(vp, 0, p);
114 	return (error);
115 }
116 
117 /*
118  * Get a file handle for a vnode.
119  */
120 int
121 nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p)
122 {
123 	int error;
124 
125 	NFSBZERO((caddr_t)fhp, sizeof(fhandle_t));
126 	fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
127 	error = VOP_VPTOFH(vp, &fhp->fh_fid);
128 	return (error);
129 }
130 
131 /*
132  * Perform access checking for vnodes obtained from file handles that would
133  * refer to files already opened by a Unix client. You cannot just use
134  * vn_writechk() and VOP_ACCESSX() for two reasons.
135  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
136  *     case.
137  * 2 - The owner is to be given access irrespective of mode bits for some
138  *     operations, so that processes that chmod after opening a file don't
139  *     break.
140  */
141 int
142 nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred,
143     struct nfsexstuff *exp, struct thread *p, int override, int vpislocked,
144     u_int32_t *supportedtypep)
145 {
146 	struct vattr vattr;
147 	int error = 0, getret = 0;
148 
149 	if (accmode & VWRITE) {
150 		/* Just vn_writechk() changed to check rdonly */
151 		/*
152 		 * Disallow write attempts on read-only file systems;
153 		 * unless the file is a socket or a block or character
154 		 * device resident on the file system.
155 		 */
156 		if (NFSVNO_EXRDONLY(exp) ||
157 		    (vp->v_mount->mnt_flag & MNT_RDONLY)) {
158 			switch (vp->v_type) {
159 			case VREG:
160 			case VDIR:
161 			case VLNK:
162 				return (EROFS);
163 			default:
164 				break;
165 			}
166 		}
167 		/*
168 		 * If there's shared text associated with
169 		 * the inode, try to free it up once.  If
170 		 * we fail, we can't allow writing.
171 		 */
172 		if (vp->v_vflag & VV_TEXT)
173 			return (ETXTBSY);
174 	}
175 	if (vpislocked == 0)
176 		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, p);
177 
178 	/*
179 	 * Should the override still be applied when ACLs are enabled?
180 	 */
181 	error = VOP_ACCESSX(vp, accmode, cred, p);
182 	if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) {
183 		/*
184 		 * Try again with VEXPLICIT_DENY, to see if the test for
185 		 * deletion is supported.
186 		 */
187 		error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p);
188 		if (error == 0) {
189 			if (vp->v_type == VDIR) {
190 				accmode &= ~(VDELETE | VDELETE_CHILD);
191 				accmode |= VWRITE;
192 				error = VOP_ACCESSX(vp, accmode, cred, p);
193 			} else if (supportedtypep != NULL) {
194 				*supportedtypep &= ~NFSACCESS_DELETE;
195 			}
196 		}
197 	}
198 
199 	/*
200 	 * Allow certain operations for the owner (reads and writes
201 	 * on files that are already open).
202 	 */
203 	if (override != NFSACCCHK_NOOVERRIDE &&
204 	    (error == EPERM || error == EACCES)) {
205 		if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT))
206 			error = 0;
207 		else if (override & NFSACCCHK_ALLOWOWNER) {
208 			getret = VOP_GETATTR(vp, &vattr, cred);
209 			if (getret == 0 && cred->cr_uid == vattr.va_uid)
210 				error = 0;
211 		}
212 	}
213 	if (vpislocked == 0)
214 		NFSVOPUNLOCK(vp, 0, p);
215 	return (error);
216 }
217 
218 /*
219  * Set attribute(s) vnop.
220  */
221 int
222 nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
223     struct thread *p, struct nfsexstuff *exp)
224 {
225 	int error;
226 
227 	error = VOP_SETATTR(vp, &nvap->na_vattr, cred);
228 	return (error);
229 }
230 
231 /*
232  * Set up nameidata for a lookup() call and do it
233  * For the cases where we are crossing mount points
234  * (looking up the public fh path or the v4 root path when
235  *  not using a pseudo-root fs), set/release the Giant lock,
236  * as required.
 *
 * nd       - request descriptor; nd_flag is tested for ND_PUBLOOKUP and
 *            ND_NFSV4.
 * ndp      - nameidata whose ni_cnd flags, cn_pnbuf path and ni_pathlen
 *            are read here.
 * dp       - starting directory; it is released with vput() when
 *            islocked is set and with vrele() otherwise if it is not a
 *            directory.
 * exp      - export information: a read-only export adds RDONLY to the
 *            lookup and nes_vfslocked carries the Giant state.
 * p        - thread stored in cn_thread and used for lock operations.
 * retdirp  - set to NULL first, then to dp (with an extra reference) once
 *            dp is known to be a directory.
 *
 * Returns 0 or an error.  On error the path buffer has been freed, HASBUF
 * is cleared and ni_vp, ni_dvp and ni_startdir are NULL (the early ENOTDIR
 * return only releases dp and the path buffer).  On success ni_dvp is
 * NULLed unless WANTPARENT or LOCKPARENT was requested.
237  */
238 int
239 nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp,
240     struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p,
241     struct vnode **retdirp)
242 {
243 	struct componentname *cnp = &ndp->ni_cnd;
244 	int i;
245 	struct iovec aiov;
246 	struct uio auio;
	/* Remember whether the caller asked for a locked leaf. */
247 	int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen;
248 	int error = 0, crossmnt;
249 	char *cp;
250
251 	*retdirp = NULL;
252 	cnp->cn_nameptr = cnp->cn_pnbuf;
253 	/*
254 	 * Extract and set starting directory.
255 	 */
256 	if (dp->v_type != VDIR) {
257 		if (islocked)
258 			vput(dp);
259 		else
260 			vrele(dp);
261 		nfsvno_relpathbuf(ndp);
262 		return (ENOTDIR);
263 	}
264 	if (islocked)
265 		NFSVOPUNLOCK(dp, 0, p);
	/* Extra reference for the pointer handed back through retdirp. */
266 	VREF(dp);
267 	*retdirp = dp;
268 	if (NFSVNO_EXRDONLY(exp))
269 		cnp->cn_flags |= RDONLY;
270 	ndp->ni_segflg = UIO_SYSSPACE;
271 	crossmnt = 1;
272
273 	if (nd->nd_flag & ND_PUBLOOKUP) {
274 		ndp->ni_loopcnt = 0;
		/* An absolute public-fh path restarts at rootvnode. */
275 		if (cnp->cn_pnbuf[0] == '/') {
276 			vrele(dp);
277 			/*
278 			 * Check for degenerate pathnames here, since lookup()
279 			 * panics on them.
280 			 */
281 			for (i = 1; i < ndp->ni_pathlen; i++)
282 				if (cnp->cn_pnbuf[i] != '/')
283 					break;
			/* The path consisted of nothing but slashes. */
284 			if (i == ndp->ni_pathlen) {
285 				error = NFSERR_ACCES;
286 				goto out;
287 			}
288 			dp = rootvnode;
289 			VREF(dp);
290 		}
291 	} else if ((enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) ||
292 	    (nd->nd_flag & ND_NFSV4) == 0) {
293 		/*
294 		 * Only cross mount points for NFSv4 when doing a
295 		 * mount while traversing the file system above
296 		 * the mount point, unless enable_crossmntpt is set.
297 		 */
298 		cnp->cn_flags |= NOCROSSMOUNT;
299 		crossmnt = 0;
300 	}
301
302 	/*
303 	 * Initialize for scan, set ni_startdir and bump ref on dp again
304 	 * because lookup() will dereference ni_startdir.
305 	 */
306
307 	cnp->cn_thread = p;
308 	ndp->ni_startdir = dp;
309 	ndp->ni_rootdir = rootvnode;
310
	/*
	 * LOCKLEAF is forced on for the duration of the loop; it is cleared
	 * again after the loop, and the leaf unlocked, if the caller did not
	 * ask for it.
	 */
311 	if (!lockleaf)
312 		cnp->cn_flags |= LOCKLEAF;
313 	for (;;) {
314 		cnp->cn_nameptr = cnp->cn_pnbuf;
315 		/*
316 		 * Call lookup() to do the real work.  If an error occurs,
317 		 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
318 		 * we do not have to dereference anything before returning.
319 		 * In either case ni_startdir will be dereferenced and NULLed
320 		 * out.
321 		 */
322 		if (exp->nes_vfslocked)
323 			ndp->ni_cnd.cn_flags |= GIANTHELD;
324 		error = lookup(ndp);
325 		/*
326 		 * The Giant lock should only change when
327 		 * crossing mount points.
328 		 */
329 		if (crossmnt) {
330 			exp->nes_vfslocked =
331 			    (ndp->ni_cnd.cn_flags & GIANTHELD) != 0;
332 			ndp->ni_cnd.cn_flags &= ~GIANTHELD;
333 		}
334 		if (error)
335 			break;
336
337 		/*
338 		 * Check for encountering a symbolic link.  Trivial
339 		 * termination occurs if no symlink encountered.
340 		 */
341 		if ((cnp->cn_flags & ISSYMLINK) == 0) {
			/* Keep the path buffer only if the caller wants it. */
342 			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
343 				nfsvno_relpathbuf(ndp);
344 			if (ndp->ni_vp && !lockleaf)
345 				NFSVOPUNLOCK(ndp->ni_vp, 0, p);
346 			break;
347 		}
348
349 		/*
350 		 * Validate symlink
351 		 */
352 		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
353 			NFSVOPUNLOCK(ndp->ni_dvp, 0, p);
		/* Symlinks are only followed for public-fh lookups. */
354 		if (!(nd->nd_flag & ND_PUBLOOKUP)) {
355 			error = EINVAL;
356 			goto badlink2;
357 		}
358
359 		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
360 			error = ELOOP;
361 			goto badlink2;
362 		}
		/*
		 * With path components still to come the link is read into a
		 * fresh buffer; otherwise it is read over the current one.
		 */
363 		if (ndp->ni_pathlen > 1)
364 			cp = uma_zalloc(namei_zone, M_WAITOK);
365 		else
366 			cp = cnp->cn_pnbuf;
367 		aiov.iov_base = cp;
368 		aiov.iov_len = MAXPATHLEN;
369 		auio.uio_iov = &aiov;
370 		auio.uio_iovcnt = 1;
371 		auio.uio_offset = 0;
372 		auio.uio_rw = UIO_READ;
373 		auio.uio_segflg = UIO_SYSSPACE;
374 		auio.uio_td = NULL;
375 		auio.uio_resid = MAXPATHLEN;
376 		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
		/*
		 * badlink1 and badlink2 are also entered by goto with error
		 * already set: badlink1 frees a separately allocated link
		 * buffer, badlink2 only drops the vnodes from lookup().
		 */
377 		if (error) {
378 		badlink1:
379 			if (ndp->ni_pathlen > 1)
380 				uma_zfree(namei_zone, cp);
381 		badlink2:
382 			vrele(ndp->ni_dvp);
383 			vput(ndp->ni_vp);
384 			break;
385 		}
386 		linklen = MAXPATHLEN - auio.uio_resid;
387 		if (linklen == 0) {
388 			error = ENOENT;
389 			goto badlink1;
390 		}
391 		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
392 			error = ENAMETOOLONG;
393 			goto badlink1;
394 		}
395
396 		/*
397 		 * Adjust or replace path
398 		 */
399 		if (ndp->ni_pathlen > 1) {
			/* Append the unparsed remainder after the link text. */
400 			NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
401 			uma_zfree(namei_zone, cnp->cn_pnbuf);
402 			cnp->cn_pnbuf = cp;
403 		} else
404 			cnp->cn_pnbuf[linklen] = '\0';
405 		ndp->ni_pathlen += linklen;
406
407 		/*
408 		 * Cleanup refs for next loop and check if root directory
409 		 * should replace current directory.  Normally ni_dvp
410 		 * becomes the new base directory and is cleaned up when
411 		 * we loop.  Explicitly null pointers after invalidation
412 		 * to clarify operation.
413 		 */
414 		vput(ndp->ni_vp);
415 		ndp->ni_vp = NULL;
416
417 		if (cnp->cn_pnbuf[0] == '/') {
418 			vrele(ndp->ni_dvp);
419 			ndp->ni_dvp = ndp->ni_rootdir;
420 			VREF(ndp->ni_dvp);
421 		}
422 		ndp->ni_startdir = ndp->ni_dvp;
423 		ndp->ni_dvp = NULL;
424 	}
425 	if (!lockleaf)
426 		cnp->cn_flags &= ~LOCKLEAF;
427
428 out:
	/*
	 * On failure drop the path buffer and leave no vnode pointers in
	 * ndp; on success hide ni_dvp unless the caller asked for the parent.
	 */
429 	if (error) {
430 		uma_zfree(namei_zone, cnp->cn_pnbuf);
431 		ndp->ni_vp = NULL;
432 		ndp->ni_dvp = NULL;
433 		ndp->ni_startdir = NULL;
434 		cnp->cn_flags &= ~HASBUF;
435 	} else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
436 		ndp->ni_dvp = NULL;
437 	}
438 	return (error);
439 }
440 
441 /*
442  * Set up a pathname buffer and return a pointer to it and, optionally
443  * set a hash pointer.
444  */
445 void
446 nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp)
447 {
448 	struct componentname *cnp = &ndp->ni_cnd;
449 
450 	cnp->cn_flags |= (NOMACCHECK | HASBUF);
451 	cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
452 	if (hashpp != NULL)
453 		*hashpp = NULL;
454 	*bufpp = cnp->cn_pnbuf;
455 }
456 
457 /*
458  * Release the above path buffer, if not released by nfsvno_namei().
459  */
460 void
461 nfsvno_relpathbuf(struct nameidata *ndp)
462 {
463 
464 	if ((ndp->ni_cnd.cn_flags & HASBUF) == 0)
465 		panic("nfsrelpath");
466 	uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
467 	ndp->ni_cnd.cn_flags &= ~HASBUF;
468 }
469 
470 /*
471  * Readlink vnode op into an mbuf list.
472  */
473 int
474 nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p,
475     struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
476 {
477 	struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
478 	struct iovec *ivp = iv;
479 	struct uio io, *uiop = &io;
480 	struct mbuf *mp, *mp2 = NULL, *mp3 = NULL;
481 	int i, len, tlen, error;
482 
483 	len = 0;
484 	i = 0;
485 	while (len < NFS_MAXPATHLEN) {
486 		NFSMGET(mp);
487 		MCLGET(mp, M_WAIT);
488 		mp->m_len = NFSMSIZ(mp);
489 		if (len == 0) {
490 			mp3 = mp2 = mp;
491 		} else {
492 			mp2->m_next = mp;
493 			mp2 = mp;
494 		}
495 		if ((len + mp->m_len) > NFS_MAXPATHLEN) {
496 			mp->m_len = NFS_MAXPATHLEN - len;
497 			len = NFS_MAXPATHLEN;
498 		} else {
499 			len += mp->m_len;
500 		}
501 		ivp->iov_base = mtod(mp, caddr_t);
502 		ivp->iov_len = mp->m_len;
503 		i++;
504 		ivp++;
505 	}
506 	uiop->uio_iov = iv;
507 	uiop->uio_iovcnt = i;
508 	uiop->uio_offset = 0;
509 	uiop->uio_resid = len;
510 	uiop->uio_rw = UIO_READ;
511 	uiop->uio_segflg = UIO_SYSSPACE;
512 	uiop->uio_td = NULL;
513 	error = VOP_READLINK(vp, uiop, cred);
514 	if (error) {
515 		m_freem(mp3);
516 		*lenp = 0;
517 		return (error);
518 	}
519 	if (uiop->uio_resid > 0) {
520 		len -= uiop->uio_resid;
521 		tlen = NFSM_RNDUP(len);
522 		nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len);
523 	}
524 	*lenp = len;
525 	*mpp = mp3;
526 	*mpendp = mp;
527 	return (0);
528 }
529 
530 /*
531  * Read vnode op call into mbuf list.
532  */
533 int
534 nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
535     struct thread *p, struct mbuf **mpp, struct mbuf **mpendp)
536 {
537 	struct mbuf *m;
538 	int i;
539 	struct iovec *iv;
540 	struct iovec *iv2;
541 	int error = 0, len, left, siz, tlen, ioflag = 0, hi, try = 32;
542 	struct mbuf *m2 = NULL, *m3;
543 	struct uio io, *uiop = &io;
544 	struct nfsheur *nh;
545 
546 	/*
547 	 * Calculate seqcount for heuristic
548 	 */
549 	/*
550 	 * Locate best candidate
551 	 */
552 
553 	hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
554 	nh = &nfsheur[hi];
555 
556 	while (try--) {
557 		if (nfsheur[hi].nh_vp == vp) {
558 			nh = &nfsheur[hi];
559 			break;
560 		}
561 		if (nfsheur[hi].nh_use > 0)
562 			--nfsheur[hi].nh_use;
563 		hi = (hi + 1) % NUM_HEURISTIC;
564 		if (nfsheur[hi].nh_use < nh->nh_use)
565 			nh = &nfsheur[hi];
566 	}
567 
568 	if (nh->nh_vp != vp) {
569 		nh->nh_vp = vp;
570 		nh->nh_nextr = off;
571 		nh->nh_use = NHUSE_INIT;
572 		if (off == 0)
573 			nh->nh_seqcount = 4;
574 		else
575 			nh->nh_seqcount = 1;
576 	}
577 
578 	/*
579 	 * Calculate heuristic
580 	 */
581 
582 	if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
583 		if (++nh->nh_seqcount > IO_SEQMAX)
584 			nh->nh_seqcount = IO_SEQMAX;
585 	} else if (nh->nh_seqcount > 1) {
586 		nh->nh_seqcount = 1;
587 	} else {
588 		nh->nh_seqcount = 0;
589 	}
590 	nh->nh_use += NHUSE_INC;
591 	if (nh->nh_use > NHUSE_MAX)
592 		nh->nh_use = NHUSE_MAX;
593 	ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
594 
595 	len = left = NFSM_RNDUP(cnt);
596 	m3 = NULL;
597 	/*
598 	 * Generate the mbuf list with the uio_iov ref. to it.
599 	 */
600 	i = 0;
601 	while (left > 0) {
602 		NFSMGET(m);
603 		MCLGET(m, M_WAIT);
604 		m->m_len = 0;
605 		siz = min(M_TRAILINGSPACE(m), left);
606 		left -= siz;
607 		i++;
608 		if (m3)
609 			m2->m_next = m;
610 		else
611 			m3 = m;
612 		m2 = m;
613 	}
614 	MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
615 	    M_TEMP, M_WAITOK);
616 	uiop->uio_iov = iv2 = iv;
617 	m = m3;
618 	left = len;
619 	i = 0;
620 	while (left > 0) {
621 		if (m == NULL)
622 			panic("nfsvno_read iov");
623 		siz = min(M_TRAILINGSPACE(m), left);
624 		if (siz > 0) {
625 			iv->iov_base = mtod(m, caddr_t) + m->m_len;
626 			iv->iov_len = siz;
627 			m->m_len += siz;
628 			left -= siz;
629 			iv++;
630 			i++;
631 		}
632 		m = m->m_next;
633 	}
634 	uiop->uio_iovcnt = i;
635 	uiop->uio_offset = off;
636 	uiop->uio_resid = len;
637 	uiop->uio_rw = UIO_READ;
638 	uiop->uio_segflg = UIO_SYSSPACE;
639 	error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
640 	FREE((caddr_t)iv2, M_TEMP);
641 	if (error) {
642 		m_freem(m3);
643 		*mpp = NULL;
644 		return (error);
645 	}
646 	tlen = len - uiop->uio_resid;
647 	cnt = cnt < tlen ? cnt : tlen;
648 	tlen = NFSM_RNDUP(cnt);
649 	if (tlen == 0) {
650 		m_freem(m3);
651 		m3 = NULL;
652 	} else if (len != tlen || tlen != cnt)
653 		nfsrv_adj(m3, len - tlen, tlen - cnt);
654 	*mpp = m3;
655 	*mpendp = m2;
656 	return (0);
657 }
658 
659 /*
660  * Write vnode op from an mbuf list.
661  */
662 int
663 nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int stable,
664     struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p)
665 {
666 	struct iovec *ivp;
667 	int i, len;
668 	struct iovec *iv;
669 	int ioflags, error;
670 	struct uio io, *uiop = &io;
671 
672 	MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
673 	    M_WAITOK);
674 	uiop->uio_iov = iv = ivp;
675 	uiop->uio_iovcnt = cnt;
676 	i = mtod(mp, caddr_t) + mp->m_len - cp;
677 	len = retlen;
678 	while (len > 0) {
679 		if (mp == NULL)
680 			panic("nfsvno_write");
681 		if (i > 0) {
682 			i = min(i, len);
683 			ivp->iov_base = cp;
684 			ivp->iov_len = i;
685 			ivp++;
686 			len -= i;
687 		}
688 		mp = mp->m_next;
689 		if (mp) {
690 			i = mp->m_len;
691 			cp = mtod(mp, caddr_t);
692 		}
693 	}
694 
695 	if (stable == NFSWRITE_UNSTABLE)
696 		ioflags = IO_NODELOCKED;
697 	else
698 		ioflags = (IO_SYNC | IO_NODELOCKED);
699 	uiop->uio_resid = retlen;
700 	uiop->uio_rw = UIO_WRITE;
701 	uiop->uio_segflg = UIO_SYSSPACE;
702 	NFSUIOPROC(uiop, p);
703 	uiop->uio_offset = off;
704 	error = VOP_WRITE(vp, uiop, ioflags, cred);
705 	FREE((caddr_t)iv, M_TEMP);
706 	return (error);
707 }
708 
709 /*
710  * Common code for creating a regular file (plus special files for V2).
711  */
712 int
713 nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp,
714     struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp,
715     int32_t *cverf, NFSDEV_T rdev, struct thread *p, struct nfsexstuff *exp)
716 {
717 	u_quad_t tempsize;
718 	int error;
719 
720 	error = nd->nd_repstat;
721 	if (!error && ndp->ni_vp == NULL) {
722 		if (nvap->na_type == VREG || nvap->na_type == VSOCK) {
723 			vrele(ndp->ni_startdir);
724 			error = VOP_CREATE(ndp->ni_dvp,
725 			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
726 			vput(ndp->ni_dvp);
727 			nfsvno_relpathbuf(ndp);
728 			if (!error) {
729 				if (*exclusive_flagp) {
730 					*exclusive_flagp = 0;
731 					NFSVNO_ATTRINIT(nvap);
732 					nvap->na_atime.tv_sec = cverf[0];
733 					nvap->na_atime.tv_nsec = cverf[1];
734 					error = VOP_SETATTR(ndp->ni_vp,
735 					    &nvap->na_vattr, nd->nd_cred);
736 				}
737 			}
738 		/*
739 		 * NFS V2 Only. nfsrvd_mknod() does this for V3.
740 		 * (This implies, just get out on an error.)
741 		 */
742 		} else if (nvap->na_type == VCHR || nvap->na_type == VBLK ||
743 			nvap->na_type == VFIFO) {
744 			if (nvap->na_type == VCHR && rdev == 0xffffffff)
745 				nvap->na_type = VFIFO;
746                         if (nvap->na_type != VFIFO &&
747 			    (error = priv_check_cred(nd->nd_cred,
748 			     PRIV_VFS_MKNOD_DEV, 0))) {
749 				vrele(ndp->ni_startdir);
750 				nfsvno_relpathbuf(ndp);
751 				vput(ndp->ni_dvp);
752 				return (error);
753 			}
754 			nvap->na_rdev = rdev;
755 			error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
756 			    &ndp->ni_cnd, &nvap->na_vattr);
757 			vput(ndp->ni_dvp);
758 			nfsvno_relpathbuf(ndp);
759 			if (error) {
760 				vrele(ndp->ni_startdir);
761 				return (error);
762 			}
763 		} else {
764 			vrele(ndp->ni_startdir);
765 			nfsvno_relpathbuf(ndp);
766 			vput(ndp->ni_dvp);
767 			return (ENXIO);
768 		}
769 		*vpp = ndp->ni_vp;
770 	} else {
771 		/*
772 		 * Handle cases where error is already set and/or
773 		 * the file exists.
774 		 * 1 - clean up the lookup
775 		 * 2 - iff !error and na_size set, truncate it
776 		 */
777 		vrele(ndp->ni_startdir);
778 		nfsvno_relpathbuf(ndp);
779 		*vpp = ndp->ni_vp;
780 		if (ndp->ni_dvp == *vpp)
781 			vrele(ndp->ni_dvp);
782 		else
783 			vput(ndp->ni_dvp);
784 		if (!error && nvap->na_size != VNOVAL) {
785 			error = nfsvno_accchk(*vpp, VWRITE,
786 			    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
787 			    NFSACCCHK_VPISLOCKED, NULL);
788 			if (!error) {
789 				tempsize = nvap->na_size;
790 				NFSVNO_ATTRINIT(nvap);
791 				nvap->na_size = tempsize;
792 				error = VOP_SETATTR(*vpp,
793 				    &nvap->na_vattr, nd->nd_cred);
794 			}
795 		}
796 		if (error)
797 			vput(*vpp);
798 	}
799 	return (error);
800 }
801 
802 /*
803  * Do a mknod vnode op.
804  */
805 int
806 nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred,
807     struct thread *p)
808 {
809 	int error = 0;
810 	enum vtype vtyp;
811 
812 	vtyp = nvap->na_type;
813 	/*
814 	 * Iff doesn't exist, create it.
815 	 */
816 	if (ndp->ni_vp) {
817 		vrele(ndp->ni_startdir);
818 		nfsvno_relpathbuf(ndp);
819 		vput(ndp->ni_dvp);
820 		vrele(ndp->ni_vp);
821 		return (EEXIST);
822 	}
823 	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
824 		vrele(ndp->ni_startdir);
825 		nfsvno_relpathbuf(ndp);
826 		vput(ndp->ni_dvp);
827 		return (NFSERR_BADTYPE);
828 	}
829 	if (vtyp == VSOCK) {
830 		vrele(ndp->ni_startdir);
831 		error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
832 		    &ndp->ni_cnd, &nvap->na_vattr);
833 		vput(ndp->ni_dvp);
834 		nfsvno_relpathbuf(ndp);
835 	} else {
836 		if (nvap->na_type != VFIFO &&
837 		    (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV, 0))) {
838 			vrele(ndp->ni_startdir);
839 			nfsvno_relpathbuf(ndp);
840 			vput(ndp->ni_dvp);
841 			return (error);
842 		}
843 		error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
844 		    &ndp->ni_cnd, &nvap->na_vattr);
845 		vput(ndp->ni_dvp);
846 		nfsvno_relpathbuf(ndp);
847 		if (error)
848 			vrele(ndp->ni_startdir);
849 		/*
850 		 * Since VOP_MKNOD returns the ni_vp, I can't
851 		 * see any reason to do the lookup.
852 		 */
853 	}
854 	return (error);
855 }
856 
857 /*
858  * Mkdir vnode op.
859  */
860 int
861 nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid,
862     struct ucred *cred, struct thread *p, struct nfsexstuff *exp)
863 {
864 	int error = 0;
865 
866 	if (ndp->ni_vp != NULL) {
867 		if (ndp->ni_dvp == ndp->ni_vp)
868 			vrele(ndp->ni_dvp);
869 		else
870 			vput(ndp->ni_dvp);
871 		vrele(ndp->ni_vp);
872 		return (EEXIST);
873 	}
874 	error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
875 	    &nvap->na_vattr);
876 	vput(ndp->ni_dvp);
877 	nfsvno_relpathbuf(ndp);
878 	return (error);
879 }
880 
881 /*
882  * symlink vnode op.
883  */
884 int
885 nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp,
886     int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p,
887     struct nfsexstuff *exp)
888 {
889 	int error = 0;
890 
891 	if (ndp->ni_vp) {
892 		vrele(ndp->ni_startdir);
893 		nfsvno_relpathbuf(ndp);
894 		if (ndp->ni_dvp == ndp->ni_vp)
895 			vrele(ndp->ni_dvp);
896 		else
897 			vput(ndp->ni_dvp);
898 		vrele(ndp->ni_vp);
899 		return (EEXIST);
900 	}
901 
902 	error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
903 	    &nvap->na_vattr, pathcp);
904 	vput(ndp->ni_dvp);
905 	vrele(ndp->ni_startdir);
906 	nfsvno_relpathbuf(ndp);
907 	/*
908 	 * Although FreeBSD still had the lookup code in
909 	 * it for 7/current, there doesn't seem to be any
910 	 * point, since VOP_SYMLINK() returns the ni_vp.
911 	 * Just vput it for v2.
912 	 */
913 	if (!not_v2 && !error)
914 		vput(ndp->ni_vp);
915 	return (error);
916 }
917 
918 /*
919  * Parse symbolic link arguments.
920  * This function has an ugly side effect. It will MALLOC() an area for
921  * the symlink and set iov_base to point to it, only if it succeeds.
922  * So, if it returns with uiop->uio_iov->iov_base != NULL, that must
923  * be FREE'd later.
924  */
925 int
926 nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap,
927     struct thread *p, char **pathcpp, int *lenp)
928 {
929 	u_int32_t *tl;
930 	char *pathcp = NULL;
931 	int error = 0, len;
932 	struct nfsv2_sattr *sp;
933 
934 	*pathcpp = NULL;
935 	*lenp = 0;
936 	if ((nd->nd_flag & ND_NFSV3) &&
937 	    (error = nfsrv_sattr(nd, nvap, NULL, NULL, p)))
938 		goto nfsmout;
939 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
940 	len = fxdr_unsigned(int, *tl);
941 	if (len > NFS_MAXPATHLEN || len <= 0) {
942 		error = EBADRPC;
943 		goto nfsmout;
944 	}
945 	MALLOC(pathcp, caddr_t, len + 1, M_TEMP, M_WAITOK);
946 	error = nfsrv_mtostr(nd, pathcp, len);
947 	if (error)
948 		goto nfsmout;
949 	if (nd->nd_flag & ND_NFSV2) {
950 		NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
951 		nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode);
952 	}
953 	*pathcpp = pathcp;
954 	*lenp = len;
955 	return (0);
956 nfsmout:
957 	if (pathcp)
958 		free(pathcp, M_TEMP);
959 	return (error);
960 }
961 
962 /*
963  * Remove a non-directory object.
964  */
965 int
966 nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred,
967     struct thread *p, struct nfsexstuff *exp)
968 {
969 	struct vnode *vp;
970 	int error = 0;
971 
972 	vp = ndp->ni_vp;
973 	if (vp->v_type == VDIR)
974 		error = NFSERR_ISDIR;
975 	else if (is_v4)
976 		error = nfsrv_checkremove(vp, 1, p);
977 	if (!error)
978 		error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd);
979 	if (ndp->ni_dvp == vp)
980 		vrele(ndp->ni_dvp);
981 	else
982 		vput(ndp->ni_dvp);
983 	vput(vp);
984 	return (error);
985 }
986 
987 /*
988  * Remove a directory.
989  */
990 int
991 nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred,
992     struct thread *p, struct nfsexstuff *exp)
993 {
994 	struct vnode *vp;
995 	int error = 0;
996 
997 	vp = ndp->ni_vp;
998 	if (vp->v_type != VDIR) {
999 		error = ENOTDIR;
1000 		goto out;
1001 	}
1002 	/*
1003 	 * No rmdir "." please.
1004 	 */
1005 	if (ndp->ni_dvp == vp) {
1006 		error = EINVAL;
1007 		goto out;
1008 	}
1009 	/*
1010 	 * The root of a mounted filesystem cannot be deleted.
1011 	 */
1012 	if (vp->v_vflag & VV_ROOT)
1013 		error = EBUSY;
1014 out:
1015 	if (!error)
1016 		error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd);
1017 	if (ndp->ni_dvp == vp)
1018 		vrele(ndp->ni_dvp);
1019 	else
1020 		vput(ndp->ni_dvp);
1021 	vput(vp);
1022 	return (error);
1023 }
1024 
1025 /*
1026  * Rename vnode op.
1027  */
1028 int
1029 nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
1030     u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p)
1031 {
1032 	struct vnode *fvp, *tvp, *tdvp;
1033 	int error = 0;
1034 
1035 	fvp = fromndp->ni_vp;
1036 	if (ndstat) {
1037 		vrele(fromndp->ni_dvp);
1038 		vrele(fvp);
1039 		error = ndstat;
1040 		goto out1;
1041 	}
1042 	tdvp = tondp->ni_dvp;
1043 	tvp = tondp->ni_vp;
1044 	if (tvp != NULL) {
1045 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
1046 			error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST;
1047 			goto out;
1048 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
1049 			error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST;
1050 			goto out;
1051 		}
1052 		if (tvp->v_type == VDIR && tvp->v_mountedhere) {
1053 			error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1054 			goto out;
1055 		}
1056 
1057 		/*
1058 		 * A rename to '.' or '..' results in a prematurely
1059 		 * unlocked vnode on FreeBSD5, so I'm just going to fail that
1060 		 * here.
1061 		 */
1062 		if ((tondp->ni_cnd.cn_namelen == 1 &&
1063 		     tondp->ni_cnd.cn_nameptr[0] == '.') ||
1064 		    (tondp->ni_cnd.cn_namelen == 2 &&
1065 		     tondp->ni_cnd.cn_nameptr[0] == '.' &&
1066 		     tondp->ni_cnd.cn_nameptr[1] == '.')) {
1067 			error = EINVAL;
1068 			goto out;
1069 		}
1070 	}
1071 	if (fvp->v_type == VDIR && fvp->v_mountedhere) {
1072 		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1073 		goto out;
1074 	}
1075 	if (fvp->v_mount != tdvp->v_mount) {
1076 		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1077 		goto out;
1078 	}
1079 	if (fvp == tdvp) {
1080 		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL;
1081 		goto out;
1082 	}
1083 	if (fvp == tvp) {
1084 		/*
1085 		 * If source and destination are the same, there is nothing to
1086 		 * do. Set error to -1 to indicate this.
1087 		 */
1088 		error = -1;
1089 		goto out;
1090 	}
1091 	if (ndflag & ND_NFSV4) {
1092 		NFSVOPLOCK(fvp, LK_EXCLUSIVE | LK_RETRY, p);
1093 		error = nfsrv_checkremove(fvp, 0, p);
1094 		NFSVOPUNLOCK(fvp, 0, p);
1095 		if (tvp && !error)
1096 			error = nfsrv_checkremove(tvp, 1, p);
1097 	} else {
1098 		/*
1099 		 * For NFSv2 and NFSv3, try to get rid of the delegation, so
1100 		 * that the NFSv4 client won't be confused by the rename.
1101 		 * Since nfsd_recalldelegation() can only be called on an
1102 		 * unlocked vnode at this point and fvp is the file that will
1103 		 * still exist after the rename, just do fvp.
1104 		 */
1105 		nfsd_recalldelegation(fvp, p);
1106 	}
1107 out:
1108 	if (!error) {
1109 		error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp,
1110 		    &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp,
1111 		    &tondp->ni_cnd);
1112 	} else {
1113 		if (tdvp == tvp)
1114 			vrele(tdvp);
1115 		else
1116 			vput(tdvp);
1117 		if (tvp)
1118 			vput(tvp);
1119 		vrele(fromndp->ni_dvp);
1120 		vrele(fvp);
1121 		if (error == -1)
1122 			error = 0;
1123 	}
1124 	vrele(tondp->ni_startdir);
1125 	nfsvno_relpathbuf(tondp);
1126 out1:
1127 	vrele(fromndp->ni_startdir);
1128 	nfsvno_relpathbuf(fromndp);
1129 	return (error);
1130 }
1131 
1132 /*
1133  * Link vnode op.
1134  */
1135 int
1136 nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred,
1137     struct thread *p, struct nfsexstuff *exp)
1138 {
1139 	struct vnode *xp;
1140 	int error = 0;
1141 
1142 	xp = ndp->ni_vp;
1143 	if (xp != NULL) {
1144 		error = EEXIST;
1145 	} else {
1146 		xp = ndp->ni_dvp;
1147 		if (vp->v_mount != xp->v_mount)
1148 			error = EXDEV;
1149 	}
1150 	if (!error) {
1151 		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, p);
1152 		error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd);
1153 		if (ndp->ni_dvp == vp)
1154 			vrele(ndp->ni_dvp);
1155 		else
1156 			vput(ndp->ni_dvp);
1157 		NFSVOPUNLOCK(vp, 0, p);
1158 	} else {
1159 		if (ndp->ni_dvp == ndp->ni_vp)
1160 			vrele(ndp->ni_dvp);
1161 		else
1162 			vput(ndp->ni_dvp);
1163 		if (ndp->ni_vp)
1164 			vrele(ndp->ni_vp);
1165 	}
1166 	nfsvno_relpathbuf(ndp);
1167 	return (error);
1168 }
1169 
1170 /*
1171  * Do the fsync() appropriate for the commit.
1172  */
1173 int
1174 nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred,
1175     struct thread *td)
1176 {
1177 	int error = 0;
1178 
1179 	if (cnt > MAX_COMMIT_COUNT) {
1180 		/*
1181 		 * Give up and do the whole thing
1182 		 */
1183 		if (vp->v_object &&
1184 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
1185 			VM_OBJECT_LOCK(vp->v_object);
1186 			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
1187 			VM_OBJECT_UNLOCK(vp->v_object);
1188 		}
1189 		error = VOP_FSYNC(vp, MNT_WAIT, td);
1190 	} else {
1191 		/*
1192 		 * Locate and synchronously write any buffers that fall
1193 		 * into the requested range.  Note:  we are assuming that
1194 		 * f_iosize is a power of 2.
1195 		 */
1196 		int iosize = vp->v_mount->mnt_stat.f_iosize;
1197 		int iomask = iosize - 1;
1198 		struct bufobj *bo;
1199 		daddr_t lblkno;
1200 
1201 		/*
1202 		 * Align to iosize boundry, super-align to page boundry.
1203 		 */
1204 		if (off & iomask) {
1205 			cnt += off & iomask;
1206 			off &= ~(u_quad_t)iomask;
1207 		}
1208 		if (off & PAGE_MASK) {
1209 			cnt += off & PAGE_MASK;
1210 			off &= ~(u_quad_t)PAGE_MASK;
1211 		}
1212 		lblkno = off / iosize;
1213 
1214 		if (vp->v_object &&
1215 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
1216 			VM_OBJECT_LOCK(vp->v_object);
1217 			vm_object_page_clean(vp->v_object, off / PAGE_SIZE, (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
1218 			VM_OBJECT_UNLOCK(vp->v_object);
1219 		}
1220 
1221 		bo = &vp->v_bufobj;
1222 		BO_LOCK(bo);
1223 		while (cnt > 0) {
1224 			struct buf *bp;
1225 
1226 			/*
1227 			 * If we have a buffer and it is marked B_DELWRI we
1228 			 * have to lock and write it.  Otherwise the prior
1229 			 * write is assumed to have already been committed.
1230 			 *
1231 			 * gbincore() can return invalid buffers now so we
1232 			 * have to check that bit as well (though B_DELWRI
1233 			 * should not be set if B_INVAL is set there could be
1234 			 * a race here since we haven't locked the buffer).
1235 			 */
1236 			if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
1237 				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
1238 				    LK_INTERLOCK, BO_MTX(bo)) == ENOLCK) {
1239 					BO_LOCK(bo);
1240 					continue; /* retry */
1241 				}
1242 			    	if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
1243 				    B_DELWRI) {
1244 					bremfree(bp);
1245 					bp->b_flags &= ~B_ASYNC;
1246 					bwrite(bp);
1247 					++nfs_commit_miss;
1248 				} else
1249 					BUF_UNLOCK(bp);
1250 				BO_LOCK(bo);
1251 			}
1252 			++nfs_commit_blks;
1253 			if (cnt < iosize)
1254 				break;
1255 			cnt -= iosize;
1256 			++lblkno;
1257 		}
1258 		BO_UNLOCK(bo);
1259 	}
1260 	return (error);
1261 }
1262 
1263 /*
1264  * Statfs vnode op.
1265  */
1266 int
1267 nfsvno_statfs(struct vnode *vp, struct statfs *sf)
1268 {
1269 
1270 	return (VFS_STATFS(vp->v_mount, sf));
1271 }
1272 
1273 /*
1274  * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but
1275  * must handle nfsrv_opencheck() calls after any other access checks.
1276  */
1277 void
1278 nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp,
1279     nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp,
1280     int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create,
1281     NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p,
1282     struct nfsexstuff *exp, struct vnode **vpp)
1283 {
1284 	struct vnode *vp = NULL;
1285 	u_quad_t tempsize;
1286 	struct nfsexstuff nes;
1287 
1288 	if (ndp->ni_vp == NULL)
1289 		nd->nd_repstat = nfsrv_opencheck(clientid,
1290 		    stateidp, stp, NULL, nd, p, nd->nd_repstat);
1291 	if (!nd->nd_repstat) {
1292 		if (ndp->ni_vp == NULL) {
1293 			vrele(ndp->ni_startdir);
1294 			nd->nd_repstat = VOP_CREATE(ndp->ni_dvp,
1295 			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
1296 			vput(ndp->ni_dvp);
1297 			nfsvno_relpathbuf(ndp);
1298 			if (!nd->nd_repstat) {
1299 				if (*exclusive_flagp) {
1300 					*exclusive_flagp = 0;
1301 					NFSVNO_ATTRINIT(nvap);
1302 					nvap->na_atime.tv_sec = cverf[0];
1303 					nvap->na_atime.tv_nsec = cverf[1];
1304 					nd->nd_repstat = VOP_SETATTR(ndp->ni_vp,
1305 					    &nvap->na_vattr, cred);
1306 				} else {
1307 					nfsrv_fixattr(nd, ndp->ni_vp, nvap,
1308 					    aclp, p, attrbitp, exp);
1309 				}
1310 			}
1311 			vp = ndp->ni_vp;
1312 		} else {
1313 			if (ndp->ni_startdir)
1314 				vrele(ndp->ni_startdir);
1315 			nfsvno_relpathbuf(ndp);
1316 			vp = ndp->ni_vp;
1317 			if (create == NFSV4OPEN_CREATE) {
1318 				if (ndp->ni_dvp == vp)
1319 					vrele(ndp->ni_dvp);
1320 				else
1321 					vput(ndp->ni_dvp);
1322 			}
1323 			if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) {
1324 				if (ndp->ni_cnd.cn_flags & RDONLY)
1325 					NFSVNO_SETEXRDONLY(&nes);
1326 				else
1327 					NFSVNO_EXINIT(&nes);
1328 				nd->nd_repstat = nfsvno_accchk(vp,
1329 				    VWRITE, cred, &nes, p,
1330 				    NFSACCCHK_NOOVERRIDE,
1331 				    NFSACCCHK_VPISLOCKED, NULL);
1332 				nd->nd_repstat = nfsrv_opencheck(clientid,
1333 				    stateidp, stp, vp, nd, p, nd->nd_repstat);
1334 				if (!nd->nd_repstat) {
1335 					tempsize = nvap->na_size;
1336 					NFSVNO_ATTRINIT(nvap);
1337 					nvap->na_size = tempsize;
1338 					nd->nd_repstat = VOP_SETATTR(vp,
1339 					    &nvap->na_vattr, cred);
1340 				}
1341 			} else if (vp->v_type == VREG) {
1342 				nd->nd_repstat = nfsrv_opencheck(clientid,
1343 				    stateidp, stp, vp, nd, p, nd->nd_repstat);
1344 			}
1345 		}
1346 	} else {
1347 		if (ndp->ni_cnd.cn_flags & HASBUF)
1348 			nfsvno_relpathbuf(ndp);
1349 		if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) {
1350 			vrele(ndp->ni_startdir);
1351 			if (ndp->ni_dvp == ndp->ni_vp)
1352 				vrele(ndp->ni_dvp);
1353 			else
1354 				vput(ndp->ni_dvp);
1355 			if (ndp->ni_vp)
1356 				vput(ndp->ni_vp);
1357 		}
1358 	}
1359 	*vpp = vp;
1360 }
1361 
1362 /*
1363  * Updates the file rev and sets the mtime and ctime
1364  * to the current clock time, returning the va_filerev and va_Xtime
1365  * values.
1366  */
1367 void
1368 nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap,
1369     struct ucred *cred, struct thread *p)
1370 {
1371 	struct vattr va;
1372 
1373 	VATTR_NULL(&va);
1374 	getnanotime(&va.va_mtime);
1375 	(void) VOP_SETATTR(vp, &va, cred);
1376 	(void) nfsvno_getattr(vp, nvap, cred, p);
1377 }
1378 
1379 /*
1380  * Glue routine to nfsv4_fillattr().
1381  */
1382 int
1383 nfsvno_fillattr(struct nfsrv_descript *nd, struct vnode *vp,
1384     struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp,
1385     struct ucred *cred, struct thread *p, int isdgram, int reterr)
1386 {
1387 	int error;
1388 
1389 	error = nfsv4_fillattr(nd, vp, NULL, &nvap->na_vattr, fhp, rderror,
1390 	    attrbitp, cred, p, isdgram, reterr);
1391 	return (error);
1392 }
1393 
1394 /* Since the Readdir vnode ops vary, put the entire functions in here. */
1395 /*
1396  * nfs readdir service
1397  * - mallocs what it thinks is enough to read
1398  *	count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
1399  * - calls nfsvno_readdir()
1400  * - loops around building the reply
1401  *	if the output generated exceeds count break out of loop
1402  *	The NFSM_CLGET macro is used here so that the reply will be packed
1403  *	tightly in mbuf clusters.
1404  * - it only knows that it has encountered eof when the nfsvno_readdir()
1405  *	reads nothing
1406  * - as such one readdir rpc will return eof false although you are there
1407  *	and then the next will return eof
1408  * - it trims out records with d_fileno == 0
1409  *	this doesn't matter for Unix clients, but they might confuse clients
1410  *	for other os'.
1411  * - it trims out records with d_type == DT_WHT
1412  *	these cannot be seen through NFS (unless we extend the protocol)
1413  * NB: It is tempting to set eof to true if the nfsvno_readdir() reads less
1414  *	than requested, but this may not apply to all filesystems. For
1415  *	example, client NFS does not { although it is never remote mounted
1416  *	anyhow }
1417  *     The alternate call nfsrvd_readdirplus() does lookups as well.
1418  * PS: The NFS protocol spec. does not clarify what the "count" byte
1419  *	argument is a count of.. just name strings and file id's or the
1420  *	entire reply rpc or ...
1421  *	I tried just file name and id sizes and it confused the Sun client,
1422  *	so I am using the full rpc size now. The "paranoia.." comment refers
1423  *	to including the status longwords that are not a part of the dir.
1424  *	"entry" structures, but are in the rpc.
1425  */
1426 int
1427 nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram,
1428     struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
1429 {
1430 	struct dirent *dp;
1431 	u_int32_t *tl;
1432 	int dirlen;
1433 	char *cpos, *cend, *rbuf;
1434 	struct nfsvattr at;
1435 	int nlen, error = 0, getret = 1;
1436 	int siz, cnt, fullsiz, eofflag, ncookies;
1437 	u_int64_t off, toff, verf;
1438 	u_long *cookies = NULL, *cookiep;
1439 	struct uio io;
1440 	struct iovec iv;
1441 
1442 	if (nd->nd_repstat) {
1443 		nfsrv_postopattr(nd, getret, &at);
1444 		return (0);
1445 	}
1446 	if (nd->nd_flag & ND_NFSV2) {
1447 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1448 		off = fxdr_unsigned(u_quad_t, *tl++);
1449 	} else {
1450 		NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
1451 		off = fxdr_hyper(tl);
1452 		tl += 2;
1453 		verf = fxdr_hyper(tl);
1454 		tl += 2;
1455 	}
1456 	toff = off;
1457 	cnt = fxdr_unsigned(int, *tl);
1458 	if (cnt > NFS_SRVMAXDATA(nd))
1459 		cnt = NFS_SRVMAXDATA(nd);
1460 	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
1461 	fullsiz = siz;
1462 	if (nd->nd_flag & ND_NFSV3) {
1463 		nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred,
1464 		    p);
1465 #if 0
1466 		/*
1467 		 * va_filerev is not sufficient as a cookie verifier,
1468 		 * since it is not supposed to change when entries are
1469 		 * removed/added unless that offset cookies returned to
1470 		 * the client are no longer valid.
1471 		 */
1472 		if (!nd->nd_repstat && toff && verf != at.na_filerev)
1473 			nd->nd_repstat = NFSERR_BAD_COOKIE;
1474 #endif
1475 	}
1476 	if (!nd->nd_repstat)
1477 		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
1478 		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
1479 		    NFSACCCHK_VPISLOCKED, NULL);
1480 	if (nd->nd_repstat) {
1481 		vput(vp);
1482 		if (nd->nd_flag & ND_NFSV3)
1483 			nfsrv_postopattr(nd, getret, &at);
1484 		return (0);
1485 	}
1486 	NFSVOPUNLOCK(vp, 0, p);
1487 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
1488 again:
1489 	eofflag = 0;
1490 	if (cookies) {
1491 		free((caddr_t)cookies, M_TEMP);
1492 		cookies = NULL;
1493 	}
1494 
1495 	iv.iov_base = rbuf;
1496 	iv.iov_len = siz;
1497 	io.uio_iov = &iv;
1498 	io.uio_iovcnt = 1;
1499 	io.uio_offset = (off_t)off;
1500 	io.uio_resid = siz;
1501 	io.uio_segflg = UIO_SYSSPACE;
1502 	io.uio_rw = UIO_READ;
1503 	io.uio_td = NULL;
1504 	NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, p);
1505 	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
1506 	    &cookies);
1507 	NFSVOPUNLOCK(vp, 0, p);
1508 	off = (u_int64_t)io.uio_offset;
1509 	if (io.uio_resid)
1510 		siz -= io.uio_resid;
1511 
1512 	if (!cookies && !nd->nd_repstat)
1513 		nd->nd_repstat = NFSERR_PERM;
1514 	if (nd->nd_flag & ND_NFSV3) {
1515 		getret = nfsvno_getattr(vp, &at, nd->nd_cred, p);
1516 		if (!nd->nd_repstat)
1517 			nd->nd_repstat = getret;
1518 	}
1519 
1520 	/*
1521 	 * Handles the failed cases. nd->nd_repstat == 0 past here.
1522 	 */
1523 	if (nd->nd_repstat) {
1524 		vrele(vp);
1525 		free((caddr_t)rbuf, M_TEMP);
1526 		if (cookies)
1527 			free((caddr_t)cookies, M_TEMP);
1528 		if (nd->nd_flag & ND_NFSV3)
1529 			nfsrv_postopattr(nd, getret, &at);
1530 		return (0);
1531 	}
1532 	/*
1533 	 * If nothing read, return eof
1534 	 * rpc reply
1535 	 */
1536 	if (siz == 0) {
1537 		vrele(vp);
1538 		if (nd->nd_flag & ND_NFSV2) {
1539 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1540 		} else {
1541 			nfsrv_postopattr(nd, getret, &at);
1542 			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1543 			txdr_hyper(at.na_filerev, tl);
1544 			tl += 2;
1545 		}
1546 		*tl++ = newnfs_false;
1547 		*tl = newnfs_true;
1548 		FREE((caddr_t)rbuf, M_TEMP);
1549 		FREE((caddr_t)cookies, M_TEMP);
1550 		return (0);
1551 	}
1552 
1553 	/*
1554 	 * Check for degenerate cases of nothing useful read.
1555 	 * If so go try again
1556 	 */
1557 	cpos = rbuf;
1558 	cend = rbuf + siz;
1559 	dp = (struct dirent *)cpos;
1560 	cookiep = cookies;
1561 
1562 	/*
1563 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
1564 	 * directory offset up to a block boundary, so it is necessary to
1565 	 * skip over the records that precede the requested offset. This
1566 	 * requires the assumption that file offset cookies monotonically
1567 	 * increase.
1568 	 */
1569 	while (cpos < cend && ncookies > 0 &&
1570 	    (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
1571 	     ((u_quad_t)(*cookiep)) <= toff)) {
1572 		cpos += dp->d_reclen;
1573 		dp = (struct dirent *)cpos;
1574 		cookiep++;
1575 		ncookies--;
1576 	}
1577 	if (cpos >= cend || ncookies == 0) {
1578 		siz = fullsiz;
1579 		toff = off;
1580 		goto again;
1581 	}
1582 
1583 	/*
1584 	 * dirlen is the size of the reply, including all XDR and must
1585 	 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate
1586 	 * if the XDR should be included in "count", but to be safe, we do.
1587 	 * (Include the two booleans at the end of the reply in dirlen now.)
1588 	 */
1589 	if (nd->nd_flag & ND_NFSV3) {
1590 		nfsrv_postopattr(nd, getret, &at);
1591 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1592 		txdr_hyper(at.na_filerev, tl);
1593 		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
1594 	} else {
1595 		dirlen = 2 * NFSX_UNSIGNED;
1596 	}
1597 
1598 	/* Loop through the records and build reply */
1599 	while (cpos < cend && ncookies > 0) {
1600 		nlen = dp->d_namlen;
1601 		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
1602 			nlen <= NFS_MAXNAMLEN) {
1603 			if (nd->nd_flag & ND_NFSV3)
1604 				dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
1605 			else
1606 				dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
1607 			if (dirlen > cnt) {
1608 				eofflag = 0;
1609 				break;
1610 			}
1611 
1612 			/*
1613 			 * Build the directory record xdr from
1614 			 * the dirent entry.
1615 			 */
1616 			if (nd->nd_flag & ND_NFSV3) {
1617 				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1618 				*tl++ = newnfs_true;
1619 				*tl++ = 0;
1620 			} else {
1621 				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1622 				*tl++ = newnfs_true;
1623 			}
1624 			*tl = txdr_unsigned(dp->d_fileno);
1625 			(void) nfsm_strtom(nd, dp->d_name, nlen);
1626 			if (nd->nd_flag & ND_NFSV3) {
1627 				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1628 				*tl++ = 0;
1629 			} else
1630 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1631 			*tl = txdr_unsigned(*cookiep);
1632 		}
1633 		cpos += dp->d_reclen;
1634 		dp = (struct dirent *)cpos;
1635 		cookiep++;
1636 		ncookies--;
1637 	}
1638 	if (cpos < cend)
1639 		eofflag = 0;
1640 	vrele(vp);
1641 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1642 	*tl++ = newnfs_false;
1643 	if (eofflag)
1644 		*tl = newnfs_true;
1645 	else
1646 		*tl = newnfs_false;
1647 	FREE((caddr_t)rbuf, M_TEMP);
1648 	FREE((caddr_t)cookies, M_TEMP);
1649 	return (0);
1650 nfsmout:
1651 	vput(vp);
1652 	return (error);
1653 }
1654 
1655 /*
1656  * Readdirplus for V3 and Readdir for V4.
1657  */
1658 int
1659 nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
1660     struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
1661 {
1662 	struct dirent *dp;
1663 	u_int32_t *tl;
1664 	int dirlen;
1665 	char *cpos, *cend, *rbuf;
1666 	struct vnode *nvp;
1667 	fhandle_t nfh;
1668 	struct nfsvattr nva, at, *nvap = &nva;
1669 	struct mbuf *mb0, *mb1;
1670 	struct nfsreferral *refp;
1671 	int nlen, r, error = 0, getret = 1, usevget = 1;
1672 	int siz, cnt, fullsiz, eofflag, ncookies, entrycnt;
1673 	caddr_t bpos0, bpos1;
1674 	u_int64_t off, toff, verf;
1675 	u_long *cookies = NULL, *cookiep;
1676 	nfsattrbit_t attrbits, rderrbits, savbits;
1677 	struct uio io;
1678 	struct iovec iv;
1679 	struct componentname cn;
1680 
1681 	if (nd->nd_repstat) {
1682 		nfsrv_postopattr(nd, getret, &at);
1683 		return (0);
1684 	}
1685 	NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
1686 	off = fxdr_hyper(tl);
1687 	toff = off;
1688 	tl += 2;
1689 	verf = fxdr_hyper(tl);
1690 	tl += 2;
1691 	siz = fxdr_unsigned(int, *tl++);
1692 	cnt = fxdr_unsigned(int, *tl);
1693 
1694 	/*
1695 	 * Use the server's maximum data transfer size as the upper bound
1696 	 * on reply datalen.
1697 	 */
1698 	if (cnt > NFS_SRVMAXDATA(nd))
1699 		cnt = NFS_SRVMAXDATA(nd);
1700 
1701 	/*
1702 	 * siz is a "hint" of how much directory information (name, fileid,
1703 	 * cookie) should be in the reply. At least one client "hints" 0,
1704 	 * so I set it to cnt for that case. I also round it up to the
1705 	 * next multiple of DIRBLKSIZ.
1706 	 */
1707 	if (siz == 0)
1708 		siz = cnt;
1709 	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
1710 
1711 	if (nd->nd_flag & ND_NFSV4) {
1712 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1713 		if (error)
1714 			goto nfsmout;
1715 		NFSSET_ATTRBIT(&savbits, &attrbits);
1716 		NFSCLRNOTFILLABLE_ATTRBIT(&attrbits);
1717 		NFSZERO_ATTRBIT(&rderrbits);
1718 		NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR);
1719 	} else {
1720 		NFSZERO_ATTRBIT(&attrbits);
1721 	}
1722 	fullsiz = siz;
1723 	nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred, p);
1724 	if (!nd->nd_repstat) {
1725 	    if (off && verf != at.na_filerev) {
1726 		/*
1727 		 * va_filerev is not sufficient as a cookie verifier,
1728 		 * since it is not supposed to change when entries are
1729 		 * removed/added unless that offset cookies returned to
1730 		 * the client are no longer valid.
1731 		 */
1732 #if 0
1733 		if (nd->nd_flag & ND_NFSV4) {
1734 			nd->nd_repstat = NFSERR_NOTSAME;
1735 		} else {
1736 			nd->nd_repstat = NFSERR_BAD_COOKIE;
1737 		}
1738 #endif
1739 	    } else if ((nd->nd_flag & ND_NFSV4) && off == 0 && verf != 0) {
1740 		nd->nd_repstat = NFSERR_BAD_COOKIE;
1741 	    }
1742 	}
1743 	if (!nd->nd_repstat && vp->v_type != VDIR)
1744 		nd->nd_repstat = NFSERR_NOTDIR;
1745 	if (!nd->nd_repstat && cnt == 0)
1746 		nd->nd_repstat = NFSERR_TOOSMALL;
1747 	if (!nd->nd_repstat)
1748 		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
1749 		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
1750 		    NFSACCCHK_VPISLOCKED, NULL);
1751 	if (nd->nd_repstat) {
1752 		vput(vp);
1753 		if (nd->nd_flag & ND_NFSV3)
1754 			nfsrv_postopattr(nd, getret, &at);
1755 		return (0);
1756 	}
1757 
1758 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
1759 again:
1760 	eofflag = 0;
1761 	if (cookies) {
1762 		free((caddr_t)cookies, M_TEMP);
1763 		cookies = NULL;
1764 	}
1765 
1766 	iv.iov_base = rbuf;
1767 	iv.iov_len = siz;
1768 	io.uio_iov = &iv;
1769 	io.uio_iovcnt = 1;
1770 	io.uio_offset = (off_t)off;
1771 	io.uio_resid = siz;
1772 	io.uio_segflg = UIO_SYSSPACE;
1773 	io.uio_rw = UIO_READ;
1774 	io.uio_td = NULL;
1775 	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
1776 	    &cookies);
1777 	off = (u_int64_t)io.uio_offset;
1778 	if (io.uio_resid)
1779 		siz -= io.uio_resid;
1780 
1781 	getret = nfsvno_getattr(vp, &at, nd->nd_cred, p);
1782 
1783 	if (!cookies && !nd->nd_repstat)
1784 		nd->nd_repstat = NFSERR_PERM;
1785 	if (!nd->nd_repstat)
1786 		nd->nd_repstat = getret;
1787 	if (nd->nd_repstat) {
1788 		vput(vp);
1789 		if (cookies)
1790 			free((caddr_t)cookies, M_TEMP);
1791 		free((caddr_t)rbuf, M_TEMP);
1792 		if (nd->nd_flag & ND_NFSV3)
1793 			nfsrv_postopattr(nd, getret, &at);
1794 		return (0);
1795 	}
1796 	/*
1797 	 * If nothing read, return eof
1798 	 * rpc reply
1799 	 */
1800 	if (siz == 0) {
1801 		vput(vp);
1802 		if (nd->nd_flag & ND_NFSV3)
1803 			nfsrv_postopattr(nd, getret, &at);
1804 		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1805 		txdr_hyper(at.na_filerev, tl);
1806 		tl += 2;
1807 		*tl++ = newnfs_false;
1808 		*tl = newnfs_true;
1809 		free((caddr_t)cookies, M_TEMP);
1810 		free((caddr_t)rbuf, M_TEMP);
1811 		return (0);
1812 	}
1813 
1814 	/*
1815 	 * Check for degenerate cases of nothing useful read.
1816 	 * If so go try again
1817 	 */
1818 	cpos = rbuf;
1819 	cend = rbuf + siz;
1820 	dp = (struct dirent *)cpos;
1821 	cookiep = cookies;
1822 
1823 	/*
1824 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
1825 	 * directory offset up to a block boundary, so it is necessary to
1826 	 * skip over the records that precede the requested offset. This
1827 	 * requires the assumption that file offset cookies monotonically
1828 	 * increase.
1829 	 */
1830 	while (cpos < cend && ncookies > 0 &&
1831 	  (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
1832 	   ((u_quad_t)(*cookiep)) <= toff ||
1833 	   ((nd->nd_flag & ND_NFSV4) &&
1834 	    ((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
1835 	     (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) {
1836 		cpos += dp->d_reclen;
1837 		dp = (struct dirent *)cpos;
1838 		cookiep++;
1839 		ncookies--;
1840 	}
1841 	if (cpos >= cend || ncookies == 0) {
1842 		siz = fullsiz;
1843 		toff = off;
1844 		goto again;
1845 	}
1846 	NFSVOPUNLOCK(vp, 0, p);
1847 
1848 	/*
1849 	 * Save this position, in case there is an error before one entry
1850 	 * is created.
1851 	 */
1852 	mb0 = nd->nd_mb;
1853 	bpos0 = nd->nd_bpos;
1854 
1855 	/*
1856 	 * Fill in the first part of the reply.
1857 	 * dirlen is the reply length in bytes and cannot exceed cnt.
1858 	 * (Include the two booleans at the end of the reply in dirlen now,
1859 	 *  so we recognize when we have exceeded cnt.)
1860 	 */
1861 	if (nd->nd_flag & ND_NFSV3) {
1862 		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
1863 		nfsrv_postopattr(nd, getret, &at);
1864 	} else {
1865 		dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED;
1866 	}
1867 	NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
1868 	txdr_hyper(at.na_filerev, tl);
1869 
1870 	/*
1871 	 * Save this position, in case there is an empty reply needed.
1872 	 */
1873 	mb1 = nd->nd_mb;
1874 	bpos1 = nd->nd_bpos;
1875 
1876 	/* Loop through the records and build reply */
1877 	entrycnt = 0;
1878 	while (cpos < cend && ncookies > 0 && dirlen < cnt) {
1879 		nlen = dp->d_namlen;
1880 		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
1881 		    nlen <= NFS_MAXNAMLEN &&
1882 		    ((nd->nd_flag & ND_NFSV3) || nlen > 2 ||
1883 		     (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.'))
1884 		      || (nlen == 1 && dp->d_name[0] != '.'))) {
1885 			/*
1886 			 * Save the current position in the reply, in case
1887 			 * this entry exceeds cnt.
1888 			 */
1889 			mb1 = nd->nd_mb;
1890 			bpos1 = nd->nd_bpos;
1891 
1892 			/*
1893 			 * For readdir_and_lookup get the vnode using
1894 			 * the file number.
1895 			 */
1896 			nvp = NULL;
1897 			refp = NULL;
1898 			r = 0;
1899 			if ((nd->nd_flag & ND_NFSV3) ||
1900 			    NFSNONZERO_ATTRBIT(&savbits)) {
1901 				if (nd->nd_flag & ND_NFSV4)
1902 					refp = nfsv4root_getreferral(NULL,
1903 					    vp, dp->d_fileno);
1904 				if (refp == NULL) {
1905 					if (usevget)
1906 						r = VFS_VGET(vp->v_mount,
1907 						    dp->d_fileno, LK_EXCLUSIVE,
1908 						    &nvp);
1909 					else
1910 						r = EOPNOTSUPP;
1911 					if (r == EOPNOTSUPP) {
1912 						if (usevget) {
1913 							usevget = 0;
1914 							cn.cn_nameiop = LOOKUP;
1915 							cn.cn_lkflags =
1916 							    LK_EXCLUSIVE |
1917 							    LK_RETRY;
1918 							cn.cn_cred =
1919 							    nd->nd_cred;
1920 							cn.cn_thread = p;
1921 						}
1922 						cn.cn_nameptr = dp->d_name;
1923 						cn.cn_namelen = nlen;
1924 						cn.cn_flags = ISLASTCN |
1925 						    NOFOLLOW | LOCKLEAF |
1926 						    MPSAFE;
1927 						if (nlen == 2 &&
1928 						    dp->d_name[0] == '.' &&
1929 						    dp->d_name[1] == '.')
1930 							cn.cn_flags |=
1931 							    ISDOTDOT;
1932 						if (!VOP_ISLOCKED(vp))
1933 							vn_lock(vp,
1934 							    LK_EXCLUSIVE |
1935 							    LK_RETRY);
1936 						r = VOP_LOOKUP(vp, &nvp, &cn);
1937 					}
1938 				}
1939 				if (!r) {
1940 				    if (refp == NULL &&
1941 					((nd->nd_flag & ND_NFSV3) ||
1942 					 NFSNONZERO_ATTRBIT(&attrbits))) {
1943 					r = nfsvno_getfh(nvp, &nfh, p);
1944 					if (!r)
1945 					    r = nfsvno_getattr(nvp, nvap,
1946 						nd->nd_cred, p);
1947 				    }
1948 				} else {
1949 				    nvp = NULL;
1950 				}
1951 				if (r) {
1952 					if (!NFSISSET_ATTRBIT(&attrbits,
1953 					    NFSATTRBIT_RDATTRERROR)) {
1954 						if (nvp != NULL)
1955 							vput(nvp);
1956 						nd->nd_repstat = r;
1957 						break;
1958 					}
1959 				}
1960 			}
1961 
1962 			/*
1963 			 * Build the directory record xdr
1964 			 */
1965 			if (nd->nd_flag & ND_NFSV3) {
1966 				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1967 				*tl++ = newnfs_true;
1968 				*tl++ = 0;
1969 				*tl = txdr_unsigned(dp->d_fileno);
1970 				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
1971 				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1972 				*tl++ = 0;
1973 				*tl = txdr_unsigned(*cookiep);
1974 				nfsrv_postopattr(nd, 0, nvap);
1975 				dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1);
1976 				dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR);
1977 				if (nvp != NULL)
1978 					vput(nvp);
1979 			} else {
1980 				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1981 				*tl++ = newnfs_true;
1982 				*tl++ = 0;
1983 				*tl = txdr_unsigned(*cookiep);
1984 				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
1985 				if (nvp != NULL)
1986 					NFSVOPUNLOCK(nvp, 0, p);
1987 				if (refp != NULL) {
1988 					dirlen += nfsrv_putreferralattr(nd,
1989 					    &savbits, refp, 0,
1990 					    &nd->nd_repstat);
1991 					if (nd->nd_repstat) {
1992 						if (nvp != NULL)
1993 							vrele(nvp);
1994 						break;
1995 					}
1996 				} else if (r) {
1997 					dirlen += nfsvno_fillattr(nd, nvp, nvap,
1998 					    &nfh, r, &rderrbits, nd->nd_cred,
1999 					    p, isdgram, 0);
2000 				} else {
2001 					dirlen += nfsvno_fillattr(nd, nvp, nvap,
2002 					    &nfh, r, &attrbits, nd->nd_cred,
2003 					    p, isdgram, 0);
2004 				}
2005 				if (nvp != NULL)
2006 					vrele(nvp);
2007 				dirlen += (3 * NFSX_UNSIGNED);
2008 			}
2009 			if (dirlen <= cnt)
2010 				entrycnt++;
2011 		}
2012 		cpos += dp->d_reclen;
2013 		dp = (struct dirent *)cpos;
2014 		cookiep++;
2015 		ncookies--;
2016 	}
2017 	if (!usevget && VOP_ISLOCKED(vp))
2018 		vput(vp);
2019 	else
2020 		vrele(vp);
2021 
2022 	/*
2023 	 * If dirlen > cnt, we must strip off the last entry. If that
2024 	 * results in an empty reply, report NFSERR_TOOSMALL.
2025 	 */
2026 	if (dirlen > cnt || nd->nd_repstat) {
2027 		if (!nd->nd_repstat && entrycnt == 0)
2028 			nd->nd_repstat = NFSERR_TOOSMALL;
2029 		if (nd->nd_repstat)
2030 			newnfs_trimtrailing(nd, mb0, bpos0);
2031 		else
2032 			newnfs_trimtrailing(nd, mb1, bpos1);
2033 		eofflag = 0;
2034 	} else if (cpos < cend)
2035 		eofflag = 0;
2036 	if (!nd->nd_repstat) {
2037 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2038 		*tl++ = newnfs_false;
2039 		if (eofflag)
2040 			*tl = newnfs_true;
2041 		else
2042 			*tl = newnfs_false;
2043 	}
2044 	FREE((caddr_t)cookies, M_TEMP);
2045 	FREE((caddr_t)rbuf, M_TEMP);
2046 	return (0);
2047 nfsmout:
2048 	vput(vp);
2049 	return (error);
2050 }
2051 
2052 /*
2053  * Get the settable attributes out of the mbuf list.
2054  * (Return 0 or EBADRPC)
2055  */
2056 int
2057 nfsrv_sattr(struct nfsrv_descript *nd, struct nfsvattr *nvap,
2058     nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
2059 {
2060 	u_int32_t *tl;
2061 	struct nfsv2_sattr *sp;
2062 	struct timeval curtime;
2063 	int error = 0, toclient = 0;
2064 
2065 	switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) {
2066 	case ND_NFSV2:
2067 		NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
2068 		/*
2069 		 * Some old clients didn't fill in the high order 16bits.
2070 		 * --> check the low order 2 bytes for 0xffff
2071 		 */
2072 		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
2073 			nvap->na_mode = nfstov_mode(sp->sa_mode);
2074 		if (sp->sa_uid != newnfs_xdrneg1)
2075 			nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid);
2076 		if (sp->sa_gid != newnfs_xdrneg1)
2077 			nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid);
2078 		if (sp->sa_size != newnfs_xdrneg1)
2079 			nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size);
2080 		if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) {
2081 #ifdef notyet
2082 			fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime);
2083 #else
2084 			nvap->na_atime.tv_sec =
2085 				fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec);
2086 			nvap->na_atime.tv_nsec = 0;
2087 #endif
2088 		}
2089 		if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1)
2090 			fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime);
2091 		break;
2092 	case ND_NFSV3:
2093 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2094 		if (*tl == newnfs_true) {
2095 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2096 			nvap->na_mode = nfstov_mode(*tl);
2097 		}
2098 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2099 		if (*tl == newnfs_true) {
2100 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2101 			nvap->na_uid = fxdr_unsigned(uid_t, *tl);
2102 		}
2103 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2104 		if (*tl == newnfs_true) {
2105 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2106 			nvap->na_gid = fxdr_unsigned(gid_t, *tl);
2107 		}
2108 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2109 		if (*tl == newnfs_true) {
2110 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2111 			nvap->na_size = fxdr_hyper(tl);
2112 		}
2113 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2114 		switch (fxdr_unsigned(int, *tl)) {
2115 		case NFSV3SATTRTIME_TOCLIENT:
2116 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2117 			fxdr_nfsv3time(tl, &nvap->na_atime);
2118 			toclient = 1;
2119 			break;
2120 		case NFSV3SATTRTIME_TOSERVER:
2121 			NFSGETTIME(&curtime);
2122 			nvap->na_atime.tv_sec = curtime.tv_sec;
2123 			nvap->na_atime.tv_nsec = curtime.tv_usec * 1000;
2124 			nvap->na_vaflags |= VA_UTIMES_NULL;
2125 			break;
2126 		};
2127 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2128 		switch (fxdr_unsigned(int, *tl)) {
2129 		case NFSV3SATTRTIME_TOCLIENT:
2130 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2131 			fxdr_nfsv3time(tl, &nvap->na_mtime);
2132 			nvap->na_vaflags &= ~VA_UTIMES_NULL;
2133 			break;
2134 		case NFSV3SATTRTIME_TOSERVER:
2135 			NFSGETTIME(&curtime);
2136 			nvap->na_mtime.tv_sec = curtime.tv_sec;
2137 			nvap->na_mtime.tv_nsec = curtime.tv_usec * 1000;
2138 			if (!toclient)
2139 				nvap->na_vaflags |= VA_UTIMES_NULL;
2140 			break;
2141 		};
2142 		break;
2143 	case ND_NFSV4:
2144 		error = nfsv4_sattr(nd, nvap, attrbitp, aclp, p);
2145 	};
2146 nfsmout:
2147 	return (error);
2148 }
2149 
2150 /*
2151  * Handle the setable attributes for V4.
2152  * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise.
2153  */
2154 int
2155 nfsv4_sattr(struct nfsrv_descript *nd, struct nfsvattr *nvap,
2156     nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
2157 {
2158 	u_int32_t *tl;
2159 	int attrsum = 0;
2160 	int i, j;
2161 	int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0;
2162 	int toclient = 0;
2163 	u_char *cp, namestr[NFSV4_SMALLSTR + 1];
2164 	uid_t uid;
2165 	gid_t gid;
2166 	struct timeval curtime;
2167 
2168 	error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup);
2169 	if (error)
2170 		return (error);
2171 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2172 	attrsize = fxdr_unsigned(int, *tl);
2173 
2174 	/*
2175 	 * Loop around getting the setable attributes. If an unsupported
2176 	 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return.
2177 	 */
2178 	if (retnotsup) {
2179 		nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2180 		bitpos = NFSATTRBIT_MAX;
2181 	} else {
2182 		bitpos = 0;
2183 	}
2184 	for (; bitpos < NFSATTRBIT_MAX; bitpos++) {
2185 	    if (attrsum > attrsize) {
2186 		error = NFSERR_BADXDR;
2187 		goto nfsmout;
2188 	    }
2189 	    if (NFSISSET_ATTRBIT(attrbitp, bitpos))
2190 		switch (bitpos) {
2191 		case NFSATTRBIT_SIZE:
2192 			NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
2193 			nvap->na_size = fxdr_hyper(tl);
2194 			attrsum += NFSX_HYPER;
2195 			break;
2196 		case NFSATTRBIT_ACL:
2197 			error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize,
2198 			    p);
2199 			if (error)
2200 				goto nfsmout;
2201 			if (aceerr && !nd->nd_repstat)
2202 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2203 			attrsum += aclsize;
2204 			break;
2205 		case NFSATTRBIT_ARCHIVE:
2206 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2207 			if (!nd->nd_repstat)
2208 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2209 			attrsum += NFSX_UNSIGNED;
2210 			break;
2211 		case NFSATTRBIT_HIDDEN:
2212 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2213 			if (!nd->nd_repstat)
2214 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2215 			attrsum += NFSX_UNSIGNED;
2216 			break;
2217 		case NFSATTRBIT_MIMETYPE:
2218 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2219 			i = fxdr_unsigned(int, *tl);
2220 			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
2221 			if (error)
2222 				goto nfsmout;
2223 			if (!nd->nd_repstat)
2224 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2225 			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i));
2226 			break;
2227 		case NFSATTRBIT_MODE:
2228 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2229 			nvap->na_mode = nfstov_mode(*tl);
2230 			attrsum += NFSX_UNSIGNED;
2231 			break;
2232 		case NFSATTRBIT_OWNER:
2233 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2234 			j = fxdr_unsigned(int, *tl);
2235 			if (j < 0)
2236 				return (NFSERR_BADXDR);
2237 			if (j > NFSV4_SMALLSTR)
2238 				cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
2239 			else
2240 				cp = namestr;
2241 			error = nfsrv_mtostr(nd, cp, j);
2242 			if (error) {
2243 				if (j > NFSV4_SMALLSTR)
2244 					free(cp, M_NFSSTRING);
2245 				return (error);
2246 			}
2247 			if (!nd->nd_repstat) {
2248 				nd->nd_repstat = nfsv4_strtouid(cp,j,&uid,p);
2249 				if (!nd->nd_repstat)
2250 					nvap->na_uid = uid;
2251 			}
2252 			if (j > NFSV4_SMALLSTR)
2253 				free(cp, M_NFSSTRING);
2254 			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
2255 			break;
2256 		case NFSATTRBIT_OWNERGROUP:
2257 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2258 			j = fxdr_unsigned(int, *tl);
2259 			if (j < 0)
2260 				return (NFSERR_BADXDR);
2261 			if (j > NFSV4_SMALLSTR)
2262 				cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
2263 			else
2264 				cp = namestr;
2265 			error = nfsrv_mtostr(nd, cp, j);
2266 			if (error) {
2267 				if (j > NFSV4_SMALLSTR)
2268 					free(cp, M_NFSSTRING);
2269 				return (error);
2270 			}
2271 			if (!nd->nd_repstat) {
2272 				nd->nd_repstat = nfsv4_strtogid(cp,j,&gid,p);
2273 				if (!nd->nd_repstat)
2274 					nvap->na_gid = gid;
2275 			}
2276 			if (j > NFSV4_SMALLSTR)
2277 				free(cp, M_NFSSTRING);
2278 			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
2279 			break;
2280 		case NFSATTRBIT_SYSTEM:
2281 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2282 			if (!nd->nd_repstat)
2283 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2284 			attrsum += NFSX_UNSIGNED;
2285 			break;
2286 		case NFSATTRBIT_TIMEACCESSSET:
2287 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2288 			attrsum += NFSX_UNSIGNED;
2289 			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
2290 			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2291 			    fxdr_nfsv4time(tl, &nvap->na_atime);
2292 			    toclient = 1;
2293 			    attrsum += NFSX_V4TIME;
2294 			} else {
2295 			    NFSGETTIME(&curtime);
2296 			    nvap->na_atime.tv_sec = curtime.tv_sec;
2297 			    nvap->na_atime.tv_nsec = curtime.tv_usec * 1000;
2298 			    nvap->na_vaflags |= VA_UTIMES_NULL;
2299 			}
2300 			break;
2301 		case NFSATTRBIT_TIMEBACKUP:
2302 			NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2303 			if (!nd->nd_repstat)
2304 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2305 			attrsum += NFSX_V4TIME;
2306 			break;
2307 		case NFSATTRBIT_TIMECREATE:
2308 			NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2309 			if (!nd->nd_repstat)
2310 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2311 			attrsum += NFSX_V4TIME;
2312 			break;
2313 		case NFSATTRBIT_TIMEMODIFYSET:
2314 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2315 			attrsum += NFSX_UNSIGNED;
2316 			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
2317 			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2318 			    fxdr_nfsv4time(tl, &nvap->na_mtime);
2319 			    nvap->na_vaflags &= ~VA_UTIMES_NULL;
2320 			    attrsum += NFSX_V4TIME;
2321 			} else {
2322 			    NFSGETTIME(&curtime);
2323 			    nvap->na_mtime.tv_sec = curtime.tv_sec;
2324 			    nvap->na_mtime.tv_nsec = curtime.tv_usec * 1000;
2325 			    if (!toclient)
2326 				nvap->na_vaflags |= VA_UTIMES_NULL;
2327 			}
2328 			break;
2329 		default:
2330 			nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2331 			/*
2332 			 * set bitpos so we drop out of the loop.
2333 			 */
2334 			bitpos = NFSATTRBIT_MAX;
2335 			break;
2336 		};
2337 	}
2338 
2339 	/*
2340 	 * some clients pad the attrlist, so we need to skip over the
2341 	 * padding.
2342 	 */
2343 	if (attrsum > attrsize) {
2344 		error = NFSERR_BADXDR;
2345 	} else {
2346 		attrsize = NFSM_RNDUP(attrsize);
2347 		if (attrsum < attrsize)
2348 			error = nfsm_advance(nd, attrsize - attrsum, -1);
2349 	}
2350 nfsmout:
2351 	return (error);
2352 }
2353 
2354 /*
2355  * Check/setup export credentials.
2356  */
2357 int
2358 nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp,
2359     struct ucred *credanon)
2360 {
2361 	int error = 0;
2362 
2363 	/*
2364 	 * Check/setup credentials.
2365 	 */
2366 	if (nd->nd_flag & ND_GSS)
2367 		exp->nes_exflag &= ~MNT_EXPORTANON;
2368 
2369 	/*
2370 	 * Check to see if the operation is allowed for this security flavor.
2371 	 * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to
2372 	 * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS.
2373 	 * Also, allow Secinfo, so that it can acquire the correct flavor(s).
2374 	 */
2375 	if (nfsvno_testexp(nd, exp) &&
2376 	    nd->nd_procnum != NFSV4OP_SECINFO &&
2377 	    nd->nd_procnum != NFSPROC_FSINFO) {
2378 		if (nd->nd_flag & ND_NFSV4)
2379 			error = NFSERR_WRONGSEC;
2380 		else
2381 			error = (NFSERR_AUTHERR | AUTH_TOOWEAK);
2382 		return (error);
2383 	}
2384 
2385 	/*
2386 	 * Check to see if the file system is exported V4 only.
2387 	 */
2388 	if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4))
2389 		return (NFSERR_PROGNOTV4);
2390 
2391 	/*
2392 	 * Now, map the user credentials.
2393 	 * (Note that ND_AUTHNONE will only be set for an NFSv3
2394 	 *  Fsinfo RPC. If set for anything else, this code might need
2395 	 *  to change.)
2396 	 */
2397 	if (NFSVNO_EXPORTED(exp) &&
2398 	    ((!(nd->nd_flag & ND_GSS) && nd->nd_cred->cr_uid == 0) ||
2399 	     NFSVNO_EXPORTANON(exp) ||
2400 	     (nd->nd_flag & ND_AUTHNONE))) {
2401 		nd->nd_cred->cr_uid = credanon->cr_uid;
2402 		nd->nd_cred->cr_gid = credanon->cr_gid;
2403 		crsetgroups(nd->nd_cred, credanon->cr_ngroups,
2404 		    credanon->cr_groups);
2405 	}
2406 	return (0);
2407 }
2408 
2409 /*
2410  * Check exports.
2411  */
2412 int
2413 nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp,
2414     struct ucred **credp)
2415 {
2416 	int i, error, *secflavors;
2417 
2418 	error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
2419 	    &exp->nes_numsecflavor, &secflavors);
2420 	if (error) {
2421 		if (nfs_rootfhset) {
2422 			exp->nes_exflag = 0;
2423 			exp->nes_numsecflavor = 0;
2424 			error = 0;
2425 		}
2426 	} else {
2427 		/* Copy the security flavors. */
2428 		for (i = 0; i < exp->nes_numsecflavor; i++)
2429 			exp->nes_secflavors[i] = secflavors[i];
2430 	}
2431 	return (error);
2432 }
2433 
2434 /*
2435  * Get a vnode for a file handle and export stuff.
2436  */
2437 int
2438 nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam,
2439     struct vnode **vpp, struct nfsexstuff *exp, struct ucred **credp)
2440 {
2441 	int i, error, *secflavors;
2442 
2443 	*credp = NULL;
2444 	exp->nes_numsecflavor = 0;
2445 	error = VFS_FHTOVP(mp, &fhp->fh_fid, vpp);
2446 	if (nam && !error) {
2447 		error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
2448 		    &exp->nes_numsecflavor, &secflavors);
2449 		if (error) {
2450 			if (nfs_rootfhset) {
2451 				exp->nes_exflag = 0;
2452 				exp->nes_numsecflavor = 0;
2453 				error = 0;
2454 			} else {
2455 				vput(*vpp);
2456 			}
2457 		} else {
2458 			/* Copy the security flavors. */
2459 			for (i = 0; i < exp->nes_numsecflavor; i++)
2460 				exp->nes_secflavors[i] = secflavors[i];
2461 		}
2462 	}
2463 	return (error);
2464 }
2465 
2466 /*
2467  * Do the pathconf vnode op.
2468  */
2469 int
2470 nfsvno_pathconf(struct vnode *vp, int flag, register_t *retf,
2471     struct ucred *cred, struct thread *p)
2472 {
2473 	int error;
2474 
2475 	error = VOP_PATHCONF(vp, flag, retf);
2476 	return (error);
2477 }
2478 
/*
 * nfsd_fhtovp() - convert a fh to a vnode ptr
 * 	- look up fsid in mount list (if not found ret error)
 *	- get vp and export rights by calling nfsvno_fhtovp()
 *	- if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
 *	  for AUTH_SYS
 * Also handle getting the Giant lock for the file system,
 * as required:
 * - if same mount point as *mpp
 *       do nothing
 *   else if *mpp == NULL
 *       if already locked
 *           leave it locked
 *       else
 *           call VFS_LOCK_GIANT()
 *   else
 *       if already locked
 *            unlock Giant
 *       call VFS_LOCK_GIANT()
 *
 * The result is reported through nd->nd_repstat. On failure *vpp and
 * *mpp are set to NULL, any vnode obtained has been released with vput()
 * and Giant (if held per exp->nes_vfslocked) has been dropped. On
 * success *vpp is the vnode and *mpp the mount point it lives on.
 * If startwrite is non-zero, vn_start_write() is called for the mount
 * point and undone with vn_finished_write() on failure.
 */
void
nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp,
    struct vnode **vpp, struct nfsexstuff *exp,
    struct mount **mpp, int startwrite, struct thread *p)
{
	struct mount *mp;
	struct ucred *credanon;
	fhandle_t *fhp;

	fhp = (fhandle_t *)nfp->nfsrvfh_data;
	/*
	 * Find the mount point for the fsid in the file handle. If there
	 * is none, the handle is stale; drop Giant for the previous mount
	 * point, if it was held.
	 */
	mp = vfs_getvfs(&fhp->fh_fsid);
	if (!mp) {
		*vpp = NULL;
		nd->nd_repstat = ESTALE;
		if (*mpp && exp->nes_vfslocked)
			VFS_UNLOCK_GIANT(*mpp);
		*mpp = NULL;
		exp->nes_vfslocked = 0;
		return;
	}

	/*
	 * Now, handle Giant for the file system.
	 */
	if (*mpp != NULL && *mpp != mp && exp->nes_vfslocked) {
		VFS_UNLOCK_GIANT(*mpp);
		exp->nes_vfslocked = 0;
	}
	if (!exp->nes_vfslocked && *mpp != mp)
		exp->nes_vfslocked = VFS_LOCK_GIANT(mp);

	*mpp = mp;
	if (startwrite)
		vn_start_write(NULL, mpp, V_WAIT);

	/* credanon is set to NULL by nfsvno_fhtovp() before anything else. */
	nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, vpp, exp,
	    &credanon);

	/*
	 * For NFSv4 without a pseudo root fs, unexported file handles
	 * can be returned, so that Lookup works everywhere.
	 * For the other protocol versions, an unexported file system
	 * gets EACCES.
	 */
	if (!nd->nd_repstat && exp->nes_exflag == 0 &&
	    !(nd->nd_flag & ND_NFSV4)) {
		vput(*vpp);
		nd->nd_repstat = EACCES;
	}

	/*
	 * Personally, I've never seen any point in requiring a
	 * reserved port#, since only in the rare case where the
	 * clients are all boxes with secure system privileges,
	 * does it provide any enhanced security, but... some people
	 * believe it to be useful and keep putting this code back in.
	 * (There is also some "security checker" out there that
	 *  complains if the nfs server doesn't enforce this.)
	 * However, note the following:
	 * RFC3530 (NFSv4) specifies that a reserved port# not be
	 *	required.
	 * RFC2623 recommends that, if a reserved port# is checked for,
	 *	that there be a way to turn that off--> ifdef'd.
	 */
#ifdef NFS_REQRSVPORT
	if (!nd->nd_repstat) {
		struct sockaddr_in *saddr;
		struct sockaddr_in6 *saddr6;

		/* The same address is viewed as both IPv4 and IPv6. */
		saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
		saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *);
		if (!(nd->nd_flag & ND_NFSV4) &&
		    ((saddr->sin_family == AF_INET &&
		      ntohs(saddr->sin_port) >= IPPORT_RESERVED) ||
		     (saddr6->sin6_family == AF_INET6 &&
		      ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) {
			vput(*vpp);
			nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
		}
	}
#endif	/* NFS_REQRSVPORT */

	/*
	 * Check/setup credentials.
	 * The uid is saved in nd_saveduid first, since nfsd_excred() may
	 * replace it with the anonymous one.
	 */
	if (!nd->nd_repstat) {
		nd->nd_saveduid = nd->nd_cred->cr_uid;
		nd->nd_repstat = nfsd_excred(nd, exp, credanon);
		if (nd->nd_repstat)
			vput(*vpp);
	}
	if (credanon != NULL)
		crfree(credanon);
	/*
	 * On failure, undo the write start and the Giant lock. In both
	 * cases, vfs_rel() is called for the mount point returned by
	 * vfs_getvfs() above.
	 */
	if (nd->nd_repstat) {
		if (startwrite)
			vn_finished_write(mp);
		if (exp->nes_vfslocked) {
			VFS_UNLOCK_GIANT(mp);
			exp->nes_vfslocked = 0;
		}
		vfs_rel(mp);
		*vpp = NULL;
		*mpp = NULL;
	} else {
		vfs_rel(mp);
	}
}
2607 
2608 /*
2609  * glue for fp.
2610  */
2611 int
2612 fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp)
2613 {
2614 	struct filedesc *fdp;
2615 	struct file *fp;
2616 
2617 	fdp = p->td_proc->p_fd;
2618 	if (fd >= fdp->fd_nfiles ||
2619 	    (fp = fdp->fd_ofiles[fd]) == NULL)
2620 		return (EBADF);
2621 	*fpp = fp;
2622 	return (0);
2623 }
2624 
2625 /*
2626  * Called from nfssvc() to update the exports list. Just call
2627  * vfs_export(). This has to be done, since the v4 root fake fs isn't
2628  * in the mount list.
2629  */
2630 int
2631 nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p)
2632 {
2633 	struct nfsex_args *nfsexargp = (struct nfsex_args *)argp;
2634 	int error;
2635 	struct nameidata nd;
2636 	fhandle_t fh;
2637 
2638 	error = vfs_export(&nfsv4root_mnt, &nfsexargp->export);
2639 	if ((nfsexargp->export.ex_flags & MNT_DELEXPORT)) {
2640 		nfs_rootfhset = 0;
2641 		nfsv4root_set = 0;
2642 	} else if (error == 0) {
2643 		if (nfsexargp->fspec == NULL)
2644 			return (EPERM);
2645 		/*
2646 		 * If fspec != NULL, this is the v4root path.
2647 		 */
2648 		NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_USERSPACE,
2649 		    nfsexargp->fspec, p);
2650 		if ((error = namei(&nd)) != 0)
2651 			return (error);
2652 		error = nfsvno_getfh(nd.ni_vp, &fh, p);
2653 		vrele(nd.ni_vp);
2654 		if (!error) {
2655 			nfs_rootfh.nfsrvfh_len = NFSX_MYFH;
2656 			NFSBCOPY((caddr_t)&fh,
2657 			    nfs_rootfh.nfsrvfh_data,
2658 			    sizeof (fhandle_t));
2659 			nfs_rootfhset = 1;
2660 		}
2661 	}
2662 	return (error);
2663 }
2664 
2665 /*
2666  * Get the tcp socket sequence numbers we need.
2667  * (Maybe this should be moved to the tcp sources?)
2668  */
2669 int
2670 nfsrv_getsocksndseq(struct socket *so, tcp_seq *maxp, tcp_seq *unap)
2671 {
2672 	struct inpcb *inp;
2673 	struct tcpcb *tp;
2674 	int error = EPIPE;
2675 
2676 	INP_INFO_RLOCK(&V_tcbinfo);
2677 	inp = sotoinpcb(so);
2678 	if (inp == NULL) {
2679 		INP_INFO_RUNLOCK(&V_tcbinfo);
2680 		return (error);
2681 	}
2682 	INP_RLOCK(inp);
2683 	INP_INFO_RUNLOCK(&V_tcbinfo);
2684 	tp = intotcpcb(inp);
2685 	if (tp != NULL && tp->t_state == TCPS_ESTABLISHED) {
2686 		*maxp = tp->snd_max;
2687 		*unap = tp->snd_una;
2688 		error = 0;
2689 	}
2690 	INP_RUNLOCK(inp);
2691 	return (error);
2692 }
2693 
/*
 * This function is meant to report whether the system is near its limit
 * for memory allocation via malloc() or mget(), returning True iff
 * either of these resources is near its limit.
 * XXX (For now, this is just a stub.)
 * The stub only does something when nfsrv_testmalloclimit is non-zero:
 * it then returns 1 on every 1000th call and prints a message for the
 * first of those and every 100th one after that.
 */
int nfsrv_testmalloclimit = 0;
int
nfsrv_mallocmget_limit(void)
{
	static int hitcnt = 0;
	static int callcnt = 1;
	int thiscall, thishit;

	/* The call counter only advances while testing is enabled. */
	if (nfsrv_testmalloclimit == 0)
		return (0);
	thiscall = callcnt;
	callcnt++;
	if (thiscall % 1000 != 0)
		return (0);

	thishit = hitcnt;
	hitcnt++;
	if (thishit % 100 == 0)
		printf("nfsd: malloc/mget near limit\n");
	return (1);
}
2714 
2715 /*
2716  * BSD specific initialization of a mount point.
2717  */
2718 void
2719 nfsd_mntinit(void)
2720 {
2721 	static int inited = 0;
2722 
2723 	if (inited)
2724 		return;
2725 	inited = 1;
2726 	nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
2727 	TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist);
2728 	nfsv4root_mnt.mnt_export = NULL;
2729 	TAILQ_INIT(&nfsv4root_opt);
2730 	TAILQ_INIT(&nfsv4root_newopt);
2731 	nfsv4root_mnt.mnt_opt = &nfsv4root_opt;
2732 	nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt;
2733 	nfsv4root_mnt.mnt_nvnodelistsize = 0;
2734 }
2735 
2736 /*
2737  * Get a vnode for a file handle, without checking exports, etc.
2738  */
2739 struct vnode *
2740 nfsvno_getvp(fhandle_t *fhp)
2741 {
2742 	struct mount *mp;
2743 	struct vnode *vp;
2744 	int error;
2745 
2746 	mp = vfs_getvfs(&fhp->fh_fsid);
2747 	if (mp == NULL)
2748 		return (NULL);
2749 	error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp);
2750 	if (error)
2751 		return (NULL);
2752 	return (vp);
2753 }
2754 
2755 /*
2756  * Check to see it a byte range lock held by a process running
2757  * locally on the server conflicts with the new lock.
2758  */
2759 int
2760 nfsvno_localconflict(struct vnode *vp, int ftype, u_int64_t first,
2761     u_int64_t end, struct nfslockconflict *cfp, struct thread *td)
2762 {
2763 	int error;
2764 	struct flock fl;
2765 
2766 	if (!nfsrv_dolocallocks)
2767 		return (0);
2768 	fl.l_whence = SEEK_SET;
2769 	fl.l_type = ftype;
2770 	fl.l_start = (off_t)first;
2771 	if (end == NFS64BITSSET)
2772 		fl.l_len = 0;
2773 	else
2774 		fl.l_len = (off_t)(end - first);
2775 	/*
2776 	 * For FreeBSD8, the l_pid and l_sysid must be set to the same
2777 	 * values for all calls, so that all locks will be held by the
2778 	 * nfsd server. (The nfsd server handles conflicts between the
2779 	 * various clients.)
2780 	 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024
2781 	 * bytes, so it can't be put in l_sysid.
2782 	 */
2783 	if (nfsv4_sysid == 0)
2784 		nfsv4_sysid = nlm_acquire_next_sysid();
2785 	fl.l_pid = (pid_t)0;
2786 	fl.l_sysid = (int)nfsv4_sysid;
2787 
2788 	NFSVOPUNLOCK(vp, 0, td);
2789 	error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_GETLK, &fl,
2790 	    (F_POSIX | F_REMOTE));
2791 	NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, td);
2792 	if (error)
2793 		return (error);
2794 	if (fl.l_type == F_UNLCK)
2795 		return (0);
2796 	if (cfp != NULL) {
2797 		cfp->cl_clientid.lval[0] = cfp->cl_clientid.lval[1] = 0;
2798 		cfp->cl_first = (u_int64_t)fl.l_start;
2799 		if (fl.l_len == 0)
2800 			cfp->cl_end = NFS64BITSSET;
2801 		else
2802 			cfp->cl_end = (u_int64_t)
2803 			    (fl.l_start + fl.l_len);
2804 		if (fl.l_type == F_WRLCK)
2805 			cfp->cl_flags = NFSLCK_WRITE;
2806 		else
2807 			cfp->cl_flags = NFSLCK_READ;
2808 		sprintf(cfp->cl_owner, "LOCALID%d", fl.l_pid);
2809 		cfp->cl_ownerlen = strlen(cfp->cl_owner);
2810 		return (NFSERR_DENIED);
2811 	}
2812 	return (NFSERR_INVAL);
2813 }
2814 
2815 /*
2816  * Do a local VOP_ADVLOCK().
2817  */
2818 int
2819 nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first,
2820     u_int64_t end, struct thread *td)
2821 {
2822 	int error;
2823 	struct flock fl;
2824 	u_int64_t tlen;
2825 
2826 	if (!nfsrv_dolocallocks)
2827 		return (0);
2828 	fl.l_whence = SEEK_SET;
2829 	fl.l_type = ftype;
2830 	fl.l_start = (off_t)first;
2831 	if (end == NFS64BITSSET) {
2832 		fl.l_len = 0;
2833 	} else {
2834 		tlen = end - first;
2835 		fl.l_len = (off_t)tlen;
2836 	}
2837 	/*
2838 	 * For FreeBSD8, the l_pid and l_sysid must be set to the same
2839 	 * values for all calls, so that all locks will be held by the
2840 	 * nfsd server. (The nfsd server handles conflicts between the
2841 	 * various clients.)
2842 	 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024
2843 	 * bytes, so it can't be put in l_sysid.
2844 	 */
2845 	if (nfsv4_sysid == 0)
2846 		nfsv4_sysid = nlm_acquire_next_sysid();
2847 	fl.l_pid = (pid_t)0;
2848 	fl.l_sysid = (int)nfsv4_sysid;
2849 
2850 	NFSVOPUNLOCK(vp, 0, td);
2851 	error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl,
2852 	    (F_POSIX | F_REMOTE));
2853 	NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, td);
2854 	return (error);
2855 }
2856 
/*
 * Unlock an underlying local file system.
 * This is the counterpart of nfsvno_lockvfs() and simply invokes
 * VFS_UNLOCK_GIANT() for the mount point.
 */
void
nfsvno_unlockvfs(struct mount *mp)
{

	VFS_UNLOCK_GIANT(mp);
}
2866 
/*
 * Lock an underlying file system, as required.
 * The value of VFS_LOCK_GIANT() is passed back, telling the caller
 * whether or not it is now locked.
 */
int
nfsvno_lockvfs(struct mount *mp)
{

	return (VFS_LOCK_GIANT(mp));
}
2879 
2880 /*
2881  * Check the nfsv4 root exports.
2882  */
2883 int
2884 nfsvno_v4rootexport(struct nfsrv_descript *nd)
2885 {
2886 	struct ucred *credanon;
2887 	int exflags, error, numsecflavor, *secflavors, i;
2888 
2889 	error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags,
2890 	    &credanon, &numsecflavor, &secflavors);
2891 	if (error)
2892 		return (NFSERR_PROGUNAVAIL);
2893 	if (credanon != NULL)
2894 		crfree(credanon);
2895 	for (i = 0; i < numsecflavor; i++) {
2896 		if (secflavors[i] == AUTH_SYS)
2897 			nd->nd_flag |= ND_EXAUTHSYS;
2898 		else if (secflavors[i] == RPCSEC_GSS_KRB5)
2899 			nd->nd_flag |= ND_EXGSS;
2900 		else if (secflavors[i] == RPCSEC_GSS_KRB5I)
2901 			nd->nd_flag |= ND_EXGSSINTEGRITY;
2902 		else if (secflavors[i] == RPCSEC_GSS_KRB5P)
2903 			nd->nd_flag |= ND_EXGSSPRIVACY;
2904 	}
2905 	return (0);
2906 }
2907 
2908 /*
2909  * Nfs server psuedo system call for the nfsd's
2910  */
2911 /*
2912  * MPSAFE
2913  */
2914 static int
2915 nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap)
2916 {
2917 	struct file *fp;
2918 	struct nfsd_addsock_args sockarg;
2919 	struct nfsd_nfsd_args nfsdarg;
2920 	int error;
2921 
2922 	if (uap->flag & NFSSVC_NFSDADDSOCK) {
2923 		error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg));
2924 		if (error)
2925 			return (error);
2926 		if ((error = fget(td, sockarg.sock, &fp)) != 0) {
2927 			return (error);
2928 		}
2929 		if (fp->f_type != DTYPE_SOCKET) {
2930 			fdrop(fp, td);
2931 			return (EPERM);
2932 		}
2933 		error = nfsrvd_addsock(fp);
2934 		fdrop(fp, td);
2935 	} else if (uap->flag & NFSSVC_NFSDNFSD) {
2936 		if (uap->argp == NULL)
2937 			return (EINVAL);
2938 		error = copyin(uap->argp, (caddr_t)&nfsdarg,
2939 		    sizeof (nfsdarg));
2940 		if (error)
2941 			return (error);
2942 		error = nfsrvd_nfsd(td, &nfsdarg);
2943 	} else {
2944 		error = nfssvc_srvcall(td, uap, td->td_ucred);
2945 	}
2946 	return (error);
2947 }
2948 
2949 static int
2950 nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred)
2951 {
2952 	struct nfsex_args export;
2953 	struct file *fp = NULL;
2954 	int stablefd, len;
2955 	struct nfsd_clid adminrevoke;
2956 	struct nfsd_dumplist dumplist;
2957 	struct nfsd_dumpclients *dumpclients;
2958 	struct nfsd_dumplocklist dumplocklist;
2959 	struct nfsd_dumplocks *dumplocks;
2960 	struct nameidata nd;
2961 	vnode_t vp;
2962 	int error = EINVAL;
2963 
2964 	if (uap->flag & NFSSVC_PUBLICFH) {
2965 		NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data,
2966 		    sizeof (fhandle_t));
2967 		error = copyin(uap->argp,
2968 		    &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t));
2969 		if (!error)
2970 			nfs_pubfhset = 1;
2971 	} else if (uap->flag & NFSSVC_V4ROOTEXPORT) {
2972 		error = copyin(uap->argp,(caddr_t)&export,
2973 		    sizeof (struct nfsex_args));
2974 		if (!error)
2975 			error = nfsrv_v4rootexport(&export, cred, p);
2976 	} else if (uap->flag & NFSSVC_NOPUBLICFH) {
2977 		nfs_pubfhset = 0;
2978 		error = 0;
2979 	} else if (uap->flag & NFSSVC_STABLERESTART) {
2980 		error = copyin(uap->argp, (caddr_t)&stablefd,
2981 		    sizeof (int));
2982 		if (!error)
2983 			error = fp_getfvp(p, stablefd, &fp, &vp);
2984 		if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE))
2985 			error = EBADF;
2986 		if (!error && newnfs_numnfsd != 0)
2987 			error = EPERM;
2988 		if (!error) {
2989 			nfsrv_stablefirst.nsf_fp = fp;
2990 			nfsrv_setupstable(p);
2991 		}
2992 	} else if (uap->flag & NFSSVC_ADMINREVOKE) {
2993 		error = copyin(uap->argp, (caddr_t)&adminrevoke,
2994 		    sizeof (struct nfsd_clid));
2995 		if (!error)
2996 			error = nfsrv_adminrevoke(&adminrevoke, p);
2997 	} else if (uap->flag & NFSSVC_DUMPCLIENTS) {
2998 		error = copyin(uap->argp, (caddr_t)&dumplist,
2999 		    sizeof (struct nfsd_dumplist));
3000 		if (!error && (dumplist.ndl_size < 1 ||
3001 			dumplist.ndl_size > NFSRV_MAXDUMPLIST))
3002 			error = EPERM;
3003 		if (!error) {
3004 		    len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size;
3005 		    dumpclients = (struct nfsd_dumpclients *)malloc(len,
3006 			M_TEMP, M_WAITOK);
3007 		    nfsrv_dumpclients(dumpclients, dumplist.ndl_size);
3008 		    error = copyout(dumpclients,
3009 			CAST_USER_ADDR_T(dumplist.ndl_list), len);
3010 		    free((caddr_t)dumpclients, M_TEMP);
3011 		}
3012 	} else if (uap->flag & NFSSVC_DUMPLOCKS) {
3013 		error = copyin(uap->argp, (caddr_t)&dumplocklist,
3014 		    sizeof (struct nfsd_dumplocklist));
3015 		if (!error && (dumplocklist.ndllck_size < 1 ||
3016 			dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST))
3017 			error = EPERM;
3018 		if (!error)
3019 			error = nfsrv_lookupfilename(&nd,
3020 				dumplocklist.ndllck_fname, p);
3021 		if (!error) {
3022 			len = sizeof (struct nfsd_dumplocks) *
3023 				dumplocklist.ndllck_size;
3024 			dumplocks = (struct nfsd_dumplocks *)malloc(len,
3025 				M_TEMP, M_WAITOK);
3026 			nfsrv_dumplocks(nd.ni_vp, dumplocks,
3027 			    dumplocklist.ndllck_size, p);
3028 			vput(nd.ni_vp);
3029 			error = copyout(dumplocks,
3030 			    CAST_USER_ADDR_T(dumplocklist.ndllck_list), len);
3031 			free((caddr_t)dumplocks, M_TEMP);
3032 		}
3033 	}
3034 	return (error);
3035 }
3036 
3037 /*
3038  * Check exports.
3039  * Returns 0 if ok, 1 otherwise.
3040  */
3041 int
3042 nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp)
3043 {
3044 	int i;
3045 
3046 	/*
3047 	 * This seems odd, but allow the case where the security flavor
3048 	 * list is empty. This happens when NFSv4 is traversing non-exported
3049 	 * file systems. Exported file systems should always have a non-empty
3050 	 * security flavor list.
3051 	 */
3052 	if (exp->nes_numsecflavor == 0)
3053 		return (0);
3054 
3055 	for (i = 0; i < exp->nes_numsecflavor; i++) {
3056 		/*
3057 		 * The tests for privacy and integrity must be first,
3058 		 * since ND_GSS is set for everything but AUTH_SYS.
3059 		 */
3060 		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P &&
3061 		    (nd->nd_flag & ND_GSSPRIVACY))
3062 			return (0);
3063 		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I &&
3064 		    (nd->nd_flag & ND_GSSINTEGRITY))
3065 			return (0);
3066 		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 &&
3067 		    (nd->nd_flag & ND_GSS))
3068 			return (0);
3069 		if (exp->nes_secflavors[i] == AUTH_SYS &&
3070 		    (nd->nd_flag & ND_GSS) == 0)
3071 			return (0);
3072 	}
3073 	return (1);
3074 }
3075 
3076 extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *);
3077 
3078 /*
3079  * Called once to initialize data structures...
3080  */
3081 static int
3082 nfsd_modevent(module_t mod, int type, void *data)
3083 {
3084 	int error = 0;
3085 	static int loaded = 0;
3086 
3087 	switch (type) {
3088 	case MOD_LOAD:
3089 		if (loaded)
3090 			return (0);
3091 		newnfs_portinit();
3092 		mtx_init(&nfs_cache_mutex, "nfs_cache_mutex", NULL, MTX_DEF);
3093 		mtx_init(&nfs_v4root_mutex, "nfs_v4root_mutex", NULL, MTX_DEF);
3094 		mtx_init(&nfsv4root_mnt.mnt_mtx, "struct mount mtx", NULL,
3095 		    MTX_DEF);
3096 		lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0);
3097 		nfsrvd_initcache();
3098 		nfsd_init();
3099 		NFSD_LOCK();
3100 		nfsrvd_init(0);
3101 		NFSD_UNLOCK();
3102 		nfsd_mntinit();
3103 #ifdef VV_DISABLEDELEG
3104 		vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation;
3105 		vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation;
3106 #endif
3107 		nfsd_call_servertimer = nfsrv_servertimer;
3108 		nfsd_call_nfsd = nfssvc_nfsd;
3109 		loaded = 1;
3110 		break;
3111 
3112 	case MOD_UNLOAD:
3113 		if (newnfs_numnfsd != 0) {
3114 			error = EBUSY;
3115 			break;
3116 		}
3117 
3118 #ifdef VV_DISABLEDELEG
3119 		vn_deleg_ops.vndeleg_recall = NULL;
3120 		vn_deleg_ops.vndeleg_disable = NULL;
3121 #endif
3122 		nfsd_call_servertimer = NULL;
3123 		nfsd_call_nfsd = NULL;
3124 		/* and get rid of the locks */
3125 		mtx_destroy(&nfs_cache_mutex);
3126 		mtx_destroy(&nfs_v4root_mutex);
3127 		mtx_destroy(&nfsv4root_mnt.mnt_mtx);
3128 		lockdestroy(&nfsv4root_mnt.mnt_explock);
3129 		loaded = 0;
3130 		break;
3131 	default:
3132 		error = EOPNOTSUPP;
3133 		break;
3134 	}
3135 	return error;
3136 }
/*
 * Module declaration: the module is called "nfsd" and its events are
 * handled by nfsd_modevent().
 */
static moduledata_t nfsd_mod = {
	"nfsd",
	nfsd_modevent,
	NULL,		/* presumably the handler's extra argument - unused */
};
DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY);

/* So that loader and kldload(2) can find us, wherever we are.. */
MODULE_VERSION(nfsd, 1);
/* The nfscommon and nfslockd modules are needed by this one. */
MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1);
MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1);
3148 
3149