xref: /freebsd/sys/fs/nfsserver/nfs_nfsdport.c (revision aa64588d28258aef88cc33b8043112e8856948d0)
1 /*-
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  */
33 
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36 
37 /*
38  * Functions that perform the vfs operations required by the routines in
39  * nfsd_serv.c. It is hoped that this change will make the server more
40  * portable.
41  */
42 
43 #include <fs/nfs/nfsport.h>
44 #include <sys/sysctl.h>
45 #include <nlm/nlm_prot.h>
46 #include <nlm/nlm.h>
47 
48 extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
49 extern int nfsv4root_set;
50 extern int nfsrv_useacl;
51 extern int newnfs_numnfsd;
52 extern struct mount nfsv4root_mnt;
53 extern struct nfsrv_stablefirst nfsrv_stablefirst;
54 extern void (*nfsd_call_servertimer)(void);
55 struct vfsoptlist nfsv4root_opt, nfsv4root_newopt;
56 NFSDLOCKMUTEX;
57 struct mtx nfs_cache_mutex;
58 struct mtx nfs_v4root_mutex;
59 struct nfsrvfh nfs_rootfh, nfs_pubfh;
60 int nfs_pubfhset = 0, nfs_rootfhset = 0;
61 static uint32_t nfsv4_sysid = 0;
62 
63 static int nfssvc_srvcall(struct thread *, struct nfssvc_args *,
64     struct ucred *);
65 
66 static int enable_crossmntpt = 1;
67 static int nfs_commit_blks;
68 static int nfs_commit_miss;
69 extern int nfsrv_issuedelegs;
70 extern int nfsrv_dolocallocks;
71 
72 SYSCTL_DECL(_vfs_newnfs);
73 SYSCTL_INT(_vfs_newnfs, OID_AUTO, mirrormnt, CTLFLAG_RW, &enable_crossmntpt,
74     0, "Enable nfsd to cross mount points");
75 SYSCTL_INT(_vfs_newnfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks,
76     0, "");
77 SYSCTL_INT(_vfs_newnfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss,
78     0, "");
79 SYSCTL_INT(_vfs_newnfs, OID_AUTO, issue_delegations, CTLFLAG_RW,
80     &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations");
81 SYSCTL_INT(_vfs_newnfs, OID_AUTO, enable_locallocks, CTLFLAG_RW,
82     &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files");
83 
84 #define	NUM_HEURISTIC		1017
85 #define	NHUSE_INIT		64
86 #define	NHUSE_INC		16
87 #define	NHUSE_MAX		2048
88 
89 static struct nfsheur {
90 	struct vnode *nh_vp;	/* vp to match (unreferenced pointer) */
91 	off_t nh_nextr;		/* next offset for sequential detection */
92 	int nh_use;		/* use count for selection */
93 	int nh_seqcount;	/* heuristic */
94 } nfsheur[NUM_HEURISTIC];
95 
96 
97 /*
98  * Get attributes into nfsvattr structure.
99  */
100 int
101 nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
102     struct thread *p)
103 {
104 	int error, lockedit = 0;
105 
106 	/* Since FreeBSD insists the vnode be locked... */
107 	if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
108 		lockedit = 1;
109 		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, p);
110 	}
111 	error = VOP_GETATTR(vp, &nvap->na_vattr, cred);
112 	if (lockedit)
113 		NFSVOPUNLOCK(vp, 0, p);
114 	return (error);
115 }
116 
117 /*
118  * Get a file handle for a vnode.
119  */
120 int
121 nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p)
122 {
123 	int error;
124 
125 	NFSBZERO((caddr_t)fhp, sizeof(fhandle_t));
126 	fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
127 	error = VOP_VPTOFH(vp, &fhp->fh_fid);
128 	return (error);
129 }
130 
131 /*
132  * Perform access checking for vnodes obtained from file handles that would
133  * refer to files already opened by a Unix client. You cannot just use
134  * vn_writechk() and VOP_ACCESSX() for two reasons.
135  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
136  *     case.
137  * 2 - The owner is to be given access irrespective of mode bits for some
138  *     operations, so that processes that chmod after opening a file don't
139  *     break.
140  */
141 int
142 nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred,
143     struct nfsexstuff *exp, struct thread *p, int override, int vpislocked,
144     u_int32_t *supportedtypep)
145 {
146 	struct vattr vattr;
147 	int error = 0, getret = 0;
148 
149 	if (accmode & VWRITE) {
150 		/* Just vn_writechk() changed to check rdonly */
151 		/*
152 		 * Disallow write attempts on read-only file systems;
153 		 * unless the file is a socket or a block or character
154 		 * device resident on the file system.
155 		 */
156 		if (NFSVNO_EXRDONLY(exp) ||
157 		    (vp->v_mount->mnt_flag & MNT_RDONLY)) {
158 			switch (vp->v_type) {
159 			case VREG:
160 			case VDIR:
161 			case VLNK:
162 				return (EROFS);
163 			default:
164 				break;
165 			}
166 		}
167 		/*
168 		 * If there's shared text associated with
169 		 * the inode, try to free it up once.  If
170 		 * we fail, we can't allow writing.
171 		 */
172 		if (vp->v_vflag & VV_TEXT)
173 			return (ETXTBSY);
174 	}
175 	if (vpislocked == 0)
176 		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, p);
177 
178 	/*
179 	 * Should the override still be applied when ACLs are enabled?
180 	 */
181 	error = VOP_ACCESSX(vp, accmode, cred, p);
182 	if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) {
183 		/*
184 		 * Try again with VEXPLICIT_DENY, to see if the test for
185 		 * deletion is supported.
186 		 */
187 		error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p);
188 		if (error == 0) {
189 			if (vp->v_type == VDIR) {
190 				accmode &= ~(VDELETE | VDELETE_CHILD);
191 				accmode |= VWRITE;
192 				error = VOP_ACCESSX(vp, accmode, cred, p);
193 			} else if (supportedtypep != NULL) {
194 				*supportedtypep &= ~NFSACCESS_DELETE;
195 			}
196 		}
197 	}
198 
199 	/*
200 	 * Allow certain operations for the owner (reads and writes
201 	 * on files that are already open).
202 	 */
203 	if (override != NFSACCCHK_NOOVERRIDE &&
204 	    (error == EPERM || error == EACCES)) {
205 		if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT))
206 			error = 0;
207 		else if (override & NFSACCCHK_ALLOWOWNER) {
208 			getret = VOP_GETATTR(vp, &vattr, cred);
209 			if (getret == 0 && cred->cr_uid == vattr.va_uid)
210 				error = 0;
211 		}
212 	}
213 	if (vpislocked == 0)
214 		NFSVOPUNLOCK(vp, 0, p);
215 	return (error);
216 }
217 
218 /*
219  * Set attribute(s) vnop.
220  */
221 int
222 nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
223     struct thread *p, struct nfsexstuff *exp)
224 {
225 	int error;
226 
227 	error = VOP_SETATTR(vp, &nvap->na_vattr, cred);
228 	return (error);
229 }
230 
231 /*
232  * Set up nameidata for a lookup() call and do it
233  * For the cases where we are crossing mount points
234  * (looking up the public fh path or the v4 root path when
235  *  not using a pseudo-root fs), set/release the Giant lock,
236  * as required.
237  */
238 int
239 nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp,
240     struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p,
241     struct vnode **retdirp)
242 {
243 	struct componentname *cnp = &ndp->ni_cnd;
244 	int i;
245 	struct iovec aiov;
246 	struct uio auio;
247 	int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen;
248 	int error = 0, crossmnt;
249 	char *cp;
250 
251 	*retdirp = NULL;
252 	cnp->cn_nameptr = cnp->cn_pnbuf;
253 	/*
254 	 * Extract and set starting directory.
255 	 */
256 	if (dp->v_type != VDIR) {
257 		if (islocked)
258 			vput(dp);
259 		else
260 			vrele(dp);
261 		nfsvno_relpathbuf(ndp);
262 		return (ENOTDIR);
263 	}
264 	if (islocked)
265 		NFSVOPUNLOCK(dp, 0, p);
266 	VREF(dp);
267 	*retdirp = dp;
268 	if (NFSVNO_EXRDONLY(exp))
269 		cnp->cn_flags |= RDONLY;
270 	ndp->ni_segflg = UIO_SYSSPACE;
271 	crossmnt = 1;
272 
273 	if (nd->nd_flag & ND_PUBLOOKUP) {
274 		ndp->ni_loopcnt = 0;
275 		if (cnp->cn_pnbuf[0] == '/') {
276 			vrele(dp);
277 			/*
278 			 * Check for degenerate pathnames here, since lookup()
279 			 * panics on them.
280 			 */
281 			for (i = 1; i < ndp->ni_pathlen; i++)
282 				if (cnp->cn_pnbuf[i] != '/')
283 					break;
284 			if (i == ndp->ni_pathlen) {
285 				error = NFSERR_ACCES;
286 				goto out;
287 			}
288 			dp = rootvnode;
289 			VREF(dp);
290 		}
291 	} else if ((enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) ||
292 	    (nd->nd_flag & ND_NFSV4) == 0) {
293 		/*
294 		 * Only cross mount points for NFSv4 when doing a
295 		 * mount while traversing the file system above
296 		 * the mount point, unless enable_crossmntpt is set.
297 		 */
298 		cnp->cn_flags |= NOCROSSMOUNT;
299 		crossmnt = 0;
300 	}
301 
302 	/*
303 	 * Initialize for scan, set ni_startdir and bump ref on dp again
304 	 * becuase lookup() will dereference ni_startdir.
305 	 */
306 
307 	cnp->cn_thread = p;
308 	ndp->ni_startdir = dp;
309 	ndp->ni_rootdir = rootvnode;
310 
311 	if (!lockleaf)
312 		cnp->cn_flags |= LOCKLEAF;
313 	for (;;) {
314 		cnp->cn_nameptr = cnp->cn_pnbuf;
315 		/*
316 		 * Call lookup() to do the real work.  If an error occurs,
317 		 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
318 		 * we do not have to dereference anything before returning.
319 		 * In either case ni_startdir will be dereferenced and NULLed
320 		 * out.
321 		 */
322 		if (exp->nes_vfslocked)
323 			ndp->ni_cnd.cn_flags |= GIANTHELD;
324 		error = lookup(ndp);
325 		/*
326 		 * The Giant lock should only change when
327 		 * crossing mount points.
328 		 */
329 		if (crossmnt) {
330 			exp->nes_vfslocked =
331 			    (ndp->ni_cnd.cn_flags & GIANTHELD) != 0;
332 			ndp->ni_cnd.cn_flags &= ~GIANTHELD;
333 		}
334 		if (error)
335 			break;
336 
337 		/*
338 		 * Check for encountering a symbolic link.  Trivial
339 		 * termination occurs if no symlink encountered.
340 		 */
341 		if ((cnp->cn_flags & ISSYMLINK) == 0) {
342 			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
343 				nfsvno_relpathbuf(ndp);
344 			if (ndp->ni_vp && !lockleaf)
345 				NFSVOPUNLOCK(ndp->ni_vp, 0, p);
346 			break;
347 		}
348 
349 		/*
350 		 * Validate symlink
351 		 */
352 		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
353 			NFSVOPUNLOCK(ndp->ni_dvp, 0, p);
354 		if (!(nd->nd_flag & ND_PUBLOOKUP)) {
355 			error = EINVAL;
356 			goto badlink2;
357 		}
358 
359 		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
360 			error = ELOOP;
361 			goto badlink2;
362 		}
363 		if (ndp->ni_pathlen > 1)
364 			cp = uma_zalloc(namei_zone, M_WAITOK);
365 		else
366 			cp = cnp->cn_pnbuf;
367 		aiov.iov_base = cp;
368 		aiov.iov_len = MAXPATHLEN;
369 		auio.uio_iov = &aiov;
370 		auio.uio_iovcnt = 1;
371 		auio.uio_offset = 0;
372 		auio.uio_rw = UIO_READ;
373 		auio.uio_segflg = UIO_SYSSPACE;
374 		auio.uio_td = NULL;
375 		auio.uio_resid = MAXPATHLEN;
376 		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
377 		if (error) {
378 		badlink1:
379 			if (ndp->ni_pathlen > 1)
380 				uma_zfree(namei_zone, cp);
381 		badlink2:
382 			vrele(ndp->ni_dvp);
383 			vput(ndp->ni_vp);
384 			break;
385 		}
386 		linklen = MAXPATHLEN - auio.uio_resid;
387 		if (linklen == 0) {
388 			error = ENOENT;
389 			goto badlink1;
390 		}
391 		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
392 			error = ENAMETOOLONG;
393 			goto badlink1;
394 		}
395 
396 		/*
397 		 * Adjust or replace path
398 		 */
399 		if (ndp->ni_pathlen > 1) {
400 			NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
401 			uma_zfree(namei_zone, cnp->cn_pnbuf);
402 			cnp->cn_pnbuf = cp;
403 		} else
404 			cnp->cn_pnbuf[linklen] = '\0';
405 		ndp->ni_pathlen += linklen;
406 
407 		/*
408 		 * Cleanup refs for next loop and check if root directory
409 		 * should replace current directory.  Normally ni_dvp
410 		 * becomes the new base directory and is cleaned up when
411 		 * we loop.  Explicitly null pointers after invalidation
412 		 * to clarify operation.
413 		 */
414 		vput(ndp->ni_vp);
415 		ndp->ni_vp = NULL;
416 
417 		if (cnp->cn_pnbuf[0] == '/') {
418 			vrele(ndp->ni_dvp);
419 			ndp->ni_dvp = ndp->ni_rootdir;
420 			VREF(ndp->ni_dvp);
421 		}
422 		ndp->ni_startdir = ndp->ni_dvp;
423 		ndp->ni_dvp = NULL;
424 	}
425 	if (!lockleaf)
426 		cnp->cn_flags &= ~LOCKLEAF;
427 
428 out:
429 	if (error) {
430 		uma_zfree(namei_zone, cnp->cn_pnbuf);
431 		ndp->ni_vp = NULL;
432 		ndp->ni_dvp = NULL;
433 		ndp->ni_startdir = NULL;
434 		cnp->cn_flags &= ~HASBUF;
435 	} else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
436 		ndp->ni_dvp = NULL;
437 	}
438 	return (error);
439 }
440 
441 /*
442  * Set up a pathname buffer and return a pointer to it and, optionally
443  * set a hash pointer.
444  */
445 void
446 nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp)
447 {
448 	struct componentname *cnp = &ndp->ni_cnd;
449 
450 	cnp->cn_flags |= (NOMACCHECK | HASBUF);
451 	cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
452 	if (hashpp != NULL)
453 		*hashpp = NULL;
454 	*bufpp = cnp->cn_pnbuf;
455 }
456 
457 /*
458  * Release the above path buffer, if not released by nfsvno_namei().
459  */
460 void
461 nfsvno_relpathbuf(struct nameidata *ndp)
462 {
463 
464 	if ((ndp->ni_cnd.cn_flags & HASBUF) == 0)
465 		panic("nfsrelpath");
466 	uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
467 	ndp->ni_cnd.cn_flags &= ~HASBUF;
468 }
469 
470 /*
471  * Readlink vnode op into an mbuf list.
472  */
473 int
474 nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p,
475     struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
476 {
477 	struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
478 	struct iovec *ivp = iv;
479 	struct uio io, *uiop = &io;
480 	struct mbuf *mp, *mp2 = NULL, *mp3 = NULL;
481 	int i, len, tlen, error;
482 
483 	len = 0;
484 	i = 0;
485 	while (len < NFS_MAXPATHLEN) {
486 		NFSMGET(mp);
487 		MCLGET(mp, M_WAIT);
488 		mp->m_len = NFSMSIZ(mp);
489 		if (len == 0) {
490 			mp3 = mp2 = mp;
491 		} else {
492 			mp2->m_next = mp;
493 			mp2 = mp;
494 		}
495 		if ((len + mp->m_len) > NFS_MAXPATHLEN) {
496 			mp->m_len = NFS_MAXPATHLEN - len;
497 			len = NFS_MAXPATHLEN;
498 		} else {
499 			len += mp->m_len;
500 		}
501 		ivp->iov_base = mtod(mp, caddr_t);
502 		ivp->iov_len = mp->m_len;
503 		i++;
504 		ivp++;
505 	}
506 	uiop->uio_iov = iv;
507 	uiop->uio_iovcnt = i;
508 	uiop->uio_offset = 0;
509 	uiop->uio_resid = len;
510 	uiop->uio_rw = UIO_READ;
511 	uiop->uio_segflg = UIO_SYSSPACE;
512 	uiop->uio_td = NULL;
513 	error = VOP_READLINK(vp, uiop, cred);
514 	if (error) {
515 		m_freem(mp3);
516 		*lenp = 0;
517 		return (error);
518 	}
519 	if (uiop->uio_resid > 0) {
520 		len -= uiop->uio_resid;
521 		tlen = NFSM_RNDUP(len);
522 		nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len);
523 	}
524 	*lenp = len;
525 	*mpp = mp3;
526 	*mpendp = mp;
527 	return (0);
528 }
529 
530 /*
531  * Read vnode op call into mbuf list.
532  */
533 int
534 nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
535     struct thread *p, struct mbuf **mpp, struct mbuf **mpendp)
536 {
537 	struct mbuf *m;
538 	int i;
539 	struct iovec *iv;
540 	struct iovec *iv2;
541 	int error = 0, len, left, siz, tlen, ioflag = 0, hi, try = 32;
542 	struct mbuf *m2 = NULL, *m3;
543 	struct uio io, *uiop = &io;
544 	struct nfsheur *nh;
545 
546 	/*
547 	 * Calculate seqcount for heuristic
548 	 */
549 	/*
550 	 * Locate best candidate
551 	 */
552 
553 	hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
554 	nh = &nfsheur[hi];
555 
556 	while (try--) {
557 		if (nfsheur[hi].nh_vp == vp) {
558 			nh = &nfsheur[hi];
559 			break;
560 		}
561 		if (nfsheur[hi].nh_use > 0)
562 			--nfsheur[hi].nh_use;
563 		hi = (hi + 1) % NUM_HEURISTIC;
564 		if (nfsheur[hi].nh_use < nh->nh_use)
565 			nh = &nfsheur[hi];
566 	}
567 
568 	if (nh->nh_vp != vp) {
569 		nh->nh_vp = vp;
570 		nh->nh_nextr = off;
571 		nh->nh_use = NHUSE_INIT;
572 		if (off == 0)
573 			nh->nh_seqcount = 4;
574 		else
575 			nh->nh_seqcount = 1;
576 	}
577 
578 	/*
579 	 * Calculate heuristic
580 	 */
581 
582 	if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
583 		if (++nh->nh_seqcount > IO_SEQMAX)
584 			nh->nh_seqcount = IO_SEQMAX;
585 	} else if (nh->nh_seqcount > 1) {
586 		nh->nh_seqcount = 1;
587 	} else {
588 		nh->nh_seqcount = 0;
589 	}
590 	nh->nh_use += NHUSE_INC;
591 	if (nh->nh_use > NHUSE_MAX)
592 		nh->nh_use = NHUSE_MAX;
593 	ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
594 
595 	len = left = NFSM_RNDUP(cnt);
596 	m3 = NULL;
597 	/*
598 	 * Generate the mbuf list with the uio_iov ref. to it.
599 	 */
600 	i = 0;
601 	while (left > 0) {
602 		NFSMGET(m);
603 		MCLGET(m, M_WAIT);
604 		m->m_len = 0;
605 		siz = min(M_TRAILINGSPACE(m), left);
606 		left -= siz;
607 		i++;
608 		if (m3)
609 			m2->m_next = m;
610 		else
611 			m3 = m;
612 		m2 = m;
613 	}
614 	MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
615 	    M_TEMP, M_WAITOK);
616 	uiop->uio_iov = iv2 = iv;
617 	m = m3;
618 	left = len;
619 	i = 0;
620 	while (left > 0) {
621 		if (m == NULL)
622 			panic("nfsvno_read iov");
623 		siz = min(M_TRAILINGSPACE(m), left);
624 		if (siz > 0) {
625 			iv->iov_base = mtod(m, caddr_t) + m->m_len;
626 			iv->iov_len = siz;
627 			m->m_len += siz;
628 			left -= siz;
629 			iv++;
630 			i++;
631 		}
632 		m = m->m_next;
633 	}
634 	uiop->uio_iovcnt = i;
635 	uiop->uio_offset = off;
636 	uiop->uio_resid = len;
637 	uiop->uio_rw = UIO_READ;
638 	uiop->uio_segflg = UIO_SYSSPACE;
639 	error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
640 	FREE((caddr_t)iv2, M_TEMP);
641 	if (error) {
642 		m_freem(m3);
643 		*mpp = NULL;
644 		return (error);
645 	}
646 	tlen = len - uiop->uio_resid;
647 	cnt = cnt < tlen ? cnt : tlen;
648 	tlen = NFSM_RNDUP(cnt);
649 	if (tlen == 0) {
650 		m_freem(m3);
651 		m3 = NULL;
652 	} else if (len != tlen || tlen != cnt)
653 		nfsrv_adj(m3, len - tlen, tlen - cnt);
654 	*mpp = m3;
655 	*mpendp = m2;
656 	return (0);
657 }
658 
659 /*
660  * Write vnode op from an mbuf list.
661  */
662 int
663 nfsvno_write(struct vnode *vp, off_t off, int retlen, int cnt, int stable,
664     struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p)
665 {
666 	struct iovec *ivp;
667 	int i, len;
668 	struct iovec *iv;
669 	int ioflags, error;
670 	struct uio io, *uiop = &io;
671 
672 	MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
673 	    M_WAITOK);
674 	uiop->uio_iov = iv = ivp;
675 	uiop->uio_iovcnt = cnt;
676 	i = mtod(mp, caddr_t) + mp->m_len - cp;
677 	len = retlen;
678 	while (len > 0) {
679 		if (mp == NULL)
680 			panic("nfsvno_write");
681 		if (i > 0) {
682 			i = min(i, len);
683 			ivp->iov_base = cp;
684 			ivp->iov_len = i;
685 			ivp++;
686 			len -= i;
687 		}
688 		mp = mp->m_next;
689 		if (mp) {
690 			i = mp->m_len;
691 			cp = mtod(mp, caddr_t);
692 		}
693 	}
694 
695 	if (stable == NFSWRITE_UNSTABLE)
696 		ioflags = IO_NODELOCKED;
697 	else
698 		ioflags = (IO_SYNC | IO_NODELOCKED);
699 	uiop->uio_resid = retlen;
700 	uiop->uio_rw = UIO_WRITE;
701 	uiop->uio_segflg = UIO_SYSSPACE;
702 	NFSUIOPROC(uiop, p);
703 	uiop->uio_offset = off;
704 	error = VOP_WRITE(vp, uiop, ioflags, cred);
705 	FREE((caddr_t)iv, M_TEMP);
706 	return (error);
707 }
708 
709 /*
710  * Common code for creating a regular file (plus special files for V2).
711  */
712 int
713 nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp,
714     struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp,
715     int32_t *cverf, NFSDEV_T rdev, struct thread *p, struct nfsexstuff *exp)
716 {
717 	u_quad_t tempsize;
718 	int error;
719 
720 	error = nd->nd_repstat;
721 	if (!error && ndp->ni_vp == NULL) {
722 		if (nvap->na_type == VREG || nvap->na_type == VSOCK) {
723 			vrele(ndp->ni_startdir);
724 			error = VOP_CREATE(ndp->ni_dvp,
725 			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
726 			vput(ndp->ni_dvp);
727 			nfsvno_relpathbuf(ndp);
728 			if (!error) {
729 				if (*exclusive_flagp) {
730 					*exclusive_flagp = 0;
731 					NFSVNO_ATTRINIT(nvap);
732 					nvap->na_atime.tv_sec = cverf[0];
733 					nvap->na_atime.tv_nsec = cverf[1];
734 					error = VOP_SETATTR(ndp->ni_vp,
735 					    &nvap->na_vattr, nd->nd_cred);
736 				}
737 			}
738 		/*
739 		 * NFS V2 Only. nfsrvd_mknod() does this for V3.
740 		 * (This implies, just get out on an error.)
741 		 */
742 		} else if (nvap->na_type == VCHR || nvap->na_type == VBLK ||
743 			nvap->na_type == VFIFO) {
744 			if (nvap->na_type == VCHR && rdev == 0xffffffff)
745 				nvap->na_type = VFIFO;
746                         if (nvap->na_type != VFIFO &&
747 			    (error = priv_check_cred(nd->nd_cred,
748 			     PRIV_VFS_MKNOD_DEV, 0))) {
749 				vrele(ndp->ni_startdir);
750 				nfsvno_relpathbuf(ndp);
751 				vput(ndp->ni_dvp);
752 				return (error);
753 			}
754 			nvap->na_rdev = rdev;
755 			error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
756 			    &ndp->ni_cnd, &nvap->na_vattr);
757 			vput(ndp->ni_dvp);
758 			nfsvno_relpathbuf(ndp);
759 			if (error) {
760 				vrele(ndp->ni_startdir);
761 				return (error);
762 			}
763 		} else {
764 			vrele(ndp->ni_startdir);
765 			nfsvno_relpathbuf(ndp);
766 			vput(ndp->ni_dvp);
767 			return (ENXIO);
768 		}
769 		*vpp = ndp->ni_vp;
770 	} else {
771 		/*
772 		 * Handle cases where error is already set and/or
773 		 * the file exists.
774 		 * 1 - clean up the lookup
775 		 * 2 - iff !error and na_size set, truncate it
776 		 */
777 		vrele(ndp->ni_startdir);
778 		nfsvno_relpathbuf(ndp);
779 		*vpp = ndp->ni_vp;
780 		if (ndp->ni_dvp == *vpp)
781 			vrele(ndp->ni_dvp);
782 		else
783 			vput(ndp->ni_dvp);
784 		if (!error && nvap->na_size != VNOVAL) {
785 			error = nfsvno_accchk(*vpp, VWRITE,
786 			    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
787 			    NFSACCCHK_VPISLOCKED, NULL);
788 			if (!error) {
789 				tempsize = nvap->na_size;
790 				NFSVNO_ATTRINIT(nvap);
791 				nvap->na_size = tempsize;
792 				error = VOP_SETATTR(*vpp,
793 				    &nvap->na_vattr, nd->nd_cred);
794 			}
795 		}
796 		if (error)
797 			vput(*vpp);
798 	}
799 	return (error);
800 }
801 
802 /*
803  * Do a mknod vnode op.
804  */
805 int
806 nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred,
807     struct thread *p)
808 {
809 	int error = 0;
810 	enum vtype vtyp;
811 
812 	vtyp = nvap->na_type;
813 	/*
814 	 * Iff doesn't exist, create it.
815 	 */
816 	if (ndp->ni_vp) {
817 		vrele(ndp->ni_startdir);
818 		nfsvno_relpathbuf(ndp);
819 		vput(ndp->ni_dvp);
820 		vrele(ndp->ni_vp);
821 		return (EEXIST);
822 	}
823 	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
824 		vrele(ndp->ni_startdir);
825 		nfsvno_relpathbuf(ndp);
826 		vput(ndp->ni_dvp);
827 		return (NFSERR_BADTYPE);
828 	}
829 	if (vtyp == VSOCK) {
830 		vrele(ndp->ni_startdir);
831 		error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
832 		    &ndp->ni_cnd, &nvap->na_vattr);
833 		vput(ndp->ni_dvp);
834 		nfsvno_relpathbuf(ndp);
835 	} else {
836 		if (nvap->na_type != VFIFO &&
837 		    (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV, 0))) {
838 			vrele(ndp->ni_startdir);
839 			nfsvno_relpathbuf(ndp);
840 			vput(ndp->ni_dvp);
841 			return (error);
842 		}
843 		error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
844 		    &ndp->ni_cnd, &nvap->na_vattr);
845 		vput(ndp->ni_dvp);
846 		nfsvno_relpathbuf(ndp);
847 		if (error)
848 			vrele(ndp->ni_startdir);
849 		/*
850 		 * Since VOP_MKNOD returns the ni_vp, I can't
851 		 * see any reason to do the lookup.
852 		 */
853 	}
854 	return (error);
855 }
856 
857 /*
858  * Mkdir vnode op.
859  */
860 int
861 nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid,
862     struct ucred *cred, struct thread *p, struct nfsexstuff *exp)
863 {
864 	int error = 0;
865 
866 	if (ndp->ni_vp != NULL) {
867 		if (ndp->ni_dvp == ndp->ni_vp)
868 			vrele(ndp->ni_dvp);
869 		else
870 			vput(ndp->ni_dvp);
871 		vrele(ndp->ni_vp);
872 		nfsvno_relpathbuf(ndp);
873 		return (EEXIST);
874 	}
875 	error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
876 	    &nvap->na_vattr);
877 	vput(ndp->ni_dvp);
878 	nfsvno_relpathbuf(ndp);
879 	return (error);
880 }
881 
882 /*
883  * symlink vnode op.
884  */
885 int
886 nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp,
887     int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p,
888     struct nfsexstuff *exp)
889 {
890 	int error = 0;
891 
892 	if (ndp->ni_vp) {
893 		vrele(ndp->ni_startdir);
894 		nfsvno_relpathbuf(ndp);
895 		if (ndp->ni_dvp == ndp->ni_vp)
896 			vrele(ndp->ni_dvp);
897 		else
898 			vput(ndp->ni_dvp);
899 		vrele(ndp->ni_vp);
900 		return (EEXIST);
901 	}
902 
903 	error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
904 	    &nvap->na_vattr, pathcp);
905 	vput(ndp->ni_dvp);
906 	vrele(ndp->ni_startdir);
907 	nfsvno_relpathbuf(ndp);
908 	/*
909 	 * Although FreeBSD still had the lookup code in
910 	 * it for 7/current, there doesn't seem to be any
911 	 * point, since VOP_SYMLINK() returns the ni_vp.
912 	 * Just vput it for v2.
913 	 */
914 	if (!not_v2 && !error)
915 		vput(ndp->ni_vp);
916 	return (error);
917 }
918 
919 /*
920  * Parse symbolic link arguments.
921  * This function has an ugly side effect. It will MALLOC() an area for
922  * the symlink and set iov_base to point to it, only if it succeeds.
923  * So, if it returns with uiop->uio_iov->iov_base != NULL, that must
924  * be FREE'd later.
925  */
926 int
927 nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap,
928     struct thread *p, char **pathcpp, int *lenp)
929 {
930 	u_int32_t *tl;
931 	char *pathcp = NULL;
932 	int error = 0, len;
933 	struct nfsv2_sattr *sp;
934 
935 	*pathcpp = NULL;
936 	*lenp = 0;
937 	if ((nd->nd_flag & ND_NFSV3) &&
938 	    (error = nfsrv_sattr(nd, nvap, NULL, NULL, p)))
939 		goto nfsmout;
940 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
941 	len = fxdr_unsigned(int, *tl);
942 	if (len > NFS_MAXPATHLEN || len <= 0) {
943 		error = EBADRPC;
944 		goto nfsmout;
945 	}
946 	MALLOC(pathcp, caddr_t, len + 1, M_TEMP, M_WAITOK);
947 	error = nfsrv_mtostr(nd, pathcp, len);
948 	if (error)
949 		goto nfsmout;
950 	if (nd->nd_flag & ND_NFSV2) {
951 		NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
952 		nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode);
953 	}
954 	*pathcpp = pathcp;
955 	*lenp = len;
956 	return (0);
957 nfsmout:
958 	if (pathcp)
959 		free(pathcp, M_TEMP);
960 	return (error);
961 }
962 
963 /*
964  * Remove a non-directory object.
965  */
966 int
967 nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred,
968     struct thread *p, struct nfsexstuff *exp)
969 {
970 	struct vnode *vp;
971 	int error = 0;
972 
973 	vp = ndp->ni_vp;
974 	if (vp->v_type == VDIR)
975 		error = NFSERR_ISDIR;
976 	else if (is_v4)
977 		error = nfsrv_checkremove(vp, 1, p);
978 	if (!error)
979 		error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd);
980 	if (ndp->ni_dvp == vp)
981 		vrele(ndp->ni_dvp);
982 	else
983 		vput(ndp->ni_dvp);
984 	vput(vp);
985 	return (error);
986 }
987 
988 /*
989  * Remove a directory.
990  */
991 int
992 nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred,
993     struct thread *p, struct nfsexstuff *exp)
994 {
995 	struct vnode *vp;
996 	int error = 0;
997 
998 	vp = ndp->ni_vp;
999 	if (vp->v_type != VDIR) {
1000 		error = ENOTDIR;
1001 		goto out;
1002 	}
1003 	/*
1004 	 * No rmdir "." please.
1005 	 */
1006 	if (ndp->ni_dvp == vp) {
1007 		error = EINVAL;
1008 		goto out;
1009 	}
1010 	/*
1011 	 * The root of a mounted filesystem cannot be deleted.
1012 	 */
1013 	if (vp->v_vflag & VV_ROOT)
1014 		error = EBUSY;
1015 out:
1016 	if (!error)
1017 		error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd);
1018 	if (ndp->ni_dvp == vp)
1019 		vrele(ndp->ni_dvp);
1020 	else
1021 		vput(ndp->ni_dvp);
1022 	vput(vp);
1023 	return (error);
1024 }
1025 
1026 /*
1027  * Rename vnode op.
1028  */
1029 int
1030 nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
1031     u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p)
1032 {
1033 	struct vnode *fvp, *tvp, *tdvp;
1034 	int error = 0;
1035 
1036 	fvp = fromndp->ni_vp;
1037 	if (ndstat) {
1038 		vrele(fromndp->ni_dvp);
1039 		vrele(fvp);
1040 		error = ndstat;
1041 		goto out1;
1042 	}
1043 	tdvp = tondp->ni_dvp;
1044 	tvp = tondp->ni_vp;
1045 	if (tvp != NULL) {
1046 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
1047 			error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST;
1048 			goto out;
1049 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
1050 			error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST;
1051 			goto out;
1052 		}
1053 		if (tvp->v_type == VDIR && tvp->v_mountedhere) {
1054 			error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1055 			goto out;
1056 		}
1057 
1058 		/*
1059 		 * A rename to '.' or '..' results in a prematurely
1060 		 * unlocked vnode on FreeBSD5, so I'm just going to fail that
1061 		 * here.
1062 		 */
1063 		if ((tondp->ni_cnd.cn_namelen == 1 &&
1064 		     tondp->ni_cnd.cn_nameptr[0] == '.') ||
1065 		    (tondp->ni_cnd.cn_namelen == 2 &&
1066 		     tondp->ni_cnd.cn_nameptr[0] == '.' &&
1067 		     tondp->ni_cnd.cn_nameptr[1] == '.')) {
1068 			error = EINVAL;
1069 			goto out;
1070 		}
1071 	}
1072 	if (fvp->v_type == VDIR && fvp->v_mountedhere) {
1073 		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1074 		goto out;
1075 	}
1076 	if (fvp->v_mount != tdvp->v_mount) {
1077 		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
1078 		goto out;
1079 	}
1080 	if (fvp == tdvp) {
1081 		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL;
1082 		goto out;
1083 	}
1084 	if (fvp == tvp) {
1085 		/*
1086 		 * If source and destination are the same, there is nothing to
1087 		 * do. Set error to -1 to indicate this.
1088 		 */
1089 		error = -1;
1090 		goto out;
1091 	}
1092 	if (ndflag & ND_NFSV4) {
1093 		NFSVOPLOCK(fvp, LK_EXCLUSIVE | LK_RETRY, p);
1094 		error = nfsrv_checkremove(fvp, 0, p);
1095 		NFSVOPUNLOCK(fvp, 0, p);
1096 		if (tvp && !error)
1097 			error = nfsrv_checkremove(tvp, 1, p);
1098 	} else {
1099 		/*
1100 		 * For NFSv2 and NFSv3, try to get rid of the delegation, so
1101 		 * that the NFSv4 client won't be confused by the rename.
1102 		 * Since nfsd_recalldelegation() can only be called on an
1103 		 * unlocked vnode at this point and fvp is the file that will
1104 		 * still exist after the rename, just do fvp.
1105 		 */
1106 		nfsd_recalldelegation(fvp, p);
1107 	}
1108 out:
1109 	if (!error) {
1110 		error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp,
1111 		    &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp,
1112 		    &tondp->ni_cnd);
1113 	} else {
1114 		if (tdvp == tvp)
1115 			vrele(tdvp);
1116 		else
1117 			vput(tdvp);
1118 		if (tvp)
1119 			vput(tvp);
1120 		vrele(fromndp->ni_dvp);
1121 		vrele(fvp);
1122 		if (error == -1)
1123 			error = 0;
1124 	}
1125 	vrele(tondp->ni_startdir);
1126 	nfsvno_relpathbuf(tondp);
1127 out1:
1128 	vrele(fromndp->ni_startdir);
1129 	nfsvno_relpathbuf(fromndp);
1130 	return (error);
1131 }
1132 
1133 /*
1134  * Link vnode op.
1135  */
1136 int
1137 nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred,
1138     struct thread *p, struct nfsexstuff *exp)
1139 {
1140 	struct vnode *xp;
1141 	int error = 0;
1142 
1143 	xp = ndp->ni_vp;
1144 	if (xp != NULL) {
1145 		error = EEXIST;
1146 	} else {
1147 		xp = ndp->ni_dvp;
1148 		if (vp->v_mount != xp->v_mount)
1149 			error = EXDEV;
1150 	}
1151 	if (!error) {
1152 		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, p);
1153 		error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd);
1154 		if (ndp->ni_dvp == vp)
1155 			vrele(ndp->ni_dvp);
1156 		else
1157 			vput(ndp->ni_dvp);
1158 		NFSVOPUNLOCK(vp, 0, p);
1159 	} else {
1160 		if (ndp->ni_dvp == ndp->ni_vp)
1161 			vrele(ndp->ni_dvp);
1162 		else
1163 			vput(ndp->ni_dvp);
1164 		if (ndp->ni_vp)
1165 			vrele(ndp->ni_vp);
1166 	}
1167 	nfsvno_relpathbuf(ndp);
1168 	return (error);
1169 }
1170 
1171 /*
1172  * Do the fsync() appropriate for the commit.
1173  */
1174 int
1175 nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred,
1176     struct thread *td)
1177 {
1178 	int error = 0;
1179 
1180 	if (cnt > MAX_COMMIT_COUNT) {
1181 		/*
1182 		 * Give up and do the whole thing
1183 		 */
1184 		if (vp->v_object &&
1185 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
1186 			VM_OBJECT_LOCK(vp->v_object);
1187 			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
1188 			VM_OBJECT_UNLOCK(vp->v_object);
1189 		}
1190 		error = VOP_FSYNC(vp, MNT_WAIT, td);
1191 	} else {
1192 		/*
1193 		 * Locate and synchronously write any buffers that fall
1194 		 * into the requested range.  Note:  we are assuming that
1195 		 * f_iosize is a power of 2.
1196 		 */
1197 		int iosize = vp->v_mount->mnt_stat.f_iosize;
1198 		int iomask = iosize - 1;
1199 		struct bufobj *bo;
1200 		daddr_t lblkno;
1201 
1202 		/*
1203 		 * Align to iosize boundry, super-align to page boundry.
1204 		 */
1205 		if (off & iomask) {
1206 			cnt += off & iomask;
1207 			off &= ~(u_quad_t)iomask;
1208 		}
1209 		if (off & PAGE_MASK) {
1210 			cnt += off & PAGE_MASK;
1211 			off &= ~(u_quad_t)PAGE_MASK;
1212 		}
1213 		lblkno = off / iosize;
1214 
1215 		if (vp->v_object &&
1216 		   (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
1217 			VM_OBJECT_LOCK(vp->v_object);
1218 			vm_object_page_clean(vp->v_object, off / PAGE_SIZE, (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
1219 			VM_OBJECT_UNLOCK(vp->v_object);
1220 		}
1221 
1222 		bo = &vp->v_bufobj;
1223 		BO_LOCK(bo);
1224 		while (cnt > 0) {
1225 			struct buf *bp;
1226 
1227 			/*
1228 			 * If we have a buffer and it is marked B_DELWRI we
1229 			 * have to lock and write it.  Otherwise the prior
1230 			 * write is assumed to have already been committed.
1231 			 *
1232 			 * gbincore() can return invalid buffers now so we
1233 			 * have to check that bit as well (though B_DELWRI
1234 			 * should not be set if B_INVAL is set there could be
1235 			 * a race here since we haven't locked the buffer).
1236 			 */
1237 			if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
1238 				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
1239 				    LK_INTERLOCK, BO_MTX(bo)) == ENOLCK) {
1240 					BO_LOCK(bo);
1241 					continue; /* retry */
1242 				}
1243 			    	if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
1244 				    B_DELWRI) {
1245 					bremfree(bp);
1246 					bp->b_flags &= ~B_ASYNC;
1247 					bwrite(bp);
1248 					++nfs_commit_miss;
1249 				} else
1250 					BUF_UNLOCK(bp);
1251 				BO_LOCK(bo);
1252 			}
1253 			++nfs_commit_blks;
1254 			if (cnt < iosize)
1255 				break;
1256 			cnt -= iosize;
1257 			++lblkno;
1258 		}
1259 		BO_UNLOCK(bo);
1260 	}
1261 	return (error);
1262 }
1263 
1264 /*
1265  * Statfs vnode op.
1266  */
1267 int
1268 nfsvno_statfs(struct vnode *vp, struct statfs *sf)
1269 {
1270 
1271 	return (VFS_STATFS(vp->v_mount, sf));
1272 }
1273 
1274 /*
1275  * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but
1276  * must handle nfsrv_opencheck() calls after any other access checks.
 *
 * If the lookup left ndp->ni_vp NULL, the file is created with
 * VOP_CREATE(); otherwise the vnode that was found is used and, when a
 * size was supplied and it is a regular file, that size is applied with
 * VOP_SETATTR() after a VWRITE access check.  Status is reported through
 * nd->nd_repstat.  The vnode is handed back through *vpp, which is set to
 * NULL when nd->nd_repstat is already non-zero before any vnode op is
 * attempted.
1277  */
1278 void
1279 nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp,
1280     nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp,
1281     int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create,
1282     NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred, struct thread *p,
1283     struct nfsexstuff *exp, struct vnode **vpp)
1284 {
1285 	struct vnode *vp = NULL;
1286 	u_quad_t tempsize;
1287 	struct nfsexstuff nes;
1288 
	/*
	 * No vnode exists yet for the name, so the open check is done now
	 * with a NULL vnode.  The current nd_repstat is passed in as the
	 * last argument.
	 */
1289 	if (ndp->ni_vp == NULL)
1290 		nd->nd_repstat = nfsrv_opencheck(clientid,
1291 		    stateidp, stp, NULL, nd, p, nd->nd_repstat);
1292 	if (!nd->nd_repstat) {
1293 		if (ndp->ni_vp == NULL) {
			/*
			 * Create the file.  The start directory, the parent
			 * directory and the path buffer are released whether
			 * or not VOP_CREATE() succeeds.
			 */
1294 			vrele(ndp->ni_startdir);
1295 			nd->nd_repstat = VOP_CREATE(ndp->ni_dvp,
1296 			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
1297 			vput(ndp->ni_dvp);
1298 			nfsvno_relpathbuf(ndp);
1299 			if (!nd->nd_repstat) {
1300 				if (*exclusive_flagp) {
					/*
					 * Exclusive create: clear the flag
					 * and store the two words of cverf
					 * in the new file's atime.
					 */
1301 					*exclusive_flagp = 0;
1302 					NFSVNO_ATTRINIT(nvap);
1303 					nvap->na_atime.tv_sec = cverf[0];
1304 					nvap->na_atime.tv_nsec = cverf[1];
1305 					nd->nd_repstat = VOP_SETATTR(ndp->ni_vp,
1306 					    &nvap->na_vattr, cred);
1307 				} else {
1308 					nfsrv_fixattr(nd, ndp->ni_vp, nvap,
1309 					    aclp, p, attrbitp, exp);
1310 				}
1311 			}
1312 			vp = ndp->ni_vp;
1313 		} else {
			/* The name already exists: work on the vnode found. */
1314 			if (ndp->ni_startdir)
1315 				vrele(ndp->ni_startdir);
1316 			nfsvno_relpathbuf(ndp);
1317 			vp = ndp->ni_vp;
			/*
			 * The parent directory is only released here for
			 * the NFSV4OPEN_CREATE case.
			 */
1318 			if (create == NFSV4OPEN_CREATE) {
1319 				if (ndp->ni_dvp == vp)
1320 					vrele(ndp->ni_dvp);
1321 				else
1322 					vput(ndp->ni_dvp);
1323 			}
1324 			if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) {
				/*
				 * A size was supplied for a regular file:
				 * check for write access first, then do the
				 * open check, which is handed the result of
				 * the access check through nd_repstat.
				 */
1325 				if (ndp->ni_cnd.cn_flags & RDONLY)
1326 					NFSVNO_SETEXRDONLY(&nes);
1327 				else
1328 					NFSVNO_EXINIT(&nes);
1329 				nd->nd_repstat = nfsvno_accchk(vp,
1330 				    VWRITE, cred, &nes, p,
1331 				    NFSACCCHK_NOOVERRIDE,
1332 				    NFSACCCHK_VPISLOCKED, NULL);
1333 				nd->nd_repstat = nfsrv_opencheck(clientid,
1334 				    stateidp, stp, vp, nd, p, nd->nd_repstat);
1335 				if (!nd->nd_repstat) {
					/*
					 * na_size is saved across
					 * NFSVNO_ATTRINIT() and put back,
					 * presumably so that only the size
					 * is passed to VOP_SETATTR().
					 */
1336 					tempsize = nvap->na_size;
1337 					NFSVNO_ATTRINIT(nvap);
1338 					nvap->na_size = tempsize;
1339 					nd->nd_repstat = VOP_SETATTR(vp,
1340 					    &nvap->na_vattr, cred);
1341 				}
1342 			} else if (vp->v_type == VREG) {
1343 				nd->nd_repstat = nfsrv_opencheck(clientid,
1344 				    stateidp, stp, vp, nd, p, nd->nd_repstat);
1345 			}
1346 		}
1347 	} else {
		/*
		 * Failure before any vnode op: release the path buffer and,
		 * for the create case, the vnodes left by the lookup.  vp
		 * stays NULL.
		 */
1348 		if (ndp->ni_cnd.cn_flags & HASBUF)
1349 			nfsvno_relpathbuf(ndp);
1350 		if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) {
1351 			vrele(ndp->ni_startdir);
1352 			if (ndp->ni_dvp == ndp->ni_vp)
1353 				vrele(ndp->ni_dvp);
1354 			else
1355 				vput(ndp->ni_dvp);
1356 			if (ndp->ni_vp)
1357 				vput(ndp->ni_vp);
1358 		}
1359 	}
1360 	*vpp = vp;
1361 }
1362 
1363 /*
1364  * Updates the file rev and sets the mtime and ctime
1365  * to the current clock time, returning the va_filerev and va_Xtime
1366  * values.
1367  */
1368 void
1369 nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap,
1370     struct ucred *cred, struct thread *p)
1371 {
1372 	struct vattr va;
1373 
1374 	VATTR_NULL(&va);
1375 	getnanotime(&va.va_mtime);
1376 	(void) VOP_SETATTR(vp, &va, cred);
1377 	(void) nfsvno_getattr(vp, nvap, cred, p);
1378 }
1379 
1380 /*
1381  * Glue routine to nfsv4_fillattr().
1382  */
1383 int
1384 nfsvno_fillattr(struct nfsrv_descript *nd, struct vnode *vp,
1385     struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp,
1386     struct ucred *cred, struct thread *p, int isdgram, int reterr)
1387 {
1388 	int error;
1389 
1390 	error = nfsv4_fillattr(nd, vp, NULL, &nvap->na_vattr, fhp, rderror,
1391 	    attrbitp, cred, p, isdgram, reterr);
1392 	return (error);
1393 }
1394 
1395 /* Since the Readdir vnode ops vary, put the entire functions in here. */
1396 /*
1397  * nfs readdir service
1398  * - mallocs what it thinks is enough to read
1399  *	count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
1400  * - calls VOP_READDIR()
1401  * - loops around building the reply
1402  *	if the output generated exceeds count break out of loop
1403  *	The NFSM_CLGET macro is used here so that the reply will be packed
1404  *	tightly in mbuf clusters.
1405  * - it trims out records with d_fileno == 0
1406  *	this doesn't matter for Unix clients, but they might confuse clients
1407  *	for other os'.
1408  * - it trims out records with d_type == DT_WHT
1409  *	these cannot be seen through NFS (unless we extend the protocol)
1410  *     The alternate call nfsrvd_readdirplus() does lookups as well.
1411  * PS: The NFS protocol spec. does not clarify what the "count" byte
1412  *	argument is a count of.. just name strings and file id's or the
1413  *	entire reply rpc or ...
1414  *	I tried just file name and id sizes and it confused the Sun client,
1415  *	so I am using the full rpc size now. The "paranoia.." comment refers
1416  *	to including the status longwords that are not a part of the dir.
1417  *	"entry" structures, but are in the rpc.
 *
 * Errors that belong in the reply are stored in nd->nd_repstat and the
 * function then returns 0.  A non-zero return only happens through the
 * nfsmout label, which no statement here jumps to explicitly; presumably
 * the NFSM_DISSECT() macro does so when the request cannot be parsed.
 * vp is released (vput() or vrele()) on every path taken after the
 * initial nd_repstat check.
1418  */
1419 int
1420 nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram,
1421     struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
1422 {
1423 	struct dirent *dp;
1424 	u_int32_t *tl;
1425 	int dirlen;
1426 	char *cpos, *cend, *rbuf;
1427 	struct nfsvattr at;
1428 	int nlen, error = 0, getret = 1;
1429 	int siz, cnt, fullsiz, eofflag, ncookies;
1430 	u_int64_t off, toff, verf;
1431 	u_long *cookies = NULL, *cookiep;
1432 	struct uio io;
1433 	struct iovec iv;
1434 
	/*
	 * An error was already recorded for this request: nothing is read.
	 * getret is still 1 here and "at" has not been filled in.
	 */
1435 	if (nd->nd_repstat) {
1436 		nfsrv_postopattr(nd, getret, &at);
1437 		return (0);
1438 	}
	/*
	 * Parse the arguments.  NFSv2 carries a 32 bit directory offset;
	 * the other versions carry a 64 bit offset followed by a 64 bit
	 * verifier.  In both cases tl is left pointing at the count.
	 */
1439 	if (nd->nd_flag & ND_NFSV2) {
1440 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1441 		off = fxdr_unsigned(u_quad_t, *tl++);
1442 	} else {
1443 		NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
1444 		off = fxdr_hyper(tl);
1445 		tl += 2;
1446 		verf = fxdr_hyper(tl);
1447 		tl += 2;
1448 	}
1449 	toff = off;
1450 	cnt = fxdr_unsigned(int, *tl);
	/* Clamp out-of-range (including negative) counts to the maximum. */
1451 	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
1452 		cnt = NFS_SRVMAXDATA(nd);
	/* Read size: cnt rounded up to a multiple of DIRBLKSIZ. */
1453 	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
1454 	fullsiz = siz;
1455 	if (nd->nd_flag & ND_NFSV3) {
1456 		nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred,
1457 		    p);
1458 #if 0
1459 		/*
1460 		 * va_filerev is not sufficient as a cookie verifier,
1461 		 * since it is not supposed to change when entries are
1462 		 * removed/added unless that offset cookies returned to
1463 		 * the client are no longer valid.
1464 		 */
1465 		if (!nd->nd_repstat && toff && verf != at.na_filerev)
1466 			nd->nd_repstat = NFSERR_BAD_COOKIE;
1467 #endif
1468 	}
1469 	if (nd->nd_repstat == 0 && cnt == 0) {
1470 		if (nd->nd_flag & ND_NFSV2)
1471 			/* NFSv2 does not have NFSERR_TOOSMALL */
1472 			nd->nd_repstat = EPERM;
1473 		else
1474 			nd->nd_repstat = NFSERR_TOOSMALL;
1475 	}
1476 	if (!nd->nd_repstat)
1477 		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
1478 		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
1479 		    NFSACCCHK_VPISLOCKED, NULL);
1480 	if (nd->nd_repstat) {
1481 		vput(vp);
1482 		if (nd->nd_flag & ND_NFSV3)
1483 			nfsrv_postopattr(nd, getret, &at);
1484 		return (0);
1485 	}
	/*
	 * vp is unlocked before the buffer is allocated and is only locked
	 * again around each VOP_READDIR() call below, which is why the
	 * later exits use vrele() rather than vput().
	 */
1486 	NFSVOPUNLOCK(vp, 0, p);
1487 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
1488 again:
	/* Each pass starts clean: drop the cookies from the previous read. */
1489 	eofflag = 0;
1490 	if (cookies) {
1491 		free((caddr_t)cookies, M_TEMP);
1492 		cookies = NULL;
1493 	}
1494 
	/* Describe rbuf as a single kernel-space segment to read into. */
1495 	iv.iov_base = rbuf;
1496 	iv.iov_len = siz;
1497 	io.uio_iov = &iv;
1498 	io.uio_iovcnt = 1;
1499 	io.uio_offset = (off_t)off;
1500 	io.uio_resid = siz;
1501 	io.uio_segflg = UIO_SYSSPACE;
1502 	io.uio_rw = UIO_READ;
1503 	io.uio_td = NULL;
1504 	NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, p);
1505 	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
1506 	    &cookies);
1507 	NFSVOPUNLOCK(vp, 0, p);
	/*
	 * Remember where the read stopped and shrink siz by whatever part
	 * of the buffer was left unfilled.
	 */
1508 	off = (u_int64_t)io.uio_offset;
1509 	if (io.uio_resid)
1510 		siz -= io.uio_resid;
1511 
	/* The code below needs the cookie array; fail if none came back. */
1512 	if (!cookies && !nd->nd_repstat)
1513 		nd->nd_repstat = NFSERR_PERM;
1514 	if (nd->nd_flag & ND_NFSV3) {
1515 		getret = nfsvno_getattr(vp, &at, nd->nd_cred, p);
1516 		if (!nd->nd_repstat)
1517 			nd->nd_repstat = getret;
1518 	}
1519 
1520 	/*
1521 	 * Handles the failed cases. nd->nd_repstat == 0 past here.
1522 	 */
1523 	if (nd->nd_repstat) {
1524 		vrele(vp);
1525 		free((caddr_t)rbuf, M_TEMP);
1526 		if (cookies)
1527 			free((caddr_t)cookies, M_TEMP);
1528 		if (nd->nd_flag & ND_NFSV3)
1529 			nfsrv_postopattr(nd, getret, &at);
1530 		return (0);
1531 	}
1532 	/*
1533 	 * If nothing read, return eof
1534 	 * rpc reply
1535 	 */
1536 	if (siz == 0) {
1537 		vrele(vp);
1538 		if (nd->nd_flag & ND_NFSV2) {
1539 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1540 		} else {
			/* The non-V2 reply also carries na_filerev. */
1541 			nfsrv_postopattr(nd, getret, &at);
1542 			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1543 			txdr_hyper(at.na_filerev, tl);
1544 			tl += 2;
1545 		}
		/* First word: no entry follows.  Second word: at EOF. */
1546 		*tl++ = newnfs_false;
1547 		*tl = newnfs_true;
1548 		FREE((caddr_t)rbuf, M_TEMP);
1549 		FREE((caddr_t)cookies, M_TEMP);
1550 		return (0);
1551 	}
1552 
1553 	/*
1554 	 * Check for degenerate cases of nothing useful read.
1555 	 * If so go try again
1556 	 */
1557 	cpos = rbuf;
1558 	cend = rbuf + siz;
1559 	dp = (struct dirent *)cpos;
1560 	cookiep = cookies;
1561 
1562 	/*
1563 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
1564 	 * directory offset up to a block boundary, so it is necessary to
1565 	 * skip over the records that precede the requested offset. This
1566 	 * requires the assumption that file offset cookies monotonically
1567 	 * increase.
1568 	 */
1569 	while (cpos < cend && ncookies > 0 &&
1570 	    (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
1571 	     ((u_quad_t)(*cookiep)) <= toff)) {
1572 		cpos += dp->d_reclen;
1573 		dp = (struct dirent *)cpos;
1574 		cookiep++;
1575 		ncookies--;
1576 	}
	/*
	 * Every record in this chunk was skipped: read the next chunk,
	 * starting at the offset where VOP_READDIR() stopped.
	 */
1577 	if (cpos >= cend || ncookies == 0) {
1578 		siz = fullsiz;
1579 		toff = off;
1580 		goto again;
1581 	}
1582 
1583 	/*
1584 	 * dirlen is the size of the reply, including all XDR and must
1585 	 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate
1586 	 * if the XDR should be included in "count", but to be safe, we do.
1587 	 * (Include the two booleans at the end of the reply in dirlen now.)
1588 	 */
1589 	if (nd->nd_flag & ND_NFSV3) {
1590 		nfsrv_postopattr(nd, getret, &at);
1591 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1592 		txdr_hyper(at.na_filerev, tl);
1593 		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
1594 	} else {
1595 		dirlen = 2 * NFSX_UNSIGNED;
1596 	}
1597 
1598 	/* Loop through the records and build reply */
1599 	while (cpos < cend && ncookies > 0) {
1600 		nlen = dp->d_namlen;
		/*
		 * Records with no file number, whiteouts and names longer
		 * than NFS_MAXNAMLEN are stepped over without being put
		 * in the reply.
		 */
1601 		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
1602 			nlen <= NFS_MAXNAMLEN) {
1603 			if (nd->nd_flag & ND_NFSV3)
1604 				dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
1605 			else
1606 				dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
			/*
			 * This entry would take the reply past cnt: stop
			 * before adding it and make sure EOF is not
			 * reported.
			 */
1607 			if (dirlen > cnt) {
1608 				eofflag = 0;
1609 				break;
1610 			}
1611 
1612 			/*
1613 			 * Build the directory record xdr from
1614 			 * the dirent entry.
1615 			 */
1616 			if (nd->nd_flag & ND_NFSV3) {
				/* 64 bit file id: high word is written as 0. */
1617 				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1618 				*tl++ = newnfs_true;
1619 				*tl++ = 0;
1620 			} else {
1621 				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1622 				*tl++ = newnfs_true;
1623 			}
1624 			*tl = txdr_unsigned(dp->d_fileno);
1625 			(void) nfsm_strtom(nd, dp->d_name, nlen);
			/* Cookie: 64 bits (high word 0) for V3, else 32. */
1626 			if (nd->nd_flag & ND_NFSV3) {
1627 				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1628 				*tl++ = 0;
1629 			} else
1630 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
1631 			*tl = txdr_unsigned(*cookiep);
1632 		}
1633 		cpos += dp->d_reclen;
1634 		dp = (struct dirent *)cpos;
1635 		cookiep++;
1636 		ncookies--;
1637 	}
	/* Records left in the buffer mean the directory is not finished. */
1638 	if (cpos < cend)
1639 		eofflag = 0;
1640 	vrele(vp);
	/* Trailer: "no more entries" marker followed by the EOF flag. */
1641 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1642 	*tl++ = newnfs_false;
1643 	if (eofflag)
1644 		*tl = newnfs_true;
1645 	else
1646 		*tl = newnfs_false;
1647 	FREE((caddr_t)rbuf, M_TEMP);
1648 	FREE((caddr_t)cookies, M_TEMP);
1649 	return (0);
1650 nfsmout:
	/* Only reached with vp still locked, hence vput(). */
1651 	vput(vp);
1652 	return (error);
1653 }
1654 
1655 /*
1656  * Readdirplus for V3 and Readdir for V4.
 *
 * Works like nfsrvd_readdir(), but each entry in the reply is also
 * followed by attributes (and, for V3, a file handle) of the entry.  To
 * get those, a vnode is obtained for every entry, first with VFS_VGET()
 * and, once that has returned EOPNOTSUPP, with VOP_LOOKUP() for the rest
 * of the request.  For V4 the "." and ".." entries are left out.
 *
 * Errors that belong in the reply are stored in nd->nd_repstat and the
 * function returns 0; a non-zero return only happens through the nfsmout
 * label (an attribute bitmap that cannot be parsed, or presumably a
 * failing NFSM_DISSECT()).
1657  */
1658 int
1659 nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
1660     struct vnode *vp, struct thread *p, struct nfsexstuff *exp)
1661 {
1662 	struct dirent *dp;
1663 	u_int32_t *tl;
1664 	int dirlen;
1665 	char *cpos, *cend, *rbuf;
1666 	struct vnode *nvp;
1667 	fhandle_t nfh;
1668 	struct nfsvattr nva, at, *nvap = &nva;
1669 	struct mbuf *mb0, *mb1;
1670 	struct nfsreferral *refp;
1671 	int nlen, r, error = 0, getret = 1, usevget = 1;
1672 	int siz, cnt, fullsiz, eofflag, ncookies, entrycnt;
1673 	caddr_t bpos0, bpos1;
1674 	u_int64_t off, toff, verf;
1675 	u_long *cookies = NULL, *cookiep;
1676 	nfsattrbit_t attrbits, rderrbits, savbits;
1677 	struct uio io;
1678 	struct iovec iv;
1679 	struct componentname cn;
1680 
	/* An error was already recorded for this request: read nothing. */
1681 	if (nd->nd_repstat) {
1682 		nfsrv_postopattr(nd, getret, &at);
1683 		return (0);
1684 	}
	/*
	 * Arguments: 64 bit directory offset, 64 bit verifier, then the
	 * two 32 bit sizes siz and cnt.
	 */
1685 	NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
1686 	off = fxdr_hyper(tl);
1687 	toff = off;
1688 	tl += 2;
1689 	verf = fxdr_hyper(tl);
1690 	tl += 2;
1691 	siz = fxdr_unsigned(int, *tl++);
1692 	cnt = fxdr_unsigned(int, *tl);
1693 
1694 	/*
1695 	 * Use the server's maximum data transfer size as the upper bound
1696 	 * on reply datalen.
1697 	 */
1698 	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
1699 		cnt = NFS_SRVMAXDATA(nd);
1700 
1701 	/*
1702 	 * siz is a "hint" of how much directory information (name, fileid,
1703 	 * cookie) should be in the reply. At least one client "hints" 0,
1704 	 * so I set it to cnt for that case. I also round it up to the
1705 	 * next multiple of DIRBLKSIZ.
1706 	 */
1707 	if (siz <= 0)
1708 		siz = cnt;
1709 	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
1710 
	/*
	 * For V4 the request names the attributes wanted for each entry.
	 * savbits is set from the bits as parsed, before
	 * NFSCLRNOTFILLABLE_ATTRBIT() is applied to attrbits; rderrbits
	 * holds just NFSATTRBIT_RDATTRERROR and is used for entries whose
	 * attributes could not be obtained.  V3 uses an all-zero attrbits;
	 * savbits and rderrbits are only set up for V4.
	 */
1711 	if (nd->nd_flag & ND_NFSV4) {
1712 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
1713 		if (error)
1714 			goto nfsmout;
1715 		NFSSET_ATTRBIT(&savbits, &attrbits);
1716 		NFSCLRNOTFILLABLE_ATTRBIT(&attrbits);
1717 		NFSZERO_ATTRBIT(&rderrbits);
1718 		NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR);
1719 	} else {
1720 		NFSZERO_ATTRBIT(&attrbits);
1721 	}
1722 	fullsiz = siz;
1723 	nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd->nd_cred, p);
1724 	if (!nd->nd_repstat) {
		/*
		 * A verifier that differs from na_filerev is deliberately
		 * not treated as an error (the check is compiled out); the
		 * only verifier error left is a V4 request that has a zero
		 * offset but a non-zero verifier.
		 */
1725 	    if (off && verf != at.na_filerev) {
1726 		/*
1727 		 * va_filerev is not sufficient as a cookie verifier,
1728 		 * since it is not supposed to change when entries are
1729 		 * removed/added unless that offset cookies returned to
1730 		 * the client are no longer valid.
1731 		 */
1732 #if 0
1733 		if (nd->nd_flag & ND_NFSV4) {
1734 			nd->nd_repstat = NFSERR_NOTSAME;
1735 		} else {
1736 			nd->nd_repstat = NFSERR_BAD_COOKIE;
1737 		}
1738 #endif
1739 	    } else if ((nd->nd_flag & ND_NFSV4) && off == 0 && verf != 0) {
1740 		nd->nd_repstat = NFSERR_BAD_COOKIE;
1741 	    }
1742 	}
1743 	if (!nd->nd_repstat && vp->v_type != VDIR)
1744 		nd->nd_repstat = NFSERR_NOTDIR;
1745 	if (!nd->nd_repstat && cnt == 0)
1746 		nd->nd_repstat = NFSERR_TOOSMALL;
1747 	if (!nd->nd_repstat)
1748 		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
1749 		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
1750 		    NFSACCCHK_VPISLOCKED, NULL);
1751 	if (nd->nd_repstat) {
1752 		vput(vp);
1753 		if (nd->nd_flag & ND_NFSV3)
1754 			nfsrv_postopattr(nd, getret, &at);
1755 		return (0);
1756 	}
1757 
	/*
	 * Unlike nfsrvd_readdir(), vp is not unlocked here; it stays as
	 * it was on entry until the NFSVOPUNLOCK() that follows the skip
	 * loop below.
	 */
1758 	MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
1759 again:
	/* Each pass starts clean: drop the cookies from the previous read. */
1760 	eofflag = 0;
1761 	if (cookies) {
1762 		free((caddr_t)cookies, M_TEMP);
1763 		cookies = NULL;
1764 	}
1765 
	/* Describe rbuf as a single kernel-space segment to read into. */
1766 	iv.iov_base = rbuf;
1767 	iv.iov_len = siz;
1768 	io.uio_iov = &iv;
1769 	io.uio_iovcnt = 1;
1770 	io.uio_offset = (off_t)off;
1771 	io.uio_resid = siz;
1772 	io.uio_segflg = UIO_SYSSPACE;
1773 	io.uio_rw = UIO_READ;
1774 	io.uio_td = NULL;
1775 	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
1776 	    &cookies);
	/*
	 * Remember where the read stopped and shrink siz by whatever part
	 * of the buffer was left unfilled.
	 */
1777 	off = (u_int64_t)io.uio_offset;
1778 	if (io.uio_resid)
1779 		siz -= io.uio_resid;
1780 
1781 	getret = nfsvno_getattr(vp, &at, nd->nd_cred, p);
1782 
	/* The code below needs the cookie array; fail if none came back. */
1783 	if (!cookies && !nd->nd_repstat)
1784 		nd->nd_repstat = NFSERR_PERM;
1785 	if (!nd->nd_repstat)
1786 		nd->nd_repstat = getret;
1787 	if (nd->nd_repstat) {
1788 		vput(vp);
1789 		if (cookies)
1790 			free((caddr_t)cookies, M_TEMP);
1791 		free((caddr_t)rbuf, M_TEMP);
1792 		if (nd->nd_flag & ND_NFSV3)
1793 			nfsrv_postopattr(nd, getret, &at);
1794 		return (0);
1795 	}
1796 	/*
1797 	 * If nothing read, return eof
1798 	 * rpc reply
1799 	 */
1800 	if (siz == 0) {
1801 		vput(vp);
1802 		if (nd->nd_flag & ND_NFSV3)
1803 			nfsrv_postopattr(nd, getret, &at);
1804 		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
1805 		txdr_hyper(at.na_filerev, tl);
1806 		tl += 2;
		/* First word: no entry follows.  Second word: at EOF. */
1807 		*tl++ = newnfs_false;
1808 		*tl = newnfs_true;
1809 		free((caddr_t)cookies, M_TEMP);
1810 		free((caddr_t)rbuf, M_TEMP);
1811 		return (0);
1812 	}
1813 
1814 	/*
1815 	 * Check for degenerate cases of nothing useful read.
1816 	 * If so go try again
1817 	 */
1818 	cpos = rbuf;
1819 	cend = rbuf + siz;
1820 	dp = (struct dirent *)cpos;
1821 	cookiep = cookies;
1822 
1823 	/*
1824 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
1825 	 * directory offset up to a block boundary, so it is necessary to
1826 	 * skip over the records that precede the requested offset. This
1827 	 * requires the assumption that file offset cookies monotonically
1828 	 * increase.
	 * For V4, "." and ".." are skipped here as well.
1829 	 */
1830 	while (cpos < cend && ncookies > 0 &&
1831 	  (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
1832 	   ((u_quad_t)(*cookiep)) <= toff ||
1833 	   ((nd->nd_flag & ND_NFSV4) &&
1834 	    ((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
1835 	     (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) {
1836 		cpos += dp->d_reclen;
1837 		dp = (struct dirent *)cpos;
1838 		cookiep++;
1839 		ncookies--;
1840 	}
	/*
	 * Every record in this chunk was skipped: read the next chunk,
	 * starting at the offset where VOP_READDIR() stopped.
	 */
1841 	if (cpos >= cend || ncookies == 0) {
1842 		siz = fullsiz;
1843 		toff = off;
1844 		goto again;
1845 	}
	/*
	 * Reading is finished, so vp is unlocked.  It is only locked again
	 * below if the VOP_LOOKUP() fallback is needed.
	 */
1846 	NFSVOPUNLOCK(vp, 0, p);
1847 
1848 	/*
1849 	 * Save this position, in case there is an error before one entry
1850 	 * is created.
1851 	 */
1852 	mb0 = nd->nd_mb;
1853 	bpos0 = nd->nd_bpos;
1854 
1855 	/*
1856 	 * Fill in the first part of the reply.
1857 	 * dirlen is the reply length in bytes and cannot exceed cnt.
1858 	 * (Include the two booleans at the end of the reply in dirlen now,
1859 	 *  so we recognize when we have exceeded cnt.)
1860 	 */
1861 	if (nd->nd_flag & ND_NFSV3) {
1862 		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
1863 		nfsrv_postopattr(nd, getret, &at);
1864 	} else {
1865 		dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED;
1866 	}
1867 	NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
1868 	txdr_hyper(at.na_filerev, tl);
1869 
1870 	/*
1871 	 * Save this position, in case there is an empty reply needed.
1872 	 */
1873 	mb1 = nd->nd_mb;
1874 	bpos1 = nd->nd_bpos;
1875 
1876 	/* Loop through the records and build reply */
1877 	entrycnt = 0;
1878 	while (cpos < cend && ncookies > 0 && dirlen < cnt) {
1879 		nlen = dp->d_namlen;
		/*
		 * Use the record only if it has a file number, is not a
		 * whiteout, has a name of at most NFS_MAXNAMLEN bytes and,
		 * unless this is V3, is neither "." nor "..".
		 */
1880 		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
1881 		    nlen <= NFS_MAXNAMLEN &&
1882 		    ((nd->nd_flag & ND_NFSV3) || nlen > 2 ||
1883 		     (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.'))
1884 		      || (nlen == 1 && dp->d_name[0] != '.'))) {
1885 			/*
1886 			 * Save the current position in the reply, in case
1887 			 * this entry exceeds cnt.
1888 			 */
1889 			mb1 = nd->nd_mb;
1890 			bpos1 = nd->nd_bpos;
1891 
1892 			/*
1893 			 * For readdir_and_lookup get the vnode using
1894 			 * the file number.
1895 			 */
1896 			nvp = NULL;
1897 			refp = NULL;
1898 			r = 0;
			/*
			 * A vnode is needed for V3 always, and for V4 only
			 * when some attribute was requested.
			 */
1899 			if ((nd->nd_flag & ND_NFSV3) ||
1900 			    NFSNONZERO_ATTRBIT(&savbits)) {
1901 				if (nd->nd_flag & ND_NFSV4)
1902 					refp = nfsv4root_getreferral(NULL,
1903 					    vp, dp->d_fileno);
1904 				if (refp == NULL) {
					/*
					 * Try VFS_VGET() until it reports
					 * EOPNOTSUPP once; from then on
					 * (usevget == 0) go straight to the
					 * VOP_LOOKUP() fallback below.
					 */
1905 					if (usevget)
1906 						r = VFS_VGET(vp->v_mount,
1907 						    dp->d_fileno, LK_EXCLUSIVE,
1908 						    &nvp);
1909 					else
1910 						r = EOPNOTSUPP;
1911 					if (r == EOPNOTSUPP) {
						/*
						 * The fields of cn that are
						 * the same for every entry
						 * are set once, on the first
						 * fallback.
						 */
1912 						if (usevget) {
1913 							usevget = 0;
1914 							cn.cn_nameiop = LOOKUP;
1915 							cn.cn_lkflags =
1916 							    LK_EXCLUSIVE |
1917 							    LK_RETRY;
1918 							cn.cn_cred =
1919 							    nd->nd_cred;
1920 							cn.cn_thread = p;
1921 						}
1922 						cn.cn_nameptr = dp->d_name;
1923 						cn.cn_namelen = nlen;
1924 						cn.cn_flags = ISLASTCN |
1925 						    NOFOLLOW | LOCKLEAF |
1926 						    MPSAFE;
1927 						if (nlen == 2 &&
1928 						    dp->d_name[0] == '.' &&
1929 						    dp->d_name[1] == '.')
1930 							cn.cn_flags |=
1931 							    ISDOTDOT;
						/*
						 * vp was unlocked above, so
						 * lock it again if needed
						 * before the lookup.  The
						 * release of vp after the
						 * loop checks whether it is
						 * still locked.
						 */
1932 						if (!VOP_ISLOCKED(vp))
1933 							vn_lock(vp,
1934 							    LK_EXCLUSIVE |
1935 							    LK_RETRY);
1936 						r = VOP_LOOKUP(vp, &nvp, &cn);
1937 					}
1938 				}
1939 				if (!r) {
					/*
					 * Got the vnode: fetch its file
					 * handle and attributes unless this
					 * is a referral or (V4) nothing
					 * fillable was requested.
					 */
1940 				    if (refp == NULL &&
1941 					((nd->nd_flag & ND_NFSV3) ||
1942 					 NFSNONZERO_ATTRBIT(&attrbits))) {
1943 					r = nfsvno_getfh(nvp, &nfh, p);
1944 					if (!r)
1945 					    r = nfsvno_getattr(nvp, nvap,
1946 						nd->nd_cred, p);
1947 				    }
1948 				} else {
1949 				    nvp = NULL;
1950 				}
				/*
				 * A failure for this entry fails the whole
				 * request unless NFSATTRBIT_RDATTRERROR is
				 * set in attrbits, in which case the error
				 * is reported with the entry further down.
				 */
1951 				if (r) {
1952 					if (!NFSISSET_ATTRBIT(&attrbits,
1953 					    NFSATTRBIT_RDATTRERROR)) {
1954 						if (nvp != NULL)
1955 							vput(nvp);
1956 						nd->nd_repstat = r;
1957 						break;
1958 					}
1959 				}
1960 			}
1961 
1962 			/*
1963 			 * Build the directory record xdr
1964 			 */
1965 			if (nd->nd_flag & ND_NFSV3) {
				/*
				 * V3 entry: file id, name, cookie, post-op
				 * attributes, file handle.
				 */
1966 				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1967 				*tl++ = newnfs_true;
1968 				*tl++ = 0;
1969 				*tl = txdr_unsigned(dp->d_fileno);
1970 				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
1971 				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
1972 				*tl++ = 0;
1973 				*tl = txdr_unsigned(*cookiep);
1974 				nfsrv_postopattr(nd, 0, nvap);
1975 				dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1);
1976 				dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR);
1977 				if (nvp != NULL)
1978 					vput(nvp);
1979 			} else {
				/*
				 * V4 entry: cookie, name, then either the
				 * referral attributes, just the read error
				 * (rderrbits) or the requested attributes.
				 * nvp is unlocked before the attributes are
				 * filled in and released afterwards.
				 */
1980 				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
1981 				*tl++ = newnfs_true;
1982 				*tl++ = 0;
1983 				*tl = txdr_unsigned(*cookiep);
1984 				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
1985 				if (nvp != NULL)
1986 					NFSVOPUNLOCK(nvp, 0, p);
1987 				if (refp != NULL) {
1988 					dirlen += nfsrv_putreferralattr(nd,
1989 					    &savbits, refp, 0,
1990 					    &nd->nd_repstat);
1991 					if (nd->nd_repstat) {
1992 						if (nvp != NULL)
1993 							vrele(nvp);
1994 						break;
1995 					}
1996 				} else if (r) {
1997 					dirlen += nfsvno_fillattr(nd, nvp, nvap,
1998 					    &nfh, r, &rderrbits, nd->nd_cred,
1999 					    p, isdgram, 0);
2000 				} else {
2001 					dirlen += nfsvno_fillattr(nd, nvp, nvap,
2002 					    &nfh, r, &attrbits, nd->nd_cred,
2003 					    p, isdgram, 0);
2004 				}
2005 				if (nvp != NULL)
2006 					vrele(nvp);
2007 				dirlen += (3 * NFSX_UNSIGNED);
2008 			}
			/* Only entries that fit within cnt are counted. */
2009 			if (dirlen <= cnt)
2010 				entrycnt++;
2011 		}
2012 		cpos += dp->d_reclen;
2013 		dp = (struct dirent *)cpos;
2014 		cookiep++;
2015 		ncookies--;
2016 	}
	/*
	 * vp may have been locked again by the lookup fallback; if it is
	 * still locked use vput(), otherwise just drop the reference.
	 */
2017 	if (!usevget && VOP_ISLOCKED(vp))
2018 		vput(vp);
2019 	else
2020 		vrele(vp);
2021 
2022 	/*
2023 	 * If dirlen > cnt, we must strip off the last entry. If that
2024 	 * results in an empty reply, report NFSERR_TOOSMALL.
	 * On an error the reply is cut back to the position saved before
	 * the first part of the reply was built (mb0/bpos0); otherwise
	 * only back to the start of the last entry (mb1/bpos1).
2025 	 */
2026 	if (dirlen > cnt || nd->nd_repstat) {
2027 		if (!nd->nd_repstat && entrycnt == 0)
2028 			nd->nd_repstat = NFSERR_TOOSMALL;
2029 		if (nd->nd_repstat)
2030 			newnfs_trimtrailing(nd, mb0, bpos0);
2031 		else
2032 			newnfs_trimtrailing(nd, mb1, bpos1);
2033 		eofflag = 0;
2034 	} else if (cpos < cend)
2035 		eofflag = 0;
	/* Trailer: "no more entries" marker followed by the EOF flag. */
2036 	if (!nd->nd_repstat) {
2037 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2038 		*tl++ = newnfs_false;
2039 		if (eofflag)
2040 			*tl = newnfs_true;
2041 		else
2042 			*tl = newnfs_false;
2043 	}
2044 	FREE((caddr_t)cookies, M_TEMP);
2045 	FREE((caddr_t)rbuf, M_TEMP);
2046 	return (0);
2047 nfsmout:
	/* Only reached before vp has been unlocked, hence vput(). */
2048 	vput(vp);
2049 	return (error);
2050 }
2051 
2052 /*
2053  * Get the settable attributes out of the mbuf list.
2054  * (Return 0 or EBADRPC)
 *
 * The wire format is chosen from the NFS version flags in nd->nd_flag.
 * Only attributes that the client actually supplied are stored in nvap;
 * the other fields of nvap are not touched here.  For NFSv4 all of the
 * work is handed to nfsv4_sattr(), which is the only user of attrbitp,
 * aclp and p.
2055  */
2056 int
2057 nfsrv_sattr(struct nfsrv_descript *nd, struct nfsvattr *nvap,
2058     nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
2059 {
2060 	u_int32_t *tl;
2061 	struct nfsv2_sattr *sp;
2062 	struct timeval curtime;
2063 	int error = 0, toclient = 0;
2064 
2065 	switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) {
2066 	case ND_NFSV2:
		/*
		 * NFSv2 sends one fixed size structure; a field equal to
		 * newnfs_xdrneg1 is not copied into nvap.
		 */
2067 		NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
2068 		/*
2069 		 * Some old clients didn't fill in the high order 16bits.
2070 		 * --> check the low order 2 bytes for 0xffff
2071 		 */
2072 		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
2073 			nvap->na_mode = nfstov_mode(sp->sa_mode);
2074 		if (sp->sa_uid != newnfs_xdrneg1)
2075 			nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid);
2076 		if (sp->sa_gid != newnfs_xdrneg1)
2077 			nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid);
2078 		if (sp->sa_size != newnfs_xdrneg1)
2079 			nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size);
		/*
		 * Unless "notyet" is defined, only the seconds of the
		 * access time are used and tv_nsec is set to 0.
		 */
2080 		if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) {
2081 #ifdef notyet
2082 			fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime);
2083 #else
2084 			nvap->na_atime.tv_sec =
2085 				fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec);
2086 			nvap->na_atime.tv_nsec = 0;
2087 #endif
2088 		}
2089 		if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1)
2090 			fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime);
2091 		break;
2092 	case ND_NFSV3:
		/*
		 * NFSv3 sends a word in front of each attribute; the value
		 * follows only when that word is newnfs_true.  Order: mode,
		 * uid, gid, size.
		 */
2093 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2094 		if (*tl == newnfs_true) {
2095 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2096 			nvap->na_mode = nfstov_mode(*tl);
2097 		}
2098 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2099 		if (*tl == newnfs_true) {
2100 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2101 			nvap->na_uid = fxdr_unsigned(uid_t, *tl);
2102 		}
2103 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2104 		if (*tl == newnfs_true) {
2105 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2106 			nvap->na_gid = fxdr_unsigned(gid_t, *tl);
2107 		}
2108 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2109 		if (*tl == newnfs_true) {
2110 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2111 			nvap->na_size = fxdr_hyper(tl);
2112 		}
		/*
		 * Access time: either a time sent by the client or "use the
		 * server's time".  Any other selector value leaves na_atime
		 * alone.  toclient records that the client supplied the
		 * access time.
		 */
2113 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2114 		switch (fxdr_unsigned(int, *tl)) {
2115 		case NFSV3SATTRTIME_TOCLIENT:
2116 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2117 			fxdr_nfsv3time(tl, &nvap->na_atime);
2118 			toclient = 1;
2119 			break;
2120 		case NFSV3SATTRTIME_TOSERVER:
2121 			NFSGETTIME(&curtime);
2122 			nvap->na_atime.tv_sec = curtime.tv_sec;
2123 			nvap->na_atime.tv_nsec = curtime.tv_usec * 1000;
2124 			nvap->na_vaflags |= VA_UTIMES_NULL;
2125 			break;
2126 		};
		/*
		 * Modify time, same encoding.  VA_UTIMES_NULL ends up set
		 * only if neither time was supplied by the client: a client
		 * mtime clears it, and a server mtime sets it only when the
		 * atime did not come from the client.
		 */
2127 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2128 		switch (fxdr_unsigned(int, *tl)) {
2129 		case NFSV3SATTRTIME_TOCLIENT:
2130 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
2131 			fxdr_nfsv3time(tl, &nvap->na_mtime);
2132 			nvap->na_vaflags &= ~VA_UTIMES_NULL;
2133 			break;
2134 		case NFSV3SATTRTIME_TOSERVER:
2135 			NFSGETTIME(&curtime);
2136 			nvap->na_mtime.tv_sec = curtime.tv_sec;
2137 			nvap->na_mtime.tv_nsec = curtime.tv_usec * 1000;
2138 			if (!toclient)
2139 				nvap->na_vaflags |= VA_UTIMES_NULL;
2140 			break;
2141 		};
2142 		break;
2143 	case ND_NFSV4:
		/* Last case: falls out of the switch without a break. */
2144 		error = nfsv4_sattr(nd, nvap, attrbitp, aclp, p);
2145 	};
	/*
	 * No statement above jumps here explicitly; presumably this is the
	 * target NFSM_DISSECT() uses when the request is too short.
	 */
2146 nfsmout:
2147 	return (error);
2148 }
2149 
2150 /*
2151  * Handle the setable attributes for V4.
2152  * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise.
2153  */
2154 int
2155 nfsv4_sattr(struct nfsrv_descript *nd, struct nfsvattr *nvap,
2156     nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
2157 {
2158 	u_int32_t *tl;
2159 	int attrsum = 0;
2160 	int i, j;
2161 	int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0;
2162 	int toclient = 0;
2163 	u_char *cp, namestr[NFSV4_SMALLSTR + 1];
2164 	uid_t uid;
2165 	gid_t gid;
2166 	struct timeval curtime;
2167 
2168 	error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup);
2169 	if (error)
2170 		return (error);
2171 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2172 	attrsize = fxdr_unsigned(int, *tl);
2173 
2174 	/*
2175 	 * Loop around getting the setable attributes. If an unsupported
2176 	 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return.
2177 	 */
2178 	if (retnotsup) {
2179 		nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2180 		bitpos = NFSATTRBIT_MAX;
2181 	} else {
2182 		bitpos = 0;
2183 	}
2184 	for (; bitpos < NFSATTRBIT_MAX; bitpos++) {
2185 	    if (attrsum > attrsize) {
2186 		error = NFSERR_BADXDR;
2187 		goto nfsmout;
2188 	    }
2189 	    if (NFSISSET_ATTRBIT(attrbitp, bitpos))
2190 		switch (bitpos) {
2191 		case NFSATTRBIT_SIZE:
2192 			NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
2193 			nvap->na_size = fxdr_hyper(tl);
2194 			attrsum += NFSX_HYPER;
2195 			break;
2196 		case NFSATTRBIT_ACL:
2197 			error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize,
2198 			    p);
2199 			if (error)
2200 				goto nfsmout;
2201 			if (aceerr && !nd->nd_repstat)
2202 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2203 			attrsum += aclsize;
2204 			break;
2205 		case NFSATTRBIT_ARCHIVE:
2206 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2207 			if (!nd->nd_repstat)
2208 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2209 			attrsum += NFSX_UNSIGNED;
2210 			break;
2211 		case NFSATTRBIT_HIDDEN:
2212 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2213 			if (!nd->nd_repstat)
2214 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2215 			attrsum += NFSX_UNSIGNED;
2216 			break;
2217 		case NFSATTRBIT_MIMETYPE:
2218 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2219 			i = fxdr_unsigned(int, *tl);
2220 			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
2221 			if (error)
2222 				goto nfsmout;
2223 			if (!nd->nd_repstat)
2224 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2225 			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i));
2226 			break;
2227 		case NFSATTRBIT_MODE:
2228 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2229 			nvap->na_mode = nfstov_mode(*tl);
2230 			attrsum += NFSX_UNSIGNED;
2231 			break;
2232 		case NFSATTRBIT_OWNER:
2233 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2234 			j = fxdr_unsigned(int, *tl);
2235 			if (j < 0)
2236 				return (NFSERR_BADXDR);
2237 			if (j > NFSV4_SMALLSTR)
2238 				cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
2239 			else
2240 				cp = namestr;
2241 			error = nfsrv_mtostr(nd, cp, j);
2242 			if (error) {
2243 				if (j > NFSV4_SMALLSTR)
2244 					free(cp, M_NFSSTRING);
2245 				return (error);
2246 			}
2247 			if (!nd->nd_repstat) {
2248 				nd->nd_repstat = nfsv4_strtouid(cp,j,&uid,p);
2249 				if (!nd->nd_repstat)
2250 					nvap->na_uid = uid;
2251 			}
2252 			if (j > NFSV4_SMALLSTR)
2253 				free(cp, M_NFSSTRING);
2254 			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
2255 			break;
2256 		case NFSATTRBIT_OWNERGROUP:
2257 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2258 			j = fxdr_unsigned(int, *tl);
2259 			if (j < 0)
2260 				return (NFSERR_BADXDR);
2261 			if (j > NFSV4_SMALLSTR)
2262 				cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
2263 			else
2264 				cp = namestr;
2265 			error = nfsrv_mtostr(nd, cp, j);
2266 			if (error) {
2267 				if (j > NFSV4_SMALLSTR)
2268 					free(cp, M_NFSSTRING);
2269 				return (error);
2270 			}
2271 			if (!nd->nd_repstat) {
2272 				nd->nd_repstat = nfsv4_strtogid(cp,j,&gid,p);
2273 				if (!nd->nd_repstat)
2274 					nvap->na_gid = gid;
2275 			}
2276 			if (j > NFSV4_SMALLSTR)
2277 				free(cp, M_NFSSTRING);
2278 			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
2279 			break;
2280 		case NFSATTRBIT_SYSTEM:
2281 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2282 			if (!nd->nd_repstat)
2283 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2284 			attrsum += NFSX_UNSIGNED;
2285 			break;
2286 		case NFSATTRBIT_TIMEACCESSSET:
2287 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2288 			attrsum += NFSX_UNSIGNED;
2289 			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
2290 			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2291 			    fxdr_nfsv4time(tl, &nvap->na_atime);
2292 			    toclient = 1;
2293 			    attrsum += NFSX_V4TIME;
2294 			} else {
2295 			    NFSGETTIME(&curtime);
2296 			    nvap->na_atime.tv_sec = curtime.tv_sec;
2297 			    nvap->na_atime.tv_nsec = curtime.tv_usec * 1000;
2298 			    nvap->na_vaflags |= VA_UTIMES_NULL;
2299 			}
2300 			break;
2301 		case NFSATTRBIT_TIMEBACKUP:
2302 			NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2303 			if (!nd->nd_repstat)
2304 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2305 			attrsum += NFSX_V4TIME;
2306 			break;
2307 		case NFSATTRBIT_TIMECREATE:
2308 			NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2309 			if (!nd->nd_repstat)
2310 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2311 			attrsum += NFSX_V4TIME;
2312 			break;
2313 		case NFSATTRBIT_TIMEMODIFYSET:
2314 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
2315 			attrsum += NFSX_UNSIGNED;
2316 			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
2317 			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
2318 			    fxdr_nfsv4time(tl, &nvap->na_mtime);
2319 			    nvap->na_vaflags &= ~VA_UTIMES_NULL;
2320 			    attrsum += NFSX_V4TIME;
2321 			} else {
2322 			    NFSGETTIME(&curtime);
2323 			    nvap->na_mtime.tv_sec = curtime.tv_sec;
2324 			    nvap->na_mtime.tv_nsec = curtime.tv_usec * 1000;
2325 			    if (!toclient)
2326 				nvap->na_vaflags |= VA_UTIMES_NULL;
2327 			}
2328 			break;
2329 		default:
2330 			nd->nd_repstat = NFSERR_ATTRNOTSUPP;
2331 			/*
2332 			 * set bitpos so we drop out of the loop.
2333 			 */
2334 			bitpos = NFSATTRBIT_MAX;
2335 			break;
2336 		};
2337 	}
2338 
2339 	/*
2340 	 * some clients pad the attrlist, so we need to skip over the
2341 	 * padding.
2342 	 */
2343 	if (attrsum > attrsize) {
2344 		error = NFSERR_BADXDR;
2345 	} else {
2346 		attrsize = NFSM_RNDUP(attrsize);
2347 		if (attrsum < attrsize)
2348 			error = nfsm_advance(nd, attrsize - attrsum, -1);
2349 	}
2350 nfsmout:
2351 	return (error);
2352 }
2353 
2354 /*
2355  * Check/setup export credentials.
2356  */
2357 int
2358 nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp,
2359     struct ucred *credanon)
2360 {
2361 	int error = 0;
2362 
2363 	/*
2364 	 * Check/setup credentials.
2365 	 */
2366 	if (nd->nd_flag & ND_GSS)
2367 		exp->nes_exflag &= ~MNT_EXPORTANON;
2368 
2369 	/*
2370 	 * Check to see if the operation is allowed for this security flavor.
2371 	 * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to
2372 	 * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS.
2373 	 * Also, allow Secinfo, so that it can acquire the correct flavor(s).
2374 	 */
2375 	if (nfsvno_testexp(nd, exp) &&
2376 	    nd->nd_procnum != NFSV4OP_SECINFO &&
2377 	    nd->nd_procnum != NFSPROC_FSINFO) {
2378 		if (nd->nd_flag & ND_NFSV4)
2379 			error = NFSERR_WRONGSEC;
2380 		else
2381 			error = (NFSERR_AUTHERR | AUTH_TOOWEAK);
2382 		return (error);
2383 	}
2384 
2385 	/*
2386 	 * Check to see if the file system is exported V4 only.
2387 	 */
2388 	if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4))
2389 		return (NFSERR_PROGNOTV4);
2390 
2391 	/*
2392 	 * Now, map the user credentials.
2393 	 * (Note that ND_AUTHNONE will only be set for an NFSv3
2394 	 *  Fsinfo RPC. If set for anything else, this code might need
2395 	 *  to change.)
2396 	 */
2397 	if (NFSVNO_EXPORTED(exp) &&
2398 	    ((!(nd->nd_flag & ND_GSS) && nd->nd_cred->cr_uid == 0) ||
2399 	     NFSVNO_EXPORTANON(exp) ||
2400 	     (nd->nd_flag & ND_AUTHNONE))) {
2401 		nd->nd_cred->cr_uid = credanon->cr_uid;
2402 		nd->nd_cred->cr_gid = credanon->cr_gid;
2403 		crsetgroups(nd->nd_cred, credanon->cr_ngroups,
2404 		    credanon->cr_groups);
2405 	}
2406 	return (0);
2407 }
2408 
2409 /*
2410  * Check exports.
2411  */
2412 int
2413 nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp,
2414     struct ucred **credp)
2415 {
2416 	int i, error, *secflavors;
2417 
2418 	error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
2419 	    &exp->nes_numsecflavor, &secflavors);
2420 	if (error) {
2421 		if (nfs_rootfhset) {
2422 			exp->nes_exflag = 0;
2423 			exp->nes_numsecflavor = 0;
2424 			error = 0;
2425 		}
2426 	} else {
2427 		/* Copy the security flavors. */
2428 		for (i = 0; i < exp->nes_numsecflavor; i++)
2429 			exp->nes_secflavors[i] = secflavors[i];
2430 	}
2431 	return (error);
2432 }
2433 
2434 /*
2435  * Get a vnode for a file handle and export stuff.
2436  */
2437 int
2438 nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam,
2439     struct vnode **vpp, struct nfsexstuff *exp, struct ucred **credp)
2440 {
2441 	int i, error, *secflavors;
2442 
2443 	*credp = NULL;
2444 	exp->nes_numsecflavor = 0;
2445 	error = VFS_FHTOVP(mp, &fhp->fh_fid, vpp);
2446 	if (error != 0)
2447 		/* Make sure the server replies ESTALE to the client. */
2448 		error = ESTALE;
2449 	if (nam && !error) {
2450 		error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
2451 		    &exp->nes_numsecflavor, &secflavors);
2452 		if (error) {
2453 			if (nfs_rootfhset) {
2454 				exp->nes_exflag = 0;
2455 				exp->nes_numsecflavor = 0;
2456 				error = 0;
2457 			} else {
2458 				vput(*vpp);
2459 			}
2460 		} else {
2461 			/* Copy the security flavors. */
2462 			for (i = 0; i < exp->nes_numsecflavor; i++)
2463 				exp->nes_secflavors[i] = secflavors[i];
2464 		}
2465 	}
2466 	return (error);
2467 }
2468 
2469 /*
2470  * Do the pathconf vnode op.
2471  */
2472 int
2473 nfsvno_pathconf(struct vnode *vp, int flag, register_t *retf,
2474     struct ucred *cred, struct thread *p)
2475 {
2476 	int error;
2477 
2478 	error = VOP_PATHCONF(vp, flag, retf);
2479 	return (error);
2480 }
2481 
2482 /*
2483  * nfsd_fhtovp() - convert a fh to a vnode ptr
2484  * 	- look up fsid in mount list (if not found ret error)
2485  *	- get vp and export rights by calling nfsvno_fhtovp()
2486  *	- if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
2487  *	  for AUTH_SYS
2488  * Also handle getting the Giant lock for the file system,
2489  * as required:
2490  * - if same mount point as *mpp
2491  *       do nothing
2492  *   else if *mpp == NULL
2493  *       if already locked
2494  *           leave it locked
2495  *       else
2496  *           call VFS_LOCK_GIANT()
2497  *   else
2498  *       if already locked
2499  *            unlock Giant
2500  *       call VFS_LOCK_GIANT()
2501  */
2502 void
2503 nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp,
2504     struct vnode **vpp, struct nfsexstuff *exp,
2505     struct mount **mpp, int startwrite, struct thread *p)
2506 {
2507 	struct mount *mp;
2508 	struct ucred *credanon;
2509 	fhandle_t *fhp;
2510 
2511 	fhp = (fhandle_t *)nfp->nfsrvfh_data;
2512 	/*
2513 	 * Check for the special case of the nfsv4root_fh.
2514 	 */
2515 	mp = vfs_getvfs(&fhp->fh_fsid);
2516 	if (!mp) {
2517 		*vpp = NULL;
2518 		nd->nd_repstat = ESTALE;
2519 		if (*mpp && exp->nes_vfslocked)
2520 			VFS_UNLOCK_GIANT(*mpp);
2521 		*mpp = NULL;
2522 		exp->nes_vfslocked = 0;
2523 		return;
2524 	}
2525 
2526 	/*
2527 	 * Now, handle Giant for the file system.
2528 	 */
2529 	if (*mpp != NULL && *mpp != mp && exp->nes_vfslocked) {
2530 		VFS_UNLOCK_GIANT(*mpp);
2531 		exp->nes_vfslocked = 0;
2532 	}
2533 	if (!exp->nes_vfslocked && *mpp != mp)
2534 		exp->nes_vfslocked = VFS_LOCK_GIANT(mp);
2535 
2536 	*mpp = mp;
2537 	if (startwrite)
2538 		vn_start_write(NULL, mpp, V_WAIT);
2539 
2540 	nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, vpp, exp,
2541 	    &credanon);
2542 
2543 	/*
2544 	 * For NFSv4 without a pseudo root fs, unexported file handles
2545 	 * can be returned, so that Lookup works everywhere.
2546 	 */
2547 	if (!nd->nd_repstat && exp->nes_exflag == 0 &&
2548 	    !(nd->nd_flag & ND_NFSV4)) {
2549 		vput(*vpp);
2550 		nd->nd_repstat = EACCES;
2551 	}
2552 
2553 	/*
2554 	 * Personally, I've never seen any point in requiring a
2555 	 * reserved port#, since only in the rare case where the
2556 	 * clients are all boxes with secure system priviledges,
2557 	 * does it provide any enhanced security, but... some people
2558 	 * believe it to be useful and keep putting this code back in.
2559 	 * (There is also some "security checker" out there that
2560 	 *  complains if the nfs server doesn't enforce this.)
2561 	 * However, note the following:
2562 	 * RFC3530 (NFSv4) specifies that a reserved port# not be
2563 	 *	required.
2564 	 * RFC2623 recommends that, if a reserved port# is checked for,
2565 	 *	that there be a way to turn that off--> ifdef'd.
2566 	 */
2567 #ifdef NFS_REQRSVPORT
2568 	if (!nd->nd_repstat) {
2569 		struct sockaddr_in *saddr;
2570 		struct sockaddr_in6 *saddr6;
2571 
2572 		saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
2573 		saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *);
2574 		if (!(nd->nd_flag & ND_NFSV4) &&
2575 		    ((saddr->sin_family == AF_INET &&
2576 		      ntohs(saddr->sin_port) >= IPPORT_RESERVED) ||
2577 		     (saddr6->sin6_family == AF_INET6 &&
2578 		      ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) {
2579 			vput(*vpp);
2580 			nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
2581 		}
2582 	}
2583 #endif	/* NFS_REQRSVPORT */
2584 
2585 	/*
2586 	 * Check/setup credentials.
2587 	 */
2588 	if (!nd->nd_repstat) {
2589 		nd->nd_saveduid = nd->nd_cred->cr_uid;
2590 		nd->nd_repstat = nfsd_excred(nd, exp, credanon);
2591 		if (nd->nd_repstat)
2592 			vput(*vpp);
2593 	}
2594 	if (credanon != NULL)
2595 		crfree(credanon);
2596 	if (nd->nd_repstat) {
2597 		if (startwrite)
2598 			vn_finished_write(mp);
2599 		if (exp->nes_vfslocked) {
2600 			VFS_UNLOCK_GIANT(mp);
2601 			exp->nes_vfslocked = 0;
2602 		}
2603 		vfs_rel(mp);
2604 		*vpp = NULL;
2605 		*mpp = NULL;
2606 	} else {
2607 		vfs_rel(mp);
2608 	}
2609 }
2610 
2611 /*
2612  * glue for fp.
2613  */
2614 int
2615 fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp)
2616 {
2617 	struct filedesc *fdp;
2618 	struct file *fp;
2619 
2620 	fdp = p->td_proc->p_fd;
2621 	if (fd >= fdp->fd_nfiles ||
2622 	    (fp = fdp->fd_ofiles[fd]) == NULL)
2623 		return (EBADF);
2624 	*fpp = fp;
2625 	return (0);
2626 }
2627 
2628 /*
2629  * Called from nfssvc() to update the exports list. Just call
2630  * vfs_export(). This has to be done, since the v4 root fake fs isn't
2631  * in the mount list.
2632  */
2633 int
2634 nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p)
2635 {
2636 	struct nfsex_args *nfsexargp = (struct nfsex_args *)argp;
2637 	int error;
2638 	struct nameidata nd;
2639 	fhandle_t fh;
2640 
2641 	error = vfs_export(&nfsv4root_mnt, &nfsexargp->export);
2642 	if ((nfsexargp->export.ex_flags & MNT_DELEXPORT)) {
2643 		nfs_rootfhset = 0;
2644 		nfsv4root_set = 0;
2645 	} else if (error == 0) {
2646 		if (nfsexargp->fspec == NULL)
2647 			return (EPERM);
2648 		/*
2649 		 * If fspec != NULL, this is the v4root path.
2650 		 */
2651 		NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_USERSPACE,
2652 		    nfsexargp->fspec, p);
2653 		if ((error = namei(&nd)) != 0)
2654 			return (error);
2655 		error = nfsvno_getfh(nd.ni_vp, &fh, p);
2656 		vrele(nd.ni_vp);
2657 		if (!error) {
2658 			nfs_rootfh.nfsrvfh_len = NFSX_MYFH;
2659 			NFSBCOPY((caddr_t)&fh,
2660 			    nfs_rootfh.nfsrvfh_data,
2661 			    sizeof (fhandle_t));
2662 			nfs_rootfhset = 1;
2663 		}
2664 	}
2665 	return (error);
2666 }
2667 
2668 /*
2669  * Get the tcp socket sequence numbers we need.
2670  * (Maybe this should be moved to the tcp sources?)
2671  */
2672 int
2673 nfsrv_getsocksndseq(struct socket *so, tcp_seq *maxp, tcp_seq *unap)
2674 {
2675 	struct inpcb *inp;
2676 	struct tcpcb *tp;
2677 
2678 	inp = sotoinpcb(so);
2679 	KASSERT(inp != NULL, ("nfsrv_getsocksndseq: inp == NULL"));
2680 	INP_RLOCK(inp);
2681 	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
2682 		INP_RUNLOCK(inp);
2683 		return (EPIPE);
2684 	}
2685 	tp = intotcpcb(inp);
2686 	if (tp->t_state != TCPS_ESTABLISHED) {
2687 		INP_RUNLOCK(inp);
2688 		return (EPIPE);
2689 	}
2690 	*maxp = tp->snd_max;
2691 	*unap = tp->snd_una;
2692 	INP_RUNLOCK(inp);
2693 	return (0);
2694 }
2695 
/*
 * This function is meant to report whether the system is near its limit
 * for memory allocation via malloc() or mget().
 * XXX (For now, this is just a stub.)
 * The stub always returns 0, unless nfsrv_testmalloclimit is set. In that
 * case every 1000th call returns 1 and the first of every 100 such
 * returns also logs a message.
 */
int nfsrv_testmalloclimit = 0;
int
nfsrv_mallocmget_limit(void)
{
	static int printmesg = 0;
	static int testval = 1;
	int callno, hitno;

	/* The call counter only advances while testing is enabled. */
	if (!nfsrv_testmalloclimit)
		return (0);
	callno = testval++;
	if ((callno % 1000) != 0)
		return (0);
	hitno = printmesg++;
	if ((hitno % 100) == 0)
		printf("nfsd: malloc/mget near limit\n");
	return (1);
}
2716 
2717 /*
2718  * BSD specific initialization of a mount point.
2719  */
2720 void
2721 nfsd_mntinit(void)
2722 {
2723 	static int inited = 0;
2724 
2725 	if (inited)
2726 		return;
2727 	inited = 1;
2728 	nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
2729 	TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist);
2730 	nfsv4root_mnt.mnt_export = NULL;
2731 	TAILQ_INIT(&nfsv4root_opt);
2732 	TAILQ_INIT(&nfsv4root_newopt);
2733 	nfsv4root_mnt.mnt_opt = &nfsv4root_opt;
2734 	nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt;
2735 	nfsv4root_mnt.mnt_nvnodelistsize = 0;
2736 }
2737 
2738 /*
2739  * Get a vnode for a file handle, without checking exports, etc.
2740  */
2741 struct vnode *
2742 nfsvno_getvp(fhandle_t *fhp)
2743 {
2744 	struct mount *mp;
2745 	struct vnode *vp;
2746 	int error;
2747 
2748 	mp = vfs_getvfs(&fhp->fh_fsid);
2749 	if (mp == NULL)
2750 		return (NULL);
2751 	error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp);
2752 	if (error)
2753 		return (NULL);
2754 	return (vp);
2755 }
2756 
2757 /*
2758  * Check to see it a byte range lock held by a process running
2759  * locally on the server conflicts with the new lock.
2760  */
2761 int
2762 nfsvno_localconflict(struct vnode *vp, int ftype, u_int64_t first,
2763     u_int64_t end, struct nfslockconflict *cfp, struct thread *td)
2764 {
2765 	int error;
2766 	struct flock fl;
2767 
2768 	if (!nfsrv_dolocallocks)
2769 		return (0);
2770 	fl.l_whence = SEEK_SET;
2771 	fl.l_type = ftype;
2772 	fl.l_start = (off_t)first;
2773 	if (end == NFS64BITSSET)
2774 		fl.l_len = 0;
2775 	else
2776 		fl.l_len = (off_t)(end - first);
2777 	/*
2778 	 * For FreeBSD8, the l_pid and l_sysid must be set to the same
2779 	 * values for all calls, so that all locks will be held by the
2780 	 * nfsd server. (The nfsd server handles conflicts between the
2781 	 * various clients.)
2782 	 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024
2783 	 * bytes, so it can't be put in l_sysid.
2784 	 */
2785 	if (nfsv4_sysid == 0)
2786 		nfsv4_sysid = nlm_acquire_next_sysid();
2787 	fl.l_pid = (pid_t)0;
2788 	fl.l_sysid = (int)nfsv4_sysid;
2789 
2790 	NFSVOPUNLOCK(vp, 0, td);
2791 	error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_GETLK, &fl,
2792 	    (F_POSIX | F_REMOTE));
2793 	NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, td);
2794 	if (error)
2795 		return (error);
2796 	if (fl.l_type == F_UNLCK)
2797 		return (0);
2798 	if (cfp != NULL) {
2799 		cfp->cl_clientid.lval[0] = cfp->cl_clientid.lval[1] = 0;
2800 		cfp->cl_first = (u_int64_t)fl.l_start;
2801 		if (fl.l_len == 0)
2802 			cfp->cl_end = NFS64BITSSET;
2803 		else
2804 			cfp->cl_end = (u_int64_t)
2805 			    (fl.l_start + fl.l_len);
2806 		if (fl.l_type == F_WRLCK)
2807 			cfp->cl_flags = NFSLCK_WRITE;
2808 		else
2809 			cfp->cl_flags = NFSLCK_READ;
2810 		sprintf(cfp->cl_owner, "LOCALID%d", fl.l_pid);
2811 		cfp->cl_ownerlen = strlen(cfp->cl_owner);
2812 		return (NFSERR_DENIED);
2813 	}
2814 	return (NFSERR_INVAL);
2815 }
2816 
2817 /*
2818  * Do a local VOP_ADVLOCK().
2819  */
2820 int
2821 nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first,
2822     u_int64_t end, struct thread *td)
2823 {
2824 	int error;
2825 	struct flock fl;
2826 	u_int64_t tlen;
2827 
2828 	if (!nfsrv_dolocallocks)
2829 		return (0);
2830 	fl.l_whence = SEEK_SET;
2831 	fl.l_type = ftype;
2832 	fl.l_start = (off_t)first;
2833 	if (end == NFS64BITSSET) {
2834 		fl.l_len = 0;
2835 	} else {
2836 		tlen = end - first;
2837 		fl.l_len = (off_t)tlen;
2838 	}
2839 	/*
2840 	 * For FreeBSD8, the l_pid and l_sysid must be set to the same
2841 	 * values for all calls, so that all locks will be held by the
2842 	 * nfsd server. (The nfsd server handles conflicts between the
2843 	 * various clients.)
2844 	 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024
2845 	 * bytes, so it can't be put in l_sysid.
2846 	 */
2847 	if (nfsv4_sysid == 0)
2848 		nfsv4_sysid = nlm_acquire_next_sysid();
2849 	fl.l_pid = (pid_t)0;
2850 	fl.l_sysid = (int)nfsv4_sysid;
2851 
2852 	NFSVOPUNLOCK(vp, 0, td);
2853 	error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl,
2854 	    (F_POSIX | F_REMOTE));
2855 	NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY, td);
2856 	return (error);
2857 }
2858 
/*
 * Unlock an underlying local file system.
 * This simply hands mp to VFS_UNLOCK_GIANT().
 */
void
nfsvno_unlockvfs(struct mount *mp)
{

	VFS_UNLOCK_GIANT(mp);
}
2868 
/*
 * Lock an underlying file system, as required.
 * The value of VFS_LOCK_GIANT() is returned, which tells the caller
 * whether or not it is locked.
 */
int
nfsvno_lockvfs(struct mount *mp)
{
	int locked = VFS_LOCK_GIANT(mp);

	return (locked);
}
2881 
2882 /*
2883  * Check the nfsv4 root exports.
2884  */
2885 int
2886 nfsvno_v4rootexport(struct nfsrv_descript *nd)
2887 {
2888 	struct ucred *credanon;
2889 	int exflags, error, numsecflavor, *secflavors, i;
2890 
2891 	error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags,
2892 	    &credanon, &numsecflavor, &secflavors);
2893 	if (error)
2894 		return (NFSERR_PROGUNAVAIL);
2895 	if (credanon != NULL)
2896 		crfree(credanon);
2897 	for (i = 0; i < numsecflavor; i++) {
2898 		if (secflavors[i] == AUTH_SYS)
2899 			nd->nd_flag |= ND_EXAUTHSYS;
2900 		else if (secflavors[i] == RPCSEC_GSS_KRB5)
2901 			nd->nd_flag |= ND_EXGSS;
2902 		else if (secflavors[i] == RPCSEC_GSS_KRB5I)
2903 			nd->nd_flag |= ND_EXGSSINTEGRITY;
2904 		else if (secflavors[i] == RPCSEC_GSS_KRB5P)
2905 			nd->nd_flag |= ND_EXGSSPRIVACY;
2906 	}
2907 	return (0);
2908 }
2909 
2910 /*
2911  * Nfs server psuedo system call for the nfsd's
2912  */
2913 /*
2914  * MPSAFE
2915  */
2916 static int
2917 nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap)
2918 {
2919 	struct file *fp;
2920 	struct nfsd_addsock_args sockarg;
2921 	struct nfsd_nfsd_args nfsdarg;
2922 	int error;
2923 
2924 	if (uap->flag & NFSSVC_NFSDADDSOCK) {
2925 		error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg));
2926 		if (error)
2927 			return (error);
2928 		if ((error = fget(td, sockarg.sock, &fp)) != 0) {
2929 			return (error);
2930 		}
2931 		if (fp->f_type != DTYPE_SOCKET) {
2932 			fdrop(fp, td);
2933 			return (EPERM);
2934 		}
2935 		error = nfsrvd_addsock(fp);
2936 		fdrop(fp, td);
2937 	} else if (uap->flag & NFSSVC_NFSDNFSD) {
2938 		if (uap->argp == NULL)
2939 			return (EINVAL);
2940 		error = copyin(uap->argp, (caddr_t)&nfsdarg,
2941 		    sizeof (nfsdarg));
2942 		if (error)
2943 			return (error);
2944 		error = nfsrvd_nfsd(td, &nfsdarg);
2945 	} else {
2946 		error = nfssvc_srvcall(td, uap, td->td_ucred);
2947 	}
2948 	return (error);
2949 }
2950 
2951 static int
2952 nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred)
2953 {
2954 	struct nfsex_args export;
2955 	struct file *fp = NULL;
2956 	int stablefd, len;
2957 	struct nfsd_clid adminrevoke;
2958 	struct nfsd_dumplist dumplist;
2959 	struct nfsd_dumpclients *dumpclients;
2960 	struct nfsd_dumplocklist dumplocklist;
2961 	struct nfsd_dumplocks *dumplocks;
2962 	struct nameidata nd;
2963 	vnode_t vp;
2964 	int error = EINVAL;
2965 
2966 	if (uap->flag & NFSSVC_PUBLICFH) {
2967 		NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data,
2968 		    sizeof (fhandle_t));
2969 		error = copyin(uap->argp,
2970 		    &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t));
2971 		if (!error)
2972 			nfs_pubfhset = 1;
2973 	} else if (uap->flag & NFSSVC_V4ROOTEXPORT) {
2974 		error = copyin(uap->argp,(caddr_t)&export,
2975 		    sizeof (struct nfsex_args));
2976 		if (!error)
2977 			error = nfsrv_v4rootexport(&export, cred, p);
2978 	} else if (uap->flag & NFSSVC_NOPUBLICFH) {
2979 		nfs_pubfhset = 0;
2980 		error = 0;
2981 	} else if (uap->flag & NFSSVC_STABLERESTART) {
2982 		error = copyin(uap->argp, (caddr_t)&stablefd,
2983 		    sizeof (int));
2984 		if (!error)
2985 			error = fp_getfvp(p, stablefd, &fp, &vp);
2986 		if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE))
2987 			error = EBADF;
2988 		if (!error && newnfs_numnfsd != 0)
2989 			error = EPERM;
2990 		if (!error) {
2991 			nfsrv_stablefirst.nsf_fp = fp;
2992 			nfsrv_setupstable(p);
2993 		}
2994 	} else if (uap->flag & NFSSVC_ADMINREVOKE) {
2995 		error = copyin(uap->argp, (caddr_t)&adminrevoke,
2996 		    sizeof (struct nfsd_clid));
2997 		if (!error)
2998 			error = nfsrv_adminrevoke(&adminrevoke, p);
2999 	} else if (uap->flag & NFSSVC_DUMPCLIENTS) {
3000 		error = copyin(uap->argp, (caddr_t)&dumplist,
3001 		    sizeof (struct nfsd_dumplist));
3002 		if (!error && (dumplist.ndl_size < 1 ||
3003 			dumplist.ndl_size > NFSRV_MAXDUMPLIST))
3004 			error = EPERM;
3005 		if (!error) {
3006 		    len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size;
3007 		    dumpclients = (struct nfsd_dumpclients *)malloc(len,
3008 			M_TEMP, M_WAITOK);
3009 		    nfsrv_dumpclients(dumpclients, dumplist.ndl_size);
3010 		    error = copyout(dumpclients,
3011 			CAST_USER_ADDR_T(dumplist.ndl_list), len);
3012 		    free((caddr_t)dumpclients, M_TEMP);
3013 		}
3014 	} else if (uap->flag & NFSSVC_DUMPLOCKS) {
3015 		error = copyin(uap->argp, (caddr_t)&dumplocklist,
3016 		    sizeof (struct nfsd_dumplocklist));
3017 		if (!error && (dumplocklist.ndllck_size < 1 ||
3018 			dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST))
3019 			error = EPERM;
3020 		if (!error)
3021 			error = nfsrv_lookupfilename(&nd,
3022 				dumplocklist.ndllck_fname, p);
3023 		if (!error) {
3024 			len = sizeof (struct nfsd_dumplocks) *
3025 				dumplocklist.ndllck_size;
3026 			dumplocks = (struct nfsd_dumplocks *)malloc(len,
3027 				M_TEMP, M_WAITOK);
3028 			nfsrv_dumplocks(nd.ni_vp, dumplocks,
3029 			    dumplocklist.ndllck_size, p);
3030 			vput(nd.ni_vp);
3031 			error = copyout(dumplocks,
3032 			    CAST_USER_ADDR_T(dumplocklist.ndllck_list), len);
3033 			free((caddr_t)dumplocks, M_TEMP);
3034 		}
3035 	}
3036 	return (error);
3037 }
3038 
3039 /*
3040  * Check exports.
3041  * Returns 0 if ok, 1 otherwise.
3042  */
3043 int
3044 nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp)
3045 {
3046 	int i;
3047 
3048 	/*
3049 	 * This seems odd, but allow the case where the security flavor
3050 	 * list is empty. This happens when NFSv4 is traversing non-exported
3051 	 * file systems. Exported file systems should always have a non-empty
3052 	 * security flavor list.
3053 	 */
3054 	if (exp->nes_numsecflavor == 0)
3055 		return (0);
3056 
3057 	for (i = 0; i < exp->nes_numsecflavor; i++) {
3058 		/*
3059 		 * The tests for privacy and integrity must be first,
3060 		 * since ND_GSS is set for everything but AUTH_SYS.
3061 		 */
3062 		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P &&
3063 		    (nd->nd_flag & ND_GSSPRIVACY))
3064 			return (0);
3065 		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I &&
3066 		    (nd->nd_flag & ND_GSSINTEGRITY))
3067 			return (0);
3068 		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 &&
3069 		    (nd->nd_flag & ND_GSS))
3070 			return (0);
3071 		if (exp->nes_secflavors[i] == AUTH_SYS &&
3072 		    (nd->nd_flag & ND_GSS) == 0)
3073 			return (0);
3074 	}
3075 	return (1);
3076 }
3077 
3078 extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *);
3079 
3080 /*
3081  * Called once to initialize data structures...
3082  */
3083 static int
3084 nfsd_modevent(module_t mod, int type, void *data)
3085 {
3086 	int error = 0;
3087 	static int loaded = 0;
3088 
3089 	switch (type) {
3090 	case MOD_LOAD:
3091 		if (loaded)
3092 			return (0);
3093 		newnfs_portinit();
3094 		mtx_init(&nfs_cache_mutex, "nfs_cache_mutex", NULL, MTX_DEF);
3095 		mtx_init(&nfs_v4root_mutex, "nfs_v4root_mutex", NULL, MTX_DEF);
3096 		mtx_init(&nfsv4root_mnt.mnt_mtx, "struct mount mtx", NULL,
3097 		    MTX_DEF);
3098 		lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0);
3099 		nfsrvd_initcache();
3100 		nfsd_init();
3101 		NFSD_LOCK();
3102 		nfsrvd_init(0);
3103 		NFSD_UNLOCK();
3104 		nfsd_mntinit();
3105 #ifdef VV_DISABLEDELEG
3106 		vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation;
3107 		vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation;
3108 #endif
3109 		nfsd_call_servertimer = nfsrv_servertimer;
3110 		nfsd_call_nfsd = nfssvc_nfsd;
3111 		loaded = 1;
3112 		break;
3113 
3114 	case MOD_UNLOAD:
3115 		if (newnfs_numnfsd != 0) {
3116 			error = EBUSY;
3117 			break;
3118 		}
3119 
3120 #ifdef VV_DISABLEDELEG
3121 		vn_deleg_ops.vndeleg_recall = NULL;
3122 		vn_deleg_ops.vndeleg_disable = NULL;
3123 #endif
3124 		nfsd_call_servertimer = NULL;
3125 		nfsd_call_nfsd = NULL;
3126 		/* and get rid of the locks */
3127 		mtx_destroy(&nfs_cache_mutex);
3128 		mtx_destroy(&nfs_v4root_mutex);
3129 		mtx_destroy(&nfsv4root_mnt.mnt_mtx);
3130 		lockdestroy(&nfsv4root_mnt.mnt_explock);
3131 		loaded = 0;
3132 		break;
3133 	default:
3134 		error = EOPNOTSUPP;
3135 		break;
3136 	}
3137 	return error;
3138 }
/*
 * Module data for "nfsd", with nfsd_modevent() handling its events and
 * no private data.
 */
static moduledata_t nfsd_mod = {
	"nfsd",
	nfsd_modevent,
	NULL,
};
DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY);

/* So that loader and kldload(2) can find us, wherever we are.. */
MODULE_VERSION(nfsd, 1);
/* nfsd depends on version 1 of the nfscommon and nfslockd modules. */
MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1);
MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1);
3150 
3151