xref: /titanic_50/usr/src/uts/common/fs/nfs/nfs3_srv.c (revision 90f050286227cf4c4f8aa425555d04723d331d48)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28 
29 #pragma ident	"%Z%%M%	%I%	%E% SMI"
30 
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/cred.h>
35 #include <sys/buf.h>
36 #include <sys/vfs.h>
37 #include <sys/vnode.h>
38 #include <sys/uio.h>
39 #include <sys/errno.h>
40 #include <sys/sysmacros.h>
41 #include <sys/statvfs.h>
42 #include <sys/kmem.h>
43 #include <sys/dirent.h>
44 #include <sys/cmn_err.h>
45 #include <sys/debug.h>
46 #include <sys/systeminfo.h>
47 #include <sys/flock.h>
48 #include <sys/nbmlock.h>
49 #include <sys/policy.h>
50 
51 #include <rpc/types.h>
52 #include <rpc/auth.h>
53 #include <rpc/svc.h>
54 
55 #include <nfs/nfs.h>
56 #include <nfs/export.h>
57 
58 #include <sys/strsubr.h>
59 
60 /*
61  * These are the interface routines for the server side of the
62  * Network File System.  See the NFS version 3 protocol specification
63  * for a description of this interface.
64  */
65 
/*
 * DEBUG-only switches.  The procedures in this file test
 * rfs3_do_pre_op_attr / rfs3_do_post_op_attr before pointing their
 * pre-operation / post-operation attribute pointers at fetched attributes;
 * when a switch is zero the corresponding pointer is left (or set) NULL.
 * NOTE(review): rfs3_do_post_op_fh3 is not referenced in the visible part
 * of this file; presumably it gates post-op file handles -- confirm.
 */
66 #ifdef DEBUG
67 int rfs3_do_pre_op_attr = 1;
68 int rfs3_do_post_op_attr = 1;
69 int rfs3_do_post_op_fh3 = 1;
70 #endif
71 
/* Write verifier copied into resok.verf of every successful WRITE3 reply. */
72 static writeverf3 write3verf;
73 
/*
 * Converters from vnode attributes (struct vattr) to and from the NFSv3
 * attribute structures; defined elsewhere in this file.
 */
74 static int	sattr3_to_vattr(sattr3 *, struct vattr *);
75 static int	vattr_to_fattr3(struct vattr *, fattr3 *);
76 static int	vattr_to_wcc_attr(struct vattr *, wcc_attr *);
77 static void	vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
78 static void	vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
79 
80 /* ARGSUSED */
81 void
82 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
83 	struct svc_req *req, cred_t *cr)
84 {
85 	int error;
86 	vnode_t *vp;
87 	struct vattr va;
88 
89 	vp = nfs3_fhtovp(&args->object, exi);
90 	if (vp == NULL) {
91 		error = ESTALE;
92 		goto out;
93 	}
94 
95 	va.va_mask = AT_ALL;
96 	error = rfs4_delegated_getattr(vp, &va, 0, cr);
97 
98 	VN_RELE(vp);
99 
100 	if (!error) {
101 		/* overflow error if time or size is out of range */
102 		error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
103 		if (error)
104 			goto out;
105 		resp->status = NFS3_OK;
106 		return;
107 	}
108 
109 out:
110 	if (curthread->t_flag & T_WOULDBLOCK) {
111 		curthread->t_flag &= ~T_WOULDBLOCK;
112 		resp->status = NFS3ERR_JUKEBOX;
113 	} else
114 		resp->status = puterrno3(error);
115 }
116 
117 void *
118 rfs3_getattr_getfh(GETATTR3args *args)
119 {
120 
121 	return (&args->object);
122 }
123 
/*
 * rfs3_setattr - service an NFS version 3 SETATTR request.
 *
 * args: file handle, new attributes (sattr3) and optional ctime guard.
 * resp: receives the status plus before/after attributes (wcc data).
 * exi:  export the handle belongs to; req: RPC request; cr: caller's cred.
 *
 * Outline of the code below:
 *  - translate the handle to a vnode (ESTALE if that fails) and convert
 *    the requested attributes with sattr3_to_vattr();
 *  - for a size change on a regular file, return NFS3ERR_JUKEBOX if
 *    rfs4_check_delegated() reports a delegation, and enter the NBMAND
 *    critical region when nbl_need_check() says so;
 *  - fetch the "before" attributes; any failure fails the request;
 *  - reject with NFS3ERR_ROFS for read-only exports/vnodes and with
 *    NFS3ERR_NOT_SYNC when the guard ctime does not match;
 *  - apply the change (VOP_SPACE for an owner's size change, VOP_SETATTR
 *    for whatever bits remain in the mask), re-read the attributes and
 *    push metadata out with VOP_FSYNC(FNODSYNC).
 *
 * Label "out" maps error (or T_WOULDBLOCK) to a status; label "out1" is
 * entered with resp->status already set.  Both leave the critical region
 * if it was entered and release the vnode.
 */
124 void
125 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
126 	struct svc_req *req, cred_t *cr)
127 {
128 	int error;
129 	vnode_t *vp;
130 	struct vattr *bvap;
131 	struct vattr bva;
132 	struct vattr *avap;
133 	struct vattr ava;
134 	int flag;
135 	int in_crit = 0;
136 	struct flock64 bf;
137 
138 	bvap = NULL;
139 	avap = NULL;
140 
141 	vp = nfs3_fhtovp(&args->object, exi);
142 	if (vp == NULL) {
143 		error = ESTALE;
144 		goto out;
145 	}
146 
147 	error = sattr3_to_vattr(&args->new_attributes, &ava);
148 	if (error)
149 		goto out;
150 
151 	/*
152 	 * We need to specially handle size changes because of
153 	 * possible conflicting NBMAND locks. Get into critical
154 	 * region before VOP_GETATTR, so the size attribute is
155 	 * valid when checking conflicts.
156 	 *
157 	 * Also, check to see if the v4 side of the server has
158 	 * delegated this file.  If so, then we return JUKEBOX to
159 	 * allow the client to retransmit its request.
160 	 */
161 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
162 		if (rfs4_check_delegated(FWRITE, vp, TRUE)) {
163 			resp->status = NFS3ERR_JUKEBOX;
164 			goto out1;
165 		}
166 		if (nbl_need_check(vp)) {
167 			nbl_start_crit(vp, RW_READER);
168 			in_crit = 1;
169 		}
170 	}
171 
172 	bva.va_mask = AT_ALL;
173 	error = rfs4_delegated_getattr(vp, &bva, 0, cr);
174 
175 	/*
176 	 * If we can't get the attributes, then we can't do the
177 	 * right access checking.  So, we'll fail the request.
178 	 */
179 	if (error)
180 		goto out;
181 
182 #ifdef DEBUG
183 	if (rfs3_do_pre_op_attr)
184 		bvap = &bva;
185 #else
186 	bvap = &bva;
187 #endif
188 
189 	if (rdonly(exi, req) || vn_is_readonly(vp)) {
190 		resp->status = NFS3ERR_ROFS;
191 		goto out1;
192 	}
193 
	/*
	 * Guarded SETATTR: the client's idea of ctime must match the
	 * current ctime exactly (seconds and nanoseconds).
	 */
194 	if (args->guard.check &&
195 	    (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
196 	    args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
197 		resp->status = NFS3ERR_NOT_SYNC;
198 		goto out1;
199 	}
200 
	/*
	 * ATTR_UTIME is passed to VOP_SETATTR only when the client
	 * supplied the modification time itself.
	 */
201 	if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
202 		flag = ATTR_UTIME;
203 	else
204 		flag = 0;
205 
206 	/*
207 	 * If the filesystem is exported with nosuid, then mask off
208 	 * the setuid and setgid bits.
209 	 */
210 	if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
211 	    (exi->exi_export.ex_flags & EX_NOSUID))
212 		ava.va_mode &= ~(VSUID | VSGID);
213 
214 	/*
215 	 * We need to specially handle size changes because it is
216 	 * possible for the client to create a file with modes
217 	 * which indicate read-only, but with the file opened for
218 	 * writing.  If the client then tries to set the size of
219 	 * the file, then the normal access checking done in
220 	 * VOP_SETATTR would prevent the client from doing so,
221 	 * although it should be legal for it to do so.  To get
222 	 * around this, we do the access checking for ourselves
223 	 * and then use VOP_SPACE which doesn't do the access
224 	 * checking which VOP_SETATTR does. VOP_SPACE can only
225 	 * operate on VREG files, let VOP_SETATTR handle the other
226 	 * extremely rare cases.
227 	 * Also the client should not be allowed to change the
228 	 * size of the file if there is a conflicting non-blocking
229 	 * mandatory lock in the region of the change.
230 	 */
231 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
232 		if (in_crit) {
233 			u_offset_t offset;
234 			ssize_t length;
235 
			/*
			 * The range checked for lock conflicts is the span
			 * between the old and the new size, whichever way
			 * the file is being resized.
			 */
236 			if (ava.va_size < bva.va_size) {
237 				offset = ava.va_size;
238 				length = bva.va_size - ava.va_size;
239 			} else {
240 				offset = bva.va_size;
241 				length = ava.va_size - bva.va_size;
242 			}
243 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0)) {
244 				error = EACCES;
245 				goto out;
246 			}
247 		}
248 
		/*
		 * Only the file's owner takes the VOP_SPACE path.  AT_SIZE
		 * is cleared so the VOP_SETATTR call below is not asked to
		 * change the size as well.
		 */
249 		if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
250 			ava.va_mask &= ~AT_SIZE;
251 			bf.l_type = F_WRLCK;
252 			bf.l_whence = 0;
253 			bf.l_start = (off64_t)ava.va_size;
254 			bf.l_len = 0;
255 			bf.l_sysid = 0;
256 			bf.l_pid = 0;
257 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
258 			    (offset_t)ava.va_size, cr, NULL);
259 		}
260 	}
261 
	/* Apply whatever attributes are still selected in the mask. */
262 	if (!error && ava.va_mask)
263 		error = VOP_SETATTR(vp, &ava, flag, cr, NULL);
264 
	/*
	 * ava is reused from here on to hold the "after" attributes;
	 * avap stays NULL if they cannot be fetched.
	 */
265 #ifdef DEBUG
266 	if (rfs3_do_post_op_attr) {
267 		ava.va_mask = AT_ALL;
268 		avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
269 	} else
270 		avap = NULL;
271 #else
272 	ava.va_mask = AT_ALL;
273 	avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
274 #endif
275 
276 	/*
277 	 * Force modified metadata out to stable storage.
278 	 */
279 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
280 
281 	if (error)
282 		goto out;
283 
284 	if (in_crit)
285 		nbl_end_crit(vp);
286 	VN_RELE(vp);
287 
288 	resp->status = NFS3_OK;
289 	vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
290 	return;
291 
292 out:
293 	if (curthread->t_flag & T_WOULDBLOCK) {
294 		curthread->t_flag &= ~T_WOULDBLOCK;
295 		resp->status = NFS3ERR_JUKEBOX;
296 	} else
297 		resp->status = puterrno3(error);
298 out1:
299 	if (vp != NULL) {
300 		if (in_crit)
301 			nbl_end_crit(vp);
302 		VN_RELE(vp);
303 	}
304 	vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
305 }
306 
307 void *
308 rfs3_setattr_getfh(SETATTR3args *args)
309 {
310 
311 	return (&args->object);
312 }
313 
314 /* ARGSUSED */
315 void
316 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
317 	struct svc_req *req, cred_t *cr)
318 {
319 	int error;
320 	vnode_t *vp;
321 	vnode_t *dvp;
322 	struct vattr *vap;
323 	struct vattr va;
324 	struct vattr *dvap;
325 	struct vattr dva;
326 	nfs_fh3 *fhp;
327 	struct sec_ol sec = {0, 0};
328 	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
329 
330 	dvap = NULL;
331 
332 	/*
333 	 * Allow lookups from the root - the default
334 	 * location of the public filehandle.
335 	 */
336 	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
337 		dvp = rootdir;
338 		VN_HOLD(dvp);
339 	} else {
340 		dvp = nfs3_fhtovp(&args->what.dir, exi);
341 		if (dvp == NULL) {
342 			error = ESTALE;
343 			goto out;
344 		}
345 	}
346 
347 #ifdef DEBUG
348 	if (rfs3_do_pre_op_attr) {
349 		dva.va_mask = AT_ALL;
350 		dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
351 	}
352 #else
353 	dva.va_mask = AT_ALL;
354 	dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
355 #endif
356 
357 	if (args->what.name == nfs3nametoolong) {
358 		resp->status = NFS3ERR_NAMETOOLONG;
359 		goto out1;
360 	}
361 
362 	if (args->what.name == NULL || *(args->what.name) == '\0') {
363 		resp->status = NFS3ERR_ACCES;
364 		goto out1;
365 	}
366 
367 	fhp = &args->what.dir;
368 	if (strcmp(args->what.name, "..") == 0 &&
369 	    EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
370 		resp->status = NFS3ERR_NOENT;
371 		goto out1;
372 	}
373 
374 	/*
375 	 * If the public filehandle is used then allow
376 	 * a multi-component lookup
377 	 */
378 	if (PUBLIC_FH3(&args->what.dir)) {
379 		publicfh_flag = TRUE;
380 		error = rfs_publicfh_mclookup(args->what.name, dvp, cr, &vp,
381 					&exi, &sec);
382 		if (error && exi != NULL)
383 			exi_rele(exi);  /* See the comment below */
384 	} else {
385 		error = VOP_LOOKUP(dvp, args->what.name, &vp,
386 				NULL, 0, NULL, cr);
387 	}
388 
389 #ifdef DEBUG
390 	if (rfs3_do_post_op_attr) {
391 		dva.va_mask = AT_ALL;
392 		dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
393 	} else
394 		dvap = NULL;
395 #else
396 	dva.va_mask = AT_ALL;
397 	dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
398 #endif
399 
400 	if (error)
401 		goto out;
402 
403 	if (sec.sec_flags & SEC_QUERY) {
404 		error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
405 	} else {
406 		error = makefh3(&resp->resok.object, vp, exi);
407 		if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
408 			auth_weak = TRUE;
409 	}
410 
411 	if (error) {
412 		VN_RELE(vp);
413 		goto out;
414 	}
415 
416 	/*
417 	 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
418 	 * and have obtained a new exportinfo in exi which needs to be
419 	 * released. Note the the original exportinfo pointed to by exi
420 	 * will be released by the caller, common_dispatch.
421 	 */
422 	if (publicfh_flag)
423 		exi_rele(exi);
424 
425 	VN_RELE(dvp);
426 
427 #ifdef DEBUG
428 	if (rfs3_do_post_op_attr) {
429 		va.va_mask = AT_ALL;
430 		vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
431 	} else
432 		vap = NULL;
433 #else
434 	va.va_mask = AT_ALL;
435 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
436 #endif
437 
438 	VN_RELE(vp);
439 
440 	resp->status = NFS3_OK;
441 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
442 	vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
443 
444 	/*
445 	 * If it's public fh, no 0x81, and client's flavor is
446 	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
447 	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
448 	 */
449 	if (auth_weak)
450 		resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
451 
452 	return;
453 
454 out:
455 	if (curthread->t_flag & T_WOULDBLOCK) {
456 		curthread->t_flag &= ~T_WOULDBLOCK;
457 		resp->status = NFS3ERR_JUKEBOX;
458 	} else
459 		resp->status = puterrno3(error);
460 out1:
461 	if (dvp != NULL)
462 		VN_RELE(dvp);
463 	vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
464 
465 }
466 
467 void *
468 rfs3_lookup_getfh(LOOKUP3args *args)
469 {
470 
471 	return (&args->what.dir);
472 }
473 
474 /* ARGSUSED */
475 void
476 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
477 	struct svc_req *req, cred_t *cr)
478 {
479 	int error;
480 	vnode_t *vp;
481 	struct vattr *vap;
482 	struct vattr va;
483 	int checkwriteperm;
484 
485 	vap = NULL;
486 
487 	vp = nfs3_fhtovp(&args->object, exi);
488 	if (vp == NULL) {
489 		error = ESTALE;
490 		goto out;
491 	}
492 
493 	/*
494 	 * If the file system is exported read only, it is not appropriate
495 	 * to check write permissions for regular files and directories.
496 	 * Special files are interpreted by the client, so the underlying
497 	 * permissions are sent back to the client for interpretation.
498 	 */
499 	if (rdonly(exi, req) && (vp->v_type == VREG || vp->v_type == VDIR))
500 		checkwriteperm = 0;
501 	else
502 		checkwriteperm = 1;
503 
504 	/*
505 	 * We need the mode so that we can correctly determine access
506 	 * permissions relative to a mandatory lock file.  Access to
507 	 * mandatory lock files is denied on the server, so it might
508 	 * as well be reflected to the server during the open.
509 	 */
510 	va.va_mask = AT_MODE;
511 	error = VOP_GETATTR(vp, &va, 0, cr);
512 	if (error)
513 		goto out;
514 
515 #ifdef DEBUG
516 	if (rfs3_do_post_op_attr)
517 		vap = &va;
518 #else
519 	vap = &va;
520 #endif
521 
522 	resp->resok.access = 0;
523 
524 	if (args->access & ACCESS3_READ) {
525 		error = VOP_ACCESS(vp, VREAD, 0, cr);
526 		if (error) {
527 			if (curthread->t_flag & T_WOULDBLOCK)
528 				goto out;
529 		} else if (!MANDLOCK(vp, va.va_mode))
530 			resp->resok.access |= ACCESS3_READ;
531 	}
532 	if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
533 		error = VOP_ACCESS(vp, VEXEC, 0, cr);
534 		if (error) {
535 			if (curthread->t_flag & T_WOULDBLOCK)
536 				goto out;
537 		} else
538 			resp->resok.access |= ACCESS3_LOOKUP;
539 	}
540 	if (checkwriteperm &&
541 	    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
542 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
543 		if (error) {
544 			if (curthread->t_flag & T_WOULDBLOCK)
545 				goto out;
546 		} else if (!MANDLOCK(vp, va.va_mode)) {
547 			resp->resok.access |=
548 			    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
549 		}
550 	}
551 	if (checkwriteperm &&
552 	    (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
553 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
554 		if (error) {
555 			if (curthread->t_flag & T_WOULDBLOCK)
556 				goto out;
557 		} else
558 			resp->resok.access |= ACCESS3_DELETE;
559 	}
560 	if (args->access & ACCESS3_EXECUTE) {
561 		error = VOP_ACCESS(vp, VEXEC, 0, cr);
562 		if (error) {
563 			if (curthread->t_flag & T_WOULDBLOCK)
564 				goto out;
565 		} else if (!MANDLOCK(vp, va.va_mode))
566 			resp->resok.access |= ACCESS3_EXECUTE;
567 	}
568 
569 #ifdef DEBUG
570 	if (rfs3_do_post_op_attr) {
571 		va.va_mask = AT_ALL;
572 		vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
573 	} else
574 		vap = NULL;
575 #else
576 	va.va_mask = AT_ALL;
577 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
578 #endif
579 
580 	VN_RELE(vp);
581 
582 	resp->status = NFS3_OK;
583 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
584 	return;
585 
586 out:
587 	if (curthread->t_flag & T_WOULDBLOCK) {
588 		curthread->t_flag &= ~T_WOULDBLOCK;
589 		resp->status = NFS3ERR_JUKEBOX;
590 	} else
591 		resp->status = puterrno3(error);
592 	if (vp != NULL)
593 		VN_RELE(vp);
594 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
595 }
596 
597 void *
598 rfs3_access_getfh(ACCESS3args *args)
599 {
600 
601 	return (&args->object);
602 }
603 
604 /* ARGSUSED */
605 void
606 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
607 	struct svc_req *req, cred_t *cr)
608 {
609 	int error;
610 	vnode_t *vp;
611 	struct vattr *vap;
612 	struct vattr va;
613 	struct iovec iov;
614 	struct uio uio;
615 	char *data;
616 
617 	vap = NULL;
618 
619 	vp = nfs3_fhtovp(&args->symlink, exi);
620 	if (vp == NULL) {
621 		error = ESTALE;
622 		goto out;
623 	}
624 
625 	va.va_mask = AT_ALL;
626 	error = VOP_GETATTR(vp, &va, 0, cr);
627 	if (error)
628 		goto out;
629 
630 #ifdef DEBUG
631 	if (rfs3_do_post_op_attr)
632 		vap = &va;
633 #else
634 	vap = &va;
635 #endif
636 
637 	if (vp->v_type != VLNK) {
638 		resp->status = NFS3ERR_INVAL;
639 		goto out1;
640 	}
641 
642 	if (MANDLOCK(vp, va.va_mode)) {
643 		resp->status = NFS3ERR_ACCES;
644 		goto out1;
645 	}
646 
647 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
648 
649 	iov.iov_base = data;
650 	iov.iov_len = MAXPATHLEN;
651 	uio.uio_iov = &iov;
652 	uio.uio_iovcnt = 1;
653 	uio.uio_segflg = UIO_SYSSPACE;
654 	uio.uio_extflg = UIO_COPY_CACHED;
655 	uio.uio_loffset = 0;
656 	uio.uio_resid = MAXPATHLEN;
657 
658 	error = VOP_READLINK(vp, &uio, cr);
659 
660 #ifdef DEBUG
661 	if (rfs3_do_post_op_attr) {
662 		va.va_mask = AT_ALL;
663 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
664 	} else
665 		vap = NULL;
666 #else
667 	va.va_mask = AT_ALL;
668 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
669 #endif
670 
671 #if 0 /* notyet */
672 	/*
673 	 * Don't do this.  It causes local disk writes when just
674 	 * reading the file and the overhead is deemed larger
675 	 * than the benefit.
676 	 */
677 	/*
678 	 * Force modified metadata out to stable storage.
679 	 */
680 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
681 #endif
682 
683 	if (error) {
684 		kmem_free(data, MAXPATHLEN + 1);
685 		goto out;
686 	}
687 
688 	VN_RELE(vp);
689 
690 	resp->status = NFS3_OK;
691 	vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
692 	resp->resok.data = data;
693 	*(data + MAXPATHLEN - uio.uio_resid) = '\0';
694 	return;
695 
696 out:
697 	if (curthread->t_flag & T_WOULDBLOCK) {
698 		curthread->t_flag &= ~T_WOULDBLOCK;
699 		resp->status = NFS3ERR_JUKEBOX;
700 	} else
701 		resp->status = puterrno3(error);
702 out1:
703 	if (vp != NULL)
704 		VN_RELE(vp);
705 	vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
706 }
707 
708 void *
709 rfs3_readlink_getfh(READLINK3args *args)
710 {
711 
712 	return (&args->symlink);
713 }
714 
715 void
716 rfs3_readlink_free(READLINK3res *resp)
717 {
718 
719 	if (resp->status == NFS3_OK)
720 		kmem_free(resp->resok.data, MAXPATHLEN + 1);
721 }
722 
723 /* ARGSUSED */
724 void
725 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
726 	struct svc_req *req, cred_t *cr)
727 {
728 	int error;
729 	vnode_t *vp;
730 	struct vattr *vap;
731 	struct vattr va;
732 	struct iovec iov;
733 	struct uio uio;
734 	u_offset_t offset;
735 	mblk_t *mp;
736 	int alloc_err = 0;
737 	int in_crit = 0;
738 	int need_rwunlock = 0;
739 
740 	vap = NULL;
741 
742 	vp = nfs3_fhtovp(&args->file, exi);
743 	if (vp == NULL) {
744 		error = ESTALE;
745 		goto out;
746 	}
747 
748 	/*
749 	 * Check to see if the v4 side of the server has delegated
750 	 * this file.  If so, then we return JUKEBOX to allow the
751 	 * client to retrasmit its request.
752 	 */
753 	if (rfs4_check_delegated(FREAD, vp, FALSE)) {
754 		resp->status = NFS3ERR_JUKEBOX;
755 		goto out1;
756 	}
757 
758 	/*
759 	 * Enter the critical region before calling VOP_RWLOCK
760 	 * to avoid a deadlock with write requests.
761 	 */
762 	if (nbl_need_check(vp)) {
763 		nbl_start_crit(vp, RW_READER);
764 		in_crit = 1;
765 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0)) {
766 			error = EACCES;
767 			goto out;
768 		}
769 	}
770 
771 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
772 	need_rwunlock = 1;
773 
774 	va.va_mask = AT_ALL;
775 	error = VOP_GETATTR(vp, &va, 0, cr);
776 
777 	/*
778 	 * If we can't get the attributes, then we can't do the
779 	 * right access checking.  So, we'll fail the request.
780 	 */
781 	if (error)
782 		goto out;
783 
784 #ifdef DEBUG
785 	if (rfs3_do_post_op_attr)
786 		vap = &va;
787 #else
788 	vap = &va;
789 #endif
790 
791 	if (vp->v_type != VREG) {
792 		resp->status = NFS3ERR_INVAL;
793 		goto out1;
794 	}
795 
796 	if (crgetuid(cr) != va.va_uid) {
797 		error = VOP_ACCESS(vp, VREAD, 0, cr);
798 		if (error) {
799 			if (curthread->t_flag & T_WOULDBLOCK)
800 				goto out;
801 			error = VOP_ACCESS(vp, VEXEC, 0, cr);
802 			if (error)
803 				goto out;
804 		}
805 	}
806 
807 	if (MANDLOCK(vp, va.va_mode)) {
808 		resp->status = NFS3ERR_ACCES;
809 		goto out1;
810 	}
811 
812 	offset = args->offset;
813 	if (offset >= va.va_size) {
814 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
815 		if (in_crit)
816 			nbl_end_crit(vp);
817 		VN_RELE(vp);
818 		resp->status = NFS3_OK;
819 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
820 		resp->resok.count = 0;
821 		resp->resok.eof = TRUE;
822 		resp->resok.data.data_len = 0;
823 		resp->resok.data.data_val = NULL;
824 		resp->resok.data.mp = NULL;
825 		return;
826 	}
827 
828 	if (args->count == 0) {
829 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
830 		if (in_crit)
831 			nbl_end_crit(vp);
832 		VN_RELE(vp);
833 		resp->status = NFS3_OK;
834 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
835 		resp->resok.count = 0;
836 		resp->resok.eof = FALSE;
837 		resp->resok.data.data_len = 0;
838 		resp->resok.data.data_val = NULL;
839 		resp->resok.data.mp = NULL;
840 		return;
841 	}
842 
843 	/*
844 	 * do not allocate memory more the max. allowed
845 	 * transfer size
846 	 */
847 	if (args->count > rfs3_tsize(req))
848 		args->count = rfs3_tsize(req);
849 
850 	/*
851 	 * mp will contain the data to be sent out in the read reply.
852 	 * This will be freed after the reply has been sent out (by the
853 	 * driver).
854 	 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
855 	 * that the call to xdrmblk_putmblk() never fails.
856 	 */
857 	mp = allocb_wait(RNDUP(args->count), BPRI_MED, STR_NOSIG, &alloc_err);
858 	ASSERT(mp != NULL);
859 	ASSERT(alloc_err == 0);
860 
861 	iov.iov_base = (caddr_t)mp->b_datap->db_base;
862 	iov.iov_len = args->count;
863 	uio.uio_iov = &iov;
864 	uio.uio_iovcnt = 1;
865 	uio.uio_segflg = UIO_SYSSPACE;
866 	uio.uio_extflg = UIO_COPY_CACHED;
867 	uio.uio_loffset = args->offset;
868 	uio.uio_resid = args->count;
869 
870 	error = VOP_READ(vp, &uio, 0, cr, NULL);
871 
872 	if (error) {
873 		freeb(mp);
874 		goto out;
875 	}
876 
877 	va.va_mask = AT_ALL;
878 	error = VOP_GETATTR(vp, &va, 0, cr);
879 
880 #ifdef DEBUG
881 	if (rfs3_do_post_op_attr) {
882 		if (error)
883 			vap = NULL;
884 		else
885 			vap = &va;
886 	} else
887 		vap = NULL;
888 #else
889 	if (error)
890 		vap = NULL;
891 	else
892 		vap = &va;
893 #endif
894 
895 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
896 
897 #if 0 /* notyet */
898 	/*
899 	 * Don't do this.  It causes local disk writes when just
900 	 * reading the file and the overhead is deemed larger
901 	 * than the benefit.
902 	 */
903 	/*
904 	 * Force modified metadata out to stable storage.
905 	 */
906 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
907 #endif
908 
909 	if (in_crit)
910 		nbl_end_crit(vp);
911 	VN_RELE(vp);
912 
913 	resp->status = NFS3_OK;
914 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
915 	resp->resok.count = args->count - uio.uio_resid;
916 	if (!error && offset + resp->resok.count == va.va_size)
917 		resp->resok.eof = TRUE;
918 	else
919 		resp->resok.eof = FALSE;
920 	resp->resok.data.data_len = resp->resok.count;
921 	resp->resok.data.data_val = (char *)mp->b_datap->db_base;
922 
923 	resp->resok.data.mp = mp;
924 
925 	resp->resok.size = (uint_t)args->count;
926 	return;
927 
928 out:
929 	if (curthread->t_flag & T_WOULDBLOCK) {
930 		curthread->t_flag &= ~T_WOULDBLOCK;
931 		resp->status = NFS3ERR_JUKEBOX;
932 	} else
933 		resp->status = puterrno3(error);
934 out1:
935 	if (vp != NULL) {
936 		if (need_rwunlock)
937 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
938 		if (in_crit)
939 			nbl_end_crit(vp);
940 		VN_RELE(vp);
941 	}
942 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
943 }
944 
945 void
946 rfs3_read_free(READ3res *resp)
947 {
948 	mblk_t *mp;
949 
950 	if (resp->status == NFS3_OK) {
951 		mp = resp->resok.data.mp;
952 		if (mp != NULL)
953 			freeb(mp);
954 	}
955 }
956 
957 void *
958 rfs3_read_getfh(READ3args *args)
959 {
960 
961 	return (&args->file);
962 }
963 
/*
 * Size of the on-stack iovec array in rfs3_write().  A request whose mblk
 * chain has more links than this gets a kmem_alloc'ed iovec array instead.
 */
964 #define	MAX_IOVECS	12
965 
/*
 * DEBUG-only counters: how many WRITE requests fit in the on-stack iovec
 * array (hits) and how many needed an allocated one (misses).
 */
966 #ifdef DEBUG
967 static int rfs3_write_hits = 0;
968 static int rfs3_write_misses = 0;
969 #endif
970 
971 void
972 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
973 	struct svc_req *req, cred_t *cr)
974 {
975 	int error;
976 	vnode_t *vp;
977 	struct vattr *bvap = NULL;
978 	struct vattr bva;
979 	struct vattr *avap = NULL;
980 	struct vattr ava;
981 	u_offset_t rlimit;
982 	struct uio uio;
983 	struct iovec iov[MAX_IOVECS];
984 	mblk_t *m;
985 	struct iovec *iovp;
986 	int iovcnt;
987 	int ioflag;
988 	cred_t *savecred;
989 	int in_crit = 0;
990 	int rwlock_ret = -1;
991 
992 	vp = nfs3_fhtovp(&args->file, exi);
993 	if (vp == NULL) {
994 		error = ESTALE;
995 		goto out;
996 	}
997 
998 	/*
999 	 * Check to see if the v4 side of the server has delegated
1000 	 * this file.  If so, then we return JUKEBOX to allow the
1001 	 * client to retrasmit its request.
1002 	 */
1003 	if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1004 		resp->status = NFS3ERR_JUKEBOX;
1005 		goto out1;
1006 	}
1007 
1008 	/*
1009 	 * We have to enter the critical region before calling VOP_RWLOCK
1010 	 * to avoid a deadlock with ufs.
1011 	 */
1012 	if (nbl_need_check(vp)) {
1013 		nbl_start_crit(vp, RW_READER);
1014 		in_crit = 1;
1015 		if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0)) {
1016 			error = EACCES;
1017 			goto out;
1018 		}
1019 	}
1020 
1021 	rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1022 
1023 	bva.va_mask = AT_ALL;
1024 	error = VOP_GETATTR(vp, &bva, 0, cr);
1025 
1026 	/*
1027 	 * If we can't get the attributes, then we can't do the
1028 	 * right access checking.  So, we'll fail the request.
1029 	 */
1030 	if (error)
1031 		goto out;
1032 
1033 	bvap = &bva;
1034 #ifdef DEBUG
1035 	if (!rfs3_do_pre_op_attr)
1036 		bvap = NULL;
1037 #endif
1038 	avap = bvap;
1039 
1040 	if (args->count != args->data.data_len) {
1041 		resp->status = NFS3ERR_INVAL;
1042 		goto out1;
1043 	}
1044 
1045 	if (rdonly(exi, req)) {
1046 		resp->status = NFS3ERR_ROFS;
1047 		goto out1;
1048 	}
1049 
1050 	if (vp->v_type != VREG) {
1051 		resp->status = NFS3ERR_INVAL;
1052 		goto out1;
1053 	}
1054 
1055 	if (crgetuid(cr) != bva.va_uid &&
1056 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr)))
1057 		goto out;
1058 
1059 	if (MANDLOCK(vp, bva.va_mode)) {
1060 		resp->status = NFS3ERR_ACCES;
1061 		goto out1;
1062 	}
1063 
1064 	if (args->count == 0) {
1065 		VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1066 		VN_RELE(vp);
1067 		resp->status = NFS3_OK;
1068 		vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1069 		resp->resok.count = 0;
1070 		resp->resok.committed = args->stable;
1071 		resp->resok.verf = write3verf;
1072 		return;
1073 	}
1074 
1075 	if (args->mblk != NULL) {
1076 		iovcnt = 0;
1077 		for (m = args->mblk; m != NULL; m = m->b_cont)
1078 			iovcnt++;
1079 		if (iovcnt <= MAX_IOVECS) {
1080 #ifdef DEBUG
1081 			rfs3_write_hits++;
1082 #endif
1083 			iovp = iov;
1084 		} else {
1085 #ifdef DEBUG
1086 			rfs3_write_misses++;
1087 #endif
1088 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1089 		}
1090 		mblk_to_iov(args->mblk, iovcnt, iovp);
1091 	} else {
1092 		iovcnt = 1;
1093 		iovp = iov;
1094 		iovp->iov_base = args->data.data_val;
1095 		iovp->iov_len = args->count;
1096 	}
1097 
1098 	uio.uio_iov = iovp;
1099 	uio.uio_iovcnt = iovcnt;
1100 
1101 	uio.uio_segflg = UIO_SYSSPACE;
1102 	uio.uio_extflg = UIO_COPY_DEFAULT;
1103 	uio.uio_loffset = args->offset;
1104 	uio.uio_resid = args->count;
1105 	uio.uio_llimit = curproc->p_fsz_ctl;
1106 	rlimit = uio.uio_llimit - args->offset;
1107 	if (rlimit < (u_offset_t)uio.uio_resid)
1108 		uio.uio_resid = (int)rlimit;
1109 
1110 	if (args->stable == UNSTABLE)
1111 		ioflag = 0;
1112 	else if (args->stable == FILE_SYNC)
1113 		ioflag = FSYNC;
1114 	else if (args->stable == DATA_SYNC)
1115 		ioflag = FDSYNC;
1116 	else {
1117 		if (iovp != iov)
1118 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
1119 		resp->status = NFS3ERR_INVAL;
1120 		goto out1;
1121 	}
1122 
1123 	/*
1124 	 * We're changing creds because VM may fault and we need
1125 	 * the cred of the current thread to be used if quota
1126 	 * checking is enabled.
1127 	 */
1128 	savecred = curthread->t_cred;
1129 	curthread->t_cred = cr;
1130 	error = VOP_WRITE(vp, &uio, ioflag, cr, NULL);
1131 	curthread->t_cred = savecred;
1132 
1133 	if (iovp != iov)
1134 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
1135 
1136 	ava.va_mask = AT_ALL;
1137 	avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
1138 
1139 #ifdef DEBUG
1140 	if (!rfs3_do_post_op_attr)
1141 		avap = NULL;
1142 #endif
1143 
1144 	if (error)
1145 		goto out;
1146 
1147 	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1148 	if (in_crit)
1149 		nbl_end_crit(vp);
1150 	VN_RELE(vp);
1151 
1152 	/*
1153 	 * If we were unable to get the V_WRITELOCK_TRUE, then we
1154 	 * may not have accurate after attrs, so check if
1155 	 * we have both attributes, they have a non-zero va_seq, and
1156 	 * va_seq has changed by exactly one,
1157 	 * if not, turn off the before attr.
1158 	 */
1159 	if (rwlock_ret != V_WRITELOCK_TRUE) {
1160 		if (bvap == NULL || avap == NULL ||
1161 				bvap->va_seq == 0 || avap->va_seq == 0 ||
1162 				avap->va_seq != (bvap->va_seq + 1)) {
1163 			bvap = NULL;
1164 		}
1165 	}
1166 
1167 	resp->status = NFS3_OK;
1168 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1169 	resp->resok.count = args->count - uio.uio_resid;
1170 	resp->resok.committed = args->stable;
1171 	resp->resok.verf = write3verf;
1172 	return;
1173 
1174 out:
1175 	if (curthread->t_flag & T_WOULDBLOCK) {
1176 		curthread->t_flag &= ~T_WOULDBLOCK;
1177 		resp->status = NFS3ERR_JUKEBOX;
1178 	} else
1179 		resp->status = puterrno3(error);
1180 out1:
1181 	if (vp != NULL) {
1182 		if (rwlock_ret != -1)
1183 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1184 		if (in_crit)
1185 			nbl_end_crit(vp);
1186 		VN_RELE(vp);
1187 	}
1188 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1189 }
1190 
1191 void *
1192 rfs3_write_getfh(WRITE3args *args)
1193 {
1194 
1195 	return (&args->file);
1196 }
1197 
1198 void
1199 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1200 	struct svc_req *req, cred_t *cr)
1201 {
1202 	int error;
1203 	int in_crit = 0;
1204 	vnode_t *vp;
1205 	vnode_t *tvp = NULL;
1206 	vnode_t *dvp;
1207 	struct vattr *vap;
1208 	struct vattr va;
1209 	struct vattr *dbvap;
1210 	struct vattr dbva;
1211 	struct vattr *davap;
1212 	struct vattr dava;
1213 	enum vcexcl excl;
1214 	nfstime3 *mtime;
1215 	len_t reqsize;
1216 	bool_t trunc;
1217 
1218 	dbvap = NULL;
1219 	davap = NULL;
1220 
1221 	dvp = nfs3_fhtovp(&args->where.dir, exi);
1222 	if (dvp == NULL) {
1223 		error = ESTALE;
1224 		goto out;
1225 	}
1226 
1227 #ifdef DEBUG
1228 	if (rfs3_do_pre_op_attr) {
1229 		dbva.va_mask = AT_ALL;
1230 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1231 	} else
1232 		dbvap = NULL;
1233 #else
1234 	dbva.va_mask = AT_ALL;
1235 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1236 #endif
1237 	davap = dbvap;
1238 
1239 	if (args->where.name == nfs3nametoolong) {
1240 		resp->status = NFS3ERR_NAMETOOLONG;
1241 		goto out1;
1242 	}
1243 
1244 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1245 		resp->status = NFS3ERR_ACCES;
1246 		goto out1;
1247 	}
1248 
1249 	if (rdonly(exi, req)) {
1250 		resp->status = NFS3ERR_ROFS;
1251 		goto out1;
1252 	}
1253 
1254 	if (args->how.mode == EXCLUSIVE) {
1255 		va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1256 		va.va_type = VREG;
1257 		va.va_mode = (mode_t)0;
1258 		/*
1259 		 * Ensure no time overflows and that types match
1260 		 */
1261 		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1262 		va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1263 		va.va_mtime.tv_nsec = mtime->nseconds;
1264 		excl = EXCL;
1265 	} else {
1266 		error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1267 		    &va);
1268 		if (error)
1269 			goto out;
1270 		va.va_mask |= AT_TYPE;
1271 		va.va_type = VREG;
1272 		if (args->how.mode == GUARDED)
1273 			excl = EXCL;
1274 		else {
1275 			excl = NONEXCL;
1276 
1277 			/*
1278 			 * During creation of file in non-exclusive mode
1279 			 * if size of file is being set then make sure
1280 			 * that if the file already exists that no conflicting
1281 			 * non-blocking mandatory locks exists in the region
1282 			 * being modified. If there are conflicting locks fail
1283 			 * the operation with EACCES.
1284 			 */
1285 			if (va.va_mask & AT_SIZE) {
1286 				struct vattr tva;
1287 
1288 				/*
1289 				 * Does file already exist?
1290 				 */
1291 				error = VOP_LOOKUP(dvp, args->where.name, &tvp,
1292 						NULL, 0, NULL, cr);
1293 
1294 				/*
1295 				 * Check to see if the file has been delegated
1296 				 * to a v4 client.  If so, then begin recall of
1297 				 * the delegation and return JUKEBOX to allow
1298 				 * the client to retrasmit its request.
1299 				 */
1300 
1301 				trunc = va.va_size == 0;
1302 				if (!error &&
1303 				    rfs4_check_delegated(FWRITE, tvp, trunc)) {
1304 					resp->status = NFS3ERR_JUKEBOX;
1305 					goto out1;
1306 				}
1307 
1308 				/*
1309 				 * Check for NBMAND lock conflicts
1310 				 */
1311 				if (!error && nbl_need_check(tvp)) {
1312 					u_offset_t offset;
1313 					ssize_t len;
1314 
1315 					nbl_start_crit(tvp, RW_READER);
1316 					in_crit = 1;
1317 
1318 					tva.va_mask = AT_SIZE;
1319 					error = VOP_GETATTR(tvp, &tva, 0, cr);
1320 					/*
1321 					 * Can't check for conflicts, so return
1322 					 * error.
1323 					 */
1324 					if (error)
1325 						goto out;
1326 
1327 					offset = tva.va_size < va.va_size ?
1328 						tva.va_size : va.va_size;
1329 					len = tva.va_size < va.va_size ?
1330 						va.va_size - tva.va_size :
1331 						tva.va_size - va.va_size;
1332 					if (nbl_conflict(tvp, NBL_WRITE,
1333 							offset, len, 0)) {
1334 						error = EACCES;
1335 						goto out;
1336 					}
1337 				} else if (tvp) {
1338 					VN_RELE(tvp);
1339 					tvp = NULL;
1340 				}
1341 			}
1342 		}
1343 		if (va.va_mask & AT_SIZE)
1344 			reqsize = va.va_size;
1345 	}
1346 
1347 	/*
1348 	 * Must specify the mode.
1349 	 */
1350 	if (!(va.va_mask & AT_MODE)) {
1351 		resp->status = NFS3ERR_INVAL;
1352 		goto out1;
1353 	}
1354 
1355 	/*
1356 	 * If the filesystem is exported with nosuid, then mask off
1357 	 * the setuid and setgid bits.
1358 	 */
1359 	if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1360 		va.va_mode &= ~(VSUID | VSGID);
1361 
1362 tryagain:
1363 	/*
1364 	 * The file open mode used is VWRITE.  If the client needs
1365 	 * some other semantic, then it should do the access checking
1366 	 * itself.  It would have been nice to have the file open mode
1367 	 * passed as part of the arguments.
1368 	 */
1369 	error = VOP_CREATE(dvp, args->where.name, &va, excl, VWRITE,
1370 	    &vp, cr, 0);
1371 
1372 #ifdef DEBUG
1373 	if (rfs3_do_post_op_attr) {
1374 		dava.va_mask = AT_ALL;
1375 		davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1376 	} else
1377 		davap = NULL;
1378 #else
1379 	dava.va_mask = AT_ALL;
1380 	davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1381 #endif
1382 
1383 	if (error) {
1384 		/*
1385 		 * If we got something other than file already exists
1386 		 * then just return this error.  Otherwise, we got
1387 		 * EEXIST.  If we were doing a GUARDED create, then
1388 		 * just return this error.  Otherwise, we need to
1389 		 * make sure that this wasn't a duplicate of an
1390 		 * exclusive create request.
1391 		 *
1392 		 * The assumption is made that a non-exclusive create
1393 		 * request will never return EEXIST.
1394 		 */
1395 		if (error != EEXIST || args->how.mode == GUARDED)
1396 			goto out;
1397 		/*
1398 		 * Lookup the file so that we can get a vnode for it.
1399 		 */
1400 		error = VOP_LOOKUP(dvp, args->where.name, &vp, NULL, 0,
1401 		    NULL, cr);
1402 		if (error) {
1403 			/*
1404 			 * We couldn't find the file that we thought that
1405 			 * we just created.  So, we'll just try creating
1406 			 * it again.
1407 			 */
1408 			if (error == ENOENT)
1409 				goto tryagain;
1410 			goto out;
1411 		}
1412 
1413 		/*
1414 		 * If the file is delegated to a v4 client, go ahead
1415 		 * and initiate recall, this create is a hint that a
1416 		 * conflicting v3 open has occurred.
1417 		 */
1418 
1419 		if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1420 			VN_RELE(vp);
1421 			resp->status = NFS3ERR_JUKEBOX;
1422 			goto out1;
1423 		}
1424 
1425 		va.va_mask = AT_ALL;
1426 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1427 
1428 		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1429 		/* % with INT32_MAX to prevent overflows */
1430 		if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1431 		    vap->va_mtime.tv_sec !=
1432 		    (mtime->seconds % INT32_MAX) ||
1433 		    vap->va_mtime.tv_nsec != mtime->nseconds)) {
1434 			VN_RELE(vp);
1435 			error = EEXIST;
1436 			goto out;
1437 		}
1438 	} else {
1439 
1440 		if ((args->how.mode == UNCHECKED ||
1441 		    args->how.mode == GUARDED) &&
1442 		    args->how.createhow3_u.obj_attributes.size.set_it &&
1443 		    va.va_size == 0)
1444 			trunc = TRUE;
1445 		else
1446 			trunc = FALSE;
1447 
1448 		if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1449 			VN_RELE(vp);
1450 			resp->status = NFS3ERR_JUKEBOX;
1451 			goto out1;
1452 		}
1453 
1454 		va.va_mask = AT_ALL;
1455 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1456 
1457 		/*
1458 		 * We need to check to make sure that the file got
1459 		 * created to the indicated size.  If not, we do a
1460 		 * setattr to try to change the size, but we don't
1461 		 * try too hard.  This shouldn't a problem as most
1462 		 * clients will only specifiy a size of zero which
1463 		 * local file systems handle.  However, even if
1464 		 * the client does specify a non-zero size, it can
1465 		 * still recover by checking the size of the file
1466 		 * after it has created it and then issue a setattr
1467 		 * request of its own to set the size of the file.
1468 		 */
1469 		if (vap != NULL &&
1470 		    (args->how.mode == UNCHECKED ||
1471 		    args->how.mode == GUARDED) &&
1472 		    args->how.createhow3_u.obj_attributes.size.set_it &&
1473 		    vap->va_size != reqsize) {
1474 			va.va_mask = AT_SIZE;
1475 			va.va_size = reqsize;
1476 			(void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1477 			va.va_mask = AT_ALL;
1478 			vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1479 		}
1480 	}
1481 
1482 #ifdef DEBUG
1483 	if (!rfs3_do_post_op_attr)
1484 		vap = NULL;
1485 #endif
1486 
1487 #ifdef DEBUG
1488 	if (!rfs3_do_post_op_fh3)
1489 		resp->resok.obj.handle_follows = FALSE;
1490 	else {
1491 #endif
1492 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1493 	if (error)
1494 		resp->resok.obj.handle_follows = FALSE;
1495 	else
1496 		resp->resok.obj.handle_follows = TRUE;
1497 #ifdef DEBUG
1498 	}
1499 #endif
1500 
1501 	/*
1502 	 * Force modified data and metadata out to stable storage.
1503 	 */
1504 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
1505 	(void) VOP_FSYNC(dvp, 0, cr);
1506 
1507 	VN_RELE(vp);
1508 	VN_RELE(dvp);
1509 	if (tvp != NULL) {
1510 		if (in_crit)
1511 			nbl_end_crit(tvp);
1512 		VN_RELE(tvp);
1513 	}
1514 
1515 	resp->status = NFS3_OK;
1516 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1517 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1518 	return;
1519 
1520 out:
1521 	if (curthread->t_flag & T_WOULDBLOCK) {
1522 		curthread->t_flag &= ~T_WOULDBLOCK;
1523 		resp->status = NFS3ERR_JUKEBOX;
1524 	} else
1525 		resp->status = puterrno3(error);
1526 out1:
1527 	if (tvp != NULL) {
1528 		if (in_crit)
1529 			nbl_end_crit(tvp);
1530 		VN_RELE(tvp);
1531 	}
1532 	if (dvp != NULL)
1533 		VN_RELE(dvp);
1534 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1535 }
1536 
1537 void *
1538 rfs3_create_getfh(CREATE3args *args)
1539 {
1540 
1541 	return (&args->where.dir);
1542 }
1543 
1544 void
1545 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1546 	struct svc_req *req, cred_t *cr)
1547 {
1548 	int error;
1549 	vnode_t *vp = NULL;
1550 	vnode_t *dvp;
1551 	struct vattr *vap;
1552 	struct vattr va;
1553 	struct vattr *dbvap;
1554 	struct vattr dbva;
1555 	struct vattr *davap;
1556 	struct vattr dava;
1557 
1558 	dbvap = NULL;
1559 	davap = NULL;
1560 
1561 	dvp = nfs3_fhtovp(&args->where.dir, exi);
1562 	if (dvp == NULL) {
1563 		error = ESTALE;
1564 		goto out;
1565 	}
1566 
1567 #ifdef DEBUG
1568 	if (rfs3_do_pre_op_attr) {
1569 		dbva.va_mask = AT_ALL;
1570 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1571 	} else
1572 		dbvap = NULL;
1573 #else
1574 	dbva.va_mask = AT_ALL;
1575 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1576 #endif
1577 	davap = dbvap;
1578 
1579 	if (args->where.name == nfs3nametoolong) {
1580 		resp->status = NFS3ERR_NAMETOOLONG;
1581 		goto out1;
1582 	}
1583 
1584 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1585 		resp->status = NFS3ERR_ACCES;
1586 		goto out1;
1587 	}
1588 
1589 	if (rdonly(exi, req)) {
1590 		resp->status = NFS3ERR_ROFS;
1591 		goto out1;
1592 	}
1593 
1594 	error = sattr3_to_vattr(&args->attributes, &va);
1595 	if (error)
1596 		goto out;
1597 
1598 	if (!(va.va_mask & AT_MODE)) {
1599 		resp->status = NFS3ERR_INVAL;
1600 		goto out1;
1601 	}
1602 
1603 	va.va_mask |= AT_TYPE;
1604 	va.va_type = VDIR;
1605 
1606 	error = VOP_MKDIR(dvp, args->where.name, &va, &vp, cr);
1607 
1608 #ifdef DEBUG
1609 	if (rfs3_do_post_op_attr) {
1610 		dava.va_mask = AT_ALL;
1611 		davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1612 	} else
1613 		davap = NULL;
1614 #else
1615 	dava.va_mask = AT_ALL;
1616 	davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1617 #endif
1618 
1619 	/*
1620 	 * Force modified data and metadata out to stable storage.
1621 	 */
1622 	(void) VOP_FSYNC(dvp, 0, cr);
1623 
1624 	if (error)
1625 		goto out;
1626 
1627 	VN_RELE(dvp);
1628 
1629 #ifdef DEBUG
1630 	if (!rfs3_do_post_op_fh3)
1631 		resp->resok.obj.handle_follows = FALSE;
1632 	else {
1633 #endif
1634 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1635 	if (error)
1636 		resp->resok.obj.handle_follows = FALSE;
1637 	else
1638 		resp->resok.obj.handle_follows = TRUE;
1639 #ifdef DEBUG
1640 	}
1641 #endif
1642 
1643 #ifdef DEBUG
1644 	if (rfs3_do_post_op_attr) {
1645 		va.va_mask = AT_ALL;
1646 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1647 	} else
1648 		vap = NULL;
1649 #else
1650 	va.va_mask = AT_ALL;
1651 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1652 #endif
1653 
1654 	/*
1655 	 * Force modified data and metadata out to stable storage.
1656 	 */
1657 	(void) VOP_FSYNC(vp, 0, cr);
1658 
1659 	VN_RELE(vp);
1660 
1661 	resp->status = NFS3_OK;
1662 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1663 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1664 	return;
1665 
1666 out:
1667 	if (curthread->t_flag & T_WOULDBLOCK) {
1668 		curthread->t_flag &= ~T_WOULDBLOCK;
1669 		resp->status = NFS3ERR_JUKEBOX;
1670 	} else
1671 		resp->status = puterrno3(error);
1672 out1:
1673 	if (dvp != NULL)
1674 		VN_RELE(dvp);
1675 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1676 }
1677 
1678 void *
1679 rfs3_mkdir_getfh(MKDIR3args *args)
1680 {
1681 
1682 	return (&args->where.dir);
1683 }
1684 
1685 void
1686 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
1687 	struct svc_req *req, cred_t *cr)
1688 {
1689 	int error;
1690 	vnode_t *vp;
1691 	vnode_t *dvp;
1692 	struct vattr *vap;
1693 	struct vattr va;
1694 	struct vattr *dbvap;
1695 	struct vattr dbva;
1696 	struct vattr *davap;
1697 	struct vattr dava;
1698 
1699 	dbvap = NULL;
1700 	davap = NULL;
1701 
1702 	dvp = nfs3_fhtovp(&args->where.dir, exi);
1703 	if (dvp == NULL) {
1704 		error = ESTALE;
1705 		goto out;
1706 	}
1707 
1708 #ifdef DEBUG
1709 	if (rfs3_do_pre_op_attr) {
1710 		dbva.va_mask = AT_ALL;
1711 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1712 	} else
1713 		dbvap = NULL;
1714 #else
1715 	dbva.va_mask = AT_ALL;
1716 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1717 #endif
1718 	davap = dbvap;
1719 
1720 	if (args->where.name == nfs3nametoolong) {
1721 		resp->status = NFS3ERR_NAMETOOLONG;
1722 		goto out1;
1723 	}
1724 
1725 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1726 		resp->status = NFS3ERR_ACCES;
1727 		goto out1;
1728 	}
1729 
1730 	if (rdonly(exi, req)) {
1731 		resp->status = NFS3ERR_ROFS;
1732 		goto out1;
1733 	}
1734 
1735 	error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
1736 	if (error)
1737 		goto out;
1738 
1739 	if (!(va.va_mask & AT_MODE)) {
1740 		resp->status = NFS3ERR_INVAL;
1741 		goto out1;
1742 	}
1743 
1744 	if (args->symlink.symlink_data == nfs3nametoolong) {
1745 		resp->status = NFS3ERR_NAMETOOLONG;
1746 		goto out1;
1747 	}
1748 
1749 	va.va_mask |= AT_TYPE;
1750 	va.va_type = VLNK;
1751 
1752 	error = VOP_SYMLINK(dvp, args->where.name, &va,
1753 	    args->symlink.symlink_data, cr);
1754 
1755 #ifdef DEBUG
1756 	if (rfs3_do_post_op_attr) {
1757 		dava.va_mask = AT_ALL;
1758 		davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1759 	} else
1760 		davap = NULL;
1761 #else
1762 	dava.va_mask = AT_ALL;
1763 	davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1764 #endif
1765 
1766 	if (error)
1767 		goto out;
1768 
1769 	error = VOP_LOOKUP(dvp, args->where.name, &vp, NULL, 0, NULL, cr);
1770 
1771 	/*
1772 	 * Force modified data and metadata out to stable storage.
1773 	 */
1774 	(void) VOP_FSYNC(dvp, 0, cr);
1775 
1776 	VN_RELE(dvp);
1777 
1778 	resp->status = NFS3_OK;
1779 	if (error) {
1780 		resp->resok.obj.handle_follows = FALSE;
1781 		vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
1782 		vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1783 		return;
1784 	}
1785 
1786 #ifdef DEBUG
1787 	if (!rfs3_do_post_op_fh3)
1788 		resp->resok.obj.handle_follows = FALSE;
1789 	else {
1790 #endif
1791 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1792 	if (error)
1793 		resp->resok.obj.handle_follows = FALSE;
1794 	else
1795 		resp->resok.obj.handle_follows = TRUE;
1796 #ifdef DEBUG
1797 	}
1798 #endif
1799 
1800 #ifdef DEBUG
1801 	if (rfs3_do_post_op_attr) {
1802 		va.va_mask = AT_ALL;
1803 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1804 	} else
1805 		vap = NULL;
1806 #else
1807 	va.va_mask = AT_ALL;
1808 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1809 #endif
1810 
1811 	/*
1812 	 * Force modified data and metadata out to stable storage.
1813 	 */
1814 	(void) VOP_FSYNC(vp, 0, cr);
1815 
1816 	VN_RELE(vp);
1817 
1818 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1819 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1820 	return;
1821 
1822 out:
1823 	if (curthread->t_flag & T_WOULDBLOCK) {
1824 		curthread->t_flag &= ~T_WOULDBLOCK;
1825 		resp->status = NFS3ERR_JUKEBOX;
1826 	} else
1827 		resp->status = puterrno3(error);
1828 out1:
1829 	if (dvp != NULL)
1830 		VN_RELE(dvp);
1831 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1832 }
1833 
1834 void *
1835 rfs3_symlink_getfh(SYMLINK3args *args)
1836 {
1837 
1838 	return (&args->where.dir);
1839 }
1840 
1841 void
1842 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
1843 	struct svc_req *req, cred_t *cr)
1844 {
1845 	int error;
1846 	vnode_t *vp;
1847 	vnode_t *dvp;
1848 	struct vattr *vap;
1849 	struct vattr va;
1850 	struct vattr *dbvap;
1851 	struct vattr dbva;
1852 	struct vattr *davap;
1853 	struct vattr dava;
1854 	int mode;
1855 	enum vcexcl excl;
1856 
1857 	dbvap = NULL;
1858 	davap = NULL;
1859 
1860 	dvp = nfs3_fhtovp(&args->where.dir, exi);
1861 	if (dvp == NULL) {
1862 		error = ESTALE;
1863 		goto out;
1864 	}
1865 
1866 #ifdef DEBUG
1867 	if (rfs3_do_pre_op_attr) {
1868 		dbva.va_mask = AT_ALL;
1869 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1870 	} else
1871 		dbvap = NULL;
1872 #else
1873 	dbva.va_mask = AT_ALL;
1874 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1875 #endif
1876 	davap = dbvap;
1877 
1878 	if (args->where.name == nfs3nametoolong) {
1879 		resp->status = NFS3ERR_NAMETOOLONG;
1880 		goto out1;
1881 	}
1882 
1883 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1884 		resp->status = NFS3ERR_ACCES;
1885 		goto out1;
1886 	}
1887 
1888 	if (rdonly(exi, req)) {
1889 		resp->status = NFS3ERR_ROFS;
1890 		goto out1;
1891 	}
1892 
1893 	switch (args->what.type) {
1894 	case NF3CHR:
1895 	case NF3BLK:
1896 		error = sattr3_to_vattr(
1897 		    &args->what.mknoddata3_u.device.dev_attributes, &va);
1898 		if (error)
1899 			goto out;
1900 		if (secpolicy_sys_devices(cr) != 0) {
1901 			resp->status = NFS3ERR_PERM;
1902 			goto out1;
1903 		}
1904 		if (args->what.type == NF3CHR)
1905 			va.va_type = VCHR;
1906 		else
1907 			va.va_type = VBLK;
1908 		va.va_rdev = makedevice(
1909 		    args->what.mknoddata3_u.device.spec.specdata1,
1910 		    args->what.mknoddata3_u.device.spec.specdata2);
1911 		va.va_mask |= AT_TYPE | AT_RDEV;
1912 		break;
1913 	case NF3SOCK:
1914 		error = sattr3_to_vattr(
1915 		    &args->what.mknoddata3_u.pipe_attributes, &va);
1916 		if (error)
1917 			goto out;
1918 		va.va_type = VSOCK;
1919 		va.va_mask |= AT_TYPE;
1920 		break;
1921 	case NF3FIFO:
1922 		error = sattr3_to_vattr(
1923 		    &args->what.mknoddata3_u.pipe_attributes, &va);
1924 		if (error)
1925 			goto out;
1926 		va.va_type = VFIFO;
1927 		va.va_mask |= AT_TYPE;
1928 		break;
1929 	default:
1930 		resp->status = NFS3ERR_BADTYPE;
1931 		goto out1;
1932 	}
1933 
1934 	/*
1935 	 * Must specify the mode.
1936 	 */
1937 	if (!(va.va_mask & AT_MODE)) {
1938 		resp->status = NFS3ERR_INVAL;
1939 		goto out1;
1940 	}
1941 
1942 	excl = EXCL;
1943 
1944 	mode = 0;
1945 
1946 	error = VOP_CREATE(dvp, args->where.name, &va, excl, mode,
1947 	    &vp, cr, 0);
1948 
1949 #ifdef DEBUG
1950 	if (rfs3_do_post_op_attr) {
1951 		dava.va_mask = AT_ALL;
1952 		davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1953 	} else
1954 		davap = NULL;
1955 #else
1956 	dava.va_mask = AT_ALL;
1957 	davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1958 #endif
1959 
1960 	/*
1961 	 * Force modified data and metadata out to stable storage.
1962 	 */
1963 	(void) VOP_FSYNC(dvp, 0, cr);
1964 
1965 	if (error)
1966 		goto out;
1967 
1968 	VN_RELE(dvp);
1969 
1970 	resp->status = NFS3_OK;
1971 
1972 #ifdef DEBUG
1973 	if (!rfs3_do_post_op_fh3)
1974 		resp->resok.obj.handle_follows = FALSE;
1975 	else {
1976 #endif
1977 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1978 	if (error)
1979 		resp->resok.obj.handle_follows = FALSE;
1980 	else
1981 		resp->resok.obj.handle_follows = TRUE;
1982 #ifdef DEBUG
1983 	}
1984 #endif
1985 
1986 #ifdef DEBUG
1987 	if (rfs3_do_post_op_attr) {
1988 		va.va_mask = AT_ALL;
1989 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1990 	} else
1991 		vap = NULL;
1992 #else
1993 	va.va_mask = AT_ALL;
1994 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1995 #endif
1996 
1997 	/*
1998 	 * Force modified metadata out to stable storage.
1999 	 */
2000 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
2001 
2002 	VN_RELE(vp);
2003 
2004 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2005 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2006 	return;
2007 
2008 out:
2009 	if (curthread->t_flag & T_WOULDBLOCK) {
2010 		curthread->t_flag &= ~T_WOULDBLOCK;
2011 		resp->status = NFS3ERR_JUKEBOX;
2012 	} else
2013 		resp->status = puterrno3(error);
2014 out1:
2015 	if (dvp != NULL)
2016 		VN_RELE(dvp);
2017 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2018 }
2019 
2020 void *
2021 rfs3_mknod_getfh(MKNOD3args *args)
2022 {
2023 
2024 	return (&args->where.dir);
2025 }
2026 
2027 void
2028 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2029 	struct svc_req *req, cred_t *cr)
2030 {
2031 	int error = 0;
2032 	vnode_t *vp;
2033 	struct vattr *bvap;
2034 	struct vattr bva;
2035 	struct vattr *avap;
2036 	struct vattr ava;
2037 	vnode_t *targvp = NULL;
2038 
2039 	bvap = NULL;
2040 	avap = NULL;
2041 
2042 	vp = nfs3_fhtovp(&args->object.dir, exi);
2043 	if (vp == NULL) {
2044 		error = ESTALE;
2045 		goto out;
2046 	}
2047 
2048 #ifdef DEBUG
2049 	if (rfs3_do_pre_op_attr) {
2050 		bva.va_mask = AT_ALL;
2051 		bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2052 	} else
2053 		bvap = NULL;
2054 #else
2055 	bva.va_mask = AT_ALL;
2056 	bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2057 #endif
2058 	avap = bvap;
2059 
2060 	if (vp->v_type != VDIR) {
2061 		resp->status = NFS3ERR_NOTDIR;
2062 		goto out1;
2063 	}
2064 
2065 	if (args->object.name == nfs3nametoolong) {
2066 		resp->status = NFS3ERR_NAMETOOLONG;
2067 		goto out1;
2068 	}
2069 
2070 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2071 		resp->status = NFS3ERR_ACCES;
2072 		goto out1;
2073 	}
2074 
2075 	if (rdonly(exi, req)) {
2076 		resp->status = NFS3ERR_ROFS;
2077 		goto out1;
2078 	}
2079 
2080 	/*
2081 	 * Check for a conflict with a non-blocking mandatory share
2082 	 * reservation and V4 delegations
2083 	 */
2084 	error = VOP_LOOKUP(vp, args->object.name, &targvp, NULL, 0,
2085 			NULL, cr);
2086 	if (error != 0)
2087 		goto out;
2088 
2089 	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2090 		resp->status = NFS3ERR_JUKEBOX;
2091 		goto out1;
2092 	}
2093 
2094 	if (!nbl_need_check(targvp)) {
2095 		error = VOP_REMOVE(vp, args->object.name, cr);
2096 	} else {
2097 		nbl_start_crit(targvp, RW_READER);
2098 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0)) {
2099 			error = EACCES;
2100 		} else {
2101 			error = VOP_REMOVE(vp, args->object.name, cr);
2102 		}
2103 		nbl_end_crit(targvp);
2104 	}
2105 	VN_RELE(targvp);
2106 	targvp = NULL;
2107 
2108 #ifdef DEBUG
2109 	if (rfs3_do_post_op_attr) {
2110 		ava.va_mask = AT_ALL;
2111 		avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2112 	} else
2113 		avap = NULL;
2114 #else
2115 	ava.va_mask = AT_ALL;
2116 	avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2117 #endif
2118 
2119 	/*
2120 	 * Force modified data and metadata out to stable storage.
2121 	 */
2122 	(void) VOP_FSYNC(vp, 0, cr);
2123 
2124 	if (error)
2125 		goto out;
2126 
2127 	VN_RELE(vp);
2128 
2129 	resp->status = NFS3_OK;
2130 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2131 	return;
2132 
2133 out:
2134 	if (curthread->t_flag & T_WOULDBLOCK) {
2135 		curthread->t_flag &= ~T_WOULDBLOCK;
2136 		resp->status = NFS3ERR_JUKEBOX;
2137 	} else
2138 		resp->status = puterrno3(error);
2139 out1:
2140 	if (vp != NULL)
2141 		VN_RELE(vp);
2142 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2143 }
2144 
2145 void *
2146 rfs3_remove_getfh(REMOVE3args *args)
2147 {
2148 
2149 	return (&args->object.dir);
2150 }
2151 
2152 void
2153 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2154 	struct svc_req *req, cred_t *cr)
2155 {
2156 	int error;
2157 	vnode_t *vp;
2158 	struct vattr *bvap;
2159 	struct vattr bva;
2160 	struct vattr *avap;
2161 	struct vattr ava;
2162 
2163 	bvap = NULL;
2164 	avap = NULL;
2165 
2166 	vp = nfs3_fhtovp(&args->object.dir, exi);
2167 	if (vp == NULL) {
2168 		error = ESTALE;
2169 		goto out;
2170 	}
2171 
2172 #ifdef DEBUG
2173 	if (rfs3_do_pre_op_attr) {
2174 		bva.va_mask = AT_ALL;
2175 		bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2176 	} else
2177 		bvap = NULL;
2178 #else
2179 	bva.va_mask = AT_ALL;
2180 	bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2181 #endif
2182 	avap = bvap;
2183 
2184 	if (vp->v_type != VDIR) {
2185 		resp->status = NFS3ERR_NOTDIR;
2186 		goto out1;
2187 	}
2188 
2189 	if (args->object.name == nfs3nametoolong) {
2190 		resp->status = NFS3ERR_NAMETOOLONG;
2191 		goto out1;
2192 	}
2193 
2194 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2195 		resp->status = NFS3ERR_ACCES;
2196 		goto out1;
2197 	}
2198 
2199 	if (rdonly(exi, req)) {
2200 		resp->status = NFS3ERR_ROFS;
2201 		goto out1;
2202 	}
2203 
2204 	error = VOP_RMDIR(vp, args->object.name, rootdir, cr);
2205 
2206 #ifdef DEBUG
2207 	if (rfs3_do_post_op_attr) {
2208 		ava.va_mask = AT_ALL;
2209 		avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2210 	} else
2211 		avap = NULL;
2212 #else
2213 	ava.va_mask = AT_ALL;
2214 	avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2215 #endif
2216 
2217 	/*
2218 	 * Force modified data and metadata out to stable storage.
2219 	 */
2220 	(void) VOP_FSYNC(vp, 0, cr);
2221 
2222 	if (error) {
2223 		/*
2224 		 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2225 		 * if the directory is not empty.  A System V NFS server
2226 		 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2227 		 * over the wire.
2228 		 */
2229 		if (error == EEXIST)
2230 			error = ENOTEMPTY;
2231 		goto out;
2232 	}
2233 
2234 	VN_RELE(vp);
2235 
2236 	resp->status = NFS3_OK;
2237 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2238 	return;
2239 
2240 out:
2241 	if (curthread->t_flag & T_WOULDBLOCK) {
2242 		curthread->t_flag &= ~T_WOULDBLOCK;
2243 		resp->status = NFS3ERR_JUKEBOX;
2244 	} else
2245 		resp->status = puterrno3(error);
2246 out1:
2247 	if (vp != NULL)
2248 		VN_RELE(vp);
2249 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2250 }
2251 
2252 void *
2253 rfs3_rmdir_getfh(RMDIR3args *args)
2254 {
2255 
2256 	return (&args->object.dir);
2257 }
2258 
2259 void
2260 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2261 	struct svc_req *req, cred_t *cr)
2262 {
2263 	int error = 0;
2264 	vnode_t *fvp;
2265 	vnode_t *tvp;
2266 	vnode_t *targvp;
2267 	struct vattr *fbvap;
2268 	struct vattr fbva;
2269 	struct vattr *favap;
2270 	struct vattr fava;
2271 	struct vattr *tbvap;
2272 	struct vattr tbva;
2273 	struct vattr *tavap;
2274 	struct vattr tava;
2275 	nfs_fh3 *fh3;
2276 	struct exportinfo *to_exi;
2277 	vnode_t *srcvp = NULL;
2278 
2279 	fbvap = NULL;
2280 	favap = NULL;
2281 	tbvap = NULL;
2282 	tavap = NULL;
2283 	tvp = NULL;
2284 
2285 	fvp = nfs3_fhtovp(&args->from.dir, exi);
2286 	if (fvp == NULL) {
2287 		error = ESTALE;
2288 		goto out;
2289 	}
2290 
2291 #ifdef DEBUG
2292 	if (rfs3_do_pre_op_attr) {
2293 		fbva.va_mask = AT_ALL;
2294 		fbvap = VOP_GETATTR(fvp, &fbva, 0, cr) ? NULL : &fbva;
2295 	} else
2296 		fbvap = NULL;
2297 #else
2298 	fbva.va_mask = AT_ALL;
2299 	fbvap = VOP_GETATTR(fvp, &fbva, 0, cr) ? NULL : &fbva;
2300 #endif
2301 	favap = fbvap;
2302 
2303 	fh3 = &args->to.dir;
2304 	to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2305 	if (to_exi == NULL) {
2306 		resp->status = NFS3ERR_ACCES;
2307 		goto out1;
2308 	}
2309 	exi_rele(to_exi);
2310 
2311 	if (to_exi != exi) {
2312 		resp->status = NFS3ERR_XDEV;
2313 		goto out1;
2314 	}
2315 
2316 	tvp = nfs3_fhtovp(&args->to.dir, exi);
2317 	if (tvp == NULL) {
2318 		error = ESTALE;
2319 		goto out;
2320 	}
2321 
2322 #ifdef DEBUG
2323 	if (rfs3_do_pre_op_attr) {
2324 		tbva.va_mask = AT_ALL;
2325 		tbvap = VOP_GETATTR(tvp, &tbva, 0, cr) ? NULL : &tbva;
2326 	} else
2327 		tbvap = NULL;
2328 #else
2329 	tbva.va_mask = AT_ALL;
2330 	tbvap = VOP_GETATTR(tvp, &tbva, 0, cr) ? NULL : &tbva;
2331 #endif
2332 	tavap = tbvap;
2333 
2334 	if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2335 		resp->status = NFS3ERR_NOTDIR;
2336 		goto out1;
2337 	}
2338 
2339 	if (args->from.name == nfs3nametoolong ||
2340 	    args->to.name == nfs3nametoolong) {
2341 		resp->status = NFS3ERR_NAMETOOLONG;
2342 		goto out1;
2343 	}
2344 	if (args->from.name == NULL || *(args->from.name) == '\0' ||
2345 	    args->to.name == NULL || *(args->to.name) == '\0') {
2346 		resp->status = NFS3ERR_ACCES;
2347 		goto out1;
2348 	}
2349 
2350 	if (rdonly(exi, req)) {
2351 		resp->status = NFS3ERR_ROFS;
2352 		goto out1;
2353 	}
2354 
2355 	/*
2356 	 * Check for a conflict with a non-blocking mandatory share
2357 	 * reservation or V4 delegations.
2358 	 */
2359 	error = VOP_LOOKUP(fvp, args->from.name, &srcvp, NULL, 0,
2360 			NULL, cr);
2361 	if (error != 0)
2362 		goto out;
2363 
2364 	/*
2365 	 * If we rename a delegated file we should recall the
2366 	 * delegation, since future opens should fail or would
2367 	 * refer to a new file.
2368 	 */
2369 	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2370 		resp->status = NFS3ERR_JUKEBOX;
2371 		goto out1;
2372 	}
2373 
2374 	/*
2375 	 * Check for renaming over a delegated file.  Check rfs4_deleg_policy
2376 	 * first to avoid VOP_LOOKUP if possible.
2377 	 */
2378 	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2379 	    VOP_LOOKUP(tvp, args->to.name, &targvp, NULL, 0, NULL, cr) == 0) {
2380 
2381 		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2382 			VN_RELE(targvp);
2383 			resp->status = NFS3ERR_JUKEBOX;
2384 			goto out1;
2385 		}
2386 		VN_RELE(targvp);
2387 	}
2388 
2389 	if (!nbl_need_check(srcvp)) {
2390 		error = VOP_RENAME(fvp, args->from.name, tvp,
2391 				    args->to.name, cr);
2392 	} else {
2393 		nbl_start_crit(srcvp, RW_READER);
2394 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0)) {
2395 			error = EACCES;
2396 		} else {
2397 			error = VOP_RENAME(fvp, args->from.name, tvp,
2398 				    args->to.name, cr);
2399 		}
2400 		nbl_end_crit(srcvp);
2401 	}
2402 	if (error == 0) {
2403 		char *tmp;
2404 
2405 		/* fix the path name for the renamed file */
2406 		mutex_enter(&srcvp->v_lock);
2407 		tmp = srcvp->v_path;
2408 		srcvp->v_path = NULL;
2409 		mutex_exit(&srcvp->v_lock);
2410 		vn_setpath(rootdir, tvp, srcvp, args->to.name,
2411 				strlen(args->to.name));
2412 		if (tmp != NULL)
2413 			kmem_free(tmp, strlen(tmp) + 1);
2414 	}
2415 	VN_RELE(srcvp);
2416 	srcvp = NULL;
2417 
2418 #ifdef DEBUG
2419 	if (rfs3_do_post_op_attr) {
2420 		fava.va_mask = AT_ALL;
2421 		favap = VOP_GETATTR(fvp, &fava, 0, cr) ? NULL : &fava;
2422 		tava.va_mask = AT_ALL;
2423 		tavap = VOP_GETATTR(tvp, &tava, 0, cr) ? NULL : &tava;
2424 	} else {
2425 		favap = NULL;
2426 		tavap = NULL;
2427 	}
2428 #else
2429 	fava.va_mask = AT_ALL;
2430 	favap = VOP_GETATTR(fvp, &fava, 0, cr) ? NULL : &fava;
2431 	tava.va_mask = AT_ALL;
2432 	tavap = VOP_GETATTR(tvp, &tava, 0, cr) ? NULL : &tava;
2433 #endif
2434 
2435 	/*
2436 	 * Force modified data and metadata out to stable storage.
2437 	 */
2438 	(void) VOP_FSYNC(fvp, 0, cr);
2439 	(void) VOP_FSYNC(tvp, 0, cr);
2440 
2441 	if (error)
2442 		goto out;
2443 
2444 	VN_RELE(tvp);
2445 	VN_RELE(fvp);
2446 
2447 	resp->status = NFS3_OK;
2448 	vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2449 	vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2450 	return;
2451 
2452 out:
2453 	if (curthread->t_flag & T_WOULDBLOCK) {
2454 		curthread->t_flag &= ~T_WOULDBLOCK;
2455 		resp->status = NFS3ERR_JUKEBOX;
2456 	} else
2457 		resp->status = puterrno3(error);
2458 out1:
2459 	if (fvp != NULL)
2460 		VN_RELE(fvp);
2461 	if (tvp != NULL)
2462 		VN_RELE(tvp);
2463 	vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2464 	vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2465 }
2466 
2467 void *
2468 rfs3_rename_getfh(RENAME3args *args)
2469 {
2470 
2471 	return (&args->from.dir);
2472 }
2473 
2474 void
2475 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2476 	struct svc_req *req, cred_t *cr)
2477 {
2478 	int error;
2479 	vnode_t *vp;
2480 	vnode_t *dvp;
2481 	struct vattr *vap;
2482 	struct vattr va;
2483 	struct vattr *bvap;
2484 	struct vattr bva;
2485 	struct vattr *avap;
2486 	struct vattr ava;
2487 	nfs_fh3	*fh3;
2488 	struct exportinfo *to_exi;
2489 
2490 	vap = NULL;
2491 	bvap = NULL;
2492 	avap = NULL;
2493 	dvp = NULL;
2494 
2495 	vp = nfs3_fhtovp(&args->file, exi);
2496 	if (vp == NULL) {
2497 		error = ESTALE;
2498 		goto out;
2499 	}
2500 
2501 #ifdef DEBUG
2502 	if (rfs3_do_pre_op_attr) {
2503 		va.va_mask = AT_ALL;
2504 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2505 	} else
2506 		vap = NULL;
2507 #else
2508 	va.va_mask = AT_ALL;
2509 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2510 #endif
2511 
2512 	fh3 = &args->link.dir;
2513 	to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2514 	if (to_exi == NULL) {
2515 		resp->status = NFS3ERR_ACCES;
2516 		goto out1;
2517 	}
2518 	exi_rele(to_exi);
2519 
2520 	if (to_exi != exi) {
2521 		resp->status = NFS3ERR_XDEV;
2522 		goto out1;
2523 	}
2524 
2525 	dvp = nfs3_fhtovp(&args->link.dir, exi);
2526 	if (dvp == NULL) {
2527 		error = ESTALE;
2528 		goto out;
2529 	}
2530 
2531 #ifdef DEBUG
2532 	if (rfs3_do_pre_op_attr) {
2533 		bva.va_mask = AT_ALL;
2534 		bvap = VOP_GETATTR(dvp, &bva, 0, cr) ? NULL : &bva;
2535 	} else
2536 		bvap = NULL;
2537 #else
2538 	bva.va_mask = AT_ALL;
2539 	bvap = VOP_GETATTR(dvp, &bva, 0, cr) ? NULL : &bva;
2540 #endif
2541 
2542 	if (dvp->v_type != VDIR) {
2543 		resp->status = NFS3ERR_NOTDIR;
2544 		goto out1;
2545 	}
2546 
2547 	if (args->link.name == nfs3nametoolong) {
2548 		resp->status = NFS3ERR_NAMETOOLONG;
2549 		goto out1;
2550 	}
2551 
2552 	if (args->link.name == NULL || *(args->link.name) == '\0') {
2553 		resp->status = NFS3ERR_ACCES;
2554 		goto out1;
2555 	}
2556 
2557 	if (rdonly(exi, req)) {
2558 		resp->status = NFS3ERR_ROFS;
2559 		goto out1;
2560 	}
2561 
2562 	error = VOP_LINK(dvp, vp, args->link.name, cr);
2563 
2564 #ifdef DEBUG
2565 	if (rfs3_do_post_op_attr) {
2566 		va.va_mask = AT_ALL;
2567 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2568 		ava.va_mask = AT_ALL;
2569 		avap = VOP_GETATTR(dvp, &ava, 0, cr) ? NULL : &ava;
2570 	} else {
2571 		vap = NULL;
2572 		avap = NULL;
2573 	}
2574 #else
2575 	va.va_mask = AT_ALL;
2576 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2577 	ava.va_mask = AT_ALL;
2578 	avap = VOP_GETATTR(dvp, &ava, 0, cr) ? NULL : &ava;
2579 #endif
2580 
2581 	/*
2582 	 * Force modified data and metadata out to stable storage.
2583 	 */
2584 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
2585 	(void) VOP_FSYNC(dvp, 0, cr);
2586 
2587 	if (error)
2588 		goto out;
2589 
2590 	VN_RELE(dvp);
2591 	VN_RELE(vp);
2592 
2593 	resp->status = NFS3_OK;
2594 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
2595 	vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
2596 	return;
2597 
2598 out:
2599 	if (curthread->t_flag & T_WOULDBLOCK) {
2600 		curthread->t_flag &= ~T_WOULDBLOCK;
2601 		resp->status = NFS3ERR_JUKEBOX;
2602 	} else
2603 		resp->status = puterrno3(error);
2604 out1:
2605 	if (vp != NULL)
2606 		VN_RELE(vp);
2607 	if (dvp != NULL)
2608 		VN_RELE(dvp);
2609 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
2610 	vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
2611 }
2612 
2613 void *
2614 rfs3_link_getfh(LINK3args *args)
2615 {
2616 
2617 	return (&args->file);
2618 }
2619 
/*
 * NFS3_READDIR_MIN_COUNT(length) is the size, in bytes, of a READDIR
 * response that carries attribute information and exactly one
 * directory entry whose name is `length' bytes long.  If the count in
 * the incoming request is at least this large, then we are guaranteed
 * to be able to return at least one directory entry if one exists, so
 * no NFS3ERR_TOOSMALL check is needed.  If the requested count is
 * smaller than this, the caller must check whether that error has to
 * be returned.
 *
 * The fixed part is made up of the following XDR units:
 *
 *	status			1
 *	attribute flag		1
 *	cookie verifier		2
 *	attributes		NFS3_SIZEOF_FATTR3
 *	boolean			1
 *	file id			2
 *	directory name length	1
 *	cookie			2
 *	end of list		1
 *	end of file		1
 *
 * each of which is BYTES_PER_XDR_UNIT bytes, plus the length of the
 * name rounded up to a multiple of BYTES_PER_XDR_UNIT.
 */

#define	NFS3_READDIR_MIN_COUNT(length)	\
	(roundup((length), BYTES_PER_XDR_UNIT) + \
	    BYTES_PER_XDR_UNIT * \
	    (1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1))
2648 
2649 /* ARGSUSED */
2650 void
2651 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
2652 	struct svc_req *req, cred_t *cr)
2653 {
2654 	int error;
2655 	vnode_t *vp;
2656 	struct vattr *vap;
2657 	struct vattr va;
2658 	struct iovec iov;
2659 	struct uio uio;
2660 	char *data;
2661 	int iseof;
2662 	int bufsize;
2663 	int namlen;
2664 	uint_t count;
2665 
2666 	vap = NULL;
2667 
2668 	vp = nfs3_fhtovp(&args->dir, exi);
2669 	if (vp == NULL) {
2670 		error = ESTALE;
2671 		goto out;
2672 	}
2673 
2674 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2675 
2676 #ifdef DEBUG
2677 	if (rfs3_do_pre_op_attr) {
2678 		va.va_mask = AT_ALL;
2679 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2680 	} else
2681 		vap = NULL;
2682 #else
2683 	va.va_mask = AT_ALL;
2684 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2685 #endif
2686 
2687 	if (vp->v_type != VDIR) {
2688 		resp->status = NFS3ERR_NOTDIR;
2689 		goto out1;
2690 	}
2691 
2692 	error = VOP_ACCESS(vp, VREAD, 0, cr);
2693 	if (error)
2694 		goto out;
2695 
2696 	/*
2697 	 * Now don't allow arbitrary count to alloc;
2698 	 * allow the maximum not to exceed rfs3_tsize()
2699 	 */
2700 	if (args->count > rfs3_tsize(req))
2701 		args->count = rfs3_tsize(req);
2702 
2703 	/*
2704 	 * Make sure that there is room to read at least one entry
2705 	 * if any are available.
2706 	 */
2707 	if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
2708 		count = DIRENT64_RECLEN(MAXNAMELEN);
2709 	else
2710 		count = args->count;
2711 
2712 	data = kmem_alloc(count, KM_SLEEP);
2713 
2714 	iov.iov_base = data;
2715 	iov.iov_len = count;
2716 	uio.uio_iov = &iov;
2717 	uio.uio_iovcnt = 1;
2718 	uio.uio_segflg = UIO_SYSSPACE;
2719 	uio.uio_extflg = UIO_COPY_CACHED;
2720 	uio.uio_loffset = (offset_t)args->cookie;
2721 	uio.uio_resid = count;
2722 
2723 	error = VOP_READDIR(vp, &uio, cr, &iseof);
2724 
2725 #ifdef DEBUG
2726 	if (rfs3_do_post_op_attr) {
2727 		va.va_mask = AT_ALL;
2728 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2729 	} else
2730 		vap = NULL;
2731 #else
2732 	va.va_mask = AT_ALL;
2733 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2734 #endif
2735 
2736 	if (error) {
2737 		kmem_free(data, count);
2738 		goto out;
2739 	}
2740 
2741 	/*
2742 	 * If the count was not large enough to be able to guarantee
2743 	 * to be able to return at least one entry, then need to
2744 	 * check to see if NFS3ERR_TOOSMALL should be returned.
2745 	 */
2746 	if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
2747 		/*
2748 		 * bufsize is used to keep track of the size of the response.
2749 		 * It is primed with:
2750 		 *	1 for the status +
2751 		 *	1 for the dir_attributes.attributes boolean +
2752 		 *	2 for the cookie verifier
2753 		 * all times BYTES_PER_XDR_UNIT to convert from XDR units
2754 		 * to bytes.  If there are directory attributes to be
2755 		 * returned, then:
2756 		 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
2757 		 * time BYTES_PER_XDR_UNIT is added to account for them.
2758 		 */
2759 		bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
2760 		if (vap != NULL)
2761 			bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
2762 		/*
2763 		 * An entry is composed of:
2764 		 *	1 for the true/false list indicator +
2765 		 *	2 for the fileid +
2766 		 *	1 for the length of the name +
2767 		 *	2 for the cookie +
2768 		 * all times BYTES_PER_XDR_UNIT to convert from
2769 		 * XDR units to bytes, plus the length of the name
2770 		 * rounded up to the nearest BYTES_PER_XDR_UNIT.
2771 		 */
2772 		if (count != uio.uio_resid) {
2773 			namlen = strlen(((struct dirent64 *)data)->d_name);
2774 			bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
2775 				    roundup(namlen, BYTES_PER_XDR_UNIT);
2776 		}
2777 		/*
2778 		 * We need to check to see if the number of bytes left
2779 		 * to go into the buffer will actually fit into the
2780 		 * buffer.  This is calculated as the size of this
2781 		 * entry plus:
2782 		 *	1 for the true/false list indicator +
2783 		 *	1 for the eof indicator
2784 		 * times BYTES_PER_XDR_UNIT to convert from from
2785 		 * XDR units to bytes.
2786 		 */
2787 		bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
2788 		if (bufsize > args->count) {
2789 			kmem_free(data, count);
2790 			resp->status = NFS3ERR_TOOSMALL;
2791 			goto out1;
2792 		}
2793 	}
2794 
2795 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2796 
2797 #if 0 /* notyet */
2798 	/*
2799 	 * Don't do this.  It causes local disk writes when just
2800 	 * reading the file and the overhead is deemed larger
2801 	 * than the benefit.
2802 	 */
2803 	/*
2804 	 * Force modified metadata out to stable storage.
2805 	 */
2806 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
2807 #endif
2808 
2809 	VN_RELE(vp);
2810 
2811 	resp->status = NFS3_OK;
2812 	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
2813 	resp->resok.cookieverf = 0;
2814 	resp->resok.reply.entries = (entry3 *)data;
2815 	resp->resok.reply.eof = iseof;
2816 	resp->resok.size = count - uio.uio_resid;
2817 	resp->resok.count = args->count;
2818 	resp->resok.freecount = count;
2819 	return;
2820 
2821 out:
2822 	if (curthread->t_flag & T_WOULDBLOCK) {
2823 		curthread->t_flag &= ~T_WOULDBLOCK;
2824 		resp->status = NFS3ERR_JUKEBOX;
2825 	} else
2826 		resp->status = puterrno3(error);
2827 out1:
2828 	if (vp != NULL) {
2829 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2830 		VN_RELE(vp);
2831 	}
2832 	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
2833 }
2834 
2835 void *
2836 rfs3_readdir_getfh(READDIR3args *args)
2837 {
2838 
2839 	return (&args->dir);
2840 }
2841 
2842 void
2843 rfs3_readdir_free(READDIR3res *resp)
2844 {
2845 
2846 	if (resp->status == NFS3_OK)
2847 		kmem_free(resp->resok.reply.entries, resp->resok.freecount);
2848 }
2849 
/*
 * nextdp(dp): address of the dirent64 that starts d_reclen bytes
 * after dp.  Any earlier definition of nextdp is discarded first.
 */
#undef nextdp
#define	nextdp(dp)	\
	((struct dirent64 *)(&((char *)(dp))[(dp)->d_reclen]))
2854 
/*
 * This macro computes the size of a response which contains
 * one directory entry including the attributes as well as file handle.
 * If the incoming request is larger than this, then we are guaranteed to be
 * able to return at least one more directory entry if one exists.
 *
 * NFS3_READDIRPLUS_ENTRY is made up of the following:
 *
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * attribute flag - 1 * BYTES_PER_XDR_UNIT
 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * status byte for file handle - 1 *  BYTES_PER_XDR_UNIT
 * length of a file handle - 1 * BYTES_PER_XDR_UNIT
 * Maximum length of a file handle (NFS3_MAXFHSIZE)
 * name length of the entry rounded up to a multiple of BYTES_PER_XDR_UNIT
 *
 * The namelen argument is parenthesized in the expansion so that the
 * result does not depend on how roundup() treats its operands.
 */
#define	NFS3_READDIRPLUS_ENTRY(namelen)	\
	((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
		BYTES_PER_XDR_UNIT + \
	NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
2878 
/*
 * Initial value for the size of a single VOP_READDIR() request issued
 * by rfs3_readdirplus(); the per-call size is further limited to the
 * space left in the entry buffer.
 */
static int rfs3_readdir_unit = MAXBSIZE;

/*
 * rfs3_readdirplus: handle a READDIRPLUS request.
 *
 * Reads directory entries from the directory named by args->dir,
 * starting at args->cookie, into a buffer of args->dircount bytes,
 * possibly using several VOP_READDIR() calls.  While reading, the size
 * of the eventual reply is estimated so that reading stops once
 * args->maxcount would be exceeded.  Each counted entry is then looked
 * up to obtain its attributes and file handle.
 *
 * args is modified: maxcount is clamped to rfs3_tsize(req), and
 * dircount is clamped to maxcount and then raised to at least
 * DIRENT64_RECLEN(MAXNAMELEN).
 *
 * On success resp->status is NFS3_OK and resp->resok refers to the
 * dirent64 buffer (reply.entries) and the per-entry infop array;
 * neither is freed here.  On failure both temporary buffers are freed
 * and resp->resfail.dir_attributes is filled in.
 */
/* ARGSUSED */
void
rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;
	struct vattr *vap;
	struct vattr va;
	struct iovec iov;
	struct uio uio;
	char *data;
	int iseof;
	struct dirent64 *dp;
	vnode_t *nvp;
	struct vattr *nvap;
	struct vattr nva;
	entryplus3_info *infop = NULL;
	int size = 0;
	int nents = 0;
	int bufsize = 0;
	int entrysize = 0;
	int tofit = 0;
	int rd_unit = rfs3_readdir_unit;
	int prev_len;
	int space_left;
	int i;
	uint_t *namlen = NULL;

	vap = NULL;

	vp = nfs3_fhtovp(&args->dir, exi);
	if (vp == NULL) {
		error = ESTALE;
		goto out;
	}

	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);

#ifdef DEBUG
	if (rfs3_do_pre_op_attr) {
		va.va_mask = AT_ALL;
		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
	} else
		vap = NULL;
#else
	va.va_mask = AT_ALL;
	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
#endif

	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}

	error = VOP_ACCESS(vp, VREAD, 0, cr);
	if (error)
		goto out;

	/*
	 * Don't allow arbitrary counts for allocation
	 */
	if (args->maxcount > rfs3_tsize(req))
		args->maxcount = rfs3_tsize(req);

	/*
	 * Make sure that there is room to read at least one entry
	 * if any are available
	 */
	args->dircount = MIN(args->dircount, args->maxcount);

	if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
		args->dircount = DIRENT64_RECLEN(MAXNAMELEN);

	/*
	 * This allocation relies on a minimum directory entry
	 * being roughly 24 bytes.  Therefore, the namlen array
	 * will have enough space based on the maximum number of
	 * entries to read.
	 */
	namlen = kmem_alloc(args->dircount, KM_SLEEP);

	/*
	 * data is the entry buffer; dp is the cursor into it and
	 * space_left tracks how much of it has not yet been filled.
	 */
	space_left = args->dircount;
	data = kmem_alloc(args->dircount, KM_SLEEP);
	dp = (struct dirent64 *)data;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)args->cookie;

	/*
	 * bufsize is used to keep track of the size of the response as we
	 * get post op attributes and filehandles for each entry.  This is
	 * an optimization as the server may have read more entries than will
	 * fit in the buffer specified by maxcount.  We stop calculating
	 * post op attributes and filehandles once we have exceeded maxcount.
	 * This will minimize the effect of truncation.
	 *
	 * It is primed with:
	 *	1 for the status +
	 *	1 for the dir_attributes.attributes boolean +
	 *	2 for the cookie verifier
	 * all times BYTES_PER_XDR_UNIT to convert from XDR units
	 * to bytes.  If there are directory attributes to be
	 * returned, then:
	 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
	 * time BYTES_PER_XDR_UNIT is added to account for them.
	 */
	bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
	if (vap != NULL)
		bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;

getmoredents:
	/*
	 * Here we make a check so that our read unit is not larger than
	 * the space left in the buffer.
	 */
	rd_unit = MIN(rd_unit, space_left);
	iov.iov_base = (char *)dp;
	iov.iov_len = rd_unit;
	uio.uio_resid = rd_unit;
	prev_len = rd_unit;

	error = VOP_READDIR(vp, &uio, cr, &iseof);

	if (error) {
		/* namlen is freed on the common failure path below. */
		kmem_free(data, args->dircount);
		goto out;
	}

	/* This read transferred nothing and did not report EOF. */
	if (uio.uio_resid == prev_len && !iseof) {
		if (nents == 0) {
			kmem_free(data, args->dircount);
			resp->status = NFS3ERR_TOOSMALL;
			goto out1;
		}

		/*
		 * We could not get any more entries, so get the attributes
		 * and filehandle for the entries already obtained.
		 */
		goto good;
	}

	/*
	 * We estimate the size of the response by assuming the
	 * entry exists and attributes and filehandle are also valid
	 */
	for (size = prev_len - uio.uio_resid;
		size > 0;
		size -= dp->d_reclen, dp = nextdp(dp)) {

		/*
		 * An entry with a zero d_ino is counted in nents but
		 * adds nothing to bufsize and records no name length.
		 */
		if (dp->d_ino == 0) {
			nents++;
			continue;
		}

		namlen[nents] = strlen(dp->d_name);
		entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);

		/*
		 * We need to check to see if the number of bytes left
		 * to go into the buffer will actually fit into the
		 * buffer.  This is calculated as the size of this
		 * entry plus:
		 *	1 for the true/false list indicator +
		 *	1 for the eof indicator
		 * times BYTES_PER_XDR_UNIT to convert from XDR units
		 * to bytes.
		 *
		 * Also check the dircount limit against the first entry read
		 *
		 */
		tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
		if (bufsize + tofit > args->maxcount) {
			/*
			 * We make a check here to see if this was the
			 * first entry being measured.  If so, then maxcount
			 * was too small to begin with and so we need to
			 * return with NFS3ERR_TOOSMALL.
			 */
			if (nents == 0) {
				kmem_free(data, args->dircount);
				resp->status = NFS3ERR_TOOSMALL;
				goto out1;
			}
			/*
			 * Entries were read that are not being returned,
			 * so the reply must not claim end of directory.
			 */
			iseof = FALSE;
			goto good;
		}
		bufsize += entrysize;
		nents++;
	}

	/*
	 * If there is enough room to fit at least 1 more entry including
	 * post op attributes and filehandle in the buffer AND that we haven't
	 * exceeded dircount then go back and get some more.
	 */
	if (!iseof &&
	    (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
		space_left -= (prev_len - uio.uio_resid);
		if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
			goto getmoredents;

		/* else, fall through */
	}

good:

#ifdef DEBUG
	if (rfs3_do_post_op_attr) {
		va.va_mask = AT_ALL;
		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
	} else
		vap = NULL;
#else
	va.va_mask = AT_ALL;
	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
#endif

	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);

	/*
	 * One entryplus3_info per counted entry.  The array is handed to
	 * the response and released by rfs3_readdirplus_free().
	 */
	infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
	resp->resok.infop = infop;

	/* Second pass: walk the first nents entries from the start. */
	dp = (struct dirent64 *)data;
	for (i = 0; i < nents; i++) {

		if (dp->d_ino == 0) {
			infop[i].attr.attributes = FALSE;
			infop[i].fh.handle_follows = FALSE;
			dp = nextdp(dp);
			continue;
		}

		infop[i].namelen = namlen[i];

		/*
		 * A failed lookup does not fail the request; the entry
		 * is simply returned without attributes or file handle.
		 */
		error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr);
		if (error) {
			infop[i].attr.attributes = FALSE;
			infop[i].fh.handle_follows = FALSE;
			dp = nextdp(dp);
			continue;
		}

#ifdef DEBUG
		if (rfs3_do_post_op_attr) {
			nva.va_mask = AT_ALL;
			nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ?
				NULL : &nva;
		} else
			nvap = NULL;
#else
		nva.va_mask = AT_ALL;
		nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
#endif
		vattr_to_post_op_attr(nvap, &infop[i].attr);

#ifdef DEBUG
		if (!rfs3_do_post_op_fh3)
			infop[i].fh.handle_follows = FALSE;
		else {
#endif
		error = makefh3(&infop[i].fh.handle, nvp, exi);
		if (!error)
			infop[i].fh.handle_follows = TRUE;
		else
			infop[i].fh.handle_follows = FALSE;
#ifdef DEBUG
		}
#endif

		VN_RELE(nvp);
		dp = nextdp(dp);
	}

#if 0 /* notyet */
	/*
	 * Don't do this.  It causes local disk writes when just
	 * reading the file and the overhead is deemed larger
	 * than the benefit.
	 */
	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr);
#endif

	VN_RELE(vp);

	/* The name lengths have been copied into infop[]. */
	kmem_free(namlen, args->dircount);

	resp->status = NFS3_OK;
	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
	resp->resok.cookieverf = 0;
	resp->resok.reply.entries = (entryplus3 *)data;
	resp->resok.reply.eof = iseof;
	resp->resok.size = nents;
	/* count records the allocated size of the entry buffer. */
	resp->resok.count = args->dircount;
	resp->resok.maxcount = args->maxcount;
	return;

out:
	/* Translate error, unless the thread was flagged T_WOULDBLOCK. */
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
out1:
	if (vp != NULL) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
		VN_RELE(vp);
	}

	if (namlen != NULL)
		kmem_free(namlen, args->dircount);

	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
}
3201 
3202 void *
3203 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3204 {
3205 
3206 	return (&args->dir);
3207 }
3208 
3209 void
3210 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3211 {
3212 
3213 	if (resp->status == NFS3_OK) {
3214 		kmem_free(resp->resok.reply.entries, resp->resok.count);
3215 		kmem_free(resp->resok.infop,
3216 			resp->resok.size * sizeof (struct entryplus3_info));
3217 	}
3218 }
3219 
3220 /* ARGSUSED */
3221 void
3222 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3223 	struct svc_req *req, cred_t *cr)
3224 {
3225 	int error;
3226 	vnode_t *vp;
3227 	struct vattr *vap;
3228 	struct vattr va;
3229 	struct statvfs64 sb;
3230 
3231 	vap = NULL;
3232 
3233 	vp = nfs3_fhtovp(&args->fsroot, exi);
3234 	if (vp == NULL) {
3235 		error = ESTALE;
3236 		goto out;
3237 	}
3238 
3239 	error = VFS_STATVFS(vp->v_vfsp, &sb);
3240 
3241 #ifdef DEBUG
3242 	if (rfs3_do_post_op_attr) {
3243 		va.va_mask = AT_ALL;
3244 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3245 	} else
3246 		vap = NULL;
3247 #else
3248 	va.va_mask = AT_ALL;
3249 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3250 #endif
3251 
3252 	VN_RELE(vp);
3253 
3254 	if (error)
3255 		goto out;
3256 
3257 	resp->status = NFS3_OK;
3258 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3259 	if (sb.f_blocks != (fsblkcnt64_t)-1)
3260 		resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3261 	else
3262 		resp->resok.tbytes = (size3)sb.f_blocks;
3263 	if (sb.f_bfree != (fsblkcnt64_t)-1)
3264 		resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3265 	else
3266 		resp->resok.fbytes = (size3)sb.f_bfree;
3267 	if (sb.f_bavail != (fsblkcnt64_t)-1)
3268 		resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3269 	else
3270 		resp->resok.abytes = (size3)sb.f_bavail;
3271 	resp->resok.tfiles = (size3)sb.f_files;
3272 	resp->resok.ffiles = (size3)sb.f_ffree;
3273 	resp->resok.afiles = (size3)sb.f_favail;
3274 	resp->resok.invarsec = 0;
3275 	return;
3276 
3277 out:
3278 	if (curthread->t_flag & T_WOULDBLOCK) {
3279 		curthread->t_flag &= ~T_WOULDBLOCK;
3280 		resp->status = NFS3ERR_JUKEBOX;
3281 	} else
3282 		resp->status = puterrno3(error);
3283 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3284 }
3285 
3286 void *
3287 rfs3_fsstat_getfh(FSSTAT3args *args)
3288 {
3289 
3290 	return (&args->fsroot);
3291 }
3292 
3293 /* ARGSUSED */
3294 void
3295 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3296 	struct svc_req *req, cred_t *cr)
3297 {
3298 	vnode_t *vp;
3299 	struct vattr *vap;
3300 	struct vattr va;
3301 	uint32_t xfer_size;
3302 	ulong_t l = 0;
3303 	int error;
3304 
3305 	vp = nfs3_fhtovp(&args->fsroot, exi);
3306 	if (vp == NULL) {
3307 		if (curthread->t_flag & T_WOULDBLOCK) {
3308 			curthread->t_flag &= ~T_WOULDBLOCK;
3309 			resp->status = NFS3ERR_JUKEBOX;
3310 		} else
3311 			resp->status = NFS3ERR_STALE;
3312 		vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3313 		return;
3314 	}
3315 
3316 #ifdef DEBUG
3317 	if (rfs3_do_post_op_attr) {
3318 		va.va_mask = AT_ALL;
3319 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3320 	} else
3321 		vap = NULL;
3322 #else
3323 	va.va_mask = AT_ALL;
3324 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3325 #endif
3326 
3327 	resp->status = NFS3_OK;
3328 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3329 	xfer_size = rfs3_tsize(req);
3330 	resp->resok.rtmax = xfer_size;
3331 	resp->resok.rtpref = xfer_size;
3332 	resp->resok.rtmult = DEV_BSIZE;
3333 	resp->resok.wtmax = xfer_size;
3334 	resp->resok.wtpref = xfer_size;
3335 	resp->resok.wtmult = DEV_BSIZE;
3336 	resp->resok.dtpref = MAXBSIZE;
3337 
3338 	/*
3339 	 * Large file spec: want maxfilesize based on limit of
3340 	 * underlying filesystem.  We can guess 2^31-1 if need be.
3341 	 */
3342 	error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr);
3343 
3344 	VN_RELE(vp);
3345 
3346 	if (!error && l != 0 && l <= 64)
3347 		resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3348 	else
3349 		resp->resok.maxfilesize = MAXOFF32_T;
3350 
3351 	resp->resok.time_delta.seconds = 0;
3352 	resp->resok.time_delta.nseconds = 1000;
3353 	resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3354 	    FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3355 }
3356 
3357 void *
3358 rfs3_fsinfo_getfh(FSINFO3args *args)
3359 {
3360 
3361 	return (&args->fsroot);
3362 }
3363 
3364 /* ARGSUSED */
3365 void
3366 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3367 	struct svc_req *req, cred_t *cr)
3368 {
3369 	int error;
3370 	vnode_t *vp;
3371 	struct vattr *vap;
3372 	struct vattr va;
3373 	ulong_t val;
3374 
3375 	vap = NULL;
3376 
3377 	vp = nfs3_fhtovp(&args->object, exi);
3378 	if (vp == NULL) {
3379 		error = ESTALE;
3380 		goto out;
3381 	}
3382 
3383 #ifdef DEBUG
3384 	if (rfs3_do_post_op_attr) {
3385 		va.va_mask = AT_ALL;
3386 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3387 	} else
3388 		vap = NULL;
3389 #else
3390 	va.va_mask = AT_ALL;
3391 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3392 #endif
3393 
3394 	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr);
3395 	if (error)
3396 		goto out;
3397 	resp->resok.info.link_max = (uint32)val;
3398 
3399 	error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr);
3400 	if (error)
3401 		goto out;
3402 	resp->resok.info.name_max = (uint32)val;
3403 
3404 	error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr);
3405 	if (error)
3406 		goto out;
3407 	if (val == 1)
3408 		resp->resok.info.no_trunc = TRUE;
3409 	else
3410 		resp->resok.info.no_trunc = FALSE;
3411 
3412 	error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr);
3413 	if (error)
3414 		goto out;
3415 	if (val == 1)
3416 		resp->resok.info.chown_restricted = TRUE;
3417 	else
3418 		resp->resok.info.chown_restricted = FALSE;
3419 
3420 	VN_RELE(vp);
3421 
3422 	resp->status = NFS3_OK;
3423 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3424 	resp->resok.info.case_insensitive = FALSE;
3425 	resp->resok.info.case_preserving = TRUE;
3426 	return;
3427 
3428 out:
3429 	if (curthread->t_flag & T_WOULDBLOCK) {
3430 		curthread->t_flag &= ~T_WOULDBLOCK;
3431 		resp->status = NFS3ERR_JUKEBOX;
3432 	} else
3433 		resp->status = puterrno3(error);
3434 	if (vp != NULL)
3435 		VN_RELE(vp);
3436 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3437 }
3438 
3439 void *
3440 rfs3_pathconf_getfh(PATHCONF3args *args)
3441 {
3442 
3443 	return (&args->object);
3444 }
3445 
3446 void
3447 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
3448 	struct svc_req *req, cred_t *cr)
3449 {
3450 	int error;
3451 	vnode_t *vp;
3452 	struct vattr *bvap;
3453 	struct vattr bva;
3454 	struct vattr *avap;
3455 	struct vattr ava;
3456 
3457 	bvap = NULL;
3458 	avap = NULL;
3459 
3460 	vp = nfs3_fhtovp(&args->file, exi);
3461 	if (vp == NULL) {
3462 		error = ESTALE;
3463 		goto out;
3464 	}
3465 
3466 	bva.va_mask = AT_ALL;
3467 	error = VOP_GETATTR(vp, &bva, 0, cr);
3468 
3469 	/*
3470 	 * If we can't get the attributes, then we can't do the
3471 	 * right access checking.  So, we'll fail the request.
3472 	 */
3473 	if (error)
3474 		goto out;
3475 
3476 #ifdef DEBUG
3477 	if (rfs3_do_pre_op_attr)
3478 		bvap = &bva;
3479 	else
3480 		bvap = NULL;
3481 #else
3482 	bvap = &bva;
3483 #endif
3484 
3485 	if (rdonly(exi, req)) {
3486 		resp->status = NFS3ERR_ROFS;
3487 		goto out1;
3488 	}
3489 
3490 	if (vp->v_type != VREG) {
3491 		resp->status = NFS3ERR_INVAL;
3492 		goto out1;
3493 	}
3494 
3495 	if (crgetuid(cr) != bva.va_uid &&
3496 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr)))
3497 		goto out;
3498 
3499 	error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr);
3500 	if (!error)
3501 		error = VOP_FSYNC(vp, FNODSYNC, cr);
3502 
3503 #ifdef DEBUG
3504 	if (rfs3_do_post_op_attr) {
3505 		ava.va_mask = AT_ALL;
3506 		avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
3507 	} else
3508 		avap = NULL;
3509 #else
3510 	ava.va_mask = AT_ALL;
3511 	avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
3512 #endif
3513 
3514 	if (error)
3515 		goto out;
3516 
3517 	VN_RELE(vp);
3518 
3519 	resp->status = NFS3_OK;
3520 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
3521 	resp->resok.verf = write3verf;
3522 	return;
3523 
3524 out:
3525 	if (curthread->t_flag & T_WOULDBLOCK) {
3526 		curthread->t_flag &= ~T_WOULDBLOCK;
3527 		resp->status = NFS3ERR_JUKEBOX;
3528 	} else
3529 		resp->status = puterrno3(error);
3530 out1:
3531 	if (vp != NULL)
3532 		VN_RELE(vp);
3533 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
3534 }
3535 
3536 void *
3537 rfs3_commit_getfh(COMMIT3args *args)
3538 {
3539 
3540 	return (&args->file);
3541 }
3542 
3543 static int
3544 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
3545 {
3546 
3547 	vap->va_mask = 0;
3548 
3549 	if (sap->mode.set_it) {
3550 		vap->va_mode = (mode_t)sap->mode.mode;
3551 		vap->va_mask |= AT_MODE;
3552 	}
3553 	if (sap->uid.set_it) {
3554 		vap->va_uid = (uid_t)sap->uid.uid;
3555 		vap->va_mask |= AT_UID;
3556 	}
3557 	if (sap->gid.set_it) {
3558 		vap->va_gid = (gid_t)sap->gid.gid;
3559 		vap->va_mask |= AT_GID;
3560 	}
3561 	if (sap->size.set_it) {
3562 		if (sap->size.size > (size3)((u_longlong_t)-1))
3563 			return (EINVAL);
3564 		vap->va_size = sap->size.size;
3565 		vap->va_mask |= AT_SIZE;
3566 	}
3567 	if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
3568 #ifndef _LP64
3569 		/* check time validity */
3570 		if (!NFS3_TIME_OK(sap->atime.atime.seconds))
3571 			return (EOVERFLOW);
3572 #endif
3573 		/*
3574 		 * nfs protocol defines times as unsigned so don't extend sign,
3575 		 * unless sysadmin set nfs_allow_preepoch_time.
3576 		 */
3577 		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
3578 			sap->atime.atime.seconds);
3579 		vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
3580 		vap->va_mask |= AT_ATIME;
3581 	} else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
3582 		gethrestime(&vap->va_atime);
3583 		vap->va_mask |= AT_ATIME;
3584 	}
3585 	if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
3586 #ifndef _LP64
3587 		/* check time validity */
3588 		if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
3589 			return (EOVERFLOW);
3590 #endif
3591 		/*
3592 		 * nfs protocol defines times as unsigned so don't extend sign,
3593 		 * unless sysadmin set nfs_allow_preepoch_time.
3594 		 */
3595 		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
3596 			sap->mtime.mtime.seconds);
3597 		vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
3598 		vap->va_mask |= AT_MTIME;
3599 	} else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
3600 		gethrestime(&vap->va_mtime);
3601 		vap->va_mask |= AT_MTIME;
3602 	}
3603 
3604 	return (0);
3605 }
3606 
3607 static ftype3 vt_to_nf3[] = {
3608 	0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
3609 };
3610 
3611 static int
3612 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
3613 {
3614 
3615 	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
3616 	/* Return error if time or size overflow */
3617 	if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
3618 		return (EOVERFLOW);
3619 	}
3620 	fap->type = vt_to_nf3[vap->va_type];
3621 	fap->mode = (mode3)(vap->va_mode & MODEMASK);
3622 	fap->nlink = (uint32)vap->va_nlink;
3623 	if (vap->va_uid == UID_NOBODY)
3624 		fap->uid = (uid3)NFS_UID_NOBODY;
3625 	else
3626 		fap->uid = (uid3)vap->va_uid;
3627 	if (vap->va_gid == GID_NOBODY)
3628 		fap->gid = (gid3)NFS_GID_NOBODY;
3629 	else
3630 		fap->gid = (gid3)vap->va_gid;
3631 	fap->size = (size3)vap->va_size;
3632 	fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
3633 	fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
3634 	fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
3635 	fap->fsid = (uint64)vap->va_fsid;
3636 	fap->fileid = (fileid3)vap->va_nodeid;
3637 	fap->atime.seconds = vap->va_atime.tv_sec;
3638 	fap->atime.nseconds = vap->va_atime.tv_nsec;
3639 	fap->mtime.seconds = vap->va_mtime.tv_sec;
3640 	fap->mtime.nseconds = vap->va_mtime.tv_nsec;
3641 	fap->ctime.seconds = vap->va_ctime.tv_sec;
3642 	fap->ctime.nseconds = vap->va_ctime.tv_nsec;
3643 	return (0);
3644 }
3645 
3646 static int
3647 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
3648 {
3649 
3650 	/* Return error if time or size overflow */
3651 	if (!  (NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
3652 		NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
3653 		NFS3_SIZE_OK(vap->va_size))) {
3654 		return (EOVERFLOW);
3655 	}
3656 	wccap->size = (size3)vap->va_size;
3657 	wccap->mtime.seconds = vap->va_mtime.tv_sec;
3658 	wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
3659 	wccap->ctime.seconds = vap->va_ctime.tv_sec;
3660 	wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
3661 	return (0);
3662 }
3663 
3664 static void
3665 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
3666 {
3667 
3668 	/* don't return attrs if time overflow */
3669 	if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
3670 		poap->attributes = TRUE;
3671 	} else
3672 		poap->attributes = FALSE;
3673 }
3674 
3675 void
3676 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
3677 {
3678 
3679 	/* don't return attrs if time overflow */
3680 	if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
3681 		poap->attributes = TRUE;
3682 	} else
3683 		poap->attributes = FALSE;
3684 }
3685 
3686 static void
3687 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
3688 {
3689 
3690 	vattr_to_pre_op_attr(bvap, &wccp->before);
3691 	vattr_to_post_op_attr(avap, &wccp->after);
3692 }
3693 
3694 void
3695 rfs3_srvrinit(void)
3696 {
3697 	struct rfs3_verf_overlay {
3698 		uint_t id; /* a "unique" identifier */
3699 		int ts; /* a unique timestamp */
3700 	} *verfp;
3701 	timestruc_t now;
3702 
3703 	/*
3704 	 * The following algorithm attempts to find a unique verifier
3705 	 * to be used as the write verifier returned from the server
3706 	 * to the client.  It is important that this verifier change
3707 	 * whenever the server reboots.  Of secondary importance, it
3708 	 * is important for the verifier to be unique between two
3709 	 * different servers.
3710 	 *
3711 	 * Thus, an attempt is made to use the system hostid and the
3712 	 * current time in seconds when the nfssrv kernel module is
3713 	 * loaded.  It is assumed that an NFS server will not be able
3714 	 * to boot and then to reboot in less than a second.  If the
3715 	 * hostid has not been set, then the current high resolution
3716 	 * time is used.  This will ensure different verifiers each
3717 	 * time the server reboots and minimize the chances that two
3718 	 * different servers will have the same verifier.
3719 	 */
3720 
3721 #ifndef	lint
3722 	/*
3723 	 * We ASSERT that this constant logic expression is
3724 	 * always true because in the past, it wasn't.
3725 	 */
3726 	ASSERT(sizeof (*verfp) <= sizeof (write3verf));
3727 #endif
3728 
3729 	gethrestime(&now);
3730 	verfp = (struct rfs3_verf_overlay *)&write3verf;
3731 	verfp->ts = (int)now.tv_sec;
3732 	verfp->id = (uint_t)nfs_atoi(hw_serial);
3733 
3734 	if (verfp->id == 0)
3735 		verfp->id = (uint_t)now.tv_nsec;
3736 
3737 }
3738 
/*
 * Teardown hook, presumably the counterpart of rfs3_srvrinit().
 * It takes no arguments, returns nothing, and currently performs
 * no work.
 */
void
rfs3_srvrfini(void)
{
	/* Nothing to do */
}
3744