xref: /titanic_50/usr/src/uts/common/fs/nfs/nfs3_srv.c (revision 6451fdbca2f79129a3a09d2fe3f6dd4d062bebff)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/systm.h>
35 #include <sys/cred.h>
36 #include <sys/buf.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/uio.h>
40 #include <sys/errno.h>
41 #include <sys/sysmacros.h>
42 #include <sys/statvfs.h>
43 #include <sys/kmem.h>
44 #include <sys/dirent.h>
45 #include <sys/cmn_err.h>
46 #include <sys/debug.h>
47 #include <sys/systeminfo.h>
48 #include <sys/flock.h>
49 #include <sys/nbmlock.h>
50 #include <sys/policy.h>
51 
52 #include <rpc/types.h>
53 #include <rpc/auth.h>
54 #include <rpc/svc.h>
55 
56 #include <nfs/nfs.h>
57 #include <nfs/export.h>
58 
59 #include <sys/strsubr.h>
60 
61 /*
62  * These are the interface routines for the server side of the
63  * Network File System.  See the NFS version 3 protocol specification
64  * for a description of this interface.
65  */
66 
/*
 * DEBUG-only switches, all enabled by default.
 *
 * rfs3_do_pre_op_attr:  when zero, the handlers in this file leave the
 *	pre-operation attribute pointer NULL instead of reporting the
 *	attributes fetched before the operation.
 * rfs3_do_post_op_attr: when zero, the handlers skip fetching (or discard)
 *	the post-operation attributes and report none.
 * rfs3_do_post_op_fh3:  not referenced in the part of the file reviewed
 *	here; presumably gates returning post-operation file handles
 *	-- TODO confirm against its users.
 */
67 #ifdef DEBUG
68 int rfs3_do_pre_op_attr = 1;
69 int rfs3_do_post_op_attr = 1;
70 int rfs3_do_post_op_fh3 = 1;
71 #endif
72 
/*
 * Write verifier copied into the "verf" field of every successful WRITE3
 * reply by rfs3_write().  It is not assigned in the part of the file
 * reviewed here.
 */
73 static writeverf3 write3verf;
74 
/*
 * Forward declarations of the file-local helpers that convert between
 * the kernel's struct vattr and the NFS version 3 attribute structures.
 * The int-returning converters report a non-zero error on failure;
 * callers in this file pass that value straight to puterrno3().
 */
75 static int	sattr3_to_vattr(sattr3 *, struct vattr *);
76 static int	vattr_to_fattr3(struct vattr *, fattr3 *);
77 static int	vattr_to_wcc_attr(struct vattr *, wcc_attr *);
78 static void	vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
79 static void	vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
80 
81 /* ARGSUSED */
82 void
83 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
84 	struct svc_req *req, cred_t *cr)
85 {
86 	int error;
87 	vnode_t *vp;
88 	struct vattr va;
89 
90 	vp = nfs3_fhtovp(&args->object, exi);
91 	if (vp == NULL) {
92 		error = ESTALE;
93 		goto out;
94 	}
95 
96 	va.va_mask = AT_ALL;
97 	error = rfs4_delegated_getattr(vp, &va, 0, cr);
98 
99 	VN_RELE(vp);
100 
101 	if (!error) {
102 		/* overflow error if time or size is out of range */
103 		error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
104 		if (error)
105 			goto out;
106 		resp->status = NFS3_OK;
107 		return;
108 	}
109 
110 out:
111 	if (curthread->t_flag & T_WOULDBLOCK) {
112 		curthread->t_flag &= ~T_WOULDBLOCK;
113 		resp->status = NFS3ERR_JUKEBOX;
114 	} else
115 		resp->status = puterrno3(error);
116 }
117 
118 fhandle_t *
119 rfs3_getattr_getfh(GETATTR3args *args)
120 {
121 
122 	return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
123 }
124 
125 void
126 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
127 	struct svc_req *req, cred_t *cr)
128 {
129 	int error;
130 	vnode_t *vp;
131 	struct vattr *bvap;
132 	struct vattr bva;
133 	struct vattr *avap;
134 	struct vattr ava;
135 	int flag;
136 	int in_crit = 0;
137 	struct flock64 bf;
138 
139 	bvap = NULL;
140 	avap = NULL;
141 
142 	vp = nfs3_fhtovp(&args->object, exi);
143 	if (vp == NULL) {
144 		error = ESTALE;
145 		goto out;
146 	}
147 
148 	error = sattr3_to_vattr(&args->new_attributes, &ava);
149 	if (error)
150 		goto out;
151 
152 	/*
153 	 * We need to specially handle size changes because of
154 	 * possible conflicting NBMAND locks. Get into critical
155 	 * region before VOP_GETATTR, so the size attribute is
156 	 * valid when checking conflicts.
157 	 *
158 	 * Also, check to see if the v4 side of the server has
159 	 * delegated this file.  If so, then we return JUKEBOX to
160 	 * allow the client to retrasmit its request.
161 	 */
162 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
163 		if (rfs4_check_delegated(FWRITE, vp, TRUE)) {
164 			resp->status = NFS3ERR_JUKEBOX;
165 			goto out1;
166 		}
167 		if (nbl_need_check(vp)) {
168 			nbl_start_crit(vp, RW_READER);
169 			in_crit = 1;
170 		}
171 	}
172 
173 	bva.va_mask = AT_ALL;
174 	error = rfs4_delegated_getattr(vp, &bva, 0, cr);
175 
176 	/*
177 	 * If we can't get the attributes, then we can't do the
178 	 * right access checking.  So, we'll fail the request.
179 	 */
180 	if (error)
181 		goto out;
182 
183 #ifdef DEBUG
184 	if (rfs3_do_pre_op_attr)
185 		bvap = &bva;
186 #else
187 	bvap = &bva;
188 #endif
189 
190 	if (rdonly(exi, req) || vn_is_readonly(vp)) {
191 		resp->status = NFS3ERR_ROFS;
192 		goto out1;
193 	}
194 
195 	if (args->guard.check &&
196 	    (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
197 	    args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
198 		resp->status = NFS3ERR_NOT_SYNC;
199 		goto out1;
200 	}
201 
202 	if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
203 		flag = ATTR_UTIME;
204 	else
205 		flag = 0;
206 
207 	/*
208 	 * If the filesystem is exported with nosuid, then mask off
209 	 * the setuid and setgid bits.
210 	 */
211 	if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
212 	    (exi->exi_export.ex_flags & EX_NOSUID))
213 		ava.va_mode &= ~(VSUID | VSGID);
214 
215 	/*
216 	 * We need to specially handle size changes because it is
217 	 * possible for the client to create a file with modes
218 	 * which indicate read-only, but with the file opened for
219 	 * writing.  If the client then tries to set the size of
220 	 * the file, then the normal access checking done in
221 	 * VOP_SETATTR would prevent the client from doing so,
222 	 * although it should be legal for it to do so.  To get
223 	 * around this, we do the access checking for ourselves
224 	 * and then use VOP_SPACE which doesn't do the access
225 	 * checking which VOP_SETATTR does. VOP_SPACE can only
226 	 * operate on VREG files, let VOP_SETATTR handle the other
227 	 * extremely rare cases.
228 	 * Also the client should not be allowed to change the
229 	 * size of the file if there is a conflicting non-blocking
230 	 * mandatory lock in the region the change.
231 	 */
232 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
233 		if (in_crit) {
234 			u_offset_t offset;
235 			ssize_t length;
236 
237 			if (ava.va_size < bva.va_size) {
238 				offset = ava.va_size;
239 				length = bva.va_size - ava.va_size;
240 			} else {
241 				offset = bva.va_size;
242 				length = ava.va_size - bva.va_size;
243 			}
244 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0)) {
245 				error = EACCES;
246 				goto out;
247 			}
248 		}
249 
250 		if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
251 			ava.va_mask &= ~AT_SIZE;
252 			bf.l_type = F_WRLCK;
253 			bf.l_whence = 0;
254 			bf.l_start = (off64_t)ava.va_size;
255 			bf.l_len = 0;
256 			bf.l_sysid = 0;
257 			bf.l_pid = 0;
258 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
259 			    (offset_t)ava.va_size, cr, NULL);
260 		}
261 	}
262 
263 	if (!error && ava.va_mask)
264 		error = VOP_SETATTR(vp, &ava, flag, cr, NULL);
265 
266 #ifdef DEBUG
267 	if (rfs3_do_post_op_attr) {
268 		ava.va_mask = AT_ALL;
269 		avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
270 	} else
271 		avap = NULL;
272 #else
273 	ava.va_mask = AT_ALL;
274 	avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
275 #endif
276 
277 	/*
278 	 * Force modified metadata out to stable storage.
279 	 */
280 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
281 
282 	if (error)
283 		goto out;
284 
285 	if (in_crit)
286 		nbl_end_crit(vp);
287 	VN_RELE(vp);
288 
289 	resp->status = NFS3_OK;
290 	vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
291 	return;
292 
293 out:
294 	if (curthread->t_flag & T_WOULDBLOCK) {
295 		curthread->t_flag &= ~T_WOULDBLOCK;
296 		resp->status = NFS3ERR_JUKEBOX;
297 	} else
298 		resp->status = puterrno3(error);
299 out1:
300 	if (vp != NULL) {
301 		if (in_crit)
302 			nbl_end_crit(vp);
303 		VN_RELE(vp);
304 	}
305 	vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
306 }
307 
308 fhandle_t *
309 rfs3_setattr_getfh(SETATTR3args *args)
310 {
311 
312 	return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
313 }
314 
315 /* ARGSUSED */
316 void
317 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
318 	struct svc_req *req, cred_t *cr)
319 {
320 	int error;
321 	vnode_t *vp;
322 	vnode_t *dvp;
323 	struct vattr *vap;
324 	struct vattr va;
325 	struct vattr *dvap;
326 	struct vattr dva;
327 	nfs_fh3 *fhp;
328 	struct sec_ol sec = {0, 0};
329 	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
330 
331 	dvap = NULL;
332 
333 	/*
334 	 * Allow lookups from the root - the default
335 	 * location of the public filehandle.
336 	 */
337 	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
338 		dvp = rootdir;
339 		VN_HOLD(dvp);
340 	} else {
341 		dvp = nfs3_fhtovp(args->what.dirp, exi);
342 		if (dvp == NULL) {
343 			error = ESTALE;
344 			goto out;
345 		}
346 	}
347 
348 #ifdef DEBUG
349 	if (rfs3_do_pre_op_attr) {
350 		dva.va_mask = AT_ALL;
351 		dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
352 	}
353 #else
354 	dva.va_mask = AT_ALL;
355 	dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
356 #endif
357 
358 	if (args->what.name == nfs3nametoolong) {
359 		resp->status = NFS3ERR_NAMETOOLONG;
360 		goto out1;
361 	}
362 
363 	if (args->what.name == NULL || *(args->what.name) == '\0') {
364 		resp->status = NFS3ERR_ACCES;
365 		goto out1;
366 	}
367 
368 	fhp = args->what.dirp;
369 	if (strcmp(args->what.name, "..") == 0 &&
370 	    EQFID(&exi->exi_fid, (fid_t *)&fhp->fh3_len)) {
371 		resp->status = NFS3ERR_NOENT;
372 		goto out1;
373 	}
374 
375 	/*
376 	 * If the public filehandle is used then allow
377 	 * a multi-component lookup
378 	 */
379 	if (PUBLIC_FH3(args->what.dirp)) {
380 		publicfh_flag = TRUE;
381 		error = rfs_publicfh_mclookup(args->what.name, dvp, cr, &vp,
382 					&exi, &sec);
383 		if (error && exi != NULL)
384 			exi_rele(exi);  /* See the comment below */
385 	} else {
386 		error = VOP_LOOKUP(dvp, args->what.name, &vp,
387 				NULL, 0, NULL, cr);
388 	}
389 
390 #ifdef DEBUG
391 	if (rfs3_do_post_op_attr) {
392 		dva.va_mask = AT_ALL;
393 		dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
394 	} else
395 		dvap = NULL;
396 #else
397 	dva.va_mask = AT_ALL;
398 	dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
399 #endif
400 
401 	if (error)
402 		goto out;
403 
404 	if (sec.sec_flags & SEC_QUERY) {
405 		error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
406 	} else {
407 		error = makefh3(&resp->resok.object, vp, exi);
408 		if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
409 			auth_weak = TRUE;
410 	}
411 
412 	if (error) {
413 		VN_RELE(vp);
414 		goto out;
415 	}
416 
417 	/*
418 	 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
419 	 * and have obtained a new exportinfo in exi which needs to be
420 	 * released. Note the the original exportinfo pointed to by exi
421 	 * will be released by the caller, common_dispatch.
422 	 */
423 	if (publicfh_flag)
424 		exi_rele(exi);
425 
426 	VN_RELE(dvp);
427 
428 #ifdef DEBUG
429 	if (rfs3_do_post_op_attr) {
430 		va.va_mask = AT_ALL;
431 		vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
432 	} else
433 		vap = NULL;
434 #else
435 	va.va_mask = AT_ALL;
436 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
437 #endif
438 
439 	VN_RELE(vp);
440 
441 	resp->status = NFS3_OK;
442 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
443 	vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
444 
445 	/*
446 	 * If it's public fh, no 0x81, and client's flavor is
447 	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
448 	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
449 	 */
450 	if (auth_weak)
451 		resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
452 
453 	return;
454 
455 out:
456 	if (curthread->t_flag & T_WOULDBLOCK) {
457 		curthread->t_flag &= ~T_WOULDBLOCK;
458 		resp->status = NFS3ERR_JUKEBOX;
459 	} else
460 		resp->status = puterrno3(error);
461 out1:
462 	if (dvp != NULL)
463 		VN_RELE(dvp);
464 	vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
465 
466 }
467 
468 fhandle_t *
469 rfs3_lookup_getfh(LOOKUP3args *args)
470 {
471 
472 	return ((fhandle_t *)&args->what.dirp->fh3_u.nfs_fh3_i.fh3_i);
473 }
474 
475 /* ARGSUSED */
476 void
477 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
478 	struct svc_req *req, cred_t *cr)
479 {
480 	int error;
481 	vnode_t *vp;
482 	struct vattr *vap;
483 	struct vattr va;
484 	int checkwriteperm;
485 
486 	vap = NULL;
487 
488 	vp = nfs3_fhtovp(&args->object, exi);
489 	if (vp == NULL) {
490 		error = ESTALE;
491 		goto out;
492 	}
493 
494 	/*
495 	 * If the file system is exported read only, it is not appropriate
496 	 * to check write permissions for regular files and directories.
497 	 * Special files are interpreted by the client, so the underlying
498 	 * permissions are sent back to the client for interpretation.
499 	 */
500 	if (rdonly(exi, req) && (vp->v_type == VREG || vp->v_type == VDIR))
501 		checkwriteperm = 0;
502 	else
503 		checkwriteperm = 1;
504 
505 	/*
506 	 * We need the mode so that we can correctly determine access
507 	 * permissions relative to a mandatory lock file.  Access to
508 	 * mandatory lock files is denied on the server, so it might
509 	 * as well be reflected to the server during the open.
510 	 */
511 	va.va_mask = AT_MODE;
512 	error = VOP_GETATTR(vp, &va, 0, cr);
513 	if (error)
514 		goto out;
515 
516 #ifdef DEBUG
517 	if (rfs3_do_post_op_attr)
518 		vap = &va;
519 #else
520 	vap = &va;
521 #endif
522 
523 	resp->resok.access = 0;
524 
525 	if (args->access & ACCESS3_READ) {
526 		error = VOP_ACCESS(vp, VREAD, 0, cr);
527 		if (error) {
528 			if (curthread->t_flag & T_WOULDBLOCK)
529 				goto out;
530 		} else if (!MANDLOCK(vp, va.va_mode))
531 			resp->resok.access |= ACCESS3_READ;
532 	}
533 	if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
534 		error = VOP_ACCESS(vp, VEXEC, 0, cr);
535 		if (error) {
536 			if (curthread->t_flag & T_WOULDBLOCK)
537 				goto out;
538 		} else
539 			resp->resok.access |= ACCESS3_LOOKUP;
540 	}
541 	if (checkwriteperm &&
542 	    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
543 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
544 		if (error) {
545 			if (curthread->t_flag & T_WOULDBLOCK)
546 				goto out;
547 		} else if (!MANDLOCK(vp, va.va_mode)) {
548 			resp->resok.access |=
549 			    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
550 		}
551 	}
552 	if (checkwriteperm &&
553 	    (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
554 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
555 		if (error) {
556 			if (curthread->t_flag & T_WOULDBLOCK)
557 				goto out;
558 		} else
559 			resp->resok.access |= ACCESS3_DELETE;
560 	}
561 	if (args->access & ACCESS3_EXECUTE) {
562 		error = VOP_ACCESS(vp, VEXEC, 0, cr);
563 		if (error) {
564 			if (curthread->t_flag & T_WOULDBLOCK)
565 				goto out;
566 		} else if (!MANDLOCK(vp, va.va_mode))
567 			resp->resok.access |= ACCESS3_EXECUTE;
568 	}
569 
570 #ifdef DEBUG
571 	if (rfs3_do_post_op_attr) {
572 		va.va_mask = AT_ALL;
573 		vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
574 	} else
575 		vap = NULL;
576 #else
577 	va.va_mask = AT_ALL;
578 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
579 #endif
580 
581 	VN_RELE(vp);
582 
583 	resp->status = NFS3_OK;
584 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
585 	return;
586 
587 out:
588 	if (curthread->t_flag & T_WOULDBLOCK) {
589 		curthread->t_flag &= ~T_WOULDBLOCK;
590 		resp->status = NFS3ERR_JUKEBOX;
591 	} else
592 		resp->status = puterrno3(error);
593 	if (vp != NULL)
594 		VN_RELE(vp);
595 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
596 }
597 
598 fhandle_t *
599 rfs3_access_getfh(ACCESS3args *args)
600 {
601 
602 	return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
603 }
604 
605 /* ARGSUSED */
606 void
607 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
608 	struct svc_req *req, cred_t *cr)
609 {
610 	int error;
611 	vnode_t *vp;
612 	struct vattr *vap;
613 	struct vattr va;
614 	struct iovec iov;
615 	struct uio uio;
616 	char *data;
617 
618 	vap = NULL;
619 
620 	vp = nfs3_fhtovp(&args->symlink, exi);
621 	if (vp == NULL) {
622 		error = ESTALE;
623 		goto out;
624 	}
625 
626 	va.va_mask = AT_ALL;
627 	error = VOP_GETATTR(vp, &va, 0, cr);
628 	if (error)
629 		goto out;
630 
631 #ifdef DEBUG
632 	if (rfs3_do_post_op_attr)
633 		vap = &va;
634 #else
635 	vap = &va;
636 #endif
637 
638 	if (vp->v_type != VLNK) {
639 		resp->status = NFS3ERR_INVAL;
640 		goto out1;
641 	}
642 
643 	if (MANDLOCK(vp, va.va_mode)) {
644 		resp->status = NFS3ERR_ACCES;
645 		goto out1;
646 	}
647 
648 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
649 
650 	iov.iov_base = data;
651 	iov.iov_len = MAXPATHLEN;
652 	uio.uio_iov = &iov;
653 	uio.uio_iovcnt = 1;
654 	uio.uio_segflg = UIO_SYSSPACE;
655 	uio.uio_extflg = UIO_COPY_CACHED;
656 	uio.uio_loffset = 0;
657 	uio.uio_resid = MAXPATHLEN;
658 
659 	error = VOP_READLINK(vp, &uio, cr);
660 
661 #ifdef DEBUG
662 	if (rfs3_do_post_op_attr) {
663 		va.va_mask = AT_ALL;
664 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
665 	} else
666 		vap = NULL;
667 #else
668 	va.va_mask = AT_ALL;
669 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
670 #endif
671 
672 #if 0 /* notyet */
673 	/*
674 	 * Don't do this.  It causes local disk writes when just
675 	 * reading the file and the overhead is deemed larger
676 	 * than the benefit.
677 	 */
678 	/*
679 	 * Force modified metadata out to stable storage.
680 	 */
681 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
682 #endif
683 
684 	if (error) {
685 		kmem_free(data, MAXPATHLEN + 1);
686 		goto out;
687 	}
688 
689 	VN_RELE(vp);
690 
691 	resp->status = NFS3_OK;
692 	vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
693 	resp->resok.data = data;
694 	*(data + MAXPATHLEN - uio.uio_resid) = '\0';
695 	return;
696 
697 out:
698 	if (curthread->t_flag & T_WOULDBLOCK) {
699 		curthread->t_flag &= ~T_WOULDBLOCK;
700 		resp->status = NFS3ERR_JUKEBOX;
701 	} else
702 		resp->status = puterrno3(error);
703 out1:
704 	if (vp != NULL)
705 		VN_RELE(vp);
706 	vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
707 }
708 
709 fhandle_t *
710 rfs3_readlink_getfh(READLINK3args *args)
711 {
712 
713 	return ((fhandle_t *)&args->symlink.fh3_u.nfs_fh3_i.fh3_i);
714 }
715 
716 void
717 rfs3_readlink_free(READLINK3res *resp)
718 {
719 
720 	if (resp->status == NFS3_OK)
721 		kmem_free(resp->resok.data, MAXPATHLEN + 1);
722 }
723 
724 /* ARGSUSED */
725 void
726 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
727 	struct svc_req *req, cred_t *cr)
728 {
729 	int error;
730 	vnode_t *vp;
731 	struct vattr *vap;
732 	struct vattr va;
733 	struct iovec iov;
734 	struct uio uio;
735 	u_offset_t offset;
736 	mblk_t *mp;
737 	int alloc_err = 0;
738 	int in_crit = 0;
739 	int need_rwunlock = 0;
740 
741 	vap = NULL;
742 
743 	vp = nfs3_fhtovp(&args->file, exi);
744 	if (vp == NULL) {
745 		error = ESTALE;
746 		goto out;
747 	}
748 
749 	/*
750 	 * Check to see if the v4 side of the server has delegated
751 	 * this file.  If so, then we return JUKEBOX to allow the
752 	 * client to retrasmit its request.
753 	 */
754 	if (rfs4_check_delegated(FREAD, vp, FALSE)) {
755 		resp->status = NFS3ERR_JUKEBOX;
756 		goto out1;
757 	}
758 
759 	/*
760 	 * Enter the critical region before calling VOP_RWLOCK
761 	 * to avoid a deadlock with write requests.
762 	 */
763 	if (nbl_need_check(vp)) {
764 		nbl_start_crit(vp, RW_READER);
765 		in_crit = 1;
766 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0)) {
767 			error = EACCES;
768 			goto out;
769 		}
770 	}
771 
772 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
773 	need_rwunlock = 1;
774 
775 	va.va_mask = AT_ALL;
776 	error = VOP_GETATTR(vp, &va, 0, cr);
777 
778 	/*
779 	 * If we can't get the attributes, then we can't do the
780 	 * right access checking.  So, we'll fail the request.
781 	 */
782 	if (error)
783 		goto out;
784 
785 #ifdef DEBUG
786 	if (rfs3_do_post_op_attr)
787 		vap = &va;
788 #else
789 	vap = &va;
790 #endif
791 
792 	if (vp->v_type != VREG) {
793 		resp->status = NFS3ERR_INVAL;
794 		goto out1;
795 	}
796 
797 	if (crgetuid(cr) != va.va_uid) {
798 		error = VOP_ACCESS(vp, VREAD, 0, cr);
799 		if (error) {
800 			if (curthread->t_flag & T_WOULDBLOCK)
801 				goto out;
802 			error = VOP_ACCESS(vp, VEXEC, 0, cr);
803 			if (error)
804 				goto out;
805 		}
806 	}
807 
808 	if (MANDLOCK(vp, va.va_mode)) {
809 		resp->status = NFS3ERR_ACCES;
810 		goto out1;
811 	}
812 
813 	offset = args->offset;
814 	if (offset >= va.va_size) {
815 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
816 		if (in_crit)
817 			nbl_end_crit(vp);
818 		VN_RELE(vp);
819 		resp->status = NFS3_OK;
820 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
821 		resp->resok.count = 0;
822 		resp->resok.eof = TRUE;
823 		resp->resok.data.data_len = 0;
824 		resp->resok.data.data_val = NULL;
825 		resp->resok.data.mp = NULL;
826 		return;
827 	}
828 
829 	if (args->count == 0) {
830 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
831 		if (in_crit)
832 			nbl_end_crit(vp);
833 		VN_RELE(vp);
834 		resp->status = NFS3_OK;
835 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
836 		resp->resok.count = 0;
837 		resp->resok.eof = FALSE;
838 		resp->resok.data.data_len = 0;
839 		resp->resok.data.data_val = NULL;
840 		resp->resok.data.mp = NULL;
841 		return;
842 	}
843 
844 	/*
845 	 * do not allocate memory more the max. allowed
846 	 * transfer size
847 	 */
848 	if (args->count > rfs3_tsize(req))
849 		args->count = rfs3_tsize(req);
850 
851 	/*
852 	 * mp will contain the data to be sent out in the read reply.
853 	 * This will be freed after the reply has been sent out (by the
854 	 * driver).
855 	 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
856 	 * that the call to xdrmblk_putmblk() never fails.
857 	 */
858 	mp = allocb_wait(RNDUP(args->count), BPRI_MED, STR_NOSIG, &alloc_err);
859 	ASSERT(mp != NULL);
860 	ASSERT(alloc_err == 0);
861 
862 	iov.iov_base = (caddr_t)mp->b_datap->db_base;
863 	iov.iov_len = args->count;
864 	uio.uio_iov = &iov;
865 	uio.uio_iovcnt = 1;
866 	uio.uio_segflg = UIO_SYSSPACE;
867 	uio.uio_extflg = UIO_COPY_CACHED;
868 	uio.uio_loffset = args->offset;
869 	uio.uio_resid = args->count;
870 
871 	error = VOP_READ(vp, &uio, 0, cr, NULL);
872 
873 	if (error) {
874 		freeb(mp);
875 		goto out;
876 	}
877 
878 	va.va_mask = AT_ALL;
879 	error = VOP_GETATTR(vp, &va, 0, cr);
880 
881 #ifdef DEBUG
882 	if (rfs3_do_post_op_attr) {
883 		if (error)
884 			vap = NULL;
885 		else
886 			vap = &va;
887 	} else
888 		vap = NULL;
889 #else
890 	if (error)
891 		vap = NULL;
892 	else
893 		vap = &va;
894 #endif
895 
896 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
897 
898 #if 0 /* notyet */
899 	/*
900 	 * Don't do this.  It causes local disk writes when just
901 	 * reading the file and the overhead is deemed larger
902 	 * than the benefit.
903 	 */
904 	/*
905 	 * Force modified metadata out to stable storage.
906 	 */
907 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
908 #endif
909 
910 	if (in_crit)
911 		nbl_end_crit(vp);
912 	VN_RELE(vp);
913 
914 	resp->status = NFS3_OK;
915 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
916 	resp->resok.count = args->count - uio.uio_resid;
917 	if (!error && offset + resp->resok.count == va.va_size)
918 		resp->resok.eof = TRUE;
919 	else
920 		resp->resok.eof = FALSE;
921 	resp->resok.data.data_len = resp->resok.count;
922 	resp->resok.data.data_val = (char *)mp->b_datap->db_base;
923 
924 	resp->resok.data.mp = mp;
925 
926 	resp->resok.size = (uint_t)args->count;
927 	return;
928 
929 out:
930 	if (curthread->t_flag & T_WOULDBLOCK) {
931 		curthread->t_flag &= ~T_WOULDBLOCK;
932 		resp->status = NFS3ERR_JUKEBOX;
933 	} else
934 		resp->status = puterrno3(error);
935 out1:
936 	if (vp != NULL) {
937 		if (need_rwunlock)
938 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
939 		if (in_crit)
940 			nbl_end_crit(vp);
941 		VN_RELE(vp);
942 	}
943 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
944 }
945 
946 void
947 rfs3_read_free(READ3res *resp)
948 {
949 	mblk_t *mp;
950 
951 	if (resp->status == NFS3_OK) {
952 		mp = resp->resok.data.mp;
953 		if (mp != NULL)
954 			freeb(mp);
955 	}
956 }
957 
958 fhandle_t *
959 rfs3_read_getfh(READ3args *args)
960 {
961 
962 	return ((fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i);
963 }
964 
/*
 * Number of iovec entries rfs3_write() keeps on its stack.  Incoming
 * mblk chains with more links than this use a kmem_alloc'd iovec array
 * instead.
 */
965 #define	MAX_IOVECS	12
966 
/*
 * DEBUG-only counters maintained by rfs3_write(): "hits" counts
 * requests whose mblk chain fit in the MAX_IOVECS on-stack array,
 * "misses" counts those that needed a dynamically allocated array.
 */
967 #ifdef DEBUG
968 static int rfs3_write_hits = 0;
969 static int rfs3_write_misses = 0;
970 #endif
971 
972 void
973 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
974 	struct svc_req *req, cred_t *cr)
975 {
976 	int error;
977 	vnode_t *vp;
978 	struct vattr *bvap = NULL;
979 	struct vattr bva;
980 	struct vattr *avap = NULL;
981 	struct vattr ava;
982 	u_offset_t rlimit;
983 	struct uio uio;
984 	struct iovec iov[MAX_IOVECS];
985 	mblk_t *m;
986 	struct iovec *iovp;
987 	int iovcnt;
988 	int ioflag;
989 	cred_t *savecred;
990 	int in_crit = 0;
991 	int rwlock_ret = -1;
992 
993 	vp = nfs3_fhtovp(&args->file, exi);
994 	if (vp == NULL) {
995 		error = ESTALE;
996 		goto out;
997 	}
998 
999 	/*
1000 	 * Check to see if the v4 side of the server has delegated
1001 	 * this file.  If so, then we return JUKEBOX to allow the
1002 	 * client to retrasmit its request.
1003 	 */
1004 	if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1005 		resp->status = NFS3ERR_JUKEBOX;
1006 		goto out1;
1007 	}
1008 
1009 	/*
1010 	 * We have to enter the critical region before calling VOP_RWLOCK
1011 	 * to avoid a deadlock with ufs.
1012 	 */
1013 	if (nbl_need_check(vp)) {
1014 		nbl_start_crit(vp, RW_READER);
1015 		in_crit = 1;
1016 		if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0)) {
1017 			error = EACCES;
1018 			goto out;
1019 		}
1020 	}
1021 
1022 	rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1023 
1024 	bva.va_mask = AT_ALL;
1025 	error = VOP_GETATTR(vp, &bva, 0, cr);
1026 
1027 	/*
1028 	 * If we can't get the attributes, then we can't do the
1029 	 * right access checking.  So, we'll fail the request.
1030 	 */
1031 	if (error)
1032 		goto out;
1033 
1034 	bvap = &bva;
1035 #ifdef DEBUG
1036 	if (!rfs3_do_pre_op_attr)
1037 		bvap = NULL;
1038 #endif
1039 	avap = bvap;
1040 
1041 	if (args->count != args->data.data_len) {
1042 		resp->status = NFS3ERR_INVAL;
1043 		goto out1;
1044 	}
1045 
1046 	if (rdonly(exi, req)) {
1047 		resp->status = NFS3ERR_ROFS;
1048 		goto out1;
1049 	}
1050 
1051 	if (vp->v_type != VREG) {
1052 		resp->status = NFS3ERR_INVAL;
1053 		goto out1;
1054 	}
1055 
1056 	if (crgetuid(cr) != bva.va_uid &&
1057 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr)))
1058 		goto out;
1059 
1060 	if (MANDLOCK(vp, bva.va_mode)) {
1061 		resp->status = NFS3ERR_ACCES;
1062 		goto out1;
1063 	}
1064 
1065 	if (args->count == 0) {
1066 		VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1067 		VN_RELE(vp);
1068 		resp->status = NFS3_OK;
1069 		vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1070 		resp->resok.count = 0;
1071 		resp->resok.committed = args->stable;
1072 		resp->resok.verf = write3verf;
1073 		return;
1074 	}
1075 
1076 	if (args->mblk != NULL) {
1077 		iovcnt = 0;
1078 		for (m = args->mblk; m != NULL; m = m->b_cont)
1079 			iovcnt++;
1080 		if (iovcnt <= MAX_IOVECS) {
1081 #ifdef DEBUG
1082 			rfs3_write_hits++;
1083 #endif
1084 			iovp = iov;
1085 		} else {
1086 #ifdef DEBUG
1087 			rfs3_write_misses++;
1088 #endif
1089 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1090 		}
1091 		mblk_to_iov(args->mblk, iovcnt, iovp);
1092 	} else {
1093 		iovcnt = 1;
1094 		iovp = iov;
1095 		iovp->iov_base = args->data.data_val;
1096 		iovp->iov_len = args->count;
1097 	}
1098 
1099 	uio.uio_iov = iovp;
1100 	uio.uio_iovcnt = iovcnt;
1101 
1102 	uio.uio_segflg = UIO_SYSSPACE;
1103 	uio.uio_extflg = UIO_COPY_DEFAULT;
1104 	uio.uio_loffset = args->offset;
1105 	uio.uio_resid = args->count;
1106 	uio.uio_llimit = curproc->p_fsz_ctl;
1107 	rlimit = uio.uio_llimit - args->offset;
1108 	if (rlimit < (u_offset_t)uio.uio_resid)
1109 		uio.uio_resid = (int)rlimit;
1110 
1111 	if (args->stable == UNSTABLE)
1112 		ioflag = 0;
1113 	else if (args->stable == FILE_SYNC)
1114 		ioflag = FSYNC;
1115 	else if (args->stable == DATA_SYNC)
1116 		ioflag = FDSYNC;
1117 	else {
1118 		if (iovp != iov)
1119 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
1120 		resp->status = NFS3ERR_INVAL;
1121 		goto out1;
1122 	}
1123 
1124 	/*
1125 	 * We're changing creds because VM may fault and we need
1126 	 * the cred of the current thread to be used if quota
1127 	 * checking is enabled.
1128 	 */
1129 	savecred = curthread->t_cred;
1130 	curthread->t_cred = cr;
1131 	error = VOP_WRITE(vp, &uio, ioflag, cr, NULL);
1132 	curthread->t_cred = savecred;
1133 
1134 	if (iovp != iov)
1135 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
1136 
1137 	ava.va_mask = AT_ALL;
1138 	avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
1139 
1140 #ifdef DEBUG
1141 	if (!rfs3_do_post_op_attr)
1142 		avap = NULL;
1143 #endif
1144 
1145 	if (error)
1146 		goto out;
1147 
1148 	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1149 	if (in_crit)
1150 		nbl_end_crit(vp);
1151 	VN_RELE(vp);
1152 
1153 	/*
1154 	 * If we were unable to get the V_WRITELOCK_TRUE, then we
1155 	 * may not have accurate after attrs, so check if
1156 	 * we have both attributes, they have a non-zero va_seq, and
1157 	 * va_seq has changed by exactly one,
1158 	 * if not, turn off the before attr.
1159 	 */
1160 	if (rwlock_ret != V_WRITELOCK_TRUE) {
1161 		if (bvap == NULL || avap == NULL ||
1162 				bvap->va_seq == 0 || avap->va_seq == 0 ||
1163 				avap->va_seq != (bvap->va_seq + 1)) {
1164 			bvap = NULL;
1165 		}
1166 	}
1167 
1168 	resp->status = NFS3_OK;
1169 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1170 	resp->resok.count = args->count - uio.uio_resid;
1171 	resp->resok.committed = args->stable;
1172 	resp->resok.verf = write3verf;
1173 	return;
1174 
1175 out:
1176 	if (curthread->t_flag & T_WOULDBLOCK) {
1177 		curthread->t_flag &= ~T_WOULDBLOCK;
1178 		resp->status = NFS3ERR_JUKEBOX;
1179 	} else
1180 		resp->status = puterrno3(error);
1181 out1:
1182 	if (vp != NULL) {
1183 		if (rwlock_ret != -1)
1184 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1185 		if (in_crit)
1186 			nbl_end_crit(vp);
1187 		VN_RELE(vp);
1188 	}
1189 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1190 }
1191 
1192 fhandle_t *
1193 rfs3_write_getfh(WRITE3args *args)
1194 {
1195 
1196 	return ((fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i);
1197 }
1198 
1199 void
1200 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1201 	struct svc_req *req, cred_t *cr)
1202 {
1203 	int error;
1204 	int in_crit = 0;
1205 	vnode_t *vp;
1206 	vnode_t *tvp = NULL;
1207 	vnode_t *dvp;
1208 	struct vattr *vap;
1209 	struct vattr va;
1210 	struct vattr *dbvap;
1211 	struct vattr dbva;
1212 	struct vattr *davap;
1213 	struct vattr dava;
1214 	enum vcexcl excl;
1215 	nfstime3 *mtime;
1216 	len_t reqsize;
1217 	bool_t trunc;
1218 
1219 	dbvap = NULL;
1220 	davap = NULL;
1221 
1222 	dvp = nfs3_fhtovp(args->where.dirp, exi);
1223 	if (dvp == NULL) {
1224 		error = ESTALE;
1225 		goto out;
1226 	}
1227 
1228 #ifdef DEBUG
1229 	if (rfs3_do_pre_op_attr) {
1230 		dbva.va_mask = AT_ALL;
1231 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1232 	} else
1233 		dbvap = NULL;
1234 #else
1235 	dbva.va_mask = AT_ALL;
1236 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1237 #endif
1238 	davap = dbvap;
1239 
1240 	if (args->where.name == nfs3nametoolong) {
1241 		resp->status = NFS3ERR_NAMETOOLONG;
1242 		goto out1;
1243 	}
1244 
1245 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1246 		resp->status = NFS3ERR_ACCES;
1247 		goto out1;
1248 	}
1249 
1250 	if (rdonly(exi, req)) {
1251 		resp->status = NFS3ERR_ROFS;
1252 		goto out1;
1253 	}
1254 
1255 	if (args->how.mode == EXCLUSIVE) {
1256 		va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1257 		va.va_type = VREG;
1258 		va.va_mode = (mode_t)0;
1259 		/*
1260 		 * Ensure no time overflows and that types match
1261 		 */
1262 		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1263 		va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1264 		va.va_mtime.tv_nsec = mtime->nseconds;
1265 		excl = EXCL;
1266 	} else {
1267 		error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1268 		    &va);
1269 		if (error)
1270 			goto out;
1271 		va.va_mask |= AT_TYPE;
1272 		va.va_type = VREG;
1273 		if (args->how.mode == GUARDED)
1274 			excl = EXCL;
1275 		else {
1276 			excl = NONEXCL;
1277 
1278 			/*
1279 			 * During creation of file in non-exclusive mode
1280 			 * if size of file is being set then make sure
1281 			 * that if the file already exists that no conflicting
1282 			 * non-blocking mandatory locks exists in the region
1283 			 * being modified. If there are conflicting locks fail
1284 			 * the operation with EACCES.
1285 			 */
1286 			if (va.va_mask & AT_SIZE) {
1287 				struct vattr tva;
1288 
1289 				/*
1290 				 * Does file already exist?
1291 				 */
1292 				error = VOP_LOOKUP(dvp, args->where.name, &tvp,
1293 						NULL, 0, NULL, cr);
1294 
1295 				/*
1296 				 * Check to see if the file has been delegated
1297 				 * to a v4 client.  If so, then begin recall of
1298 				 * the delegation and return JUKEBOX to allow
1299 				 * the client to retrasmit its request.
1300 				 */
1301 
1302 				trunc = va.va_size == 0;
1303 				if (!error &&
1304 				    rfs4_check_delegated(FWRITE, tvp, trunc)) {
1305 					resp->status = NFS3ERR_JUKEBOX;
1306 					goto out1;
1307 				}
1308 
1309 				/*
1310 				 * Check for NBMAND lock conflicts
1311 				 */
1312 				if (!error && nbl_need_check(tvp)) {
1313 					u_offset_t offset;
1314 					ssize_t len;
1315 
1316 					nbl_start_crit(tvp, RW_READER);
1317 					in_crit = 1;
1318 
1319 					tva.va_mask = AT_SIZE;
1320 					error = VOP_GETATTR(tvp, &tva, 0, cr);
1321 					/*
1322 					 * Can't check for conflicts, so return
1323 					 * error.
1324 					 */
1325 					if (error)
1326 						goto out;
1327 
1328 					offset = tva.va_size < va.va_size ?
1329 						tva.va_size : va.va_size;
1330 					len = tva.va_size < va.va_size ?
1331 						va.va_size - tva.va_size :
1332 						tva.va_size - va.va_size;
1333 					if (nbl_conflict(tvp, NBL_WRITE,
1334 							offset, len, 0)) {
1335 						error = EACCES;
1336 						goto out;
1337 					}
1338 				} else if (tvp) {
1339 					VN_RELE(tvp);
1340 					tvp = NULL;
1341 				}
1342 			}
1343 		}
1344 		if (va.va_mask & AT_SIZE)
1345 			reqsize = va.va_size;
1346 	}
1347 
1348 	/*
1349 	 * Must specify the mode.
1350 	 */
1351 	if (!(va.va_mask & AT_MODE)) {
1352 		resp->status = NFS3ERR_INVAL;
1353 		goto out1;
1354 	}
1355 
1356 	/*
1357 	 * If the filesystem is exported with nosuid, then mask off
1358 	 * the setuid and setgid bits.
1359 	 */
1360 	if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1361 		va.va_mode &= ~(VSUID | VSGID);
1362 
1363 tryagain:
1364 	/*
1365 	 * The file open mode used is VWRITE.  If the client needs
1366 	 * some other semantic, then it should do the access checking
1367 	 * itself.  It would have been nice to have the file open mode
1368 	 * passed as part of the arguments.
1369 	 */
1370 	error = VOP_CREATE(dvp, args->where.name, &va, excl, VWRITE,
1371 	    &vp, cr, 0);
1372 
1373 #ifdef DEBUG
1374 	if (rfs3_do_post_op_attr) {
1375 		dava.va_mask = AT_ALL;
1376 		davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1377 	} else
1378 		davap = NULL;
1379 #else
1380 	dava.va_mask = AT_ALL;
1381 	davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1382 #endif
1383 
1384 	if (error) {
1385 		/*
1386 		 * If we got something other than file already exists
1387 		 * then just return this error.  Otherwise, we got
1388 		 * EEXIST.  If we were doing a GUARDED create, then
1389 		 * just return this error.  Otherwise, we need to
1390 		 * make sure that this wasn't a duplicate of an
1391 		 * exclusive create request.
1392 		 *
1393 		 * The assumption is made that a non-exclusive create
1394 		 * request will never return EEXIST.
1395 		 */
1396 		if (error != EEXIST || args->how.mode == GUARDED)
1397 			goto out;
1398 		/*
1399 		 * Lookup the file so that we can get a vnode for it.
1400 		 */
1401 		error = VOP_LOOKUP(dvp, args->where.name, &vp, NULL, 0,
1402 		    NULL, cr);
1403 		if (error) {
1404 			/*
1405 			 * We couldn't find the file that we thought that
1406 			 * we just created.  So, we'll just try creating
1407 			 * it again.
1408 			 */
1409 			if (error == ENOENT)
1410 				goto tryagain;
1411 			goto out;
1412 		}
1413 
1414 		/*
1415 		 * If the file is delegated to a v4 client, go ahead
1416 		 * and initiate recall, this create is a hint that a
1417 		 * conflicting v3 open has occurred.
1418 		 */
1419 
1420 		if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1421 			VN_RELE(vp);
1422 			resp->status = NFS3ERR_JUKEBOX;
1423 			goto out1;
1424 		}
1425 
1426 		va.va_mask = AT_ALL;
1427 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1428 
1429 		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1430 		/* % with INT32_MAX to prevent overflows */
1431 		if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1432 		    vap->va_mtime.tv_sec !=
1433 		    (mtime->seconds % INT32_MAX) ||
1434 		    vap->va_mtime.tv_nsec != mtime->nseconds)) {
1435 			VN_RELE(vp);
1436 			error = EEXIST;
1437 			goto out;
1438 		}
1439 	} else {
1440 
1441 		if ((args->how.mode == UNCHECKED ||
1442 		    args->how.mode == GUARDED) &&
1443 		    args->how.createhow3_u.obj_attributes.size.set_it &&
1444 		    va.va_size == 0)
1445 			trunc = TRUE;
1446 		else
1447 			trunc = FALSE;
1448 
1449 		if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1450 			VN_RELE(vp);
1451 			resp->status = NFS3ERR_JUKEBOX;
1452 			goto out1;
1453 		}
1454 
1455 		va.va_mask = AT_ALL;
1456 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1457 
1458 		/*
1459 		 * We need to check to make sure that the file got
1460 		 * created to the indicated size.  If not, we do a
1461 		 * setattr to try to change the size, but we don't
1462 		 * try too hard.  This shouldn't a problem as most
1463 		 * clients will only specifiy a size of zero which
1464 		 * local file systems handle.  However, even if
1465 		 * the client does specify a non-zero size, it can
1466 		 * still recover by checking the size of the file
1467 		 * after it has created it and then issue a setattr
1468 		 * request of its own to set the size of the file.
1469 		 */
1470 		if (vap != NULL &&
1471 		    (args->how.mode == UNCHECKED ||
1472 		    args->how.mode == GUARDED) &&
1473 		    args->how.createhow3_u.obj_attributes.size.set_it &&
1474 		    vap->va_size != reqsize) {
1475 			va.va_mask = AT_SIZE;
1476 			va.va_size = reqsize;
1477 			(void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1478 			va.va_mask = AT_ALL;
1479 			vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1480 		}
1481 	}
1482 
1483 #ifdef DEBUG
1484 	if (!rfs3_do_post_op_attr)
1485 		vap = NULL;
1486 #endif
1487 
1488 #ifdef DEBUG
1489 	if (!rfs3_do_post_op_fh3)
1490 		resp->resok.obj.handle_follows = FALSE;
1491 	else {
1492 #endif
1493 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1494 	if (error)
1495 		resp->resok.obj.handle_follows = FALSE;
1496 	else
1497 		resp->resok.obj.handle_follows = TRUE;
1498 #ifdef DEBUG
1499 	}
1500 #endif
1501 
1502 	/*
1503 	 * Force modified data and metadata out to stable storage.
1504 	 */
1505 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
1506 	(void) VOP_FSYNC(dvp, 0, cr);
1507 
1508 	VN_RELE(vp);
1509 	VN_RELE(dvp);
1510 	if (tvp != NULL) {
1511 		if (in_crit)
1512 			nbl_end_crit(tvp);
1513 		VN_RELE(tvp);
1514 	}
1515 
1516 	resp->status = NFS3_OK;
1517 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1518 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1519 	return;
1520 
1521 out:
1522 	if (curthread->t_flag & T_WOULDBLOCK) {
1523 		curthread->t_flag &= ~T_WOULDBLOCK;
1524 		resp->status = NFS3ERR_JUKEBOX;
1525 	} else
1526 		resp->status = puterrno3(error);
1527 out1:
1528 	if (tvp != NULL) {
1529 		if (in_crit)
1530 			nbl_end_crit(tvp);
1531 		VN_RELE(tvp);
1532 	}
1533 	if (dvp != NULL)
1534 		VN_RELE(dvp);
1535 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1536 }
1537 
1538 fhandle_t *
1539 rfs3_create_getfh(CREATE3args *args)
1540 {
1541 
1542 	return ((fhandle_t *)&args->where.dir.fh3_u.nfs_fh3_i.fh3_i);
1543 }
1544 
1545 void
1546 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1547 	struct svc_req *req, cred_t *cr)
1548 {
1549 	int error;
1550 	vnode_t *vp = NULL;
1551 	vnode_t *dvp;
1552 	struct vattr *vap;
1553 	struct vattr va;
1554 	struct vattr *dbvap;
1555 	struct vattr dbva;
1556 	struct vattr *davap;
1557 	struct vattr dava;
1558 
1559 	dbvap = NULL;
1560 	davap = NULL;
1561 
1562 	dvp = nfs3_fhtovp(args->where.dirp, exi);
1563 	if (dvp == NULL) {
1564 		error = ESTALE;
1565 		goto out;
1566 	}
1567 
1568 #ifdef DEBUG
1569 	if (rfs3_do_pre_op_attr) {
1570 		dbva.va_mask = AT_ALL;
1571 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1572 	} else
1573 		dbvap = NULL;
1574 #else
1575 	dbva.va_mask = AT_ALL;
1576 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1577 #endif
1578 	davap = dbvap;
1579 
1580 	if (args->where.name == nfs3nametoolong) {
1581 		resp->status = NFS3ERR_NAMETOOLONG;
1582 		goto out1;
1583 	}
1584 
1585 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1586 		resp->status = NFS3ERR_ACCES;
1587 		goto out1;
1588 	}
1589 
1590 	if (rdonly(exi, req)) {
1591 		resp->status = NFS3ERR_ROFS;
1592 		goto out1;
1593 	}
1594 
1595 	error = sattr3_to_vattr(&args->attributes, &va);
1596 	if (error)
1597 		goto out;
1598 
1599 	if (!(va.va_mask & AT_MODE)) {
1600 		resp->status = NFS3ERR_INVAL;
1601 		goto out1;
1602 	}
1603 
1604 	va.va_mask |= AT_TYPE;
1605 	va.va_type = VDIR;
1606 
1607 	error = VOP_MKDIR(dvp, args->where.name, &va, &vp, cr);
1608 
1609 #ifdef DEBUG
1610 	if (rfs3_do_post_op_attr) {
1611 		dava.va_mask = AT_ALL;
1612 		davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1613 	} else
1614 		davap = NULL;
1615 #else
1616 	dava.va_mask = AT_ALL;
1617 	davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1618 #endif
1619 
1620 	/*
1621 	 * Force modified data and metadata out to stable storage.
1622 	 */
1623 	(void) VOP_FSYNC(dvp, 0, cr);
1624 
1625 	if (error)
1626 		goto out;
1627 
1628 	VN_RELE(dvp);
1629 
1630 #ifdef DEBUG
1631 	if (!rfs3_do_post_op_fh3)
1632 		resp->resok.obj.handle_follows = FALSE;
1633 	else {
1634 #endif
1635 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1636 	if (error)
1637 		resp->resok.obj.handle_follows = FALSE;
1638 	else
1639 		resp->resok.obj.handle_follows = TRUE;
1640 #ifdef DEBUG
1641 	}
1642 #endif
1643 
1644 #ifdef DEBUG
1645 	if (rfs3_do_post_op_attr) {
1646 		va.va_mask = AT_ALL;
1647 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1648 	} else
1649 		vap = NULL;
1650 #else
1651 	va.va_mask = AT_ALL;
1652 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1653 #endif
1654 
1655 	/*
1656 	 * Force modified data and metadata out to stable storage.
1657 	 */
1658 	(void) VOP_FSYNC(vp, 0, cr);
1659 
1660 	VN_RELE(vp);
1661 
1662 	resp->status = NFS3_OK;
1663 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1664 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1665 	return;
1666 
1667 out:
1668 	if (curthread->t_flag & T_WOULDBLOCK) {
1669 		curthread->t_flag &= ~T_WOULDBLOCK;
1670 		resp->status = NFS3ERR_JUKEBOX;
1671 	} else
1672 		resp->status = puterrno3(error);
1673 out1:
1674 	if (dvp != NULL)
1675 		VN_RELE(dvp);
1676 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1677 }
1678 
1679 fhandle_t *
1680 rfs3_mkdir_getfh(MKDIR3args *args)
1681 {
1682 
1683 	return ((fhandle_t *)&args->where.dir.fh3_u.nfs_fh3_i.fh3_i);
1684 }
1685 
1686 void
1687 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
1688 	struct svc_req *req, cred_t *cr)
1689 {
1690 	int error;
1691 	vnode_t *vp;
1692 	vnode_t *dvp;
1693 	struct vattr *vap;
1694 	struct vattr va;
1695 	struct vattr *dbvap;
1696 	struct vattr dbva;
1697 	struct vattr *davap;
1698 	struct vattr dava;
1699 
1700 	dbvap = NULL;
1701 	davap = NULL;
1702 
1703 	dvp = nfs3_fhtovp(args->where.dirp, exi);
1704 	if (dvp == NULL) {
1705 		error = ESTALE;
1706 		goto out;
1707 	}
1708 
1709 #ifdef DEBUG
1710 	if (rfs3_do_pre_op_attr) {
1711 		dbva.va_mask = AT_ALL;
1712 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1713 	} else
1714 		dbvap = NULL;
1715 #else
1716 	dbva.va_mask = AT_ALL;
1717 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1718 #endif
1719 	davap = dbvap;
1720 
1721 	if (args->where.name == nfs3nametoolong) {
1722 		resp->status = NFS3ERR_NAMETOOLONG;
1723 		goto out1;
1724 	}
1725 
1726 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1727 		resp->status = NFS3ERR_ACCES;
1728 		goto out1;
1729 	}
1730 
1731 	if (rdonly(exi, req)) {
1732 		resp->status = NFS3ERR_ROFS;
1733 		goto out1;
1734 	}
1735 
1736 	error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
1737 	if (error)
1738 		goto out;
1739 
1740 	if (!(va.va_mask & AT_MODE)) {
1741 		resp->status = NFS3ERR_INVAL;
1742 		goto out1;
1743 	}
1744 
1745 	if (args->symlink.symlink_data == nfs3nametoolong) {
1746 		resp->status = NFS3ERR_NAMETOOLONG;
1747 		goto out1;
1748 	}
1749 
1750 	va.va_mask |= AT_TYPE;
1751 	va.va_type = VLNK;
1752 
1753 	error = VOP_SYMLINK(dvp, args->where.name, &va,
1754 	    args->symlink.symlink_data, cr);
1755 
1756 #ifdef DEBUG
1757 	if (rfs3_do_post_op_attr) {
1758 		dava.va_mask = AT_ALL;
1759 		davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1760 	} else
1761 		davap = NULL;
1762 #else
1763 	dava.va_mask = AT_ALL;
1764 	davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1765 #endif
1766 
1767 	if (error)
1768 		goto out;
1769 
1770 	error = VOP_LOOKUP(dvp, args->where.name, &vp, NULL, 0, NULL, cr);
1771 
1772 	/*
1773 	 * Force modified data and metadata out to stable storage.
1774 	 */
1775 	(void) VOP_FSYNC(dvp, 0, cr);
1776 
1777 	VN_RELE(dvp);
1778 
1779 	resp->status = NFS3_OK;
1780 	if (error) {
1781 		resp->resok.obj.handle_follows = FALSE;
1782 		vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
1783 		vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1784 		return;
1785 	}
1786 
1787 #ifdef DEBUG
1788 	if (!rfs3_do_post_op_fh3)
1789 		resp->resok.obj.handle_follows = FALSE;
1790 	else {
1791 #endif
1792 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1793 	if (error)
1794 		resp->resok.obj.handle_follows = FALSE;
1795 	else
1796 		resp->resok.obj.handle_follows = TRUE;
1797 #ifdef DEBUG
1798 	}
1799 #endif
1800 
1801 #ifdef DEBUG
1802 	if (rfs3_do_post_op_attr) {
1803 		va.va_mask = AT_ALL;
1804 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1805 	} else
1806 		vap = NULL;
1807 #else
1808 	va.va_mask = AT_ALL;
1809 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1810 #endif
1811 
1812 	/*
1813 	 * Force modified data and metadata out to stable storage.
1814 	 */
1815 	(void) VOP_FSYNC(vp, 0, cr);
1816 
1817 	VN_RELE(vp);
1818 
1819 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1820 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1821 	return;
1822 
1823 out:
1824 	if (curthread->t_flag & T_WOULDBLOCK) {
1825 		curthread->t_flag &= ~T_WOULDBLOCK;
1826 		resp->status = NFS3ERR_JUKEBOX;
1827 	} else
1828 		resp->status = puterrno3(error);
1829 out1:
1830 	if (dvp != NULL)
1831 		VN_RELE(dvp);
1832 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1833 }
1834 
1835 fhandle_t *
1836 rfs3_symlink_getfh(SYMLINK3args *args)
1837 {
1838 
1839 	return ((fhandle_t *)&args->where.dirp->fh3_u.nfs_fh3_i.fh3_i);
1840 }
1841 
1842 void
1843 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
1844 	struct svc_req *req, cred_t *cr)
1845 {
1846 	int error;
1847 	vnode_t *vp;
1848 	vnode_t *dvp;
1849 	struct vattr *vap;
1850 	struct vattr va;
1851 	struct vattr *dbvap;
1852 	struct vattr dbva;
1853 	struct vattr *davap;
1854 	struct vattr dava;
1855 	int mode;
1856 	enum vcexcl excl;
1857 
1858 	dbvap = NULL;
1859 	davap = NULL;
1860 
1861 	dvp = nfs3_fhtovp(args->where.dirp, exi);
1862 	if (dvp == NULL) {
1863 		error = ESTALE;
1864 		goto out;
1865 	}
1866 
1867 #ifdef DEBUG
1868 	if (rfs3_do_pre_op_attr) {
1869 		dbva.va_mask = AT_ALL;
1870 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1871 	} else
1872 		dbvap = NULL;
1873 #else
1874 	dbva.va_mask = AT_ALL;
1875 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1876 #endif
1877 	davap = dbvap;
1878 
1879 	if (args->where.name == nfs3nametoolong) {
1880 		resp->status = NFS3ERR_NAMETOOLONG;
1881 		goto out1;
1882 	}
1883 
1884 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1885 		resp->status = NFS3ERR_ACCES;
1886 		goto out1;
1887 	}
1888 
1889 	if (rdonly(exi, req)) {
1890 		resp->status = NFS3ERR_ROFS;
1891 		goto out1;
1892 	}
1893 
1894 	switch (args->what.type) {
1895 	case NF3CHR:
1896 	case NF3BLK:
1897 		error = sattr3_to_vattr(
1898 		    &args->what.mknoddata3_u.device.dev_attributes, &va);
1899 		if (error)
1900 			goto out;
1901 		if (secpolicy_sys_devices(cr) != 0) {
1902 			resp->status = NFS3ERR_PERM;
1903 			goto out1;
1904 		}
1905 		if (args->what.type == NF3CHR)
1906 			va.va_type = VCHR;
1907 		else
1908 			va.va_type = VBLK;
1909 		va.va_rdev = makedevice(
1910 		    args->what.mknoddata3_u.device.spec.specdata1,
1911 		    args->what.mknoddata3_u.device.spec.specdata2);
1912 		va.va_mask |= AT_TYPE | AT_RDEV;
1913 		break;
1914 	case NF3SOCK:
1915 		error = sattr3_to_vattr(
1916 		    &args->what.mknoddata3_u.pipe_attributes, &va);
1917 		if (error)
1918 			goto out;
1919 		va.va_type = VSOCK;
1920 		va.va_mask |= AT_TYPE;
1921 		break;
1922 	case NF3FIFO:
1923 		error = sattr3_to_vattr(
1924 		    &args->what.mknoddata3_u.pipe_attributes, &va);
1925 		if (error)
1926 			goto out;
1927 		va.va_type = VFIFO;
1928 		va.va_mask |= AT_TYPE;
1929 		break;
1930 	default:
1931 		resp->status = NFS3ERR_BADTYPE;
1932 		goto out1;
1933 	}
1934 
1935 	/*
1936 	 * Must specify the mode.
1937 	 */
1938 	if (!(va.va_mask & AT_MODE)) {
1939 		resp->status = NFS3ERR_INVAL;
1940 		goto out1;
1941 	}
1942 
1943 	excl = EXCL;
1944 
1945 	mode = 0;
1946 
1947 	error = VOP_CREATE(dvp, args->where.name, &va, excl, mode,
1948 	    &vp, cr, 0);
1949 
1950 #ifdef DEBUG
1951 	if (rfs3_do_post_op_attr) {
1952 		dava.va_mask = AT_ALL;
1953 		davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1954 	} else
1955 		davap = NULL;
1956 #else
1957 	dava.va_mask = AT_ALL;
1958 	davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1959 #endif
1960 
1961 	/*
1962 	 * Force modified data and metadata out to stable storage.
1963 	 */
1964 	(void) VOP_FSYNC(dvp, 0, cr);
1965 
1966 	if (error)
1967 		goto out;
1968 
1969 	VN_RELE(dvp);
1970 
1971 	resp->status = NFS3_OK;
1972 
1973 #ifdef DEBUG
1974 	if (!rfs3_do_post_op_fh3)
1975 		resp->resok.obj.handle_follows = FALSE;
1976 	else {
1977 #endif
1978 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1979 	if (error)
1980 		resp->resok.obj.handle_follows = FALSE;
1981 	else
1982 		resp->resok.obj.handle_follows = TRUE;
1983 #ifdef DEBUG
1984 	}
1985 #endif
1986 
1987 #ifdef DEBUG
1988 	if (rfs3_do_post_op_attr) {
1989 		va.va_mask = AT_ALL;
1990 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1991 	} else
1992 		vap = NULL;
1993 #else
1994 	va.va_mask = AT_ALL;
1995 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1996 #endif
1997 
1998 	/*
1999 	 * Force modified metadata out to stable storage.
2000 	 */
2001 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
2002 
2003 	VN_RELE(vp);
2004 
2005 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2006 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2007 	return;
2008 
2009 out:
2010 	if (curthread->t_flag & T_WOULDBLOCK) {
2011 		curthread->t_flag &= ~T_WOULDBLOCK;
2012 		resp->status = NFS3ERR_JUKEBOX;
2013 	} else
2014 		resp->status = puterrno3(error);
2015 out1:
2016 	if (dvp != NULL)
2017 		VN_RELE(dvp);
2018 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2019 }
2020 
2021 fhandle_t *
2022 rfs3_mknod_getfh(MKNOD3args *args)
2023 {
2024 
2025 	return ((fhandle_t *)&args->where.dirp->fh3_u.nfs_fh3_i.fh3_i);
2026 }
2027 
2028 void
2029 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2030 	struct svc_req *req, cred_t *cr)
2031 {
2032 	int error = 0;
2033 	vnode_t *vp;
2034 	struct vattr *bvap;
2035 	struct vattr bva;
2036 	struct vattr *avap;
2037 	struct vattr ava;
2038 	vnode_t *targvp = NULL;
2039 
2040 	bvap = NULL;
2041 	avap = NULL;
2042 
2043 	vp = nfs3_fhtovp(args->object.dirp, exi);
2044 	if (vp == NULL) {
2045 		error = ESTALE;
2046 		goto out;
2047 	}
2048 
2049 #ifdef DEBUG
2050 	if (rfs3_do_pre_op_attr) {
2051 		bva.va_mask = AT_ALL;
2052 		bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2053 	} else
2054 		bvap = NULL;
2055 #else
2056 	bva.va_mask = AT_ALL;
2057 	bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2058 #endif
2059 	avap = bvap;
2060 
2061 	if (vp->v_type != VDIR) {
2062 		resp->status = NFS3ERR_NOTDIR;
2063 		goto out1;
2064 	}
2065 
2066 	if (args->object.name == nfs3nametoolong) {
2067 		resp->status = NFS3ERR_NAMETOOLONG;
2068 		goto out1;
2069 	}
2070 
2071 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2072 		resp->status = NFS3ERR_ACCES;
2073 		goto out1;
2074 	}
2075 
2076 	if (rdonly(exi, req)) {
2077 		resp->status = NFS3ERR_ROFS;
2078 		goto out1;
2079 	}
2080 
2081 	/*
2082 	 * Check for a conflict with a non-blocking mandatory share
2083 	 * reservation and V4 delegations
2084 	 */
2085 	error = VOP_LOOKUP(vp, args->object.name, &targvp, NULL, 0,
2086 			NULL, cr);
2087 	if (error != 0)
2088 		goto out;
2089 
2090 	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2091 		resp->status = NFS3ERR_JUKEBOX;
2092 		goto out1;
2093 	}
2094 
2095 	if (!nbl_need_check(targvp)) {
2096 		error = VOP_REMOVE(vp, args->object.name, cr);
2097 	} else {
2098 		nbl_start_crit(targvp, RW_READER);
2099 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0)) {
2100 			error = EACCES;
2101 		} else {
2102 			error = VOP_REMOVE(vp, args->object.name, cr);
2103 		}
2104 		nbl_end_crit(targvp);
2105 	}
2106 	VN_RELE(targvp);
2107 	targvp = NULL;
2108 
2109 #ifdef DEBUG
2110 	if (rfs3_do_post_op_attr) {
2111 		ava.va_mask = AT_ALL;
2112 		avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2113 	} else
2114 		avap = NULL;
2115 #else
2116 	ava.va_mask = AT_ALL;
2117 	avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2118 #endif
2119 
2120 	/*
2121 	 * Force modified data and metadata out to stable storage.
2122 	 */
2123 	(void) VOP_FSYNC(vp, 0, cr);
2124 
2125 	if (error)
2126 		goto out;
2127 
2128 	VN_RELE(vp);
2129 
2130 	resp->status = NFS3_OK;
2131 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2132 	return;
2133 
2134 out:
2135 	if (curthread->t_flag & T_WOULDBLOCK) {
2136 		curthread->t_flag &= ~T_WOULDBLOCK;
2137 		resp->status = NFS3ERR_JUKEBOX;
2138 	} else
2139 		resp->status = puterrno3(error);
2140 out1:
2141 	if (vp != NULL)
2142 		VN_RELE(vp);
2143 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2144 }
2145 
2146 fhandle_t *
2147 rfs3_remove_getfh(REMOVE3args *args)
2148 {
2149 
2150 	return ((fhandle_t *)&args->object.dirp->fh3_u.nfs_fh3_i.fh3_i);
2151 }
2152 
2153 void
2154 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2155 	struct svc_req *req, cred_t *cr)
2156 {
2157 	int error;
2158 	vnode_t *vp;
2159 	struct vattr *bvap;
2160 	struct vattr bva;
2161 	struct vattr *avap;
2162 	struct vattr ava;
2163 
2164 	bvap = NULL;
2165 	avap = NULL;
2166 
2167 	vp = nfs3_fhtovp(args->object.dirp, exi);
2168 	if (vp == NULL) {
2169 		error = ESTALE;
2170 		goto out;
2171 	}
2172 
2173 #ifdef DEBUG
2174 	if (rfs3_do_pre_op_attr) {
2175 		bva.va_mask = AT_ALL;
2176 		bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2177 	} else
2178 		bvap = NULL;
2179 #else
2180 	bva.va_mask = AT_ALL;
2181 	bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2182 #endif
2183 	avap = bvap;
2184 
2185 	if (vp->v_type != VDIR) {
2186 		resp->status = NFS3ERR_NOTDIR;
2187 		goto out1;
2188 	}
2189 
2190 	if (args->object.name == nfs3nametoolong) {
2191 		resp->status = NFS3ERR_NAMETOOLONG;
2192 		goto out1;
2193 	}
2194 
2195 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2196 		resp->status = NFS3ERR_ACCES;
2197 		goto out1;
2198 	}
2199 
2200 	if (rdonly(exi, req)) {
2201 		resp->status = NFS3ERR_ROFS;
2202 		goto out1;
2203 	}
2204 
2205 	error = VOP_RMDIR(vp, args->object.name, rootdir, cr);
2206 
2207 #ifdef DEBUG
2208 	if (rfs3_do_post_op_attr) {
2209 		ava.va_mask = AT_ALL;
2210 		avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2211 	} else
2212 		avap = NULL;
2213 #else
2214 	ava.va_mask = AT_ALL;
2215 	avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2216 #endif
2217 
2218 	/*
2219 	 * Force modified data and metadata out to stable storage.
2220 	 */
2221 	(void) VOP_FSYNC(vp, 0, cr);
2222 
2223 	if (error) {
2224 		/*
2225 		 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2226 		 * if the directory is not empty.  A System V NFS server
2227 		 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2228 		 * over the wire.
2229 		 */
2230 		if (error == EEXIST)
2231 			error = ENOTEMPTY;
2232 		goto out;
2233 	}
2234 
2235 	VN_RELE(vp);
2236 
2237 	resp->status = NFS3_OK;
2238 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2239 	return;
2240 
2241 out:
2242 	if (curthread->t_flag & T_WOULDBLOCK) {
2243 		curthread->t_flag &= ~T_WOULDBLOCK;
2244 		resp->status = NFS3ERR_JUKEBOX;
2245 	} else
2246 		resp->status = puterrno3(error);
2247 out1:
2248 	if (vp != NULL)
2249 		VN_RELE(vp);
2250 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2251 }
2252 
2253 fhandle_t *
2254 rfs3_rmdir_getfh(RMDIR3args *args)
2255 {
2256 
2257 	return ((fhandle_t *)&args->object.dirp->fh3_u.nfs_fh3_i.fh3_i);
2258 }
2259 
2260 void
2261 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2262 	struct svc_req *req, cred_t *cr)
2263 {
2264 	int error = 0;
2265 	vnode_t *fvp;
2266 	vnode_t *tvp;
2267 	vnode_t *targvp;
2268 	struct vattr *fbvap;
2269 	struct vattr fbva;
2270 	struct vattr *favap;
2271 	struct vattr fava;
2272 	struct vattr *tbvap;
2273 	struct vattr tbva;
2274 	struct vattr *tavap;
2275 	struct vattr tava;
2276 	nfs_fh3	*fh3;
2277 	struct exportinfo *to_exi;
2278 	vnode_t *srcvp = NULL;
2279 
2280 	fbvap = NULL;
2281 	favap = NULL;
2282 	tbvap = NULL;
2283 	tavap = NULL;
2284 	tvp = NULL;
2285 
2286 	fvp = nfs3_fhtovp(args->from.dirp, exi);
2287 	if (fvp == NULL) {
2288 		error = ESTALE;
2289 		goto out;
2290 	}
2291 
2292 #ifdef DEBUG
2293 	if (rfs3_do_pre_op_attr) {
2294 		fbva.va_mask = AT_ALL;
2295 		fbvap = VOP_GETATTR(fvp, &fbva, 0, cr) ? NULL : &fbva;
2296 	} else
2297 		fbvap = NULL;
2298 #else
2299 	fbva.va_mask = AT_ALL;
2300 	fbvap = VOP_GETATTR(fvp, &fbva, 0, cr) ? NULL : &fbva;
2301 #endif
2302 	favap = fbvap;
2303 
2304 	fh3 = args->to.dirp;
2305 	to_exi = checkexport(&fh3->fh3_fsid, (fid_t *)&fh3->fh3_xlen);
2306 	if (to_exi == NULL) {
2307 		resp->status = NFS3ERR_ACCES;
2308 		goto out1;
2309 	}
2310 	exi_rele(to_exi);
2311 
2312 	if (to_exi != exi) {
2313 		resp->status = NFS3ERR_XDEV;
2314 		goto out1;
2315 	}
2316 
2317 	tvp = nfs3_fhtovp(args->to.dirp, exi);
2318 	if (tvp == NULL) {
2319 		error = ESTALE;
2320 		goto out;
2321 	}
2322 
2323 #ifdef DEBUG
2324 	if (rfs3_do_pre_op_attr) {
2325 		tbva.va_mask = AT_ALL;
2326 		tbvap = VOP_GETATTR(tvp, &tbva, 0, cr) ? NULL : &tbva;
2327 	} else
2328 		tbvap = NULL;
2329 #else
2330 	tbva.va_mask = AT_ALL;
2331 	tbvap = VOP_GETATTR(tvp, &tbva, 0, cr) ? NULL : &tbva;
2332 #endif
2333 	tavap = tbvap;
2334 
2335 	if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2336 		resp->status = NFS3ERR_NOTDIR;
2337 		goto out1;
2338 	}
2339 
2340 	if (args->from.name == nfs3nametoolong ||
2341 	    args->to.name == nfs3nametoolong) {
2342 		resp->status = NFS3ERR_NAMETOOLONG;
2343 		goto out1;
2344 	}
2345 	if (args->from.name == NULL || *(args->from.name) == '\0' ||
2346 	    args->to.name == NULL || *(args->to.name) == '\0') {
2347 		resp->status = NFS3ERR_ACCES;
2348 		goto out1;
2349 	}
2350 
2351 	if (rdonly(exi, req)) {
2352 		resp->status = NFS3ERR_ROFS;
2353 		goto out1;
2354 	}
2355 
2356 	/*
2357 	 * Check for a conflict with a non-blocking mandatory share
2358 	 * reservation or V4 delegations.
2359 	 */
2360 	error = VOP_LOOKUP(fvp, args->from.name, &srcvp, NULL, 0,
2361 			NULL, cr);
2362 	if (error != 0)
2363 		goto out;
2364 
2365 	/*
2366 	 * If we rename a delegated file we should recall the
2367 	 * delegation, since future opens should fail or would
2368 	 * refer to a new file.
2369 	 */
2370 	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2371 		resp->status = NFS3ERR_JUKEBOX;
2372 		goto out1;
2373 	}
2374 
2375 	/*
2376 	 * Check for renaming over a delegated file.  Check rfs4_deleg_policy
2377 	 * first to avoid VOP_LOOKUP if possible.
2378 	 */
2379 	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2380 	    VOP_LOOKUP(tvp, args->to.name, &targvp, NULL, 0, NULL, cr) == 0) {
2381 
2382 		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2383 			VN_RELE(targvp);
2384 			resp->status = NFS3ERR_JUKEBOX;
2385 			goto out1;
2386 		}
2387 		VN_RELE(targvp);
2388 	}
2389 
2390 	if (!nbl_need_check(srcvp)) {
2391 		error = VOP_RENAME(fvp, args->from.name, tvp,
2392 				    args->to.name, cr);
2393 	} else {
2394 		nbl_start_crit(srcvp, RW_READER);
2395 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0)) {
2396 			error = EACCES;
2397 		} else {
2398 			error = VOP_RENAME(fvp, args->from.name, tvp,
2399 				    args->to.name, cr);
2400 		}
2401 		nbl_end_crit(srcvp);
2402 	}
2403 	VN_RELE(srcvp);
2404 	srcvp = NULL;
2405 
2406 #ifdef DEBUG
2407 	if (rfs3_do_post_op_attr) {
2408 		fava.va_mask = AT_ALL;
2409 		favap = VOP_GETATTR(fvp, &fava, 0, cr) ? NULL : &fava;
2410 		tava.va_mask = AT_ALL;
2411 		tavap = VOP_GETATTR(tvp, &tava, 0, cr) ? NULL : &tava;
2412 	} else {
2413 		favap = NULL;
2414 		tavap = NULL;
2415 	}
2416 #else
2417 	fava.va_mask = AT_ALL;
2418 	favap = VOP_GETATTR(fvp, &fava, 0, cr) ? NULL : &fava;
2419 	tava.va_mask = AT_ALL;
2420 	tavap = VOP_GETATTR(tvp, &tava, 0, cr) ? NULL : &tava;
2421 #endif
2422 
2423 	/*
2424 	 * Force modified data and metadata out to stable storage.
2425 	 */
2426 	(void) VOP_FSYNC(fvp, 0, cr);
2427 	(void) VOP_FSYNC(tvp, 0, cr);
2428 
2429 	if (error)
2430 		goto out;
2431 
2432 	VN_RELE(tvp);
2433 	VN_RELE(fvp);
2434 
2435 	resp->status = NFS3_OK;
2436 	vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2437 	vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2438 	return;
2439 
2440 out:
2441 	if (curthread->t_flag & T_WOULDBLOCK) {
2442 		curthread->t_flag &= ~T_WOULDBLOCK;
2443 		resp->status = NFS3ERR_JUKEBOX;
2444 	} else
2445 		resp->status = puterrno3(error);
2446 out1:
2447 	if (fvp != NULL)
2448 		VN_RELE(fvp);
2449 	if (tvp != NULL)
2450 		VN_RELE(tvp);
2451 	vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2452 	vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2453 }
2454 
2455 fhandle_t *
2456 rfs3_rename_getfh(RENAME3args *args)
2457 {
2458 
2459 	return ((fhandle_t *)&args->from.dirp->fh3_u.nfs_fh3_i.fh3_i);
2460 }
2461 
2462 void
2463 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2464 	struct svc_req *req, cred_t *cr)
2465 {
2466 	int error;
2467 	vnode_t *vp;
2468 	vnode_t *dvp;
2469 	struct vattr *vap;
2470 	struct vattr va;
2471 	struct vattr *bvap;
2472 	struct vattr bva;
2473 	struct vattr *avap;
2474 	struct vattr ava;
2475 	nfs_fh3	*fh3;
2476 	struct exportinfo *to_exi;
2477 
2478 	vap = NULL;
2479 	bvap = NULL;
2480 	avap = NULL;
2481 	dvp = NULL;
2482 
2483 	vp = nfs3_fhtovp(&args->file, exi);
2484 	if (vp == NULL) {
2485 		error = ESTALE;
2486 		goto out;
2487 	}
2488 
2489 #ifdef DEBUG
2490 	if (rfs3_do_pre_op_attr) {
2491 		va.va_mask = AT_ALL;
2492 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2493 	} else
2494 		vap = NULL;
2495 #else
2496 	va.va_mask = AT_ALL;
2497 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2498 #endif
2499 
2500 	fh3 = args->link.dirp;
2501 	to_exi = checkexport(&fh3->fh3_fsid, (fid_t *)&fh3->fh3_xlen);
2502 	if (to_exi == NULL) {
2503 		resp->status = NFS3ERR_ACCES;
2504 		goto out1;
2505 	}
2506 	exi_rele(to_exi);
2507 
2508 	if (to_exi != exi) {
2509 		resp->status = NFS3ERR_XDEV;
2510 		goto out1;
2511 	}
2512 
2513 	dvp = nfs3_fhtovp(args->link.dirp, exi);
2514 	if (dvp == NULL) {
2515 		error = ESTALE;
2516 		goto out;
2517 	}
2518 
2519 #ifdef DEBUG
2520 	if (rfs3_do_pre_op_attr) {
2521 		bva.va_mask = AT_ALL;
2522 		bvap = VOP_GETATTR(dvp, &bva, 0, cr) ? NULL : &bva;
2523 	} else
2524 		bvap = NULL;
2525 #else
2526 	bva.va_mask = AT_ALL;
2527 	bvap = VOP_GETATTR(dvp, &bva, 0, cr) ? NULL : &bva;
2528 #endif
2529 
2530 	if (dvp->v_type != VDIR) {
2531 		resp->status = NFS3ERR_NOTDIR;
2532 		goto out1;
2533 	}
2534 
2535 	if (args->link.name == nfs3nametoolong) {
2536 		resp->status = NFS3ERR_NAMETOOLONG;
2537 		goto out1;
2538 	}
2539 
2540 	if (args->link.name == NULL || *(args->link.name) == '\0') {
2541 		resp->status = NFS3ERR_ACCES;
2542 		goto out1;
2543 	}
2544 
2545 	if (rdonly(exi, req)) {
2546 		resp->status = NFS3ERR_ROFS;
2547 		goto out1;
2548 	}
2549 
2550 	error = VOP_LINK(dvp, vp, args->link.name, cr);
2551 
2552 #ifdef DEBUG
2553 	if (rfs3_do_post_op_attr) {
2554 		va.va_mask = AT_ALL;
2555 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2556 		ava.va_mask = AT_ALL;
2557 		avap = VOP_GETATTR(dvp, &ava, 0, cr) ? NULL : &ava;
2558 	} else {
2559 		vap = NULL;
2560 		avap = NULL;
2561 	}
2562 #else
2563 	va.va_mask = AT_ALL;
2564 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2565 	ava.va_mask = AT_ALL;
2566 	avap = VOP_GETATTR(dvp, &ava, 0, cr) ? NULL : &ava;
2567 #endif
2568 
2569 	/*
2570 	 * Force modified data and metadata out to stable storage.
2571 	 */
2572 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
2573 	(void) VOP_FSYNC(dvp, 0, cr);
2574 
2575 	if (error)
2576 		goto out;
2577 
2578 	VN_RELE(dvp);
2579 	VN_RELE(vp);
2580 
2581 	resp->status = NFS3_OK;
2582 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
2583 	vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
2584 	return;
2585 
2586 out:
2587 	if (curthread->t_flag & T_WOULDBLOCK) {
2588 		curthread->t_flag &= ~T_WOULDBLOCK;
2589 		resp->status = NFS3ERR_JUKEBOX;
2590 	} else
2591 		resp->status = puterrno3(error);
2592 out1:
2593 	if (vp != NULL)
2594 		VN_RELE(vp);
2595 	if (dvp != NULL)
2596 		VN_RELE(dvp);
2597 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
2598 	vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
2599 }
2600 
2601 fhandle_t *
2602 rfs3_link_getfh(LINK3args *args)
2603 {
2604 
2605 	return ((fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i);
2606 }
2607 
/*
 * This macro defines the size of a response which contains attribute
 * information and one directory entry (whose length is specified by
 * the macro parameter).  If the incoming request is larger than this,
 * then we are guaranteed to be able to return at least one directory
 * entry if one exists.  Therefore, we do not need to check for
 * NFS3ERR_TOOSMALL if the requested size is larger than this.  If it
 * is not, then we need to check to make sure that this error does not
 * need to be returned.
 *
 * NFS3_READDIR_MIN_COUNT is comprised of the following:
 *
 * status - 1 * BYTES_PER_XDR_UNIT
 * attr. flag - 1 * BYTES_PER_XDR_UNIT
 * cookie verifier - 2 * BYTES_PER_XDR_UNIT
 * attributes  - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * end of list - 1 * BYTES_PER_XDR_UNIT
 * end of file - 1 * BYTES_PER_XDR_UNIT
 * Name length of the directory entry, rounded up to a multiple of
 * BYTES_PER_XDR_UNIT
 */

#define	NFS3_READDIR_MIN_COUNT(length)	\
	((1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) * \
		BYTES_PER_XDR_UNIT + roundup((length), BYTES_PER_XDR_UNIT))
2636 
/*
 * NFSv3 READDIR service routine.
 *
 * Translates the directory file handle in args->dir to a vnode and
 * reads directory entries, starting at args->cookie, with VOP_READDIR()
 * into a kmem_alloc()ed buffer.  On success that buffer is handed back
 * through resp->resok.reply.entries together with its allocation size
 * in resp->resok.freecount; rfs3_readdir_free() releases it later.
 *
 * On failure the buffer is freed here, resp->status carries the NFSv3
 * error and the directory attributes (when they could be obtained) are
 * returned in resp->resfail.
 */
/* ARGSUSED */
void
rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
	struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;
	struct vattr *vap;
	struct vattr va;
	struct iovec iov;
	struct uio uio;
	char *data;
	int iseof;
	int bufsize;
	int namlen;
	uint_t count;

	vap = NULL;

	vp = nfs3_fhtovp(&args->dir, exi);
	if (vp == NULL) {
		error = ESTALE;
		goto out;
	}

	/*
	 * The lock taken here is dropped either just before the
	 * successful return or in the out1 path.
	 */
	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);

#ifdef DEBUG
	if (rfs3_do_pre_op_attr) {
		va.va_mask = AT_ALL;
		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
	} else
		vap = NULL;
#else
	va.va_mask = AT_ALL;
	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
#endif

	if (vp->v_type != VDIR) {
		resp->status = NFS3ERR_NOTDIR;
		goto out1;
	}

	error = VOP_ACCESS(vp, VREAD, 0, cr);
	if (error)
		goto out;

	/*
	 * Now don't allow arbitrary count to alloc;
	 * allow the maximum not to exceed rfs3_tsize()
	 */
	if (args->count > rfs3_tsize(req))
		args->count = rfs3_tsize(req);

	/*
	 * Make sure that there is room to read at least one entry
	 * if any are available.
	 */
	if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
		count = DIRENT64_RECLEN(MAXNAMELEN);
	else
		count = args->count;

	/* "count" is the allocation size from here on; args->count is kept. */
	data = kmem_alloc(count, KM_SLEEP);

	iov.iov_base = data;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)args->cookie;
	uio.uio_resid = count;

	error = VOP_READDIR(vp, &uio, cr, &iseof);

	/* Refresh the directory attributes after the read. */
#ifdef DEBUG
	if (rfs3_do_post_op_attr) {
		va.va_mask = AT_ALL;
		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
	} else
		vap = NULL;
#else
	va.va_mask = AT_ALL;
	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
#endif

	if (error) {
		kmem_free(data, count);
		goto out;
	}

	/*
	 * If the count was not large enough to be able to guarantee
	 * to be able to return at least one entry, then need to
	 * check to see if NFS3ERR_TOOSMALL should be returned.
	 */
	if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
		/*
		 * bufsize is used to keep track of the size of the response.
		 * It is primed with:
		 *	1 for the status +
		 *	1 for the dir_attributes.attributes boolean +
		 *	2 for the cookie verifier
		 * all times BYTES_PER_XDR_UNIT to convert from XDR units
		 * to bytes.  If there are directory attributes to be
		 * returned, then:
		 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
		 * times BYTES_PER_XDR_UNIT is added to account for them.
		 */
		bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
		if (vap != NULL)
			bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
		/*
		 * An entry is composed of:
		 *	1 for the true/false list indicator +
		 *	2 for the fileid +
		 *	1 for the length of the name +
		 *	2 for the cookie +
		 * all times BYTES_PER_XDR_UNIT to convert from
		 * XDR units to bytes, plus the length of the name
		 * rounded up to the nearest BYTES_PER_XDR_UNIT.
		 *
		 * Only the first entry in the buffer is measured, and
		 * only if VOP_READDIR() returned any data at all.
		 */
		if (count != uio.uio_resid) {
			namlen = strlen(((struct dirent64 *)data)->d_name);
			bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
				    roundup(namlen, BYTES_PER_XDR_UNIT);
		}
		/*
		 * We need to check to see if the number of bytes left
		 * to go into the buffer will actually fit into the
		 * buffer.  This is calculated as the size of this
		 * entry plus:
		 *	1 for the true/false list indicator +
		 *	1 for the eof indicator
		 * times BYTES_PER_XDR_UNIT to convert from
		 * XDR units to bytes.
		 */
		bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
		if (bufsize > args->count) {
			kmem_free(data, count);
			resp->status = NFS3ERR_TOOSMALL;
			goto out1;
		}
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);

#if 0 /* notyet */
	/*
	 * Don't do this.  It causes local disk writes when just
	 * reading the file and the overhead is deemed larger
	 * than the benefit.
	 */
	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr);
#endif

	VN_RELE(vp);

	/*
	 * Success: ownership of "data" passes to the response.
	 * size is the number of bytes VOP_READDIR() produced and
	 * freecount is the size the buffer was allocated with.
	 */
	resp->status = NFS3_OK;
	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
	resp->resok.cookieverf = 0;
	resp->resok.reply.entries = (entry3 *)data;
	resp->resok.reply.eof = iseof;
	resp->resok.size = count - uio.uio_resid;
	resp->resok.count = args->count;
	resp->resok.freecount = count;
	return;

out:
	/* Map errno to an NFSv3 status unless the thread would have blocked. */
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
out1:
	/* vp is only non-NULL once the rwlock above has been taken. */
	if (vp != NULL) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
		VN_RELE(vp);
	}
	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
}
2822 
2823 fhandle_t *
2824 rfs3_readdir_getfh(READDIR3args *args)
2825 {
2826 
2827 	return ((fhandle_t *)&args->dir.fh3_u.nfs_fh3_i.fh3_i);
2828 }
2829 
2830 void
2831 rfs3_readdir_free(READDIR3res *resp)
2832 {
2833 
2834 	if (resp->status == NFS3_OK)
2835 		kmem_free(resp->resok.reply.entries, resp->resok.freecount);
2836 }
2837 
/*
 * nextdp(dp) yields a pointer to the dirent64 record that follows dp in
 * a buffer of directory entries: the next record starts d_reclen bytes
 * after the start of the current one.  Any definition of nextdp that is
 * already in effect is discarded first.
 */
#ifdef nextdp
#undef nextdp
#endif
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
2842 
/*
 * This macro computes the size of a response which contains
 * one directory entry including the attributes as well as file handle.
 * If the incoming request is larger than this, then we are guaranteed to be
 * able to return at least one more directory entry if one exists.
 *
 * NFS3_READDIRPLUS_ENTRY is made up of the following:
 *
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * attribute flag - 1 * BYTES_PER_XDR_UNIT
 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * status byte for file handle - 1 *  BYTES_PER_XDR_UNIT
 * length of a file handle - 1 * BYTES_PER_XDR_UNIT
 * Maximum length of a file handle (NFS3_CURFHSIZE)
 * name length of the entry, rounded up to a multiple of BYTES_PER_XDR_UNIT
 *
 * The macro argument is parenthesized in the expansion so that an
 * expression may safely be passed as the name length.
 */
#define	NFS3_READDIRPLUS_ENTRY(namelen)	\
	((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
		BYTES_PER_XDR_UNIT + \
	NFS3_CURFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
2866 
/*
 * Initial size, in bytes, of each VOP_READDIR() request issued by
 * rfs3_readdirplus(); the routine shrinks it to the space remaining in
 * its buffer when that is smaller.
 */
static int rfs3_readdir_unit = MAXBSIZE;
2868 
/*
 * NFSv3 READDIRPLUS service routine.
 *
 * Reads entries of the directory identified by args->dir, starting at
 * args->cookie, in units of at most rfs3_readdir_unit bytes until
 * either the directory is exhausted, the dirent buffer (args->dircount
 * bytes) is used up, or the estimated reply size would exceed
 * args->maxcount.  For every entry that is to be returned the name is
 * then looked up to obtain its attributes and file handle, which are
 * stored in a parallel entryplus3_info array.
 *
 * On success the dirent buffer and the info array are handed to the
 * response (resok.reply.entries and resok.infop) and are released later
 * by rfs3_readdirplus_free().  Note that args->dircount and
 * args->maxcount are adjusted in place and that the adjusted dircount
 * is the allocation size of both the dirent buffer and the namlen
 * scratch array.
 */
/* ARGSUSED */
void
rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;
	struct vattr *vap;
	struct vattr va;
	struct iovec iov;
	struct uio uio;
	char *data;
	int iseof;
	struct dirent64 *dp;
	vnode_t *nvp;
	struct vattr *nvap;
	struct vattr nva;
	entryplus3_info *infop = NULL;
	int size = 0;
	int nents = 0;		/* number of entries accepted so far */
	int bufsize = 0;	/* running estimate of the reply size */
	int entrysize = 0;
	int tofit = 0;
	int rd_unit = rfs3_readdir_unit;
	int prev_len;
	int space_left;		/* unused bytes remaining in "data" */
	int i;
	uint_t *namlen = NULL;	/* name length of each accepted entry */

	vap = NULL;

	vp = nfs3_fhtovp(&args->dir, exi);
	if (vp == NULL) {
		error = ESTALE;
		goto out;
	}

	/*
	 * Dropped at "good" on the success path, or in the out1 path
	 * on failure.
	 */
	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);

#ifdef DEBUG
	if (rfs3_do_pre_op_attr) {
		va.va_mask = AT_ALL;
		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
	} else
		vap = NULL;
#else
	va.va_mask = AT_ALL;
	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
#endif

	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}

	error = VOP_ACCESS(vp, VREAD, 0, cr);
	if (error)
		goto out;

	/*
	 * Don't allow arbitrary counts for allocation
	 */
	if (args->maxcount > rfs3_tsize(req))
		args->maxcount = rfs3_tsize(req);

	/*
	 * Make sure that there is room to read at least one entry
	 * if any are available
	 */
	args->dircount = MIN(args->dircount, args->maxcount);

	if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
		args->dircount = DIRENT64_RECLEN(MAXNAMELEN);

	/*
	 * This allocation relies on a minimum directory entry
	 * being roughly 24 bytes.  Therefore, the namlen array
	 * will have enough space based on the maximum number of
	 * entries to read.
	 */
	namlen = kmem_alloc(args->dircount, KM_SLEEP);

	space_left = args->dircount;
	data = kmem_alloc(args->dircount, KM_SLEEP);
	dp = (struct dirent64 *)data;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = (offset_t)args->cookie;

	/*
	 * bufsize is used to keep track of the size of the response as we
	 * get post op attributes and filehandles for each entry.  This is
	 * an optimization as the server may have read more entries than will
	 * fit in the buffer specified by maxcount.  We stop calculating
	 * post op attributes and filehandles once we have exceeded maxcount.
	 * This will minimize the effect of truncation.
	 *
	 * It is primed with:
	 *	1 for the status +
	 *	1 for the dir_attributes.attributes boolean +
	 *	2 for the cookie verifier
	 * all times BYTES_PER_XDR_UNIT to convert from XDR units
	 * to bytes.  If there are directory attributes to be
	 * returned, then:
	 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
	 * times BYTES_PER_XDR_UNIT is added to account for them.
	 */
	bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
	if (vap != NULL)
		bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;

getmoredents:
	/*
	 * Here we make a check so that our read unit is not larger than
	 * the space left in the buffer.
	 *
	 * dp points just past the last entry measured so far, so each
	 * pass appends to what is already in "data".
	 */
	rd_unit = MIN(rd_unit, space_left);
	iov.iov_base = (char *)dp;
	iov.iov_len = rd_unit;
	uio.uio_resid = rd_unit;
	prev_len = rd_unit;

	error = VOP_READDIR(vp, &uio, cr, &iseof);

	if (error) {
		/* namlen is released in the common failure path below. */
		kmem_free(data, args->dircount);
		goto out;
	}

	/* Nothing was read although the directory is not at its end. */
	if (uio.uio_resid == prev_len && !iseof) {
		if (nents == 0) {
			kmem_free(data, args->dircount);
			resp->status = NFS3ERR_TOOSMALL;
			goto out1;
		}

		/*
		 * We could not get any more entries, so get the attributes
		 * and filehandle for the entries already obtained.
		 */
		goto good;
	}

	/*
	 * We estimate the size of the response by assuming the
	 * entry exists and attributes and filehandle are also valid
	 */
	for (size = prev_len - uio.uio_resid;
		size > 0;
		size -= dp->d_reclen, dp = nextdp(dp)) {

		/*
		 * Entries with a zero inode number are counted but add
		 * nothing to the size estimate and get no namlen slot
		 * filled in.
		 */
		if (dp->d_ino == 0) {
			nents++;
			continue;
		}

		namlen[nents] = strlen(dp->d_name);
		entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);

		/*
		 * We need to check to see if the number of bytes left
		 * to go into the buffer will actually fit into the
		 * buffer.  This is calculated as the size of this
		 * entry plus:
		 *	1 for the true/false list indicator +
		 *	1 for the eof indicator
		 * times BYTES_PER_XDR_UNIT to convert from XDR units
		 * to bytes.
		 *
		 * Also check the dircount limit against the first entry read
		 *
		 */
		tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
		if (bufsize + tofit > args->maxcount) {
			/*
			 * We make a check here to see if this was the
			 * first entry being measured.  If so, then maxcount
			 * was too small to begin with and so we need to
			 * return with NFS3ERR_TOOSMALL.
			 */
			if (nents == 0) {
				kmem_free(data, args->dircount);
				resp->status = NFS3ERR_TOOSMALL;
				goto out1;
			}
			/*
			 * Entries that were read are being left out of
			 * the reply, so end of directory must not be
			 * reported.
			 */
			iseof = FALSE;
			goto good;
		}
		bufsize += entrysize;
		nents++;
	}

	/*
	 * If there is enough room to fit at least 1 more entry including
	 * post op attributes and filehandle in the buffer AND that we haven't
	 * exceeded dircount then go back and get some more.
	 */
	if (!iseof &&
	    (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
		space_left -= (prev_len - uio.uio_resid);
		if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
			goto getmoredents;

		/* else, fall through */
	}

good:

	/* Directory attributes after the read, for the reply. */
#ifdef DEBUG
	if (rfs3_do_post_op_attr) {
		va.va_mask = AT_ALL;
		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
	} else
		vap = NULL;
#else
	va.va_mask = AT_ALL;
	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
#endif

	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);

	/*
	 * One info slot per accepted entry.  rfs3_readdirplus_free()
	 * recomputes this size from resok.size, which is set to nents
	 * below.
	 */
	infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
	resp->resok.infop = infop;

	/*
	 * Second pass over the accepted entries: look each name up and
	 * record its attributes and file handle.  A failure for a single
	 * entry only marks that entry as having no attributes and no
	 * handle; it does not fail the request.
	 */
	dp = (struct dirent64 *)data;
	for (i = 0; i < nents; i++) {

		if (dp->d_ino == 0) {
			infop[i].attr.attributes = FALSE;
			infop[i].fh.handle_follows = FALSE;
			dp = nextdp(dp);
			continue;
		}

		infop[i].namelen = namlen[i];

		error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr);
		if (error) {
			infop[i].attr.attributes = FALSE;
			infop[i].fh.handle_follows = FALSE;
			dp = nextdp(dp);
			continue;
		}

#ifdef DEBUG
		if (rfs3_do_post_op_attr) {
			nva.va_mask = AT_ALL;
			nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ?
				NULL : &nva;
		} else
			nvap = NULL;
#else
		nva.va_mask = AT_ALL;
		nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
#endif
		vattr_to_post_op_attr(nvap, &infop[i].attr);

		/*
		 * In DEBUG kernels the file handle is only generated when
		 * rfs3_do_post_op_fh3 is set; otherwise it always is.
		 */
#ifdef DEBUG
		if (!rfs3_do_post_op_fh3)
			infop[i].fh.handle_follows = FALSE;
		else {
#endif
		error = makefh3(&infop[i].fh.handle, nvp, exi);
		if (!error)
			infop[i].fh.handle_follows = TRUE;
		else
			infop[i].fh.handle_follows = FALSE;
#ifdef DEBUG
		}
#endif

		VN_RELE(nvp);
		dp = nextdp(dp);
	}

#if 0 /* notyet */
	/*
	 * Don't do this.  It causes local disk writes when just
	 * reading the file and the overhead is deemed larger
	 * than the benefit.
	 */
	/*
	 * Force modified metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr);
#endif

	VN_RELE(vp);

	kmem_free(namlen, args->dircount);

	/*
	 * Success: "data" and "infop" now belong to the response.
	 * resok.count records the (adjusted) dircount, which is the
	 * allocation size of "data".
	 */
	resp->status = NFS3_OK;
	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
	resp->resok.cookieverf = 0;
	resp->resok.reply.entries = (entryplus3 *)data;
	resp->resok.reply.eof = iseof;
	resp->resok.size = nents;
	resp->resok.count = args->dircount;
	resp->resok.maxcount = args->maxcount;
	return;

out:
	/* Map errno to an NFSv3 status unless the thread would have blocked. */
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
out1:
	/* vp is only non-NULL once the rwlock above has been taken. */
	if (vp != NULL) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
		VN_RELE(vp);
	}

	if (namlen != NULL)
		kmem_free(namlen, args->dircount);

	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
}
3189 
3190 fhandle_t *
3191 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3192 {
3193 
3194 	return ((fhandle_t *)&args->dir.fh3_u.nfs_fh3_i.fh3_i);
3195 }
3196 
3197 void
3198 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3199 {
3200 
3201 	if (resp->status == NFS3_OK) {
3202 		kmem_free(resp->resok.reply.entries, resp->resok.count);
3203 		kmem_free(resp->resok.infop,
3204 			resp->resok.size * sizeof (struct entryplus3_info));
3205 	}
3206 }
3207 
3208 /* ARGSUSED */
3209 void
3210 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3211 	struct svc_req *req, cred_t *cr)
3212 {
3213 	int error;
3214 	vnode_t *vp;
3215 	struct vattr *vap;
3216 	struct vattr va;
3217 	struct statvfs64 sb;
3218 
3219 	vap = NULL;
3220 
3221 	vp = nfs3_fhtovp(&args->fsroot, exi);
3222 	if (vp == NULL) {
3223 		error = ESTALE;
3224 		goto out;
3225 	}
3226 
3227 	error = VFS_STATVFS(vp->v_vfsp, &sb);
3228 
3229 #ifdef DEBUG
3230 	if (rfs3_do_post_op_attr) {
3231 		va.va_mask = AT_ALL;
3232 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3233 	} else
3234 		vap = NULL;
3235 #else
3236 	va.va_mask = AT_ALL;
3237 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3238 #endif
3239 
3240 	VN_RELE(vp);
3241 
3242 	if (error)
3243 		goto out;
3244 
3245 	resp->status = NFS3_OK;
3246 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3247 	if (sb.f_blocks != (fsblkcnt64_t)-1)
3248 		resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3249 	else
3250 		resp->resok.tbytes = (size3)sb.f_blocks;
3251 	if (sb.f_bfree != (fsblkcnt64_t)-1)
3252 		resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3253 	else
3254 		resp->resok.fbytes = (size3)sb.f_bfree;
3255 	if (sb.f_bavail != (fsblkcnt64_t)-1)
3256 		resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3257 	else
3258 		resp->resok.abytes = (size3)sb.f_bavail;
3259 	resp->resok.tfiles = (size3)sb.f_files;
3260 	resp->resok.ffiles = (size3)sb.f_ffree;
3261 	resp->resok.afiles = (size3)sb.f_favail;
3262 	resp->resok.invarsec = 0;
3263 	return;
3264 
3265 out:
3266 	if (curthread->t_flag & T_WOULDBLOCK) {
3267 		curthread->t_flag &= ~T_WOULDBLOCK;
3268 		resp->status = NFS3ERR_JUKEBOX;
3269 	} else
3270 		resp->status = puterrno3(error);
3271 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3272 }
3273 
3274 fhandle_t *
3275 rfs3_fsstat_getfh(FSSTAT3args *args)
3276 {
3277 
3278 	return ((fhandle_t *)&args->fsroot.fh3_u.nfs_fh3_i.fh3_i);
3279 }
3280 
3281 /* ARGSUSED */
3282 void
3283 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3284 	struct svc_req *req, cred_t *cr)
3285 {
3286 	vnode_t *vp;
3287 	struct vattr *vap;
3288 	struct vattr va;
3289 	uint32_t xfer_size;
3290 	ulong_t l = 0;
3291 	int error;
3292 
3293 	vp = nfs3_fhtovp(&args->fsroot, exi);
3294 	if (vp == NULL) {
3295 		if (curthread->t_flag & T_WOULDBLOCK) {
3296 			curthread->t_flag &= ~T_WOULDBLOCK;
3297 			resp->status = NFS3ERR_JUKEBOX;
3298 		} else
3299 			resp->status = NFS3ERR_STALE;
3300 		vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3301 		return;
3302 	}
3303 
3304 #ifdef DEBUG
3305 	if (rfs3_do_post_op_attr) {
3306 		va.va_mask = AT_ALL;
3307 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3308 	} else
3309 		vap = NULL;
3310 #else
3311 	va.va_mask = AT_ALL;
3312 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3313 #endif
3314 
3315 	resp->status = NFS3_OK;
3316 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3317 	xfer_size = rfs3_tsize(req);
3318 	resp->resok.rtmax = xfer_size;
3319 	resp->resok.rtpref = xfer_size;
3320 	resp->resok.rtmult = DEV_BSIZE;
3321 	resp->resok.wtmax = xfer_size;
3322 	resp->resok.wtpref = xfer_size;
3323 	resp->resok.wtmult = DEV_BSIZE;
3324 	resp->resok.dtpref = MAXBSIZE;
3325 
3326 	/*
3327 	 * Large file spec: want maxfilesize based on limit of
3328 	 * underlying filesystem.  We can guess 2^31-1 if need be.
3329 	 */
3330 	error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr);
3331 
3332 	VN_RELE(vp);
3333 
3334 	if (!error && l != 0 && l <= 64)
3335 		resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3336 	else
3337 		resp->resok.maxfilesize = MAXOFF32_T;
3338 
3339 	resp->resok.time_delta.seconds = 0;
3340 	resp->resok.time_delta.nseconds = 1000;
3341 	resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3342 	    FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3343 }
3344 
3345 fhandle_t *
3346 rfs3_fsinfo_getfh(FSINFO3args *args)
3347 {
3348 
3349 	return ((fhandle_t *)&args->fsroot.fh3_u.nfs_fh3_i.fh3_i);
3350 }
3351 
3352 /* ARGSUSED */
3353 void
3354 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3355 	struct svc_req *req, cred_t *cr)
3356 {
3357 	int error;
3358 	vnode_t *vp;
3359 	struct vattr *vap;
3360 	struct vattr va;
3361 	ulong_t val;
3362 
3363 	vap = NULL;
3364 
3365 	vp = nfs3_fhtovp(&args->object, exi);
3366 	if (vp == NULL) {
3367 		error = ESTALE;
3368 		goto out;
3369 	}
3370 
3371 #ifdef DEBUG
3372 	if (rfs3_do_post_op_attr) {
3373 		va.va_mask = AT_ALL;
3374 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3375 	} else
3376 		vap = NULL;
3377 #else
3378 	va.va_mask = AT_ALL;
3379 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3380 #endif
3381 
3382 	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr);
3383 	if (error)
3384 		goto out;
3385 	resp->resok.info.link_max = (uint32)val;
3386 
3387 	error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr);
3388 	if (error)
3389 		goto out;
3390 	resp->resok.info.name_max = (uint32)val;
3391 
3392 	error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr);
3393 	if (error)
3394 		goto out;
3395 	if (val == 1)
3396 		resp->resok.info.no_trunc = TRUE;
3397 	else
3398 		resp->resok.info.no_trunc = FALSE;
3399 
3400 	error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr);
3401 	if (error)
3402 		goto out;
3403 	if (val == 1)
3404 		resp->resok.info.chown_restricted = TRUE;
3405 	else
3406 		resp->resok.info.chown_restricted = FALSE;
3407 
3408 	VN_RELE(vp);
3409 
3410 	resp->status = NFS3_OK;
3411 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3412 	resp->resok.info.case_insensitive = FALSE;
3413 	resp->resok.info.case_preserving = TRUE;
3414 	return;
3415 
3416 out:
3417 	if (curthread->t_flag & T_WOULDBLOCK) {
3418 		curthread->t_flag &= ~T_WOULDBLOCK;
3419 		resp->status = NFS3ERR_JUKEBOX;
3420 	} else
3421 		resp->status = puterrno3(error);
3422 	if (vp != NULL)
3423 		VN_RELE(vp);
3424 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3425 }
3426 
3427 fhandle_t *
3428 rfs3_pathconf_getfh(PATHCONF3args *args)
3429 {
3430 
3431 	return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
3432 }
3433 
3434 void
3435 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
3436 	struct svc_req *req, cred_t *cr)
3437 {
3438 	int error;
3439 	vnode_t *vp;
3440 	struct vattr *bvap;
3441 	struct vattr bva;
3442 	struct vattr *avap;
3443 	struct vattr ava;
3444 
3445 	bvap = NULL;
3446 	avap = NULL;
3447 
3448 	vp = nfs3_fhtovp(&args->file, exi);
3449 	if (vp == NULL) {
3450 		error = ESTALE;
3451 		goto out;
3452 	}
3453 
3454 	bva.va_mask = AT_ALL;
3455 	error = VOP_GETATTR(vp, &bva, 0, cr);
3456 
3457 	/*
3458 	 * If we can't get the attributes, then we can't do the
3459 	 * right access checking.  So, we'll fail the request.
3460 	 */
3461 	if (error)
3462 		goto out;
3463 
3464 #ifdef DEBUG
3465 	if (rfs3_do_pre_op_attr)
3466 		bvap = &bva;
3467 	else
3468 		bvap = NULL;
3469 #else
3470 	bvap = &bva;
3471 #endif
3472 
3473 	if (rdonly(exi, req)) {
3474 		resp->status = NFS3ERR_ROFS;
3475 		goto out1;
3476 	}
3477 
3478 	if (vp->v_type != VREG) {
3479 		resp->status = NFS3ERR_INVAL;
3480 		goto out1;
3481 	}
3482 
3483 	if (crgetuid(cr) != bva.va_uid &&
3484 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr)))
3485 		goto out;
3486 
3487 	error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr);
3488 	if (!error)
3489 		error = VOP_FSYNC(vp, FNODSYNC, cr);
3490 
3491 #ifdef DEBUG
3492 	if (rfs3_do_post_op_attr) {
3493 		ava.va_mask = AT_ALL;
3494 		avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
3495 	} else
3496 		avap = NULL;
3497 #else
3498 	ava.va_mask = AT_ALL;
3499 	avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
3500 #endif
3501 
3502 	if (error)
3503 		goto out;
3504 
3505 	VN_RELE(vp);
3506 
3507 	resp->status = NFS3_OK;
3508 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
3509 	resp->resok.verf = write3verf;
3510 	return;
3511 
3512 out:
3513 	if (curthread->t_flag & T_WOULDBLOCK) {
3514 		curthread->t_flag &= ~T_WOULDBLOCK;
3515 		resp->status = NFS3ERR_JUKEBOX;
3516 	} else
3517 		resp->status = puterrno3(error);
3518 out1:
3519 	if (vp != NULL)
3520 		VN_RELE(vp);
3521 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
3522 }
3523 
3524 fhandle_t *
3525 rfs3_commit_getfh(COMMIT3args *args)
3526 {
3527 
3528 	return ((fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i);
3529 }
3530 
3531 static int
3532 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
3533 {
3534 
3535 	vap->va_mask = 0;
3536 
3537 	if (sap->mode.set_it) {
3538 		vap->va_mode = (mode_t)sap->mode.mode;
3539 		vap->va_mask |= AT_MODE;
3540 	}
3541 	if (sap->uid.set_it) {
3542 		vap->va_uid = (uid_t)sap->uid.uid;
3543 		vap->va_mask |= AT_UID;
3544 	}
3545 	if (sap->gid.set_it) {
3546 		vap->va_gid = (gid_t)sap->gid.gid;
3547 		vap->va_mask |= AT_GID;
3548 	}
3549 	if (sap->size.set_it) {
3550 		if (sap->size.size > (size3)((u_longlong_t)-1))
3551 			return (EINVAL);
3552 		vap->va_size = sap->size.size;
3553 		vap->va_mask |= AT_SIZE;
3554 	}
3555 	if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
3556 #ifndef _LP64
3557 		/* check time validity */
3558 		if (!NFS3_TIME_OK(sap->atime.atime.seconds))
3559 			return (EOVERFLOW);
3560 #endif
3561 		/*
3562 		 * nfs protocol defines times as unsigned so don't extend sign,
3563 		 * unless sysadmin set nfs_allow_preepoch_time.
3564 		 */
3565 		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
3566 			sap->atime.atime.seconds);
3567 		vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
3568 		vap->va_mask |= AT_ATIME;
3569 	} else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
3570 		gethrestime(&vap->va_atime);
3571 		vap->va_mask |= AT_ATIME;
3572 	}
3573 	if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
3574 #ifndef _LP64
3575 		/* check time validity */
3576 		if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
3577 			return (EOVERFLOW);
3578 #endif
3579 		/*
3580 		 * nfs protocol defines times as unsigned so don't extend sign,
3581 		 * unless sysadmin set nfs_allow_preepoch_time.
3582 		 */
3583 		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
3584 			sap->mtime.mtime.seconds);
3585 		vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
3586 		vap->va_mask |= AT_MTIME;
3587 	} else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
3588 		gethrestime(&vap->va_mtime);
3589 		vap->va_mask |= AT_MTIME;
3590 	}
3591 
3592 	return (0);
3593 }
3594 
3595 static ftype3 vt_to_nf3[] = {
3596 	0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
3597 };
3598 
3599 static int
3600 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
3601 {
3602 
3603 	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
3604 	/* Return error if time or size overflow */
3605 	if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
3606 		return (EOVERFLOW);
3607 	}
3608 	fap->type = vt_to_nf3[vap->va_type];
3609 	fap->mode = (mode3)(vap->va_mode & MODEMASK);
3610 	fap->nlink = (uint32)vap->va_nlink;
3611 	if (vap->va_uid == UID_NOBODY)
3612 		fap->uid = (uid3)NFS_UID_NOBODY;
3613 	else
3614 		fap->uid = (uid3)vap->va_uid;
3615 	if (vap->va_gid == GID_NOBODY)
3616 		fap->gid = (gid3)NFS_GID_NOBODY;
3617 	else
3618 		fap->gid = (gid3)vap->va_gid;
3619 	fap->size = (size3)vap->va_size;
3620 	fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
3621 	fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
3622 	fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
3623 	fap->fsid = (uint64)vap->va_fsid;
3624 	fap->fileid = (fileid3)vap->va_nodeid;
3625 	fap->atime.seconds = vap->va_atime.tv_sec;
3626 	fap->atime.nseconds = vap->va_atime.tv_nsec;
3627 	fap->mtime.seconds = vap->va_mtime.tv_sec;
3628 	fap->mtime.nseconds = vap->va_mtime.tv_nsec;
3629 	fap->ctime.seconds = vap->va_ctime.tv_sec;
3630 	fap->ctime.nseconds = vap->va_ctime.tv_nsec;
3631 	return (0);
3632 }
3633 
3634 static int
3635 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
3636 {
3637 
3638 	/* Return error if time or size overflow */
3639 	if (!  (NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
3640 		NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
3641 		NFS3_SIZE_OK(vap->va_size))) {
3642 		return (EOVERFLOW);
3643 	}
3644 	wccap->size = (size3)vap->va_size;
3645 	wccap->mtime.seconds = vap->va_mtime.tv_sec;
3646 	wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
3647 	wccap->ctime.seconds = vap->va_ctime.tv_sec;
3648 	wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
3649 	return (0);
3650 }
3651 
3652 static void
3653 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
3654 {
3655 
3656 	/* don't return attrs if time overflow */
3657 	if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
3658 		poap->attributes = TRUE;
3659 	} else
3660 		poap->attributes = FALSE;
3661 }
3662 
3663 void
3664 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
3665 {
3666 
3667 	/* don't return attrs if time overflow */
3668 	if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
3669 		poap->attributes = TRUE;
3670 	} else
3671 		poap->attributes = FALSE;
3672 }
3673 
3674 static void
3675 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
3676 {
3677 
3678 	vattr_to_pre_op_attr(bvap, &wccp->before);
3679 	vattr_to_post_op_attr(avap, &wccp->after);
3680 }
3681 
3682 void
3683 rfs3_srvrinit(void)
3684 {
3685 	struct rfs3_verf_overlay {
3686 		uint_t id; /* a "unique" identifier */
3687 		int ts; /* a unique timestamp */
3688 	} *verfp;
3689 	timestruc_t now;
3690 
3691 	/*
3692 	 * The following algorithm attempts to find a unique verifier
3693 	 * to be used as the write verifier returned from the server
3694 	 * to the client.  It is important that this verifier change
3695 	 * whenever the server reboots.  Of secondary importance, it
3696 	 * is important for the verifier to be unique between two
3697 	 * different servers.
3698 	 *
3699 	 * Thus, an attempt is made to use the system hostid and the
3700 	 * current time in seconds when the nfssrv kernel module is
3701 	 * loaded.  It is assumed that an NFS server will not be able
3702 	 * to boot and then to reboot in less than a second.  If the
3703 	 * hostid has not been set, then the current high resolution
3704 	 * time is used.  This will ensure different verifiers each
3705 	 * time the server reboots and minimize the chances that two
3706 	 * different servers will have the same verifier.
3707 	 */
3708 
3709 #ifndef	lint
3710 	/*
3711 	 * We ASSERT that this constant logic expression is
3712 	 * always true because in the past, it wasn't.
3713 	 */
3714 	ASSERT(sizeof (*verfp) <= sizeof (write3verf));
3715 #endif
3716 
3717 	gethrestime(&now);
3718 	verfp = (struct rfs3_verf_overlay *)&write3verf;
3719 	verfp->ts = (int)now.tv_sec;
3720 	verfp->id = (uint_t)nfs_atoi(hw_serial);
3721 
3722 	if (verfp->id == 0)
3723 		verfp->id = (uint_t)now.tv_nsec;
3724 
3725 }
3726 
/*
 * Teardown counterpart of rfs3_srvrinit().  rfs3_srvrinit() only fills
 * in write3verf and acquires nothing, so there is nothing to undo here.
 */
void
rfs3_srvrfini(void)
{
	/* Nothing to do */
}
3732