xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs3_srv.c (revision a07094369b21309434206d9b3601d162693466fc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/systm.h>
35 #include <sys/cred.h>
36 #include <sys/buf.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/uio.h>
40 #include <sys/errno.h>
41 #include <sys/sysmacros.h>
42 #include <sys/statvfs.h>
43 #include <sys/kmem.h>
44 #include <sys/dirent.h>
45 #include <sys/cmn_err.h>
46 #include <sys/debug.h>
47 #include <sys/systeminfo.h>
48 #include <sys/flock.h>
49 #include <sys/nbmlock.h>
50 #include <sys/policy.h>
51 
52 #include <rpc/types.h>
53 #include <rpc/auth.h>
54 #include <rpc/svc.h>
55 
56 #include <nfs/nfs.h>
57 #include <nfs/export.h>
58 
59 #include <sys/strsubr.h>
60 
61 /*
62  * These are the interface routines for the server side of the
63  * Network File System.  See the NFS version 3 protocol specification
64  * for a description of this interface.
65  */
66 
/*
 * DEBUG-only switches.  The handlers in this file consult
 * rfs3_do_pre_op_attr and rfs3_do_post_op_attr; when one is zero the
 * corresponding pre-/post-operation attribute pointer is left (or reset
 * to) NULL instead of pointing at the fetched attributes.
 * rfs3_do_post_op_fh3 is not referenced in the part of the file
 * reviewed here.
 */
67 #ifdef DEBUG
68 int rfs3_do_pre_op_attr = 1;
69 int rfs3_do_post_op_attr = 1;
70 int rfs3_do_post_op_fh3 = 1;
71 #endif
72 
/* Write verifier copied into resok.verf of every successful WRITE3 reply. */
73 static writeverf3 write3verf;
74 
/* Forward declarations of the file-local attribute conversion helpers. */
75 static int	sattr3_to_vattr(sattr3 *, struct vattr *);
76 static int	vattr_to_fattr3(struct vattr *, fattr3 *);
77 static int	vattr_to_wcc_attr(struct vattr *, wcc_attr *);
78 static void	vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
79 static void	vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
80 
81 /* ARGSUSED */
82 void
83 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
84 	struct svc_req *req, cred_t *cr)
85 {
86 	int error;
87 	vnode_t *vp;
88 	struct vattr va;
89 
90 	vp = nfs3_fhtovp(&args->object, exi);
91 	if (vp == NULL) {
92 		error = ESTALE;
93 		goto out;
94 	}
95 
96 	va.va_mask = AT_ALL;
97 	error = rfs4_delegated_getattr(vp, &va, 0, cr);
98 
99 	VN_RELE(vp);
100 
101 	if (!error) {
102 		/* overflow error if time or size is out of range */
103 		error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
104 		if (error)
105 			goto out;
106 		resp->status = NFS3_OK;
107 		return;
108 	}
109 
110 out:
111 	if (curthread->t_flag & T_WOULDBLOCK) {
112 		curthread->t_flag &= ~T_WOULDBLOCK;
113 		resp->status = NFS3ERR_JUKEBOX;
114 	} else
115 		resp->status = puterrno3(error);
116 }
117 
118 fhandle_t *
119 rfs3_getattr_getfh(GETATTR3args *args)
120 {
121 
122 	return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
123 }
124 
/*
 * SETATTR3: apply args->new_attributes to the object named by
 * args->object.
 *
 * The reply carries wcc data built from the attributes fetched before
 * (bva) and after (ava) the change.  Note that "ava" first holds the
 * requested new attributes and is later reused for the post-operation
 * attributes.
 *
 * Failure paths: "out" translates "error" into an NFS3 status (or
 * NFS3ERR_JUKEBOX when the thread has T_WOULDBLOCK set) and falls
 * through to "out1", which is also the target for paths that have
 * already set resp->status.  "out1" leaves the NBMAND critical region
 * if it was entered, releases the vnode and fills in resfail.obj_wcc.
 */
125 void
126 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
127 	struct svc_req *req, cred_t *cr)
128 {
129 	int error;
130 	vnode_t *vp;
131 	struct vattr *bvap;
132 	struct vattr bva;
133 	struct vattr *avap;
134 	struct vattr ava;
135 	int flag;
136 	int in_crit = 0;
137 	struct flock64 bf;
138 
139 	bvap = NULL;
140 	avap = NULL;
141 
142 	vp = nfs3_fhtovp(&args->object, exi);
143 	if (vp == NULL) {
144 		error = ESTALE;
145 		goto out;
146 	}
147 
148 	error = sattr3_to_vattr(&args->new_attributes, &ava);
149 	if (error)
150 		goto out;
151 
152 	/*
153 	 * We need to specially handle size changes because of
154 	 * possible conflicting NBMAND locks. Get into critical
155 	 * region before VOP_GETATTR, so the size attribute is
156 	 * valid when checking conflicts.
157 	 *
158 	 * Also, check to see if the v4 side of the server has
159 	 * delegated this file.  If so, then we return JUKEBOX to
160 	 * allow the client to retransmit its request.
161 	 */
162 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
163 		if (rfs4_check_delegated(FWRITE, vp, TRUE)) {
164 			resp->status = NFS3ERR_JUKEBOX;
165 			goto out1;
166 		}
167 		if (nbl_need_check(vp)) {
168 			nbl_start_crit(vp, RW_READER);
169 			in_crit = 1;
170 		}
171 	}
172 
173 	bva.va_mask = AT_ALL;
174 	error = rfs4_delegated_getattr(vp, &bva, 0, cr);
175 
176 	/*
177 	 * If we can't get the attributes, then we can't do the
178 	 * right access checking.  So, we'll fail the request.
179 	 */
180 	if (error)
181 		goto out;
182 
183 #ifdef DEBUG
184 	if (rfs3_do_pre_op_attr)
185 		bvap = &bva;
186 #else
187 	bvap = &bva;
188 #endif
189 
190 	if (rdonly(exi, req) || vn_is_readonly(vp)) {
191 		resp->status = NFS3ERR_ROFS;
192 		goto out1;
193 	}
194 
	/*
	 * Guarded update: when the client supplied a ctime guard it must
	 * match the current ctime exactly, otherwise NOT_SYNC is returned.
	 */
195 	if (args->guard.check &&
196 	    (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
197 	    args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
198 		resp->status = NFS3ERR_NOT_SYNC;
199 		goto out1;
200 	}
201 
	/*
	 * ATTR_UTIME is passed to VOP_SETATTR only when the client
	 * supplied an explicit mtime.
	 */
202 	if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
203 		flag = ATTR_UTIME;
204 	else
205 		flag = 0;
206 
207 	/*
208 	 * If the filesystem is exported with nosuid, then mask off
209 	 * the setuid and setgid bits.
210 	 */
211 	if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
212 	    (exi->exi_export.ex_flags & EX_NOSUID))
213 		ava.va_mode &= ~(VSUID | VSGID);
214 
215 	/*
216 	 * We need to specially handle size changes because it is
217 	 * possible for the client to create a file with modes
218 	 * which indicate read-only, but with the file opened for
219 	 * writing.  If the client then tries to set the size of
220 	 * the file, then the normal access checking done in
221 	 * VOP_SETATTR would prevent the client from doing so,
222 	 * although it should be legal for it to do so.  To get
223 	 * around this, we do the access checking for ourselves
224 	 * and then use VOP_SPACE which doesn't do the access
225 	 * checking which VOP_SETATTR does. VOP_SPACE can only
226 	 * operate on VREG files, let VOP_SETATTR handle the other
227 	 * extremely rare cases.
228 	 * Also the client should not be allowed to change the
229 	 * size of the file if there is a conflicting non-blocking
230 	 * mandatory lock in the region of the change.
231 	 */
232 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
233 		if (in_crit) {
234 			u_offset_t offset;
235 			ssize_t length;
236 
			/*
			 * The affected region is the span between the
			 * old and the new size, whichever is smaller
			 * being its start.
			 */
237 			if (ava.va_size < bva.va_size) {
238 				offset = ava.va_size;
239 				length = bva.va_size - ava.va_size;
240 			} else {
241 				offset = bva.va_size;
242 				length = ava.va_size - bva.va_size;
243 			}
244 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0)) {
245 				error = EACCES;
246 				goto out;
247 			}
248 		}
249 
		/*
		 * Only when the caller's uid matches the file's owner and
		 * the size really changes is the resize done through
		 * VOP_SPACE; AT_SIZE is cleared so that VOP_SETATTR below
		 * does not attempt it again.
		 */
250 		if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
251 			ava.va_mask &= ~AT_SIZE;
252 			bf.l_type = F_WRLCK;
253 			bf.l_whence = 0;
254 			bf.l_start = (off64_t)ava.va_size;
255 			bf.l_len = 0;
256 			bf.l_sysid = 0;
257 			bf.l_pid = 0;
258 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
259 			    (offset_t)ava.va_size, cr, NULL);
260 		}
261 	}
262 
	/* Apply whatever attributes are still selected in the mask. */
263 	if (!error && ava.va_mask)
264 		error = VOP_SETATTR(vp, &ava, flag, cr, NULL);
265 
	/*
	 * From here on "ava" is reused to hold the post-operation
	 * attributes; avap stays NULL if they cannot be fetched.
	 */
266 #ifdef DEBUG
267 	if (rfs3_do_post_op_attr) {
268 		ava.va_mask = AT_ALL;
269 		avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
270 	} else
271 		avap = NULL;
272 #else
273 	ava.va_mask = AT_ALL;
274 	avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
275 #endif
276 
277 	/*
278 	 * Force modified metadata out to stable storage.
	 * This happens before the error check below, so it is done
	 * whether or not the update succeeded; its result is ignored.
279 	 */
280 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
281 
282 	if (error)
283 		goto out;
284 
285 	if (in_crit)
286 		nbl_end_crit(vp);
287 	VN_RELE(vp);
288 
289 	resp->status = NFS3_OK;
290 	vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
291 	return;
292 
293 out:
294 	if (curthread->t_flag & T_WOULDBLOCK) {
295 		curthread->t_flag &= ~T_WOULDBLOCK;
296 		resp->status = NFS3ERR_JUKEBOX;
297 	} else
298 		resp->status = puterrno3(error);
299 out1:
300 	if (vp != NULL) {
301 		if (in_crit)
302 			nbl_end_crit(vp);
303 		VN_RELE(vp);
304 	}
305 	vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
306 }
307 
308 fhandle_t *
309 rfs3_setattr_getfh(SETATTR3args *args)
310 {
311 
312 	return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
313 }
314 
315 /* ARGSUSED */
316 void
317 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
318 	struct svc_req *req, cred_t *cr)
319 {
320 	int error;
321 	vnode_t *vp;
322 	vnode_t *dvp;
323 	struct vattr *vap;
324 	struct vattr va;
325 	struct vattr *dvap;
326 	struct vattr dva;
327 	nfs_fh3 *fhp;
328 	struct sec_ol sec = {0, 0};
329 	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
330 
331 	dvap = NULL;
332 
333 	/*
334 	 * Allow lookups from the root - the default
335 	 * location of the public filehandle.
336 	 */
337 	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
338 		dvp = rootdir;
339 		VN_HOLD(dvp);
340 	} else {
341 		dvp = nfs3_fhtovp(args->what.dirp, exi);
342 		if (dvp == NULL) {
343 			error = ESTALE;
344 			goto out;
345 		}
346 	}
347 
348 #ifdef DEBUG
349 	if (rfs3_do_pre_op_attr) {
350 		dva.va_mask = AT_ALL;
351 		dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
352 	}
353 #else
354 	dva.va_mask = AT_ALL;
355 	dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
356 #endif
357 
358 	if (args->what.name == nfs3nametoolong) {
359 		resp->status = NFS3ERR_NAMETOOLONG;
360 		goto out1;
361 	}
362 
363 	if (args->what.name == NULL || *(args->what.name) == '\0') {
364 		resp->status = NFS3ERR_ACCES;
365 		goto out1;
366 	}
367 
368 	fhp = args->what.dirp;
369 	if (strcmp(args->what.name, "..") == 0 &&
370 	    EQFID(&exi->exi_fid, (fid_t *)&fhp->fh3_len)) {
371 		resp->status = NFS3ERR_NOENT;
372 		goto out1;
373 	}
374 
375 	/*
376 	 * If the public filehandle is used then allow
377 	 * a multi-component lookup
378 	 */
379 	if (PUBLIC_FH3(args->what.dirp)) {
380 		publicfh_flag = TRUE;
381 		error = rfs_publicfh_mclookup(args->what.name, dvp, cr, &vp,
382 					&exi, &sec);
383 		if (error && exi != NULL)
384 			exi_rele(exi);  /* See the comment below */
385 	} else {
386 		error = VOP_LOOKUP(dvp, args->what.name, &vp,
387 				NULL, 0, NULL, cr);
388 	}
389 
390 #ifdef DEBUG
391 	if (rfs3_do_post_op_attr) {
392 		dva.va_mask = AT_ALL;
393 		dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
394 	} else
395 		dvap = NULL;
396 #else
397 	dva.va_mask = AT_ALL;
398 	dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
399 #endif
400 
401 	if (error)
402 		goto out;
403 
404 	if (sec.sec_flags & SEC_QUERY) {
405 		error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
406 	} else {
407 		error = makefh3(&resp->resok.object, vp, exi);
408 		if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
409 			auth_weak = TRUE;
410 	}
411 
412 	if (error) {
413 		VN_RELE(vp);
414 		goto out;
415 	}
416 
417 	/*
418 	 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
419 	 * and have obtained a new exportinfo in exi which needs to be
420 	 * released. Note the the original exportinfo pointed to by exi
421 	 * will be released by the caller, common_dispatch.
422 	 */
423 	if (publicfh_flag)
424 		exi_rele(exi);
425 
426 	VN_RELE(dvp);
427 
428 #ifdef DEBUG
429 	if (rfs3_do_post_op_attr) {
430 		va.va_mask = AT_ALL;
431 		vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
432 	} else
433 		vap = NULL;
434 #else
435 	va.va_mask = AT_ALL;
436 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
437 #endif
438 
439 	VN_RELE(vp);
440 
441 	resp->status = NFS3_OK;
442 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
443 	vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
444 
445 	/*
446 	 * If it's public fh, no 0x81, and client's flavor is
447 	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
448 	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
449 	 */
450 	if (auth_weak)
451 		resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
452 
453 	return;
454 
455 out:
456 	if (curthread->t_flag & T_WOULDBLOCK) {
457 		curthread->t_flag &= ~T_WOULDBLOCK;
458 		resp->status = NFS3ERR_JUKEBOX;
459 	} else
460 		resp->status = puterrno3(error);
461 out1:
462 	if (dvp != NULL)
463 		VN_RELE(dvp);
464 	vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
465 
466 }
467 
468 fhandle_t *
469 rfs3_lookup_getfh(LOOKUP3args *args)
470 {
471 
472 	return ((fhandle_t *)&args->what.dirp->fh3_u.nfs_fh3_i.fh3_i);
473 }
474 
/*
 * ACCESS3: report which of the access rights requested in args->access
 * the caller (cr) holds on the object named by args->object.
 *
 * Each requested ACCESS3_* bit is probed with VOP_ACCESS and set in
 * resok.access when granted.  A VOP_ACCESS failure just leaves the bit
 * clear, unless the thread has T_WOULDBLOCK set, in which case the whole
 * request fails (reported as NFS3ERR_JUKEBOX at "out").  READ,
 * MODIFY/EXTEND and EXECUTE are additionally withheld when
 * MANDLOCK(vp, mode) is true.
 */
475 /* ARGSUSED */
476 void
477 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
478 	struct svc_req *req, cred_t *cr)
479 {
480 	int error;
481 	vnode_t *vp;
482 	struct vattr *vap;
483 	struct vattr va;
484 	int checkwriteperm;
485 
486 	vap = NULL;
487 
488 	vp = nfs3_fhtovp(&args->object, exi);
489 	if (vp == NULL) {
490 		error = ESTALE;
491 		goto out;
492 	}
493 
494 	/*
495 	 * If the file system is exported read only, it is not appropriate
496 	 * to check write permissions for regular files and directories.
497 	 * Special files are interpreted by the client, so the underlying
498 	 * permissions are sent back to the client for interpretation.
499 	 */
500 	if (rdonly(exi, req) && (vp->v_type == VREG || vp->v_type == VDIR))
501 		checkwriteperm = 0;
502 	else
503 		checkwriteperm = 1;
504 
505 	/*
506 	 * We need the mode so that we can correctly determine access
507 	 * permissions relative to a mandatory lock file.  Access to
508 	 * mandatory lock files is denied on the server, so it might
509 	 * as well be reflected to the server during the open.
510 	 */
511 	va.va_mask = AT_MODE;
512 	error = VOP_GETATTR(vp, &va, 0, cr);
513 	if (error)
514 		goto out;
515 
	/*
	 * NOTE(review): only AT_MODE was requested above, yet vap is what
	 * the failure path hands to vattr_to_post_op_attr() - confirm that
	 * the remaining vattr fields are valid in that case.
	 */
516 #ifdef DEBUG
517 	if (rfs3_do_post_op_attr)
518 		vap = &va;
519 #else
520 	vap = &va;
521 #endif
522 
523 	resp->resok.access = 0;
524 
525 	if (args->access & ACCESS3_READ) {
526 		error = VOP_ACCESS(vp, VREAD, 0, cr);
527 		if (error) {
528 			if (curthread->t_flag & T_WOULDBLOCK)
529 				goto out;
530 		} else if (!MANDLOCK(vp, va.va_mode))
531 			resp->resok.access |= ACCESS3_READ;
532 	}
	/* LOOKUP is only meaningful for directories (execute == search). */
533 	if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
534 		error = VOP_ACCESS(vp, VEXEC, 0, cr);
535 		if (error) {
536 			if (curthread->t_flag & T_WOULDBLOCK)
537 				goto out;
538 		} else
539 			resp->resok.access |= ACCESS3_LOOKUP;
540 	}
	/* Grant back exactly the MODIFY/EXTEND bits that were asked for. */
541 	if (checkwriteperm &&
542 	    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
543 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
544 		if (error) {
545 			if (curthread->t_flag & T_WOULDBLOCK)
546 				goto out;
547 		} else if (!MANDLOCK(vp, va.va_mode)) {
548 			resp->resok.access |=
549 			    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
550 		}
551 	}
	/* DELETE is reported for directories only, based on write access. */
552 	if (checkwriteperm &&
553 	    (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
554 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
555 		if (error) {
556 			if (curthread->t_flag & T_WOULDBLOCK)
557 				goto out;
558 		} else
559 			resp->resok.access |= ACCESS3_DELETE;
560 	}
561 	if (args->access & ACCESS3_EXECUTE) {
562 		error = VOP_ACCESS(vp, VEXEC, 0, cr);
563 		if (error) {
564 			if (curthread->t_flag & T_WOULDBLOCK)
565 				goto out;
566 		} else if (!MANDLOCK(vp, va.va_mode))
567 			resp->resok.access |= ACCESS3_EXECUTE;
568 	}
569 
	/* Fetch the full attribute set for the successful reply. */
570 #ifdef DEBUG
571 	if (rfs3_do_post_op_attr) {
572 		va.va_mask = AT_ALL;
573 		vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
574 	} else
575 		vap = NULL;
576 #else
577 	va.va_mask = AT_ALL;
578 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
579 #endif
580 
581 	VN_RELE(vp);
582 
	/*
	 * A leftover non-zero "error" from a denied VOP_ACCESS probe is
	 * deliberately not reported; the reply is still NFS3_OK.
	 */
583 	resp->status = NFS3_OK;
584 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
585 	return;
586 
587 out:
588 	if (curthread->t_flag & T_WOULDBLOCK) {
589 		curthread->t_flag &= ~T_WOULDBLOCK;
590 		resp->status = NFS3ERR_JUKEBOX;
591 	} else
592 		resp->status = puterrno3(error);
593 	if (vp != NULL)
594 		VN_RELE(vp);
595 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
596 }
597 
598 fhandle_t *
599 rfs3_access_getfh(ACCESS3args *args)
600 {
601 
602 	return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
603 }
604 
/*
 * READLINK3: return the contents of the symbolic link named by
 * args->symlink.
 *
 * On success resok.data points at a NUL-terminated buffer of
 * MAXPATHLEN + 1 bytes allocated here; rfs3_readlink_free() frees it
 * when the reply status is NFS3_OK.  On failure the buffer (if it was
 * allocated) is freed here.  Failure paths: "out" translates "error"
 * (NFS3ERR_JUKEBOX when T_WOULDBLOCK is set) and falls into "out1",
 * which releases the vnode and fills in resfail.symlink_attributes.
 */
605 /* ARGSUSED */
606 void
607 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
608 	struct svc_req *req, cred_t *cr)
609 {
610 	int error;
611 	vnode_t *vp;
612 	struct vattr *vap;
613 	struct vattr va;
614 	struct iovec iov;
615 	struct uio uio;
616 	char *data;
617 
618 	vap = NULL;
619 
620 	vp = nfs3_fhtovp(&args->symlink, exi);
621 	if (vp == NULL) {
622 		error = ESTALE;
623 		goto out;
624 	}
625 
626 	va.va_mask = AT_ALL;
627 	error = VOP_GETATTR(vp, &va, 0, cr);
628 	if (error)
629 		goto out;
630 
631 #ifdef DEBUG
632 	if (rfs3_do_post_op_attr)
633 		vap = &va;
634 #else
635 	vap = &va;
636 #endif
637 
638 	if (vp->v_type != VLNK) {
639 		resp->status = NFS3ERR_INVAL;
640 		goto out1;
641 	}
642 
643 	if (MANDLOCK(vp, va.va_mode)) {
644 		resp->status = NFS3ERR_ACCES;
645 		goto out1;
646 	}
647 
	/*
	 * One byte more than the uio is allowed to fill, so there is
	 * always room for the terminating NUL written below.
	 */
648 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
649 
650 	iov.iov_base = data;
651 	iov.iov_len = MAXPATHLEN;
652 	uio.uio_iov = &iov;
653 	uio.uio_iovcnt = 1;
654 	uio.uio_segflg = UIO_SYSSPACE;
655 	uio.uio_extflg = UIO_COPY_CACHED;
656 	uio.uio_loffset = 0;
657 	uio.uio_resid = MAXPATHLEN;
658 
659 	error = VOP_READLINK(vp, &uio, cr);
660 
	/* Refresh the attributes after the read, whether or not it failed. */
661 #ifdef DEBUG
662 	if (rfs3_do_post_op_attr) {
663 		va.va_mask = AT_ALL;
664 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
665 	} else
666 		vap = NULL;
667 #else
668 	va.va_mask = AT_ALL;
669 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
670 #endif
671 
672 #if 0 /* notyet */
673 	/*
674 	 * Don't do this.  It causes local disk writes when just
675 	 * reading the file and the overhead is deemed larger
676 	 * than the benefit.
677 	 */
678 	/*
679 	 * Force modified metadata out to stable storage.
680 	 */
681 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
682 #endif
683 
684 	if (error) {
685 		kmem_free(data, MAXPATHLEN + 1);
686 		goto out;
687 	}
688 
689 	VN_RELE(vp);
690 
691 	resp->status = NFS3_OK;
692 	vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
693 	resp->resok.data = data;
	/* MAXPATHLEN - uio_resid is the number of bytes actually read. */
694 	*(data + MAXPATHLEN - uio.uio_resid) = '\0';
695 	return;
696 
697 out:
698 	if (curthread->t_flag & T_WOULDBLOCK) {
699 		curthread->t_flag &= ~T_WOULDBLOCK;
700 		resp->status = NFS3ERR_JUKEBOX;
701 	} else
702 		resp->status = puterrno3(error);
703 out1:
704 	if (vp != NULL)
705 		VN_RELE(vp);
706 	vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
707 }
708 
709 fhandle_t *
710 rfs3_readlink_getfh(READLINK3args *args)
711 {
712 
713 	return ((fhandle_t *)&args->symlink.fh3_u.nfs_fh3_i.fh3_i);
714 }
715 
716 void
717 rfs3_readlink_free(READLINK3res *resp)
718 {
719 
720 	if (resp->status == NFS3_OK)
721 		kmem_free(resp->resok.data, MAXPATHLEN + 1);
722 }
723 
/*
 * READ3: read up to args->count bytes at args->offset from the regular
 * file named by args->file.
 *
 * The data is read into a freshly allocated mblk which is handed back
 * in resok.data.mp; rfs3_read_free() frees it when the reply status is
 * NFS3_OK and mp is non-NULL.
 *
 * Resources are taken in this order: NBMAND critical region (in_crit),
 * then the vnode rwlock (need_rwunlock).  Failure paths: "out"
 * translates "error" (NFS3ERR_JUKEBOX when T_WOULDBLOCK is set) and
 * falls into "out1", which drops the rwlock and the critical region if
 * held, releases the vnode and fills in resfail.file_attributes.
 */
724 /* ARGSUSED */
725 void
726 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
727 	struct svc_req *req, cred_t *cr)
728 {
729 	int error;
730 	vnode_t *vp;
731 	struct vattr *vap;
732 	struct vattr va;
733 	struct iovec iov;
734 	struct uio uio;
735 	u_offset_t offset;
736 	mblk_t *mp;
737 	int alloc_err = 0;
738 	int in_crit = 0;
739 	int need_rwunlock = 0;
740 
741 	vap = NULL;
742 
743 	vp = nfs3_fhtovp(&args->file, exi);
744 	if (vp == NULL) {
745 		error = ESTALE;
746 		goto out;
747 	}
748 
749 	/*
750 	 * Check to see if the v4 side of the server has delegated
751 	 * this file.  If so, then we return JUKEBOX to allow the
752 	 * client to retransmit its request.
753 	 */
754 	if (rfs4_check_delegated(FREAD, vp, FALSE)) {
755 		resp->status = NFS3ERR_JUKEBOX;
756 		goto out1;
757 	}
758 
759 	/*
760 	 * Enter the critical region before calling VOP_RWLOCK
761 	 * to avoid a deadlock with write requests.
762 	 */
763 	if (nbl_need_check(vp)) {
764 		nbl_start_crit(vp, RW_READER);
765 		in_crit = 1;
766 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0)) {
767 			error = EACCES;
768 			goto out;
769 		}
770 	}
771 
	/* need_rwunlock tells the failure path that the lock must be dropped. */
772 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
773 	need_rwunlock = 1;
774 
775 	va.va_mask = AT_ALL;
776 	error = VOP_GETATTR(vp, &va, 0, cr);
777 
778 	/*
779 	 * If we can't get the attributes, then we can't do the
780 	 * right access checking.  So, we'll fail the request.
781 	 */
782 	if (error)
783 		goto out;
784 
785 #ifdef DEBUG
786 	if (rfs3_do_post_op_attr)
787 		vap = &va;
788 #else
789 	vap = &va;
790 #endif
791 
792 	if (vp->v_type != VREG) {
793 		resp->status = NFS3ERR_INVAL;
794 		goto out1;
795 	}
796 
	/*
	 * A caller whose uid differs from the file's owner needs read
	 * permission; execute permission is accepted as a fallback.
	 */
797 	if (crgetuid(cr) != va.va_uid) {
798 		error = VOP_ACCESS(vp, VREAD, 0, cr);
799 		if (error) {
800 			if (curthread->t_flag & T_WOULDBLOCK)
801 				goto out;
802 			error = VOP_ACCESS(vp, VEXEC, 0, cr);
803 			if (error)
804 				goto out;
805 		}
806 	}
807 
808 	if (MANDLOCK(vp, va.va_mode)) {
809 		resp->status = NFS3ERR_ACCES;
810 		goto out1;
811 	}
812 
	/* Read at or beyond end of file: succeed with no data and eof set. */
813 	offset = args->offset;
814 	if (offset >= va.va_size) {
815 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
816 		if (in_crit)
817 			nbl_end_crit(vp);
818 		VN_RELE(vp);
819 		resp->status = NFS3_OK;
820 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
821 		resp->resok.count = 0;
822 		resp->resok.eof = TRUE;
823 		resp->resok.data.data_len = 0;
824 		resp->resok.data.data_val = NULL;
825 		resp->resok.data.mp = NULL;
826 		return;
827 	}
828 
	/* Zero-length read: succeed with no data; eof is reported FALSE. */
829 	if (args->count == 0) {
830 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
831 		if (in_crit)
832 			nbl_end_crit(vp);
833 		VN_RELE(vp);
834 		resp->status = NFS3_OK;
835 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
836 		resp->resok.count = 0;
837 		resp->resok.eof = FALSE;
838 		resp->resok.data.data_len = 0;
839 		resp->resok.data.data_val = NULL;
840 		resp->resok.data.mp = NULL;
841 		return;
842 	}
843 
844 	/*
845 	 * do not allocate memory more than the max. allowed
846 	 * transfer size
847 	 */
848 	if (args->count > rfs3_tsize(req))
849 		args->count = rfs3_tsize(req);
850 
851 	/*
852 	 * mp will contain the data to be sent out in the read reply.
853 	 * This will be freed after the reply has been sent out (by the
854 	 * driver).
855 	 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
856 	 * that the call to xdrmblk_putmblk() never fails.
857 	 */
858 	mp = allocb_wait(RNDUP(args->count), BPRI_MED, STR_NOSIG, &alloc_err);
859 	ASSERT(mp != NULL);
860 	ASSERT(alloc_err == 0);
861 
862 	iov.iov_base = (caddr_t)mp->b_datap->db_base;
863 	iov.iov_len = args->count;
864 	uio.uio_iov = &iov;
865 	uio.uio_iovcnt = 1;
866 	uio.uio_segflg = UIO_SYSSPACE;
867 	uio.uio_extflg = UIO_COPY_CACHED;
868 	uio.uio_loffset = args->offset;
869 	uio.uio_resid = args->count;
870 
871 	error = VOP_READ(vp, &uio, 0, cr, NULL);
872 
873 	if (error) {
874 		freeb(mp);
875 		goto out;
876 	}
877 
	/*
	 * Re-fetch the attributes after the read.  A failure here does
	 * not fail the request; it only suppresses the post-op attributes
	 * and the eof indication below.
	 */
878 	va.va_mask = AT_ALL;
879 	error = VOP_GETATTR(vp, &va, 0, cr);
880 
881 #ifdef DEBUG
882 	if (rfs3_do_post_op_attr) {
883 		if (error)
884 			vap = NULL;
885 		else
886 			vap = &va;
887 	} else
888 		vap = NULL;
889 #else
890 	if (error)
891 		vap = NULL;
892 	else
893 		vap = &va;
894 #endif
895 
896 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
897 
898 #if 0 /* notyet */
899 	/*
900 	 * Don't do this.  It causes local disk writes when just
901 	 * reading the file and the overhead is deemed larger
902 	 * than the benefit.
903 	 */
904 	/*
905 	 * Force modified metadata out to stable storage.
906 	 */
907 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
908 #endif
909 
910 	if (in_crit)
911 		nbl_end_crit(vp);
912 	VN_RELE(vp);
913 
914 	resp->status = NFS3_OK;
915 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
	/* Bytes actually read = requested count minus what the uio left over. */
916 	resp->resok.count = args->count - uio.uio_resid;
917 	if (!error && offset + resp->resok.count == va.va_size)
918 		resp->resok.eof = TRUE;
919 	else
920 		resp->resok.eof = FALSE;
921 	resp->resok.data.data_len = resp->resok.count;
922 	resp->resok.data.data_val = (char *)mp->b_datap->db_base;
923 
924 	resp->resok.data.mp = mp;
925 
926 	resp->resok.size = (uint_t)args->count;
927 	return;
928 
929 out:
930 	if (curthread->t_flag & T_WOULDBLOCK) {
931 		curthread->t_flag &= ~T_WOULDBLOCK;
932 		resp->status = NFS3ERR_JUKEBOX;
933 	} else
934 		resp->status = puterrno3(error);
935 out1:
936 	if (vp != NULL) {
937 		if (need_rwunlock)
938 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
939 		if (in_crit)
940 			nbl_end_crit(vp);
941 		VN_RELE(vp);
942 	}
943 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
944 }
945 
946 void
947 rfs3_read_free(READ3res *resp)
948 {
949 	mblk_t *mp;
950 
951 	if (resp->status == NFS3_OK) {
952 		mp = resp->resok.data.mp;
953 		if (mp != NULL)
954 			freeb(mp);
955 	}
956 }
957 
958 fhandle_t *
959 rfs3_read_getfh(READ3args *args)
960 {
961 
962 	return ((fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i);
963 }
964 
/*
 * Size of the on-stack iovec array used by rfs3_write().  A request whose
 * mblk chain has more segments than this gets a kmem_alloc()ed array
 * instead.
 */
965 #define	MAX_IOVECS	12
966 
/*
 * DEBUG-only counters: rfs3_write_hits counts writes whose mblk chain fit
 * in the on-stack iovec array, rfs3_write_misses those that did not.
 */
967 #ifdef DEBUG
968 static int rfs3_write_hits = 0;
969 static int rfs3_write_misses = 0;
970 #endif
971 
972 void
973 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
974 	struct svc_req *req, cred_t *cr)
975 {
976 	int error;
977 	vnode_t *vp;
978 	struct vattr *bvap = NULL;
979 	struct vattr bva;
980 	struct vattr *avap = NULL;
981 	struct vattr ava;
982 	u_offset_t rlimit;
983 	struct uio uio;
984 	struct iovec iov[MAX_IOVECS];
985 	mblk_t *m;
986 	struct iovec *iovp;
987 	int iovcnt;
988 	int ioflag;
989 	cred_t *savecred;
990 	int in_crit = 0;
991 	int rwlock_ret = -1;
992 
993 	vp = nfs3_fhtovp(&args->file, exi);
994 	if (vp == NULL) {
995 		error = ESTALE;
996 		goto out;
997 	}
998 
999 	/*
1000 	 * Check to see if the v4 side of the server has delegated
1001 	 * this file.  If so, then we return JUKEBOX to allow the
1002 	 * client to retrasmit its request.
1003 	 */
1004 	if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1005 		resp->status = NFS3ERR_JUKEBOX;
1006 		goto out1;
1007 	}
1008 
1009 	/*
1010 	 * We have to enter the critical region before calling VOP_RWLOCK
1011 	 * to avoid a deadlock with ufs.
1012 	 */
1013 	if (nbl_need_check(vp)) {
1014 		nbl_start_crit(vp, RW_READER);
1015 		in_crit = 1;
1016 		if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0)) {
1017 			error = EACCES;
1018 			goto out;
1019 		}
1020 	}
1021 
1022 	rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1023 
1024 	bva.va_mask = AT_ALL;
1025 	error = VOP_GETATTR(vp, &bva, 0, cr);
1026 
1027 	/*
1028 	 * If we can't get the attributes, then we can't do the
1029 	 * right access checking.  So, we'll fail the request.
1030 	 */
1031 	if (error)
1032 		goto out;
1033 
1034 	bvap = &bva;
1035 #ifdef DEBUG
1036 	if (!rfs3_do_pre_op_attr)
1037 		bvap = NULL;
1038 #endif
1039 	avap = bvap;
1040 
1041 	if (args->count != args->data.data_len) {
1042 		resp->status = NFS3ERR_INVAL;
1043 		goto out1;
1044 	}
1045 
1046 	if (rdonly(exi, req)) {
1047 		resp->status = NFS3ERR_ROFS;
1048 		goto out1;
1049 	}
1050 
1051 	if (vp->v_type != VREG) {
1052 		resp->status = NFS3ERR_INVAL;
1053 		goto out1;
1054 	}
1055 
1056 	if (crgetuid(cr) != bva.va_uid &&
1057 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr)))
1058 		goto out;
1059 
1060 	if (MANDLOCK(vp, bva.va_mode)) {
1061 		resp->status = NFS3ERR_ACCES;
1062 		goto out1;
1063 	}
1064 
1065 	if (args->count == 0) {
1066 		VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1067 		VN_RELE(vp);
1068 		resp->status = NFS3_OK;
1069 		vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1070 		resp->resok.count = 0;
1071 		resp->resok.committed = args->stable;
1072 		resp->resok.verf = write3verf;
1073 		return;
1074 	}
1075 
1076 	if (args->mblk != NULL) {
1077 		iovcnt = 0;
1078 		for (m = args->mblk; m != NULL; m = m->b_cont)
1079 			iovcnt++;
1080 		if (iovcnt <= MAX_IOVECS) {
1081 #ifdef DEBUG
1082 			rfs3_write_hits++;
1083 #endif
1084 			iovp = iov;
1085 		} else {
1086 #ifdef DEBUG
1087 			rfs3_write_misses++;
1088 #endif
1089 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1090 		}
1091 		mblk_to_iov(args->mblk, iovcnt, iovp);
1092 	} else {
1093 		iovcnt = 1;
1094 		iovp = iov;
1095 		iovp->iov_base = args->data.data_val;
1096 		iovp->iov_len = args->count;
1097 	}
1098 
1099 	uio.uio_iov = iovp;
1100 	uio.uio_iovcnt = iovcnt;
1101 
1102 	uio.uio_segflg = UIO_SYSSPACE;
1103 	uio.uio_extflg = UIO_COPY_DEFAULT;
1104 	uio.uio_loffset = args->offset;
1105 	uio.uio_resid = args->count;
1106 	uio.uio_llimit = curproc->p_fsz_ctl;
1107 	rlimit = uio.uio_llimit - args->offset;
1108 	if (rlimit < (u_offset_t)uio.uio_resid)
1109 		uio.uio_resid = (int)rlimit;
1110 
1111 	if (args->stable == UNSTABLE)
1112 		ioflag = 0;
1113 	else if (args->stable == FILE_SYNC)
1114 		ioflag = FSYNC;
1115 	else if (args->stable == DATA_SYNC)
1116 		ioflag = FDSYNC;
1117 	else {
1118 		if (iovp != iov)
1119 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
1120 		resp->status = NFS3ERR_INVAL;
1121 		goto out1;
1122 	}
1123 
1124 	/*
1125 	 * We're changing creds because VM may fault and we need
1126 	 * the cred of the current thread to be used if quota
1127 	 * checking is enabled.
1128 	 */
1129 	savecred = curthread->t_cred;
1130 	curthread->t_cred = cr;
1131 	error = VOP_WRITE(vp, &uio, ioflag, cr, NULL);
1132 	curthread->t_cred = savecred;
1133 
1134 	if (iovp != iov)
1135 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
1136 
1137 	ava.va_mask = AT_ALL;
1138 	avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
1139 
1140 #ifdef DEBUG
1141 	if (!rfs3_do_post_op_attr)
1142 		avap = NULL;
1143 #endif
1144 
1145 	if (error)
1146 		goto out;
1147 
1148 	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1149 	if (in_crit)
1150 		nbl_end_crit(vp);
1151 	VN_RELE(vp);
1152 
1153 	/*
1154 	 * If we were unable to get the V_WRITELOCK_TRUE, then we
1155 	 * may not have accurate after attrs, so check if
1156 	 * we have both attributes, they have a non-zero va_seq, and
1157 	 * va_seq has changed by exactly one,
1158 	 * if not, turn off the before attr.
1159 	 */
1160 	if (rwlock_ret != V_WRITELOCK_TRUE) {
1161 		if (bvap == NULL || avap == NULL ||
1162 				bvap->va_seq == 0 || avap->va_seq == 0 ||
1163 				avap->va_seq != (bvap->va_seq + 1)) {
1164 			bvap = NULL;
1165 		}
1166 	}
1167 
1168 	resp->status = NFS3_OK;
1169 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1170 	resp->resok.count = args->count - uio.uio_resid;
1171 	resp->resok.committed = args->stable;
1172 	resp->resok.verf = write3verf;
1173 	return;
1174 
1175 out:
1176 	if (curthread->t_flag & T_WOULDBLOCK) {
1177 		curthread->t_flag &= ~T_WOULDBLOCK;
1178 		resp->status = NFS3ERR_JUKEBOX;
1179 	} else
1180 		resp->status = puterrno3(error);
1181 out1:
1182 	if (vp != NULL) {
1183 		if (rwlock_ret != -1)
1184 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1185 		if (in_crit)
1186 			nbl_end_crit(vp);
1187 		VN_RELE(vp);
1188 	}
1189 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1190 }
1191 
1192 fhandle_t *
1193 rfs3_write_getfh(WRITE3args *args)
1194 {
1195 
1196 	return ((fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i);
1197 }
1198 
1199 void
1200 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1201 	struct svc_req *req, cred_t *cr)
1202 {
1203 	int error;
1204 	int in_crit = 0;
1205 	vnode_t *vp;
1206 	vnode_t *tvp = NULL;
1207 	vnode_t *dvp;
1208 	struct vattr *vap;
1209 	struct vattr va;
1210 	struct vattr *dbvap;
1211 	struct vattr dbva;
1212 	struct vattr *davap;
1213 	struct vattr dava;
1214 	enum vcexcl excl;
1215 	nfstime3 *mtime;
1216 	len_t reqsize;
1217 	bool_t trunc;
1218 
1219 	dbvap = NULL;
1220 	davap = NULL;
1221 
1222 	dvp = nfs3_fhtovp(args->where.dirp, exi);
1223 	if (dvp == NULL) {
1224 		error = ESTALE;
1225 		goto out;
1226 	}
1227 
1228 #ifdef DEBUG
1229 	if (rfs3_do_pre_op_attr) {
1230 		dbva.va_mask = AT_ALL;
1231 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1232 	} else
1233 		dbvap = NULL;
1234 #else
1235 	dbva.va_mask = AT_ALL;
1236 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1237 #endif
1238 	davap = dbvap;
1239 
1240 	if (args->where.name == nfs3nametoolong) {
1241 		resp->status = NFS3ERR_NAMETOOLONG;
1242 		goto out1;
1243 	}
1244 
1245 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1246 		resp->status = NFS3ERR_ACCES;
1247 		goto out1;
1248 	}
1249 
1250 	if (rdonly(exi, req)) {
1251 		resp->status = NFS3ERR_ROFS;
1252 		goto out1;
1253 	}
1254 
1255 	if (args->how.mode == EXCLUSIVE) {
1256 		va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1257 		va.va_type = VREG;
1258 		va.va_mode = (mode_t)0;
1259 		/*
1260 		 * Ensure no time overflows and that types match
1261 		 */
1262 		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1263 		va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1264 		va.va_mtime.tv_nsec = mtime->nseconds;
1265 		excl = EXCL;
1266 	} else {
1267 		error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1268 		    &va);
1269 		if (error)
1270 			goto out;
1271 		va.va_mask |= AT_TYPE;
1272 		va.va_type = VREG;
1273 		if (args->how.mode == GUARDED)
1274 			excl = EXCL;
1275 		else {
1276 			excl = NONEXCL;
1277 
1278 			/*
1279 			 * During creation of file in non-exclusive mode
1280 			 * if size of file is being set then make sure
1281 			 * that if the file already exists that no conflicting
1282 			 * non-blocking mandatory locks exists in the region
1283 			 * being modified. If there are conflicting locks fail
1284 			 * the operation with EACCES.
1285 			 */
1286 			if (va.va_mask & AT_SIZE) {
1287 				struct vattr tva;
1288 
1289 				/*
1290 				 * Does file already exist?
1291 				 */
1292 				error = VOP_LOOKUP(dvp, args->where.name, &tvp,
1293 						NULL, 0, NULL, cr);
1294 
1295 				/*
1296 				 * Check to see if the file has been delegated
1297 				 * to a v4 client.  If so, then begin recall of
1298 				 * the delegation and return JUKEBOX to allow
1299 				 * the client to retrasmit its request.
1300 				 */
1301 
1302 				trunc = va.va_size == 0;
1303 				if (!error &&
1304 				    rfs4_check_delegated(FWRITE, tvp, trunc)) {
1305 					resp->status = NFS3ERR_JUKEBOX;
1306 					goto out1;
1307 				}
1308 
1309 				/*
1310 				 * Check for NBMAND lock conflicts
1311 				 */
1312 				if (!error && nbl_need_check(tvp)) {
1313 					u_offset_t offset;
1314 					ssize_t len;
1315 
1316 					nbl_start_crit(tvp, RW_READER);
1317 					in_crit = 1;
1318 
1319 					tva.va_mask = AT_SIZE;
1320 					error = VOP_GETATTR(tvp, &tva, 0, cr);
1321 					/*
1322 					 * Can't check for conflicts, so return
1323 					 * error.
1324 					 */
1325 					if (error)
1326 						goto out;
1327 
1328 					offset = tva.va_size < va.va_size ?
1329 						tva.va_size : va.va_size;
1330 					len = tva.va_size < va.va_size ?
1331 						va.va_size - tva.va_size :
1332 						tva.va_size - va.va_size;
1333 					if (nbl_conflict(tvp, NBL_WRITE,
1334 							offset, len, 0)) {
1335 						error = EACCES;
1336 						goto out;
1337 					}
1338 				} else if (tvp) {
1339 					VN_RELE(tvp);
1340 					tvp = NULL;
1341 				}
1342 			}
1343 		}
1344 		if (va.va_mask & AT_SIZE)
1345 			reqsize = va.va_size;
1346 	}
1347 
1348 	/*
1349 	 * Must specify the mode.
1350 	 */
1351 	if (!(va.va_mask & AT_MODE)) {
1352 		resp->status = NFS3ERR_INVAL;
1353 		goto out1;
1354 	}
1355 
1356 	/*
1357 	 * If the filesystem is exported with nosuid, then mask off
1358 	 * the setuid and setgid bits.
1359 	 */
1360 	if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1361 		va.va_mode &= ~(VSUID | VSGID);
1362 
1363 tryagain:
1364 	/*
1365 	 * The file open mode used is VWRITE.  If the client needs
1366 	 * some other semantic, then it should do the access checking
1367 	 * itself.  It would have been nice to have the file open mode
1368 	 * passed as part of the arguments.
1369 	 */
1370 	error = VOP_CREATE(dvp, args->where.name, &va, excl, VWRITE,
1371 	    &vp, cr, 0);
1372 
1373 #ifdef DEBUG
1374 	if (rfs3_do_post_op_attr) {
1375 		dava.va_mask = AT_ALL;
1376 		davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1377 	} else
1378 		davap = NULL;
1379 #else
1380 	dava.va_mask = AT_ALL;
1381 	davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1382 #endif
1383 
1384 	if (error) {
1385 		/*
1386 		 * If we got something other than file already exists
1387 		 * then just return this error.  Otherwise, we got
1388 		 * EEXIST.  If we were doing a GUARDED create, then
1389 		 * just return this error.  Otherwise, we need to
1390 		 * make sure that this wasn't a duplicate of an
1391 		 * exclusive create request.
1392 		 *
1393 		 * The assumption is made that a non-exclusive create
1394 		 * request will never return EEXIST.
1395 		 */
1396 		if (error != EEXIST || args->how.mode == GUARDED)
1397 			goto out;
1398 		/*
1399 		 * Lookup the file so that we can get a vnode for it.
1400 		 */
1401 		error = VOP_LOOKUP(dvp, args->where.name, &vp, NULL, 0,
1402 		    NULL, cr);
1403 		if (error) {
1404 			/*
1405 			 * We couldn't find the file that we thought that
1406 			 * we just created.  So, we'll just try creating
1407 			 * it again.
1408 			 */
1409 			if (error == ENOENT)
1410 				goto tryagain;
1411 			goto out;
1412 		}
1413 
1414 		/*
1415 		 * If the file is delegated to a v4 client, go ahead
1416 		 * and initiate recall, this create is a hint that a
1417 		 * conflicting v3 open has occurred.
1418 		 */
1419 
1420 		if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1421 			VN_RELE(vp);
1422 			resp->status = NFS3ERR_JUKEBOX;
1423 			goto out1;
1424 		}
1425 
1426 		va.va_mask = AT_ALL;
1427 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1428 
1429 		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1430 		/* % with INT32_MAX to prevent overflows */
1431 		if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1432 		    vap->va_mtime.tv_sec !=
1433 		    (mtime->seconds % INT32_MAX) ||
1434 		    vap->va_mtime.tv_nsec != mtime->nseconds)) {
1435 			VN_RELE(vp);
1436 			error = EEXIST;
1437 			goto out;
1438 		}
1439 	} else {
1440 
1441 		if ((args->how.mode == UNCHECKED ||
1442 		    args->how.mode == GUARDED) &&
1443 		    args->how.createhow3_u.obj_attributes.size.set_it &&
1444 		    va.va_size == 0)
1445 			trunc = TRUE;
1446 		else
1447 			trunc = FALSE;
1448 
1449 		if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1450 			VN_RELE(vp);
1451 			resp->status = NFS3ERR_JUKEBOX;
1452 			goto out1;
1453 		}
1454 
1455 		va.va_mask = AT_ALL;
1456 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1457 
1458 		/*
1459 		 * We need to check to make sure that the file got
1460 		 * created to the indicated size.  If not, we do a
1461 		 * setattr to try to change the size, but we don't
1462 		 * try too hard.  This shouldn't a problem as most
1463 		 * clients will only specifiy a size of zero which
1464 		 * local file systems handle.  However, even if
1465 		 * the client does specify a non-zero size, it can
1466 		 * still recover by checking the size of the file
1467 		 * after it has created it and then issue a setattr
1468 		 * request of its own to set the size of the file.
1469 		 */
1470 		if (vap != NULL &&
1471 		    (args->how.mode == UNCHECKED ||
1472 		    args->how.mode == GUARDED) &&
1473 		    args->how.createhow3_u.obj_attributes.size.set_it &&
1474 		    vap->va_size != reqsize) {
1475 			va.va_mask = AT_SIZE;
1476 			va.va_size = reqsize;
1477 			(void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1478 			va.va_mask = AT_ALL;
1479 			vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1480 		}
1481 	}
1482 
1483 #ifdef DEBUG
1484 	if (!rfs3_do_post_op_attr)
1485 		vap = NULL;
1486 #endif
1487 
1488 #ifdef DEBUG
1489 	if (!rfs3_do_post_op_fh3)
1490 		resp->resok.obj.handle_follows = FALSE;
1491 	else {
1492 #endif
1493 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1494 	if (error)
1495 		resp->resok.obj.handle_follows = FALSE;
1496 	else
1497 		resp->resok.obj.handle_follows = TRUE;
1498 #ifdef DEBUG
1499 	}
1500 #endif
1501 
1502 	/*
1503 	 * Force modified data and metadata out to stable storage.
1504 	 */
1505 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
1506 	(void) VOP_FSYNC(dvp, 0, cr);
1507 
1508 	VN_RELE(vp);
1509 	VN_RELE(dvp);
1510 	if (tvp != NULL) {
1511 		if (in_crit)
1512 			nbl_end_crit(tvp);
1513 		VN_RELE(tvp);
1514 	}
1515 
1516 	resp->status = NFS3_OK;
1517 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1518 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1519 	return;
1520 
1521 out:
1522 	if (curthread->t_flag & T_WOULDBLOCK) {
1523 		curthread->t_flag &= ~T_WOULDBLOCK;
1524 		resp->status = NFS3ERR_JUKEBOX;
1525 	} else
1526 		resp->status = puterrno3(error);
1527 out1:
1528 	if (tvp != NULL) {
1529 		if (in_crit)
1530 			nbl_end_crit(tvp);
1531 		VN_RELE(tvp);
1532 	}
1533 	if (dvp != NULL)
1534 		VN_RELE(dvp);
1535 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1536 }
1537 
1538 fhandle_t *
1539 rfs3_create_getfh(CREATE3args *args)
1540 {
1541 
1542 	return ((fhandle_t *)&args->where.dir.fh3_u.nfs_fh3_i.fh3_i);
1543 }
1544 
1545 void
1546 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1547 	struct svc_req *req, cred_t *cr)
1548 {
1549 	int error;
1550 	vnode_t *vp = NULL;
1551 	vnode_t *dvp;
1552 	struct vattr *vap;
1553 	struct vattr va;
1554 	struct vattr *dbvap;
1555 	struct vattr dbva;
1556 	struct vattr *davap;
1557 	struct vattr dava;
1558 
1559 	dbvap = NULL;
1560 	davap = NULL;
1561 
1562 	dvp = nfs3_fhtovp(args->where.dirp, exi);
1563 	if (dvp == NULL) {
1564 		error = ESTALE;
1565 		goto out;
1566 	}
1567 
1568 #ifdef DEBUG
1569 	if (rfs3_do_pre_op_attr) {
1570 		dbva.va_mask = AT_ALL;
1571 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1572 	} else
1573 		dbvap = NULL;
1574 #else
1575 	dbva.va_mask = AT_ALL;
1576 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1577 #endif
1578 	davap = dbvap;
1579 
1580 	if (args->where.name == nfs3nametoolong) {
1581 		resp->status = NFS3ERR_NAMETOOLONG;
1582 		goto out1;
1583 	}
1584 
1585 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1586 		resp->status = NFS3ERR_ACCES;
1587 		goto out1;
1588 	}
1589 
1590 	if (rdonly(exi, req)) {
1591 		resp->status = NFS3ERR_ROFS;
1592 		goto out1;
1593 	}
1594 
1595 	error = sattr3_to_vattr(&args->attributes, &va);
1596 	if (error)
1597 		goto out;
1598 
1599 	if (!(va.va_mask & AT_MODE)) {
1600 		resp->status = NFS3ERR_INVAL;
1601 		goto out1;
1602 	}
1603 
1604 	va.va_mask |= AT_TYPE;
1605 	va.va_type = VDIR;
1606 
1607 	error = VOP_MKDIR(dvp, args->where.name, &va, &vp, cr);
1608 
1609 #ifdef DEBUG
1610 	if (rfs3_do_post_op_attr) {
1611 		dava.va_mask = AT_ALL;
1612 		davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1613 	} else
1614 		davap = NULL;
1615 #else
1616 	dava.va_mask = AT_ALL;
1617 	davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1618 #endif
1619 
1620 	/*
1621 	 * Force modified data and metadata out to stable storage.
1622 	 */
1623 	(void) VOP_FSYNC(dvp, 0, cr);
1624 
1625 	if (error)
1626 		goto out;
1627 
1628 	VN_RELE(dvp);
1629 
1630 #ifdef DEBUG
1631 	if (!rfs3_do_post_op_fh3)
1632 		resp->resok.obj.handle_follows = FALSE;
1633 	else {
1634 #endif
1635 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1636 	if (error)
1637 		resp->resok.obj.handle_follows = FALSE;
1638 	else
1639 		resp->resok.obj.handle_follows = TRUE;
1640 #ifdef DEBUG
1641 	}
1642 #endif
1643 
1644 #ifdef DEBUG
1645 	if (rfs3_do_post_op_attr) {
1646 		va.va_mask = AT_ALL;
1647 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1648 	} else
1649 		vap = NULL;
1650 #else
1651 	va.va_mask = AT_ALL;
1652 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1653 #endif
1654 
1655 	/*
1656 	 * Force modified data and metadata out to stable storage.
1657 	 */
1658 	(void) VOP_FSYNC(vp, 0, cr);
1659 
1660 	VN_RELE(vp);
1661 
1662 	resp->status = NFS3_OK;
1663 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1664 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1665 	return;
1666 
1667 out:
1668 	if (curthread->t_flag & T_WOULDBLOCK) {
1669 		curthread->t_flag &= ~T_WOULDBLOCK;
1670 		resp->status = NFS3ERR_JUKEBOX;
1671 	} else
1672 		resp->status = puterrno3(error);
1673 out1:
1674 	if (dvp != NULL)
1675 		VN_RELE(dvp);
1676 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1677 }
1678 
1679 fhandle_t *
1680 rfs3_mkdir_getfh(MKDIR3args *args)
1681 {
1682 
1683 	return ((fhandle_t *)&args->where.dir.fh3_u.nfs_fh3_i.fh3_i);
1684 }
1685 
1686 void
1687 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
1688 	struct svc_req *req, cred_t *cr)
1689 {
1690 	int error;
1691 	vnode_t *vp;
1692 	vnode_t *dvp;
1693 	struct vattr *vap;
1694 	struct vattr va;
1695 	struct vattr *dbvap;
1696 	struct vattr dbva;
1697 	struct vattr *davap;
1698 	struct vattr dava;
1699 
1700 	dbvap = NULL;
1701 	davap = NULL;
1702 
1703 	dvp = nfs3_fhtovp(args->where.dirp, exi);
1704 	if (dvp == NULL) {
1705 		error = ESTALE;
1706 		goto out;
1707 	}
1708 
1709 #ifdef DEBUG
1710 	if (rfs3_do_pre_op_attr) {
1711 		dbva.va_mask = AT_ALL;
1712 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1713 	} else
1714 		dbvap = NULL;
1715 #else
1716 	dbva.va_mask = AT_ALL;
1717 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1718 #endif
1719 	davap = dbvap;
1720 
1721 	if (args->where.name == nfs3nametoolong) {
1722 		resp->status = NFS3ERR_NAMETOOLONG;
1723 		goto out1;
1724 	}
1725 
1726 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1727 		resp->status = NFS3ERR_ACCES;
1728 		goto out1;
1729 	}
1730 
1731 	if (rdonly(exi, req)) {
1732 		resp->status = NFS3ERR_ROFS;
1733 		goto out1;
1734 	}
1735 
1736 	error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
1737 	if (error)
1738 		goto out;
1739 
1740 	if (!(va.va_mask & AT_MODE)) {
1741 		resp->status = NFS3ERR_INVAL;
1742 		goto out1;
1743 	}
1744 
1745 	if (args->symlink.symlink_data == nfs3nametoolong) {
1746 		resp->status = NFS3ERR_NAMETOOLONG;
1747 		goto out1;
1748 	}
1749 
1750 	va.va_mask |= AT_TYPE;
1751 	va.va_type = VLNK;
1752 
1753 	error = VOP_SYMLINK(dvp, args->where.name, &va,
1754 	    args->symlink.symlink_data, cr);
1755 
1756 #ifdef DEBUG
1757 	if (rfs3_do_post_op_attr) {
1758 		dava.va_mask = AT_ALL;
1759 		davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1760 	} else
1761 		davap = NULL;
1762 #else
1763 	dava.va_mask = AT_ALL;
1764 	davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1765 #endif
1766 
1767 	if (error)
1768 		goto out;
1769 
1770 	error = VOP_LOOKUP(dvp, args->where.name, &vp, NULL, 0, NULL, cr);
1771 
1772 	/*
1773 	 * Force modified data and metadata out to stable storage.
1774 	 */
1775 	(void) VOP_FSYNC(dvp, 0, cr);
1776 
1777 	VN_RELE(dvp);
1778 
1779 	resp->status = NFS3_OK;
1780 	if (error) {
1781 		resp->resok.obj.handle_follows = FALSE;
1782 		vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
1783 		vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1784 		return;
1785 	}
1786 
1787 #ifdef DEBUG
1788 	if (!rfs3_do_post_op_fh3)
1789 		resp->resok.obj.handle_follows = FALSE;
1790 	else {
1791 #endif
1792 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1793 	if (error)
1794 		resp->resok.obj.handle_follows = FALSE;
1795 	else
1796 		resp->resok.obj.handle_follows = TRUE;
1797 #ifdef DEBUG
1798 	}
1799 #endif
1800 
1801 #ifdef DEBUG
1802 	if (rfs3_do_post_op_attr) {
1803 		va.va_mask = AT_ALL;
1804 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1805 	} else
1806 		vap = NULL;
1807 #else
1808 	va.va_mask = AT_ALL;
1809 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1810 #endif
1811 
1812 	/*
1813 	 * Force modified data and metadata out to stable storage.
1814 	 */
1815 	(void) VOP_FSYNC(vp, 0, cr);
1816 
1817 	VN_RELE(vp);
1818 
1819 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1820 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1821 	return;
1822 
1823 out:
1824 	if (curthread->t_flag & T_WOULDBLOCK) {
1825 		curthread->t_flag &= ~T_WOULDBLOCK;
1826 		resp->status = NFS3ERR_JUKEBOX;
1827 	} else
1828 		resp->status = puterrno3(error);
1829 out1:
1830 	if (dvp != NULL)
1831 		VN_RELE(dvp);
1832 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1833 }
1834 
1835 fhandle_t *
1836 rfs3_symlink_getfh(SYMLINK3args *args)
1837 {
1838 
1839 	return ((fhandle_t *)&args->where.dirp->fh3_u.nfs_fh3_i.fh3_i);
1840 }
1841 
1842 void
1843 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
1844 	struct svc_req *req, cred_t *cr)
1845 {
1846 	int error;
1847 	vnode_t *vp;
1848 	vnode_t *dvp;
1849 	struct vattr *vap;
1850 	struct vattr va;
1851 	struct vattr *dbvap;
1852 	struct vattr dbva;
1853 	struct vattr *davap;
1854 	struct vattr dava;
1855 	int mode;
1856 	enum vcexcl excl;
1857 
1858 	dbvap = NULL;
1859 	davap = NULL;
1860 
1861 	dvp = nfs3_fhtovp(args->where.dirp, exi);
1862 	if (dvp == NULL) {
1863 		error = ESTALE;
1864 		goto out;
1865 	}
1866 
1867 #ifdef DEBUG
1868 	if (rfs3_do_pre_op_attr) {
1869 		dbva.va_mask = AT_ALL;
1870 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1871 	} else
1872 		dbvap = NULL;
1873 #else
1874 	dbva.va_mask = AT_ALL;
1875 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1876 #endif
1877 	davap = dbvap;
1878 
1879 	if (args->where.name == nfs3nametoolong) {
1880 		resp->status = NFS3ERR_NAMETOOLONG;
1881 		goto out1;
1882 	}
1883 
1884 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1885 		resp->status = NFS3ERR_ACCES;
1886 		goto out1;
1887 	}
1888 
1889 	if (rdonly(exi, req)) {
1890 		resp->status = NFS3ERR_ROFS;
1891 		goto out1;
1892 	}
1893 
1894 	switch (args->what.type) {
1895 	case NF3CHR:
1896 	case NF3BLK:
1897 		error = sattr3_to_vattr(
1898 		    &args->what.mknoddata3_u.device.dev_attributes, &va);
1899 		if (error)
1900 			goto out;
1901 		if (secpolicy_sys_devices(cr) != 0) {
1902 			resp->status = NFS3ERR_PERM;
1903 			goto out1;
1904 		}
1905 		if (args->what.type == NF3CHR)
1906 			va.va_type = VCHR;
1907 		else
1908 			va.va_type = VBLK;
1909 		va.va_rdev = makedevice(
1910 		    args->what.mknoddata3_u.device.spec.specdata1,
1911 		    args->what.mknoddata3_u.device.spec.specdata2);
1912 		va.va_mask |= AT_TYPE | AT_RDEV;
1913 		break;
1914 	case NF3SOCK:
1915 		error = sattr3_to_vattr(
1916 		    &args->what.mknoddata3_u.pipe_attributes, &va);
1917 		if (error)
1918 			goto out;
1919 		va.va_type = VSOCK;
1920 		va.va_mask |= AT_TYPE;
1921 		break;
1922 	case NF3FIFO:
1923 		error = sattr3_to_vattr(
1924 		    &args->what.mknoddata3_u.pipe_attributes, &va);
1925 		if (error)
1926 			goto out;
1927 		va.va_type = VFIFO;
1928 		va.va_mask |= AT_TYPE;
1929 		break;
1930 	default:
1931 		resp->status = NFS3ERR_BADTYPE;
1932 		goto out1;
1933 	}
1934 
1935 	/*
1936 	 * Must specify the mode.
1937 	 */
1938 	if (!(va.va_mask & AT_MODE)) {
1939 		resp->status = NFS3ERR_INVAL;
1940 		goto out1;
1941 	}
1942 
1943 	excl = EXCL;
1944 
1945 	mode = 0;
1946 
1947 	error = VOP_CREATE(dvp, args->where.name, &va, excl, mode,
1948 	    &vp, cr, 0);
1949 
1950 #ifdef DEBUG
1951 	if (rfs3_do_post_op_attr) {
1952 		dava.va_mask = AT_ALL;
1953 		davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1954 	} else
1955 		davap = NULL;
1956 #else
1957 	dava.va_mask = AT_ALL;
1958 	davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1959 #endif
1960 
1961 	/*
1962 	 * Force modified data and metadata out to stable storage.
1963 	 */
1964 	(void) VOP_FSYNC(dvp, 0, cr);
1965 
1966 	if (error)
1967 		goto out;
1968 
1969 	VN_RELE(dvp);
1970 
1971 	resp->status = NFS3_OK;
1972 
1973 #ifdef DEBUG
1974 	if (!rfs3_do_post_op_fh3)
1975 		resp->resok.obj.handle_follows = FALSE;
1976 	else {
1977 #endif
1978 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1979 	if (error)
1980 		resp->resok.obj.handle_follows = FALSE;
1981 	else
1982 		resp->resok.obj.handle_follows = TRUE;
1983 #ifdef DEBUG
1984 	}
1985 #endif
1986 
1987 #ifdef DEBUG
1988 	if (rfs3_do_post_op_attr) {
1989 		va.va_mask = AT_ALL;
1990 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1991 	} else
1992 		vap = NULL;
1993 #else
1994 	va.va_mask = AT_ALL;
1995 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1996 #endif
1997 
1998 	/*
1999 	 * Force modified metadata out to stable storage.
2000 	 */
2001 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
2002 
2003 	VN_RELE(vp);
2004 
2005 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2006 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2007 	return;
2008 
2009 out:
2010 	if (curthread->t_flag & T_WOULDBLOCK) {
2011 		curthread->t_flag &= ~T_WOULDBLOCK;
2012 		resp->status = NFS3ERR_JUKEBOX;
2013 	} else
2014 		resp->status = puterrno3(error);
2015 out1:
2016 	if (dvp != NULL)
2017 		VN_RELE(dvp);
2018 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2019 }
2020 
2021 fhandle_t *
2022 rfs3_mknod_getfh(MKNOD3args *args)
2023 {
2024 
2025 	return ((fhandle_t *)&args->where.dirp->fh3_u.nfs_fh3_i.fh3_i);
2026 }
2027 
2028 void
2029 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2030 	struct svc_req *req, cred_t *cr)
2031 {
2032 	int error = 0;
2033 	vnode_t *vp;
2034 	struct vattr *bvap;
2035 	struct vattr bva;
2036 	struct vattr *avap;
2037 	struct vattr ava;
2038 	vnode_t *targvp = NULL;
2039 
2040 	bvap = NULL;
2041 	avap = NULL;
2042 
2043 	vp = nfs3_fhtovp(args->object.dirp, exi);
2044 	if (vp == NULL) {
2045 		error = ESTALE;
2046 		goto out;
2047 	}
2048 
2049 #ifdef DEBUG
2050 	if (rfs3_do_pre_op_attr) {
2051 		bva.va_mask = AT_ALL;
2052 		bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2053 	} else
2054 		bvap = NULL;
2055 #else
2056 	bva.va_mask = AT_ALL;
2057 	bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2058 #endif
2059 	avap = bvap;
2060 
2061 	if (vp->v_type != VDIR) {
2062 		resp->status = NFS3ERR_NOTDIR;
2063 		goto out1;
2064 	}
2065 
2066 	if (args->object.name == nfs3nametoolong) {
2067 		resp->status = NFS3ERR_NAMETOOLONG;
2068 		goto out1;
2069 	}
2070 
2071 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2072 		resp->status = NFS3ERR_ACCES;
2073 		goto out1;
2074 	}
2075 
2076 	if (rdonly(exi, req)) {
2077 		resp->status = NFS3ERR_ROFS;
2078 		goto out1;
2079 	}
2080 
2081 	/*
2082 	 * Check for a conflict with a non-blocking mandatory share
2083 	 * reservation and V4 delegations
2084 	 */
2085 	error = VOP_LOOKUP(vp, args->object.name, &targvp, NULL, 0,
2086 			NULL, cr);
2087 	if (error != 0)
2088 		goto out;
2089 
2090 	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2091 		resp->status = NFS3ERR_JUKEBOX;
2092 		goto out1;
2093 	}
2094 
2095 	if (!nbl_need_check(targvp)) {
2096 		error = VOP_REMOVE(vp, args->object.name, cr);
2097 	} else {
2098 		nbl_start_crit(targvp, RW_READER);
2099 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0)) {
2100 			error = EACCES;
2101 		} else {
2102 			error = VOP_REMOVE(vp, args->object.name, cr);
2103 		}
2104 		nbl_end_crit(targvp);
2105 	}
2106 	VN_RELE(targvp);
2107 	targvp = NULL;
2108 
2109 #ifdef DEBUG
2110 	if (rfs3_do_post_op_attr) {
2111 		ava.va_mask = AT_ALL;
2112 		avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2113 	} else
2114 		avap = NULL;
2115 #else
2116 	ava.va_mask = AT_ALL;
2117 	avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2118 #endif
2119 
2120 	/*
2121 	 * Force modified data and metadata out to stable storage.
2122 	 */
2123 	(void) VOP_FSYNC(vp, 0, cr);
2124 
2125 	if (error)
2126 		goto out;
2127 
2128 	VN_RELE(vp);
2129 
2130 	resp->status = NFS3_OK;
2131 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2132 	return;
2133 
2134 out:
2135 	if (curthread->t_flag & T_WOULDBLOCK) {
2136 		curthread->t_flag &= ~T_WOULDBLOCK;
2137 		resp->status = NFS3ERR_JUKEBOX;
2138 	} else
2139 		resp->status = puterrno3(error);
2140 out1:
2141 	if (vp != NULL)
2142 		VN_RELE(vp);
2143 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2144 }
2145 
2146 fhandle_t *
2147 rfs3_remove_getfh(REMOVE3args *args)
2148 {
2149 
2150 	return ((fhandle_t *)&args->object.dirp->fh3_u.nfs_fh3_i.fh3_i);
2151 }
2152 
2153 void
2154 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2155 	struct svc_req *req, cred_t *cr)
2156 {
2157 	int error;
2158 	vnode_t *vp;
2159 	struct vattr *bvap;
2160 	struct vattr bva;
2161 	struct vattr *avap;
2162 	struct vattr ava;
2163 
2164 	bvap = NULL;
2165 	avap = NULL;
2166 
2167 	vp = nfs3_fhtovp(args->object.dirp, exi);
2168 	if (vp == NULL) {
2169 		error = ESTALE;
2170 		goto out;
2171 	}
2172 
2173 #ifdef DEBUG
2174 	if (rfs3_do_pre_op_attr) {
2175 		bva.va_mask = AT_ALL;
2176 		bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2177 	} else
2178 		bvap = NULL;
2179 #else
2180 	bva.va_mask = AT_ALL;
2181 	bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2182 #endif
2183 	avap = bvap;
2184 
2185 	if (vp->v_type != VDIR) {
2186 		resp->status = NFS3ERR_NOTDIR;
2187 		goto out1;
2188 	}
2189 
2190 	if (args->object.name == nfs3nametoolong) {
2191 		resp->status = NFS3ERR_NAMETOOLONG;
2192 		goto out1;
2193 	}
2194 
2195 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2196 		resp->status = NFS3ERR_ACCES;
2197 		goto out1;
2198 	}
2199 
2200 	if (rdonly(exi, req)) {
2201 		resp->status = NFS3ERR_ROFS;
2202 		goto out1;
2203 	}
2204 
2205 	error = VOP_RMDIR(vp, args->object.name, rootdir, cr);
2206 
2207 #ifdef DEBUG
2208 	if (rfs3_do_post_op_attr) {
2209 		ava.va_mask = AT_ALL;
2210 		avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2211 	} else
2212 		avap = NULL;
2213 #else
2214 	ava.va_mask = AT_ALL;
2215 	avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2216 #endif
2217 
2218 	/*
2219 	 * Force modified data and metadata out to stable storage.
2220 	 */
2221 	(void) VOP_FSYNC(vp, 0, cr);
2222 
2223 	if (error) {
2224 		/*
2225 		 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2226 		 * if the directory is not empty.  A System V NFS server
2227 		 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2228 		 * over the wire.
2229 		 */
2230 		if (error == EEXIST)
2231 			error = ENOTEMPTY;
2232 		goto out;
2233 	}
2234 
2235 	VN_RELE(vp);
2236 
2237 	resp->status = NFS3_OK;
2238 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2239 	return;
2240 
2241 out:
2242 	if (curthread->t_flag & T_WOULDBLOCK) {
2243 		curthread->t_flag &= ~T_WOULDBLOCK;
2244 		resp->status = NFS3ERR_JUKEBOX;
2245 	} else
2246 		resp->status = puterrno3(error);
2247 out1:
2248 	if (vp != NULL)
2249 		VN_RELE(vp);
2250 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2251 }
2252 
2253 fhandle_t *
2254 rfs3_rmdir_getfh(RMDIR3args *args)
2255 {
2256 
2257 	return ((fhandle_t *)&args->object.dirp->fh3_u.nfs_fh3_i.fh3_i);
2258 }
2259 
2260 void
2261 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2262 	struct svc_req *req, cred_t *cr)
2263 {
2264 	int error = 0;
2265 	vnode_t *fvp;
2266 	vnode_t *tvp;
2267 	vnode_t *targvp;
2268 	struct vattr *fbvap;
2269 	struct vattr fbva;
2270 	struct vattr *favap;
2271 	struct vattr fava;
2272 	struct vattr *tbvap;
2273 	struct vattr tbva;
2274 	struct vattr *tavap;
2275 	struct vattr tava;
2276 	nfs_fh3	*fh3;
2277 	struct exportinfo *to_exi;
2278 	vnode_t *srcvp = NULL;
2279 
2280 	fbvap = NULL;
2281 	favap = NULL;
2282 	tbvap = NULL;
2283 	tavap = NULL;
2284 	tvp = NULL;
2285 
2286 	fvp = nfs3_fhtovp(args->from.dirp, exi);
2287 	if (fvp == NULL) {
2288 		error = ESTALE;
2289 		goto out;
2290 	}
2291 
2292 #ifdef DEBUG
2293 	if (rfs3_do_pre_op_attr) {
2294 		fbva.va_mask = AT_ALL;
2295 		fbvap = VOP_GETATTR(fvp, &fbva, 0, cr) ? NULL : &fbva;
2296 	} else
2297 		fbvap = NULL;
2298 #else
2299 	fbva.va_mask = AT_ALL;
2300 	fbvap = VOP_GETATTR(fvp, &fbva, 0, cr) ? NULL : &fbva;
2301 #endif
2302 	favap = fbvap;
2303 
2304 	fh3 = args->to.dirp;
2305 	to_exi = checkexport(&fh3->fh3_fsid, (fid_t *)&fh3->fh3_xlen);
2306 	if (to_exi == NULL) {
2307 		resp->status = NFS3ERR_ACCES;
2308 		goto out1;
2309 	}
2310 	exi_rele(to_exi);
2311 
2312 	if (to_exi != exi) {
2313 		resp->status = NFS3ERR_XDEV;
2314 		goto out1;
2315 	}
2316 
2317 	tvp = nfs3_fhtovp(args->to.dirp, exi);
2318 	if (tvp == NULL) {
2319 		error = ESTALE;
2320 		goto out;
2321 	}
2322 
2323 #ifdef DEBUG
2324 	if (rfs3_do_pre_op_attr) {
2325 		tbva.va_mask = AT_ALL;
2326 		tbvap = VOP_GETATTR(tvp, &tbva, 0, cr) ? NULL : &tbva;
2327 	} else
2328 		tbvap = NULL;
2329 #else
2330 	tbva.va_mask = AT_ALL;
2331 	tbvap = VOP_GETATTR(tvp, &tbva, 0, cr) ? NULL : &tbva;
2332 #endif
2333 	tavap = tbvap;
2334 
2335 	if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2336 		resp->status = NFS3ERR_NOTDIR;
2337 		goto out1;
2338 	}
2339 
2340 	if (args->from.name == nfs3nametoolong ||
2341 	    args->to.name == nfs3nametoolong) {
2342 		resp->status = NFS3ERR_NAMETOOLONG;
2343 		goto out1;
2344 	}
2345 	if (args->from.name == NULL || *(args->from.name) == '\0' ||
2346 	    args->to.name == NULL || *(args->to.name) == '\0') {
2347 		resp->status = NFS3ERR_ACCES;
2348 		goto out1;
2349 	}
2350 
2351 	if (rdonly(exi, req)) {
2352 		resp->status = NFS3ERR_ROFS;
2353 		goto out1;
2354 	}
2355 
2356 	/*
2357 	 * Check for a conflict with a non-blocking mandatory share
2358 	 * reservation or V4 delegations.
2359 	 */
2360 	error = VOP_LOOKUP(fvp, args->from.name, &srcvp, NULL, 0,
2361 			NULL, cr);
2362 	if (error != 0)
2363 		goto out;
2364 
2365 	/*
2366 	 * If we rename a delegated file we should recall the
2367 	 * delegation, since future opens should fail or would
2368 	 * refer to a new file.
2369 	 */
2370 	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2371 		resp->status = NFS3ERR_JUKEBOX;
2372 		goto out1;
2373 	}
2374 
2375 	/*
2376 	 * Check for renaming over a delegated file.  Check rfs4_deleg_policy
2377 	 * first to avoid VOP_LOOKUP if possible.
2378 	 */
2379 	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2380 	    VOP_LOOKUP(tvp, args->to.name, &targvp, NULL, 0, NULL, cr) == 0) {
2381 
2382 		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2383 			VN_RELE(targvp);
2384 			resp->status = NFS3ERR_JUKEBOX;
2385 			goto out1;
2386 		}
2387 		VN_RELE(targvp);
2388 	}
2389 
2390 	if (!nbl_need_check(srcvp)) {
2391 		error = VOP_RENAME(fvp, args->from.name, tvp,
2392 				    args->to.name, cr);
2393 	} else {
2394 		nbl_start_crit(srcvp, RW_READER);
2395 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0)) {
2396 			error = EACCES;
2397 		} else {
2398 			error = VOP_RENAME(fvp, args->from.name, tvp,
2399 				    args->to.name, cr);
2400 		}
2401 		nbl_end_crit(srcvp);
2402 	}
2403 	if (error == 0) {
2404 		char *tmp;
2405 
2406 		/* fix the path name for the renamed file */
2407 		mutex_enter(&srcvp->v_lock);
2408 		tmp = srcvp->v_path;
2409 		srcvp->v_path = NULL;
2410 		mutex_exit(&srcvp->v_lock);
2411 		vn_setpath(rootdir, tvp, srcvp, args->to.name,
2412 				strlen(args->to.name));
2413 		if (tmp != NULL)
2414 			kmem_free(tmp, strlen(tmp) + 1);
2415 	}
2416 	VN_RELE(srcvp);
2417 	srcvp = NULL;
2418 
2419 #ifdef DEBUG
2420 	if (rfs3_do_post_op_attr) {
2421 		fava.va_mask = AT_ALL;
2422 		favap = VOP_GETATTR(fvp, &fava, 0, cr) ? NULL : &fava;
2423 		tava.va_mask = AT_ALL;
2424 		tavap = VOP_GETATTR(tvp, &tava, 0, cr) ? NULL : &tava;
2425 	} else {
2426 		favap = NULL;
2427 		tavap = NULL;
2428 	}
2429 #else
2430 	fava.va_mask = AT_ALL;
2431 	favap = VOP_GETATTR(fvp, &fava, 0, cr) ? NULL : &fava;
2432 	tava.va_mask = AT_ALL;
2433 	tavap = VOP_GETATTR(tvp, &tava, 0, cr) ? NULL : &tava;
2434 #endif
2435 
2436 	/*
2437 	 * Force modified data and metadata out to stable storage.
2438 	 */
2439 	(void) VOP_FSYNC(fvp, 0, cr);
2440 	(void) VOP_FSYNC(tvp, 0, cr);
2441 
2442 	if (error)
2443 		goto out;
2444 
2445 	VN_RELE(tvp);
2446 	VN_RELE(fvp);
2447 
2448 	resp->status = NFS3_OK;
2449 	vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2450 	vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2451 	return;
2452 
2453 out:
2454 	if (curthread->t_flag & T_WOULDBLOCK) {
2455 		curthread->t_flag &= ~T_WOULDBLOCK;
2456 		resp->status = NFS3ERR_JUKEBOX;
2457 	} else
2458 		resp->status = puterrno3(error);
2459 out1:
2460 	if (fvp != NULL)
2461 		VN_RELE(fvp);
2462 	if (tvp != NULL)
2463 		VN_RELE(tvp);
2464 	vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2465 	vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2466 }
2467 
2468 fhandle_t *
2469 rfs3_rename_getfh(RENAME3args *args)
2470 {
2471 
2472 	return ((fhandle_t *)&args->from.dirp->fh3_u.nfs_fh3_i.fh3_i);
2473 }
2474 
2475 void
2476 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2477 	struct svc_req *req, cred_t *cr)
2478 {
2479 	int error;
2480 	vnode_t *vp;
2481 	vnode_t *dvp;
2482 	struct vattr *vap;
2483 	struct vattr va;
2484 	struct vattr *bvap;
2485 	struct vattr bva;
2486 	struct vattr *avap;
2487 	struct vattr ava;
2488 	nfs_fh3	*fh3;
2489 	struct exportinfo *to_exi;
2490 
2491 	vap = NULL;
2492 	bvap = NULL;
2493 	avap = NULL;
2494 	dvp = NULL;
2495 
2496 	vp = nfs3_fhtovp(&args->file, exi);
2497 	if (vp == NULL) {
2498 		error = ESTALE;
2499 		goto out;
2500 	}
2501 
2502 #ifdef DEBUG
2503 	if (rfs3_do_pre_op_attr) {
2504 		va.va_mask = AT_ALL;
2505 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2506 	} else
2507 		vap = NULL;
2508 #else
2509 	va.va_mask = AT_ALL;
2510 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2511 #endif
2512 
2513 	fh3 = args->link.dirp;
2514 	to_exi = checkexport(&fh3->fh3_fsid, (fid_t *)&fh3->fh3_xlen);
2515 	if (to_exi == NULL) {
2516 		resp->status = NFS3ERR_ACCES;
2517 		goto out1;
2518 	}
2519 	exi_rele(to_exi);
2520 
2521 	if (to_exi != exi) {
2522 		resp->status = NFS3ERR_XDEV;
2523 		goto out1;
2524 	}
2525 
2526 	dvp = nfs3_fhtovp(args->link.dirp, exi);
2527 	if (dvp == NULL) {
2528 		error = ESTALE;
2529 		goto out;
2530 	}
2531 
2532 #ifdef DEBUG
2533 	if (rfs3_do_pre_op_attr) {
2534 		bva.va_mask = AT_ALL;
2535 		bvap = VOP_GETATTR(dvp, &bva, 0, cr) ? NULL : &bva;
2536 	} else
2537 		bvap = NULL;
2538 #else
2539 	bva.va_mask = AT_ALL;
2540 	bvap = VOP_GETATTR(dvp, &bva, 0, cr) ? NULL : &bva;
2541 #endif
2542 
2543 	if (dvp->v_type != VDIR) {
2544 		resp->status = NFS3ERR_NOTDIR;
2545 		goto out1;
2546 	}
2547 
2548 	if (args->link.name == nfs3nametoolong) {
2549 		resp->status = NFS3ERR_NAMETOOLONG;
2550 		goto out1;
2551 	}
2552 
2553 	if (args->link.name == NULL || *(args->link.name) == '\0') {
2554 		resp->status = NFS3ERR_ACCES;
2555 		goto out1;
2556 	}
2557 
2558 	if (rdonly(exi, req)) {
2559 		resp->status = NFS3ERR_ROFS;
2560 		goto out1;
2561 	}
2562 
2563 	error = VOP_LINK(dvp, vp, args->link.name, cr);
2564 
2565 #ifdef DEBUG
2566 	if (rfs3_do_post_op_attr) {
2567 		va.va_mask = AT_ALL;
2568 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2569 		ava.va_mask = AT_ALL;
2570 		avap = VOP_GETATTR(dvp, &ava, 0, cr) ? NULL : &ava;
2571 	} else {
2572 		vap = NULL;
2573 		avap = NULL;
2574 	}
2575 #else
2576 	va.va_mask = AT_ALL;
2577 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2578 	ava.va_mask = AT_ALL;
2579 	avap = VOP_GETATTR(dvp, &ava, 0, cr) ? NULL : &ava;
2580 #endif
2581 
2582 	/*
2583 	 * Force modified data and metadata out to stable storage.
2584 	 */
2585 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
2586 	(void) VOP_FSYNC(dvp, 0, cr);
2587 
2588 	if (error)
2589 		goto out;
2590 
2591 	VN_RELE(dvp);
2592 	VN_RELE(vp);
2593 
2594 	resp->status = NFS3_OK;
2595 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
2596 	vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
2597 	return;
2598 
2599 out:
2600 	if (curthread->t_flag & T_WOULDBLOCK) {
2601 		curthread->t_flag &= ~T_WOULDBLOCK;
2602 		resp->status = NFS3ERR_JUKEBOX;
2603 	} else
2604 		resp->status = puterrno3(error);
2605 out1:
2606 	if (vp != NULL)
2607 		VN_RELE(vp);
2608 	if (dvp != NULL)
2609 		VN_RELE(dvp);
2610 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
2611 	vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
2612 }
2613 
2614 fhandle_t *
2615 rfs3_link_getfh(LINK3args *args)
2616 {
2617 
2618 	return ((fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i);
2619 }
2620 
/*
 * This macro defines the size of a response which contains attribute
 * information and one directory entry (whose name length is given by
 * the macro parameter).  If the count in the incoming request is larger
 * than this, then we are guaranteed to be able to return at least one
 * directory entry if one exists, so there is no need to check for
 * NFS3ERR_TOOSMALL.  If the count is not larger than this, then we must
 * check whether that error needs to be returned.
 *
 * NFS3_READDIR_MIN_COUNT is made up of the following, in XDR units:
 *
 * reply header:
 *	status			- 1
 *	attribute flag		- 1
 *	cookie verifier		- 2
 *	attributes		- NFS3_SIZEOF_FATTR3
 * one entry:
 *	boolean			- 1
 *	file id			- 2
 *	directory name length	- 1
 *	cookie			- 2
 * reply trailer:
 *	end of list		- 1
 *	end of file		- 1
 *
 * all multiplied by BYTES_PER_XDR_UNIT, plus the length of the name
 * rounded up to a multiple of BYTES_PER_XDR_UNIT.
 */

#define	NFS3_READDIR_MIN_COUNT(length)	\
	(((1 + 1 + 2 + NFS3_SIZEOF_FATTR3) + \
	    (1 + 2 + 1 + 2) + \
	    (1 + 1)) * BYTES_PER_XDR_UNIT + \
	    roundup((length), BYTES_PER_XDR_UNIT))
2649 
2650 /* ARGSUSED */
2651 void
2652 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
2653 	struct svc_req *req, cred_t *cr)
2654 {
2655 	int error;
2656 	vnode_t *vp;
2657 	struct vattr *vap;
2658 	struct vattr va;
2659 	struct iovec iov;
2660 	struct uio uio;
2661 	char *data;
2662 	int iseof;
2663 	int bufsize;
2664 	int namlen;
2665 	uint_t count;
2666 
2667 	vap = NULL;
2668 
2669 	vp = nfs3_fhtovp(&args->dir, exi);
2670 	if (vp == NULL) {
2671 		error = ESTALE;
2672 		goto out;
2673 	}
2674 
2675 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2676 
2677 #ifdef DEBUG
2678 	if (rfs3_do_pre_op_attr) {
2679 		va.va_mask = AT_ALL;
2680 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2681 	} else
2682 		vap = NULL;
2683 #else
2684 	va.va_mask = AT_ALL;
2685 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2686 #endif
2687 
2688 	if (vp->v_type != VDIR) {
2689 		resp->status = NFS3ERR_NOTDIR;
2690 		goto out1;
2691 	}
2692 
2693 	error = VOP_ACCESS(vp, VREAD, 0, cr);
2694 	if (error)
2695 		goto out;
2696 
2697 	/*
2698 	 * Now don't allow arbitrary count to alloc;
2699 	 * allow the maximum not to exceed rfs3_tsize()
2700 	 */
2701 	if (args->count > rfs3_tsize(req))
2702 		args->count = rfs3_tsize(req);
2703 
2704 	/*
2705 	 * Make sure that there is room to read at least one entry
2706 	 * if any are available.
2707 	 */
2708 	if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
2709 		count = DIRENT64_RECLEN(MAXNAMELEN);
2710 	else
2711 		count = args->count;
2712 
2713 	data = kmem_alloc(count, KM_SLEEP);
2714 
2715 	iov.iov_base = data;
2716 	iov.iov_len = count;
2717 	uio.uio_iov = &iov;
2718 	uio.uio_iovcnt = 1;
2719 	uio.uio_segflg = UIO_SYSSPACE;
2720 	uio.uio_extflg = UIO_COPY_CACHED;
2721 	uio.uio_loffset = (offset_t)args->cookie;
2722 	uio.uio_resid = count;
2723 
2724 	error = VOP_READDIR(vp, &uio, cr, &iseof);
2725 
2726 #ifdef DEBUG
2727 	if (rfs3_do_post_op_attr) {
2728 		va.va_mask = AT_ALL;
2729 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2730 	} else
2731 		vap = NULL;
2732 #else
2733 	va.va_mask = AT_ALL;
2734 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2735 #endif
2736 
2737 	if (error) {
2738 		kmem_free(data, count);
2739 		goto out;
2740 	}
2741 
2742 	/*
2743 	 * If the count was not large enough to be able to guarantee
2744 	 * to be able to return at least one entry, then need to
2745 	 * check to see if NFS3ERR_TOOSMALL should be returned.
2746 	 */
2747 	if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
2748 		/*
2749 		 * bufsize is used to keep track of the size of the response.
2750 		 * It is primed with:
2751 		 *	1 for the status +
2752 		 *	1 for the dir_attributes.attributes boolean +
2753 		 *	2 for the cookie verifier
2754 		 * all times BYTES_PER_XDR_UNIT to convert from XDR units
2755 		 * to bytes.  If there are directory attributes to be
2756 		 * returned, then:
2757 		 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
2758 		 * time BYTES_PER_XDR_UNIT is added to account for them.
2759 		 */
2760 		bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
2761 		if (vap != NULL)
2762 			bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
2763 		/*
2764 		 * An entry is composed of:
2765 		 *	1 for the true/false list indicator +
2766 		 *	2 for the fileid +
2767 		 *	1 for the length of the name +
2768 		 *	2 for the cookie +
2769 		 * all times BYTES_PER_XDR_UNIT to convert from
2770 		 * XDR units to bytes, plus the length of the name
2771 		 * rounded up to the nearest BYTES_PER_XDR_UNIT.
2772 		 */
2773 		if (count != uio.uio_resid) {
2774 			namlen = strlen(((struct dirent64 *)data)->d_name);
2775 			bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
2776 				    roundup(namlen, BYTES_PER_XDR_UNIT);
2777 		}
2778 		/*
2779 		 * We need to check to see if the number of bytes left
2780 		 * to go into the buffer will actually fit into the
2781 		 * buffer.  This is calculated as the size of this
2782 		 * entry plus:
2783 		 *	1 for the true/false list indicator +
2784 		 *	1 for the eof indicator
2785 		 * times BYTES_PER_XDR_UNIT to convert from from
2786 		 * XDR units to bytes.
2787 		 */
2788 		bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
2789 		if (bufsize > args->count) {
2790 			kmem_free(data, count);
2791 			resp->status = NFS3ERR_TOOSMALL;
2792 			goto out1;
2793 		}
2794 	}
2795 
2796 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2797 
2798 #if 0 /* notyet */
2799 	/*
2800 	 * Don't do this.  It causes local disk writes when just
2801 	 * reading the file and the overhead is deemed larger
2802 	 * than the benefit.
2803 	 */
2804 	/*
2805 	 * Force modified metadata out to stable storage.
2806 	 */
2807 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
2808 #endif
2809 
2810 	VN_RELE(vp);
2811 
2812 	resp->status = NFS3_OK;
2813 	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
2814 	resp->resok.cookieverf = 0;
2815 	resp->resok.reply.entries = (entry3 *)data;
2816 	resp->resok.reply.eof = iseof;
2817 	resp->resok.size = count - uio.uio_resid;
2818 	resp->resok.count = args->count;
2819 	resp->resok.freecount = count;
2820 	return;
2821 
2822 out:
2823 	if (curthread->t_flag & T_WOULDBLOCK) {
2824 		curthread->t_flag &= ~T_WOULDBLOCK;
2825 		resp->status = NFS3ERR_JUKEBOX;
2826 	} else
2827 		resp->status = puterrno3(error);
2828 out1:
2829 	if (vp != NULL) {
2830 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2831 		VN_RELE(vp);
2832 	}
2833 	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
2834 }
2835 
2836 fhandle_t *
2837 rfs3_readdir_getfh(READDIR3args *args)
2838 {
2839 
2840 	return ((fhandle_t *)&args->dir.fh3_u.nfs_fh3_i.fh3_i);
2841 }
2842 
2843 void
2844 rfs3_readdir_free(READDIR3res *resp)
2845 {
2846 
2847 	if (resp->status == NFS3_OK)
2848 		kmem_free(resp->resok.reply.entries, resp->resok.freecount);
2849 }
2850 
/*
 * Step from one struct dirent64 to the record that follows it in the
 * same buffer, using the d_reclen field of the current record as the
 * byte distance.  Any earlier definition of nextdp is discarded first.
 */
#if defined(nextdp)
#undef nextdp
#endif
#define	nextdp(dp)	\
	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
2855 
/*
 * This macro computes the size of a response which contains
 * one directory entry including the attributes as well as file handle.
 * If the incoming request is larger than this, then we are guaranteed to be
 * able to return at least one more directory entry if one exists.
 *
 * NFS3_READDIRPLUS_ENTRY is made up of the following:
 *
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * attribute flag - 1 * BYTES_PER_XDR_UNIT
 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * status byte for file handle - 1 * BYTES_PER_XDR_UNIT
 * length of a file handle - 1 * BYTES_PER_XDR_UNIT
 * maximum length of a file handle (NFS3_CURFHSIZE)
 * name length of the entry, rounded up to a multiple of BYTES_PER_XDR_UNIT
 *
 * The namelen argument is parenthesized in the expansion so that an
 * expression argument is handled the same way as in
 * NFS3_READDIR_MIN_COUNT().
 */
#define	NFS3_READDIRPLUS_ENTRY(namelen)	\
	((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
		BYTES_PER_XDR_UNIT + \
	NFS3_CURFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
2879 
2880 static int rfs3_readdir_unit = MAXBSIZE;
2881 
2882 /* ARGSUSED */
2883 void
2884 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
2885 	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
2886 {
2887 	int error;
2888 	vnode_t *vp;
2889 	struct vattr *vap;
2890 	struct vattr va;
2891 	struct iovec iov;
2892 	struct uio uio;
2893 	char *data;
2894 	int iseof;
2895 	struct dirent64 *dp;
2896 	vnode_t *nvp;
2897 	struct vattr *nvap;
2898 	struct vattr nva;
2899 	entryplus3_info *infop = NULL;
2900 	int size = 0;
2901 	int nents = 0;
2902 	int bufsize = 0;
2903 	int entrysize = 0;
2904 	int tofit = 0;
2905 	int rd_unit = rfs3_readdir_unit;
2906 	int prev_len;
2907 	int space_left;
2908 	int i;
2909 	uint_t *namlen = NULL;
2910 
2911 	vap = NULL;
2912 
2913 	vp = nfs3_fhtovp(&args->dir, exi);
2914 	if (vp == NULL) {
2915 		error = ESTALE;
2916 		goto out;
2917 	}
2918 
2919 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2920 
2921 #ifdef DEBUG
2922 	if (rfs3_do_pre_op_attr) {
2923 		va.va_mask = AT_ALL;
2924 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2925 	} else
2926 		vap = NULL;
2927 #else
2928 	va.va_mask = AT_ALL;
2929 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2930 #endif
2931 
2932 	if (vp->v_type != VDIR) {
2933 		error = ENOTDIR;
2934 		goto out;
2935 	}
2936 
2937 	error = VOP_ACCESS(vp, VREAD, 0, cr);
2938 	if (error)
2939 		goto out;
2940 
2941 	/*
2942 	 * Don't allow arbitrary counts for allocation
2943 	 */
2944 	if (args->maxcount > rfs3_tsize(req))
2945 		args->maxcount = rfs3_tsize(req);
2946 
2947 	/*
2948 	 * Make sure that there is room to read at least one entry
2949 	 * if any are available
2950 	 */
2951 	args->dircount = MIN(args->dircount, args->maxcount);
2952 
2953 	if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
2954 		args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
2955 
2956 	/*
2957 	 * This allocation relies on a minimum directory entry
2958 	 * being roughly 24 bytes.  Therefore, the namlen array
2959 	 * will have enough space based on the maximum number of
2960 	 * entries to read.
2961 	 */
2962 	namlen = kmem_alloc(args->dircount, KM_SLEEP);
2963 
2964 	space_left = args->dircount;
2965 	data = kmem_alloc(args->dircount, KM_SLEEP);
2966 	dp = (struct dirent64 *)data;
2967 	uio.uio_iov = &iov;
2968 	uio.uio_iovcnt = 1;
2969 	uio.uio_segflg = UIO_SYSSPACE;
2970 	uio.uio_extflg = UIO_COPY_CACHED;
2971 	uio.uio_loffset = (offset_t)args->cookie;
2972 
2973 	/*
2974 	 * bufsize is used to keep track of the size of the response as we
2975 	 * get post op attributes and filehandles for each entry.  This is
2976 	 * an optimization as the server may have read more entries than will
2977 	 * fit in the buffer specified by maxcount.  We stop calculating
2978 	 * post op attributes and filehandles once we have exceeded maxcount.
2979 	 * This will minimize the effect of truncation.
2980 	 *
2981 	 * It is primed with:
2982 	 *	1 for the status +
2983 	 *	1 for the dir_attributes.attributes boolean +
2984 	 *	2 for the cookie verifier
2985 	 * all times BYTES_PER_XDR_UNIT to convert from XDR units
2986 	 * to bytes.  If there are directory attributes to be
2987 	 * returned, then:
2988 	 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
2989 	 * time BYTES_PER_XDR_UNIT is added to account for them.
2990 	 */
2991 	bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
2992 	if (vap != NULL)
2993 		bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
2994 
2995 getmoredents:
2996 	/*
2997 	 * Here we make a check so that our read unit is not larger than
2998 	 * the space left in the buffer.
2999 	 */
3000 	rd_unit = MIN(rd_unit, space_left);
3001 	iov.iov_base = (char *)dp;
3002 	iov.iov_len = rd_unit;
3003 	uio.uio_resid = rd_unit;
3004 	prev_len = rd_unit;
3005 
3006 	error = VOP_READDIR(vp, &uio, cr, &iseof);
3007 
3008 	if (error) {
3009 		kmem_free(data, args->dircount);
3010 		goto out;
3011 	}
3012 
3013 	if (uio.uio_resid == prev_len && !iseof) {
3014 		if (nents == 0) {
3015 			kmem_free(data, args->dircount);
3016 			resp->status = NFS3ERR_TOOSMALL;
3017 			goto out1;
3018 		}
3019 
3020 		/*
3021 		 * We could not get any more entries, so get the attributes
3022 		 * and filehandle for the entries already obtained.
3023 		 */
3024 		goto good;
3025 	}
3026 
3027 	/*
3028 	 * We estimate the size of the response by assuming the
3029 	 * entry exists and attributes and filehandle are also valid
3030 	 */
3031 	for (size = prev_len - uio.uio_resid;
3032 		size > 0;
3033 		size -= dp->d_reclen, dp = nextdp(dp)) {
3034 
3035 		if (dp->d_ino == 0) {
3036 			nents++;
3037 			continue;
3038 		}
3039 
3040 		namlen[nents] = strlen(dp->d_name);
3041 		entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3042 
3043 		/*
3044 		 * We need to check to see if the number of bytes left
3045 		 * to go into the buffer will actually fit into the
3046 		 * buffer.  This is calculated as the size of this
3047 		 * entry plus:
3048 		 *	1 for the true/false list indicator +
3049 		 *	1 for the eof indicator
3050 		 * times BYTES_PER_XDR_UNIT to convert from XDR units
3051 		 * to bytes.
3052 		 *
3053 		 * Also check the dircount limit against the first entry read
3054 		 *
3055 		 */
3056 		tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3057 		if (bufsize + tofit > args->maxcount) {
3058 			/*
3059 			 * We make a check here to see if this was the
3060 			 * first entry being measured.  If so, then maxcount
3061 			 * was too small to begin with and so we need to
3062 			 * return with NFS3ERR_TOOSMALL.
3063 			 */
3064 			if (nents == 0) {
3065 				kmem_free(data, args->dircount);
3066 				resp->status = NFS3ERR_TOOSMALL;
3067 				goto out1;
3068 			}
3069 			iseof = FALSE;
3070 			goto good;
3071 		}
3072 		bufsize += entrysize;
3073 		nents++;
3074 	}
3075 
3076 	/*
3077 	 * If there is enough room to fit at least 1 more entry including
3078 	 * post op attributes and filehandle in the buffer AND that we haven't
3079 	 * exceeded dircount then go back and get some more.
3080 	 */
3081 	if (!iseof &&
3082 	    (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3083 		space_left -= (prev_len - uio.uio_resid);
3084 		if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3085 			goto getmoredents;
3086 
3087 		/* else, fall through */
3088 	}
3089 
3090 good:
3091 
3092 #ifdef DEBUG
3093 	if (rfs3_do_post_op_attr) {
3094 		va.va_mask = AT_ALL;
3095 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3096 	} else
3097 		vap = NULL;
3098 #else
3099 	va.va_mask = AT_ALL;
3100 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3101 #endif
3102 
3103 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3104 
3105 	infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3106 	resp->resok.infop = infop;
3107 
3108 	dp = (struct dirent64 *)data;
3109 	for (i = 0; i < nents; i++) {
3110 
3111 		if (dp->d_ino == 0) {
3112 			infop[i].attr.attributes = FALSE;
3113 			infop[i].fh.handle_follows = FALSE;
3114 			dp = nextdp(dp);
3115 			continue;
3116 		}
3117 
3118 		infop[i].namelen = namlen[i];
3119 
3120 		error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr);
3121 		if (error) {
3122 			infop[i].attr.attributes = FALSE;
3123 			infop[i].fh.handle_follows = FALSE;
3124 			dp = nextdp(dp);
3125 			continue;
3126 		}
3127 
3128 #ifdef DEBUG
3129 		if (rfs3_do_post_op_attr) {
3130 			nva.va_mask = AT_ALL;
3131 			nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ?
3132 				NULL : &nva;
3133 		} else
3134 			nvap = NULL;
3135 #else
3136 		nva.va_mask = AT_ALL;
3137 		nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3138 #endif
3139 		vattr_to_post_op_attr(nvap, &infop[i].attr);
3140 
3141 #ifdef DEBUG
3142 		if (!rfs3_do_post_op_fh3)
3143 			infop[i].fh.handle_follows = FALSE;
3144 		else {
3145 #endif
3146 		error = makefh3(&infop[i].fh.handle, nvp, exi);
3147 		if (!error)
3148 			infop[i].fh.handle_follows = TRUE;
3149 		else
3150 			infop[i].fh.handle_follows = FALSE;
3151 #ifdef DEBUG
3152 		}
3153 #endif
3154 
3155 		VN_RELE(nvp);
3156 		dp = nextdp(dp);
3157 	}
3158 
3159 #if 0 /* notyet */
3160 	/*
3161 	 * Don't do this.  It causes local disk writes when just
3162 	 * reading the file and the overhead is deemed larger
3163 	 * than the benefit.
3164 	 */
3165 	/*
3166 	 * Force modified metadata out to stable storage.
3167 	 */
3168 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
3169 #endif
3170 
3171 	VN_RELE(vp);
3172 
3173 	kmem_free(namlen, args->dircount);
3174 
3175 	resp->status = NFS3_OK;
3176 	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3177 	resp->resok.cookieverf = 0;
3178 	resp->resok.reply.entries = (entryplus3 *)data;
3179 	resp->resok.reply.eof = iseof;
3180 	resp->resok.size = nents;
3181 	resp->resok.count = args->dircount;
3182 	resp->resok.maxcount = args->maxcount;
3183 	return;
3184 
3185 out:
3186 	if (curthread->t_flag & T_WOULDBLOCK) {
3187 		curthread->t_flag &= ~T_WOULDBLOCK;
3188 		resp->status = NFS3ERR_JUKEBOX;
3189 	} else
3190 		resp->status = puterrno3(error);
3191 out1:
3192 	if (vp != NULL) {
3193 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3194 		VN_RELE(vp);
3195 	}
3196 
3197 	if (namlen != NULL)
3198 		kmem_free(namlen, args->dircount);
3199 
3200 	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3201 }
3202 
3203 fhandle_t *
3204 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3205 {
3206 
3207 	return ((fhandle_t *)&args->dir.fh3_u.nfs_fh3_i.fh3_i);
3208 }
3209 
3210 void
3211 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3212 {
3213 
3214 	if (resp->status == NFS3_OK) {
3215 		kmem_free(resp->resok.reply.entries, resp->resok.count);
3216 		kmem_free(resp->resok.infop,
3217 			resp->resok.size * sizeof (struct entryplus3_info));
3218 	}
3219 }
3220 
3221 /* ARGSUSED */
3222 void
3223 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3224 	struct svc_req *req, cred_t *cr)
3225 {
3226 	int error;
3227 	vnode_t *vp;
3228 	struct vattr *vap;
3229 	struct vattr va;
3230 	struct statvfs64 sb;
3231 
3232 	vap = NULL;
3233 
3234 	vp = nfs3_fhtovp(&args->fsroot, exi);
3235 	if (vp == NULL) {
3236 		error = ESTALE;
3237 		goto out;
3238 	}
3239 
3240 	error = VFS_STATVFS(vp->v_vfsp, &sb);
3241 
3242 #ifdef DEBUG
3243 	if (rfs3_do_post_op_attr) {
3244 		va.va_mask = AT_ALL;
3245 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3246 	} else
3247 		vap = NULL;
3248 #else
3249 	va.va_mask = AT_ALL;
3250 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3251 #endif
3252 
3253 	VN_RELE(vp);
3254 
3255 	if (error)
3256 		goto out;
3257 
3258 	resp->status = NFS3_OK;
3259 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3260 	if (sb.f_blocks != (fsblkcnt64_t)-1)
3261 		resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3262 	else
3263 		resp->resok.tbytes = (size3)sb.f_blocks;
3264 	if (sb.f_bfree != (fsblkcnt64_t)-1)
3265 		resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3266 	else
3267 		resp->resok.fbytes = (size3)sb.f_bfree;
3268 	if (sb.f_bavail != (fsblkcnt64_t)-1)
3269 		resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3270 	else
3271 		resp->resok.abytes = (size3)sb.f_bavail;
3272 	resp->resok.tfiles = (size3)sb.f_files;
3273 	resp->resok.ffiles = (size3)sb.f_ffree;
3274 	resp->resok.afiles = (size3)sb.f_favail;
3275 	resp->resok.invarsec = 0;
3276 	return;
3277 
3278 out:
3279 	if (curthread->t_flag & T_WOULDBLOCK) {
3280 		curthread->t_flag &= ~T_WOULDBLOCK;
3281 		resp->status = NFS3ERR_JUKEBOX;
3282 	} else
3283 		resp->status = puterrno3(error);
3284 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3285 }
3286 
3287 fhandle_t *
3288 rfs3_fsstat_getfh(FSSTAT3args *args)
3289 {
3290 
3291 	return ((fhandle_t *)&args->fsroot.fh3_u.nfs_fh3_i.fh3_i);
3292 }
3293 
3294 /* ARGSUSED */
3295 void
3296 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3297 	struct svc_req *req, cred_t *cr)
3298 {
3299 	vnode_t *vp;
3300 	struct vattr *vap;
3301 	struct vattr va;
3302 	uint32_t xfer_size;
3303 	ulong_t l = 0;
3304 	int error;
3305 
3306 	vp = nfs3_fhtovp(&args->fsroot, exi);
3307 	if (vp == NULL) {
3308 		if (curthread->t_flag & T_WOULDBLOCK) {
3309 			curthread->t_flag &= ~T_WOULDBLOCK;
3310 			resp->status = NFS3ERR_JUKEBOX;
3311 		} else
3312 			resp->status = NFS3ERR_STALE;
3313 		vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3314 		return;
3315 	}
3316 
3317 #ifdef DEBUG
3318 	if (rfs3_do_post_op_attr) {
3319 		va.va_mask = AT_ALL;
3320 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3321 	} else
3322 		vap = NULL;
3323 #else
3324 	va.va_mask = AT_ALL;
3325 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3326 #endif
3327 
3328 	resp->status = NFS3_OK;
3329 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3330 	xfer_size = rfs3_tsize(req);
3331 	resp->resok.rtmax = xfer_size;
3332 	resp->resok.rtpref = xfer_size;
3333 	resp->resok.rtmult = DEV_BSIZE;
3334 	resp->resok.wtmax = xfer_size;
3335 	resp->resok.wtpref = xfer_size;
3336 	resp->resok.wtmult = DEV_BSIZE;
3337 	resp->resok.dtpref = MAXBSIZE;
3338 
3339 	/*
3340 	 * Large file spec: want maxfilesize based on limit of
3341 	 * underlying filesystem.  We can guess 2^31-1 if need be.
3342 	 */
3343 	error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr);
3344 
3345 	VN_RELE(vp);
3346 
3347 	if (!error && l != 0 && l <= 64)
3348 		resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3349 	else
3350 		resp->resok.maxfilesize = MAXOFF32_T;
3351 
3352 	resp->resok.time_delta.seconds = 0;
3353 	resp->resok.time_delta.nseconds = 1000;
3354 	resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3355 	    FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3356 }
3357 
3358 fhandle_t *
3359 rfs3_fsinfo_getfh(FSINFO3args *args)
3360 {
3361 
3362 	return ((fhandle_t *)&args->fsroot.fh3_u.nfs_fh3_i.fh3_i);
3363 }
3364 
3365 /* ARGSUSED */
3366 void
3367 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3368 	struct svc_req *req, cred_t *cr)
3369 {
3370 	int error;
3371 	vnode_t *vp;
3372 	struct vattr *vap;
3373 	struct vattr va;
3374 	ulong_t val;
3375 
3376 	vap = NULL;
3377 
3378 	vp = nfs3_fhtovp(&args->object, exi);
3379 	if (vp == NULL) {
3380 		error = ESTALE;
3381 		goto out;
3382 	}
3383 
3384 #ifdef DEBUG
3385 	if (rfs3_do_post_op_attr) {
3386 		va.va_mask = AT_ALL;
3387 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3388 	} else
3389 		vap = NULL;
3390 #else
3391 	va.va_mask = AT_ALL;
3392 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3393 #endif
3394 
3395 	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr);
3396 	if (error)
3397 		goto out;
3398 	resp->resok.info.link_max = (uint32)val;
3399 
3400 	error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr);
3401 	if (error)
3402 		goto out;
3403 	resp->resok.info.name_max = (uint32)val;
3404 
3405 	error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr);
3406 	if (error)
3407 		goto out;
3408 	if (val == 1)
3409 		resp->resok.info.no_trunc = TRUE;
3410 	else
3411 		resp->resok.info.no_trunc = FALSE;
3412 
3413 	error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr);
3414 	if (error)
3415 		goto out;
3416 	if (val == 1)
3417 		resp->resok.info.chown_restricted = TRUE;
3418 	else
3419 		resp->resok.info.chown_restricted = FALSE;
3420 
3421 	VN_RELE(vp);
3422 
3423 	resp->status = NFS3_OK;
3424 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3425 	resp->resok.info.case_insensitive = FALSE;
3426 	resp->resok.info.case_preserving = TRUE;
3427 	return;
3428 
3429 out:
3430 	if (curthread->t_flag & T_WOULDBLOCK) {
3431 		curthread->t_flag &= ~T_WOULDBLOCK;
3432 		resp->status = NFS3ERR_JUKEBOX;
3433 	} else
3434 		resp->status = puterrno3(error);
3435 	if (vp != NULL)
3436 		VN_RELE(vp);
3437 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3438 }
3439 
3440 fhandle_t *
3441 rfs3_pathconf_getfh(PATHCONF3args *args)
3442 {
3443 
3444 	return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
3445 }
3446 
3447 void
3448 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
3449 	struct svc_req *req, cred_t *cr)
3450 {
3451 	int error;
3452 	vnode_t *vp;
3453 	struct vattr *bvap;
3454 	struct vattr bva;
3455 	struct vattr *avap;
3456 	struct vattr ava;
3457 
3458 	bvap = NULL;
3459 	avap = NULL;
3460 
3461 	vp = nfs3_fhtovp(&args->file, exi);
3462 	if (vp == NULL) {
3463 		error = ESTALE;
3464 		goto out;
3465 	}
3466 
3467 	bva.va_mask = AT_ALL;
3468 	error = VOP_GETATTR(vp, &bva, 0, cr);
3469 
3470 	/*
3471 	 * If we can't get the attributes, then we can't do the
3472 	 * right access checking.  So, we'll fail the request.
3473 	 */
3474 	if (error)
3475 		goto out;
3476 
3477 #ifdef DEBUG
3478 	if (rfs3_do_pre_op_attr)
3479 		bvap = &bva;
3480 	else
3481 		bvap = NULL;
3482 #else
3483 	bvap = &bva;
3484 #endif
3485 
3486 	if (rdonly(exi, req)) {
3487 		resp->status = NFS3ERR_ROFS;
3488 		goto out1;
3489 	}
3490 
3491 	if (vp->v_type != VREG) {
3492 		resp->status = NFS3ERR_INVAL;
3493 		goto out1;
3494 	}
3495 
3496 	if (crgetuid(cr) != bva.va_uid &&
3497 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr)))
3498 		goto out;
3499 
3500 	error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr);
3501 	if (!error)
3502 		error = VOP_FSYNC(vp, FNODSYNC, cr);
3503 
3504 #ifdef DEBUG
3505 	if (rfs3_do_post_op_attr) {
3506 		ava.va_mask = AT_ALL;
3507 		avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
3508 	} else
3509 		avap = NULL;
3510 #else
3511 	ava.va_mask = AT_ALL;
3512 	avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
3513 #endif
3514 
3515 	if (error)
3516 		goto out;
3517 
3518 	VN_RELE(vp);
3519 
3520 	resp->status = NFS3_OK;
3521 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
3522 	resp->resok.verf = write3verf;
3523 	return;
3524 
3525 out:
3526 	if (curthread->t_flag & T_WOULDBLOCK) {
3527 		curthread->t_flag &= ~T_WOULDBLOCK;
3528 		resp->status = NFS3ERR_JUKEBOX;
3529 	} else
3530 		resp->status = puterrno3(error);
3531 out1:
3532 	if (vp != NULL)
3533 		VN_RELE(vp);
3534 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
3535 }
3536 
3537 fhandle_t *
3538 rfs3_commit_getfh(COMMIT3args *args)
3539 {
3540 
3541 	return ((fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i);
3542 }
3543 
3544 static int
3545 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
3546 {
3547 
3548 	vap->va_mask = 0;
3549 
3550 	if (sap->mode.set_it) {
3551 		vap->va_mode = (mode_t)sap->mode.mode;
3552 		vap->va_mask |= AT_MODE;
3553 	}
3554 	if (sap->uid.set_it) {
3555 		vap->va_uid = (uid_t)sap->uid.uid;
3556 		vap->va_mask |= AT_UID;
3557 	}
3558 	if (sap->gid.set_it) {
3559 		vap->va_gid = (gid_t)sap->gid.gid;
3560 		vap->va_mask |= AT_GID;
3561 	}
3562 	if (sap->size.set_it) {
3563 		if (sap->size.size > (size3)((u_longlong_t)-1))
3564 			return (EINVAL);
3565 		vap->va_size = sap->size.size;
3566 		vap->va_mask |= AT_SIZE;
3567 	}
3568 	if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
3569 #ifndef _LP64
3570 		/* check time validity */
3571 		if (!NFS3_TIME_OK(sap->atime.atime.seconds))
3572 			return (EOVERFLOW);
3573 #endif
3574 		/*
3575 		 * nfs protocol defines times as unsigned so don't extend sign,
3576 		 * unless sysadmin set nfs_allow_preepoch_time.
3577 		 */
3578 		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
3579 			sap->atime.atime.seconds);
3580 		vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
3581 		vap->va_mask |= AT_ATIME;
3582 	} else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
3583 		gethrestime(&vap->va_atime);
3584 		vap->va_mask |= AT_ATIME;
3585 	}
3586 	if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
3587 #ifndef _LP64
3588 		/* check time validity */
3589 		if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
3590 			return (EOVERFLOW);
3591 #endif
3592 		/*
3593 		 * nfs protocol defines times as unsigned so don't extend sign,
3594 		 * unless sysadmin set nfs_allow_preepoch_time.
3595 		 */
3596 		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
3597 			sap->mtime.mtime.seconds);
3598 		vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
3599 		vap->va_mask |= AT_MTIME;
3600 	} else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
3601 		gethrestime(&vap->va_mtime);
3602 		vap->va_mask |= AT_MTIME;
3603 	}
3604 
3605 	return (0);
3606 }
3607 
3608 static ftype3 vt_to_nf3[] = {
3609 	0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
3610 };
3611 
3612 static int
3613 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
3614 {
3615 
3616 	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
3617 	/* Return error if time or size overflow */
3618 	if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
3619 		return (EOVERFLOW);
3620 	}
3621 	fap->type = vt_to_nf3[vap->va_type];
3622 	fap->mode = (mode3)(vap->va_mode & MODEMASK);
3623 	fap->nlink = (uint32)vap->va_nlink;
3624 	if (vap->va_uid == UID_NOBODY)
3625 		fap->uid = (uid3)NFS_UID_NOBODY;
3626 	else
3627 		fap->uid = (uid3)vap->va_uid;
3628 	if (vap->va_gid == GID_NOBODY)
3629 		fap->gid = (gid3)NFS_GID_NOBODY;
3630 	else
3631 		fap->gid = (gid3)vap->va_gid;
3632 	fap->size = (size3)vap->va_size;
3633 	fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
3634 	fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
3635 	fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
3636 	fap->fsid = (uint64)vap->va_fsid;
3637 	fap->fileid = (fileid3)vap->va_nodeid;
3638 	fap->atime.seconds = vap->va_atime.tv_sec;
3639 	fap->atime.nseconds = vap->va_atime.tv_nsec;
3640 	fap->mtime.seconds = vap->va_mtime.tv_sec;
3641 	fap->mtime.nseconds = vap->va_mtime.tv_nsec;
3642 	fap->ctime.seconds = vap->va_ctime.tv_sec;
3643 	fap->ctime.nseconds = vap->va_ctime.tv_nsec;
3644 	return (0);
3645 }
3646 
3647 static int
3648 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
3649 {
3650 
3651 	/* Return error if time or size overflow */
3652 	if (!  (NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
3653 		NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
3654 		NFS3_SIZE_OK(vap->va_size))) {
3655 		return (EOVERFLOW);
3656 	}
3657 	wccap->size = (size3)vap->va_size;
3658 	wccap->mtime.seconds = vap->va_mtime.tv_sec;
3659 	wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
3660 	wccap->ctime.seconds = vap->va_ctime.tv_sec;
3661 	wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
3662 	return (0);
3663 }
3664 
3665 static void
3666 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
3667 {
3668 
3669 	/* don't return attrs if time overflow */
3670 	if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
3671 		poap->attributes = TRUE;
3672 	} else
3673 		poap->attributes = FALSE;
3674 }
3675 
3676 void
3677 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
3678 {
3679 
3680 	/* don't return attrs if time overflow */
3681 	if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
3682 		poap->attributes = TRUE;
3683 	} else
3684 		poap->attributes = FALSE;
3685 }
3686 
3687 static void
3688 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
3689 {
3690 
3691 	vattr_to_pre_op_attr(bvap, &wccp->before);
3692 	vattr_to_post_op_attr(avap, &wccp->after);
3693 }
3694 
3695 void
3696 rfs3_srvrinit(void)
3697 {
3698 	struct rfs3_verf_overlay {
3699 		uint_t id; /* a "unique" identifier */
3700 		int ts; /* a unique timestamp */
3701 	} *verfp;
3702 	timestruc_t now;
3703 
3704 	/*
3705 	 * The following algorithm attempts to find a unique verifier
3706 	 * to be used as the write verifier returned from the server
3707 	 * to the client.  It is important that this verifier change
3708 	 * whenever the server reboots.  Of secondary importance, it
3709 	 * is important for the verifier to be unique between two
3710 	 * different servers.
3711 	 *
3712 	 * Thus, an attempt is made to use the system hostid and the
3713 	 * current time in seconds when the nfssrv kernel module is
3714 	 * loaded.  It is assumed that an NFS server will not be able
3715 	 * to boot and then to reboot in less than a second.  If the
3716 	 * hostid has not been set, then the current high resolution
3717 	 * time is used.  This will ensure different verifiers each
3718 	 * time the server reboots and minimize the chances that two
3719 	 * different servers will have the same verifier.
3720 	 */
3721 
3722 #ifndef	lint
3723 	/*
3724 	 * We ASSERT that this constant logic expression is
3725 	 * always true because in the past, it wasn't.
3726 	 */
3727 	ASSERT(sizeof (*verfp) <= sizeof (write3verf));
3728 #endif
3729 
3730 	gethrestime(&now);
3731 	verfp = (struct rfs3_verf_overlay *)&write3verf;
3732 	verfp->ts = (int)now.tv_sec;
3733 	verfp->id = (uint_t)nfs_atoi(hw_serial);
3734 
3735 	if (verfp->id == 0)
3736 		verfp->id = (uint_t)now.tv_nsec;
3737 
3738 }
3739 
/*
 * Teardown entry point paired by name with rfs3_srvrinit().
 */
void
rfs3_srvrfini(void)
{
	/* Nothing needs to be undone here. */
}
3745