xref: /titanic_41/usr/src/uts/common/fs/nfs/nfs3_srv.c (revision 8eea8e29cc4374d1ee24c25a07f45af132db3499)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/systm.h>
35 #include <sys/cred.h>
36 #include <sys/buf.h>
37 #include <sys/vfs.h>
38 #include <sys/vnode.h>
39 #include <sys/uio.h>
40 #include <sys/errno.h>
41 #include <sys/sysmacros.h>
42 #include <sys/statvfs.h>
43 #include <sys/kmem.h>
44 #include <sys/dirent.h>
45 #include <sys/cmn_err.h>
46 #include <sys/debug.h>
47 #include <sys/systeminfo.h>
48 #include <sys/flock.h>
49 #include <sys/nbmlock.h>
50 #include <sys/policy.h>
51 
52 #include <rpc/types.h>
53 #include <rpc/auth.h>
54 #include <rpc/svc.h>
55 
56 #include <nfs/nfs.h>
57 #include <nfs/export.h>
58 
59 #include <sys/strsubr.h>
60 
61 /*
62  * These are the interface routines for the server side of the
63  * Network File System.  See the NFS version 3 protocol specification
64  * for a description of this interface.
65  */
66 
67 #ifdef DEBUG
68 int rfs3_do_pre_op_attr = 1;
69 int rfs3_do_post_op_attr = 1;
70 int rfs3_do_post_op_fh3 = 1;
71 #endif
72 
73 static writeverf3 write3verf;
74 
75 static int	sattr3_to_vattr(sattr3 *, struct vattr *);
76 static int	vattr_to_fattr3(struct vattr *, fattr3 *);
77 static int	vattr_to_wcc_attr(struct vattr *, wcc_attr *);
78 static void	vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
79 static void	vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
80 
81 /* ARGSUSED */
82 void
83 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
84 	struct svc_req *req, cred_t *cr)
85 {
86 	int error;
87 	vnode_t *vp;
88 	struct vattr va;
89 
90 	vp = nfs3_fhtovp(&args->object, exi);
91 	if (vp == NULL) {
92 		error = ESTALE;
93 		goto out;
94 	}
95 
96 	va.va_mask = AT_ALL;
97 	error = rfs4_delegated_getattr(vp, &va, 0, cr);
98 
99 	VN_RELE(vp);
100 
101 	if (!error) {
102 		/* overflow error if time or size is out of range */
103 		error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
104 		if (error)
105 			goto out;
106 		resp->status = NFS3_OK;
107 		return;
108 	}
109 
110 out:
111 	if (curthread->t_flag & T_WOULDBLOCK) {
112 		curthread->t_flag &= ~T_WOULDBLOCK;
113 		resp->status = NFS3ERR_JUKEBOX;
114 	} else
115 		resp->status = puterrno3(error);
116 }
117 
118 fhandle_t *
119 rfs3_getattr_getfh(GETATTR3args *args)
120 {
121 
122 	return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
123 }
124 
125 void
126 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
127 	struct svc_req *req, cred_t *cr)
128 {
129 	int error;
130 	vnode_t *vp;
131 	struct vattr *bvap;
132 	struct vattr bva;
133 	struct vattr *avap;
134 	struct vattr ava;
135 	int flag;
136 	int in_crit = 0;
137 	struct flock64 bf;
138 
139 	bvap = NULL;
140 	avap = NULL;
141 
142 	vp = nfs3_fhtovp(&args->object, exi);
143 	if (vp == NULL) {
144 		error = ESTALE;
145 		goto out;
146 	}
147 
148 	error = sattr3_to_vattr(&args->new_attributes, &ava);
149 	if (error)
150 		goto out;
151 
152 	/*
153 	 * We need to specially handle size changes because of
154 	 * possible conflicting NBMAND locks. Get into critical
155 	 * region before VOP_GETATTR, so the size attribute is
156 	 * valid when checking conflicts.
157 	 *
158 	 * Also, check to see if the v4 side of the server has
159 	 * delegated this file.  If so, then we return JUKEBOX to
160 	 * allow the client to retrasmit its request.
161 	 */
162 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
163 		if (rfs4_check_delegated(FWRITE, vp, TRUE)) {
164 			resp->status = NFS3ERR_JUKEBOX;
165 			goto out1;
166 		}
167 		if (nbl_need_check(vp)) {
168 			nbl_start_crit(vp, RW_READER);
169 			in_crit = 1;
170 		}
171 	}
172 
173 	bva.va_mask = AT_ALL;
174 	error = rfs4_delegated_getattr(vp, &bva, 0, cr);
175 
176 	/*
177 	 * If we can't get the attributes, then we can't do the
178 	 * right access checking.  So, we'll fail the request.
179 	 */
180 	if (error)
181 		goto out;
182 
183 #ifdef DEBUG
184 	if (rfs3_do_pre_op_attr)
185 		bvap = &bva;
186 #else
187 	bvap = &bva;
188 #endif
189 
190 	if (rdonly(exi, req) || vn_is_readonly(vp)) {
191 		resp->status = NFS3ERR_ROFS;
192 		goto out1;
193 	}
194 
195 	if (args->guard.check &&
196 	    (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
197 	    args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
198 		resp->status = NFS3ERR_NOT_SYNC;
199 		goto out1;
200 	}
201 
202 	if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
203 		flag = ATTR_UTIME;
204 	else
205 		flag = 0;
206 
207 	/*
208 	 * If the filesystem is exported with nosuid, then mask off
209 	 * the setuid and setgid bits.
210 	 */
211 	if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
212 	    (exi->exi_export.ex_flags & EX_NOSUID))
213 		ava.va_mode &= ~(VSUID | VSGID);
214 
215 	/*
216 	 * We need to specially handle size changes because it is
217 	 * possible for the client to create a file with modes
218 	 * which indicate read-only, but with the file opened for
219 	 * writing.  If the client then tries to set the size of
220 	 * the file, then the normal access checking done in
221 	 * VOP_SETATTR would prevent the client from doing so,
222 	 * although it should be legal for it to do so.  To get
223 	 * around this, we do the access checking for ourselves
224 	 * and then use VOP_SPACE which doesn't do the access
225 	 * checking which VOP_SETATTR does. VOP_SPACE can only
226 	 * operate on VREG files, let VOP_SETATTR handle the other
227 	 * extremely rare cases.
228 	 * Also the client should not be allowed to change the
229 	 * size of the file if there is a conflicting non-blocking
230 	 * mandatory lock in the region the change.
231 	 */
232 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
233 		if (in_crit) {
234 			u_offset_t offset;
235 			ssize_t length;
236 
237 			if (ava.va_size < bva.va_size) {
238 				offset = ava.va_size;
239 				length = bva.va_size - ava.va_size;
240 			} else {
241 				offset = bva.va_size;
242 				length = ava.va_size - bva.va_size;
243 			}
244 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0)) {
245 				error = EACCES;
246 				goto out;
247 			}
248 		}
249 
250 		if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
251 			ava.va_mask &= ~AT_SIZE;
252 			bf.l_type = F_WRLCK;
253 			bf.l_whence = 0;
254 			bf.l_start = (off64_t)ava.va_size;
255 			bf.l_len = 0;
256 			bf.l_sysid = 0;
257 			bf.l_pid = 0;
258 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
259 			    (offset_t)ava.va_size, cr, NULL);
260 		}
261 	}
262 
263 	if (!error && ava.va_mask)
264 		error = VOP_SETATTR(vp, &ava, flag, cr, NULL);
265 
266 #ifdef DEBUG
267 	if (rfs3_do_post_op_attr) {
268 		ava.va_mask = AT_ALL;
269 		avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
270 	} else
271 		avap = NULL;
272 #else
273 	ava.va_mask = AT_ALL;
274 	avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
275 #endif
276 
277 	/*
278 	 * Force modified metadata out to stable storage.
279 	 */
280 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
281 
282 	if (error)
283 		goto out;
284 
285 	if (in_crit)
286 		nbl_end_crit(vp);
287 	VN_RELE(vp);
288 
289 	resp->status = NFS3_OK;
290 	vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
291 	return;
292 
293 out:
294 	if (curthread->t_flag & T_WOULDBLOCK) {
295 		curthread->t_flag &= ~T_WOULDBLOCK;
296 		resp->status = NFS3ERR_JUKEBOX;
297 	} else
298 		resp->status = puterrno3(error);
299 out1:
300 	if (vp != NULL) {
301 		if (in_crit)
302 			nbl_end_crit(vp);
303 		VN_RELE(vp);
304 	}
305 	vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
306 }
307 
308 fhandle_t *
309 rfs3_setattr_getfh(SETATTR3args *args)
310 {
311 
312 	return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
313 }
314 
315 /* ARGSUSED */
316 void
317 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
318 	struct svc_req *req, cred_t *cr)
319 {
320 	int error;
321 	vnode_t *vp;
322 	vnode_t *dvp;
323 	struct vattr *vap;
324 	struct vattr va;
325 	struct vattr *dvap;
326 	struct vattr dva;
327 	nfs_fh3 *fhp;
328 	struct sec_ol sec = {0, 0};
329 	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
330 
331 	dvap = NULL;
332 
333 	/*
334 	 * Allow lookups from the root - the default
335 	 * location of the public filehandle.
336 	 */
337 	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
338 		dvp = rootdir;
339 		VN_HOLD(dvp);
340 	} else {
341 		dvp = nfs3_fhtovp(args->what.dirp, exi);
342 		if (dvp == NULL) {
343 			error = ESTALE;
344 			goto out;
345 		}
346 	}
347 
348 #ifdef DEBUG
349 	if (rfs3_do_pre_op_attr) {
350 		dva.va_mask = AT_ALL;
351 		dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
352 	}
353 #else
354 	dva.va_mask = AT_ALL;
355 	dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
356 #endif
357 
358 	if (args->what.name == nfs3nametoolong) {
359 		resp->status = NFS3ERR_NAMETOOLONG;
360 		goto out1;
361 	}
362 
363 	if (args->what.name == NULL || *(args->what.name) == '\0') {
364 		resp->status = NFS3ERR_ACCES;
365 		goto out1;
366 	}
367 
368 	fhp = args->what.dirp;
369 	if (strcmp(args->what.name, "..") == 0 &&
370 	    EQFID(&exi->exi_fid, (fid_t *)&fhp->fh3_len)) {
371 		resp->status = NFS3ERR_NOENT;
372 		goto out1;
373 	}
374 
375 	/*
376 	 * If the public filehandle is used then allow
377 	 * a multi-component lookup
378 	 */
379 	if (PUBLIC_FH3(args->what.dirp)) {
380 		publicfh_flag = TRUE;
381 		error = rfs_publicfh_mclookup(args->what.name, dvp, cr, &vp,
382 					&exi, &sec);
383 		if (error && exi != NULL)
384 			exi_rele(exi);  /* See the comment below */
385 	} else {
386 		error = VOP_LOOKUP(dvp, args->what.name, &vp,
387 				NULL, 0, NULL, cr);
388 	}
389 
390 #ifdef DEBUG
391 	if (rfs3_do_post_op_attr) {
392 		dva.va_mask = AT_ALL;
393 		dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
394 	} else
395 		dvap = NULL;
396 #else
397 	dva.va_mask = AT_ALL;
398 	dvap = VOP_GETATTR(dvp, &dva, 0, cr) ? NULL : &dva;
399 #endif
400 
401 	if (error)
402 		goto out;
403 
404 	VN_SETPATH(rootdir, dvp, vp, args->what.name, strlen(args->what.name));
405 
406 	if (sec.sec_flags & SEC_QUERY) {
407 		error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
408 	} else {
409 		error = makefh3(&resp->resok.object, vp, exi);
410 		if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
411 			auth_weak = TRUE;
412 	}
413 
414 	if (error) {
415 		VN_RELE(vp);
416 		goto out;
417 	}
418 
419 	/*
420 	 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
421 	 * and have obtained a new exportinfo in exi which needs to be
422 	 * released. Note the the original exportinfo pointed to by exi
423 	 * will be released by the caller, common_dispatch.
424 	 */
425 	if (publicfh_flag)
426 		exi_rele(exi);
427 
428 	VN_RELE(dvp);
429 
430 #ifdef DEBUG
431 	if (rfs3_do_post_op_attr) {
432 		va.va_mask = AT_ALL;
433 		vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
434 	} else
435 		vap = NULL;
436 #else
437 	va.va_mask = AT_ALL;
438 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
439 #endif
440 
441 	VN_RELE(vp);
442 
443 	resp->status = NFS3_OK;
444 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
445 	vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
446 
447 	/*
448 	 * If it's public fh, no 0x81, and client's flavor is
449 	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
450 	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
451 	 */
452 	if (auth_weak)
453 		resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
454 
455 	return;
456 
457 out:
458 	if (curthread->t_flag & T_WOULDBLOCK) {
459 		curthread->t_flag &= ~T_WOULDBLOCK;
460 		resp->status = NFS3ERR_JUKEBOX;
461 	} else
462 		resp->status = puterrno3(error);
463 out1:
464 	if (dvp != NULL)
465 		VN_RELE(dvp);
466 	vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
467 
468 }
469 
470 fhandle_t *
471 rfs3_lookup_getfh(LOOKUP3args *args)
472 {
473 
474 	return ((fhandle_t *)&args->what.dirp->fh3_u.nfs_fh3_i.fh3_i);
475 }
476 
477 /* ARGSUSED */
478 void
479 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
480 	struct svc_req *req, cred_t *cr)
481 {
482 	int error;
483 	vnode_t *vp;
484 	struct vattr *vap;
485 	struct vattr va;
486 	int checkwriteperm;
487 
488 	vap = NULL;
489 
490 	vp = nfs3_fhtovp(&args->object, exi);
491 	if (vp == NULL) {
492 		error = ESTALE;
493 		goto out;
494 	}
495 
496 	/*
497 	 * If the file system is exported read only, it is not appropriate
498 	 * to check write permissions for regular files and directories.
499 	 * Special files are interpreted by the client, so the underlying
500 	 * permissions are sent back to the client for interpretation.
501 	 */
502 	if (rdonly(exi, req) && (vp->v_type == VREG || vp->v_type == VDIR))
503 		checkwriteperm = 0;
504 	else
505 		checkwriteperm = 1;
506 
507 	/*
508 	 * We need the mode so that we can correctly determine access
509 	 * permissions relative to a mandatory lock file.  Access to
510 	 * mandatory lock files is denied on the server, so it might
511 	 * as well be reflected to the server during the open.
512 	 */
513 	va.va_mask = AT_MODE;
514 	error = VOP_GETATTR(vp, &va, 0, cr);
515 	if (error)
516 		goto out;
517 
518 #ifdef DEBUG
519 	if (rfs3_do_post_op_attr)
520 		vap = &va;
521 #else
522 	vap = &va;
523 #endif
524 
525 	resp->resok.access = 0;
526 
527 	if (args->access & ACCESS3_READ) {
528 		error = VOP_ACCESS(vp, VREAD, 0, cr);
529 		if (error) {
530 			if (curthread->t_flag & T_WOULDBLOCK)
531 				goto out;
532 		} else if (!MANDLOCK(vp, va.va_mode))
533 			resp->resok.access |= ACCESS3_READ;
534 	}
535 	if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
536 		error = VOP_ACCESS(vp, VEXEC, 0, cr);
537 		if (error) {
538 			if (curthread->t_flag & T_WOULDBLOCK)
539 				goto out;
540 		} else
541 			resp->resok.access |= ACCESS3_LOOKUP;
542 	}
543 	if (checkwriteperm &&
544 	    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
545 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
546 		if (error) {
547 			if (curthread->t_flag & T_WOULDBLOCK)
548 				goto out;
549 		} else if (!MANDLOCK(vp, va.va_mode)) {
550 			resp->resok.access |=
551 			    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
552 		}
553 	}
554 	if (checkwriteperm &&
555 	    (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
556 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
557 		if (error) {
558 			if (curthread->t_flag & T_WOULDBLOCK)
559 				goto out;
560 		} else
561 			resp->resok.access |= ACCESS3_DELETE;
562 	}
563 	if (args->access & ACCESS3_EXECUTE) {
564 		error = VOP_ACCESS(vp, VEXEC, 0, cr);
565 		if (error) {
566 			if (curthread->t_flag & T_WOULDBLOCK)
567 				goto out;
568 		} else if (!MANDLOCK(vp, va.va_mode))
569 			resp->resok.access |= ACCESS3_EXECUTE;
570 	}
571 
572 #ifdef DEBUG
573 	if (rfs3_do_post_op_attr) {
574 		va.va_mask = AT_ALL;
575 		vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
576 	} else
577 		vap = NULL;
578 #else
579 	va.va_mask = AT_ALL;
580 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
581 #endif
582 
583 	VN_RELE(vp);
584 
585 	resp->status = NFS3_OK;
586 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
587 	return;
588 
589 out:
590 	if (curthread->t_flag & T_WOULDBLOCK) {
591 		curthread->t_flag &= ~T_WOULDBLOCK;
592 		resp->status = NFS3ERR_JUKEBOX;
593 	} else
594 		resp->status = puterrno3(error);
595 	if (vp != NULL)
596 		VN_RELE(vp);
597 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
598 }
599 
600 fhandle_t *
601 rfs3_access_getfh(ACCESS3args *args)
602 {
603 
604 	return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
605 }
606 
607 /* ARGSUSED */
608 void
609 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
610 	struct svc_req *req, cred_t *cr)
611 {
612 	int error;
613 	vnode_t *vp;
614 	struct vattr *vap;
615 	struct vattr va;
616 	struct iovec iov;
617 	struct uio uio;
618 	char *data;
619 
620 	vap = NULL;
621 
622 	vp = nfs3_fhtovp(&args->symlink, exi);
623 	if (vp == NULL) {
624 		error = ESTALE;
625 		goto out;
626 	}
627 
628 	va.va_mask = AT_ALL;
629 	error = VOP_GETATTR(vp, &va, 0, cr);
630 	if (error)
631 		goto out;
632 
633 #ifdef DEBUG
634 	if (rfs3_do_post_op_attr)
635 		vap = &va;
636 #else
637 	vap = &va;
638 #endif
639 
640 	if (vp->v_type != VLNK) {
641 		resp->status = NFS3ERR_INVAL;
642 		goto out1;
643 	}
644 
645 	if (MANDLOCK(vp, va.va_mode)) {
646 		resp->status = NFS3ERR_ACCES;
647 		goto out1;
648 	}
649 
650 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
651 
652 	iov.iov_base = data;
653 	iov.iov_len = MAXPATHLEN;
654 	uio.uio_iov = &iov;
655 	uio.uio_iovcnt = 1;
656 	uio.uio_segflg = UIO_SYSSPACE;
657 	uio.uio_extflg = UIO_COPY_CACHED;
658 	uio.uio_loffset = 0;
659 	uio.uio_resid = MAXPATHLEN;
660 
661 	error = VOP_READLINK(vp, &uio, cr);
662 
663 #ifdef DEBUG
664 	if (rfs3_do_post_op_attr) {
665 		va.va_mask = AT_ALL;
666 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
667 	} else
668 		vap = NULL;
669 #else
670 	va.va_mask = AT_ALL;
671 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
672 #endif
673 
674 #if 0 /* notyet */
675 	/*
676 	 * Don't do this.  It causes local disk writes when just
677 	 * reading the file and the overhead is deemed larger
678 	 * than the benefit.
679 	 */
680 	/*
681 	 * Force modified metadata out to stable storage.
682 	 */
683 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
684 #endif
685 
686 	if (error) {
687 		kmem_free(data, MAXPATHLEN + 1);
688 		goto out;
689 	}
690 
691 	VN_RELE(vp);
692 
693 	resp->status = NFS3_OK;
694 	vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
695 	resp->resok.data = data;
696 	*(data + MAXPATHLEN - uio.uio_resid) = '\0';
697 	return;
698 
699 out:
700 	if (curthread->t_flag & T_WOULDBLOCK) {
701 		curthread->t_flag &= ~T_WOULDBLOCK;
702 		resp->status = NFS3ERR_JUKEBOX;
703 	} else
704 		resp->status = puterrno3(error);
705 out1:
706 	if (vp != NULL)
707 		VN_RELE(vp);
708 	vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
709 }
710 
711 fhandle_t *
712 rfs3_readlink_getfh(READLINK3args *args)
713 {
714 
715 	return ((fhandle_t *)&args->symlink.fh3_u.nfs_fh3_i.fh3_i);
716 }
717 
718 void
719 rfs3_readlink_free(READLINK3res *resp)
720 {
721 
722 	if (resp->status == NFS3_OK)
723 		kmem_free(resp->resok.data, MAXPATHLEN + 1);
724 }
725 
726 /* ARGSUSED */
727 void
728 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
729 	struct svc_req *req, cred_t *cr)
730 {
731 	int error;
732 	vnode_t *vp;
733 	struct vattr *vap;
734 	struct vattr va;
735 	struct iovec iov;
736 	struct uio uio;
737 	u_offset_t offset;
738 	mblk_t *mp;
739 	int alloc_err = 0;
740 	int in_crit = 0;
741 	int need_rwunlock = 0;
742 
743 	vap = NULL;
744 
745 	vp = nfs3_fhtovp(&args->file, exi);
746 	if (vp == NULL) {
747 		error = ESTALE;
748 		goto out;
749 	}
750 
751 	/*
752 	 * Check to see if the v4 side of the server has delegated
753 	 * this file.  If so, then we return JUKEBOX to allow the
754 	 * client to retrasmit its request.
755 	 */
756 	if (rfs4_check_delegated(FREAD, vp, FALSE)) {
757 		resp->status = NFS3ERR_JUKEBOX;
758 		goto out1;
759 	}
760 
761 	/*
762 	 * Enter the critical region before calling VOP_RWLOCK
763 	 * to avoid a deadlock with write requests.
764 	 */
765 	if (nbl_need_check(vp)) {
766 		nbl_start_crit(vp, RW_READER);
767 		in_crit = 1;
768 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0)) {
769 			error = EACCES;
770 			goto out;
771 		}
772 	}
773 
774 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
775 	need_rwunlock = 1;
776 
777 	va.va_mask = AT_ALL;
778 	error = VOP_GETATTR(vp, &va, 0, cr);
779 
780 	/*
781 	 * If we can't get the attributes, then we can't do the
782 	 * right access checking.  So, we'll fail the request.
783 	 */
784 	if (error)
785 		goto out;
786 
787 #ifdef DEBUG
788 	if (rfs3_do_post_op_attr)
789 		vap = &va;
790 #else
791 	vap = &va;
792 #endif
793 
794 	if (vp->v_type != VREG) {
795 		resp->status = NFS3ERR_INVAL;
796 		goto out1;
797 	}
798 
799 	if (crgetuid(cr) != va.va_uid) {
800 		error = VOP_ACCESS(vp, VREAD, 0, cr);
801 		if (error) {
802 			if (curthread->t_flag & T_WOULDBLOCK)
803 				goto out;
804 			error = VOP_ACCESS(vp, VEXEC, 0, cr);
805 			if (error)
806 				goto out;
807 		}
808 	}
809 
810 	if (MANDLOCK(vp, va.va_mode)) {
811 		resp->status = NFS3ERR_ACCES;
812 		goto out1;
813 	}
814 
815 	offset = args->offset;
816 	if (offset >= va.va_size) {
817 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
818 		if (in_crit)
819 			nbl_end_crit(vp);
820 		VN_RELE(vp);
821 		resp->status = NFS3_OK;
822 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
823 		resp->resok.count = 0;
824 		resp->resok.eof = TRUE;
825 		resp->resok.data.data_len = 0;
826 		resp->resok.data.data_val = NULL;
827 		resp->resok.data.mp = NULL;
828 		return;
829 	}
830 
831 	if (args->count == 0) {
832 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
833 		if (in_crit)
834 			nbl_end_crit(vp);
835 		VN_RELE(vp);
836 		resp->status = NFS3_OK;
837 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
838 		resp->resok.count = 0;
839 		resp->resok.eof = FALSE;
840 		resp->resok.data.data_len = 0;
841 		resp->resok.data.data_val = NULL;
842 		resp->resok.data.mp = NULL;
843 		return;
844 	}
845 
846 	/*
847 	 * do not allocate memory more the max. allowed
848 	 * transfer size
849 	 */
850 	if (args->count > rfs3_tsize(req))
851 		args->count = rfs3_tsize(req);
852 
853 	/*
854 	 * mp will contain the data to be sent out in the read reply.
855 	 * This will be freed after the reply has been sent out (by the
856 	 * driver).
857 	 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
858 	 * that the call to xdrmblk_putmblk() never fails.
859 	 */
860 	mp = allocb_wait(RNDUP(args->count), BPRI_MED, STR_NOSIG, &alloc_err);
861 	ASSERT(mp != NULL);
862 	ASSERT(alloc_err == 0);
863 
864 	iov.iov_base = (caddr_t)mp->b_datap->db_base;
865 	iov.iov_len = args->count;
866 	uio.uio_iov = &iov;
867 	uio.uio_iovcnt = 1;
868 	uio.uio_segflg = UIO_SYSSPACE;
869 	uio.uio_extflg = UIO_COPY_CACHED;
870 	uio.uio_loffset = args->offset;
871 	uio.uio_resid = args->count;
872 
873 	error = VOP_READ(vp, &uio, 0, cr, NULL);
874 
875 	if (error) {
876 		freeb(mp);
877 		goto out;
878 	}
879 
880 	va.va_mask = AT_ALL;
881 	error = VOP_GETATTR(vp, &va, 0, cr);
882 
883 #ifdef DEBUG
884 	if (rfs3_do_post_op_attr) {
885 		if (error)
886 			vap = NULL;
887 		else
888 			vap = &va;
889 	} else
890 		vap = NULL;
891 #else
892 	if (error)
893 		vap = NULL;
894 	else
895 		vap = &va;
896 #endif
897 
898 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
899 
900 #if 0 /* notyet */
901 	/*
902 	 * Don't do this.  It causes local disk writes when just
903 	 * reading the file and the overhead is deemed larger
904 	 * than the benefit.
905 	 */
906 	/*
907 	 * Force modified metadata out to stable storage.
908 	 */
909 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
910 #endif
911 
912 	if (in_crit)
913 		nbl_end_crit(vp);
914 	VN_RELE(vp);
915 
916 	resp->status = NFS3_OK;
917 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
918 	resp->resok.count = args->count - uio.uio_resid;
919 	if (!error && offset + resp->resok.count == va.va_size)
920 		resp->resok.eof = TRUE;
921 	else
922 		resp->resok.eof = FALSE;
923 	resp->resok.data.data_len = resp->resok.count;
924 	resp->resok.data.data_val = (char *)mp->b_datap->db_base;
925 
926 	resp->resok.data.mp = mp;
927 
928 	resp->resok.size = (uint_t)args->count;
929 	return;
930 
931 out:
932 	if (curthread->t_flag & T_WOULDBLOCK) {
933 		curthread->t_flag &= ~T_WOULDBLOCK;
934 		resp->status = NFS3ERR_JUKEBOX;
935 	} else
936 		resp->status = puterrno3(error);
937 out1:
938 	if (vp != NULL) {
939 		if (need_rwunlock)
940 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
941 		if (in_crit)
942 			nbl_end_crit(vp);
943 		VN_RELE(vp);
944 	}
945 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
946 }
947 
948 void
949 rfs3_read_free(READ3res *resp)
950 {
951 	mblk_t *mp;
952 
953 	if (resp->status == NFS3_OK) {
954 		mp = resp->resok.data.mp;
955 		if (mp != NULL)
956 			freeb(mp);
957 	}
958 }
959 
960 fhandle_t *
961 rfs3_read_getfh(READ3args *args)
962 {
963 
964 	return ((fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i);
965 }
966 
967 #define	MAX_IOVECS	12
968 
969 #ifdef DEBUG
970 static int rfs3_write_hits = 0;
971 static int rfs3_write_misses = 0;
972 #endif
973 
974 void
975 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
976 	struct svc_req *req, cred_t *cr)
977 {
978 	int error;
979 	vnode_t *vp;
980 	struct vattr *bvap = NULL;
981 	struct vattr bva;
982 	struct vattr *avap = NULL;
983 	struct vattr ava;
984 	u_offset_t rlimit;
985 	struct uio uio;
986 	struct iovec iov[MAX_IOVECS];
987 	mblk_t *m;
988 	struct iovec *iovp;
989 	int iovcnt;
990 	int ioflag;
991 	cred_t *savecred;
992 	int in_crit = 0;
993 	int rwlock_ret = -1;
994 
995 	vp = nfs3_fhtovp(&args->file, exi);
996 	if (vp == NULL) {
997 		error = ESTALE;
998 		goto out;
999 	}
1000 
1001 	/*
1002 	 * Check to see if the v4 side of the server has delegated
1003 	 * this file.  If so, then we return JUKEBOX to allow the
1004 	 * client to retrasmit its request.
1005 	 */
1006 	if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1007 		resp->status = NFS3ERR_JUKEBOX;
1008 		goto out1;
1009 	}
1010 
1011 	/*
1012 	 * We have to enter the critical region before calling VOP_RWLOCK
1013 	 * to avoid a deadlock with ufs.
1014 	 */
1015 	if (nbl_need_check(vp)) {
1016 		nbl_start_crit(vp, RW_READER);
1017 		in_crit = 1;
1018 		if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0)) {
1019 			error = EACCES;
1020 			goto out;
1021 		}
1022 	}
1023 
1024 	rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1025 
1026 	bva.va_mask = AT_ALL;
1027 	error = VOP_GETATTR(vp, &bva, 0, cr);
1028 
1029 	/*
1030 	 * If we can't get the attributes, then we can't do the
1031 	 * right access checking.  So, we'll fail the request.
1032 	 */
1033 	if (error)
1034 		goto out;
1035 
1036 	bvap = &bva;
1037 #ifdef DEBUG
1038 	if (!rfs3_do_pre_op_attr)
1039 		bvap = NULL;
1040 #endif
1041 	avap = bvap;
1042 
1043 	if (args->count != args->data.data_len) {
1044 		resp->status = NFS3ERR_INVAL;
1045 		goto out1;
1046 	}
1047 
1048 	if (rdonly(exi, req)) {
1049 		resp->status = NFS3ERR_ROFS;
1050 		goto out1;
1051 	}
1052 
1053 	if (vp->v_type != VREG) {
1054 		resp->status = NFS3ERR_INVAL;
1055 		goto out1;
1056 	}
1057 
1058 	if (crgetuid(cr) != bva.va_uid &&
1059 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr)))
1060 		goto out;
1061 
1062 	if (MANDLOCK(vp, bva.va_mode)) {
1063 		resp->status = NFS3ERR_ACCES;
1064 		goto out1;
1065 	}
1066 
1067 	if (args->count == 0) {
1068 		VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1069 		VN_RELE(vp);
1070 		resp->status = NFS3_OK;
1071 		vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1072 		resp->resok.count = 0;
1073 		resp->resok.committed = args->stable;
1074 		resp->resok.verf = write3verf;
1075 		return;
1076 	}
1077 
1078 	if (args->mblk != NULL) {
1079 		iovcnt = 0;
1080 		for (m = args->mblk; m != NULL; m = m->b_cont)
1081 			iovcnt++;
1082 		if (iovcnt <= MAX_IOVECS) {
1083 #ifdef DEBUG
1084 			rfs3_write_hits++;
1085 #endif
1086 			iovp = iov;
1087 		} else {
1088 #ifdef DEBUG
1089 			rfs3_write_misses++;
1090 #endif
1091 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1092 		}
1093 		mblk_to_iov(args->mblk, iovcnt, iovp);
1094 	} else {
1095 		iovcnt = 1;
1096 		iovp = iov;
1097 		iovp->iov_base = args->data.data_val;
1098 		iovp->iov_len = args->count;
1099 	}
1100 
1101 	uio.uio_iov = iovp;
1102 	uio.uio_iovcnt = iovcnt;
1103 
1104 	uio.uio_segflg = UIO_SYSSPACE;
1105 	uio.uio_extflg = UIO_COPY_DEFAULT;
1106 	uio.uio_loffset = args->offset;
1107 	uio.uio_resid = args->count;
1108 	uio.uio_llimit = curproc->p_fsz_ctl;
1109 	rlimit = uio.uio_llimit - args->offset;
1110 	if (rlimit < (u_offset_t)uio.uio_resid)
1111 		uio.uio_resid = (int)rlimit;
1112 
1113 	if (args->stable == UNSTABLE)
1114 		ioflag = 0;
1115 	else if (args->stable == FILE_SYNC)
1116 		ioflag = FSYNC;
1117 	else if (args->stable == DATA_SYNC)
1118 		ioflag = FDSYNC;
1119 	else {
1120 		if (iovp != iov)
1121 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
1122 		resp->status = NFS3ERR_INVAL;
1123 		goto out1;
1124 	}
1125 
1126 	/*
1127 	 * We're changing creds because VM may fault and we need
1128 	 * the cred of the current thread to be used if quota
1129 	 * checking is enabled.
1130 	 */
1131 	savecred = curthread->t_cred;
1132 	curthread->t_cred = cr;
1133 	error = VOP_WRITE(vp, &uio, ioflag, cr, NULL);
1134 	curthread->t_cred = savecred;
1135 
1136 	if (iovp != iov)
1137 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
1138 
1139 	ava.va_mask = AT_ALL;
1140 	avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
1141 
1142 #ifdef DEBUG
1143 	if (!rfs3_do_post_op_attr)
1144 		avap = NULL;
1145 #endif
1146 
1147 	if (error)
1148 		goto out;
1149 
1150 	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1151 	if (in_crit)
1152 		nbl_end_crit(vp);
1153 	VN_RELE(vp);
1154 
1155 	/*
1156 	 * If we were unable to get the V_WRITELOCK_TRUE, then we
1157 	 * may not have accurate after attrs, so check if
1158 	 * we have both attributes, they have a non-zero va_seq, and
1159 	 * va_seq has changed by exactly one,
1160 	 * if not, turn off the before attr.
1161 	 */
1162 	if (rwlock_ret != V_WRITELOCK_TRUE) {
1163 		if (bvap == NULL || avap == NULL ||
1164 				bvap->va_seq == 0 || avap->va_seq == 0 ||
1165 				avap->va_seq != (bvap->va_seq + 1)) {
1166 			bvap = NULL;
1167 		}
1168 	}
1169 
1170 	resp->status = NFS3_OK;
1171 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1172 	resp->resok.count = args->count - uio.uio_resid;
1173 	resp->resok.committed = args->stable;
1174 	resp->resok.verf = write3verf;
1175 	return;
1176 
1177 out:
1178 	if (curthread->t_flag & T_WOULDBLOCK) {
1179 		curthread->t_flag &= ~T_WOULDBLOCK;
1180 		resp->status = NFS3ERR_JUKEBOX;
1181 	} else
1182 		resp->status = puterrno3(error);
1183 out1:
1184 	if (vp != NULL) {
1185 		if (rwlock_ret != -1)
1186 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1187 		if (in_crit)
1188 			nbl_end_crit(vp);
1189 		VN_RELE(vp);
1190 	}
1191 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1192 }
1193 
1194 fhandle_t *
1195 rfs3_write_getfh(WRITE3args *args)
1196 {
1197 
1198 	return ((fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i);
1199 }
1200 
/*
 * NFS version 3 CREATE procedure: create a regular file named
 * args->where.name in the directory identified by args->where.dirp.
 *
 * args->how.mode selects the flavor of create:
 *   EXCLUSIVE - the file is created EXCL with mode 0 and the client's
 *               verifier stored in the mtime; if the create fails with
 *               EEXIST the existing file's mtime is compared against the
 *               verifier to recognize a duplicate of the same request.
 *   GUARDED   - the file is created EXCL from the supplied attributes;
 *               EEXIST is returned to the client as is.
 *   otherwise - the file is created NONEXCL; when a size is supplied an
 *               already existing file is first checked for v4
 *               delegations and conflicting NBMAND locks.
 *
 * On success resp->status is NFS3_OK and resp->resok carries the file
 * handle (when makefh3() succeeds), the file's post-op attributes and the
 * directory's wcc data.  On failure resp->status holds the NFSv3 error and
 * resp->resfail.dir_wcc the directory's wcc data.
 */
1201 void
1202 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1203 	struct svc_req *req, cred_t *cr)
1204 {
1205 	int error;
1206 	int in_crit = 0;
1207 	vnode_t *vp;
1208 	vnode_t *tvp = NULL;
1209 	vnode_t *dvp;
1210 	struct vattr *vap;
1211 	struct vattr va;
1212 	struct vattr *dbvap;
1213 	struct vattr dbva;
1214 	struct vattr *davap;
1215 	struct vattr dava;
1216 	enum vcexcl excl;
1217 	nfstime3 *mtime;
1218 	len_t reqsize;
1219 	bool_t trunc;
1220 
1221 	dbvap = NULL;
1222 	davap = NULL;
1223 
1224 	dvp = nfs3_fhtovp(args->where.dirp, exi);
1225 	if (dvp == NULL) {
1226 		error = ESTALE;
1227 		goto out;
1228 	}
1229 
	/* Capture the directory's attributes before the operation. */
1230 #ifdef DEBUG
1231 	if (rfs3_do_pre_op_attr) {
1232 		dbva.va_mask = AT_ALL;
1233 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1234 	} else
1235 		dbvap = NULL;
1236 #else
1237 	dbva.va_mask = AT_ALL;
1238 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1239 #endif
	/*
	 * Until the create has been attempted, failure replies report the
	 * pre-op attributes as the post-op attributes as well.
	 */
1240 	davap = dbvap;
1241 
1242 	if (args->where.name == nfs3nametoolong) {
1243 		resp->status = NFS3ERR_NAMETOOLONG;
1244 		goto out1;
1245 	}
1246 
1247 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1248 		resp->status = NFS3ERR_ACCES;
1249 		goto out1;
1250 	}
1251 
1252 	if (rdonly(exi, req)) {
1253 		resp->status = NFS3ERR_ROFS;
1254 		goto out1;
1255 	}
1256 
1257 	if (args->how.mode == EXCLUSIVE) {
		/*
		 * Exclusive create: the file gets mode 0 and the client's
		 * verifier is recorded in the mtime so that a duplicate
		 * request can be recognized in the EEXIST handling below.
		 */
1258 		va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1259 		va.va_type = VREG;
1260 		va.va_mode = (mode_t)0;
1261 		/*
1262 		 * Ensure no time overflows and that types match
1263 		 */
1264 		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1265 		va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1266 		va.va_mtime.tv_nsec = mtime->nseconds;
1267 		excl = EXCL;
1268 	} else {
1269 		error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1270 		    &va);
1271 		if (error)
1272 			goto out;
1273 		va.va_mask |= AT_TYPE;
1274 		va.va_type = VREG;
1275 		if (args->how.mode == GUARDED)
1276 			excl = EXCL;
1277 		else {
1278 			excl = NONEXCL;
1279 
1280 			/*
1281 			 * During creation of file in non-exclusive mode
1282 			 * if size of file is being set then make sure
1283 			 * that if the file already exists that no conflicting
1284 			 * non-blocking mandatory locks exists in the region
1285 			 * being modified. If there are conflicting locks fail
1286 			 * the operation with EACCES.
1287 			 */
1288 			if (va.va_mask & AT_SIZE) {
1289 				struct vattr tva;
1290 
1291 				/*
1292 				 * Does file already exist?
1293 				 */
1294 				error = VOP_LOOKUP(dvp, args->where.name, &tvp,
1295 						NULL, 0, NULL, cr);
1296 
1297 				if (!error)
1298 					VN_SETPATH(rootdir, dvp, tvp,
1299 						args->where.name,
1300 						strlen(args->where.name));
1301 
1302 				/*
1303 				 * Check to see if the file has been delegated
1304 				 * to a v4 client.  If so, then begin recall of
1305 				 * the delegation and return JUKEBOX to allow
1306 				 * the client to retransmit its request.
1307 				 */
1308 
1309 				trunc = va.va_size == 0;
1310 				if (!error &&
1311 				    rfs4_check_delegated(FWRITE, tvp, trunc)) {
1312 					resp->status = NFS3ERR_JUKEBOX;
1313 					goto out1;
1314 				}
1315 
1316 				/*
1317 				 * Check for NBMAND lock conflicts
1318 				 */
1319 				if (!error && nbl_need_check(tvp)) {
1320 					u_offset_t offset;
1321 					ssize_t len;
1322 
					/*
					 * The critical region entered here is
					 * left only at the end of the function
					 * (success path or out1), i.e. after
					 * the create has been attempted.
					 */
1323 					nbl_start_crit(tvp, RW_READER);
1324 					in_crit = 1;
1325 
1326 					tva.va_mask = AT_SIZE;
1327 					error = VOP_GETATTR(tvp, &tva, 0, cr);
1328 					/*
1329 					 * Can't check for conflicts, so return
1330 					 * error.
1331 					 */
1332 					if (error)
1333 						goto out;
1334 
					/*
					 * The region checked runs from the
					 * smaller of the current and requested
					 * sizes up to the larger of the two.
					 */
1335 					offset = tva.va_size < va.va_size ?
1336 						tva.va_size : va.va_size;
1337 					len = tva.va_size < va.va_size ?
1338 						va.va_size - tva.va_size :
1339 						tva.va_size - va.va_size;
1340 					if (nbl_conflict(tvp, NBL_WRITE,
1341 							offset, len, 0)) {
1342 						error = EACCES;
1343 						goto out;
1344 					}
1345 				} else if (tvp) {
1346 					VN_RELE(tvp);
1347 					tvp = NULL;
1348 				}
1349 			}
1350 		}
		/*
		 * Remember the requested size: va is reused for
		 * VOP_GETATTR once the file has been created.
		 */
1351 		if (va.va_mask & AT_SIZE)
1352 			reqsize = va.va_size;
1353 	}
1354 
1355 	/*
1356 	 * Must specify the mode.
1357 	 */
1358 	if (!(va.va_mask & AT_MODE)) {
1359 		resp->status = NFS3ERR_INVAL;
1360 		goto out1;
1361 	}
1362 
1363 	/*
1364 	 * If the filesystem is exported with nosuid, then mask off
1365 	 * the setuid and setgid bits.
1366 	 */
1367 	if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1368 		va.va_mode &= ~(VSUID | VSGID);
1369 
1370 tryagain:
1371 	/*
1372 	 * The file open mode used is VWRITE.  If the client needs
1373 	 * some other semantic, then it should do the access checking
1374 	 * itself.  It would have been nice to have the file open mode
1375 	 * passed as part of the arguments.
1376 	 */
1377 	error = VOP_CREATE(dvp, args->where.name, &va, excl, VWRITE,
1378 	    &vp, cr, 0);
1379 
	/* Capture the directory's attributes after the create attempt. */
1380 #ifdef DEBUG
1381 	if (rfs3_do_post_op_attr) {
1382 		dava.va_mask = AT_ALL;
1383 		davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1384 	} else
1385 		davap = NULL;
1386 #else
1387 	dava.va_mask = AT_ALL;
1388 	davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1389 #endif
1390 
1391 	if (error) {
1392 		/*
1393 		 * If we got something other than file already exists
1394 		 * then just return this error.  Otherwise, we got
1395 		 * EEXIST.  If we were doing a GUARDED create, then
1396 		 * just return this error.  Otherwise, we need to
1397 		 * make sure that this wasn't a duplicate of an
1398 		 * exclusive create request.
1399 		 *
1400 		 * The assumption is made that a non-exclusive create
1401 		 * request will never return EEXIST.
1402 		 */
1403 		if (error != EEXIST || args->how.mode == GUARDED)
1404 			goto out;
1405 		/*
1406 		 * Lookup the file so that we can get a vnode for it.
1407 		 */
1408 		error = VOP_LOOKUP(dvp, args->where.name, &vp, NULL, 0,
1409 		    NULL, cr);
1410 		if (error) {
1411 			/*
1412 			 * We couldn't find the file that we thought that
1413 			 * we just created.  So, we'll just try creating
1414 			 * it again.
1415 			 */
1416 			if (error == ENOENT)
1417 				goto tryagain;
1418 			goto out;
1419 		} else {
1420 			VN_SETPATH(rootdir, dvp, vp, args->where.name,
1421 				strlen(args->where.name));
1422 		}
1423 
1424 		/*
1425 		 * If the file is delegated to a v4 client, go ahead
1426 		 * and initiate recall, this create is a hint that a
1427 		 * conflicting v3 open has occurred.
1428 		 */
1429 
1430 		if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1431 			VN_RELE(vp);
1432 			resp->status = NFS3ERR_JUKEBOX;
1433 			goto out1;
1434 		}
1435 
1436 		va.va_mask = AT_ALL;
1437 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1438 
		/*
		 * For an exclusive create the existing file is accepted
		 * only if its mtime equals the verifier of this request;
		 * otherwise the request fails with EEXIST.
		 */
1439 		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1440 		/* % with INT32_MAX to prevent overflows */
1441 		if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1442 		    vap->va_mtime.tv_sec !=
1443 		    (mtime->seconds % INT32_MAX) ||
1444 		    vap->va_mtime.tv_nsec != mtime->nseconds)) {
1445 			VN_RELE(vp);
1446 			error = EEXIST;
1447 			goto out;
1448 		}
1449 	} else {
1450 
		/*
		 * The create succeeded.  A requested size of zero counts
		 * as a truncation for the delegation check.
		 */
1451 		if ((args->how.mode == UNCHECKED ||
1452 		    args->how.mode == GUARDED) &&
1453 		    args->how.createhow3_u.obj_attributes.size.set_it &&
1454 		    va.va_size == 0)
1455 			trunc = TRUE;
1456 		else
1457 			trunc = FALSE;
1458 
1459 		if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1460 			VN_RELE(vp);
1461 			resp->status = NFS3ERR_JUKEBOX;
1462 			goto out1;
1463 		}
1464 
1465 		va.va_mask = AT_ALL;
1466 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1467 
1468 		/*
1469 		 * We need to check to make sure that the file got
1470 		 * created to the indicated size.  If not, we do a
1471 		 * setattr to try to change the size, but we don't
1472 		 * try too hard.  This shouldn't be a problem as most
1473 		 * clients will only specify a size of zero which
1474 		 * local file systems handle.  However, even if
1475 		 * the client does specify a non-zero size, it can
1476 		 * still recover by checking the size of the file
1477 		 * after it has created it and then issue a setattr
1478 		 * request of its own to set the size of the file.
1479 		 */
1480 		if (vap != NULL &&
1481 		    (args->how.mode == UNCHECKED ||
1482 		    args->how.mode == GUARDED) &&
1483 		    args->how.createhow3_u.obj_attributes.size.set_it &&
1484 		    vap->va_size != reqsize) {
1485 			va.va_mask = AT_SIZE;
1486 			va.va_size = reqsize;
1487 			(void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1488 			va.va_mask = AT_ALL;
1489 			vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1490 		}
1491 	}
1492 
1493 #ifdef DEBUG
1494 	if (!rfs3_do_post_op_attr)
1495 		vap = NULL;
1496 #endif
1497 
	/*
	 * Build the file handle for the reply.  A makefh3() failure is
	 * reported only by handle_follows being FALSE; the reply status
	 * is still NFS3_OK.
	 */
1498 #ifdef DEBUG
1499 	if (!rfs3_do_post_op_fh3)
1500 		resp->resok.obj.handle_follows = FALSE;
1501 	else {
1502 #endif
1503 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1504 	if (error)
1505 		resp->resok.obj.handle_follows = FALSE;
1506 	else
1507 		resp->resok.obj.handle_follows = TRUE;
1508 #ifdef DEBUG
1509 	}
1510 #endif
1511 
1512 	/*
1513 	 * Force modified data and metadata out to stable storage.
1514 	 */
1515 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
1516 	(void) VOP_FSYNC(dvp, 0, cr);
1517 
1518 	VN_RELE(vp);
1519 	VN_RELE(dvp);
	/*
	 * tvp is still held here only when the NBMAND check above kept it
	 * (with the critical region entered).
	 */
1520 	if (tvp != NULL) {
1521 		if (in_crit)
1522 			nbl_end_crit(tvp);
1523 		VN_RELE(tvp);
1524 	}
1525 
1526 	resp->status = NFS3_OK;
1527 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1528 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1529 	return;
1530 
	/*
	 * out:  translate error (or a pending T_WOULDBLOCK) into the reply
	 *       status, then fall into the common cleanup.
	 * out1: the reply status has already been set by the caller of the
	 *       goto; drop the holds on tvp and dvp and fill in the wcc data.
	 */
1531 out:
1532 	if (curthread->t_flag & T_WOULDBLOCK) {
1533 		curthread->t_flag &= ~T_WOULDBLOCK;
1534 		resp->status = NFS3ERR_JUKEBOX;
1535 	} else
1536 		resp->status = puterrno3(error);
1537 out1:
1538 	if (tvp != NULL) {
1539 		if (in_crit)
1540 			nbl_end_crit(tvp);
1541 		VN_RELE(tvp);
1542 	}
1543 	if (dvp != NULL)
1544 		VN_RELE(dvp);
1545 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1546 }
1547 
1548 fhandle_t *
1549 rfs3_create_getfh(CREATE3args *args)
1550 {
1551 
1552 	return ((fhandle_t *)&args->where.dir.fh3_u.nfs_fh3_i.fh3_i);
1553 }
1554 
1555 void
1556 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1557 	struct svc_req *req, cred_t *cr)
1558 {
1559 	int error;
1560 	vnode_t *vp = NULL;
1561 	vnode_t *dvp;
1562 	struct vattr *vap;
1563 	struct vattr va;
1564 	struct vattr *dbvap;
1565 	struct vattr dbva;
1566 	struct vattr *davap;
1567 	struct vattr dava;
1568 
1569 	dbvap = NULL;
1570 	davap = NULL;
1571 
1572 	dvp = nfs3_fhtovp(args->where.dirp, exi);
1573 	if (dvp == NULL) {
1574 		error = ESTALE;
1575 		goto out;
1576 	}
1577 
1578 #ifdef DEBUG
1579 	if (rfs3_do_pre_op_attr) {
1580 		dbva.va_mask = AT_ALL;
1581 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1582 	} else
1583 		dbvap = NULL;
1584 #else
1585 	dbva.va_mask = AT_ALL;
1586 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1587 #endif
1588 	davap = dbvap;
1589 
1590 	if (args->where.name == nfs3nametoolong) {
1591 		resp->status = NFS3ERR_NAMETOOLONG;
1592 		goto out1;
1593 	}
1594 
1595 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1596 		resp->status = NFS3ERR_ACCES;
1597 		goto out1;
1598 	}
1599 
1600 	if (rdonly(exi, req)) {
1601 		resp->status = NFS3ERR_ROFS;
1602 		goto out1;
1603 	}
1604 
1605 	error = sattr3_to_vattr(&args->attributes, &va);
1606 	if (error)
1607 		goto out;
1608 
1609 	if (!(va.va_mask & AT_MODE)) {
1610 		resp->status = NFS3ERR_INVAL;
1611 		goto out1;
1612 	}
1613 
1614 	va.va_mask |= AT_TYPE;
1615 	va.va_type = VDIR;
1616 
1617 	error = VOP_MKDIR(dvp, args->where.name, &va, &vp, cr);
1618 
1619 #ifdef DEBUG
1620 	if (rfs3_do_post_op_attr) {
1621 		dava.va_mask = AT_ALL;
1622 		davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1623 	} else
1624 		davap = NULL;
1625 #else
1626 	dava.va_mask = AT_ALL;
1627 	davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1628 #endif
1629 
1630 	/*
1631 	 * Force modified data and metadata out to stable storage.
1632 	 */
1633 	(void) VOP_FSYNC(dvp, 0, cr);
1634 
1635 	if (error)
1636 		goto out;
1637 
1638 	VN_RELE(dvp);
1639 
1640 #ifdef DEBUG
1641 	if (!rfs3_do_post_op_fh3)
1642 		resp->resok.obj.handle_follows = FALSE;
1643 	else {
1644 #endif
1645 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1646 	if (error)
1647 		resp->resok.obj.handle_follows = FALSE;
1648 	else
1649 		resp->resok.obj.handle_follows = TRUE;
1650 #ifdef DEBUG
1651 	}
1652 #endif
1653 
1654 #ifdef DEBUG
1655 	if (rfs3_do_post_op_attr) {
1656 		va.va_mask = AT_ALL;
1657 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1658 	} else
1659 		vap = NULL;
1660 #else
1661 	va.va_mask = AT_ALL;
1662 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1663 #endif
1664 
1665 	/*
1666 	 * Force modified data and metadata out to stable storage.
1667 	 */
1668 	(void) VOP_FSYNC(vp, 0, cr);
1669 
1670 	VN_RELE(vp);
1671 
1672 	resp->status = NFS3_OK;
1673 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1674 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1675 	return;
1676 
1677 out:
1678 	if (curthread->t_flag & T_WOULDBLOCK) {
1679 		curthread->t_flag &= ~T_WOULDBLOCK;
1680 		resp->status = NFS3ERR_JUKEBOX;
1681 	} else
1682 		resp->status = puterrno3(error);
1683 out1:
1684 	if (dvp != NULL)
1685 		VN_RELE(dvp);
1686 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1687 }
1688 
1689 fhandle_t *
1690 rfs3_mkdir_getfh(MKDIR3args *args)
1691 {
1692 
1693 	return ((fhandle_t *)&args->where.dir.fh3_u.nfs_fh3_i.fh3_i);
1694 }
1695 
1696 void
1697 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
1698 	struct svc_req *req, cred_t *cr)
1699 {
1700 	int error;
1701 	vnode_t *vp;
1702 	vnode_t *dvp;
1703 	struct vattr *vap;
1704 	struct vattr va;
1705 	struct vattr *dbvap;
1706 	struct vattr dbva;
1707 	struct vattr *davap;
1708 	struct vattr dava;
1709 
1710 	dbvap = NULL;
1711 	davap = NULL;
1712 
1713 	dvp = nfs3_fhtovp(args->where.dirp, exi);
1714 	if (dvp == NULL) {
1715 		error = ESTALE;
1716 		goto out;
1717 	}
1718 
1719 #ifdef DEBUG
1720 	if (rfs3_do_pre_op_attr) {
1721 		dbva.va_mask = AT_ALL;
1722 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1723 	} else
1724 		dbvap = NULL;
1725 #else
1726 	dbva.va_mask = AT_ALL;
1727 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1728 #endif
1729 	davap = dbvap;
1730 
1731 	if (args->where.name == nfs3nametoolong) {
1732 		resp->status = NFS3ERR_NAMETOOLONG;
1733 		goto out1;
1734 	}
1735 
1736 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1737 		resp->status = NFS3ERR_ACCES;
1738 		goto out1;
1739 	}
1740 
1741 	if (rdonly(exi, req)) {
1742 		resp->status = NFS3ERR_ROFS;
1743 		goto out1;
1744 	}
1745 
1746 	error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
1747 	if (error)
1748 		goto out;
1749 
1750 	if (!(va.va_mask & AT_MODE)) {
1751 		resp->status = NFS3ERR_INVAL;
1752 		goto out1;
1753 	}
1754 
1755 	if (args->symlink.symlink_data == nfs3nametoolong) {
1756 		resp->status = NFS3ERR_NAMETOOLONG;
1757 		goto out1;
1758 	}
1759 
1760 	va.va_mask |= AT_TYPE;
1761 	va.va_type = VLNK;
1762 
1763 	error = VOP_SYMLINK(dvp, args->where.name, &va,
1764 	    args->symlink.symlink_data, cr);
1765 
1766 #ifdef DEBUG
1767 	if (rfs3_do_post_op_attr) {
1768 		dava.va_mask = AT_ALL;
1769 		davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1770 	} else
1771 		davap = NULL;
1772 #else
1773 	dava.va_mask = AT_ALL;
1774 	davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1775 #endif
1776 
1777 	if (error)
1778 		goto out;
1779 
1780 	error = VOP_LOOKUP(dvp, args->where.name, &vp, NULL, 0, NULL, cr);
1781 
1782 	if (!error)
1783 		VN_SETPATH(rootdir, dvp, vp, args->where.name,
1784 			strlen(args->where.name));
1785 
1786 	/*
1787 	 * Force modified data and metadata out to stable storage.
1788 	 */
1789 	(void) VOP_FSYNC(dvp, 0, cr);
1790 
1791 	VN_RELE(dvp);
1792 
1793 	resp->status = NFS3_OK;
1794 	if (error) {
1795 		resp->resok.obj.handle_follows = FALSE;
1796 		vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
1797 		vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1798 		return;
1799 	}
1800 
1801 #ifdef DEBUG
1802 	if (!rfs3_do_post_op_fh3)
1803 		resp->resok.obj.handle_follows = FALSE;
1804 	else {
1805 #endif
1806 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1807 	if (error)
1808 		resp->resok.obj.handle_follows = FALSE;
1809 	else
1810 		resp->resok.obj.handle_follows = TRUE;
1811 #ifdef DEBUG
1812 	}
1813 #endif
1814 
1815 #ifdef DEBUG
1816 	if (rfs3_do_post_op_attr) {
1817 		va.va_mask = AT_ALL;
1818 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1819 	} else
1820 		vap = NULL;
1821 #else
1822 	va.va_mask = AT_ALL;
1823 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
1824 #endif
1825 
1826 	/*
1827 	 * Force modified data and metadata out to stable storage.
1828 	 */
1829 	(void) VOP_FSYNC(vp, 0, cr);
1830 
1831 	VN_RELE(vp);
1832 
1833 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1834 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1835 	return;
1836 
1837 out:
1838 	if (curthread->t_flag & T_WOULDBLOCK) {
1839 		curthread->t_flag &= ~T_WOULDBLOCK;
1840 		resp->status = NFS3ERR_JUKEBOX;
1841 	} else
1842 		resp->status = puterrno3(error);
1843 out1:
1844 	if (dvp != NULL)
1845 		VN_RELE(dvp);
1846 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1847 }
1848 
1849 fhandle_t *
1850 rfs3_symlink_getfh(SYMLINK3args *args)
1851 {
1852 
1853 	return ((fhandle_t *)&args->where.dirp->fh3_u.nfs_fh3_i.fh3_i);
1854 }
1855 
1856 void
1857 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
1858 	struct svc_req *req, cred_t *cr)
1859 {
1860 	int error;
1861 	vnode_t *vp;
1862 	vnode_t *dvp;
1863 	struct vattr *vap;
1864 	struct vattr va;
1865 	struct vattr *dbvap;
1866 	struct vattr dbva;
1867 	struct vattr *davap;
1868 	struct vattr dava;
1869 	int mode;
1870 	enum vcexcl excl;
1871 
1872 	dbvap = NULL;
1873 	davap = NULL;
1874 
1875 	dvp = nfs3_fhtovp(args->where.dirp, exi);
1876 	if (dvp == NULL) {
1877 		error = ESTALE;
1878 		goto out;
1879 	}
1880 
1881 #ifdef DEBUG
1882 	if (rfs3_do_pre_op_attr) {
1883 		dbva.va_mask = AT_ALL;
1884 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1885 	} else
1886 		dbvap = NULL;
1887 #else
1888 	dbva.va_mask = AT_ALL;
1889 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr) ? NULL : &dbva;
1890 #endif
1891 	davap = dbvap;
1892 
1893 	if (args->where.name == nfs3nametoolong) {
1894 		resp->status = NFS3ERR_NAMETOOLONG;
1895 		goto out1;
1896 	}
1897 
1898 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1899 		resp->status = NFS3ERR_ACCES;
1900 		goto out1;
1901 	}
1902 
1903 	if (rdonly(exi, req)) {
1904 		resp->status = NFS3ERR_ROFS;
1905 		goto out1;
1906 	}
1907 
1908 	switch (args->what.type) {
1909 	case NF3CHR:
1910 	case NF3BLK:
1911 		error = sattr3_to_vattr(
1912 		    &args->what.mknoddata3_u.device.dev_attributes, &va);
1913 		if (error)
1914 			goto out;
1915 		if (secpolicy_sys_devices(cr) != 0) {
1916 			resp->status = NFS3ERR_PERM;
1917 			goto out1;
1918 		}
1919 		if (args->what.type == NF3CHR)
1920 			va.va_type = VCHR;
1921 		else
1922 			va.va_type = VBLK;
1923 		va.va_rdev = makedevice(
1924 		    args->what.mknoddata3_u.device.spec.specdata1,
1925 		    args->what.mknoddata3_u.device.spec.specdata2);
1926 		va.va_mask |= AT_TYPE | AT_RDEV;
1927 		break;
1928 	case NF3SOCK:
1929 		error = sattr3_to_vattr(
1930 		    &args->what.mknoddata3_u.pipe_attributes, &va);
1931 		if (error)
1932 			goto out;
1933 		va.va_type = VSOCK;
1934 		va.va_mask |= AT_TYPE;
1935 		break;
1936 	case NF3FIFO:
1937 		error = sattr3_to_vattr(
1938 		    &args->what.mknoddata3_u.pipe_attributes, &va);
1939 		if (error)
1940 			goto out;
1941 		va.va_type = VFIFO;
1942 		va.va_mask |= AT_TYPE;
1943 		break;
1944 	default:
1945 		resp->status = NFS3ERR_BADTYPE;
1946 		goto out1;
1947 	}
1948 
1949 	/*
1950 	 * Must specify the mode.
1951 	 */
1952 	if (!(va.va_mask & AT_MODE)) {
1953 		resp->status = NFS3ERR_INVAL;
1954 		goto out1;
1955 	}
1956 
1957 	excl = EXCL;
1958 
1959 	mode = 0;
1960 
1961 	error = VOP_CREATE(dvp, args->where.name, &va, excl, mode,
1962 	    &vp, cr, 0);
1963 
1964 #ifdef DEBUG
1965 	if (rfs3_do_post_op_attr) {
1966 		dava.va_mask = AT_ALL;
1967 		davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1968 	} else
1969 		davap = NULL;
1970 #else
1971 	dava.va_mask = AT_ALL;
1972 	davap = VOP_GETATTR(dvp, &dava, 0, cr) ? NULL : &dava;
1973 #endif
1974 
1975 	/*
1976 	 * Force modified data and metadata out to stable storage.
1977 	 */
1978 	(void) VOP_FSYNC(dvp, 0, cr);
1979 
1980 	if (error)
1981 		goto out;
1982 
1983 	VN_RELE(dvp);
1984 
1985 	resp->status = NFS3_OK;
1986 
1987 #ifdef DEBUG
1988 	if (!rfs3_do_post_op_fh3)
1989 		resp->resok.obj.handle_follows = FALSE;
1990 	else {
1991 #endif
1992 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1993 	if (error)
1994 		resp->resok.obj.handle_follows = FALSE;
1995 	else
1996 		resp->resok.obj.handle_follows = TRUE;
1997 #ifdef DEBUG
1998 	}
1999 #endif
2000 
2001 #ifdef DEBUG
2002 	if (rfs3_do_post_op_attr) {
2003 		va.va_mask = AT_ALL;
2004 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2005 	} else
2006 		vap = NULL;
2007 #else
2008 	va.va_mask = AT_ALL;
2009 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2010 #endif
2011 
2012 	/*
2013 	 * Force modified metadata out to stable storage.
2014 	 */
2015 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
2016 
2017 	VN_RELE(vp);
2018 
2019 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2020 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2021 	return;
2022 
2023 out:
2024 	if (curthread->t_flag & T_WOULDBLOCK) {
2025 		curthread->t_flag &= ~T_WOULDBLOCK;
2026 		resp->status = NFS3ERR_JUKEBOX;
2027 	} else
2028 		resp->status = puterrno3(error);
2029 out1:
2030 	if (dvp != NULL)
2031 		VN_RELE(dvp);
2032 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2033 }
2034 
2035 fhandle_t *
2036 rfs3_mknod_getfh(MKNOD3args *args)
2037 {
2038 
2039 	return ((fhandle_t *)&args->where.dirp->fh3_u.nfs_fh3_i.fh3_i);
2040 }
2041 
2042 void
2043 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2044 	struct svc_req *req, cred_t *cr)
2045 {
2046 	int error = 0;
2047 	vnode_t *vp;
2048 	struct vattr *bvap;
2049 	struct vattr bva;
2050 	struct vattr *avap;
2051 	struct vattr ava;
2052 	vnode_t *targvp = NULL;
2053 
2054 	bvap = NULL;
2055 	avap = NULL;
2056 
2057 	vp = nfs3_fhtovp(args->object.dirp, exi);
2058 	if (vp == NULL) {
2059 		error = ESTALE;
2060 		goto out;
2061 	}
2062 
2063 #ifdef DEBUG
2064 	if (rfs3_do_pre_op_attr) {
2065 		bva.va_mask = AT_ALL;
2066 		bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2067 	} else
2068 		bvap = NULL;
2069 #else
2070 	bva.va_mask = AT_ALL;
2071 	bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2072 #endif
2073 	avap = bvap;
2074 
2075 	if (vp->v_type != VDIR) {
2076 		resp->status = NFS3ERR_NOTDIR;
2077 		goto out1;
2078 	}
2079 
2080 	if (args->object.name == nfs3nametoolong) {
2081 		resp->status = NFS3ERR_NAMETOOLONG;
2082 		goto out1;
2083 	}
2084 
2085 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2086 		resp->status = NFS3ERR_ACCES;
2087 		goto out1;
2088 	}
2089 
2090 	if (rdonly(exi, req)) {
2091 		resp->status = NFS3ERR_ROFS;
2092 		goto out1;
2093 	}
2094 
2095 	/*
2096 	 * Check for a conflict with a non-blocking mandatory share
2097 	 * reservation and V4 delegations
2098 	 */
2099 	error = VOP_LOOKUP(vp, args->object.name, &targvp, NULL, 0,
2100 			NULL, cr);
2101 	if (error != 0)
2102 		goto out;
2103 
2104 	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2105 		resp->status = NFS3ERR_JUKEBOX;
2106 		goto out1;
2107 	}
2108 
2109 	if (!nbl_need_check(targvp)) {
2110 		error = VOP_REMOVE(vp, args->object.name, cr);
2111 	} else {
2112 		nbl_start_crit(targvp, RW_READER);
2113 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0)) {
2114 			error = EACCES;
2115 		} else {
2116 			error = VOP_REMOVE(vp, args->object.name, cr);
2117 		}
2118 		nbl_end_crit(targvp);
2119 	}
2120 	VN_RELE(targvp);
2121 	targvp = NULL;
2122 
2123 #ifdef DEBUG
2124 	if (rfs3_do_post_op_attr) {
2125 		ava.va_mask = AT_ALL;
2126 		avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2127 	} else
2128 		avap = NULL;
2129 #else
2130 	ava.va_mask = AT_ALL;
2131 	avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2132 #endif
2133 
2134 	/*
2135 	 * Force modified data and metadata out to stable storage.
2136 	 */
2137 	(void) VOP_FSYNC(vp, 0, cr);
2138 
2139 	if (error)
2140 		goto out;
2141 
2142 	VN_RELE(vp);
2143 
2144 	resp->status = NFS3_OK;
2145 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2146 	return;
2147 
2148 out:
2149 	if (curthread->t_flag & T_WOULDBLOCK) {
2150 		curthread->t_flag &= ~T_WOULDBLOCK;
2151 		resp->status = NFS3ERR_JUKEBOX;
2152 	} else
2153 		resp->status = puterrno3(error);
2154 out1:
2155 	if (vp != NULL)
2156 		VN_RELE(vp);
2157 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2158 }
2159 
2160 fhandle_t *
2161 rfs3_remove_getfh(REMOVE3args *args)
2162 {
2163 
2164 	return ((fhandle_t *)&args->object.dirp->fh3_u.nfs_fh3_i.fh3_i);
2165 }
2166 
2167 void
2168 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2169 	struct svc_req *req, cred_t *cr)
2170 {
2171 	int error;
2172 	vnode_t *vp;
2173 	struct vattr *bvap;
2174 	struct vattr bva;
2175 	struct vattr *avap;
2176 	struct vattr ava;
2177 
2178 	bvap = NULL;
2179 	avap = NULL;
2180 
2181 	vp = nfs3_fhtovp(args->object.dirp, exi);
2182 	if (vp == NULL) {
2183 		error = ESTALE;
2184 		goto out;
2185 	}
2186 
2187 #ifdef DEBUG
2188 	if (rfs3_do_pre_op_attr) {
2189 		bva.va_mask = AT_ALL;
2190 		bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2191 	} else
2192 		bvap = NULL;
2193 #else
2194 	bva.va_mask = AT_ALL;
2195 	bvap = VOP_GETATTR(vp, &bva, 0, cr) ? NULL : &bva;
2196 #endif
2197 	avap = bvap;
2198 
2199 	if (vp->v_type != VDIR) {
2200 		resp->status = NFS3ERR_NOTDIR;
2201 		goto out1;
2202 	}
2203 
2204 	if (args->object.name == nfs3nametoolong) {
2205 		resp->status = NFS3ERR_NAMETOOLONG;
2206 		goto out1;
2207 	}
2208 
2209 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2210 		resp->status = NFS3ERR_ACCES;
2211 		goto out1;
2212 	}
2213 
2214 	if (rdonly(exi, req)) {
2215 		resp->status = NFS3ERR_ROFS;
2216 		goto out1;
2217 	}
2218 
2219 	error = VOP_RMDIR(vp, args->object.name, rootdir, cr);
2220 
2221 #ifdef DEBUG
2222 	if (rfs3_do_post_op_attr) {
2223 		ava.va_mask = AT_ALL;
2224 		avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2225 	} else
2226 		avap = NULL;
2227 #else
2228 	ava.va_mask = AT_ALL;
2229 	avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
2230 #endif
2231 
2232 	/*
2233 	 * Force modified data and metadata out to stable storage.
2234 	 */
2235 	(void) VOP_FSYNC(vp, 0, cr);
2236 
2237 	if (error) {
2238 		/*
2239 		 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2240 		 * if the directory is not empty.  A System V NFS server
2241 		 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2242 		 * over the wire.
2243 		 */
2244 		if (error == EEXIST)
2245 			error = ENOTEMPTY;
2246 		goto out;
2247 	}
2248 
2249 	VN_RELE(vp);
2250 
2251 	resp->status = NFS3_OK;
2252 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2253 	return;
2254 
2255 out:
2256 	if (curthread->t_flag & T_WOULDBLOCK) {
2257 		curthread->t_flag &= ~T_WOULDBLOCK;
2258 		resp->status = NFS3ERR_JUKEBOX;
2259 	} else
2260 		resp->status = puterrno3(error);
2261 out1:
2262 	if (vp != NULL)
2263 		VN_RELE(vp);
2264 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2265 }
2266 
2267 fhandle_t *
2268 rfs3_rmdir_getfh(RMDIR3args *args)
2269 {
2270 
2271 	return ((fhandle_t *)&args->object.dirp->fh3_u.nfs_fh3_i.fh3_i);
2272 }
2273 
2274 void
2275 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2276 	struct svc_req *req, cred_t *cr)
2277 {
2278 	int error = 0;
2279 	vnode_t *fvp;
2280 	vnode_t *tvp;
2281 	vnode_t *targvp;
2282 	struct vattr *fbvap;
2283 	struct vattr fbva;
2284 	struct vattr *favap;
2285 	struct vattr fava;
2286 	struct vattr *tbvap;
2287 	struct vattr tbva;
2288 	struct vattr *tavap;
2289 	struct vattr tava;
2290 	nfs_fh3	*fh3;
2291 	struct exportinfo *to_exi;
2292 	vnode_t *srcvp = NULL;
2293 
2294 	fbvap = NULL;
2295 	favap = NULL;
2296 	tbvap = NULL;
2297 	tavap = NULL;
2298 	tvp = NULL;
2299 
2300 	fvp = nfs3_fhtovp(args->from.dirp, exi);
2301 	if (fvp == NULL) {
2302 		error = ESTALE;
2303 		goto out;
2304 	}
2305 
2306 #ifdef DEBUG
2307 	if (rfs3_do_pre_op_attr) {
2308 		fbva.va_mask = AT_ALL;
2309 		fbvap = VOP_GETATTR(fvp, &fbva, 0, cr) ? NULL : &fbva;
2310 	} else
2311 		fbvap = NULL;
2312 #else
2313 	fbva.va_mask = AT_ALL;
2314 	fbvap = VOP_GETATTR(fvp, &fbva, 0, cr) ? NULL : &fbva;
2315 #endif
2316 	favap = fbvap;
2317 
2318 	fh3 = args->to.dirp;
2319 	to_exi = checkexport(&fh3->fh3_fsid, (fid_t *)&fh3->fh3_xlen);
2320 	if (to_exi == NULL) {
2321 		resp->status = NFS3ERR_ACCES;
2322 		goto out1;
2323 	}
2324 	exi_rele(to_exi);
2325 
2326 	if (to_exi != exi) {
2327 		resp->status = NFS3ERR_XDEV;
2328 		goto out1;
2329 	}
2330 
2331 	tvp = nfs3_fhtovp(args->to.dirp, exi);
2332 	if (tvp == NULL) {
2333 		error = ESTALE;
2334 		goto out;
2335 	}
2336 
2337 #ifdef DEBUG
2338 	if (rfs3_do_pre_op_attr) {
2339 		tbva.va_mask = AT_ALL;
2340 		tbvap = VOP_GETATTR(tvp, &tbva, 0, cr) ? NULL : &tbva;
2341 	} else
2342 		tbvap = NULL;
2343 #else
2344 	tbva.va_mask = AT_ALL;
2345 	tbvap = VOP_GETATTR(tvp, &tbva, 0, cr) ? NULL : &tbva;
2346 #endif
2347 	tavap = tbvap;
2348 
2349 	if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2350 		resp->status = NFS3ERR_NOTDIR;
2351 		goto out1;
2352 	}
2353 
2354 	if (args->from.name == nfs3nametoolong ||
2355 	    args->to.name == nfs3nametoolong) {
2356 		resp->status = NFS3ERR_NAMETOOLONG;
2357 		goto out1;
2358 	}
2359 	if (args->from.name == NULL || *(args->from.name) == '\0' ||
2360 	    args->to.name == NULL || *(args->to.name) == '\0') {
2361 		resp->status = NFS3ERR_ACCES;
2362 		goto out1;
2363 	}
2364 
2365 	if (rdonly(exi, req)) {
2366 		resp->status = NFS3ERR_ROFS;
2367 		goto out1;
2368 	}
2369 
2370 	/*
2371 	 * Check for a conflict with a non-blocking mandatory share
2372 	 * reservation or V4 delegations.
2373 	 */
2374 	error = VOP_LOOKUP(fvp, args->from.name, &srcvp, NULL, 0,
2375 			NULL, cr);
2376 	if (error != 0)
2377 		goto out;
2378 
2379 	/*
2380 	 * If we rename a delegated file we should recall the
2381 	 * delegation, since future opens should fail or would
2382 	 * refer to a new file.
2383 	 */
2384 	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2385 		resp->status = NFS3ERR_JUKEBOX;
2386 		goto out1;
2387 	}
2388 
2389 	/*
2390 	 * Check for renaming over a delegated file.  Check rfs4_deleg_policy
2391 	 * first to avoid VOP_LOOKUP if possible.
2392 	 */
2393 	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2394 	    VOP_LOOKUP(tvp, args->to.name, &targvp, NULL, 0, NULL, cr) == 0) {
2395 
2396 		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2397 			VN_RELE(targvp);
2398 			resp->status = NFS3ERR_JUKEBOX;
2399 			goto out1;
2400 		}
2401 		VN_RELE(targvp);
2402 	}
2403 
2404 	if (!nbl_need_check(srcvp)) {
2405 		error = VOP_RENAME(fvp, args->from.name, tvp,
2406 				    args->to.name, cr);
2407 	} else {
2408 		nbl_start_crit(srcvp, RW_READER);
2409 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0)) {
2410 			error = EACCES;
2411 		} else {
2412 			error = VOP_RENAME(fvp, args->from.name, tvp,
2413 				    args->to.name, cr);
2414 		}
2415 		nbl_end_crit(srcvp);
2416 	}
2417 	VN_RELE(srcvp);
2418 	srcvp = NULL;
2419 
2420 #ifdef DEBUG
2421 	if (rfs3_do_post_op_attr) {
2422 		fava.va_mask = AT_ALL;
2423 		favap = VOP_GETATTR(fvp, &fava, 0, cr) ? NULL : &fava;
2424 		tava.va_mask = AT_ALL;
2425 		tavap = VOP_GETATTR(tvp, &tava, 0, cr) ? NULL : &tava;
2426 	} else {
2427 		favap = NULL;
2428 		tavap = NULL;
2429 	}
2430 #else
2431 	fava.va_mask = AT_ALL;
2432 	favap = VOP_GETATTR(fvp, &fava, 0, cr) ? NULL : &fava;
2433 	tava.va_mask = AT_ALL;
2434 	tavap = VOP_GETATTR(tvp, &tava, 0, cr) ? NULL : &tava;
2435 #endif
2436 
2437 	/*
2438 	 * Force modified data and metadata out to stable storage.
2439 	 */
2440 	(void) VOP_FSYNC(fvp, 0, cr);
2441 	(void) VOP_FSYNC(tvp, 0, cr);
2442 
2443 	if (error)
2444 		goto out;
2445 
2446 	VN_RELE(tvp);
2447 	VN_RELE(fvp);
2448 
2449 	resp->status = NFS3_OK;
2450 	vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2451 	vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2452 	return;
2453 
2454 out:
2455 	if (curthread->t_flag & T_WOULDBLOCK) {
2456 		curthread->t_flag &= ~T_WOULDBLOCK;
2457 		resp->status = NFS3ERR_JUKEBOX;
2458 	} else
2459 		resp->status = puterrno3(error);
2460 out1:
2461 	if (fvp != NULL)
2462 		VN_RELE(fvp);
2463 	if (tvp != NULL)
2464 		VN_RELE(tvp);
2465 	vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2466 	vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2467 }
2468 
2469 fhandle_t *
2470 rfs3_rename_getfh(RENAME3args *args)
2471 {
2472 
2473 	return ((fhandle_t *)&args->from.dirp->fh3_u.nfs_fh3_i.fh3_i);
2474 }
2475 
2476 void
2477 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2478 	struct svc_req *req, cred_t *cr)
2479 {
2480 	int error;
2481 	vnode_t *vp;
2482 	vnode_t *dvp;
2483 	struct vattr *vap;
2484 	struct vattr va;
2485 	struct vattr *bvap;
2486 	struct vattr bva;
2487 	struct vattr *avap;
2488 	struct vattr ava;
2489 	nfs_fh3	*fh3;
2490 	struct exportinfo *to_exi;
2491 
2492 	vap = NULL;
2493 	bvap = NULL;
2494 	avap = NULL;
2495 	dvp = NULL;
2496 
2497 	vp = nfs3_fhtovp(&args->file, exi);
2498 	if (vp == NULL) {
2499 		error = ESTALE;
2500 		goto out;
2501 	}
2502 
2503 #ifdef DEBUG
2504 	if (rfs3_do_pre_op_attr) {
2505 		va.va_mask = AT_ALL;
2506 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2507 	} else
2508 		vap = NULL;
2509 #else
2510 	va.va_mask = AT_ALL;
2511 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2512 #endif
2513 
2514 	fh3 = args->link.dirp;
2515 	to_exi = checkexport(&fh3->fh3_fsid, (fid_t *)&fh3->fh3_xlen);
2516 	if (to_exi == NULL) {
2517 		resp->status = NFS3ERR_ACCES;
2518 		goto out1;
2519 	}
2520 	exi_rele(to_exi);
2521 
2522 	if (to_exi != exi) {
2523 		resp->status = NFS3ERR_XDEV;
2524 		goto out1;
2525 	}
2526 
2527 	dvp = nfs3_fhtovp(args->link.dirp, exi);
2528 	if (dvp == NULL) {
2529 		error = ESTALE;
2530 		goto out;
2531 	}
2532 
2533 #ifdef DEBUG
2534 	if (rfs3_do_pre_op_attr) {
2535 		bva.va_mask = AT_ALL;
2536 		bvap = VOP_GETATTR(dvp, &bva, 0, cr) ? NULL : &bva;
2537 	} else
2538 		bvap = NULL;
2539 #else
2540 	bva.va_mask = AT_ALL;
2541 	bvap = VOP_GETATTR(dvp, &bva, 0, cr) ? NULL : &bva;
2542 #endif
2543 
2544 	if (dvp->v_type != VDIR) {
2545 		resp->status = NFS3ERR_NOTDIR;
2546 		goto out1;
2547 	}
2548 
2549 	if (args->link.name == nfs3nametoolong) {
2550 		resp->status = NFS3ERR_NAMETOOLONG;
2551 		goto out1;
2552 	}
2553 
2554 	if (args->link.name == NULL || *(args->link.name) == '\0') {
2555 		resp->status = NFS3ERR_ACCES;
2556 		goto out1;
2557 	}
2558 
2559 	if (rdonly(exi, req)) {
2560 		resp->status = NFS3ERR_ROFS;
2561 		goto out1;
2562 	}
2563 
2564 	error = VOP_LINK(dvp, vp, args->link.name, cr);
2565 
2566 #ifdef DEBUG
2567 	if (rfs3_do_post_op_attr) {
2568 		va.va_mask = AT_ALL;
2569 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2570 		ava.va_mask = AT_ALL;
2571 		avap = VOP_GETATTR(dvp, &ava, 0, cr) ? NULL : &ava;
2572 	} else {
2573 		vap = NULL;
2574 		avap = NULL;
2575 	}
2576 #else
2577 	va.va_mask = AT_ALL;
2578 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2579 	ava.va_mask = AT_ALL;
2580 	avap = VOP_GETATTR(dvp, &ava, 0, cr) ? NULL : &ava;
2581 #endif
2582 
2583 	/*
2584 	 * Force modified data and metadata out to stable storage.
2585 	 */
2586 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
2587 	(void) VOP_FSYNC(dvp, 0, cr);
2588 
2589 	if (error)
2590 		goto out;
2591 
2592 	VN_RELE(dvp);
2593 	VN_RELE(vp);
2594 
2595 	resp->status = NFS3_OK;
2596 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
2597 	vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
2598 	return;
2599 
2600 out:
2601 	if (curthread->t_flag & T_WOULDBLOCK) {
2602 		curthread->t_flag &= ~T_WOULDBLOCK;
2603 		resp->status = NFS3ERR_JUKEBOX;
2604 	} else
2605 		resp->status = puterrno3(error);
2606 out1:
2607 	if (vp != NULL)
2608 		VN_RELE(vp);
2609 	if (dvp != NULL)
2610 		VN_RELE(dvp);
2611 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
2612 	vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
2613 }
2614 
2615 fhandle_t *
2616 rfs3_link_getfh(LINK3args *args)
2617 {
2618 
2619 	return ((fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i);
2620 }
2621 
/*
 * NFS3_READDIR_MIN_COUNT(length) is the size in bytes of a READDIR
 * response that carries directory attributes and exactly one directory
 * entry whose name is `length' bytes long.  If the count in the incoming
 * request is larger than this, we are guaranteed to be able to return at
 * least one directory entry if one exists, so NFS3ERR_TOOSMALL does not
 * need to be considered.  If the count is not larger than this, the
 * caller has to check whether NFS3ERR_TOOSMALL must be returned.
 *
 * The size is made up of the following, in BYTES_PER_XDR_UNIT units:
 *
 *   reply header
 *	status			1
 *	attributes flag		1
 *	cookie verifier		2
 *	attributes		NFS3_SIZEOF_FATTR3
 *   one entry
 *	boolean (entry follows)	1
 *	file id			2
 *	directory name length	1
 *	cookie			2
 *   reply trailer
 *	end of list		1
 *	end of file		1
 *
 * plus the length of the name rounded up to a whole XDR unit.
 */

#define	NFS3_READDIR_MIN_COUNT(length)	\
	(((1 + 1 + 2 + NFS3_SIZEOF_FATTR3) + (1 + 2 + 1 + 2) + (1 + 1)) * \
		BYTES_PER_XDR_UNIT + \
	roundup((length), BYTES_PER_XDR_UNIT))
2650 
2651 /* ARGSUSED */
2652 void
2653 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
2654 	struct svc_req *req, cred_t *cr)
2655 {
2656 	int error;
2657 	vnode_t *vp;
2658 	struct vattr *vap;
2659 	struct vattr va;
2660 	struct iovec iov;
2661 	struct uio uio;
2662 	char *data;
2663 	int iseof;
2664 	int bufsize;
2665 	int namlen;
2666 	uint_t count;
2667 
2668 	vap = NULL;
2669 
2670 	vp = nfs3_fhtovp(&args->dir, exi);
2671 	if (vp == NULL) {
2672 		error = ESTALE;
2673 		goto out;
2674 	}
2675 
2676 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2677 
2678 #ifdef DEBUG
2679 	if (rfs3_do_pre_op_attr) {
2680 		va.va_mask = AT_ALL;
2681 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2682 	} else
2683 		vap = NULL;
2684 #else
2685 	va.va_mask = AT_ALL;
2686 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2687 #endif
2688 
2689 	if (vp->v_type != VDIR) {
2690 		resp->status = NFS3ERR_NOTDIR;
2691 		goto out1;
2692 	}
2693 
2694 	error = VOP_ACCESS(vp, VREAD, 0, cr);
2695 	if (error)
2696 		goto out;
2697 
2698 	/*
2699 	 * Now don't allow arbitrary count to alloc;
2700 	 * allow the maximum not to exceed rfs3_tsize()
2701 	 */
2702 	if (args->count > rfs3_tsize(req))
2703 		args->count = rfs3_tsize(req);
2704 
2705 	/*
2706 	 * Make sure that there is room to read at least one entry
2707 	 * if any are available.
2708 	 */
2709 	if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
2710 		count = DIRENT64_RECLEN(MAXNAMELEN);
2711 	else
2712 		count = args->count;
2713 
2714 	data = kmem_alloc(count, KM_SLEEP);
2715 
2716 	iov.iov_base = data;
2717 	iov.iov_len = count;
2718 	uio.uio_iov = &iov;
2719 	uio.uio_iovcnt = 1;
2720 	uio.uio_segflg = UIO_SYSSPACE;
2721 	uio.uio_extflg = UIO_COPY_CACHED;
2722 	uio.uio_loffset = (offset_t)args->cookie;
2723 	uio.uio_resid = count;
2724 
2725 	error = VOP_READDIR(vp, &uio, cr, &iseof);
2726 
2727 #ifdef DEBUG
2728 	if (rfs3_do_post_op_attr) {
2729 		va.va_mask = AT_ALL;
2730 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2731 	} else
2732 		vap = NULL;
2733 #else
2734 	va.va_mask = AT_ALL;
2735 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2736 #endif
2737 
2738 	if (error) {
2739 		kmem_free(data, count);
2740 		goto out;
2741 	}
2742 
2743 	/*
2744 	 * If the count was not large enough to be able to guarantee
2745 	 * to be able to return at least one entry, then need to
2746 	 * check to see if NFS3ERR_TOOSMALL should be returned.
2747 	 */
2748 	if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
2749 		/*
2750 		 * bufsize is used to keep track of the size of the response.
2751 		 * It is primed with:
2752 		 *	1 for the status +
2753 		 *	1 for the dir_attributes.attributes boolean +
2754 		 *	2 for the cookie verifier
2755 		 * all times BYTES_PER_XDR_UNIT to convert from XDR units
2756 		 * to bytes.  If there are directory attributes to be
2757 		 * returned, then:
2758 		 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
2759 		 * time BYTES_PER_XDR_UNIT is added to account for them.
2760 		 */
2761 		bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
2762 		if (vap != NULL)
2763 			bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
2764 		/*
2765 		 * An entry is composed of:
2766 		 *	1 for the true/false list indicator +
2767 		 *	2 for the fileid +
2768 		 *	1 for the length of the name +
2769 		 *	2 for the cookie +
2770 		 * all times BYTES_PER_XDR_UNIT to convert from
2771 		 * XDR units to bytes, plus the length of the name
2772 		 * rounded up to the nearest BYTES_PER_XDR_UNIT.
2773 		 */
2774 		if (count != uio.uio_resid) {
2775 			namlen = strlen(((struct dirent64 *)data)->d_name);
2776 			bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
2777 				    roundup(namlen, BYTES_PER_XDR_UNIT);
2778 		}
2779 		/*
2780 		 * We need to check to see if the number of bytes left
2781 		 * to go into the buffer will actually fit into the
2782 		 * buffer.  This is calculated as the size of this
2783 		 * entry plus:
2784 		 *	1 for the true/false list indicator +
2785 		 *	1 for the eof indicator
2786 		 * times BYTES_PER_XDR_UNIT to convert from from
2787 		 * XDR units to bytes.
2788 		 */
2789 		bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
2790 		if (bufsize > args->count) {
2791 			kmem_free(data, count);
2792 			resp->status = NFS3ERR_TOOSMALL;
2793 			goto out1;
2794 		}
2795 	}
2796 
2797 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2798 
2799 #if 0 /* notyet */
2800 	/*
2801 	 * Don't do this.  It causes local disk writes when just
2802 	 * reading the file and the overhead is deemed larger
2803 	 * than the benefit.
2804 	 */
2805 	/*
2806 	 * Force modified metadata out to stable storage.
2807 	 */
2808 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
2809 #endif
2810 
2811 	VN_RELE(vp);
2812 
2813 	resp->status = NFS3_OK;
2814 	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
2815 	resp->resok.cookieverf = 0;
2816 	resp->resok.reply.entries = (entry3 *)data;
2817 	resp->resok.reply.eof = iseof;
2818 	resp->resok.size = count - uio.uio_resid;
2819 	resp->resok.count = args->count;
2820 	resp->resok.freecount = count;
2821 	return;
2822 
2823 out:
2824 	if (curthread->t_flag & T_WOULDBLOCK) {
2825 		curthread->t_flag &= ~T_WOULDBLOCK;
2826 		resp->status = NFS3ERR_JUKEBOX;
2827 	} else
2828 		resp->status = puterrno3(error);
2829 out1:
2830 	if (vp != NULL) {
2831 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2832 		VN_RELE(vp);
2833 	}
2834 	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
2835 }
2836 
2837 fhandle_t *
2838 rfs3_readdir_getfh(READDIR3args *args)
2839 {
2840 
2841 	return ((fhandle_t *)&args->dir.fh3_u.nfs_fh3_i.fh3_i);
2842 }
2843 
2844 void
2845 rfs3_readdir_free(READDIR3res *resp)
2846 {
2847 
2848 	if (resp->status == NFS3_OK)
2849 		kmem_free(resp->resok.reply.entries, resp->resok.freecount);
2850 }
2851 
/*
 * nextdp() steps from one dirent64 record to the record that follows it
 * in the same buffer, d_reclen bytes further on.  Any definition of
 * nextdp already in effect is discarded first; #undef of a name that is
 * not a macro is harmless.
 */
#undef nextdp
#define	nextdp(dp)	\
	((struct dirent64 *)&((char *)(dp))[(dp)->d_reclen])
2856 
2857 /*
2858  * This macro computes the size of a response which contains
2859  * one directory entry including the attributes as well as file handle.
2860  * If the incoming request is larger than this, then we are guaranteed to be
2861  * able to return at least one more directory entry if one exists.
2862  *
2863  * NFS3_READDIRPLUS_ENTRY is made up of the following:
2864  *
2865  * boolean - 1 * BYTES_PER_XDR_UNIT
2866  * file id - 2 * BYTES_PER_XDR_UNIT
2867  * directory name length - 1 * BYTES_PER_XDR_UNIT
2868  * cookie - 2 * BYTES_PER_XDR_UNIT
2869  * attribute flag - 1 * BYTES_PER_XDR_UNIT
2870  * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
2871  * status byte for file handle - 1 *  BYTES_PER_XDR_UNIT
2872  * length of a file handle - 1 * BYTES_PER_XDR_UNIT
2873  * Maxmum length of a file handle (NFS3_CURFHSIZE)
2874  * name length of the entry to the nearest bytes
2875  */
2876 #define	NFS3_READDIRPLUS_ENTRY(namelen)	\
2877 	((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
2878 		BYTES_PER_XDR_UNIT + \
2879 	NFS3_CURFHSIZE + roundup(namelen, BYTES_PER_XDR_UNIT))
2880 
2881 static int rfs3_readdir_unit = MAXBSIZE;
2882 
2883 /* ARGSUSED */
2884 void
2885 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
2886 	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
2887 {
2888 	int error;
2889 	vnode_t *vp;
2890 	struct vattr *vap;
2891 	struct vattr va;
2892 	struct iovec iov;
2893 	struct uio uio;
2894 	char *data;
2895 	int iseof;
2896 	struct dirent64 *dp;
2897 	vnode_t *nvp;
2898 	struct vattr *nvap;
2899 	struct vattr nva;
2900 	entryplus3_info *infop = NULL;
2901 	int size = 0;
2902 	int nents = 0;
2903 	int bufsize = 0;
2904 	int entrysize = 0;
2905 	int tofit = 0;
2906 	int rd_unit = rfs3_readdir_unit;
2907 	int prev_len;
2908 	int space_left;
2909 	int i;
2910 	uint_t *namlen = NULL;
2911 
2912 	vap = NULL;
2913 
2914 	vp = nfs3_fhtovp(&args->dir, exi);
2915 	if (vp == NULL) {
2916 		error = ESTALE;
2917 		goto out;
2918 	}
2919 
2920 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2921 
2922 #ifdef DEBUG
2923 	if (rfs3_do_pre_op_attr) {
2924 		va.va_mask = AT_ALL;
2925 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2926 	} else
2927 		vap = NULL;
2928 #else
2929 	va.va_mask = AT_ALL;
2930 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
2931 #endif
2932 
2933 	if (vp->v_type != VDIR) {
2934 		error = ENOTDIR;
2935 		goto out;
2936 	}
2937 
2938 	error = VOP_ACCESS(vp, VREAD, 0, cr);
2939 	if (error)
2940 		goto out;
2941 
2942 	/*
2943 	 * Don't allow arbitrary counts for allocation
2944 	 */
2945 	if (args->maxcount > rfs3_tsize(req))
2946 		args->maxcount = rfs3_tsize(req);
2947 
2948 	/*
2949 	 * Make sure that there is room to read at least one entry
2950 	 * if any are available
2951 	 */
2952 	args->dircount = MIN(args->dircount, args->maxcount);
2953 
2954 	if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
2955 		args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
2956 
2957 	/*
2958 	 * This allocation relies on a minimum directory entry
2959 	 * being roughly 24 bytes.  Therefore, the namlen array
2960 	 * will have enough space based on the maximum number of
2961 	 * entries to read.
2962 	 */
2963 	namlen = kmem_alloc(args->dircount, KM_SLEEP);
2964 
2965 	space_left = args->dircount;
2966 	data = kmem_alloc(args->dircount, KM_SLEEP);
2967 	dp = (struct dirent64 *)data;
2968 	uio.uio_iov = &iov;
2969 	uio.uio_iovcnt = 1;
2970 	uio.uio_segflg = UIO_SYSSPACE;
2971 	uio.uio_extflg = UIO_COPY_CACHED;
2972 	uio.uio_loffset = (offset_t)args->cookie;
2973 
2974 	/*
2975 	 * bufsize is used to keep track of the size of the response as we
2976 	 * get post op attributes and filehandles for each entry.  This is
2977 	 * an optimization as the server may have read more entries than will
2978 	 * fit in the buffer specified by maxcount.  We stop calculating
2979 	 * post op attributes and filehandles once we have exceeded maxcount.
2980 	 * This will minimize the effect of truncation.
2981 	 *
2982 	 * It is primed with:
2983 	 *	1 for the status +
2984 	 *	1 for the dir_attributes.attributes boolean +
2985 	 *	2 for the cookie verifier
2986 	 * all times BYTES_PER_XDR_UNIT to convert from XDR units
2987 	 * to bytes.  If there are directory attributes to be
2988 	 * returned, then:
2989 	 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
2990 	 * time BYTES_PER_XDR_UNIT is added to account for them.
2991 	 */
2992 	bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
2993 	if (vap != NULL)
2994 		bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
2995 
2996 getmoredents:
2997 	/*
2998 	 * Here we make a check so that our read unit is not larger than
2999 	 * the space left in the buffer.
3000 	 */
3001 	rd_unit = MIN(rd_unit, space_left);
3002 	iov.iov_base = (char *)dp;
3003 	iov.iov_len = rd_unit;
3004 	uio.uio_resid = rd_unit;
3005 	prev_len = rd_unit;
3006 
3007 	error = VOP_READDIR(vp, &uio, cr, &iseof);
3008 
3009 	if (error) {
3010 		kmem_free(data, args->dircount);
3011 		goto out;
3012 	}
3013 
3014 	if (uio.uio_resid == prev_len && !iseof) {
3015 		if (nents == 0) {
3016 			kmem_free(data, args->dircount);
3017 			resp->status = NFS3ERR_TOOSMALL;
3018 			goto out1;
3019 		}
3020 
3021 		/*
3022 		 * We could not get any more entries, so get the attributes
3023 		 * and filehandle for the entries already obtained.
3024 		 */
3025 		goto good;
3026 	}
3027 
3028 	/*
3029 	 * We estimate the size of the response by assuming the
3030 	 * entry exists and attributes and filehandle are also valid
3031 	 */
3032 	for (size = prev_len - uio.uio_resid;
3033 		size > 0;
3034 		size -= dp->d_reclen, dp = nextdp(dp)) {
3035 
3036 		if (dp->d_ino == 0) {
3037 			nents++;
3038 			continue;
3039 		}
3040 
3041 		namlen[nents] = strlen(dp->d_name);
3042 		entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3043 
3044 		/*
3045 		 * We need to check to see if the number of bytes left
3046 		 * to go into the buffer will actually fit into the
3047 		 * buffer.  This is calculated as the size of this
3048 		 * entry plus:
3049 		 *	1 for the true/false list indicator +
3050 		 *	1 for the eof indicator
3051 		 * times BYTES_PER_XDR_UNIT to convert from XDR units
3052 		 * to bytes.
3053 		 *
3054 		 * Also check the dircount limit against the first entry read
3055 		 *
3056 		 */
3057 		tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3058 		if (bufsize + tofit > args->maxcount) {
3059 			/*
3060 			 * We make a check here to see if this was the
3061 			 * first entry being measured.  If so, then maxcount
3062 			 * was too small to begin with and so we need to
3063 			 * return with NFS3ERR_TOOSMALL.
3064 			 */
3065 			if (nents == 0) {
3066 				kmem_free(data, args->dircount);
3067 				resp->status = NFS3ERR_TOOSMALL;
3068 				goto out1;
3069 			}
3070 			iseof = FALSE;
3071 			goto good;
3072 		}
3073 		bufsize += entrysize;
3074 		nents++;
3075 	}
3076 
3077 	/*
3078 	 * If there is enough room to fit at least 1 more entry including
3079 	 * post op attributes and filehandle in the buffer AND that we haven't
3080 	 * exceeded dircount then go back and get some more.
3081 	 */
3082 	if (!iseof &&
3083 	    (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3084 		space_left -= (prev_len - uio.uio_resid);
3085 		if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3086 			goto getmoredents;
3087 
3088 		/* else, fall through */
3089 	}
3090 
3091 good:
3092 
3093 #ifdef DEBUG
3094 	if (rfs3_do_post_op_attr) {
3095 		va.va_mask = AT_ALL;
3096 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3097 	} else
3098 		vap = NULL;
3099 #else
3100 	va.va_mask = AT_ALL;
3101 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3102 #endif
3103 
3104 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3105 
3106 	infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3107 	resp->resok.infop = infop;
3108 
3109 	dp = (struct dirent64 *)data;
3110 	for (i = 0; i < nents; i++) {
3111 
3112 		if (dp->d_ino == 0) {
3113 			infop[i].attr.attributes = FALSE;
3114 			infop[i].fh.handle_follows = FALSE;
3115 			dp = nextdp(dp);
3116 			continue;
3117 		}
3118 
3119 		infop[i].namelen = namlen[i];
3120 
3121 		error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr);
3122 		if (error) {
3123 			infop[i].attr.attributes = FALSE;
3124 			infop[i].fh.handle_follows = FALSE;
3125 			dp = nextdp(dp);
3126 			continue;
3127 		}
3128 		VN_SETPATH(rootdir, vp, nvp, dp->d_name, strlen(dp->d_name));
3129 
3130 #ifdef DEBUG
3131 		if (rfs3_do_post_op_attr) {
3132 			nva.va_mask = AT_ALL;
3133 			nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ?
3134 				NULL : &nva;
3135 		} else
3136 			nvap = NULL;
3137 #else
3138 		nva.va_mask = AT_ALL;
3139 		nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3140 #endif
3141 		vattr_to_post_op_attr(nvap, &infop[i].attr);
3142 
3143 #ifdef DEBUG
3144 		if (!rfs3_do_post_op_fh3)
3145 			infop[i].fh.handle_follows = FALSE;
3146 		else {
3147 #endif
3148 		error = makefh3(&infop[i].fh.handle, nvp, exi);
3149 		if (!error)
3150 			infop[i].fh.handle_follows = TRUE;
3151 		else
3152 			infop[i].fh.handle_follows = FALSE;
3153 #ifdef DEBUG
3154 		}
3155 #endif
3156 
3157 		VN_RELE(nvp);
3158 		dp = nextdp(dp);
3159 	}
3160 
3161 #if 0 /* notyet */
3162 	/*
3163 	 * Don't do this.  It causes local disk writes when just
3164 	 * reading the file and the overhead is deemed larger
3165 	 * than the benefit.
3166 	 */
3167 	/*
3168 	 * Force modified metadata out to stable storage.
3169 	 */
3170 	(void) VOP_FSYNC(vp, FNODSYNC, cr);
3171 #endif
3172 
3173 	VN_RELE(vp);
3174 
3175 	kmem_free(namlen, args->dircount);
3176 
3177 	resp->status = NFS3_OK;
3178 	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3179 	resp->resok.cookieverf = 0;
3180 	resp->resok.reply.entries = (entryplus3 *)data;
3181 	resp->resok.reply.eof = iseof;
3182 	resp->resok.size = nents;
3183 	resp->resok.count = args->dircount;
3184 	resp->resok.maxcount = args->maxcount;
3185 	return;
3186 
3187 out:
3188 	if (curthread->t_flag & T_WOULDBLOCK) {
3189 		curthread->t_flag &= ~T_WOULDBLOCK;
3190 		resp->status = NFS3ERR_JUKEBOX;
3191 	} else
3192 		resp->status = puterrno3(error);
3193 out1:
3194 	if (vp != NULL) {
3195 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3196 		VN_RELE(vp);
3197 	}
3198 
3199 	if (namlen != NULL)
3200 		kmem_free(namlen, args->dircount);
3201 
3202 	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3203 }
3204 
3205 fhandle_t *
3206 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3207 {
3208 
3209 	return ((fhandle_t *)&args->dir.fh3_u.nfs_fh3_i.fh3_i);
3210 }
3211 
3212 void
3213 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3214 {
3215 
3216 	if (resp->status == NFS3_OK) {
3217 		kmem_free(resp->resok.reply.entries, resp->resok.count);
3218 		kmem_free(resp->resok.infop,
3219 			resp->resok.size * sizeof (struct entryplus3_info));
3220 	}
3221 }
3222 
3223 /* ARGSUSED */
3224 void
3225 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3226 	struct svc_req *req, cred_t *cr)
3227 {
3228 	int error;
3229 	vnode_t *vp;
3230 	struct vattr *vap;
3231 	struct vattr va;
3232 	struct statvfs64 sb;
3233 
3234 	vap = NULL;
3235 
3236 	vp = nfs3_fhtovp(&args->fsroot, exi);
3237 	if (vp == NULL) {
3238 		error = ESTALE;
3239 		goto out;
3240 	}
3241 
3242 	error = VFS_STATVFS(vp->v_vfsp, &sb);
3243 
3244 #ifdef DEBUG
3245 	if (rfs3_do_post_op_attr) {
3246 		va.va_mask = AT_ALL;
3247 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3248 	} else
3249 		vap = NULL;
3250 #else
3251 	va.va_mask = AT_ALL;
3252 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3253 #endif
3254 
3255 	VN_RELE(vp);
3256 
3257 	if (error)
3258 		goto out;
3259 
3260 	resp->status = NFS3_OK;
3261 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3262 	if (sb.f_blocks != (fsblkcnt64_t)-1)
3263 		resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3264 	else
3265 		resp->resok.tbytes = (size3)sb.f_blocks;
3266 	if (sb.f_bfree != (fsblkcnt64_t)-1)
3267 		resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3268 	else
3269 		resp->resok.fbytes = (size3)sb.f_bfree;
3270 	if (sb.f_bavail != (fsblkcnt64_t)-1)
3271 		resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3272 	else
3273 		resp->resok.abytes = (size3)sb.f_bavail;
3274 	resp->resok.tfiles = (size3)sb.f_files;
3275 	resp->resok.ffiles = (size3)sb.f_ffree;
3276 	resp->resok.afiles = (size3)sb.f_favail;
3277 	resp->resok.invarsec = 0;
3278 	return;
3279 
3280 out:
3281 	if (curthread->t_flag & T_WOULDBLOCK) {
3282 		curthread->t_flag &= ~T_WOULDBLOCK;
3283 		resp->status = NFS3ERR_JUKEBOX;
3284 	} else
3285 		resp->status = puterrno3(error);
3286 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3287 }
3288 
3289 fhandle_t *
3290 rfs3_fsstat_getfh(FSSTAT3args *args)
3291 {
3292 
3293 	return ((fhandle_t *)&args->fsroot.fh3_u.nfs_fh3_i.fh3_i);
3294 }
3295 
3296 /* ARGSUSED */
3297 void
3298 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3299 	struct svc_req *req, cred_t *cr)
3300 {
3301 	vnode_t *vp;
3302 	struct vattr *vap;
3303 	struct vattr va;
3304 	uint32_t xfer_size;
3305 	ulong_t l = 0;
3306 	int error;
3307 
3308 	vp = nfs3_fhtovp(&args->fsroot, exi);
3309 	if (vp == NULL) {
3310 		if (curthread->t_flag & T_WOULDBLOCK) {
3311 			curthread->t_flag &= ~T_WOULDBLOCK;
3312 			resp->status = NFS3ERR_JUKEBOX;
3313 		} else
3314 			resp->status = NFS3ERR_STALE;
3315 		vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3316 		return;
3317 	}
3318 
3319 #ifdef DEBUG
3320 	if (rfs3_do_post_op_attr) {
3321 		va.va_mask = AT_ALL;
3322 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3323 	} else
3324 		vap = NULL;
3325 #else
3326 	va.va_mask = AT_ALL;
3327 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3328 #endif
3329 
3330 	resp->status = NFS3_OK;
3331 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3332 	xfer_size = rfs3_tsize(req);
3333 	resp->resok.rtmax = xfer_size;
3334 	resp->resok.rtpref = xfer_size;
3335 	resp->resok.rtmult = DEV_BSIZE;
3336 	resp->resok.wtmax = xfer_size;
3337 	resp->resok.wtpref = xfer_size;
3338 	resp->resok.wtmult = DEV_BSIZE;
3339 	resp->resok.dtpref = MAXBSIZE;
3340 
3341 	/*
3342 	 * Large file spec: want maxfilesize based on limit of
3343 	 * underlying filesystem.  We can guess 2^31-1 if need be.
3344 	 */
3345 	error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr);
3346 
3347 	VN_RELE(vp);
3348 
3349 	if (!error && l != 0 && l <= 64)
3350 		resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3351 	else
3352 		resp->resok.maxfilesize = MAXOFF32_T;
3353 
3354 	resp->resok.time_delta.seconds = 0;
3355 	resp->resok.time_delta.nseconds = 1000;
3356 	resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3357 	    FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3358 }
3359 
3360 fhandle_t *
3361 rfs3_fsinfo_getfh(FSINFO3args *args)
3362 {
3363 
3364 	return ((fhandle_t *)&args->fsroot.fh3_u.nfs_fh3_i.fh3_i);
3365 }
3366 
3367 /* ARGSUSED */
3368 void
3369 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3370 	struct svc_req *req, cred_t *cr)
3371 {
3372 	int error;
3373 	vnode_t *vp;
3374 	struct vattr *vap;
3375 	struct vattr va;
3376 	ulong_t val;
3377 
3378 	vap = NULL;
3379 
3380 	vp = nfs3_fhtovp(&args->object, exi);
3381 	if (vp == NULL) {
3382 		error = ESTALE;
3383 		goto out;
3384 	}
3385 
3386 #ifdef DEBUG
3387 	if (rfs3_do_post_op_attr) {
3388 		va.va_mask = AT_ALL;
3389 		vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3390 	} else
3391 		vap = NULL;
3392 #else
3393 	va.va_mask = AT_ALL;
3394 	vap = VOP_GETATTR(vp, &va, 0, cr) ? NULL : &va;
3395 #endif
3396 
3397 	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr);
3398 	if (error)
3399 		goto out;
3400 	resp->resok.info.link_max = (uint32)val;
3401 
3402 	error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr);
3403 	if (error)
3404 		goto out;
3405 	resp->resok.info.name_max = (uint32)val;
3406 
3407 	error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr);
3408 	if (error)
3409 		goto out;
3410 	if (val == 1)
3411 		resp->resok.info.no_trunc = TRUE;
3412 	else
3413 		resp->resok.info.no_trunc = FALSE;
3414 
3415 	error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr);
3416 	if (error)
3417 		goto out;
3418 	if (val == 1)
3419 		resp->resok.info.chown_restricted = TRUE;
3420 	else
3421 		resp->resok.info.chown_restricted = FALSE;
3422 
3423 	VN_RELE(vp);
3424 
3425 	resp->status = NFS3_OK;
3426 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3427 	resp->resok.info.case_insensitive = FALSE;
3428 	resp->resok.info.case_preserving = TRUE;
3429 	return;
3430 
3431 out:
3432 	if (curthread->t_flag & T_WOULDBLOCK) {
3433 		curthread->t_flag &= ~T_WOULDBLOCK;
3434 		resp->status = NFS3ERR_JUKEBOX;
3435 	} else
3436 		resp->status = puterrno3(error);
3437 	if (vp != NULL)
3438 		VN_RELE(vp);
3439 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3440 }
3441 
3442 fhandle_t *
3443 rfs3_pathconf_getfh(PATHCONF3args *args)
3444 {
3445 
3446 	return ((fhandle_t *)&args->object.fh3_u.nfs_fh3_i.fh3_i);
3447 }
3448 
3449 void
3450 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
3451 	struct svc_req *req, cred_t *cr)
3452 {
3453 	int error;
3454 	vnode_t *vp;
3455 	struct vattr *bvap;
3456 	struct vattr bva;
3457 	struct vattr *avap;
3458 	struct vattr ava;
3459 
3460 	bvap = NULL;
3461 	avap = NULL;
3462 
3463 	vp = nfs3_fhtovp(&args->file, exi);
3464 	if (vp == NULL) {
3465 		error = ESTALE;
3466 		goto out;
3467 	}
3468 
3469 	bva.va_mask = AT_ALL;
3470 	error = VOP_GETATTR(vp, &bva, 0, cr);
3471 
3472 	/*
3473 	 * If we can't get the attributes, then we can't do the
3474 	 * right access checking.  So, we'll fail the request.
3475 	 */
3476 	if (error)
3477 		goto out;
3478 
3479 #ifdef DEBUG
3480 	if (rfs3_do_pre_op_attr)
3481 		bvap = &bva;
3482 	else
3483 		bvap = NULL;
3484 #else
3485 	bvap = &bva;
3486 #endif
3487 
3488 	if (rdonly(exi, req)) {
3489 		resp->status = NFS3ERR_ROFS;
3490 		goto out1;
3491 	}
3492 
3493 	if (vp->v_type != VREG) {
3494 		resp->status = NFS3ERR_INVAL;
3495 		goto out1;
3496 	}
3497 
3498 	if (crgetuid(cr) != bva.va_uid &&
3499 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr)))
3500 		goto out;
3501 
3502 	error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr);
3503 	if (!error)
3504 		error = VOP_FSYNC(vp, FNODSYNC, cr);
3505 
3506 #ifdef DEBUG
3507 	if (rfs3_do_post_op_attr) {
3508 		ava.va_mask = AT_ALL;
3509 		avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
3510 	} else
3511 		avap = NULL;
3512 #else
3513 	ava.va_mask = AT_ALL;
3514 	avap = VOP_GETATTR(vp, &ava, 0, cr) ? NULL : &ava;
3515 #endif
3516 
3517 	if (error)
3518 		goto out;
3519 
3520 	VN_RELE(vp);
3521 
3522 	resp->status = NFS3_OK;
3523 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
3524 	resp->resok.verf = write3verf;
3525 	return;
3526 
3527 out:
3528 	if (curthread->t_flag & T_WOULDBLOCK) {
3529 		curthread->t_flag &= ~T_WOULDBLOCK;
3530 		resp->status = NFS3ERR_JUKEBOX;
3531 	} else
3532 		resp->status = puterrno3(error);
3533 out1:
3534 	if (vp != NULL)
3535 		VN_RELE(vp);
3536 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
3537 }
3538 
3539 fhandle_t *
3540 rfs3_commit_getfh(COMMIT3args *args)
3541 {
3542 
3543 	return ((fhandle_t *)&args->file.fh3_u.nfs_fh3_i.fh3_i);
3544 }
3545 
3546 static int
3547 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
3548 {
3549 
3550 	vap->va_mask = 0;
3551 
3552 	if (sap->mode.set_it) {
3553 		vap->va_mode = (mode_t)sap->mode.mode;
3554 		vap->va_mask |= AT_MODE;
3555 	}
3556 	if (sap->uid.set_it) {
3557 		vap->va_uid = (uid_t)sap->uid.uid;
3558 		vap->va_mask |= AT_UID;
3559 	}
3560 	if (sap->gid.set_it) {
3561 		vap->va_gid = (gid_t)sap->gid.gid;
3562 		vap->va_mask |= AT_GID;
3563 	}
3564 	if (sap->size.set_it) {
3565 		if (sap->size.size > (size3)((u_longlong_t)-1))
3566 			return (EINVAL);
3567 		vap->va_size = sap->size.size;
3568 		vap->va_mask |= AT_SIZE;
3569 	}
3570 	if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
3571 #ifndef _LP64
3572 		/* check time validity */
3573 		if (!NFS3_TIME_OK(sap->atime.atime.seconds))
3574 			return (EOVERFLOW);
3575 #endif
3576 		/*
3577 		 * nfs protocol defines times as unsigned so don't extend sign,
3578 		 * unless sysadmin set nfs_allow_preepoch_time.
3579 		 */
3580 		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
3581 			sap->atime.atime.seconds);
3582 		vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
3583 		vap->va_mask |= AT_ATIME;
3584 	} else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
3585 		gethrestime(&vap->va_atime);
3586 		vap->va_mask |= AT_ATIME;
3587 	}
3588 	if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
3589 #ifndef _LP64
3590 		/* check time validity */
3591 		if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
3592 			return (EOVERFLOW);
3593 #endif
3594 		/*
3595 		 * nfs protocol defines times as unsigned so don't extend sign,
3596 		 * unless sysadmin set nfs_allow_preepoch_time.
3597 		 */
3598 		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
3599 			sap->mtime.mtime.seconds);
3600 		vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
3601 		vap->va_mask |= AT_MTIME;
3602 	} else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
3603 		gethrestime(&vap->va_mtime);
3604 		vap->va_mask |= AT_MTIME;
3605 	}
3606 
3607 	return (0);
3608 }
3609 
3610 static ftype3 vt_to_nf3[] = {
3611 	0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
3612 };
3613 
3614 static int
3615 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
3616 {
3617 
3618 	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
3619 	/* Return error if time or size overflow */
3620 	if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
3621 		return (EOVERFLOW);
3622 	}
3623 	fap->type = vt_to_nf3[vap->va_type];
3624 	fap->mode = (mode3)(vap->va_mode & MODEMASK);
3625 	fap->nlink = (uint32)vap->va_nlink;
3626 	if (vap->va_uid == UID_NOBODY)
3627 		fap->uid = (uid3)NFS_UID_NOBODY;
3628 	else
3629 		fap->uid = (uid3)vap->va_uid;
3630 	if (vap->va_gid == GID_NOBODY)
3631 		fap->gid = (gid3)NFS_GID_NOBODY;
3632 	else
3633 		fap->gid = (gid3)vap->va_gid;
3634 	fap->size = (size3)vap->va_size;
3635 	fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
3636 	fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
3637 	fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
3638 	fap->fsid = (uint64)vap->va_fsid;
3639 	fap->fileid = (fileid3)vap->va_nodeid;
3640 	fap->atime.seconds = vap->va_atime.tv_sec;
3641 	fap->atime.nseconds = vap->va_atime.tv_nsec;
3642 	fap->mtime.seconds = vap->va_mtime.tv_sec;
3643 	fap->mtime.nseconds = vap->va_mtime.tv_nsec;
3644 	fap->ctime.seconds = vap->va_ctime.tv_sec;
3645 	fap->ctime.nseconds = vap->va_ctime.tv_nsec;
3646 	return (0);
3647 }
3648 
3649 static int
3650 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
3651 {
3652 
3653 	/* Return error if time or size overflow */
3654 	if (!  (NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
3655 		NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
3656 		NFS3_SIZE_OK(vap->va_size))) {
3657 		return (EOVERFLOW);
3658 	}
3659 	wccap->size = (size3)vap->va_size;
3660 	wccap->mtime.seconds = vap->va_mtime.tv_sec;
3661 	wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
3662 	wccap->ctime.seconds = vap->va_ctime.tv_sec;
3663 	wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
3664 	return (0);
3665 }
3666 
3667 static void
3668 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
3669 {
3670 
3671 	/* don't return attrs if time overflow */
3672 	if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
3673 		poap->attributes = TRUE;
3674 	} else
3675 		poap->attributes = FALSE;
3676 }
3677 
3678 void
3679 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
3680 {
3681 
3682 	/* don't return attrs if time overflow */
3683 	if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
3684 		poap->attributes = TRUE;
3685 	} else
3686 		poap->attributes = FALSE;
3687 }
3688 
3689 static void
3690 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
3691 {
3692 
3693 	vattr_to_pre_op_attr(bvap, &wccp->before);
3694 	vattr_to_post_op_attr(avap, &wccp->after);
3695 }
3696 
3697 void
3698 rfs3_srvrinit(void)
3699 {
3700 	struct rfs3_verf_overlay {
3701 		uint_t id; /* a "unique" identifier */
3702 		int ts; /* a unique timestamp */
3703 	} *verfp;
3704 	timestruc_t now;
3705 
3706 	/*
3707 	 * The following algorithm attempts to find a unique verifier
3708 	 * to be used as the write verifier returned from the server
3709 	 * to the client.  It is important that this verifier change
3710 	 * whenever the server reboots.  Of secondary importance, it
3711 	 * is important for the verifier to be unique between two
3712 	 * different servers.
3713 	 *
3714 	 * Thus, an attempt is made to use the system hostid and the
3715 	 * current time in seconds when the nfssrv kernel module is
3716 	 * loaded.  It is assumed that an NFS server will not be able
3717 	 * to boot and then to reboot in less than a second.  If the
3718 	 * hostid has not been set, then the current high resolution
3719 	 * time is used.  This will ensure different verifiers each
3720 	 * time the server reboots and minimize the chances that two
3721 	 * different servers will have the same verifier.
3722 	 */
3723 
3724 #ifndef	lint
3725 	/*
3726 	 * We ASSERT that this constant logic expression is
3727 	 * always true because in the past, it wasn't.
3728 	 */
3729 	ASSERT(sizeof (*verfp) <= sizeof (write3verf));
3730 #endif
3731 
3732 	gethrestime(&now);
3733 	verfp = (struct rfs3_verf_overlay *)&write3verf;
3734 	verfp->ts = (int)now.tv_sec;
3735 	verfp->id = (uint_t)nfs_atoi(hw_serial);
3736 
3737 	if (verfp->id == 0)
3738 		verfp->id = (uint_t)now.tv_nsec;
3739 
3740 }
3741 
/*
 * NFSv3 server teardown hook.  Intentionally empty.
 */
void
rfs3_srvrfini(void)
{
	/* No work is required here. */
}
3747