xref: /titanic_44/usr/src/uts/common/fs/nfs/nfs3_srv.c (revision 98157a7002f4f2cf7978f3084ca5577f0a1d72b2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28 
29 #pragma ident	"%Z%%M%	%I%	%E% SMI"
30 
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/cred.h>
35 #include <sys/buf.h>
36 #include <sys/vfs.h>
37 #include <sys/vnode.h>
38 #include <sys/uio.h>
39 #include <sys/errno.h>
40 #include <sys/sysmacros.h>
41 #include <sys/statvfs.h>
42 #include <sys/kmem.h>
43 #include <sys/dirent.h>
44 #include <sys/cmn_err.h>
45 #include <sys/debug.h>
46 #include <sys/systeminfo.h>
47 #include <sys/flock.h>
48 #include <sys/nbmlock.h>
49 #include <sys/policy.h>
50 #include <sys/sdt.h>
51 
52 #include <rpc/types.h>
53 #include <rpc/auth.h>
54 #include <rpc/svc.h>
55 
56 #include <nfs/nfs.h>
57 #include <nfs/export.h>
58 
59 #include <sys/strsubr.h>
60 
61 #include <sys/tsol/label.h>
62 #include <sys/tsol/tndb.h>
63 
64 #include <inet/ip.h>
65 #include <inet/ip6.h>
66 
67 /*
68  * These are the interface routines for the server side of the
69  * Network File System.  See the NFS version 3 protocol specification
70  * for a description of this interface.
71  */
72 
#ifdef DEBUG
/*
 * DEBUG-only switches.  The handlers in this file consult
 * rfs3_do_pre_op_attr / rfs3_do_post_op_attr before filling in the
 * pre-operation / post-operation attributes of a reply; when a switch
 * is zero the corresponding attribute pointer is left NULL.
 * NOTE(review): rfs3_do_post_op_fh3 is not referenced in this part of
 * the file; presumably it gates the optional post-op file handle.
 */
int rfs3_do_pre_op_attr = 1;
int rfs3_do_post_op_attr = 1;
int rfs3_do_post_op_fh3 = 1;
#endif
78 
79 static writeverf3 write3verf;
80 
81 static int	sattr3_to_vattr(sattr3 *, struct vattr *);
82 static int	vattr_to_fattr3(struct vattr *, fattr3 *);
83 static int	vattr_to_wcc_attr(struct vattr *, wcc_attr *);
84 static void	vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
85 static void	vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
86 
87 u_longlong_t nfs3_srv_caller_id;
88 
89 /* ARGSUSED */
90 void
91 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
92 	struct svc_req *req, cred_t *cr)
93 {
94 	int error;
95 	vnode_t *vp;
96 	struct vattr va;
97 
98 	vp = nfs3_fhtovp(&args->object, exi);
99 
100 	DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
101 	    cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
102 
103 	if (vp == NULL) {
104 		error = ESTALE;
105 		goto out;
106 	}
107 
108 	va.va_mask = AT_ALL;
109 	error = rfs4_delegated_getattr(vp, &va, 0, cr);
110 
111 	if (!error) {
112 		/* overflow error if time or size is out of range */
113 		error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
114 		if (error)
115 			goto out;
116 		resp->status = NFS3_OK;
117 
118 		DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
119 		    cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
120 
121 		VN_RELE(vp);
122 
123 		return;
124 	}
125 
126 out:
127 	if (curthread->t_flag & T_WOULDBLOCK) {
128 		curthread->t_flag &= ~T_WOULDBLOCK;
129 		resp->status = NFS3ERR_JUKEBOX;
130 	} else
131 		resp->status = puterrno3(error);
132 
133 	DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
134 	    cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
135 
136 	if (vp != NULL)
137 		VN_RELE(vp);
138 }
139 
140 void *
141 rfs3_getattr_getfh(GETATTR3args *args)
142 {
143 
144 	return (&args->object);
145 }
146 
147 void
148 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
149 	struct svc_req *req, cred_t *cr)
150 {
151 	int error;
152 	vnode_t *vp;
153 	struct vattr *bvap;
154 	struct vattr bva;
155 	struct vattr *avap;
156 	struct vattr ava;
157 	int flag;
158 	int in_crit = 0;
159 	struct flock64 bf;
160 	caller_context_t ct;
161 
162 	bvap = NULL;
163 	avap = NULL;
164 
165 	vp = nfs3_fhtovp(&args->object, exi);
166 
167 	DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
168 	    cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);
169 
170 	if (vp == NULL) {
171 		error = ESTALE;
172 		goto out;
173 	}
174 
175 	error = sattr3_to_vattr(&args->new_attributes, &ava);
176 	if (error)
177 		goto out;
178 
179 	if (is_system_labeled()) {
180 		bslabel_t *clabel = req->rq_label;
181 
182 		ASSERT(clabel != NULL);
183 		DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
184 		    "got client label from request(1)", struct svc_req *, req);
185 
186 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
187 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK)) {
188 				resp->status = NFS3ERR_ACCES;
189 				goto out1;
190 			}
191 		}
192 	}
193 
194 	/*
195 	 * We need to specially handle size changes because of
196 	 * possible conflicting NBMAND locks. Get into critical
197 	 * region before VOP_GETATTR, so the size attribute is
198 	 * valid when checking conflicts.
199 	 *
200 	 * Also, check to see if the v4 side of the server has
201 	 * delegated this file.  If so, then we return JUKEBOX to
202 	 * allow the client to retrasmit its request.
203 	 */
204 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
205 		if (nbl_need_check(vp)) {
206 			nbl_start_crit(vp, RW_READER);
207 			in_crit = 1;
208 		}
209 	}
210 
211 	bva.va_mask = AT_ALL;
212 	error = rfs4_delegated_getattr(vp, &bva, 0, cr);
213 
214 	/*
215 	 * If we can't get the attributes, then we can't do the
216 	 * right access checking.  So, we'll fail the request.
217 	 */
218 	if (error)
219 		goto out;
220 
221 #ifdef DEBUG
222 	if (rfs3_do_pre_op_attr)
223 		bvap = &bva;
224 #else
225 	bvap = &bva;
226 #endif
227 
228 	if (rdonly(exi, req) || vn_is_readonly(vp)) {
229 		resp->status = NFS3ERR_ROFS;
230 		goto out1;
231 	}
232 
233 	if (args->guard.check &&
234 	    (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
235 	    args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
236 		resp->status = NFS3ERR_NOT_SYNC;
237 		goto out1;
238 	}
239 
240 	if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
241 		flag = ATTR_UTIME;
242 	else
243 		flag = 0;
244 
245 	/*
246 	 * If the filesystem is exported with nosuid, then mask off
247 	 * the setuid and setgid bits.
248 	 */
249 	if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
250 	    (exi->exi_export.ex_flags & EX_NOSUID))
251 		ava.va_mode &= ~(VSUID | VSGID);
252 
253 	ct.cc_sysid = 0;
254 	ct.cc_pid = 0;
255 	ct.cc_caller_id = nfs3_srv_caller_id;
256 	ct.cc_flags = CC_DONTBLOCK;
257 
258 	/*
259 	 * We need to specially handle size changes because it is
260 	 * possible for the client to create a file with modes
261 	 * which indicate read-only, but with the file opened for
262 	 * writing.  If the client then tries to set the size of
263 	 * the file, then the normal access checking done in
264 	 * VOP_SETATTR would prevent the client from doing so,
265 	 * although it should be legal for it to do so.  To get
266 	 * around this, we do the access checking for ourselves
267 	 * and then use VOP_SPACE which doesn't do the access
268 	 * checking which VOP_SETATTR does. VOP_SPACE can only
269 	 * operate on VREG files, let VOP_SETATTR handle the other
270 	 * extremely rare cases.
271 	 * Also the client should not be allowed to change the
272 	 * size of the file if there is a conflicting non-blocking
273 	 * mandatory lock in the region the change.
274 	 */
275 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
276 		if (in_crit) {
277 			u_offset_t offset;
278 			ssize_t length;
279 
280 			if (ava.va_size < bva.va_size) {
281 				offset = ava.va_size;
282 				length = bva.va_size - ava.va_size;
283 			} else {
284 				offset = bva.va_size;
285 				length = ava.va_size - bva.va_size;
286 			}
287 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
288 			    NULL)) {
289 				error = EACCES;
290 				goto out;
291 			}
292 		}
293 
294 		if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
295 			ava.va_mask &= ~AT_SIZE;
296 			bf.l_type = F_WRLCK;
297 			bf.l_whence = 0;
298 			bf.l_start = (off64_t)ava.va_size;
299 			bf.l_len = 0;
300 			bf.l_sysid = 0;
301 			bf.l_pid = 0;
302 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
303 			    (offset_t)ava.va_size, cr, &ct);
304 		}
305 	}
306 
307 	if (!error && ava.va_mask)
308 		error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
309 
310 	/* check if a monitor detected a delegation conflict */
311 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
312 		resp->status = NFS3ERR_JUKEBOX;
313 		goto out1;
314 	}
315 
316 #ifdef DEBUG
317 	if (rfs3_do_post_op_attr) {
318 		ava.va_mask = AT_ALL;
319 		avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
320 	} else
321 		avap = NULL;
322 #else
323 	ava.va_mask = AT_ALL;
324 	avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
325 #endif
326 
327 	/*
328 	 * Force modified metadata out to stable storage.
329 	 */
330 	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
331 
332 	if (error)
333 		goto out;
334 
335 	if (in_crit)
336 		nbl_end_crit(vp);
337 
338 	resp->status = NFS3_OK;
339 	vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
340 
341 	DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
342 	    cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
343 
344 	VN_RELE(vp);
345 
346 	return;
347 
348 out:
349 	if (curthread->t_flag & T_WOULDBLOCK) {
350 		curthread->t_flag &= ~T_WOULDBLOCK;
351 		resp->status = NFS3ERR_JUKEBOX;
352 	} else
353 		resp->status = puterrno3(error);
354 out1:
355 	DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
356 	    cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
357 
358 	if (vp != NULL) {
359 		if (in_crit)
360 			nbl_end_crit(vp);
361 		VN_RELE(vp);
362 	}
363 	vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
364 }
365 
366 void *
367 rfs3_setattr_getfh(SETATTR3args *args)
368 {
369 
370 	return (&args->object);
371 }
372 
373 /* ARGSUSED */
374 void
375 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
376 	struct svc_req *req, cred_t *cr)
377 {
378 	int error;
379 	vnode_t *vp;
380 	vnode_t *dvp;
381 	struct vattr *vap;
382 	struct vattr va;
383 	struct vattr *dvap;
384 	struct vattr dva;
385 	nfs_fh3 *fhp;
386 	struct sec_ol sec = {0, 0};
387 	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
388 
389 	dvap = NULL;
390 
391 	/*
392 	 * Allow lookups from the root - the default
393 	 * location of the public filehandle.
394 	 */
395 	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
396 		dvp = rootdir;
397 		VN_HOLD(dvp);
398 
399 		DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
400 		    cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
401 	} else {
402 		dvp = nfs3_fhtovp(&args->what.dir, exi);
403 
404 		DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
405 		    cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
406 
407 		if (dvp == NULL) {
408 			error = ESTALE;
409 			goto out;
410 		}
411 	}
412 
413 #ifdef DEBUG
414 	if (rfs3_do_pre_op_attr) {
415 		dva.va_mask = AT_ALL;
416 		dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
417 	}
418 #else
419 	dva.va_mask = AT_ALL;
420 	dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
421 #endif
422 
423 	if (args->what.name == nfs3nametoolong) {
424 		resp->status = NFS3ERR_NAMETOOLONG;
425 		goto out1;
426 	}
427 
428 	if (args->what.name == NULL || *(args->what.name) == '\0') {
429 		resp->status = NFS3ERR_ACCES;
430 		goto out1;
431 	}
432 
433 	fhp = &args->what.dir;
434 	if (strcmp(args->what.name, "..") == 0 &&
435 	    EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
436 		resp->status = NFS3ERR_NOENT;
437 		goto out1;
438 	}
439 
440 	/*
441 	 * If the public filehandle is used then allow
442 	 * a multi-component lookup
443 	 */
444 	if (PUBLIC_FH3(&args->what.dir)) {
445 		publicfh_flag = TRUE;
446 		error = rfs_publicfh_mclookup(args->what.name, dvp, cr, &vp,
447 		    &exi, &sec);
448 		if (error && exi != NULL)
449 			exi_rele(exi); /* See comment below Re: publicfh_flag */
450 		/*
451 		 * Since WebNFS may bypass MOUNT, we need to ensure this
452 		 * request didn't come from an unlabeled admin_low client.
453 		 */
454 		if (is_system_labeled() && error == 0) {
455 			struct sockaddr *ca;
456 			int		addr_type;
457 			void		*ipaddr;
458 			tsol_tpc_t	*tp;
459 
460 			ca = (struct sockaddr *)svc_getrpccaller(
461 			    req->rq_xprt)->buf;
462 			if (ca->sa_family == AF_INET) {
463 				addr_type = IPV4_VERSION;
464 				ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
465 			} else if (ca->sa_family == AF_INET6) {
466 				addr_type = IPV6_VERSION;
467 				ipaddr = &((struct sockaddr_in6 *)
468 				    ca)->sin6_addr;
469 			}
470 			tp = find_tpc(ipaddr, addr_type, B_FALSE);
471 			if (tp == NULL || tp->tpc_tp.tp_doi !=
472 			    l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
473 			    SUN_CIPSO) {
474 				if (exi != NULL)
475 					exi_rele(exi);
476 				VN_RELE(vp);
477 				resp->status = NFS3ERR_ACCES;
478 				error = 1;
479 			}
480 			if (tp != NULL)
481 				TPC_RELE(tp);
482 		}
483 	} else {
484 		error = VOP_LOOKUP(dvp, args->what.name, &vp,
485 		    NULL, 0, NULL, cr, NULL, NULL, NULL);
486 	}
487 
488 	if (is_system_labeled() && error == 0) {
489 		bslabel_t *clabel = req->rq_label;
490 
491 		ASSERT(clabel != NULL);
492 		DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
493 		    "got client label from request(1)", struct svc_req *, req);
494 
495 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
496 			if (!do_rfs_label_check(clabel, dvp,
497 			    DOMINANCE_CHECK)) {
498 				if (publicfh_flag && exi != NULL)
499 					exi_rele(exi);
500 				VN_RELE(vp);
501 				resp->status = NFS3ERR_ACCES;
502 				error = 1;
503 			}
504 		}
505 	}
506 
507 #ifdef DEBUG
508 	if (rfs3_do_post_op_attr) {
509 		dva.va_mask = AT_ALL;
510 		dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
511 	} else
512 		dvap = NULL;
513 #else
514 	dva.va_mask = AT_ALL;
515 	dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
516 #endif
517 
518 	if (error)
519 		goto out;
520 
521 	if (sec.sec_flags & SEC_QUERY) {
522 		error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
523 	} else {
524 		error = makefh3(&resp->resok.object, vp, exi);
525 		if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
526 			auth_weak = TRUE;
527 	}
528 
529 	if (error) {
530 		VN_RELE(vp);
531 		goto out;
532 	}
533 
534 	/*
535 	 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
536 	 * and have obtained a new exportinfo in exi which needs to be
537 	 * released. Note the the original exportinfo pointed to by exi
538 	 * will be released by the caller, common_dispatch.
539 	 */
540 	if (publicfh_flag)
541 		exi_rele(exi);
542 
543 #ifdef DEBUG
544 	if (rfs3_do_post_op_attr) {
545 		va.va_mask = AT_ALL;
546 		vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
547 	} else
548 		vap = NULL;
549 #else
550 	va.va_mask = AT_ALL;
551 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
552 #endif
553 
554 	VN_RELE(vp);
555 
556 	resp->status = NFS3_OK;
557 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
558 	vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
559 
560 	/*
561 	 * If it's public fh, no 0x81, and client's flavor is
562 	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
563 	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
564 	 */
565 	if (auth_weak)
566 		resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
567 
568 	DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
569 	    cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
570 	VN_RELE(dvp);
571 
572 	return;
573 
574 out:
575 	if (curthread->t_flag & T_WOULDBLOCK) {
576 		curthread->t_flag &= ~T_WOULDBLOCK;
577 		resp->status = NFS3ERR_JUKEBOX;
578 	} else
579 		resp->status = puterrno3(error);
580 out1:
581 	DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
582 	    cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
583 
584 	if (dvp != NULL)
585 		VN_RELE(dvp);
586 	vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
587 
588 }
589 
590 void *
591 rfs3_lookup_getfh(LOOKUP3args *args)
592 {
593 
594 	return (&args->what.dir);
595 }
596 
597 /* ARGSUSED */
598 void
599 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
600 	struct svc_req *req, cred_t *cr)
601 {
602 	int error;
603 	vnode_t *vp;
604 	struct vattr *vap;
605 	struct vattr va;
606 	int checkwriteperm;
607 	boolean_t dominant_label = B_FALSE;
608 	boolean_t equal_label = B_FALSE;
609 	boolean_t admin_low_client;
610 
611 	vap = NULL;
612 
613 	vp = nfs3_fhtovp(&args->object, exi);
614 
615 	DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
616 	    cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
617 
618 	if (vp == NULL) {
619 		error = ESTALE;
620 		goto out;
621 	}
622 
623 	/*
624 	 * If the file system is exported read only, it is not appropriate
625 	 * to check write permissions for regular files and directories.
626 	 * Special files are interpreted by the client, so the underlying
627 	 * permissions are sent back to the client for interpretation.
628 	 */
629 	if (rdonly(exi, req) && (vp->v_type == VREG || vp->v_type == VDIR))
630 		checkwriteperm = 0;
631 	else
632 		checkwriteperm = 1;
633 
634 	/*
635 	 * We need the mode so that we can correctly determine access
636 	 * permissions relative to a mandatory lock file.  Access to
637 	 * mandatory lock files is denied on the server, so it might
638 	 * as well be reflected to the server during the open.
639 	 */
640 	va.va_mask = AT_MODE;
641 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
642 	if (error)
643 		goto out;
644 
645 #ifdef DEBUG
646 	if (rfs3_do_post_op_attr)
647 		vap = &va;
648 #else
649 	vap = &va;
650 #endif
651 
652 	resp->resok.access = 0;
653 
654 	if (is_system_labeled()) {
655 		bslabel_t *clabel = req->rq_label;
656 
657 		ASSERT(clabel != NULL);
658 		DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
659 		    "got client label from request(1)", struct svc_req *, req);
660 
661 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
662 			if ((equal_label = do_rfs_label_check(clabel, vp,
663 			    EQUALITY_CHECK)) == B_FALSE) {
664 				dominant_label = do_rfs_label_check(clabel,
665 				    vp, DOMINANCE_CHECK);
666 			} else
667 				dominant_label = B_TRUE;
668 			admin_low_client = B_FALSE;
669 		} else
670 			admin_low_client = B_TRUE;
671 	}
672 
673 	if (args->access & ACCESS3_READ) {
674 		error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
675 		if (error) {
676 			if (curthread->t_flag & T_WOULDBLOCK)
677 				goto out;
678 		} else if (!MANDLOCK(vp, va.va_mode) &&
679 		    (!is_system_labeled() || admin_low_client ||
680 		    dominant_label))
681 			resp->resok.access |= ACCESS3_READ;
682 	}
683 	if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
684 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
685 		if (error) {
686 			if (curthread->t_flag & T_WOULDBLOCK)
687 				goto out;
688 		} else if (!is_system_labeled() || admin_low_client ||
689 		    dominant_label)
690 			resp->resok.access |= ACCESS3_LOOKUP;
691 	}
692 	if (checkwriteperm &&
693 	    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
694 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
695 		if (error) {
696 			if (curthread->t_flag & T_WOULDBLOCK)
697 				goto out;
698 		} else if (!MANDLOCK(vp, va.va_mode) &&
699 		    (!is_system_labeled() || admin_low_client || equal_label)) {
700 			resp->resok.access |=
701 			    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
702 		}
703 	}
704 	if (checkwriteperm &&
705 	    (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
706 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
707 		if (error) {
708 			if (curthread->t_flag & T_WOULDBLOCK)
709 				goto out;
710 		} else if (!is_system_labeled() || admin_low_client ||
711 		    equal_label)
712 			resp->resok.access |= ACCESS3_DELETE;
713 	}
714 	if (args->access & ACCESS3_EXECUTE) {
715 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
716 		if (error) {
717 			if (curthread->t_flag & T_WOULDBLOCK)
718 				goto out;
719 		} else if (!MANDLOCK(vp, va.va_mode) &&
720 		    (!is_system_labeled() || admin_low_client ||
721 		    dominant_label))
722 			resp->resok.access |= ACCESS3_EXECUTE;
723 	}
724 
725 #ifdef DEBUG
726 	if (rfs3_do_post_op_attr) {
727 		va.va_mask = AT_ALL;
728 		vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
729 	} else
730 		vap = NULL;
731 #else
732 	va.va_mask = AT_ALL;
733 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
734 #endif
735 
736 	resp->status = NFS3_OK;
737 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
738 
739 	DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
740 	    cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
741 
742 	VN_RELE(vp);
743 
744 	return;
745 
746 out:
747 	if (curthread->t_flag & T_WOULDBLOCK) {
748 		curthread->t_flag &= ~T_WOULDBLOCK;
749 		resp->status = NFS3ERR_JUKEBOX;
750 	} else
751 		resp->status = puterrno3(error);
752 	DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
753 	    cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
754 	if (vp != NULL)
755 		VN_RELE(vp);
756 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
757 }
758 
759 void *
760 rfs3_access_getfh(ACCESS3args *args)
761 {
762 
763 	return (&args->object);
764 }
765 
766 /* ARGSUSED */
767 void
768 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
769 	struct svc_req *req, cred_t *cr)
770 {
771 	int error;
772 	vnode_t *vp;
773 	struct vattr *vap;
774 	struct vattr va;
775 	struct iovec iov;
776 	struct uio uio;
777 	char *data;
778 
779 	vap = NULL;
780 
781 	vp = nfs3_fhtovp(&args->symlink, exi);
782 
783 	DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
784 	    cred_t *, cr, vnode_t *, vp, READLINK3args *, args);
785 
786 	if (vp == NULL) {
787 		error = ESTALE;
788 		goto out;
789 	}
790 
791 	va.va_mask = AT_ALL;
792 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
793 	if (error)
794 		goto out;
795 
796 #ifdef DEBUG
797 	if (rfs3_do_post_op_attr)
798 		vap = &va;
799 #else
800 	vap = &va;
801 #endif
802 
803 	if (vp->v_type != VLNK) {
804 		resp->status = NFS3ERR_INVAL;
805 		goto out1;
806 	}
807 
808 	if (MANDLOCK(vp, va.va_mode)) {
809 		resp->status = NFS3ERR_ACCES;
810 		goto out1;
811 	}
812 
813 	if (is_system_labeled()) {
814 		bslabel_t *clabel = req->rq_label;
815 
816 		ASSERT(clabel != NULL);
817 		DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
818 		    "got client label from request(1)", struct svc_req *, req);
819 
820 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
821 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
822 				resp->status = NFS3ERR_ACCES;
823 				goto out1;
824 			}
825 		}
826 	}
827 
828 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
829 
830 	iov.iov_base = data;
831 	iov.iov_len = MAXPATHLEN;
832 	uio.uio_iov = &iov;
833 	uio.uio_iovcnt = 1;
834 	uio.uio_segflg = UIO_SYSSPACE;
835 	uio.uio_extflg = UIO_COPY_CACHED;
836 	uio.uio_loffset = 0;
837 	uio.uio_resid = MAXPATHLEN;
838 
839 	error = VOP_READLINK(vp, &uio, cr, NULL);
840 
841 #ifdef DEBUG
842 	if (rfs3_do_post_op_attr) {
843 		va.va_mask = AT_ALL;
844 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
845 	} else
846 		vap = NULL;
847 #else
848 	va.va_mask = AT_ALL;
849 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
850 #endif
851 
852 #if 0 /* notyet */
853 	/*
854 	 * Don't do this.  It causes local disk writes when just
855 	 * reading the file and the overhead is deemed larger
856 	 * than the benefit.
857 	 */
858 	/*
859 	 * Force modified metadata out to stable storage.
860 	 */
861 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
862 #endif
863 
864 	if (error) {
865 		kmem_free(data, MAXPATHLEN + 1);
866 		goto out;
867 	}
868 
869 	resp->status = NFS3_OK;
870 	vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
871 	resp->resok.data = data;
872 	*(data + MAXPATHLEN - uio.uio_resid) = '\0';
873 
874 	DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
875 	    cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
876 	VN_RELE(vp);
877 
878 	return;
879 
880 out:
881 	if (curthread->t_flag & T_WOULDBLOCK) {
882 		curthread->t_flag &= ~T_WOULDBLOCK;
883 		resp->status = NFS3ERR_JUKEBOX;
884 	} else
885 		resp->status = puterrno3(error);
886 out1:
887 	DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
888 	    cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
889 	if (vp != NULL)
890 		VN_RELE(vp);
891 	vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
892 }
893 
894 void *
895 rfs3_readlink_getfh(READLINK3args *args)
896 {
897 
898 	return (&args->symlink);
899 }
900 
901 void
902 rfs3_readlink_free(READLINK3res *resp)
903 {
904 
905 	if (resp->status == NFS3_OK)
906 		kmem_free(resp->resok.data, MAXPATHLEN + 1);
907 }
908 
909 /* ARGSUSED */
910 void
911 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
912 	struct svc_req *req, cred_t *cr)
913 {
914 	int error;
915 	vnode_t *vp;
916 	struct vattr *vap;
917 	struct vattr va;
918 	struct iovec iov;
919 	struct uio uio;
920 	u_offset_t offset;
921 	mblk_t *mp;
922 	int alloc_err = 0;
923 	int in_crit = 0;
924 	int need_rwunlock = 0;
925 	caller_context_t ct;
926 
927 	vap = NULL;
928 
929 	vp = nfs3_fhtovp(&args->file, exi);
930 
931 	DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
932 	    cred_t *, cr, vnode_t *, vp, READ3args *, args);
933 
934 	if (vp == NULL) {
935 		error = ESTALE;
936 		goto out;
937 	}
938 
939 	if (is_system_labeled()) {
940 		bslabel_t *clabel = req->rq_label;
941 
942 		ASSERT(clabel != NULL);
943 		DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
944 		    "got client label from request(1)", struct svc_req *, req);
945 
946 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
947 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
948 				resp->status = NFS3ERR_ACCES;
949 				goto out1;
950 			}
951 		}
952 	}
953 
954 	ct.cc_sysid = 0;
955 	ct.cc_pid = 0;
956 	ct.cc_caller_id = nfs3_srv_caller_id;
957 	ct.cc_flags = CC_DONTBLOCK;
958 
959 	/*
960 	 * Enter the critical region before calling VOP_RWLOCK
961 	 * to avoid a deadlock with write requests.
962 	 */
963 	if (nbl_need_check(vp)) {
964 		nbl_start_crit(vp, RW_READER);
965 		in_crit = 1;
966 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
967 		    NULL)) {
968 			error = EACCES;
969 			goto out;
970 		}
971 	}
972 
973 	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
974 
975 	/* check if a monitor detected a delegation conflict */
976 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
977 		resp->status = NFS3ERR_JUKEBOX;
978 		goto out1;
979 	}
980 
981 	need_rwunlock = 1;
982 
983 	va.va_mask = AT_ALL;
984 	error = VOP_GETATTR(vp, &va, 0, cr, &ct);
985 
986 	/*
987 	 * If we can't get the attributes, then we can't do the
988 	 * right access checking.  So, we'll fail the request.
989 	 */
990 	if (error)
991 		goto out;
992 
993 #ifdef DEBUG
994 	if (rfs3_do_post_op_attr)
995 		vap = &va;
996 #else
997 	vap = &va;
998 #endif
999 
1000 	if (vp->v_type != VREG) {
1001 		resp->status = NFS3ERR_INVAL;
1002 		goto out1;
1003 	}
1004 
1005 	if (crgetuid(cr) != va.va_uid) {
1006 		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1007 		if (error) {
1008 			if (curthread->t_flag & T_WOULDBLOCK)
1009 				goto out;
1010 			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1011 			if (error)
1012 				goto out;
1013 		}
1014 	}
1015 
1016 	if (MANDLOCK(vp, va.va_mode)) {
1017 		resp->status = NFS3ERR_ACCES;
1018 		goto out1;
1019 	}
1020 
1021 	offset = args->offset;
1022 	if (offset >= va.va_size) {
1023 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1024 		if (in_crit)
1025 			nbl_end_crit(vp);
1026 		resp->status = NFS3_OK;
1027 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1028 		resp->resok.count = 0;
1029 		resp->resok.eof = TRUE;
1030 		resp->resok.data.data_len = 0;
1031 		resp->resok.data.data_val = NULL;
1032 		resp->resok.data.mp = NULL;
1033 		goto done;
1034 	}
1035 
1036 	if (args->count == 0) {
1037 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1038 		if (in_crit)
1039 			nbl_end_crit(vp);
1040 		resp->status = NFS3_OK;
1041 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1042 		resp->resok.count = 0;
1043 		resp->resok.eof = FALSE;
1044 		resp->resok.data.data_len = 0;
1045 		resp->resok.data.data_val = NULL;
1046 		resp->resok.data.mp = NULL;
1047 		goto done;
1048 	}
1049 
1050 	/*
1051 	 * do not allocate memory more the max. allowed
1052 	 * transfer size
1053 	 */
1054 	if (args->count > rfs3_tsize(req))
1055 		args->count = rfs3_tsize(req);
1056 
1057 	/*
1058 	 * mp will contain the data to be sent out in the read reply.
1059 	 * This will be freed after the reply has been sent out (by the
1060 	 * driver).
1061 	 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
1062 	 * that the call to xdrmblk_putmblk() never fails.
1063 	 */
1064 	mp = allocb_wait(RNDUP(args->count), BPRI_MED, STR_NOSIG, &alloc_err);
1065 	ASSERT(mp != NULL);
1066 	ASSERT(alloc_err == 0);
1067 
1068 	iov.iov_base = (caddr_t)mp->b_datap->db_base;
1069 	iov.iov_len = args->count;
1070 	uio.uio_iov = &iov;
1071 	uio.uio_iovcnt = 1;
1072 	uio.uio_segflg = UIO_SYSSPACE;
1073 	uio.uio_extflg = UIO_COPY_CACHED;
1074 	uio.uio_loffset = args->offset;
1075 	uio.uio_resid = args->count;
1076 
1077 	error = VOP_READ(vp, &uio, 0, cr, &ct);
1078 
1079 	if (error) {
1080 		freeb(mp);
1081 		/* check if a monitor detected a delegation conflict */
1082 		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1083 			resp->status = NFS3ERR_JUKEBOX;
1084 			goto out1;
1085 		}
1086 		goto out;
1087 	}
1088 
1089 	va.va_mask = AT_ALL;
1090 	error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1091 
1092 #ifdef DEBUG
1093 	if (rfs3_do_post_op_attr) {
1094 		if (error)
1095 			vap = NULL;
1096 		else
1097 			vap = &va;
1098 	} else
1099 		vap = NULL;
1100 #else
1101 	if (error)
1102 		vap = NULL;
1103 	else
1104 		vap = &va;
1105 #endif
1106 
1107 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1108 
1109 #if 0 /* notyet */
1110 	/*
1111 	 * Don't do this.  It causes local disk writes when just
1112 	 * reading the file and the overhead is deemed larger
1113 	 * than the benefit.
1114 	 */
1115 	/*
1116 	 * Force modified metadata out to stable storage.
1117 	 */
1118 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1119 #endif
1120 
1121 	if (in_crit)
1122 		nbl_end_crit(vp);
1123 
1124 	resp->status = NFS3_OK;
1125 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1126 	resp->resok.count = args->count - uio.uio_resid;
1127 	if (!error && offset + resp->resok.count == va.va_size)
1128 		resp->resok.eof = TRUE;
1129 	else
1130 		resp->resok.eof = FALSE;
1131 	resp->resok.data.data_len = resp->resok.count;
1132 	resp->resok.data.data_val = (char *)mp->b_datap->db_base;
1133 
1134 	resp->resok.data.mp = mp;
1135 
1136 	resp->resok.size = (uint_t)args->count;
1137 
1138 done:
1139 	DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1140 	    cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1141 
1142 	VN_RELE(vp);
1143 
1144 	return;
1145 
1146 out:
1147 	if (curthread->t_flag & T_WOULDBLOCK) {
1148 		curthread->t_flag &= ~T_WOULDBLOCK;
1149 		resp->status = NFS3ERR_JUKEBOX;
1150 	} else
1151 		resp->status = puterrno3(error);
1152 out1:
1153 	DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1154 	    cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1155 
1156 	if (vp != NULL) {
1157 		if (need_rwunlock)
1158 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1159 		if (in_crit)
1160 			nbl_end_crit(vp);
1161 		VN_RELE(vp);
1162 	}
1163 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1164 }
1165 
1166 void
1167 rfs3_read_free(READ3res *resp)
1168 {
1169 	mblk_t *mp;
1170 
1171 	if (resp->status == NFS3_OK) {
1172 		mp = resp->resok.data.mp;
1173 		if (mp != NULL)
1174 			freeb(mp);
1175 	}
1176 }
1177 
1178 void *
1179 rfs3_read_getfh(READ3args *args)
1180 {
1181 
1182 	return (&args->file);
1183 }
1184 
/*
 * Number of iovec entries rfs3_write() keeps on its stack.  An mblk
 * chain with more links than this is described by a kmem_alloc()ed
 * iovec array instead.
 */
#define	MAX_IOVECS	12

#ifdef DEBUG
/* Writes whose mblk chain fit in the on-stack iovec array. */
static int rfs3_write_hits = 0;
/* Writes that needed a dynamically allocated iovec array. */
static int rfs3_write_misses = 0;
#endif
1191 
1192 void
1193 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1194 	struct svc_req *req, cred_t *cr)
1195 {
1196 	int error;
1197 	vnode_t *vp;
1198 	struct vattr *bvap = NULL;
1199 	struct vattr bva;
1200 	struct vattr *avap = NULL;
1201 	struct vattr ava;
1202 	u_offset_t rlimit;
1203 	struct uio uio;
1204 	struct iovec iov[MAX_IOVECS];
1205 	mblk_t *m;
1206 	struct iovec *iovp;
1207 	int iovcnt;
1208 	int ioflag;
1209 	cred_t *savecred;
1210 	int in_crit = 0;
1211 	int rwlock_ret = -1;
1212 	caller_context_t ct;
1213 
1214 	vp = nfs3_fhtovp(&args->file, exi);
1215 
1216 	DTRACE_NFSV3_4(op__write__start, struct svc_req *, req,
1217 	    cred_t *, cr, vnode_t *, vp, WRITE3args *, args);
1218 
1219 	if (vp == NULL) {
1220 		error = ESTALE;
1221 		goto err;
1222 	}
1223 
1224 	if (is_system_labeled()) {
1225 		bslabel_t *clabel = req->rq_label;
1226 
1227 		ASSERT(clabel != NULL);
1228 		DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1229 		    "got client label from request(1)", struct svc_req *, req);
1230 
1231 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1232 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK)) {
1233 				resp->status = NFS3ERR_ACCES;
1234 				goto err1;
1235 			}
1236 		}
1237 	}
1238 
1239 	ct.cc_sysid = 0;
1240 	ct.cc_pid = 0;
1241 	ct.cc_caller_id = nfs3_srv_caller_id;
1242 	ct.cc_flags = CC_DONTBLOCK;
1243 
1244 	/*
1245 	 * We have to enter the critical region before calling VOP_RWLOCK
1246 	 * to avoid a deadlock with ufs.
1247 	 */
1248 	if (nbl_need_check(vp)) {
1249 		nbl_start_crit(vp, RW_READER);
1250 		in_crit = 1;
1251 		if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1252 		    NULL)) {
1253 			error = EACCES;
1254 			goto err;
1255 		}
1256 	}
1257 
1258 	rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1259 
1260 	/* check if a monitor detected a delegation conflict */
1261 	if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1262 		resp->status = NFS3ERR_JUKEBOX;
1263 		rwlock_ret = -1;
1264 		goto err1;
1265 	}
1266 
1267 
1268 	bva.va_mask = AT_ALL;
1269 	error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1270 
1271 	/*
1272 	 * If we can't get the attributes, then we can't do the
1273 	 * right access checking.  So, we'll fail the request.
1274 	 */
1275 	if (error)
1276 		goto err;
1277 
1278 	bvap = &bva;
1279 #ifdef DEBUG
1280 	if (!rfs3_do_pre_op_attr)
1281 		bvap = NULL;
1282 #endif
1283 	avap = bvap;
1284 
1285 	if (args->count != args->data.data_len) {
1286 		resp->status = NFS3ERR_INVAL;
1287 		goto err1;
1288 	}
1289 
1290 	if (rdonly(exi, req)) {
1291 		resp->status = NFS3ERR_ROFS;
1292 		goto err1;
1293 	}
1294 
1295 	if (vp->v_type != VREG) {
1296 		resp->status = NFS3ERR_INVAL;
1297 		goto err1;
1298 	}
1299 
1300 	if (crgetuid(cr) != bva.va_uid &&
1301 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1302 		goto err;
1303 
1304 	if (MANDLOCK(vp, bva.va_mode)) {
1305 		resp->status = NFS3ERR_ACCES;
1306 		goto err1;
1307 	}
1308 
1309 	if (args->count == 0) {
1310 		resp->status = NFS3_OK;
1311 		vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1312 		resp->resok.count = 0;
1313 		resp->resok.committed = args->stable;
1314 		resp->resok.verf = write3verf;
1315 		goto out;
1316 	}
1317 
1318 	if (args->mblk != NULL) {
1319 		iovcnt = 0;
1320 		for (m = args->mblk; m != NULL; m = m->b_cont)
1321 			iovcnt++;
1322 		if (iovcnt <= MAX_IOVECS) {
1323 #ifdef DEBUG
1324 			rfs3_write_hits++;
1325 #endif
1326 			iovp = iov;
1327 		} else {
1328 #ifdef DEBUG
1329 			rfs3_write_misses++;
1330 #endif
1331 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1332 		}
1333 		mblk_to_iov(args->mblk, iovcnt, iovp);
1334 	} else {
1335 		iovcnt = 1;
1336 		iovp = iov;
1337 		iovp->iov_base = args->data.data_val;
1338 		iovp->iov_len = args->count;
1339 	}
1340 
1341 	uio.uio_iov = iovp;
1342 	uio.uio_iovcnt = iovcnt;
1343 
1344 	uio.uio_segflg = UIO_SYSSPACE;
1345 	uio.uio_extflg = UIO_COPY_DEFAULT;
1346 	uio.uio_loffset = args->offset;
1347 	uio.uio_resid = args->count;
1348 	uio.uio_llimit = curproc->p_fsz_ctl;
1349 	rlimit = uio.uio_llimit - args->offset;
1350 	if (rlimit < (u_offset_t)uio.uio_resid)
1351 		uio.uio_resid = (int)rlimit;
1352 
1353 	if (args->stable == UNSTABLE)
1354 		ioflag = 0;
1355 	else if (args->stable == FILE_SYNC)
1356 		ioflag = FSYNC;
1357 	else if (args->stable == DATA_SYNC)
1358 		ioflag = FDSYNC;
1359 	else {
1360 		if (iovp != iov)
1361 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
1362 		resp->status = NFS3ERR_INVAL;
1363 		goto err1;
1364 	}
1365 
1366 	/*
1367 	 * We're changing creds because VM may fault and we need
1368 	 * the cred of the current thread to be used if quota
1369 	 * checking is enabled.
1370 	 */
1371 	savecred = curthread->t_cred;
1372 	curthread->t_cred = cr;
1373 	error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1374 	curthread->t_cred = savecred;
1375 
1376 	if (iovp != iov)
1377 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
1378 
1379 	/* check if a monitor detected a delegation conflict */
1380 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1381 		resp->status = NFS3ERR_JUKEBOX;
1382 		goto err1;
1383 	}
1384 
1385 	ava.va_mask = AT_ALL;
1386 	avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1387 
1388 #ifdef DEBUG
1389 	if (!rfs3_do_post_op_attr)
1390 		avap = NULL;
1391 #endif
1392 
1393 	if (error)
1394 		goto err;
1395 
1396 	/*
1397 	 * If we were unable to get the V_WRITELOCK_TRUE, then we
1398 	 * may not have accurate after attrs, so check if
1399 	 * we have both attributes, they have a non-zero va_seq, and
1400 	 * va_seq has changed by exactly one,
1401 	 * if not, turn off the before attr.
1402 	 */
1403 	if (rwlock_ret != V_WRITELOCK_TRUE) {
1404 		if (bvap == NULL || avap == NULL ||
1405 		    bvap->va_seq == 0 || avap->va_seq == 0 ||
1406 		    avap->va_seq != (bvap->va_seq + 1)) {
1407 			bvap = NULL;
1408 		}
1409 	}
1410 
1411 	resp->status = NFS3_OK;
1412 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1413 	resp->resok.count = args->count - uio.uio_resid;
1414 	resp->resok.committed = args->stable;
1415 	resp->resok.verf = write3verf;
1416 	goto out;
1417 
1418 err:
1419 	if (curthread->t_flag & T_WOULDBLOCK) {
1420 		curthread->t_flag &= ~T_WOULDBLOCK;
1421 		resp->status = NFS3ERR_JUKEBOX;
1422 	} else
1423 		resp->status = puterrno3(error);
1424 err1:
1425 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1426 out:
1427 	DTRACE_NFSV3_4(op__write__done, struct svc_req *, req,
1428 	    cred_t *, cr, vnode_t *, vp, WRITE3res *, resp);
1429 
1430 	if (vp != NULL) {
1431 		if (rwlock_ret != -1)
1432 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1433 		if (in_crit)
1434 			nbl_end_crit(vp);
1435 		VN_RELE(vp);
1436 	}
1437 }
1438 
1439 void *
1440 rfs3_write_getfh(WRITE3args *args)
1441 {
1442 
1443 	return (&args->file);
1444 }
1445 
1446 void
1447 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1448 	struct svc_req *req, cred_t *cr)
1449 {
1450 	int error;
1451 	int in_crit = 0;
1452 	vnode_t *vp;
1453 	vnode_t *tvp = NULL;
1454 	vnode_t *dvp;
1455 	struct vattr *vap;
1456 	struct vattr va;
1457 	struct vattr *dbvap;
1458 	struct vattr dbva;
1459 	struct vattr *davap;
1460 	struct vattr dava;
1461 	enum vcexcl excl;
1462 	nfstime3 *mtime;
1463 	len_t reqsize;
1464 	bool_t trunc;
1465 
1466 	dbvap = NULL;
1467 	davap = NULL;
1468 
1469 	dvp = nfs3_fhtovp(&args->where.dir, exi);
1470 
1471 	DTRACE_NFSV3_4(op__create__start, struct svc_req *, req,
1472 	    cred_t *, cr, vnode_t *, dvp, CREATE3args *, args);
1473 
1474 	if (dvp == NULL) {
1475 		error = ESTALE;
1476 		goto out;
1477 	}
1478 
1479 #ifdef DEBUG
1480 	if (rfs3_do_pre_op_attr) {
1481 		dbva.va_mask = AT_ALL;
1482 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1483 	} else
1484 		dbvap = NULL;
1485 #else
1486 	dbva.va_mask = AT_ALL;
1487 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1488 #endif
1489 	davap = dbvap;
1490 
1491 	if (args->where.name == nfs3nametoolong) {
1492 		resp->status = NFS3ERR_NAMETOOLONG;
1493 		goto out1;
1494 	}
1495 
1496 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1497 		resp->status = NFS3ERR_ACCES;
1498 		goto out1;
1499 	}
1500 
1501 	if (rdonly(exi, req)) {
1502 		resp->status = NFS3ERR_ROFS;
1503 		goto out1;
1504 	}
1505 
1506 	if (is_system_labeled()) {
1507 		bslabel_t *clabel = req->rq_label;
1508 
1509 		ASSERT(clabel != NULL);
1510 		DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1511 		    "got client label from request(1)", struct svc_req *, req);
1512 
1513 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1514 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK)) {
1515 				resp->status = NFS3ERR_ACCES;
1516 				goto out1;
1517 			}
1518 		}
1519 	}
1520 
1521 	if (args->how.mode == EXCLUSIVE) {
1522 		va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1523 		va.va_type = VREG;
1524 		va.va_mode = (mode_t)0;
1525 		/*
1526 		 * Ensure no time overflows and that types match
1527 		 */
1528 		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1529 		va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1530 		va.va_mtime.tv_nsec = mtime->nseconds;
1531 		excl = EXCL;
1532 	} else {
1533 		error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1534 		    &va);
1535 		if (error)
1536 			goto out;
1537 		va.va_mask |= AT_TYPE;
1538 		va.va_type = VREG;
1539 		if (args->how.mode == GUARDED)
1540 			excl = EXCL;
1541 		else {
1542 			excl = NONEXCL;
1543 
1544 			/*
1545 			 * During creation of file in non-exclusive mode
1546 			 * if size of file is being set then make sure
1547 			 * that if the file already exists that no conflicting
1548 			 * non-blocking mandatory locks exists in the region
1549 			 * being modified. If there are conflicting locks fail
1550 			 * the operation with EACCES.
1551 			 */
1552 			if (va.va_mask & AT_SIZE) {
1553 				struct vattr tva;
1554 
1555 				/*
1556 				 * Does file already exist?
1557 				 */
1558 				error = VOP_LOOKUP(dvp, args->where.name, &tvp,
1559 				    NULL, 0, NULL, cr, NULL, NULL, NULL);
1560 
1561 				/*
1562 				 * Check to see if the file has been delegated
1563 				 * to a v4 client.  If so, then begin recall of
1564 				 * the delegation and return JUKEBOX to allow
1565 				 * the client to retrasmit its request.
1566 				 */
1567 
1568 				trunc = va.va_size == 0;
1569 				if (!error &&
1570 				    rfs4_check_delegated(FWRITE, tvp, trunc)) {
1571 					resp->status = NFS3ERR_JUKEBOX;
1572 					goto out1;
1573 				}
1574 
1575 				/*
1576 				 * Check for NBMAND lock conflicts
1577 				 */
1578 				if (!error && nbl_need_check(tvp)) {
1579 					u_offset_t offset;
1580 					ssize_t len;
1581 
1582 					nbl_start_crit(tvp, RW_READER);
1583 					in_crit = 1;
1584 
1585 					tva.va_mask = AT_SIZE;
1586 					error = VOP_GETATTR(tvp, &tva, 0, cr,
1587 					    NULL);
1588 					/*
1589 					 * Can't check for conflicts, so return
1590 					 * error.
1591 					 */
1592 					if (error)
1593 						goto out;
1594 
1595 					offset = tva.va_size < va.va_size ?
1596 					    tva.va_size : va.va_size;
1597 					len = tva.va_size < va.va_size ?
1598 					    va.va_size - tva.va_size :
1599 					    tva.va_size - va.va_size;
1600 					if (nbl_conflict(tvp, NBL_WRITE,
1601 					    offset, len, 0, NULL)) {
1602 						error = EACCES;
1603 						goto out;
1604 					}
1605 				} else if (tvp) {
1606 					VN_RELE(tvp);
1607 					tvp = NULL;
1608 				}
1609 			}
1610 		}
1611 		if (va.va_mask & AT_SIZE)
1612 			reqsize = va.va_size;
1613 	}
1614 
1615 	/*
1616 	 * Must specify the mode.
1617 	 */
1618 	if (!(va.va_mask & AT_MODE)) {
1619 		resp->status = NFS3ERR_INVAL;
1620 		goto out1;
1621 	}
1622 
1623 	/*
1624 	 * If the filesystem is exported with nosuid, then mask off
1625 	 * the setuid and setgid bits.
1626 	 */
1627 	if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1628 		va.va_mode &= ~(VSUID | VSGID);
1629 
1630 tryagain:
1631 	/*
1632 	 * The file open mode used is VWRITE.  If the client needs
1633 	 * some other semantic, then it should do the access checking
1634 	 * itself.  It would have been nice to have the file open mode
1635 	 * passed as part of the arguments.
1636 	 */
1637 	error = VOP_CREATE(dvp, args->where.name, &va, excl, VWRITE,
1638 	    &vp, cr, 0, NULL, NULL);
1639 
1640 #ifdef DEBUG
1641 	if (rfs3_do_post_op_attr) {
1642 		dava.va_mask = AT_ALL;
1643 		davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1644 	} else
1645 		davap = NULL;
1646 #else
1647 	dava.va_mask = AT_ALL;
1648 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1649 #endif
1650 
1651 	if (error) {
1652 		/*
1653 		 * If we got something other than file already exists
1654 		 * then just return this error.  Otherwise, we got
1655 		 * EEXIST.  If we were doing a GUARDED create, then
1656 		 * just return this error.  Otherwise, we need to
1657 		 * make sure that this wasn't a duplicate of an
1658 		 * exclusive create request.
1659 		 *
1660 		 * The assumption is made that a non-exclusive create
1661 		 * request will never return EEXIST.
1662 		 */
1663 		if (error != EEXIST || args->how.mode == GUARDED)
1664 			goto out;
1665 		/*
1666 		 * Lookup the file so that we can get a vnode for it.
1667 		 */
1668 		error = VOP_LOOKUP(dvp, args->where.name, &vp, NULL, 0,
1669 		    NULL, cr, NULL, NULL, NULL);
1670 		if (error) {
1671 			/*
1672 			 * We couldn't find the file that we thought that
1673 			 * we just created.  So, we'll just try creating
1674 			 * it again.
1675 			 */
1676 			if (error == ENOENT)
1677 				goto tryagain;
1678 			goto out;
1679 		}
1680 
1681 		/*
1682 		 * If the file is delegated to a v4 client, go ahead
1683 		 * and initiate recall, this create is a hint that a
1684 		 * conflicting v3 open has occurred.
1685 		 */
1686 
1687 		if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1688 			VN_RELE(vp);
1689 			resp->status = NFS3ERR_JUKEBOX;
1690 			goto out1;
1691 		}
1692 
1693 		va.va_mask = AT_ALL;
1694 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1695 
1696 		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1697 		/* % with INT32_MAX to prevent overflows */
1698 		if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1699 		    vap->va_mtime.tv_sec !=
1700 		    (mtime->seconds % INT32_MAX) ||
1701 		    vap->va_mtime.tv_nsec != mtime->nseconds)) {
1702 			VN_RELE(vp);
1703 			error = EEXIST;
1704 			goto out;
1705 		}
1706 	} else {
1707 
1708 		if ((args->how.mode == UNCHECKED ||
1709 		    args->how.mode == GUARDED) &&
1710 		    args->how.createhow3_u.obj_attributes.size.set_it &&
1711 		    va.va_size == 0)
1712 			trunc = TRUE;
1713 		else
1714 			trunc = FALSE;
1715 
1716 		if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1717 			VN_RELE(vp);
1718 			resp->status = NFS3ERR_JUKEBOX;
1719 			goto out1;
1720 		}
1721 
1722 		va.va_mask = AT_ALL;
1723 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1724 
1725 		/*
1726 		 * We need to check to make sure that the file got
1727 		 * created to the indicated size.  If not, we do a
1728 		 * setattr to try to change the size, but we don't
1729 		 * try too hard.  This shouldn't a problem as most
1730 		 * clients will only specifiy a size of zero which
1731 		 * local file systems handle.  However, even if
1732 		 * the client does specify a non-zero size, it can
1733 		 * still recover by checking the size of the file
1734 		 * after it has created it and then issue a setattr
1735 		 * request of its own to set the size of the file.
1736 		 */
1737 		if (vap != NULL &&
1738 		    (args->how.mode == UNCHECKED ||
1739 		    args->how.mode == GUARDED) &&
1740 		    args->how.createhow3_u.obj_attributes.size.set_it &&
1741 		    vap->va_size != reqsize) {
1742 			va.va_mask = AT_SIZE;
1743 			va.va_size = reqsize;
1744 			(void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1745 			va.va_mask = AT_ALL;
1746 			vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1747 		}
1748 	}
1749 
1750 #ifdef DEBUG
1751 	if (!rfs3_do_post_op_attr)
1752 		vap = NULL;
1753 #endif
1754 
1755 #ifdef DEBUG
1756 	if (!rfs3_do_post_op_fh3)
1757 		resp->resok.obj.handle_follows = FALSE;
1758 	else {
1759 #endif
1760 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1761 	if (error)
1762 		resp->resok.obj.handle_follows = FALSE;
1763 	else
1764 		resp->resok.obj.handle_follows = TRUE;
1765 #ifdef DEBUG
1766 	}
1767 #endif
1768 
1769 	/*
1770 	 * Force modified data and metadata out to stable storage.
1771 	 */
1772 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1773 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
1774 
1775 	VN_RELE(vp);
1776 	if (tvp != NULL) {
1777 		if (in_crit)
1778 			nbl_end_crit(tvp);
1779 		VN_RELE(tvp);
1780 	}
1781 
1782 	resp->status = NFS3_OK;
1783 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1784 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1785 
1786 	DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1787 	    cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1788 
1789 	VN_RELE(dvp);
1790 	return;
1791 
1792 out:
1793 	if (curthread->t_flag & T_WOULDBLOCK) {
1794 		curthread->t_flag &= ~T_WOULDBLOCK;
1795 		resp->status = NFS3ERR_JUKEBOX;
1796 	} else
1797 		resp->status = puterrno3(error);
1798 out1:
1799 	DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1800 	    cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1801 
1802 	if (tvp != NULL) {
1803 		if (in_crit)
1804 			nbl_end_crit(tvp);
1805 		VN_RELE(tvp);
1806 	}
1807 	if (dvp != NULL)
1808 		VN_RELE(dvp);
1809 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1810 }
1811 
1812 void *
1813 rfs3_create_getfh(CREATE3args *args)
1814 {
1815 
1816 	return (&args->where.dir);
1817 }
1818 
1819 void
1820 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1821 	struct svc_req *req, cred_t *cr)
1822 {
1823 	int error;
1824 	vnode_t *vp = NULL;
1825 	vnode_t *dvp;
1826 	struct vattr *vap;
1827 	struct vattr va;
1828 	struct vattr *dbvap;
1829 	struct vattr dbva;
1830 	struct vattr *davap;
1831 	struct vattr dava;
1832 
1833 	dbvap = NULL;
1834 	davap = NULL;
1835 
1836 	dvp = nfs3_fhtovp(&args->where.dir, exi);
1837 
1838 	DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req,
1839 	    cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args);
1840 
1841 	if (dvp == NULL) {
1842 		error = ESTALE;
1843 		goto out;
1844 	}
1845 
1846 #ifdef DEBUG
1847 	if (rfs3_do_pre_op_attr) {
1848 		dbva.va_mask = AT_ALL;
1849 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1850 	} else
1851 		dbvap = NULL;
1852 #else
1853 	dbva.va_mask = AT_ALL;
1854 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1855 #endif
1856 	davap = dbvap;
1857 
1858 	if (args->where.name == nfs3nametoolong) {
1859 		resp->status = NFS3ERR_NAMETOOLONG;
1860 		goto out1;
1861 	}
1862 
1863 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1864 		resp->status = NFS3ERR_ACCES;
1865 		goto out1;
1866 	}
1867 
1868 	if (rdonly(exi, req)) {
1869 		resp->status = NFS3ERR_ROFS;
1870 		goto out1;
1871 	}
1872 
1873 	if (is_system_labeled()) {
1874 		bslabel_t *clabel = req->rq_label;
1875 
1876 		ASSERT(clabel != NULL);
1877 		DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1878 		    "got client label from request(1)", struct svc_req *, req);
1879 
1880 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1881 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK)) {
1882 				resp->status = NFS3ERR_ACCES;
1883 				goto out1;
1884 			}
1885 		}
1886 	}
1887 
1888 	error = sattr3_to_vattr(&args->attributes, &va);
1889 	if (error)
1890 		goto out;
1891 
1892 	if (!(va.va_mask & AT_MODE)) {
1893 		resp->status = NFS3ERR_INVAL;
1894 		goto out1;
1895 	}
1896 
1897 	va.va_mask |= AT_TYPE;
1898 	va.va_type = VDIR;
1899 
1900 	error = VOP_MKDIR(dvp, args->where.name, &va, &vp, cr, NULL, 0, NULL);
1901 
1902 #ifdef DEBUG
1903 	if (rfs3_do_post_op_attr) {
1904 		dava.va_mask = AT_ALL;
1905 		davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1906 	} else
1907 		davap = NULL;
1908 #else
1909 	dava.va_mask = AT_ALL;
1910 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1911 #endif
1912 
1913 	/*
1914 	 * Force modified data and metadata out to stable storage.
1915 	 */
1916 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
1917 
1918 	if (error)
1919 		goto out;
1920 
1921 #ifdef DEBUG
1922 	if (!rfs3_do_post_op_fh3)
1923 		resp->resok.obj.handle_follows = FALSE;
1924 	else {
1925 #endif
1926 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1927 	if (error)
1928 		resp->resok.obj.handle_follows = FALSE;
1929 	else
1930 		resp->resok.obj.handle_follows = TRUE;
1931 #ifdef DEBUG
1932 	}
1933 #endif
1934 
1935 #ifdef DEBUG
1936 	if (rfs3_do_post_op_attr) {
1937 		va.va_mask = AT_ALL;
1938 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1939 	} else
1940 		vap = NULL;
1941 #else
1942 	va.va_mask = AT_ALL;
1943 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1944 #endif
1945 
1946 	/*
1947 	 * Force modified data and metadata out to stable storage.
1948 	 */
1949 	(void) VOP_FSYNC(vp, 0, cr, NULL);
1950 
1951 	VN_RELE(vp);
1952 
1953 	resp->status = NFS3_OK;
1954 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1955 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1956 
1957 	DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
1958 	    cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
1959 	VN_RELE(dvp);
1960 
1961 	return;
1962 
1963 out:
1964 	if (curthread->t_flag & T_WOULDBLOCK) {
1965 		curthread->t_flag &= ~T_WOULDBLOCK;
1966 		resp->status = NFS3ERR_JUKEBOX;
1967 	} else
1968 		resp->status = puterrno3(error);
1969 out1:
1970 	DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
1971 	    cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
1972 	if (dvp != NULL)
1973 		VN_RELE(dvp);
1974 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1975 }
1976 
1977 void *
1978 rfs3_mkdir_getfh(MKDIR3args *args)
1979 {
1980 
1981 	return (&args->where.dir);
1982 }
1983 
1984 void
1985 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
1986 	struct svc_req *req, cred_t *cr)
1987 {
1988 	int error;
1989 	vnode_t *vp;
1990 	vnode_t *dvp;
1991 	struct vattr *vap;
1992 	struct vattr va;
1993 	struct vattr *dbvap;
1994 	struct vattr dbva;
1995 	struct vattr *davap;
1996 	struct vattr dava;
1997 
1998 	dbvap = NULL;
1999 	davap = NULL;
2000 
2001 	dvp = nfs3_fhtovp(&args->where.dir, exi);
2002 
2003 	DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req,
2004 	    cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args);
2005 
2006 	if (dvp == NULL) {
2007 		error = ESTALE;
2008 		goto err;
2009 	}
2010 
2011 #ifdef DEBUG
2012 	if (rfs3_do_pre_op_attr) {
2013 		dbva.va_mask = AT_ALL;
2014 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2015 	} else
2016 		dbvap = NULL;
2017 #else
2018 	dbva.va_mask = AT_ALL;
2019 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2020 #endif
2021 	davap = dbvap;
2022 
2023 	if (args->where.name == nfs3nametoolong) {
2024 		resp->status = NFS3ERR_NAMETOOLONG;
2025 		goto err1;
2026 	}
2027 
2028 	if (args->where.name == NULL || *(args->where.name) == '\0') {
2029 		resp->status = NFS3ERR_ACCES;
2030 		goto err1;
2031 	}
2032 
2033 	if (rdonly(exi, req)) {
2034 		resp->status = NFS3ERR_ROFS;
2035 		goto err1;
2036 	}
2037 
2038 	if (is_system_labeled()) {
2039 		bslabel_t *clabel = req->rq_label;
2040 
2041 		ASSERT(clabel != NULL);
2042 		DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2043 		    "got client label from request(1)", struct svc_req *, req);
2044 
2045 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2046 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK)) {
2047 				resp->status = NFS3ERR_ACCES;
2048 				goto err1;
2049 			}
2050 		}
2051 	}
2052 
2053 	error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2054 	if (error)
2055 		goto err;
2056 
2057 	if (!(va.va_mask & AT_MODE)) {
2058 		resp->status = NFS3ERR_INVAL;
2059 		goto err1;
2060 	}
2061 
2062 	if (args->symlink.symlink_data == nfs3nametoolong) {
2063 		resp->status = NFS3ERR_NAMETOOLONG;
2064 		goto err1;
2065 	}
2066 
2067 	va.va_mask |= AT_TYPE;
2068 	va.va_type = VLNK;
2069 
2070 	error = VOP_SYMLINK(dvp, args->where.name, &va,
2071 	    args->symlink.symlink_data, cr, NULL, 0);
2072 
2073 #ifdef DEBUG
2074 	if (rfs3_do_post_op_attr) {
2075 		dava.va_mask = AT_ALL;
2076 		davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2077 	} else
2078 		davap = NULL;
2079 #else
2080 	dava.va_mask = AT_ALL;
2081 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2082 #endif
2083 
2084 	if (error)
2085 		goto err;
2086 
2087 	error = VOP_LOOKUP(dvp, args->where.name, &vp, NULL, 0, NULL, cr,
2088 	    NULL, NULL, NULL);
2089 
2090 	/*
2091 	 * Force modified data and metadata out to stable storage.
2092 	 */
2093 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
2094 
2095 
2096 	resp->status = NFS3_OK;
2097 	if (error) {
2098 		resp->resok.obj.handle_follows = FALSE;
2099 		vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2100 		vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2101 		goto out;
2102 	}
2103 
2104 #ifdef DEBUG
2105 	if (!rfs3_do_post_op_fh3)
2106 		resp->resok.obj.handle_follows = FALSE;
2107 	else {
2108 #endif
2109 	error = makefh3(&resp->resok.obj.handle, vp, exi);
2110 	if (error)
2111 		resp->resok.obj.handle_follows = FALSE;
2112 	else
2113 		resp->resok.obj.handle_follows = TRUE;
2114 #ifdef DEBUG
2115 	}
2116 #endif
2117 
2118 #ifdef DEBUG
2119 	if (rfs3_do_post_op_attr) {
2120 		va.va_mask = AT_ALL;
2121 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2122 	} else
2123 		vap = NULL;
2124 #else
2125 	va.va_mask = AT_ALL;
2126 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2127 #endif
2128 
2129 	/*
2130 	 * Force modified data and metadata out to stable storage.
2131 	 */
2132 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2133 
2134 	VN_RELE(vp);
2135 
2136 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2137 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2138 	goto out;
2139 
2140 err:
2141 	if (curthread->t_flag & T_WOULDBLOCK) {
2142 		curthread->t_flag &= ~T_WOULDBLOCK;
2143 		resp->status = NFS3ERR_JUKEBOX;
2144 	} else
2145 		resp->status = puterrno3(error);
2146 err1:
2147 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2148 out:
2149 	DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req,
2150 	    cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp);
2151 
2152 	if (dvp != NULL)
2153 		VN_RELE(dvp);
2154 }
2155 
2156 void *
2157 rfs3_symlink_getfh(SYMLINK3args *args)
2158 {
2159 
2160 	return (&args->where.dir);
2161 }
2162 
2163 void
2164 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2165 	struct svc_req *req, cred_t *cr)
2166 {
2167 	int error;
2168 	vnode_t *vp;
2169 	vnode_t *dvp;
2170 	struct vattr *vap;
2171 	struct vattr va;
2172 	struct vattr *dbvap;
2173 	struct vattr dbva;
2174 	struct vattr *davap;
2175 	struct vattr dava;
2176 	int mode;
2177 	enum vcexcl excl;
2178 
2179 	dbvap = NULL;
2180 	davap = NULL;
2181 
2182 	dvp = nfs3_fhtovp(&args->where.dir, exi);
2183 
2184 	DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req,
2185 	    cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args);
2186 
2187 	if (dvp == NULL) {
2188 		error = ESTALE;
2189 		goto out;
2190 	}
2191 
2192 #ifdef DEBUG
2193 	if (rfs3_do_pre_op_attr) {
2194 		dbva.va_mask = AT_ALL;
2195 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2196 	} else
2197 		dbvap = NULL;
2198 #else
2199 	dbva.va_mask = AT_ALL;
2200 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2201 #endif
2202 	davap = dbvap;
2203 
2204 	if (args->where.name == nfs3nametoolong) {
2205 		resp->status = NFS3ERR_NAMETOOLONG;
2206 		goto out1;
2207 	}
2208 
2209 	if (args->where.name == NULL || *(args->where.name) == '\0') {
2210 		resp->status = NFS3ERR_ACCES;
2211 		goto out1;
2212 	}
2213 
2214 	if (rdonly(exi, req)) {
2215 		resp->status = NFS3ERR_ROFS;
2216 		goto out1;
2217 	}
2218 
2219 	if (is_system_labeled()) {
2220 		bslabel_t *clabel = req->rq_label;
2221 
2222 		ASSERT(clabel != NULL);
2223 		DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2224 		    "got client label from request(1)", struct svc_req *, req);
2225 
2226 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2227 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK)) {
2228 				resp->status = NFS3ERR_ACCES;
2229 				goto out1;
2230 			}
2231 		}
2232 	}
2233 
2234 	switch (args->what.type) {
2235 	case NF3CHR:
2236 	case NF3BLK:
2237 		error = sattr3_to_vattr(
2238 		    &args->what.mknoddata3_u.device.dev_attributes, &va);
2239 		if (error)
2240 			goto out;
2241 		if (secpolicy_sys_devices(cr) != 0) {
2242 			resp->status = NFS3ERR_PERM;
2243 			goto out1;
2244 		}
2245 		if (args->what.type == NF3CHR)
2246 			va.va_type = VCHR;
2247 		else
2248 			va.va_type = VBLK;
2249 		va.va_rdev = makedevice(
2250 		    args->what.mknoddata3_u.device.spec.specdata1,
2251 		    args->what.mknoddata3_u.device.spec.specdata2);
2252 		va.va_mask |= AT_TYPE | AT_RDEV;
2253 		break;
2254 	case NF3SOCK:
2255 		error = sattr3_to_vattr(
2256 		    &args->what.mknoddata3_u.pipe_attributes, &va);
2257 		if (error)
2258 			goto out;
2259 		va.va_type = VSOCK;
2260 		va.va_mask |= AT_TYPE;
2261 		break;
2262 	case NF3FIFO:
2263 		error = sattr3_to_vattr(
2264 		    &args->what.mknoddata3_u.pipe_attributes, &va);
2265 		if (error)
2266 			goto out;
2267 		va.va_type = VFIFO;
2268 		va.va_mask |= AT_TYPE;
2269 		break;
2270 	default:
2271 		resp->status = NFS3ERR_BADTYPE;
2272 		goto out1;
2273 	}
2274 
2275 	/*
2276 	 * Must specify the mode.
2277 	 */
2278 	if (!(va.va_mask & AT_MODE)) {
2279 		resp->status = NFS3ERR_INVAL;
2280 		goto out1;
2281 	}
2282 
2283 	excl = EXCL;
2284 
2285 	mode = 0;
2286 
2287 	error = VOP_CREATE(dvp, args->where.name, &va, excl, mode,
2288 	    &vp, cr, 0, NULL, NULL);
2289 
2290 #ifdef DEBUG
2291 	if (rfs3_do_post_op_attr) {
2292 		dava.va_mask = AT_ALL;
2293 		davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2294 	} else
2295 		davap = NULL;
2296 #else
2297 	dava.va_mask = AT_ALL;
2298 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2299 #endif
2300 
2301 	/*
2302 	 * Force modified data and metadata out to stable storage.
2303 	 */
2304 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
2305 
2306 	if (error)
2307 		goto out;
2308 
2309 	resp->status = NFS3_OK;
2310 
2311 #ifdef DEBUG
2312 	if (!rfs3_do_post_op_fh3)
2313 		resp->resok.obj.handle_follows = FALSE;
2314 	else {
2315 #endif
2316 	error = makefh3(&resp->resok.obj.handle, vp, exi);
2317 	if (error)
2318 		resp->resok.obj.handle_follows = FALSE;
2319 	else
2320 		resp->resok.obj.handle_follows = TRUE;
2321 #ifdef DEBUG
2322 	}
2323 #endif
2324 
2325 #ifdef DEBUG
2326 	if (rfs3_do_post_op_attr) {
2327 		va.va_mask = AT_ALL;
2328 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2329 	} else
2330 		vap = NULL;
2331 #else
2332 	va.va_mask = AT_ALL;
2333 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2334 #endif
2335 
2336 	/*
2337 	 * Force modified metadata out to stable storage.
2338 	 */
2339 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2340 
2341 	VN_RELE(vp);
2342 
2343 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2344 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2345 	DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2346 	    cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2347 	VN_RELE(dvp);
2348 	return;
2349 
2350 out:
2351 	if (curthread->t_flag & T_WOULDBLOCK) {
2352 		curthread->t_flag &= ~T_WOULDBLOCK;
2353 		resp->status = NFS3ERR_JUKEBOX;
2354 	} else
2355 		resp->status = puterrno3(error);
2356 out1:
2357 	DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2358 	    cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2359 	if (dvp != NULL)
2360 		VN_RELE(dvp);
2361 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2362 }
2363 
2364 void *
2365 rfs3_mknod_getfh(MKNOD3args *args)
2366 {
2367 
2368 	return (&args->where.dir);
2369 }
2370 
2371 void
2372 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2373 	struct svc_req *req, cred_t *cr)
2374 {
2375 	int error = 0;
2376 	vnode_t *vp;
2377 	struct vattr *bvap;
2378 	struct vattr bva;
2379 	struct vattr *avap;
2380 	struct vattr ava;
2381 	vnode_t *targvp = NULL;
2382 
2383 	bvap = NULL;
2384 	avap = NULL;
2385 
2386 	vp = nfs3_fhtovp(&args->object.dir, exi);
2387 
2388 	DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req,
2389 	    cred_t *, cr, vnode_t *, vp, REMOVE3args *, args);
2390 
2391 	if (vp == NULL) {
2392 		error = ESTALE;
2393 		goto err;
2394 	}
2395 
2396 #ifdef DEBUG
2397 	if (rfs3_do_pre_op_attr) {
2398 		bva.va_mask = AT_ALL;
2399 		bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2400 	} else
2401 		bvap = NULL;
2402 #else
2403 	bva.va_mask = AT_ALL;
2404 	bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2405 #endif
2406 	avap = bvap;
2407 
2408 	if (vp->v_type != VDIR) {
2409 		resp->status = NFS3ERR_NOTDIR;
2410 		goto err1;
2411 	}
2412 
2413 	if (args->object.name == nfs3nametoolong) {
2414 		resp->status = NFS3ERR_NAMETOOLONG;
2415 		goto err1;
2416 	}
2417 
2418 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2419 		resp->status = NFS3ERR_ACCES;
2420 		goto err1;
2421 	}
2422 
2423 	if (rdonly(exi, req)) {
2424 		resp->status = NFS3ERR_ROFS;
2425 		goto err1;
2426 	}
2427 
2428 	if (is_system_labeled()) {
2429 		bslabel_t *clabel = req->rq_label;
2430 
2431 		ASSERT(clabel != NULL);
2432 		DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2433 		    "got client label from request(1)", struct svc_req *, req);
2434 
2435 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2436 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK)) {
2437 				resp->status = NFS3ERR_ACCES;
2438 				goto err1;
2439 			}
2440 		}
2441 	}
2442 
2443 	/*
2444 	 * Check for a conflict with a non-blocking mandatory share
2445 	 * reservation and V4 delegations
2446 	 */
2447 	error = VOP_LOOKUP(vp, args->object.name, &targvp, NULL, 0,
2448 	    NULL, cr, NULL, NULL, NULL);
2449 	if (error != 0)
2450 		goto err;
2451 
2452 	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2453 		resp->status = NFS3ERR_JUKEBOX;
2454 		goto err1;
2455 	}
2456 
2457 	if (!nbl_need_check(targvp)) {
2458 		error = VOP_REMOVE(vp, args->object.name, cr, NULL, 0);
2459 	} else {
2460 		nbl_start_crit(targvp, RW_READER);
2461 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2462 			error = EACCES;
2463 		} else {
2464 			error = VOP_REMOVE(vp, args->object.name, cr, NULL, 0);
2465 		}
2466 		nbl_end_crit(targvp);
2467 	}
2468 	VN_RELE(targvp);
2469 	targvp = NULL;
2470 
2471 #ifdef DEBUG
2472 	if (rfs3_do_post_op_attr) {
2473 		ava.va_mask = AT_ALL;
2474 		avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2475 	} else
2476 		avap = NULL;
2477 #else
2478 	ava.va_mask = AT_ALL;
2479 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2480 #endif
2481 
2482 	/*
2483 	 * Force modified data and metadata out to stable storage.
2484 	 */
2485 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2486 
2487 	if (error)
2488 		goto err;
2489 
2490 	resp->status = NFS3_OK;
2491 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2492 	goto out;
2493 
2494 err:
2495 	if (curthread->t_flag & T_WOULDBLOCK) {
2496 		curthread->t_flag &= ~T_WOULDBLOCK;
2497 		resp->status = NFS3ERR_JUKEBOX;
2498 	} else
2499 		resp->status = puterrno3(error);
2500 err1:
2501 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2502 out:
2503 	DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req,
2504 	    cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp);
2505 	if (vp != NULL)
2506 		VN_RELE(vp);
2507 }
2508 
2509 void *
2510 rfs3_remove_getfh(REMOVE3args *args)
2511 {
2512 
2513 	return (&args->object.dir);
2514 }
2515 
2516 void
2517 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2518 	struct svc_req *req, cred_t *cr)
2519 {
2520 	int error;
2521 	vnode_t *vp;
2522 	struct vattr *bvap;
2523 	struct vattr bva;
2524 	struct vattr *avap;
2525 	struct vattr ava;
2526 
2527 	bvap = NULL;
2528 	avap = NULL;
2529 
2530 	vp = nfs3_fhtovp(&args->object.dir, exi);
2531 
2532 	DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req,
2533 	    cred_t *, cr, vnode_t *, vp, RMDIR3args *, args);
2534 
2535 	if (vp == NULL) {
2536 		error = ESTALE;
2537 		goto err;
2538 	}
2539 
2540 #ifdef DEBUG
2541 	if (rfs3_do_pre_op_attr) {
2542 		bva.va_mask = AT_ALL;
2543 		bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2544 	} else
2545 		bvap = NULL;
2546 #else
2547 	bva.va_mask = AT_ALL;
2548 	bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2549 #endif
2550 	avap = bvap;
2551 
2552 	if (vp->v_type != VDIR) {
2553 		resp->status = NFS3ERR_NOTDIR;
2554 		goto err1;
2555 	}
2556 
2557 	if (args->object.name == nfs3nametoolong) {
2558 		resp->status = NFS3ERR_NAMETOOLONG;
2559 		goto err1;
2560 	}
2561 
2562 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2563 		resp->status = NFS3ERR_ACCES;
2564 		goto err1;
2565 	}
2566 
2567 	if (rdonly(exi, req)) {
2568 		resp->status = NFS3ERR_ROFS;
2569 		goto err1;
2570 	}
2571 
2572 	if (is_system_labeled()) {
2573 		bslabel_t *clabel = req->rq_label;
2574 
2575 		ASSERT(clabel != NULL);
2576 		DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2577 		    "got client label from request(1)", struct svc_req *, req);
2578 
2579 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2580 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK)) {
2581 				resp->status = NFS3ERR_ACCES;
2582 				goto err1;
2583 			}
2584 		}
2585 	}
2586 
2587 	error = VOP_RMDIR(vp, args->object.name, rootdir, cr, NULL, 0);
2588 
2589 #ifdef DEBUG
2590 	if (rfs3_do_post_op_attr) {
2591 		ava.va_mask = AT_ALL;
2592 		avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2593 	} else
2594 		avap = NULL;
2595 #else
2596 	ava.va_mask = AT_ALL;
2597 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2598 #endif
2599 
2600 	/*
2601 	 * Force modified data and metadata out to stable storage.
2602 	 */
2603 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2604 
2605 	if (error) {
2606 		/*
2607 		 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2608 		 * if the directory is not empty.  A System V NFS server
2609 		 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2610 		 * over the wire.
2611 		 */
2612 		if (error == EEXIST)
2613 			error = ENOTEMPTY;
2614 		goto err;
2615 	}
2616 
2617 	resp->status = NFS3_OK;
2618 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2619 	goto out;
2620 
2621 err:
2622 	if (curthread->t_flag & T_WOULDBLOCK) {
2623 		curthread->t_flag &= ~T_WOULDBLOCK;
2624 		resp->status = NFS3ERR_JUKEBOX;
2625 	} else
2626 		resp->status = puterrno3(error);
2627 err1:
2628 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2629 out:
2630 	DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req,
2631 	    cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp);
2632 	if (vp != NULL)
2633 		VN_RELE(vp);
2634 
2635 }
2636 
2637 void *
2638 rfs3_rmdir_getfh(RMDIR3args *args)
2639 {
2640 
2641 	return (&args->object.dir);
2642 }
2643 
2644 void
2645 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2646 	struct svc_req *req, cred_t *cr)
2647 {
2648 	int error = 0;
2649 	vnode_t *fvp;
2650 	vnode_t *tvp;
2651 	vnode_t *targvp;
2652 	struct vattr *fbvap;
2653 	struct vattr fbva;
2654 	struct vattr *favap;
2655 	struct vattr fava;
2656 	struct vattr *tbvap;
2657 	struct vattr tbva;
2658 	struct vattr *tavap;
2659 	struct vattr tava;
2660 	nfs_fh3 *fh3;
2661 	struct exportinfo *to_exi;
2662 	vnode_t *srcvp = NULL;
2663 	bslabel_t *clabel;
2664 
2665 	fbvap = NULL;
2666 	favap = NULL;
2667 	tbvap = NULL;
2668 	tavap = NULL;
2669 	tvp = NULL;
2670 
2671 	fvp = nfs3_fhtovp(&args->from.dir, exi);
2672 
2673 	DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req,
2674 	    cred_t *, cr, vnode_t *, fvp, RENAME3args *, args);
2675 
2676 	if (fvp == NULL) {
2677 		error = ESTALE;
2678 		goto err;
2679 	}
2680 
2681 	if (is_system_labeled()) {
2682 		clabel = req->rq_label;
2683 		ASSERT(clabel != NULL);
2684 		DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2685 		    "got client label from request(1)", struct svc_req *, req);
2686 
2687 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2688 			if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK)) {
2689 				resp->status = NFS3ERR_ACCES;
2690 				goto err1;
2691 			}
2692 		}
2693 	}
2694 
2695 #ifdef DEBUG
2696 	if (rfs3_do_pre_op_attr) {
2697 		fbva.va_mask = AT_ALL;
2698 		fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2699 	} else
2700 		fbvap = NULL;
2701 #else
2702 	fbva.va_mask = AT_ALL;
2703 	fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2704 #endif
2705 	favap = fbvap;
2706 
2707 	fh3 = &args->to.dir;
2708 	to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2709 	if (to_exi == NULL) {
2710 		resp->status = NFS3ERR_ACCES;
2711 		goto err1;
2712 	}
2713 	exi_rele(to_exi);
2714 
2715 	if (to_exi != exi) {
2716 		resp->status = NFS3ERR_XDEV;
2717 		goto err1;
2718 	}
2719 
2720 	tvp = nfs3_fhtovp(&args->to.dir, exi);
2721 	if (tvp == NULL) {
2722 		error = ESTALE;
2723 		goto err;
2724 	}
2725 
2726 #ifdef DEBUG
2727 	if (rfs3_do_pre_op_attr) {
2728 		tbva.va_mask = AT_ALL;
2729 		tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2730 	} else
2731 		tbvap = NULL;
2732 #else
2733 	tbva.va_mask = AT_ALL;
2734 	tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2735 #endif
2736 	tavap = tbvap;
2737 
2738 	if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2739 		resp->status = NFS3ERR_NOTDIR;
2740 		goto err1;
2741 	}
2742 
2743 	if (args->from.name == nfs3nametoolong ||
2744 	    args->to.name == nfs3nametoolong) {
2745 		resp->status = NFS3ERR_NAMETOOLONG;
2746 		goto err1;
2747 	}
2748 	if (args->from.name == NULL || *(args->from.name) == '\0' ||
2749 	    args->to.name == NULL || *(args->to.name) == '\0') {
2750 		resp->status = NFS3ERR_ACCES;
2751 		goto err1;
2752 	}
2753 
2754 	if (rdonly(exi, req)) {
2755 		resp->status = NFS3ERR_ROFS;
2756 		goto err1;
2757 	}
2758 
2759 	if (is_system_labeled()) {
2760 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2761 			if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK)) {
2762 				resp->status = NFS3ERR_ACCES;
2763 				goto err1;
2764 			}
2765 		}
2766 	}
2767 
2768 	/*
2769 	 * Check for a conflict with a non-blocking mandatory share
2770 	 * reservation or V4 delegations.
2771 	 */
2772 	error = VOP_LOOKUP(fvp, args->from.name, &srcvp, NULL, 0,
2773 	    NULL, cr, NULL, NULL, NULL);
2774 	if (error != 0)
2775 		goto err;
2776 
2777 	/*
2778 	 * If we rename a delegated file we should recall the
2779 	 * delegation, since future opens should fail or would
2780 	 * refer to a new file.
2781 	 */
2782 	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2783 		resp->status = NFS3ERR_JUKEBOX;
2784 		goto err1;
2785 	}
2786 
2787 	/*
2788 	 * Check for renaming over a delegated file.  Check rfs4_deleg_policy
2789 	 * first to avoid VOP_LOOKUP if possible.
2790 	 */
2791 	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2792 	    VOP_LOOKUP(tvp, args->to.name, &targvp, NULL, 0, NULL, cr,
2793 	    NULL, NULL, NULL) == 0) {
2794 
2795 		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2796 			VN_RELE(targvp);
2797 			resp->status = NFS3ERR_JUKEBOX;
2798 			goto err1;
2799 		}
2800 		VN_RELE(targvp);
2801 	}
2802 
2803 	if (!nbl_need_check(srcvp)) {
2804 		error = VOP_RENAME(fvp, args->from.name, tvp,
2805 		    args->to.name, cr, NULL, 0);
2806 	} else {
2807 		nbl_start_crit(srcvp, RW_READER);
2808 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
2809 			error = EACCES;
2810 		} else {
2811 			error = VOP_RENAME(fvp, args->from.name, tvp,
2812 			    args->to.name, cr, NULL, 0);
2813 		}
2814 		nbl_end_crit(srcvp);
2815 	}
2816 	if (error == 0) {
2817 		char *tmp;
2818 
2819 		/* fix the path name for the renamed file */
2820 		mutex_enter(&srcvp->v_lock);
2821 		tmp = srcvp->v_path;
2822 		srcvp->v_path = NULL;
2823 		mutex_exit(&srcvp->v_lock);
2824 		vn_setpath(rootdir, tvp, srcvp, args->to.name,
2825 		    strlen(args->to.name));
2826 		if (tmp != NULL)
2827 			kmem_free(tmp, strlen(tmp) + 1);
2828 	}
2829 	VN_RELE(srcvp);
2830 	srcvp = NULL;
2831 
2832 #ifdef DEBUG
2833 	if (rfs3_do_post_op_attr) {
2834 		fava.va_mask = AT_ALL;
2835 		favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2836 		tava.va_mask = AT_ALL;
2837 		tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2838 	} else {
2839 		favap = NULL;
2840 		tavap = NULL;
2841 	}
2842 #else
2843 	fava.va_mask = AT_ALL;
2844 	favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2845 	tava.va_mask = AT_ALL;
2846 	tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2847 #endif
2848 
2849 	/*
2850 	 * Force modified data and metadata out to stable storage.
2851 	 */
2852 	(void) VOP_FSYNC(fvp, 0, cr, NULL);
2853 	(void) VOP_FSYNC(tvp, 0, cr, NULL);
2854 
2855 	if (error)
2856 		goto err;
2857 
2858 	resp->status = NFS3_OK;
2859 	vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2860 	vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2861 	goto out;
2862 
2863 err:
2864 	if (curthread->t_flag & T_WOULDBLOCK) {
2865 		curthread->t_flag &= ~T_WOULDBLOCK;
2866 		resp->status = NFS3ERR_JUKEBOX;
2867 	} else
2868 		resp->status = puterrno3(error);
2869 err1:
2870 	vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2871 	vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2872 out:
2873 	DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req,
2874 	    cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp);
2875 	if (fvp != NULL)
2876 		VN_RELE(fvp);
2877 	if (tvp != NULL)
2878 		VN_RELE(tvp);
2879 }
2880 
2881 void *
2882 rfs3_rename_getfh(RENAME3args *args)
2883 {
2884 
2885 	return (&args->from.dir);
2886 }
2887 
2888 void
2889 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2890 	struct svc_req *req, cred_t *cr)
2891 {
2892 	int error;
2893 	vnode_t *vp;
2894 	vnode_t *dvp;
2895 	struct vattr *vap;
2896 	struct vattr va;
2897 	struct vattr *bvap;
2898 	struct vattr bva;
2899 	struct vattr *avap;
2900 	struct vattr ava;
2901 	nfs_fh3	*fh3;
2902 	struct exportinfo *to_exi;
2903 	bslabel_t *clabel;
2904 
2905 	vap = NULL;
2906 	bvap = NULL;
2907 	avap = NULL;
2908 	dvp = NULL;
2909 
2910 	vp = nfs3_fhtovp(&args->file, exi);
2911 
2912 	DTRACE_NFSV3_4(op__link__start, struct svc_req *, req,
2913 	    cred_t *, cr, vnode_t *, vp, LINK3args *, args);
2914 
2915 	if (vp == NULL) {
2916 		error = ESTALE;
2917 		goto out;
2918 	}
2919 
2920 #ifdef DEBUG
2921 	if (rfs3_do_pre_op_attr) {
2922 		va.va_mask = AT_ALL;
2923 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2924 	} else
2925 		vap = NULL;
2926 #else
2927 	va.va_mask = AT_ALL;
2928 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2929 #endif
2930 
2931 	fh3 = &args->link.dir;
2932 	to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2933 	if (to_exi == NULL) {
2934 		resp->status = NFS3ERR_ACCES;
2935 		goto out1;
2936 	}
2937 	exi_rele(to_exi);
2938 
2939 	if (to_exi != exi) {
2940 		resp->status = NFS3ERR_XDEV;
2941 		goto out1;
2942 	}
2943 
2944 	if (is_system_labeled()) {
2945 		clabel = req->rq_label;
2946 
2947 		ASSERT(clabel != NULL);
2948 		DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
2949 		    "got client label from request(1)", struct svc_req *, req);
2950 
2951 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2952 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
2953 				resp->status = NFS3ERR_ACCES;
2954 				goto out1;
2955 			}
2956 		}
2957 	}
2958 
2959 	dvp = nfs3_fhtovp(&args->link.dir, exi);
2960 	if (dvp == NULL) {
2961 		error = ESTALE;
2962 		goto out;
2963 	}
2964 
2965 #ifdef DEBUG
2966 	if (rfs3_do_pre_op_attr) {
2967 		bva.va_mask = AT_ALL;
2968 		bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
2969 	} else
2970 		bvap = NULL;
2971 #else
2972 	bva.va_mask = AT_ALL;
2973 	bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
2974 #endif
2975 
2976 	if (dvp->v_type != VDIR) {
2977 		resp->status = NFS3ERR_NOTDIR;
2978 		goto out1;
2979 	}
2980 
2981 	if (args->link.name == nfs3nametoolong) {
2982 		resp->status = NFS3ERR_NAMETOOLONG;
2983 		goto out1;
2984 	}
2985 
2986 	if (args->link.name == NULL || *(args->link.name) == '\0') {
2987 		resp->status = NFS3ERR_ACCES;
2988 		goto out1;
2989 	}
2990 
2991 	if (rdonly(exi, req)) {
2992 		resp->status = NFS3ERR_ROFS;
2993 		goto out1;
2994 	}
2995 
2996 	if (is_system_labeled()) {
2997 		DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
2998 		    "got client label from request(1)", struct svc_req *, req);
2999 
3000 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3001 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK)) {
3002 				resp->status = NFS3ERR_ACCES;
3003 				goto out1;
3004 			}
3005 		}
3006 	}
3007 
3008 	error = VOP_LINK(dvp, vp, args->link.name, cr, NULL, 0);
3009 
3010 #ifdef DEBUG
3011 	if (rfs3_do_post_op_attr) {
3012 		va.va_mask = AT_ALL;
3013 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3014 		ava.va_mask = AT_ALL;
3015 		avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3016 	} else {
3017 		vap = NULL;
3018 		avap = NULL;
3019 	}
3020 #else
3021 	va.va_mask = AT_ALL;
3022 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3023 	ava.va_mask = AT_ALL;
3024 	avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3025 #endif
3026 
3027 	/*
3028 	 * Force modified data and metadata out to stable storage.
3029 	 */
3030 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3031 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
3032 
3033 	if (error)
3034 		goto out;
3035 
3036 	VN_RELE(dvp);
3037 
3038 	resp->status = NFS3_OK;
3039 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3040 	vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3041 
3042 	DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3043 	    cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3044 
3045 	VN_RELE(vp);
3046 
3047 	return;
3048 
3049 out:
3050 	if (curthread->t_flag & T_WOULDBLOCK) {
3051 		curthread->t_flag &= ~T_WOULDBLOCK;
3052 		resp->status = NFS3ERR_JUKEBOX;
3053 	} else
3054 		resp->status = puterrno3(error);
3055 out1:
3056 	DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3057 	    cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3058 
3059 	if (vp != NULL)
3060 		VN_RELE(vp);
3061 	if (dvp != NULL)
3062 		VN_RELE(dvp);
3063 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3064 	vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3065 }
3066 
3067 void *
3068 rfs3_link_getfh(LINK3args *args)
3069 {
3070 
3071 	return (&args->file);
3072 }
3073 
3074 /*
 * This macro defines the size of a response which contains attribute
 * information and one directory entry (whose length is specified by
 * the macro parameter).  If the incoming request is larger than this,
 * then we are guaranteed to be able to return at least one directory
 * entry if one exists.  Therefore, we do not need to check for
 * NFS3ERR_TOOSMALL if the requested size is larger than this.  If it
 * is not, then we need to check to make sure that this error does not
 * need to be returned.
3083  *
3084  * NFS3_READDIR_MIN_COUNT is comprised of following :
3085  *
3086  * status - 1 * BYTES_PER_XDR_UNIT
3087  * attr. flag - 1 * BYTES_PER_XDR_UNIT
3088  * cookie verifier - 2 * BYTES_PER_XDR_UNIT
3089  * attributes  - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
3090  * boolean - 1 * BYTES_PER_XDR_UNIT
3091  * file id - 2 * BYTES_PER_XDR_UNIT
3092  * directory name length - 1 * BYTES_PER_XDR_UNIT
3093  * cookie - 2 * BYTES_PER_XDR_UNIT
3094  * end of list - 1 * BYTES_PER_XDR_UNIT
3095  * end of file - 1 * BYTES_PER_XDR_UNIT
3096  * Name length of directory to the nearest byte
3097  */
3098 
#define	NFS3_READDIR_MIN_COUNT(length)	\
	(roundup((length), BYTES_PER_XDR_UNIT) + \
	    BYTES_PER_XDR_UNIT * \
	    (1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1))
3102 
3103 /* ARGSUSED */
3104 void
3105 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3106 	struct svc_req *req, cred_t *cr)
3107 {
3108 	int error;
3109 	vnode_t *vp;
3110 	struct vattr *vap;
3111 	struct vattr va;
3112 	struct iovec iov;
3113 	struct uio uio;
3114 	char *data;
3115 	int iseof;
3116 	int bufsize;
3117 	int namlen;
3118 	uint_t count;
3119 
3120 	vap = NULL;
3121 
3122 	vp = nfs3_fhtovp(&args->dir, exi);
3123 
3124 	DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req,
3125 	    cred_t *, cr, vnode_t *, vp, READDIR3args *, args);
3126 
3127 	if (vp == NULL) {
3128 		error = ESTALE;
3129 		goto out;
3130 	}
3131 
3132 	if (is_system_labeled()) {
3133 		bslabel_t *clabel = req->rq_label;
3134 
3135 		ASSERT(clabel != NULL);
3136 		DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3137 		    "got client label from request(1)", struct svc_req *, req);
3138 
3139 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3140 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
3141 				resp->status = NFS3ERR_ACCES;
3142 				goto out1;
3143 			}
3144 		}
3145 	}
3146 
3147 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3148 
3149 #ifdef DEBUG
3150 	if (rfs3_do_pre_op_attr) {
3151 		va.va_mask = AT_ALL;
3152 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3153 	} else
3154 		vap = NULL;
3155 #else
3156 	va.va_mask = AT_ALL;
3157 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3158 #endif
3159 
3160 	if (vp->v_type != VDIR) {
3161 		resp->status = NFS3ERR_NOTDIR;
3162 		goto out1;
3163 	}
3164 
3165 	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3166 	if (error)
3167 		goto out;
3168 
3169 	/*
3170 	 * Now don't allow arbitrary count to alloc;
3171 	 * allow the maximum not to exceed rfs3_tsize()
3172 	 */
3173 	if (args->count > rfs3_tsize(req))
3174 		args->count = rfs3_tsize(req);
3175 
3176 	/*
3177 	 * Make sure that there is room to read at least one entry
3178 	 * if any are available.
3179 	 */
3180 	if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3181 		count = DIRENT64_RECLEN(MAXNAMELEN);
3182 	else
3183 		count = args->count;
3184 
3185 	data = kmem_alloc(count, KM_SLEEP);
3186 
3187 	iov.iov_base = data;
3188 	iov.iov_len = count;
3189 	uio.uio_iov = &iov;
3190 	uio.uio_iovcnt = 1;
3191 	uio.uio_segflg = UIO_SYSSPACE;
3192 	uio.uio_extflg = UIO_COPY_CACHED;
3193 	uio.uio_loffset = (offset_t)args->cookie;
3194 	uio.uio_resid = count;
3195 
3196 	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3197 
3198 #ifdef DEBUG
3199 	if (rfs3_do_post_op_attr) {
3200 		va.va_mask = AT_ALL;
3201 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3202 	} else
3203 		vap = NULL;
3204 #else
3205 	va.va_mask = AT_ALL;
3206 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3207 #endif
3208 
3209 	if (error) {
3210 		kmem_free(data, count);
3211 		goto out;
3212 	}
3213 
3214 	/*
3215 	 * If the count was not large enough to be able to guarantee
3216 	 * to be able to return at least one entry, then need to
3217 	 * check to see if NFS3ERR_TOOSMALL should be returned.
3218 	 */
3219 	if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3220 		/*
3221 		 * bufsize is used to keep track of the size of the response.
3222 		 * It is primed with:
3223 		 *	1 for the status +
3224 		 *	1 for the dir_attributes.attributes boolean +
3225 		 *	2 for the cookie verifier
3226 		 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3227 		 * to bytes.  If there are directory attributes to be
3228 		 * returned, then:
3229 		 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3230 		 * time BYTES_PER_XDR_UNIT is added to account for them.
3231 		 */
3232 		bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3233 		if (vap != NULL)
3234 			bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3235 		/*
3236 		 * An entry is composed of:
3237 		 *	1 for the true/false list indicator +
3238 		 *	2 for the fileid +
3239 		 *	1 for the length of the name +
3240 		 *	2 for the cookie +
3241 		 * all times BYTES_PER_XDR_UNIT to convert from
3242 		 * XDR units to bytes, plus the length of the name
3243 		 * rounded up to the nearest BYTES_PER_XDR_UNIT.
3244 		 */
3245 		if (count != uio.uio_resid) {
3246 			namlen = strlen(((struct dirent64 *)data)->d_name);
3247 			bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3248 			    roundup(namlen, BYTES_PER_XDR_UNIT);
3249 		}
3250 		/*
3251 		 * We need to check to see if the number of bytes left
3252 		 * to go into the buffer will actually fit into the
3253 		 * buffer.  This is calculated as the size of this
3254 		 * entry plus:
3255 		 *	1 for the true/false list indicator +
3256 		 *	1 for the eof indicator
3257 		 * times BYTES_PER_XDR_UNIT to convert from from
3258 		 * XDR units to bytes.
3259 		 */
3260 		bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3261 		if (bufsize > args->count) {
3262 			kmem_free(data, count);
3263 			resp->status = NFS3ERR_TOOSMALL;
3264 			goto out1;
3265 		}
3266 	}
3267 
3268 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3269 
3270 #if 0 /* notyet */
3271 	/*
3272 	 * Don't do this.  It causes local disk writes when just
3273 	 * reading the file and the overhead is deemed larger
3274 	 * than the benefit.
3275 	 */
3276 	/*
3277 	 * Force modified metadata out to stable storage.
3278 	 */
3279 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3280 #endif
3281 
3282 	resp->status = NFS3_OK;
3283 	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3284 	resp->resok.cookieverf = 0;
3285 	resp->resok.reply.entries = (entry3 *)data;
3286 	resp->resok.reply.eof = iseof;
3287 	resp->resok.size = count - uio.uio_resid;
3288 	resp->resok.count = args->count;
3289 	resp->resok.freecount = count;
3290 
3291 	DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3292 	    cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3293 
3294 	VN_RELE(vp);
3295 
3296 	return;
3297 
3298 out:
3299 	if (curthread->t_flag & T_WOULDBLOCK) {
3300 		curthread->t_flag &= ~T_WOULDBLOCK;
3301 		resp->status = NFS3ERR_JUKEBOX;
3302 	} else
3303 		resp->status = puterrno3(error);
3304 out1:
3305 	DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3306 	    cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3307 
3308 	if (vp != NULL) {
3309 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3310 		VN_RELE(vp);
3311 	}
3312 	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3313 }
3314 
3315 void *
3316 rfs3_readdir_getfh(READDIR3args *args)
3317 {
3318 
3319 	return (&args->dir);
3320 }
3321 
3322 void
3323 rfs3_readdir_free(READDIR3res *resp)
3324 {
3325 
3326 	if (resp->status == NFS3_OK)
3327 		kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3328 }
3329 
/*
 * Advance from one dirent64 record to the one d_reclen bytes after it.
 * Any earlier definition of nextdp is discarded first (#undef of an
 * undefined name is harmless).
 */
#undef nextdp
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
3334 
/*
 * This macro computes the size of a response which contains
 * one directory entry including the attributes as well as file handle.
 * If the incoming request is larger than this, then we are guaranteed to be
 * able to return at least one more directory entry if one exists.
 *
 * NFS3_READDIRPLUS_ENTRY is made up of the following:
 *
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * attribute flag - 1 * BYTES_PER_XDR_UNIT
 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * status byte for file handle - 1 *  BYTES_PER_XDR_UNIT
 * length of a file handle - 1 * BYTES_PER_XDR_UNIT
 * Maximum length of a file handle (NFS3_MAXFHSIZE)
 * name length of the entry, rounded up to a multiple of BYTES_PER_XDR_UNIT
 *
 * The macro argument is parenthesized so that an expression may be passed
 * safely.
 */
#define	NFS3_READDIRPLUS_ENTRY(namelen)	\
	((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
		BYTES_PER_XDR_UNIT + \
	NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
3358 
/*
 * Starting size of each directory read issued by rfs3_readdirplus(): it is
 * copied into a local read unit, capped by the space left in the read
 * buffer, and used as the iov_len/uio_resid of each VOP_READDIR call.
 */
static int rfs3_readdir_unit = MAXBSIZE;
3360 
3361 /* ARGSUSED */
3362 void
3363 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3364 	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
3365 {
3366 	int error;
3367 	vnode_t *vp;
3368 	struct vattr *vap;
3369 	struct vattr va;
3370 	struct iovec iov;
3371 	struct uio uio;
3372 	char *data;
3373 	int iseof;
3374 	struct dirent64 *dp;
3375 	vnode_t *nvp;
3376 	struct vattr *nvap;
3377 	struct vattr nva;
3378 	entryplus3_info *infop = NULL;
3379 	int size = 0;
3380 	int nents = 0;
3381 	int bufsize = 0;
3382 	int entrysize = 0;
3383 	int tofit = 0;
3384 	int rd_unit = rfs3_readdir_unit;
3385 	int prev_len;
3386 	int space_left;
3387 	int i;
3388 	uint_t *namlen = NULL;
3389 
3390 	vap = NULL;
3391 
3392 	vp = nfs3_fhtovp(&args->dir, exi);
3393 
3394 	DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req,
3395 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args);
3396 
3397 	if (vp == NULL) {
3398 		error = ESTALE;
3399 		goto out;
3400 	}
3401 
3402 	if (is_system_labeled()) {
3403 		bslabel_t *clabel = req->rq_label;
3404 
3405 		ASSERT(clabel != NULL);
3406 		DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3407 		    char *, "got client label from request(1)",
3408 		    struct svc_req *, req);
3409 
3410 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3411 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
3412 				resp->status = NFS3ERR_ACCES;
3413 				goto out1;
3414 			}
3415 		}
3416 	}
3417 
3418 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3419 
3420 #ifdef DEBUG
3421 	if (rfs3_do_pre_op_attr) {
3422 		va.va_mask = AT_ALL;
3423 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3424 	} else
3425 		vap = NULL;
3426 #else
3427 	va.va_mask = AT_ALL;
3428 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3429 #endif
3430 
3431 	if (vp->v_type != VDIR) {
3432 		error = ENOTDIR;
3433 		goto out;
3434 	}
3435 
3436 	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3437 	if (error)
3438 		goto out;
3439 
3440 	/*
3441 	 * Don't allow arbitrary counts for allocation
3442 	 */
3443 	if (args->maxcount > rfs3_tsize(req))
3444 		args->maxcount = rfs3_tsize(req);
3445 
3446 	/*
3447 	 * Make sure that there is room to read at least one entry
3448 	 * if any are available
3449 	 */
3450 	args->dircount = MIN(args->dircount, args->maxcount);
3451 
3452 	if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3453 		args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3454 
3455 	/*
3456 	 * This allocation relies on a minimum directory entry
3457 	 * being roughly 24 bytes.  Therefore, the namlen array
3458 	 * will have enough space based on the maximum number of
3459 	 * entries to read.
3460 	 */
3461 	namlen = kmem_alloc(args->dircount, KM_SLEEP);
3462 
3463 	space_left = args->dircount;
3464 	data = kmem_alloc(args->dircount, KM_SLEEP);
3465 	dp = (struct dirent64 *)data;
3466 	uio.uio_iov = &iov;
3467 	uio.uio_iovcnt = 1;
3468 	uio.uio_segflg = UIO_SYSSPACE;
3469 	uio.uio_extflg = UIO_COPY_CACHED;
3470 	uio.uio_loffset = (offset_t)args->cookie;
3471 
3472 	/*
3473 	 * bufsize is used to keep track of the size of the response as we
3474 	 * get post op attributes and filehandles for each entry.  This is
3475 	 * an optimization as the server may have read more entries than will
3476 	 * fit in the buffer specified by maxcount.  We stop calculating
3477 	 * post op attributes and filehandles once we have exceeded maxcount.
3478 	 * This will minimize the effect of truncation.
3479 	 *
3480 	 * It is primed with:
3481 	 *	1 for the status +
3482 	 *	1 for the dir_attributes.attributes boolean +
3483 	 *	2 for the cookie verifier
3484 	 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3485 	 * to bytes.  If there are directory attributes to be
3486 	 * returned, then:
3487 	 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3488 	 * time BYTES_PER_XDR_UNIT is added to account for them.
3489 	 */
3490 	bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3491 	if (vap != NULL)
3492 		bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3493 
3494 getmoredents:
3495 	/*
3496 	 * Here we make a check so that our read unit is not larger than
3497 	 * the space left in the buffer.
3498 	 */
3499 	rd_unit = MIN(rd_unit, space_left);
3500 	iov.iov_base = (char *)dp;
3501 	iov.iov_len = rd_unit;
3502 	uio.uio_resid = rd_unit;
3503 	prev_len = rd_unit;
3504 
3505 	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3506 
3507 	if (error) {
3508 		kmem_free(data, args->dircount);
3509 		goto out;
3510 	}
3511 
3512 	if (uio.uio_resid == prev_len && !iseof) {
3513 		if (nents == 0) {
3514 			kmem_free(data, args->dircount);
3515 			resp->status = NFS3ERR_TOOSMALL;
3516 			goto out1;
3517 		}
3518 
3519 		/*
3520 		 * We could not get any more entries, so get the attributes
3521 		 * and filehandle for the entries already obtained.
3522 		 */
3523 		goto good;
3524 	}
3525 
3526 	/*
3527 	 * We estimate the size of the response by assuming the
3528 	 * entry exists and attributes and filehandle are also valid
3529 	 */
3530 	for (size = prev_len - uio.uio_resid;
3531 	    size > 0;
3532 	    size -= dp->d_reclen, dp = nextdp(dp)) {
3533 
3534 		if (dp->d_ino == 0) {
3535 			nents++;
3536 			continue;
3537 		}
3538 
3539 		namlen[nents] = strlen(dp->d_name);
3540 		entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3541 
3542 		/*
3543 		 * We need to check to see if the number of bytes left
3544 		 * to go into the buffer will actually fit into the
3545 		 * buffer.  This is calculated as the size of this
3546 		 * entry plus:
3547 		 *	1 for the true/false list indicator +
3548 		 *	1 for the eof indicator
3549 		 * times BYTES_PER_XDR_UNIT to convert from XDR units
3550 		 * to bytes.
3551 		 *
3552 		 * Also check the dircount limit against the first entry read
3553 		 *
3554 		 */
3555 		tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3556 		if (bufsize + tofit > args->maxcount) {
3557 			/*
3558 			 * We make a check here to see if this was the
3559 			 * first entry being measured.  If so, then maxcount
3560 			 * was too small to begin with and so we need to
3561 			 * return with NFS3ERR_TOOSMALL.
3562 			 */
3563 			if (nents == 0) {
3564 				kmem_free(data, args->dircount);
3565 				resp->status = NFS3ERR_TOOSMALL;
3566 				goto out1;
3567 			}
3568 			iseof = FALSE;
3569 			goto good;
3570 		}
3571 		bufsize += entrysize;
3572 		nents++;
3573 	}
3574 
3575 	/*
3576 	 * If there is enough room to fit at least 1 more entry including
3577 	 * post op attributes and filehandle in the buffer AND that we haven't
3578 	 * exceeded dircount then go back and get some more.
3579 	 */
3580 	if (!iseof &&
3581 	    (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3582 		space_left -= (prev_len - uio.uio_resid);
3583 		if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3584 			goto getmoredents;
3585 
3586 		/* else, fall through */
3587 	}
3588 
3589 good:
3590 
3591 #ifdef DEBUG
3592 	if (rfs3_do_post_op_attr) {
3593 		va.va_mask = AT_ALL;
3594 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3595 	} else
3596 		vap = NULL;
3597 #else
3598 	va.va_mask = AT_ALL;
3599 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3600 #endif
3601 
3602 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3603 
3604 	infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3605 	resp->resok.infop = infop;
3606 
3607 	dp = (struct dirent64 *)data;
3608 	for (i = 0; i < nents; i++) {
3609 
3610 		if (dp->d_ino == 0) {
3611 			infop[i].attr.attributes = FALSE;
3612 			infop[i].fh.handle_follows = FALSE;
3613 			dp = nextdp(dp);
3614 			continue;
3615 		}
3616 
3617 		infop[i].namelen = namlen[i];
3618 
3619 		error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3620 		    NULL, NULL, NULL);
3621 		if (error) {
3622 			infop[i].attr.attributes = FALSE;
3623 			infop[i].fh.handle_follows = FALSE;
3624 			dp = nextdp(dp);
3625 			continue;
3626 		}
3627 
3628 #ifdef DEBUG
3629 		if (rfs3_do_post_op_attr) {
3630 			nva.va_mask = AT_ALL;
3631 			nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ?
3632 			    NULL : &nva;
3633 		} else
3634 			nvap = NULL;
3635 #else
3636 		nva.va_mask = AT_ALL;
3637 		nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3638 #endif
3639 		vattr_to_post_op_attr(nvap, &infop[i].attr);
3640 
3641 #ifdef DEBUG
3642 		if (!rfs3_do_post_op_fh3)
3643 			infop[i].fh.handle_follows = FALSE;
3644 		else {
3645 #endif
3646 		error = makefh3(&infop[i].fh.handle, nvp, exi);
3647 		if (!error)
3648 			infop[i].fh.handle_follows = TRUE;
3649 		else
3650 			infop[i].fh.handle_follows = FALSE;
3651 #ifdef DEBUG
3652 		}
3653 #endif
3654 
3655 		VN_RELE(nvp);
3656 		dp = nextdp(dp);
3657 	}
3658 
3659 #if 0 /* notyet */
3660 	/*
3661 	 * Don't do this.  It causes local disk writes when just
3662 	 * reading the file and the overhead is deemed larger
3663 	 * than the benefit.
3664 	 */
3665 	/*
3666 	 * Force modified metadata out to stable storage.
3667 	 */
3668 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3669 #endif
3670 
3671 	kmem_free(namlen, args->dircount);
3672 
3673 	resp->status = NFS3_OK;
3674 	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3675 	resp->resok.cookieverf = 0;
3676 	resp->resok.reply.entries = (entryplus3 *)data;
3677 	resp->resok.reply.eof = iseof;
3678 	resp->resok.size = nents;
3679 	resp->resok.count = args->dircount;
3680 	resp->resok.maxcount = args->maxcount;
3681 
3682 	DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3683 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3684 
3685 	VN_RELE(vp);
3686 
3687 	return;
3688 
3689 out:
3690 	if (curthread->t_flag & T_WOULDBLOCK) {
3691 		curthread->t_flag &= ~T_WOULDBLOCK;
3692 		resp->status = NFS3ERR_JUKEBOX;
3693 	} else
3694 		resp->status = puterrno3(error);
3695 out1:
3696 	DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3697 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3698 
3699 	if (vp != NULL) {
3700 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3701 		VN_RELE(vp);
3702 	}
3703 
3704 	if (namlen != NULL)
3705 		kmem_free(namlen, args->dircount);
3706 
3707 	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3708 }
3709 
3710 void *
3711 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3712 {
3713 
3714 	return (&args->dir);
3715 }
3716 
3717 void
3718 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3719 {
3720 
3721 	if (resp->status == NFS3_OK) {
3722 		kmem_free(resp->resok.reply.entries, resp->resok.count);
3723 		kmem_free(resp->resok.infop,
3724 		    resp->resok.size * sizeof (struct entryplus3_info));
3725 	}
3726 }
3727 
3728 /* ARGSUSED */
3729 void
3730 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3731 	struct svc_req *req, cred_t *cr)
3732 {
3733 	int error;
3734 	vnode_t *vp;
3735 	struct vattr *vap;
3736 	struct vattr va;
3737 	struct statvfs64 sb;
3738 
3739 	vap = NULL;
3740 
3741 	vp = nfs3_fhtovp(&args->fsroot, exi);
3742 
3743 	DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req,
3744 	    cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args);
3745 
3746 	if (vp == NULL) {
3747 		error = ESTALE;
3748 		goto out;
3749 	}
3750 
3751 	if (is_system_labeled()) {
3752 		bslabel_t *clabel = req->rq_label;
3753 
3754 		ASSERT(clabel != NULL);
3755 		DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3756 		    "got client label from request(1)", struct svc_req *, req);
3757 
3758 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3759 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
3760 				resp->status = NFS3ERR_ACCES;
3761 				goto out1;
3762 			}
3763 		}
3764 	}
3765 
3766 	error = VFS_STATVFS(vp->v_vfsp, &sb);
3767 
3768 #ifdef DEBUG
3769 	if (rfs3_do_post_op_attr) {
3770 		va.va_mask = AT_ALL;
3771 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3772 	} else
3773 		vap = NULL;
3774 #else
3775 	va.va_mask = AT_ALL;
3776 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3777 #endif
3778 
3779 	if (error)
3780 		goto out;
3781 
3782 	resp->status = NFS3_OK;
3783 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3784 	if (sb.f_blocks != (fsblkcnt64_t)-1)
3785 		resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3786 	else
3787 		resp->resok.tbytes = (size3)sb.f_blocks;
3788 	if (sb.f_bfree != (fsblkcnt64_t)-1)
3789 		resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3790 	else
3791 		resp->resok.fbytes = (size3)sb.f_bfree;
3792 	if (sb.f_bavail != (fsblkcnt64_t)-1)
3793 		resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3794 	else
3795 		resp->resok.abytes = (size3)sb.f_bavail;
3796 	resp->resok.tfiles = (size3)sb.f_files;
3797 	resp->resok.ffiles = (size3)sb.f_ffree;
3798 	resp->resok.afiles = (size3)sb.f_favail;
3799 	resp->resok.invarsec = 0;
3800 
3801 	DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3802 	    cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3803 	VN_RELE(vp);
3804 
3805 	return;
3806 
3807 out:
3808 	if (curthread->t_flag & T_WOULDBLOCK) {
3809 		curthread->t_flag &= ~T_WOULDBLOCK;
3810 		resp->status = NFS3ERR_JUKEBOX;
3811 	} else
3812 		resp->status = puterrno3(error);
3813 out1:
3814 	DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3815 	    cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3816 
3817 	if (vp != NULL)
3818 		VN_RELE(vp);
3819 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3820 }
3821 
3822 void *
3823 rfs3_fsstat_getfh(FSSTAT3args *args)
3824 {
3825 
3826 	return (&args->fsroot);
3827 }
3828 
3829 /* ARGSUSED */
3830 void
3831 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3832 	struct svc_req *req, cred_t *cr)
3833 {
3834 	vnode_t *vp;
3835 	struct vattr *vap;
3836 	struct vattr va;
3837 	uint32_t xfer_size;
3838 	ulong_t l = 0;
3839 	int error;
3840 
3841 	vp = nfs3_fhtovp(&args->fsroot, exi);
3842 
3843 	DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req,
3844 	    cred_t *, cr, vnode_t *, vp, FSINFO3args *, args);
3845 
3846 	if (vp == NULL) {
3847 		if (curthread->t_flag & T_WOULDBLOCK) {
3848 			curthread->t_flag &= ~T_WOULDBLOCK;
3849 			resp->status = NFS3ERR_JUKEBOX;
3850 		} else
3851 			resp->status = NFS3ERR_STALE;
3852 		vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3853 		goto out;
3854 	}
3855 
3856 	if (is_system_labeled()) {
3857 		bslabel_t *clabel = req->rq_label;
3858 
3859 		ASSERT(clabel != NULL);
3860 		DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3861 		    "got client label from request(1)", struct svc_req *, req);
3862 
3863 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3864 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
3865 				resp->status = NFS3ERR_STALE;
3866 				vattr_to_post_op_attr(NULL,
3867 				    &resp->resfail.obj_attributes);
3868 				goto out;
3869 			}
3870 		}
3871 	}
3872 
3873 #ifdef DEBUG
3874 	if (rfs3_do_post_op_attr) {
3875 		va.va_mask = AT_ALL;
3876 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3877 	} else
3878 		vap = NULL;
3879 #else
3880 	va.va_mask = AT_ALL;
3881 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3882 #endif
3883 
3884 	resp->status = NFS3_OK;
3885 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3886 	xfer_size = rfs3_tsize(req);
3887 	resp->resok.rtmax = xfer_size;
3888 	resp->resok.rtpref = xfer_size;
3889 	resp->resok.rtmult = DEV_BSIZE;
3890 	resp->resok.wtmax = xfer_size;
3891 	resp->resok.wtpref = xfer_size;
3892 	resp->resok.wtmult = DEV_BSIZE;
3893 	resp->resok.dtpref = MAXBSIZE;
3894 
3895 	/*
3896 	 * Large file spec: want maxfilesize based on limit of
3897 	 * underlying filesystem.  We can guess 2^31-1 if need be.
3898 	 */
3899 	error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3900 
3901 	if (!error && l != 0 && l <= 64)
3902 		resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3903 	else
3904 		resp->resok.maxfilesize = MAXOFF32_T;
3905 
3906 	resp->resok.time_delta.seconds = 0;
3907 	resp->resok.time_delta.nseconds = 1000;
3908 	resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3909 	    FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3910 
3911 	DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3912 	    cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp);
3913 
3914 	VN_RELE(vp);
3915 
3916 	return;
3917 
3918 out:
3919 	DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3920 	    cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp);
3921 	if (vp != NULL)
3922 		VN_RELE(vp);
3923 }
3924 
3925 void *
3926 rfs3_fsinfo_getfh(FSINFO3args *args)
3927 {
3928 
3929 	return (&args->fsroot);
3930 }
3931 
3932 /* ARGSUSED */
3933 void
3934 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3935 	struct svc_req *req, cred_t *cr)
3936 {
3937 	int error;
3938 	vnode_t *vp;
3939 	struct vattr *vap;
3940 	struct vattr va;
3941 	ulong_t val;
3942 
3943 	vap = NULL;
3944 
3945 	vp = nfs3_fhtovp(&args->object, exi);
3946 
3947 	DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req,
3948 	    cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args);
3949 
3950 	if (vp == NULL) {
3951 		error = ESTALE;
3952 		goto out;
3953 	}
3954 
3955 	if (is_system_labeled()) {
3956 		bslabel_t *clabel = req->rq_label;
3957 
3958 		ASSERT(clabel != NULL);
3959 		DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
3960 		    "got client label from request(1)", struct svc_req *, req);
3961 
3962 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3963 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
3964 				resp->status = NFS3ERR_ACCES;
3965 				goto out1;
3966 			}
3967 		}
3968 	}
3969 
3970 #ifdef DEBUG
3971 	if (rfs3_do_post_op_attr) {
3972 		va.va_mask = AT_ALL;
3973 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3974 	} else
3975 		vap = NULL;
3976 #else
3977 	va.va_mask = AT_ALL;
3978 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3979 #endif
3980 
3981 	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
3982 	if (error)
3983 		goto out;
3984 	resp->resok.info.link_max = (uint32)val;
3985 
3986 	error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
3987 	if (error)
3988 		goto out;
3989 	resp->resok.info.name_max = (uint32)val;
3990 
3991 	error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
3992 	if (error)
3993 		goto out;
3994 	if (val == 1)
3995 		resp->resok.info.no_trunc = TRUE;
3996 	else
3997 		resp->resok.info.no_trunc = FALSE;
3998 
3999 	error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
4000 	if (error)
4001 		goto out;
4002 	if (val == 1)
4003 		resp->resok.info.chown_restricted = TRUE;
4004 	else
4005 		resp->resok.info.chown_restricted = FALSE;
4006 
4007 	resp->status = NFS3_OK;
4008 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4009 	resp->resok.info.case_insensitive = FALSE;
4010 	resp->resok.info.case_preserving = TRUE;
4011 	DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4012 	    cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4013 	VN_RELE(vp);
4014 	return;
4015 
4016 out:
4017 	if (curthread->t_flag & T_WOULDBLOCK) {
4018 		curthread->t_flag &= ~T_WOULDBLOCK;
4019 		resp->status = NFS3ERR_JUKEBOX;
4020 	} else
4021 		resp->status = puterrno3(error);
4022 out1:
4023 	DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4024 	    cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4025 	if (vp != NULL)
4026 		VN_RELE(vp);
4027 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4028 }
4029 
4030 void *
4031 rfs3_pathconf_getfh(PATHCONF3args *args)
4032 {
4033 
4034 	return (&args->object);
4035 }
4036 
4037 void
4038 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4039 	struct svc_req *req, cred_t *cr)
4040 {
4041 	int error;
4042 	vnode_t *vp;
4043 	struct vattr *bvap;
4044 	struct vattr bva;
4045 	struct vattr *avap;
4046 	struct vattr ava;
4047 
4048 	bvap = NULL;
4049 	avap = NULL;
4050 
4051 	vp = nfs3_fhtovp(&args->file, exi);
4052 
4053 	DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req,
4054 	    cred_t *, cr, vnode_t *, vp, COMMIT3args *, args);
4055 
4056 	if (vp == NULL) {
4057 		error = ESTALE;
4058 		goto out;
4059 	}
4060 
4061 	bva.va_mask = AT_ALL;
4062 	error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4063 
4064 	/*
4065 	 * If we can't get the attributes, then we can't do the
4066 	 * right access checking.  So, we'll fail the request.
4067 	 */
4068 	if (error)
4069 		goto out;
4070 
4071 #ifdef DEBUG
4072 	if (rfs3_do_pre_op_attr)
4073 		bvap = &bva;
4074 	else
4075 		bvap = NULL;
4076 #else
4077 	bvap = &bva;
4078 #endif
4079 
4080 	if (rdonly(exi, req)) {
4081 		resp->status = NFS3ERR_ROFS;
4082 		goto out1;
4083 	}
4084 
4085 	if (vp->v_type != VREG) {
4086 		resp->status = NFS3ERR_INVAL;
4087 		goto out1;
4088 	}
4089 
4090 	if (is_system_labeled()) {
4091 		bslabel_t *clabel = req->rq_label;
4092 
4093 		ASSERT(clabel != NULL);
4094 		DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4095 		    "got client label from request(1)", struct svc_req *, req);
4096 
4097 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4098 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK)) {
4099 				resp->status = NFS3ERR_ACCES;
4100 				goto out1;
4101 			}
4102 		}
4103 	}
4104 
4105 	if (crgetuid(cr) != bva.va_uid &&
4106 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4107 		goto out;
4108 
4109 	error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr, NULL);
4110 	if (!error)
4111 		error = VOP_FSYNC(vp, FNODSYNC, cr, NULL);
4112 
4113 #ifdef DEBUG
4114 	if (rfs3_do_post_op_attr) {
4115 		ava.va_mask = AT_ALL;
4116 		avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4117 	} else
4118 		avap = NULL;
4119 #else
4120 	ava.va_mask = AT_ALL;
4121 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4122 #endif
4123 
4124 	if (error)
4125 		goto out;
4126 
4127 	resp->status = NFS3_OK;
4128 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4129 	resp->resok.verf = write3verf;
4130 
4131 	DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4132 	    cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4133 
4134 	VN_RELE(vp);
4135 
4136 	return;
4137 
4138 out:
4139 	if (curthread->t_flag & T_WOULDBLOCK) {
4140 		curthread->t_flag &= ~T_WOULDBLOCK;
4141 		resp->status = NFS3ERR_JUKEBOX;
4142 	} else
4143 		resp->status = puterrno3(error);
4144 out1:
4145 	DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4146 	    cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4147 
4148 	if (vp != NULL)
4149 		VN_RELE(vp);
4150 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4151 }
4152 
4153 void *
4154 rfs3_commit_getfh(COMMIT3args *args)
4155 {
4156 
4157 	return (&args->file);
4158 }
4159 
4160 static int
4161 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4162 {
4163 
4164 	vap->va_mask = 0;
4165 
4166 	if (sap->mode.set_it) {
4167 		vap->va_mode = (mode_t)sap->mode.mode;
4168 		vap->va_mask |= AT_MODE;
4169 	}
4170 	if (sap->uid.set_it) {
4171 		vap->va_uid = (uid_t)sap->uid.uid;
4172 		vap->va_mask |= AT_UID;
4173 	}
4174 	if (sap->gid.set_it) {
4175 		vap->va_gid = (gid_t)sap->gid.gid;
4176 		vap->va_mask |= AT_GID;
4177 	}
4178 	if (sap->size.set_it) {
4179 		if (sap->size.size > (size3)((u_longlong_t)-1))
4180 			return (EINVAL);
4181 		vap->va_size = sap->size.size;
4182 		vap->va_mask |= AT_SIZE;
4183 	}
4184 	if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4185 #ifndef _LP64
4186 		/* check time validity */
4187 		if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4188 			return (EOVERFLOW);
4189 #endif
4190 		/*
4191 		 * nfs protocol defines times as unsigned so don't extend sign,
4192 		 * unless sysadmin set nfs_allow_preepoch_time.
4193 		 */
4194 		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4195 		    sap->atime.atime.seconds);
4196 		vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4197 		vap->va_mask |= AT_ATIME;
4198 	} else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4199 		gethrestime(&vap->va_atime);
4200 		vap->va_mask |= AT_ATIME;
4201 	}
4202 	if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4203 #ifndef _LP64
4204 		/* check time validity */
4205 		if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4206 			return (EOVERFLOW);
4207 #endif
4208 		/*
4209 		 * nfs protocol defines times as unsigned so don't extend sign,
4210 		 * unless sysadmin set nfs_allow_preepoch_time.
4211 		 */
4212 		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4213 		    sap->mtime.mtime.seconds);
4214 		vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4215 		vap->va_mask |= AT_MTIME;
4216 	} else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4217 		gethrestime(&vap->va_mtime);
4218 		vap->va_mask |= AT_MTIME;
4219 	}
4220 
4221 	return (0);
4222 }
4223 
4224 static ftype3 vt_to_nf3[] = {
4225 	0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4226 };
4227 
4228 static int
4229 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4230 {
4231 
4232 	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4233 	/* Return error if time or size overflow */
4234 	if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4235 		return (EOVERFLOW);
4236 	}
4237 	fap->type = vt_to_nf3[vap->va_type];
4238 	fap->mode = (mode3)(vap->va_mode & MODEMASK);
4239 	fap->nlink = (uint32)vap->va_nlink;
4240 	if (vap->va_uid == UID_NOBODY)
4241 		fap->uid = (uid3)NFS_UID_NOBODY;
4242 	else
4243 		fap->uid = (uid3)vap->va_uid;
4244 	if (vap->va_gid == GID_NOBODY)
4245 		fap->gid = (gid3)NFS_GID_NOBODY;
4246 	else
4247 		fap->gid = (gid3)vap->va_gid;
4248 	fap->size = (size3)vap->va_size;
4249 	fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4250 	fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4251 	fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4252 	fap->fsid = (uint64)vap->va_fsid;
4253 	fap->fileid = (fileid3)vap->va_nodeid;
4254 	fap->atime.seconds = vap->va_atime.tv_sec;
4255 	fap->atime.nseconds = vap->va_atime.tv_nsec;
4256 	fap->mtime.seconds = vap->va_mtime.tv_sec;
4257 	fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4258 	fap->ctime.seconds = vap->va_ctime.tv_sec;
4259 	fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4260 	return (0);
4261 }
4262 
4263 static int
4264 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4265 {
4266 
4267 	/* Return error if time or size overflow */
4268 	if (!  (NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4269 	    NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4270 	    NFS3_SIZE_OK(vap->va_size))) {
4271 		return (EOVERFLOW);
4272 	}
4273 	wccap->size = (size3)vap->va_size;
4274 	wccap->mtime.seconds = vap->va_mtime.tv_sec;
4275 	wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4276 	wccap->ctime.seconds = vap->va_ctime.tv_sec;
4277 	wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4278 	return (0);
4279 }
4280 
4281 static void
4282 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4283 {
4284 
4285 	/* don't return attrs if time overflow */
4286 	if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4287 		poap->attributes = TRUE;
4288 	} else
4289 		poap->attributes = FALSE;
4290 }
4291 
4292 void
4293 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4294 {
4295 
4296 	/* don't return attrs if time overflow */
4297 	if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4298 		poap->attributes = TRUE;
4299 	} else
4300 		poap->attributes = FALSE;
4301 }
4302 
4303 static void
4304 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4305 {
4306 
4307 	vattr_to_pre_op_attr(bvap, &wccp->before);
4308 	vattr_to_post_op_attr(avap, &wccp->after);
4309 }
4310 
4311 void
4312 rfs3_srvrinit(void)
4313 {
4314 	struct rfs3_verf_overlay {
4315 		uint_t id; /* a "unique" identifier */
4316 		int ts; /* a unique timestamp */
4317 	} *verfp;
4318 	timestruc_t now;
4319 
4320 	/*
4321 	 * The following algorithm attempts to find a unique verifier
4322 	 * to be used as the write verifier returned from the server
4323 	 * to the client.  It is important that this verifier change
4324 	 * whenever the server reboots.  Of secondary importance, it
4325 	 * is important for the verifier to be unique between two
4326 	 * different servers.
4327 	 *
4328 	 * Thus, an attempt is made to use the system hostid and the
4329 	 * current time in seconds when the nfssrv kernel module is
4330 	 * loaded.  It is assumed that an NFS server will not be able
4331 	 * to boot and then to reboot in less than a second.  If the
4332 	 * hostid has not been set, then the current high resolution
4333 	 * time is used.  This will ensure different verifiers each
4334 	 * time the server reboots and minimize the chances that two
4335 	 * different servers will have the same verifier.
4336 	 */
4337 
4338 #ifndef	lint
4339 	/*
4340 	 * We ASSERT that this constant logic expression is
4341 	 * always true because in the past, it wasn't.
4342 	 */
4343 	ASSERT(sizeof (*verfp) <= sizeof (write3verf));
4344 #endif
4345 
4346 	gethrestime(&now);
4347 	verfp = (struct rfs3_verf_overlay *)&write3verf;
4348 	verfp->ts = (int)now.tv_sec;
4349 	verfp->id = (uint_t)nfs_atoi(hw_serial);
4350 
4351 	if (verfp->id == 0)
4352 		verfp->id = (uint_t)now.tv_nsec;
4353 
4354 	nfs3_srv_caller_id = fs_new_caller_id();
4355 
4356 }
4357 
/*
 * NFSv3 server teardown hook.  There is currently nothing to undo.
 */
void
rfs3_srvrfini(void)
{
}
4363