xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs3_srv.c (revision 9e2871ec787069e5a50ceec41f190366f4c89c8e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28 
29 #pragma ident	"%Z%%M%	%I%	%E% SMI"
30 
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/cred.h>
35 #include <sys/buf.h>
36 #include <sys/vfs.h>
37 #include <sys/vnode.h>
38 #include <sys/uio.h>
39 #include <sys/errno.h>
40 #include <sys/sysmacros.h>
41 #include <sys/statvfs.h>
42 #include <sys/kmem.h>
43 #include <sys/dirent.h>
44 #include <sys/cmn_err.h>
45 #include <sys/debug.h>
46 #include <sys/systeminfo.h>
47 #include <sys/flock.h>
48 #include <sys/nbmlock.h>
49 #include <sys/policy.h>
50 #include <sys/sdt.h>
51 
52 #include <rpc/types.h>
53 #include <rpc/auth.h>
54 #include <rpc/svc.h>
55 
56 #include <nfs/nfs.h>
57 #include <nfs/export.h>
58 
59 #include <sys/strsubr.h>
60 
61 #include <sys/tsol/label.h>
62 #include <sys/tsol/tndb.h>
63 
64 #include <inet/ip.h>
65 #include <inet/ip6.h>
66 
67 /*
68  * These are the interface routines for the server side of the
69  * Network File System.  See the NFS version 3 protocol specification
70  * for a description of this interface.
71  */
72 
/*
 * DEBUG-only switches.  The handlers in this file test
 * rfs3_do_pre_op_attr / rfs3_do_post_op_attr before handing pre- or
 * post-operation attributes to the reply; when a switch is zero the
 * corresponding attribute pointer is left (or reset to) NULL.  In
 * non-DEBUG builds the attributes are always gathered.
 * NOTE(review): rfs3_do_post_op_fh3 is not referenced in this part of
 * the file -- presumably it gates post-op file handles; confirm.
 */
73 #ifdef DEBUG
74 int rfs3_do_pre_op_attr = 1;
75 int rfs3_do_post_op_attr = 1;
76 int rfs3_do_post_op_fh3 = 1;
77 #endif
78 
/*
 * NOTE(review): not referenced in this part of the file; presumably the
 * write verifier returned by WRITE/COMMIT -- confirm against its users.
 */
79 static writeverf3 write3verf;
80 
/*
 * Attribute conversion helpers used by the handlers below.  Only the
 * prototypes are visible here; the int-returning ones report a
 * conversion failure through their return value (see the callers).
 */
81 static int	sattr3_to_vattr(sattr3 *, struct vattr *);
82 static int	vattr_to_fattr3(struct vattr *, fattr3 *);
83 static int	vattr_to_wcc_attr(struct vattr *, wcc_attr *);
84 static void	vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
85 static void	vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
86 
/*
 * Copied into caller_context_t.cc_caller_id by the handlers that pass a
 * caller context to the VOP layer (rfs3_setattr, rfs3_read).
 */
87 u_longlong_t nfs3_srv_caller_id;
88 
89 /* ARGSUSED */
90 void
91 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
92 	struct svc_req *req, cred_t *cr)
93 {
94 	int error;
95 	vnode_t *vp;
96 	struct vattr va;
97 
98 	vp = nfs3_fhtovp(&args->object, exi);
99 
100 	DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
101 	    cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
102 
103 	if (vp == NULL) {
104 		error = ESTALE;
105 		goto out;
106 	}
107 
108 	va.va_mask = AT_ALL;
109 	error = rfs4_delegated_getattr(vp, &va, 0, cr);
110 
111 	if (!error) {
112 		/* overflow error if time or size is out of range */
113 		error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
114 		if (error)
115 			goto out;
116 		resp->status = NFS3_OK;
117 
118 		DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
119 		    cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
120 
121 		VN_RELE(vp);
122 
123 		return;
124 	}
125 
126 out:
127 	if (curthread->t_flag & T_WOULDBLOCK) {
128 		curthread->t_flag &= ~T_WOULDBLOCK;
129 		resp->status = NFS3ERR_JUKEBOX;
130 	} else
131 		resp->status = puterrno3(error);
132 
133 	DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
134 	    cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
135 
136 	if (vp != NULL)
137 		VN_RELE(vp);
138 }
139 
140 void *
141 rfs3_getattr_getfh(GETATTR3args *args)
142 {
143 
144 	return (&args->object);
145 }
146 
/*
 * NFSv3 SETATTR.
 *
 * Applies args->new_attributes to the object named by args->object and
 * reports the before/after attributes as weak cache consistency data.
 *
 * Order of operations:
 *  - map the file handle to a vnode (ESTALE if that fails);
 *  - convert the sattr3 into a vattr (ava);
 *  - on a labeled system, require an EQUALITY label match unless the
 *    client label equals admin_low;
 *  - for a size change on a regular file, enter the NBMAND critical
 *    region if nbl_need_check() asks for it;
 *  - fetch the pre-operation attributes (bva); failure aborts the call;
 *  - reject read-only exports/vnodes (NFS3ERR_ROFS) and a mismatching
 *    ctime guard (NFS3ERR_NOT_SYNC);
 *  - perform the size change via VOP_SPACE when the caller owns the file,
 *    then everything still set in ava.va_mask via VOP_SETATTR;
 *  - fetch post-operation attributes and VOP_FSYNC the metadata.
 *
 * Exit labels:
 *  out:	derive resp->status from `error' (or JUKEBOX when the thread
 *		has T_WOULDBLOCK set), then fall into out1
 *  out1:	resp->status already set by the caller of the goto; drop the
 *		critical region and the vnode hold, emit resfail wcc data
 */
147 void
148 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
149 	struct svc_req *req, cred_t *cr)
150 {
151 	int error;
152 	vnode_t *vp;
153 	struct vattr *bvap;
154 	struct vattr bva;
155 	struct vattr *avap;
156 	struct vattr ava;
157 	int flag;
158 	int in_crit = 0;
159 	struct flock64 bf;
160 	caller_context_t ct;
161 
	/* NULL means "no attributes available" to vattr_to_wcc_data(). */
162 	bvap = NULL;
163 	avap = NULL;
164 
165 	vp = nfs3_fhtovp(&args->object, exi);
166 
167 	DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
168 	    cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);
169 
170 	if (vp == NULL) {
171 		error = ESTALE;
172 		goto out;
173 	}
174 
175 	error = sattr3_to_vattr(&args->new_attributes, &ava);
176 	if (error)
177 		goto out;
178 
179 	if (is_system_labeled()) {
180 		bslabel_t *clabel = req->rq_label;
181 
182 		ASSERT(clabel != NULL);
183 		DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
184 		    "got client label from request(1)", struct svc_req *, req);
185 
		/* Clients at admin_low skip the per-object label check. */
186 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
187 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK)) {
188 				resp->status = NFS3ERR_ACCES;
189 				goto out1;
190 			}
191 		}
192 	}
193 
194 	/*
195 	 * We need to specially handle size changes because of
196 	 * possible conflicting NBMAND locks. Get into critical
197 	 * region before VOP_GETATTR, so the size attribute is
198 	 * valid when checking conflicts.
199 	 *
200 	 * Also, check to see if the v4 side of the server has
201 	 * delegated this file.  If so, then we return JUKEBOX to
202 	 * allow the client to retransmit its request.
203 	 */
204 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
205 		if (nbl_need_check(vp)) {
206 			nbl_start_crit(vp, RW_READER);
207 			in_crit = 1;
208 		}
209 	}
210 
211 	bva.va_mask = AT_ALL;
212 	error = rfs4_delegated_getattr(vp, &bva, 0, cr);
213 
214 	/*
215 	 * If we can't get the attributes, then we can't do the
216 	 * right access checking.  So, we'll fail the request.
217 	 */
218 	if (error)
219 		goto out;
220 
221 #ifdef DEBUG
222 	if (rfs3_do_pre_op_attr)
223 		bvap = &bva;
224 #else
225 	bvap = &bva;
226 #endif
227 
228 	if (rdonly(exi, req) || vn_is_readonly(vp)) {
229 		resp->status = NFS3ERR_ROFS;
230 		goto out1;
231 	}
232 
	/*
	 * Optional guard: the client supplies the ctime it last saw and
	 * the request is refused if the object has changed since.
	 */
233 	if (args->guard.check &&
234 	    (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
235 	    args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
236 		resp->status = NFS3ERR_NOT_SYNC;
237 		goto out1;
238 	}
239 
240 	if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
241 		flag = ATTR_UTIME;
242 	else
243 		flag = 0;
244 
245 	/*
246 	 * If the filesystem is exported with nosuid, then mask off
247 	 * the setuid and setgid bits.
248 	 */
249 	if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
250 	    (exi->exi_export.ex_flags & EX_NOSUID))
251 		ava.va_mode &= ~(VSUID | VSGID);
252 
	/*
	 * Caller context handed to the VOP calls below.  CC_DONTBLOCK is
	 * requested; CC_WOULDBLOCK is tested in cc_flags afterwards.
	 */
253 	ct.cc_sysid = 0;
254 	ct.cc_pid = 0;
255 	ct.cc_caller_id = nfs3_srv_caller_id;
256 	ct.cc_flags = CC_DONTBLOCK;
257 
258 	/*
259 	 * We need to specially handle size changes because it is
260 	 * possible for the client to create a file with modes
261 	 * which indicate read-only, but with the file opened for
262 	 * writing.  If the client then tries to set the size of
263 	 * the file, then the normal access checking done in
264 	 * VOP_SETATTR would prevent the client from doing so,
265 	 * although it should be legal for it to do so.  To get
266 	 * around this, we do the access checking for ourselves
267 	 * and then use VOP_SPACE which doesn't do the access
268 	 * checking which VOP_SETATTR does. VOP_SPACE can only
269 	 * operate on VREG files, let VOP_SETATTR handle the other
270 	 * extremely rare cases.
271 	 * Also the client should not be allowed to change the
272 	 * size of the file if there is a conflicting non-blocking
273 	 * mandatory lock in the region of the change.
274 	 */
275 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
276 		if (in_crit) {
277 			u_offset_t offset;
278 			ssize_t length;
279 
			/*
			 * The affected region is the byte range between the
			 * old and the new end of file, whichever is smaller
			 * coming first.
			 */
280 			if (ava.va_size < bva.va_size) {
281 				offset = ava.va_size;
282 				length = bva.va_size - ava.va_size;
283 			} else {
284 				offset = bva.va_size;
285 				length = ava.va_size - bva.va_size;
286 			}
287 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
288 			    NULL)) {
289 				error = EACCES;
290 				goto out;
291 			}
292 		}
293 
		/*
		 * Owner changing the size: do it through VOP_SPACE and
		 * remove AT_SIZE from the mask so VOP_SETATTR below does
		 * not repeat it.
		 */
294 		if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
295 			ava.va_mask &= ~AT_SIZE;
296 			bf.l_type = F_WRLCK;
297 			bf.l_whence = 0;
298 			bf.l_start = (off64_t)ava.va_size;
299 			bf.l_len = 0;
300 			bf.l_sysid = 0;
301 			bf.l_pid = 0;
302 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
303 			    (offset_t)ava.va_size, cr, &ct);
304 		}
305 	}
306 
	/* Apply whatever attributes are still selected in the mask. */
307 	if (!error && ava.va_mask)
308 		error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
309 
310 	/* check if a monitor detected a delegation conflict */
311 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
312 		resp->status = NFS3ERR_JUKEBOX;
313 		goto out1;
314 	}
315 
	/*
	 * Post-operation attributes are gathered even when the set failed;
	 * ava is reused as the output buffer from here on.
	 */
316 #ifdef DEBUG
317 	if (rfs3_do_post_op_attr) {
318 		ava.va_mask = AT_ALL;
319 		avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
320 	} else
321 		avap = NULL;
322 #else
323 	ava.va_mask = AT_ALL;
324 	avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
325 #endif
326 
327 	/*
328 	 * Force modified metadata out to stable storage.
329 	 */
330 	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
331 
332 	if (error)
333 		goto out;
334 
335 	if (in_crit)
336 		nbl_end_crit(vp);
337 
338 	resp->status = NFS3_OK;
339 	vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
340 
341 	DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
342 	    cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
343 
344 	VN_RELE(vp);
345 
346 	return;
347 
348 out:
349 	if (curthread->t_flag & T_WOULDBLOCK) {
350 		curthread->t_flag &= ~T_WOULDBLOCK;
351 		resp->status = NFS3ERR_JUKEBOX;
352 	} else
353 		resp->status = puterrno3(error);
354 out1:
355 	DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
356 	    cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
357 
358 	if (vp != NULL) {
359 		if (in_crit)
360 			nbl_end_crit(vp);
361 		VN_RELE(vp);
362 	}
363 	vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
364 }
365 
366 void *
367 rfs3_setattr_getfh(SETATTR3args *args)
368 {
369 
370 	return (&args->object);
371 }
372 
373 /* ARGSUSED */
374 void
375 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
376 	struct svc_req *req, cred_t *cr)
377 {
378 	int error;
379 	vnode_t *vp;
380 	vnode_t *dvp;
381 	struct vattr *vap;
382 	struct vattr va;
383 	struct vattr *dvap;
384 	struct vattr dva;
385 	nfs_fh3 *fhp;
386 	struct sec_ol sec = {0, 0};
387 	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
388 
389 	dvap = NULL;
390 
391 	/*
392 	 * Allow lookups from the root - the default
393 	 * location of the public filehandle.
394 	 */
395 	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
396 		dvp = rootdir;
397 		VN_HOLD(dvp);
398 
399 		DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
400 		    cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
401 	} else {
402 		dvp = nfs3_fhtovp(&args->what.dir, exi);
403 
404 		DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
405 		    cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
406 
407 		if (dvp == NULL) {
408 			error = ESTALE;
409 			goto out;
410 		}
411 	}
412 
413 #ifdef DEBUG
414 	if (rfs3_do_pre_op_attr) {
415 		dva.va_mask = AT_ALL;
416 		dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
417 	}
418 #else
419 	dva.va_mask = AT_ALL;
420 	dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
421 #endif
422 
423 	if (args->what.name == nfs3nametoolong) {
424 		resp->status = NFS3ERR_NAMETOOLONG;
425 		goto out1;
426 	}
427 
428 	if (args->what.name == NULL || *(args->what.name) == '\0') {
429 		resp->status = NFS3ERR_ACCES;
430 		goto out1;
431 	}
432 
433 	fhp = &args->what.dir;
434 	if (strcmp(args->what.name, "..") == 0 &&
435 	    EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
436 		resp->status = NFS3ERR_NOENT;
437 		goto out1;
438 	}
439 
440 	/*
441 	 * If the public filehandle is used then allow
442 	 * a multi-component lookup
443 	 */
444 	if (PUBLIC_FH3(&args->what.dir)) {
445 		publicfh_flag = TRUE;
446 		error = rfs_publicfh_mclookup(args->what.name, dvp, cr, &vp,
447 		    &exi, &sec);
448 		if (error && exi != NULL)
449 			exi_rele(exi); /* See comment below Re: publicfh_flag */
450 		/*
451 		 * Since WebNFS may bypass MOUNT, we need to ensure this
452 		 * request didn't come from an unlabeled admin_low client.
453 		 */
454 		if (is_system_labeled() && error == 0) {
455 			struct sockaddr *ca;
456 			int		addr_type;
457 			void		*ipaddr;
458 			tsol_tpc_t	*tp;
459 
460 			ca = (struct sockaddr *)svc_getrpccaller(
461 			    req->rq_xprt)->buf;
462 			if (ca->sa_family == AF_INET) {
463 				addr_type = IPV4_VERSION;
464 				ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
465 			} else if (ca->sa_family == AF_INET6) {
466 				addr_type = IPV6_VERSION;
467 				ipaddr = &((struct sockaddr_in6 *)
468 				    ca)->sin6_addr;
469 			}
470 			tp = find_tpc(ipaddr, addr_type, B_FALSE);
471 			if (tp == NULL || tp->tpc_tp.tp_doi !=
472 			    l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
473 			    SUN_CIPSO) {
474 				if (exi != NULL)
475 					exi_rele(exi);
476 				VN_RELE(vp);
477 				resp->status = NFS3ERR_ACCES;
478 				error = 1;
479 			}
480 			if (tp != NULL)
481 				TPC_RELE(tp);
482 		}
483 	} else {
484 		error = VOP_LOOKUP(dvp, args->what.name, &vp,
485 		    NULL, 0, NULL, cr, NULL, NULL, NULL);
486 	}
487 
488 	if (is_system_labeled() && error == 0) {
489 		bslabel_t *clabel = req->rq_label;
490 
491 		ASSERT(clabel != NULL);
492 		DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
493 		    "got client label from request(1)", struct svc_req *, req);
494 
495 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
496 			if (!do_rfs_label_check(clabel, dvp,
497 			    DOMINANCE_CHECK)) {
498 				if (publicfh_flag && exi != NULL)
499 					exi_rele(exi);
500 				VN_RELE(vp);
501 				resp->status = NFS3ERR_ACCES;
502 				error = 1;
503 			}
504 		}
505 	}
506 
507 #ifdef DEBUG
508 	if (rfs3_do_post_op_attr) {
509 		dva.va_mask = AT_ALL;
510 		dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
511 	} else
512 		dvap = NULL;
513 #else
514 	dva.va_mask = AT_ALL;
515 	dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
516 #endif
517 
518 	if (error)
519 		goto out;
520 
521 	if (sec.sec_flags & SEC_QUERY) {
522 		error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
523 	} else {
524 		error = makefh3(&resp->resok.object, vp, exi);
525 		if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
526 			auth_weak = TRUE;
527 	}
528 
529 	if (error) {
530 		VN_RELE(vp);
531 		goto out;
532 	}
533 
534 	/*
535 	 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
536 	 * and have obtained a new exportinfo in exi which needs to be
537 	 * released. Note the the original exportinfo pointed to by exi
538 	 * will be released by the caller, common_dispatch.
539 	 */
540 	if (publicfh_flag)
541 		exi_rele(exi);
542 
543 #ifdef DEBUG
544 	if (rfs3_do_post_op_attr) {
545 		va.va_mask = AT_ALL;
546 		vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
547 	} else
548 		vap = NULL;
549 #else
550 	va.va_mask = AT_ALL;
551 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
552 #endif
553 
554 	VN_RELE(vp);
555 
556 	resp->status = NFS3_OK;
557 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
558 	vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
559 
560 	/*
561 	 * If it's public fh, no 0x81, and client's flavor is
562 	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
563 	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
564 	 */
565 	if (auth_weak)
566 		resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
567 
568 	DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
569 	    cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
570 	VN_RELE(dvp);
571 
572 	return;
573 
574 out:
575 	if (curthread->t_flag & T_WOULDBLOCK) {
576 		curthread->t_flag &= ~T_WOULDBLOCK;
577 		resp->status = NFS3ERR_JUKEBOX;
578 	} else
579 		resp->status = puterrno3(error);
580 out1:
581 	DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
582 	    cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
583 
584 	if (dvp != NULL)
585 		VN_RELE(dvp);
586 	vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
587 
588 }
589 
590 void *
591 rfs3_lookup_getfh(LOOKUP3args *args)
592 {
593 
594 	return (&args->what.dir);
595 }
596 
/*
 * NFSv3 ACCESS.
 *
 * For each ACCESS3_* bit the client asked about in args->access, probe
 * the vnode with VOP_ACCESS and set the bit in resp->resok.access when
 * access would be granted.  A VOP_ACCESS failure simply leaves the bit
 * clear, unless the thread has T_WOULDBLOCK set, in which case the whole
 * request is answered with NFS3ERR_JUKEBOX via the out: label.
 *
 * Additional restrictions applied on top of VOP_ACCESS:
 *  - write-type bits (MODIFY/EXTEND/DELETE) are never granted for
 *    regular files and directories on a read-only export;
 *  - READ, MODIFY/EXTEND and EXECUTE are not granted when
 *    MANDLOCK(vp, mode) is true;
 *  - on labeled systems a client not at admin_low needs label dominance
 *    for READ/LOOKUP/EXECUTE and label equality for MODIFY/EXTEND/DELETE.
 */
597 /* ARGSUSED */
598 void
599 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
600 	struct svc_req *req, cred_t *cr)
601 {
602 	int error;
603 	vnode_t *vp;
604 	struct vattr *vap;
605 	struct vattr va;
606 	int checkwriteperm;
607 	boolean_t dominant_label = B_FALSE;
608 	boolean_t equal_label = B_FALSE;
	/*
	 * Only assigned when is_system_labeled(); every read below is
	 * short-circuited by a preceding !is_system_labeled() test.
	 */
609 	boolean_t admin_low_client;
610 
611 	vap = NULL;
612 
613 	vp = nfs3_fhtovp(&args->object, exi);
614 
615 	DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
616 	    cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
617 
618 	if (vp == NULL) {
619 		error = ESTALE;
620 		goto out;
621 	}
622 
623 	/*
624 	 * If the file system is exported read only, it is not appropriate
625 	 * to check write permissions for regular files and directories.
626 	 * Special files are interpreted by the client, so the underlying
627 	 * permissions are sent back to the client for interpretation.
628 	 */
629 	if (rdonly(exi, req) && (vp->v_type == VREG || vp->v_type == VDIR))
630 		checkwriteperm = 0;
631 	else
632 		checkwriteperm = 1;
633 
634 	/*
635 	 * We need the mode so that we can correctly determine access
636 	 * permissions relative to a mandatory lock file.  Access to
637 	 * mandatory lock files is denied on the server, so it might
638 	 * as well be reflected to the client during the open.
639 	 */
640 	va.va_mask = AT_MODE;
641 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
642 	if (error)
643 		goto out;
644 
645 #ifdef DEBUG
646 	if (rfs3_do_post_op_attr)
647 		vap = &va;
648 #else
649 	vap = &va;
650 #endif
651 
	/* Start with nothing granted; bits are OR-ed in below. */
652 	resp->resok.access = 0;
653 
654 	if (is_system_labeled()) {
655 		bslabel_t *clabel = req->rq_label;
656 
657 		ASSERT(clabel != NULL);
658 		DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
659 		    "got client label from request(1)", struct svc_req *, req);
660 
		/*
		 * Equality implies dominance, so the dominance check is
		 * only run when the labels are not equal.
		 */
661 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
662 			if ((equal_label = do_rfs_label_check(clabel, vp,
663 			    EQUALITY_CHECK)) == B_FALSE) {
664 				dominant_label = do_rfs_label_check(clabel,
665 				    vp, DOMINANCE_CHECK);
666 			} else
667 				dominant_label = B_TRUE;
668 			admin_low_client = B_FALSE;
669 		} else
670 			admin_low_client = B_TRUE;
671 	}
672 
673 	if (args->access & ACCESS3_READ) {
674 		error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
675 		if (error) {
676 			if (curthread->t_flag & T_WOULDBLOCK)
677 				goto out;
678 		} else if (!MANDLOCK(vp, va.va_mode) &&
679 		    (!is_system_labeled() || admin_low_client ||
680 		    dominant_label))
681 			resp->resok.access |= ACCESS3_READ;
682 	}
	/* LOOKUP is only meaningful for directories. */
683 	if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
684 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
685 		if (error) {
686 			if (curthread->t_flag & T_WOULDBLOCK)
687 				goto out;
688 		} else if (!is_system_labeled() || admin_low_client ||
689 		    dominant_label)
690 			resp->resok.access |= ACCESS3_LOOKUP;
691 	}
	/* Grant only those of MODIFY/EXTEND that were actually requested. */
692 	if (checkwriteperm &&
693 	    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
694 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
695 		if (error) {
696 			if (curthread->t_flag & T_WOULDBLOCK)
697 				goto out;
698 		} else if (!MANDLOCK(vp, va.va_mode) &&
699 		    (!is_system_labeled() || admin_low_client || equal_label)) {
700 			resp->resok.access |=
701 			    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
702 		}
703 	}
	/* DELETE is only reported for directories (write permission). */
704 	if (checkwriteperm &&
705 	    (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
706 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
707 		if (error) {
708 			if (curthread->t_flag & T_WOULDBLOCK)
709 				goto out;
710 		} else if (!is_system_labeled() || admin_low_client ||
711 		    equal_label)
712 			resp->resok.access |= ACCESS3_DELETE;
713 	}
714 	if (args->access & ACCESS3_EXECUTE) {
715 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
716 		if (error) {
717 			if (curthread->t_flag & T_WOULDBLOCK)
718 				goto out;
719 		} else if (!MANDLOCK(vp, va.va_mode) &&
720 		    (!is_system_labeled() || admin_low_client ||
721 		    dominant_label))
722 			resp->resok.access |= ACCESS3_EXECUTE;
723 	}
724 
	/*
	 * va only held AT_MODE so far; fetch the full attribute set for
	 * the post-operation attributes in the reply.
	 */
725 #ifdef DEBUG
726 	if (rfs3_do_post_op_attr) {
727 		va.va_mask = AT_ALL;
728 		vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
729 	} else
730 		vap = NULL;
731 #else
732 	va.va_mask = AT_ALL;
733 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
734 #endif
735 
736 	resp->status = NFS3_OK;
737 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
738 
739 	DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
740 	    cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
741 
742 	VN_RELE(vp);
743 
744 	return;
745 
746 out:
747 	if (curthread->t_flag & T_WOULDBLOCK) {
748 		curthread->t_flag &= ~T_WOULDBLOCK;
749 		resp->status = NFS3ERR_JUKEBOX;
750 	} else
751 		resp->status = puterrno3(error);
752 	DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
753 	    cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
754 	if (vp != NULL)
755 		VN_RELE(vp);
756 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
757 }
758 
759 void *
760 rfs3_access_getfh(ACCESS3args *args)
761 {
762 
763 	return (&args->object);
764 }
765 
/*
 * NFSv3 READLINK.
 *
 * Reads the target of the symbolic link named by args->symlink into a
 * freshly allocated, NUL-terminated buffer returned in resp->resok.data.
 *
 * Failure statuses set directly here:
 *	NFS3ERR_INVAL	the object is not a symbolic link
 *	NFS3ERR_ACCES	MANDLOCK() is true for the object, or the client
 *			label does not dominate the object's label
 * Any other failure is mapped from `error' at the out: label.
 *
 * Buffer ownership: on NFS3_OK the MAXPATHLEN + 1 byte buffer is left in
 * the reply and released by rfs3_readlink_free(); on a VOP_READLINK
 * error it is freed here.
 */
766 /* ARGSUSED */
767 void
768 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
769 	struct svc_req *req, cred_t *cr)
770 {
771 	int error;
772 	vnode_t *vp;
773 	struct vattr *vap;
774 	struct vattr va;
775 	struct iovec iov;
776 	struct uio uio;
777 	char *data;
778 
779 	vap = NULL;
780 
781 	vp = nfs3_fhtovp(&args->symlink, exi);
782 
783 	DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
784 	    cred_t *, cr, vnode_t *, vp, READLINK3args *, args);
785 
786 	if (vp == NULL) {
787 		error = ESTALE;
788 		goto out;
789 	}
790 
791 	va.va_mask = AT_ALL;
792 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
793 	if (error)
794 		goto out;
795 
796 #ifdef DEBUG
797 	if (rfs3_do_post_op_attr)
798 		vap = &va;
799 #else
800 	vap = &va;
801 #endif
802 
803 	if (vp->v_type != VLNK) {
804 		resp->status = NFS3ERR_INVAL;
805 		goto out1;
806 	}
807 
808 	if (MANDLOCK(vp, va.va_mode)) {
809 		resp->status = NFS3ERR_ACCES;
810 		goto out1;
811 	}
812 
813 	if (is_system_labeled()) {
814 		bslabel_t *clabel = req->rq_label;
815 
816 		ASSERT(clabel != NULL);
817 		DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
818 		    "got client label from request(1)", struct svc_req *, req);
819 
		/* Clients at admin_low skip the per-object label check. */
820 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
821 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
822 				resp->status = NFS3ERR_ACCES;
823 				goto out1;
824 			}
825 		}
826 	}
827 
	/*
	 * One byte more than the uio may fill, so the terminating NUL
	 * written below always fits.
	 */
828 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
829 
830 	iov.iov_base = data;
831 	iov.iov_len = MAXPATHLEN;
832 	uio.uio_iov = &iov;
833 	uio.uio_iovcnt = 1;
834 	uio.uio_segflg = UIO_SYSSPACE;
835 	uio.uio_extflg = UIO_COPY_CACHED;
836 	uio.uio_loffset = 0;
837 	uio.uio_resid = MAXPATHLEN;
838 
839 	error = VOP_READLINK(vp, &uio, cr, NULL);
840 
	/* Refresh the attributes regardless of the readlink outcome. */
841 #ifdef DEBUG
842 	if (rfs3_do_post_op_attr) {
843 		va.va_mask = AT_ALL;
844 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
845 	} else
846 		vap = NULL;
847 #else
848 	va.va_mask = AT_ALL;
849 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
850 #endif
851 
852 #if 0 /* notyet */
853 	/*
854 	 * Don't do this.  It causes local disk writes when just
855 	 * reading the file and the overhead is deemed larger
856 	 * than the benefit.
857 	 */
858 	/*
859 	 * Force modified metadata out to stable storage.
860 	 */
861 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
862 #endif
863 
864 	if (error) {
865 		kmem_free(data, MAXPATHLEN + 1);
866 		goto out;
867 	}
868 
869 	resp->status = NFS3_OK;
870 	vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
871 	resp->resok.data = data;
	/* MAXPATHLEN - uio_resid is the number of bytes actually read. */
872 	*(data + MAXPATHLEN - uio.uio_resid) = '\0';
873 
874 	DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
875 	    cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
876 	VN_RELE(vp);
877 
878 	return;
879 
880 out:
881 	if (curthread->t_flag & T_WOULDBLOCK) {
882 		curthread->t_flag &= ~T_WOULDBLOCK;
883 		resp->status = NFS3ERR_JUKEBOX;
884 	} else
885 		resp->status = puterrno3(error);
886 out1:
887 	DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
888 	    cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
889 	if (vp != NULL)
890 		VN_RELE(vp);
891 	vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
892 }
893 
894 void *
895 rfs3_readlink_getfh(READLINK3args *args)
896 {
897 
898 	return (&args->symlink);
899 }
900 
901 void
902 rfs3_readlink_free(READLINK3res *resp)
903 {
904 
905 	if (resp->status == NFS3_OK)
906 		kmem_free(resp->resok.data, MAXPATHLEN + 1);
907 }
908 
/*
 * NFSv3 READ.
 *
 * Reads up to args->count bytes at args->offset from the regular file
 * named by args->file into a newly allocated mblk, which is returned in
 * resp->resok.data and released later by rfs3_read_free().
 *
 * Locking: the NBMAND critical region (when nbl_need_check() requires
 * it) is entered before VOP_RWLOCK.  in_crit and need_rwunlock record
 * what the out/out1 exit path has to undo; the success paths undo both
 * inline before jumping to done:.
 *
 * Exit labels:
 *  done:	reply already filled in with NFS3_OK; release the vnode
 *  out:	derive resp->status from `error' (or JUKEBOX when the thread
 *		has T_WOULDBLOCK set), then fall into out1
 *  out1:	resp->status already set; undo locks, release the vnode and
 *		emit the failure attributes
 */
909 /* ARGSUSED */
910 void
911 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
912 	struct svc_req *req, cred_t *cr)
913 {
914 	int error;
915 	vnode_t *vp;
916 	struct vattr *vap;
917 	struct vattr va;
918 	struct iovec iov;
919 	struct uio uio;
920 	u_offset_t offset;
921 	mblk_t *mp;
922 	int alloc_err = 0;
923 	int in_crit = 0;
924 	int need_rwunlock = 0;
925 	caller_context_t ct;
926 
927 	vap = NULL;
928 
929 	vp = nfs3_fhtovp(&args->file, exi);
930 
931 	DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
932 	    cred_t *, cr, vnode_t *, vp, READ3args *, args);
933 
934 	if (vp == NULL) {
935 		error = ESTALE;
936 		goto out;
937 	}
938 
939 	if (is_system_labeled()) {
940 		bslabel_t *clabel = req->rq_label;
941 
942 		ASSERT(clabel != NULL);
943 		DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
944 		    "got client label from request(1)", struct svc_req *, req);
945 
		/* Clients at admin_low skip the per-object label check. */
946 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
947 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
948 				resp->status = NFS3ERR_ACCES;
949 				goto out1;
950 			}
951 		}
952 	}
953 
	/*
	 * Caller context handed to the VOP calls below.  CC_DONTBLOCK is
	 * requested; CC_WOULDBLOCK is tested in cc_flags afterwards.
	 */
954 	ct.cc_sysid = 0;
955 	ct.cc_pid = 0;
956 	ct.cc_caller_id = nfs3_srv_caller_id;
957 	ct.cc_flags = CC_DONTBLOCK;
958 
959 	/*
960 	 * Enter the critical region before calling VOP_RWLOCK
961 	 * to avoid a deadlock with write requests.
962 	 */
963 	if (nbl_need_check(vp)) {
964 		nbl_start_crit(vp, RW_READER);
965 		in_crit = 1;
966 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
967 		    NULL)) {
968 			error = EACCES;
969 			goto out;
970 		}
971 	}
972 
973 	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
974 
975 	/* check if a monitor detected a delegation conflict */
976 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
977 		resp->status = NFS3ERR_JUKEBOX;
978 		goto out1;
979 	}
980 
	/* From here on the out/out1 path must drop the rwlock. */
981 	need_rwunlock = 1;
982 
983 	va.va_mask = AT_ALL;
984 	error = VOP_GETATTR(vp, &va, 0, cr, &ct);
985 
986 	/*
987 	 * If we can't get the attributes, then we can't do the
988 	 * right access checking.  So, we'll fail the request.
989 	 */
990 	if (error)
991 		goto out;
992 
993 #ifdef DEBUG
994 	if (rfs3_do_post_op_attr)
995 		vap = &va;
996 #else
997 	vap = &va;
998 #endif
999 
1000 	if (vp->v_type != VREG) {
1001 		resp->status = NFS3ERR_INVAL;
1002 		goto out1;
1003 	}
1004 
	/*
	 * The owner is not access-checked.  Anyone else needs read
	 * permission or, failing that, execute permission.
	 */
1005 	if (crgetuid(cr) != va.va_uid) {
1006 		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1007 		if (error) {
1008 			if (curthread->t_flag & T_WOULDBLOCK)
1009 				goto out;
1010 			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1011 			if (error)
1012 				goto out;
1013 		}
1014 	}
1015 
1016 	if (MANDLOCK(vp, va.va_mode)) {
1017 		resp->status = NFS3ERR_ACCES;
1018 		goto out1;
1019 	}
1020 
	/* Reading at or past end of file: empty reply with eof set. */
1021 	offset = args->offset;
1022 	if (offset >= va.va_size) {
1023 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1024 		if (in_crit)
1025 			nbl_end_crit(vp);
1026 		resp->status = NFS3_OK;
1027 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1028 		resp->resok.count = 0;
1029 		resp->resok.eof = TRUE;
1030 		resp->resok.data.data_len = 0;
1031 		resp->resok.data.data_val = NULL;
1032 		resp->resok.data.mp = NULL;
1033 		goto done;
1034 	}
1035 
	/* Zero-length read inside the file: empty reply, eof clear. */
1036 	if (args->count == 0) {
1037 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1038 		if (in_crit)
1039 			nbl_end_crit(vp);
1040 		resp->status = NFS3_OK;
1041 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1042 		resp->resok.count = 0;
1043 		resp->resok.eof = FALSE;
1044 		resp->resok.data.data_len = 0;
1045 		resp->resok.data.data_val = NULL;
1046 		resp->resok.data.mp = NULL;
1047 		goto done;
1048 	}
1049 
1050 	/*
1051 	 * do not allocate more memory than the max. allowed
1052 	 * transfer size
1053 	 */
1054 	if (args->count > rfs3_tsize(req))
1055 		args->count = rfs3_tsize(req);
1056 
1057 	/*
1058 	 * mp will contain the data to be sent out in the read reply.
1059 	 * This will be freed after the reply has been sent out (by the
1060 	 * driver).
1061 	 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
1062 	 * that the call to xdrmblk_putmblk() never fails.
1063 	 */
1064 	mp = allocb_wait(RNDUP(args->count), BPRI_MED, STR_NOSIG, &alloc_err);
1065 	ASSERT(mp != NULL);
1066 	ASSERT(alloc_err == 0);
1067 
	/* Read straight into the mblk's data buffer. */
1068 	iov.iov_base = (caddr_t)mp->b_datap->db_base;
1069 	iov.iov_len = args->count;
1070 	uio.uio_iov = &iov;
1071 	uio.uio_iovcnt = 1;
1072 	uio.uio_segflg = UIO_SYSSPACE;
1073 	uio.uio_extflg = UIO_COPY_CACHED;
1074 	uio.uio_loffset = args->offset;
1075 	uio.uio_resid = args->count;
1076 
1077 	error = VOP_READ(vp, &uio, 0, cr, &ct);
1078 
1079 	if (error) {
1080 		freeb(mp);
1081 		/* check if a monitor detected a delegation conflict */
1082 		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1083 			resp->status = NFS3ERR_JUKEBOX;
1084 			goto out1;
1085 		}
1086 		goto out;
1087 	}
1088 
	/*
	 * Post-read attributes.  A failure here does not fail the read;
	 * it only suppresses the attributes and the eof indication.
	 */
1089 	va.va_mask = AT_ALL;
1090 	error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1091 
1092 #ifdef DEBUG
1093 	if (rfs3_do_post_op_attr) {
1094 		if (error)
1095 			vap = NULL;
1096 		else
1097 			vap = &va;
1098 	} else
1099 		vap = NULL;
1100 #else
1101 	if (error)
1102 		vap = NULL;
1103 	else
1104 		vap = &va;
1105 #endif
1106 
1107 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1108 
1109 #if 0 /* notyet */
1110 	/*
1111 	 * Don't do this.  It causes local disk writes when just
1112 	 * reading the file and the overhead is deemed larger
1113 	 * than the benefit.
1114 	 */
1115 	/*
1116 	 * Force modified metadata out to stable storage.
1117 	 */
1118 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1119 #endif
1120 
1121 	if (in_crit)
1122 		nbl_end_crit(vp);
1123 
1124 	resp->status = NFS3_OK;
1125 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
	/* Bytes actually read = requested minus what the uio left over. */
1126 	resp->resok.count = args->count - uio.uio_resid;
1127 	if (!error && offset + resp->resok.count == va.va_size)
1128 		resp->resok.eof = TRUE;
1129 	else
1130 		resp->resok.eof = FALSE;
1131 	resp->resok.data.data_len = resp->resok.count;
1132 	resp->resok.data.data_val = (char *)mp->b_datap->db_base;
1133 
1134 	resp->resok.data.mp = mp;
1135 
1136 	resp->resok.size = (uint_t)args->count;
1137 
1138 done:
1139 	DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1140 	    cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1141 
1142 	VN_RELE(vp);
1143 
1144 	return;
1145 
1146 out:
1147 	if (curthread->t_flag & T_WOULDBLOCK) {
1148 		curthread->t_flag &= ~T_WOULDBLOCK;
1149 		resp->status = NFS3ERR_JUKEBOX;
1150 	} else
1151 		resp->status = puterrno3(error);
1152 out1:
1153 	DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1154 	    cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1155 
1156 	if (vp != NULL) {
1157 		if (need_rwunlock)
1158 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1159 		if (in_crit)
1160 			nbl_end_crit(vp);
1161 		VN_RELE(vp);
1162 	}
1163 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1164 }
1165 
1166 void
1167 rfs3_read_free(READ3res *resp)
1168 {
1169 	mblk_t *mp;
1170 
1171 	if (resp->status == NFS3_OK) {
1172 		mp = resp->resok.data.mp;
1173 		if (mp != NULL)
1174 			freeb(mp);
1175 	}
1176 }
1177 
1178 void *
1179 rfs3_read_getfh(READ3args *args)
1180 {
1181 
1182 	return (&args->file);
1183 }
1184 
/*
 * Size of the on-stack iovec array used by rfs3_write().  A write request
 * whose mblk chain has more links than this uses a kmem_alloc()'d iovec
 * array instead.
 */
#define	MAX_IOVECS	12

#ifdef DEBUG
/*
 * DEBUG-only counters maintained by rfs3_write(): "hits" counts requests
 * whose mblk chain fit in the on-stack iovec array, "misses" counts those
 * that needed a dynamically allocated array.
 */
static int rfs3_write_hits = 0;
static int rfs3_write_misses = 0;
#endif
1191 
1192 void
1193 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1194 	struct svc_req *req, cred_t *cr)
1195 {
1196 	int error;
1197 	vnode_t *vp;
1198 	struct vattr *bvap = NULL;
1199 	struct vattr bva;
1200 	struct vattr *avap = NULL;
1201 	struct vattr ava;
1202 	u_offset_t rlimit;
1203 	struct uio uio;
1204 	struct iovec iov[MAX_IOVECS];
1205 	mblk_t *m;
1206 	struct iovec *iovp;
1207 	int iovcnt;
1208 	int ioflag;
1209 	cred_t *savecred;
1210 	int in_crit = 0;
1211 	int rwlock_ret = -1;
1212 	caller_context_t ct;
1213 
1214 	vp = nfs3_fhtovp(&args->file, exi);
1215 
1216 	DTRACE_NFSV3_4(op__write__start, struct svc_req *, req,
1217 	    cred_t *, cr, vnode_t *, vp, WRITE3args *, args);
1218 
1219 	if (vp == NULL) {
1220 		error = ESTALE;
1221 		goto err;
1222 	}
1223 
1224 	if (is_system_labeled()) {
1225 		bslabel_t *clabel = req->rq_label;
1226 
1227 		ASSERT(clabel != NULL);
1228 		DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1229 		    "got client label from request(1)", struct svc_req *, req);
1230 
1231 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1232 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK)) {
1233 				resp->status = NFS3ERR_ACCES;
1234 				goto err1;
1235 			}
1236 		}
1237 	}
1238 
1239 	ct.cc_sysid = 0;
1240 	ct.cc_pid = 0;
1241 	ct.cc_caller_id = nfs3_srv_caller_id;
1242 	ct.cc_flags = CC_DONTBLOCK;
1243 
1244 	/*
1245 	 * We have to enter the critical region before calling VOP_RWLOCK
1246 	 * to avoid a deadlock with ufs.
1247 	 */
1248 	if (nbl_need_check(vp)) {
1249 		nbl_start_crit(vp, RW_READER);
1250 		in_crit = 1;
1251 		if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1252 		    NULL)) {
1253 			error = EACCES;
1254 			goto err;
1255 		}
1256 	}
1257 
1258 	rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1259 
1260 	/* check if a monitor detected a delegation conflict */
1261 	if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1262 		resp->status = NFS3ERR_JUKEBOX;
1263 		rwlock_ret = -1;
1264 		goto err1;
1265 	}
1266 
1267 
1268 	bva.va_mask = AT_ALL;
1269 	error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1270 
1271 	/*
1272 	 * If we can't get the attributes, then we can't do the
1273 	 * right access checking.  So, we'll fail the request.
1274 	 */
1275 	if (error)
1276 		goto err;
1277 
1278 	bvap = &bva;
1279 #ifdef DEBUG
1280 	if (!rfs3_do_pre_op_attr)
1281 		bvap = NULL;
1282 #endif
1283 	avap = bvap;
1284 
1285 	if (args->count != args->data.data_len) {
1286 		resp->status = NFS3ERR_INVAL;
1287 		goto err1;
1288 	}
1289 
1290 	if (rdonly(exi, req)) {
1291 		resp->status = NFS3ERR_ROFS;
1292 		goto err1;
1293 	}
1294 
1295 	if (vp->v_type != VREG) {
1296 		resp->status = NFS3ERR_INVAL;
1297 		goto err1;
1298 	}
1299 
1300 	if (crgetuid(cr) != bva.va_uid &&
1301 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1302 		goto err;
1303 
1304 	if (MANDLOCK(vp, bva.va_mode)) {
1305 		resp->status = NFS3ERR_ACCES;
1306 		goto err1;
1307 	}
1308 
1309 	if (args->count == 0) {
1310 		resp->status = NFS3_OK;
1311 		vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1312 		resp->resok.count = 0;
1313 		resp->resok.committed = args->stable;
1314 		resp->resok.verf = write3verf;
1315 		goto out;
1316 	}
1317 
1318 	if (args->mblk != NULL) {
1319 		iovcnt = 0;
1320 		for (m = args->mblk; m != NULL; m = m->b_cont)
1321 			iovcnt++;
1322 		if (iovcnt <= MAX_IOVECS) {
1323 #ifdef DEBUG
1324 			rfs3_write_hits++;
1325 #endif
1326 			iovp = iov;
1327 		} else {
1328 #ifdef DEBUG
1329 			rfs3_write_misses++;
1330 #endif
1331 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1332 		}
1333 		mblk_to_iov(args->mblk, iovcnt, iovp);
1334 	} else {
1335 		iovcnt = 1;
1336 		iovp = iov;
1337 		iovp->iov_base = args->data.data_val;
1338 		iovp->iov_len = args->count;
1339 	}
1340 
1341 	uio.uio_iov = iovp;
1342 	uio.uio_iovcnt = iovcnt;
1343 
1344 	uio.uio_segflg = UIO_SYSSPACE;
1345 	uio.uio_extflg = UIO_COPY_DEFAULT;
1346 	uio.uio_loffset = args->offset;
1347 	uio.uio_resid = args->count;
1348 	uio.uio_llimit = curproc->p_fsz_ctl;
1349 	rlimit = uio.uio_llimit - args->offset;
1350 	if (rlimit < (u_offset_t)uio.uio_resid)
1351 		uio.uio_resid = (int)rlimit;
1352 
1353 	if (args->stable == UNSTABLE)
1354 		ioflag = 0;
1355 	else if (args->stable == FILE_SYNC)
1356 		ioflag = FSYNC;
1357 	else if (args->stable == DATA_SYNC)
1358 		ioflag = FDSYNC;
1359 	else {
1360 		if (iovp != iov)
1361 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
1362 		resp->status = NFS3ERR_INVAL;
1363 		goto err1;
1364 	}
1365 
1366 	/*
1367 	 * We're changing creds because VM may fault and we need
1368 	 * the cred of the current thread to be used if quota
1369 	 * checking is enabled.
1370 	 */
1371 	savecred = curthread->t_cred;
1372 	curthread->t_cred = cr;
1373 	error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1374 	curthread->t_cred = savecred;
1375 
1376 	if (iovp != iov)
1377 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
1378 
1379 	/* check if a monitor detected a delegation conflict */
1380 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1381 		resp->status = NFS3ERR_JUKEBOX;
1382 		goto err1;
1383 	}
1384 
1385 	ava.va_mask = AT_ALL;
1386 	avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1387 
1388 #ifdef DEBUG
1389 	if (!rfs3_do_post_op_attr)
1390 		avap = NULL;
1391 #endif
1392 
1393 	if (error)
1394 		goto err;
1395 
1396 	/*
1397 	 * If we were unable to get the V_WRITELOCK_TRUE, then we
1398 	 * may not have accurate after attrs, so check if
1399 	 * we have both attributes, they have a non-zero va_seq, and
1400 	 * va_seq has changed by exactly one,
1401 	 * if not, turn off the before attr.
1402 	 */
1403 	if (rwlock_ret != V_WRITELOCK_TRUE) {
1404 		if (bvap == NULL || avap == NULL ||
1405 		    bvap->va_seq == 0 || avap->va_seq == 0 ||
1406 		    avap->va_seq != (bvap->va_seq + 1)) {
1407 			bvap = NULL;
1408 		}
1409 	}
1410 
1411 	resp->status = NFS3_OK;
1412 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1413 	resp->resok.count = args->count - uio.uio_resid;
1414 	resp->resok.committed = args->stable;
1415 	resp->resok.verf = write3verf;
1416 	goto out;
1417 
1418 err:
1419 	if (curthread->t_flag & T_WOULDBLOCK) {
1420 		curthread->t_flag &= ~T_WOULDBLOCK;
1421 		resp->status = NFS3ERR_JUKEBOX;
1422 	} else
1423 		resp->status = puterrno3(error);
1424 err1:
1425 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1426 out:
1427 	DTRACE_NFSV3_4(op__write__done, struct svc_req *, req,
1428 	    cred_t *, cr, vnode_t *, vp, WRITE3res *, resp);
1429 
1430 	if (vp != NULL) {
1431 		if (rwlock_ret != -1)
1432 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1433 		if (in_crit)
1434 			nbl_end_crit(vp);
1435 		VN_RELE(vp);
1436 	}
1437 }
1438 
1439 void *
1440 rfs3_write_getfh(WRITE3args *args)
1441 {
1442 
1443 	return (&args->file);
1444 }
1445 
/*
 * NFSv3 CREATE procedure.
 *
 * Creates the regular file args->where.name in the directory named by the
 * file handle args->where.dir and builds the reply in *resp.
 *
 * args->how.mode selects the semantics:
 *   EXCLUSIVE - the file is created with mode 0 and the client's verifier
 *               stored in its mtime; if the file already exists, the
 *               request only succeeds when the stored mtime matches the
 *               verifier.
 *   GUARDED   - the file is created with the supplied attributes and an
 *               existing file yields EEXIST.
 *   otherwise - the file is created non-exclusively with the supplied
 *               attributes.
 *
 * On success resp->resok holds the file handle (when one could be built),
 * the file's attributes and the directory's wcc data.  On failure
 * resp->status is set and resp->resfail holds the directory's wcc data.
 */
void
rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
	struct svc_req *req, cred_t *cr)
{
	int error;
	int in_crit = 0;	/* set while in tvp's nbmand critical region */
	vnode_t *vp;
	vnode_t *tvp = NULL;	/* pre-existing file found by the lookup */
	vnode_t *dvp;
	struct vattr *vap;
	struct vattr va;
	struct vattr *dbvap;
	struct vattr dbva;
	struct vattr *davap;
	struct vattr dava;
	enum vcexcl excl;
	nfstime3 *mtime;
	len_t reqsize;		/* only assigned when AT_SIZE was requested */
	bool_t trunc;

	dbvap = NULL;
	davap = NULL;

	dvp = nfs3_fhtovp(&args->where.dir, exi);

	DTRACE_NFSV3_4(op__create__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, CREATE3args *, args);

	if (dvp == NULL) {
		error = ESTALE;
		goto out;
	}

	/* Collect the directory's attributes before it is modified. */
#ifdef DEBUG
	if (rfs3_do_pre_op_attr) {
		dbva.va_mask = AT_ALL;
		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
	} else
		dbvap = NULL;
#else
	dbva.va_mask = AT_ALL;
	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
#endif
	/* until the create is attempted, "after" equals "before" */
	davap = dbvap;

	if (args->where.name == nfs3nametoolong) {
		resp->status = NFS3ERR_NAMETOOLONG;
		goto out1;
	}

	if (args->where.name == NULL || *(args->where.name) == '\0') {
		resp->status = NFS3ERR_ACCES;
		goto out1;
	}

	if (rdonly(exi, req)) {
		resp->status = NFS3ERR_ROFS;
		goto out1;
	}

	if (is_system_labeled()) {
		bslabel_t *clabel = req->rq_label;

		ASSERT(clabel != NULL);
		DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK)) {
				resp->status = NFS3ERR_ACCES;
				goto out1;
			}
		}
	}

	if (args->how.mode == EXCLUSIVE) {
		/*
		 * Exclusive create: the client's verifier is stored in the
		 * new file's mtime so that a retransmitted request can be
		 * recognized further down.
		 */
		va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
		va.va_type = VREG;
		va.va_mode = (mode_t)0;
		/*
		 * Ensure no time overflows and that types match
		 */
		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
		va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
		va.va_mtime.tv_nsec = mtime->nseconds;
		excl = EXCL;
	} else {
		error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
		    &va);
		if (error)
			goto out;
		va.va_mask |= AT_TYPE;
		va.va_type = VREG;
		if (args->how.mode == GUARDED)
			excl = EXCL;
		else {
			excl = NONEXCL;

			/*
			 * During creation of file in non-exclusive mode
			 * if size of file is being set then make sure
			 * that if the file already exists that no conflicting
			 * non-blocking mandatory locks exists in the region
			 * being modified. If there are conflicting locks fail
			 * the operation with EACCES.
			 */
			if (va.va_mask & AT_SIZE) {
				struct vattr tva;

				/*
				 * Does file already exist?
				 */
				error = VOP_LOOKUP(dvp, args->where.name, &tvp,
				    NULL, 0, NULL, cr, NULL, NULL, NULL);

				/*
				 * Check to see if the file has been delegated
				 * to a v4 client.  If so, then begin recall of
				 * the delegation and return JUKEBOX to allow
				 * the client to retransmit its request.
				 */

				trunc = va.va_size == 0;
				if (!error &&
				    rfs4_check_delegated(FWRITE, tvp, trunc)) {
					resp->status = NFS3ERR_JUKEBOX;
					goto out1;
				}

				/*
				 * Check for NBMAND lock conflicts
				 */
				if (!error && nbl_need_check(tvp)) {
					u_offset_t offset;
					ssize_t len;

					nbl_start_crit(tvp, RW_READER);
					in_crit = 1;

					tva.va_mask = AT_SIZE;
					error = VOP_GETATTR(tvp, &tva, 0, cr,
					    NULL);
					/*
					 * Can't check for conflicts, so return
					 * error.
					 */
					if (error)
						goto out;

					/*
					 * The region being modified lies
					 * between the smaller and the larger
					 * of the current and requested sizes.
					 */
					offset = tva.va_size < va.va_size ?
					    tva.va_size : va.va_size;
					len = tva.va_size < va.va_size ?
					    va.va_size - tva.va_size :
					    tva.va_size - va.va_size;
					if (nbl_conflict(tvp, NBL_WRITE,
					    offset, len, 0, NULL)) {
						error = EACCES;
						goto out;
					}
				} else if (tvp) {
					/* no nbmand check needed; drop hold */
					VN_RELE(tvp);
					tvp = NULL;
				}
			}
		}
		/* remember the requested size for the check after create */
		if (va.va_mask & AT_SIZE)
			reqsize = va.va_size;
	}

	/*
	 * Must specify the mode.
	 */
	if (!(va.va_mask & AT_MODE)) {
		resp->status = NFS3ERR_INVAL;
		goto out1;
	}

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

tryagain:
	/*
	 * The file open mode used is VWRITE.  If the client needs
	 * some other semantic, then it should do the access checking
	 * itself.  It would have been nice to have the file open mode
	 * passed as part of the arguments.
	 */
	error = VOP_CREATE(dvp, args->where.name, &va, excl, VWRITE,
	    &vp, cr, 0, NULL, NULL);

	/* Collect the directory's attributes after the create attempt. */
#ifdef DEBUG
	if (rfs3_do_post_op_attr) {
		dava.va_mask = AT_ALL;
		davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
	} else
		davap = NULL;
#else
	dava.va_mask = AT_ALL;
	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
#endif

	if (error) {
		/*
		 * If we got something other than file already exists
		 * then just return this error.  Otherwise, we got
		 * EEXIST.  If we were doing a GUARDED create, then
		 * just return this error.  Otherwise, we need to
		 * make sure that this wasn't a duplicate of an
		 * exclusive create request.
		 *
		 * The assumption is made that a non-exclusive create
		 * request will never return EEXIST.
		 */
		if (error != EEXIST || args->how.mode == GUARDED)
			goto out;
		/*
		 * Lookup the file so that we can get a vnode for it.
		 */
		error = VOP_LOOKUP(dvp, args->where.name, &vp, NULL, 0,
		    NULL, cr, NULL, NULL, NULL);
		if (error) {
			/*
			 * We couldn't find the file that we thought that
			 * we just created.  So, we'll just try creating
			 * it again.
			 */
			if (error == ENOENT)
				goto tryagain;
			goto out;
		}

		/*
		 * If the file is delegated to a v4 client, go ahead
		 * and initiate recall, this create is a hint that a
		 * conflicting v3 open has occurred.
		 */

		if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
			VN_RELE(vp);
			resp->status = NFS3ERR_JUKEBOX;
			goto out1;
		}

		va.va_mask = AT_ALL;
		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;

		/*
		 * For an exclusive create, the existing file is only
		 * accepted when its mtime holds this request's verifier;
		 * otherwise the request fails with EEXIST.
		 */
		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
		/* % with INT32_MAX to prevent overflows */
		if (args->how.mode == EXCLUSIVE && (vap == NULL ||
		    vap->va_mtime.tv_sec !=
		    (mtime->seconds % INT32_MAX) ||
		    vap->va_mtime.tv_nsec != mtime->nseconds)) {
			VN_RELE(vp);
			error = EEXIST;
			goto out;
		}
	} else {

		/* a requested size of zero counts as a truncation */
		if ((args->how.mode == UNCHECKED ||
		    args->how.mode == GUARDED) &&
		    args->how.createhow3_u.obj_attributes.size.set_it &&
		    va.va_size == 0)
			trunc = TRUE;
		else
			trunc = FALSE;

		if (rfs4_check_delegated(FWRITE, vp, trunc)) {
			VN_RELE(vp);
			resp->status = NFS3ERR_JUKEBOX;
			goto out1;
		}

		va.va_mask = AT_ALL;
		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;

		/*
		 * We need to check to make sure that the file got
		 * created to the indicated size.  If not, we do a
		 * setattr to try to change the size, but we don't
		 * try too hard.  This shouldn't be a problem as most
		 * clients will only specify a size of zero which
		 * local file systems handle.  However, even if
		 * the client does specify a non-zero size, it can
		 * still recover by checking the size of the file
		 * after it has created it and then issue a setattr
		 * request of its own to set the size of the file.
		 */
		if (vap != NULL &&
		    (args->how.mode == UNCHECKED ||
		    args->how.mode == GUARDED) &&
		    args->how.createhow3_u.obj_attributes.size.set_it &&
		    vap->va_size != reqsize) {
			va.va_mask = AT_SIZE;
			va.va_size = reqsize;
			(void) VOP_SETATTR(vp, &va, 0, cr, NULL);
			va.va_mask = AT_ALL;
			vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
		}
	}

#ifdef DEBUG
	if (!rfs3_do_post_op_attr)
		vap = NULL;
#endif

	/* Build the file handle for the reply, if possible. */
#ifdef DEBUG
	if (!rfs3_do_post_op_fh3)
		resp->resok.obj.handle_follows = FALSE;
	else {
#endif
	error = makefh3(&resp->resok.obj.handle, vp, exi);
	if (error)
		resp->resok.obj.handle_follows = FALSE;
	else
		resp->resok.obj.handle_follows = TRUE;
#ifdef DEBUG
	}
#endif

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
	(void) VOP_FSYNC(dvp, 0, cr, NULL);

	VN_RELE(vp);
	if (tvp != NULL) {
		if (in_crit)
			nbl_end_crit(tvp);
		VN_RELE(tvp);
	}

	resp->status = NFS3_OK;
	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);

	DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);

	VN_RELE(dvp);
	return;

out:
	/* translate "error" (or a pending would-block) into an NFS status */
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
out1:
	/* resp->status is already set; release holds and attach wcc data */
	DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);

	if (tvp != NULL) {
		if (in_crit)
			nbl_end_crit(tvp);
		VN_RELE(tvp);
	}
	if (dvp != NULL)
		VN_RELE(dvp);
	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
}
1811 
1812 void *
1813 rfs3_create_getfh(CREATE3args *args)
1814 {
1815 
1816 	return (&args->where.dir);
1817 }
1818 
1819 void
1820 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1821 	struct svc_req *req, cred_t *cr)
1822 {
1823 	int error;
1824 	vnode_t *vp = NULL;
1825 	vnode_t *dvp;
1826 	struct vattr *vap;
1827 	struct vattr va;
1828 	struct vattr *dbvap;
1829 	struct vattr dbva;
1830 	struct vattr *davap;
1831 	struct vattr dava;
1832 
1833 	dbvap = NULL;
1834 	davap = NULL;
1835 
1836 	dvp = nfs3_fhtovp(&args->where.dir, exi);
1837 
1838 	DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req,
1839 	    cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args);
1840 
1841 	if (dvp == NULL) {
1842 		error = ESTALE;
1843 		goto out;
1844 	}
1845 
1846 #ifdef DEBUG
1847 	if (rfs3_do_pre_op_attr) {
1848 		dbva.va_mask = AT_ALL;
1849 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1850 	} else
1851 		dbvap = NULL;
1852 #else
1853 	dbva.va_mask = AT_ALL;
1854 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1855 #endif
1856 	davap = dbvap;
1857 
1858 	if (args->where.name == nfs3nametoolong) {
1859 		resp->status = NFS3ERR_NAMETOOLONG;
1860 		goto out1;
1861 	}
1862 
1863 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1864 		resp->status = NFS3ERR_ACCES;
1865 		goto out1;
1866 	}
1867 
1868 	if (rdonly(exi, req)) {
1869 		resp->status = NFS3ERR_ROFS;
1870 		goto out1;
1871 	}
1872 
1873 	if (is_system_labeled()) {
1874 		bslabel_t *clabel = req->rq_label;
1875 
1876 		ASSERT(clabel != NULL);
1877 		DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1878 		    "got client label from request(1)", struct svc_req *, req);
1879 
1880 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1881 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK)) {
1882 				resp->status = NFS3ERR_ACCES;
1883 				goto out1;
1884 			}
1885 		}
1886 	}
1887 
1888 	error = sattr3_to_vattr(&args->attributes, &va);
1889 	if (error)
1890 		goto out;
1891 
1892 	if (!(va.va_mask & AT_MODE)) {
1893 		resp->status = NFS3ERR_INVAL;
1894 		goto out1;
1895 	}
1896 
1897 	va.va_mask |= AT_TYPE;
1898 	va.va_type = VDIR;
1899 
1900 	error = VOP_MKDIR(dvp, args->where.name, &va, &vp, cr, NULL, 0, NULL);
1901 
1902 #ifdef DEBUG
1903 	if (rfs3_do_post_op_attr) {
1904 		dava.va_mask = AT_ALL;
1905 		davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1906 	} else
1907 		davap = NULL;
1908 #else
1909 	dava.va_mask = AT_ALL;
1910 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1911 #endif
1912 
1913 	/*
1914 	 * Force modified data and metadata out to stable storage.
1915 	 */
1916 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
1917 
1918 	if (error)
1919 		goto out;
1920 
1921 #ifdef DEBUG
1922 	if (!rfs3_do_post_op_fh3)
1923 		resp->resok.obj.handle_follows = FALSE;
1924 	else {
1925 #endif
1926 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1927 	if (error)
1928 		resp->resok.obj.handle_follows = FALSE;
1929 	else
1930 		resp->resok.obj.handle_follows = TRUE;
1931 #ifdef DEBUG
1932 	}
1933 #endif
1934 
1935 #ifdef DEBUG
1936 	if (rfs3_do_post_op_attr) {
1937 		va.va_mask = AT_ALL;
1938 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1939 	} else
1940 		vap = NULL;
1941 #else
1942 	va.va_mask = AT_ALL;
1943 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1944 #endif
1945 
1946 	/*
1947 	 * Force modified data and metadata out to stable storage.
1948 	 */
1949 	(void) VOP_FSYNC(vp, 0, cr, NULL);
1950 
1951 	VN_RELE(vp);
1952 
1953 	resp->status = NFS3_OK;
1954 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1955 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1956 
1957 	DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
1958 	    cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
1959 	VN_RELE(dvp);
1960 
1961 	return;
1962 
1963 out:
1964 	if (curthread->t_flag & T_WOULDBLOCK) {
1965 		curthread->t_flag &= ~T_WOULDBLOCK;
1966 		resp->status = NFS3ERR_JUKEBOX;
1967 	} else
1968 		resp->status = puterrno3(error);
1969 out1:
1970 	DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
1971 	    cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
1972 	if (dvp != NULL)
1973 		VN_RELE(dvp);
1974 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1975 }
1976 
1977 void *
1978 rfs3_mkdir_getfh(MKDIR3args *args)
1979 {
1980 
1981 	return (&args->where.dir);
1982 }
1983 
1984 void
1985 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
1986 	struct svc_req *req, cred_t *cr)
1987 {
1988 	int error;
1989 	vnode_t *vp;
1990 	vnode_t *dvp;
1991 	struct vattr *vap;
1992 	struct vattr va;
1993 	struct vattr *dbvap;
1994 	struct vattr dbva;
1995 	struct vattr *davap;
1996 	struct vattr dava;
1997 
1998 	dbvap = NULL;
1999 	davap = NULL;
2000 
2001 	dvp = nfs3_fhtovp(&args->where.dir, exi);
2002 
2003 	DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req,
2004 	    cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args);
2005 
2006 	if (dvp == NULL) {
2007 		error = ESTALE;
2008 		goto err;
2009 	}
2010 
2011 #ifdef DEBUG
2012 	if (rfs3_do_pre_op_attr) {
2013 		dbva.va_mask = AT_ALL;
2014 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2015 	} else
2016 		dbvap = NULL;
2017 #else
2018 	dbva.va_mask = AT_ALL;
2019 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2020 #endif
2021 	davap = dbvap;
2022 
2023 	if (args->where.name == nfs3nametoolong) {
2024 		resp->status = NFS3ERR_NAMETOOLONG;
2025 		goto err1;
2026 	}
2027 
2028 	if (args->where.name == NULL || *(args->where.name) == '\0') {
2029 		resp->status = NFS3ERR_ACCES;
2030 		goto err1;
2031 	}
2032 
2033 	if (rdonly(exi, req)) {
2034 		resp->status = NFS3ERR_ROFS;
2035 		goto err1;
2036 	}
2037 
2038 	if (is_system_labeled()) {
2039 		bslabel_t *clabel = req->rq_label;
2040 
2041 		ASSERT(clabel != NULL);
2042 		DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2043 		    "got client label from request(1)", struct svc_req *, req);
2044 
2045 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2046 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK)) {
2047 				resp->status = NFS3ERR_ACCES;
2048 				goto err1;
2049 			}
2050 		}
2051 	}
2052 
2053 	error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2054 	if (error)
2055 		goto err;
2056 
2057 	if (!(va.va_mask & AT_MODE)) {
2058 		resp->status = NFS3ERR_INVAL;
2059 		goto err1;
2060 	}
2061 
2062 	if (args->symlink.symlink_data == nfs3nametoolong) {
2063 		resp->status = NFS3ERR_NAMETOOLONG;
2064 		goto err1;
2065 	}
2066 
2067 	va.va_mask |= AT_TYPE;
2068 	va.va_type = VLNK;
2069 
2070 	error = VOP_SYMLINK(dvp, args->where.name, &va,
2071 	    args->symlink.symlink_data, cr, NULL, 0);
2072 
2073 #ifdef DEBUG
2074 	if (rfs3_do_post_op_attr) {
2075 		dava.va_mask = AT_ALL;
2076 		davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2077 	} else
2078 		davap = NULL;
2079 #else
2080 	dava.va_mask = AT_ALL;
2081 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2082 #endif
2083 
2084 	if (error)
2085 		goto err;
2086 
2087 	error = VOP_LOOKUP(dvp, args->where.name, &vp, NULL, 0, NULL, cr,
2088 	    NULL, NULL, NULL);
2089 
2090 	/*
2091 	 * Force modified data and metadata out to stable storage.
2092 	 */
2093 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
2094 
2095 
2096 	resp->status = NFS3_OK;
2097 	if (error) {
2098 		resp->resok.obj.handle_follows = FALSE;
2099 		vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2100 		vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2101 		goto out;
2102 	}
2103 
2104 #ifdef DEBUG
2105 	if (!rfs3_do_post_op_fh3)
2106 		resp->resok.obj.handle_follows = FALSE;
2107 	else {
2108 #endif
2109 	error = makefh3(&resp->resok.obj.handle, vp, exi);
2110 	if (error)
2111 		resp->resok.obj.handle_follows = FALSE;
2112 	else
2113 		resp->resok.obj.handle_follows = TRUE;
2114 #ifdef DEBUG
2115 	}
2116 #endif
2117 
2118 #ifdef DEBUG
2119 	if (rfs3_do_post_op_attr) {
2120 		va.va_mask = AT_ALL;
2121 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2122 	} else
2123 		vap = NULL;
2124 #else
2125 	va.va_mask = AT_ALL;
2126 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2127 #endif
2128 
2129 	/*
2130 	 * Force modified data and metadata out to stable storage.
2131 	 */
2132 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2133 
2134 	VN_RELE(vp);
2135 
2136 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2137 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2138 	goto out;
2139 
2140 err:
2141 	if (curthread->t_flag & T_WOULDBLOCK) {
2142 		curthread->t_flag &= ~T_WOULDBLOCK;
2143 		resp->status = NFS3ERR_JUKEBOX;
2144 	} else
2145 		resp->status = puterrno3(error);
2146 err1:
2147 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2148 out:
2149 	DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req,
2150 	    cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp);
2151 
2152 	if (dvp != NULL)
2153 		VN_RELE(dvp);
2154 }
2155 
2156 void *
2157 rfs3_symlink_getfh(SYMLINK3args *args)
2158 {
2159 
2160 	return (&args->where.dir);
2161 }
2162 
2163 void
2164 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2165 	struct svc_req *req, cred_t *cr)
2166 {
2167 	int error;
2168 	vnode_t *vp;
2169 	vnode_t *realvp;
2170 	vnode_t *dvp;
2171 	struct vattr *vap;
2172 	struct vattr va;
2173 	struct vattr *dbvap;
2174 	struct vattr dbva;
2175 	struct vattr *davap;
2176 	struct vattr dava;
2177 	int mode;
2178 	enum vcexcl excl;
2179 
2180 	dbvap = NULL;
2181 	davap = NULL;
2182 
2183 	dvp = nfs3_fhtovp(&args->where.dir, exi);
2184 
2185 	DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req,
2186 	    cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args);
2187 
2188 	if (dvp == NULL) {
2189 		error = ESTALE;
2190 		goto out;
2191 	}
2192 
2193 #ifdef DEBUG
2194 	if (rfs3_do_pre_op_attr) {
2195 		dbva.va_mask = AT_ALL;
2196 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2197 	} else
2198 		dbvap = NULL;
2199 #else
2200 	dbva.va_mask = AT_ALL;
2201 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2202 #endif
2203 	davap = dbvap;
2204 
2205 	if (args->where.name == nfs3nametoolong) {
2206 		resp->status = NFS3ERR_NAMETOOLONG;
2207 		goto out1;
2208 	}
2209 
2210 	if (args->where.name == NULL || *(args->where.name) == '\0') {
2211 		resp->status = NFS3ERR_ACCES;
2212 		goto out1;
2213 	}
2214 
2215 	if (rdonly(exi, req)) {
2216 		resp->status = NFS3ERR_ROFS;
2217 		goto out1;
2218 	}
2219 
2220 	if (is_system_labeled()) {
2221 		bslabel_t *clabel = req->rq_label;
2222 
2223 		ASSERT(clabel != NULL);
2224 		DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2225 		    "got client label from request(1)", struct svc_req *, req);
2226 
2227 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2228 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK)) {
2229 				resp->status = NFS3ERR_ACCES;
2230 				goto out1;
2231 			}
2232 		}
2233 	}
2234 
2235 	switch (args->what.type) {
2236 	case NF3CHR:
2237 	case NF3BLK:
2238 		error = sattr3_to_vattr(
2239 		    &args->what.mknoddata3_u.device.dev_attributes, &va);
2240 		if (error)
2241 			goto out;
2242 		if (secpolicy_sys_devices(cr) != 0) {
2243 			resp->status = NFS3ERR_PERM;
2244 			goto out1;
2245 		}
2246 		if (args->what.type == NF3CHR)
2247 			va.va_type = VCHR;
2248 		else
2249 			va.va_type = VBLK;
2250 		va.va_rdev = makedevice(
2251 		    args->what.mknoddata3_u.device.spec.specdata1,
2252 		    args->what.mknoddata3_u.device.spec.specdata2);
2253 		va.va_mask |= AT_TYPE | AT_RDEV;
2254 		break;
2255 	case NF3SOCK:
2256 		error = sattr3_to_vattr(
2257 		    &args->what.mknoddata3_u.pipe_attributes, &va);
2258 		if (error)
2259 			goto out;
2260 		va.va_type = VSOCK;
2261 		va.va_mask |= AT_TYPE;
2262 		break;
2263 	case NF3FIFO:
2264 		error = sattr3_to_vattr(
2265 		    &args->what.mknoddata3_u.pipe_attributes, &va);
2266 		if (error)
2267 			goto out;
2268 		va.va_type = VFIFO;
2269 		va.va_mask |= AT_TYPE;
2270 		break;
2271 	default:
2272 		resp->status = NFS3ERR_BADTYPE;
2273 		goto out1;
2274 	}
2275 
2276 	/*
2277 	 * Must specify the mode.
2278 	 */
2279 	if (!(va.va_mask & AT_MODE)) {
2280 		resp->status = NFS3ERR_INVAL;
2281 		goto out1;
2282 	}
2283 
2284 	excl = EXCL;
2285 
2286 	mode = 0;
2287 
2288 	error = VOP_CREATE(dvp, args->where.name, &va, excl, mode,
2289 	    &vp, cr, 0, NULL, NULL);
2290 
2291 #ifdef DEBUG
2292 	if (rfs3_do_post_op_attr) {
2293 		dava.va_mask = AT_ALL;
2294 		davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2295 	} else
2296 		davap = NULL;
2297 #else
2298 	dava.va_mask = AT_ALL;
2299 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2300 #endif
2301 
2302 	/*
2303 	 * Force modified data and metadata out to stable storage.
2304 	 */
2305 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
2306 
2307 	if (error)
2308 		goto out;
2309 
2310 	resp->status = NFS3_OK;
2311 
2312 #ifdef DEBUG
2313 	if (!rfs3_do_post_op_fh3)
2314 		resp->resok.obj.handle_follows = FALSE;
2315 	else {
2316 #endif
2317 	error = makefh3(&resp->resok.obj.handle, vp, exi);
2318 	if (error)
2319 		resp->resok.obj.handle_follows = FALSE;
2320 	else
2321 		resp->resok.obj.handle_follows = TRUE;
2322 #ifdef DEBUG
2323 	}
2324 #endif
2325 
2326 #ifdef DEBUG
2327 	if (rfs3_do_post_op_attr) {
2328 		va.va_mask = AT_ALL;
2329 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2330 	} else
2331 		vap = NULL;
2332 #else
2333 	va.va_mask = AT_ALL;
2334 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2335 #endif
2336 
2337 	/*
2338 	 * Force modified metadata out to stable storage.
2339 	 *
2340 	 * if a underlying vp exists, pass it to VOP_FSYNC
2341 	 */
2342 	if (VOP_REALVP(vp, &realvp, NULL) == 0)
2343 		(void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2344 	else
2345 		(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2346 
2347 	VN_RELE(vp);
2348 
2349 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2350 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2351 	DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2352 	    cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2353 	VN_RELE(dvp);
2354 	return;
2355 
2356 out:
2357 	if (curthread->t_flag & T_WOULDBLOCK) {
2358 		curthread->t_flag &= ~T_WOULDBLOCK;
2359 		resp->status = NFS3ERR_JUKEBOX;
2360 	} else
2361 		resp->status = puterrno3(error);
2362 out1:
2363 	DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2364 	    cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2365 	if (dvp != NULL)
2366 		VN_RELE(dvp);
2367 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2368 }
2369 
2370 void *
2371 rfs3_mknod_getfh(MKNOD3args *args)
2372 {
2373 
2374 	return (&args->where.dir);
2375 }
2376 
2377 void
2378 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2379 	struct svc_req *req, cred_t *cr)
2380 {
2381 	int error = 0;
2382 	vnode_t *vp;
2383 	struct vattr *bvap;
2384 	struct vattr bva;
2385 	struct vattr *avap;
2386 	struct vattr ava;
2387 	vnode_t *targvp = NULL;
2388 
2389 	bvap = NULL;
2390 	avap = NULL;
2391 
2392 	vp = nfs3_fhtovp(&args->object.dir, exi);
2393 
2394 	DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req,
2395 	    cred_t *, cr, vnode_t *, vp, REMOVE3args *, args);
2396 
2397 	if (vp == NULL) {
2398 		error = ESTALE;
2399 		goto err;
2400 	}
2401 
2402 #ifdef DEBUG
2403 	if (rfs3_do_pre_op_attr) {
2404 		bva.va_mask = AT_ALL;
2405 		bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2406 	} else
2407 		bvap = NULL;
2408 #else
2409 	bva.va_mask = AT_ALL;
2410 	bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2411 #endif
2412 	avap = bvap;
2413 
2414 	if (vp->v_type != VDIR) {
2415 		resp->status = NFS3ERR_NOTDIR;
2416 		goto err1;
2417 	}
2418 
2419 	if (args->object.name == nfs3nametoolong) {
2420 		resp->status = NFS3ERR_NAMETOOLONG;
2421 		goto err1;
2422 	}
2423 
2424 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2425 		resp->status = NFS3ERR_ACCES;
2426 		goto err1;
2427 	}
2428 
2429 	if (rdonly(exi, req)) {
2430 		resp->status = NFS3ERR_ROFS;
2431 		goto err1;
2432 	}
2433 
2434 	if (is_system_labeled()) {
2435 		bslabel_t *clabel = req->rq_label;
2436 
2437 		ASSERT(clabel != NULL);
2438 		DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2439 		    "got client label from request(1)", struct svc_req *, req);
2440 
2441 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2442 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK)) {
2443 				resp->status = NFS3ERR_ACCES;
2444 				goto err1;
2445 			}
2446 		}
2447 	}
2448 
2449 	/*
2450 	 * Check for a conflict with a non-blocking mandatory share
2451 	 * reservation and V4 delegations
2452 	 */
2453 	error = VOP_LOOKUP(vp, args->object.name, &targvp, NULL, 0,
2454 	    NULL, cr, NULL, NULL, NULL);
2455 	if (error != 0)
2456 		goto err;
2457 
2458 	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2459 		resp->status = NFS3ERR_JUKEBOX;
2460 		goto err1;
2461 	}
2462 
2463 	if (!nbl_need_check(targvp)) {
2464 		error = VOP_REMOVE(vp, args->object.name, cr, NULL, 0);
2465 	} else {
2466 		nbl_start_crit(targvp, RW_READER);
2467 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2468 			error = EACCES;
2469 		} else {
2470 			error = VOP_REMOVE(vp, args->object.name, cr, NULL, 0);
2471 		}
2472 		nbl_end_crit(targvp);
2473 	}
2474 	VN_RELE(targvp);
2475 	targvp = NULL;
2476 
2477 #ifdef DEBUG
2478 	if (rfs3_do_post_op_attr) {
2479 		ava.va_mask = AT_ALL;
2480 		avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2481 	} else
2482 		avap = NULL;
2483 #else
2484 	ava.va_mask = AT_ALL;
2485 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2486 #endif
2487 
2488 	/*
2489 	 * Force modified data and metadata out to stable storage.
2490 	 */
2491 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2492 
2493 	if (error)
2494 		goto err;
2495 
2496 	resp->status = NFS3_OK;
2497 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2498 	goto out;
2499 
2500 err:
2501 	if (curthread->t_flag & T_WOULDBLOCK) {
2502 		curthread->t_flag &= ~T_WOULDBLOCK;
2503 		resp->status = NFS3ERR_JUKEBOX;
2504 	} else
2505 		resp->status = puterrno3(error);
2506 err1:
2507 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2508 out:
2509 	DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req,
2510 	    cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp);
2511 	if (vp != NULL)
2512 		VN_RELE(vp);
2513 }
2514 
2515 void *
2516 rfs3_remove_getfh(REMOVE3args *args)
2517 {
2518 
2519 	return (&args->object.dir);
2520 }
2521 
/*
 * rfs3_rmdir - service an NFSv3 RMDIR request.
 *
 * Removes the directory named args->object.name from the parent directory
 * identified by the file handle args->object.dir.
 *
 *	args	parent directory file handle plus the name to remove
 *	resp	reply: status, and the parent's before/after attributes
 *		(dir_wcc) in either the resok or the resfail arm
 *	exi	export used to translate the file handle and for the
 *		read-only check
 *	req	RPC request; supplies the client label on labeled systems
 *	cr	credentials used for every vnode operation
 *
 * Nothing is returned; the outcome is reported through resp->status.
 * The "err" label derives the status from the errno in `error' (or from
 * T_WOULDBLOCK), "err1" is used when resp->status has already been set,
 * and "out" fires the done probe and drops the hold on the parent.
 */
void
rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
	struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;
	struct vattr *bvap;
	struct vattr bva;
	struct vattr *avap;
	struct vattr ava;

	bvap = NULL;
	avap = NULL;

	vp = nfs3_fhtovp(&args->object.dir, exi);

	DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, RMDIR3args *, args);

	if (vp == NULL) {
		error = ESTALE;
		goto err;
	}

	/*
	 * Capture the parent's attributes before the operation.  Until the
	 * rmdir is attempted the "after" attributes are the same.
	 */
#ifdef DEBUG
	if (rfs3_do_pre_op_attr) {
		bva.va_mask = AT_ALL;
		bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
	} else
		bvap = NULL;
#else
	bva.va_mask = AT_ALL;
	bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
#endif
	avap = bvap;

	if (vp->v_type != VDIR) {
		resp->status = NFS3ERR_NOTDIR;
		goto err1;
	}

	if (args->object.name == nfs3nametoolong) {
		resp->status = NFS3ERR_NAMETOOLONG;
		goto err1;
	}

	if (args->object.name == NULL || *(args->object.name) == '\0') {
		resp->status = NFS3ERR_ACCES;
		goto err1;
	}

	if (rdonly(exi, req)) {
		resp->status = NFS3ERR_ROFS;
		goto err1;
	}

	if (is_system_labeled()) {
		bslabel_t *clabel = req->rq_label;

		ASSERT(clabel != NULL);
		DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK)) {
				resp->status = NFS3ERR_ACCES;
				goto err1;
			}
		}
	}

	error = VOP_RMDIR(vp, args->object.name, rootdir, cr, NULL, 0);

	/*
	 * Re-read the parent's attributes whether or not the rmdir
	 * succeeded; they are reported in either arm of the reply.
	 */
#ifdef DEBUG
	if (rfs3_do_post_op_attr) {
		ava.va_mask = AT_ALL;
		avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
	} else
		avap = NULL;
#else
	ava.va_mask = AT_ALL;
	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
#endif

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	if (error) {
		/*
		 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
		 * if the directory is not empty.  A System V NFS server
		 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
		 * over the wire.
		 */
		if (error == EEXIST)
			error = ENOTEMPTY;
		goto err;
	}

	resp->status = NFS3_OK;
	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
	goto out;

err:
	/* Translate a would-block condition or the errno into a status. */
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
err1:
	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
out:
	DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp);
	if (vp != NULL)
		VN_RELE(vp);

}
2642 
2643 void *
2644 rfs3_rmdir_getfh(RMDIR3args *args)
2645 {
2646 
2647 	return (&args->object.dir);
2648 }
2649 
2650 void
2651 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2652 	struct svc_req *req, cred_t *cr)
2653 {
2654 	int error = 0;
2655 	vnode_t *fvp;
2656 	vnode_t *tvp;
2657 	vnode_t *targvp;
2658 	struct vattr *fbvap;
2659 	struct vattr fbva;
2660 	struct vattr *favap;
2661 	struct vattr fava;
2662 	struct vattr *tbvap;
2663 	struct vattr tbva;
2664 	struct vattr *tavap;
2665 	struct vattr tava;
2666 	nfs_fh3 *fh3;
2667 	struct exportinfo *to_exi;
2668 	vnode_t *srcvp = NULL;
2669 	bslabel_t *clabel;
2670 
2671 	fbvap = NULL;
2672 	favap = NULL;
2673 	tbvap = NULL;
2674 	tavap = NULL;
2675 	tvp = NULL;
2676 
2677 	fvp = nfs3_fhtovp(&args->from.dir, exi);
2678 
2679 	DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req,
2680 	    cred_t *, cr, vnode_t *, fvp, RENAME3args *, args);
2681 
2682 	if (fvp == NULL) {
2683 		error = ESTALE;
2684 		goto err;
2685 	}
2686 
2687 	if (is_system_labeled()) {
2688 		clabel = req->rq_label;
2689 		ASSERT(clabel != NULL);
2690 		DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2691 		    "got client label from request(1)", struct svc_req *, req);
2692 
2693 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2694 			if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK)) {
2695 				resp->status = NFS3ERR_ACCES;
2696 				goto err1;
2697 			}
2698 		}
2699 	}
2700 
2701 #ifdef DEBUG
2702 	if (rfs3_do_pre_op_attr) {
2703 		fbva.va_mask = AT_ALL;
2704 		fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2705 	} else
2706 		fbvap = NULL;
2707 #else
2708 	fbva.va_mask = AT_ALL;
2709 	fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2710 #endif
2711 	favap = fbvap;
2712 
2713 	fh3 = &args->to.dir;
2714 	to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2715 	if (to_exi == NULL) {
2716 		resp->status = NFS3ERR_ACCES;
2717 		goto err1;
2718 	}
2719 	exi_rele(to_exi);
2720 
2721 	if (to_exi != exi) {
2722 		resp->status = NFS3ERR_XDEV;
2723 		goto err1;
2724 	}
2725 
2726 	tvp = nfs3_fhtovp(&args->to.dir, exi);
2727 	if (tvp == NULL) {
2728 		error = ESTALE;
2729 		goto err;
2730 	}
2731 
2732 #ifdef DEBUG
2733 	if (rfs3_do_pre_op_attr) {
2734 		tbva.va_mask = AT_ALL;
2735 		tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2736 	} else
2737 		tbvap = NULL;
2738 #else
2739 	tbva.va_mask = AT_ALL;
2740 	tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2741 #endif
2742 	tavap = tbvap;
2743 
2744 	if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2745 		resp->status = NFS3ERR_NOTDIR;
2746 		goto err1;
2747 	}
2748 
2749 	if (args->from.name == nfs3nametoolong ||
2750 	    args->to.name == nfs3nametoolong) {
2751 		resp->status = NFS3ERR_NAMETOOLONG;
2752 		goto err1;
2753 	}
2754 	if (args->from.name == NULL || *(args->from.name) == '\0' ||
2755 	    args->to.name == NULL || *(args->to.name) == '\0') {
2756 		resp->status = NFS3ERR_ACCES;
2757 		goto err1;
2758 	}
2759 
2760 	if (rdonly(exi, req)) {
2761 		resp->status = NFS3ERR_ROFS;
2762 		goto err1;
2763 	}
2764 
2765 	if (is_system_labeled()) {
2766 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2767 			if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK)) {
2768 				resp->status = NFS3ERR_ACCES;
2769 				goto err1;
2770 			}
2771 		}
2772 	}
2773 
2774 	/*
2775 	 * Check for a conflict with a non-blocking mandatory share
2776 	 * reservation or V4 delegations.
2777 	 */
2778 	error = VOP_LOOKUP(fvp, args->from.name, &srcvp, NULL, 0,
2779 	    NULL, cr, NULL, NULL, NULL);
2780 	if (error != 0)
2781 		goto err;
2782 
2783 	/*
2784 	 * If we rename a delegated file we should recall the
2785 	 * delegation, since future opens should fail or would
2786 	 * refer to a new file.
2787 	 */
2788 	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2789 		resp->status = NFS3ERR_JUKEBOX;
2790 		goto err1;
2791 	}
2792 
2793 	/*
2794 	 * Check for renaming over a delegated file.  Check rfs4_deleg_policy
2795 	 * first to avoid VOP_LOOKUP if possible.
2796 	 */
2797 	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2798 	    VOP_LOOKUP(tvp, args->to.name, &targvp, NULL, 0, NULL, cr,
2799 	    NULL, NULL, NULL) == 0) {
2800 
2801 		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2802 			VN_RELE(targvp);
2803 			resp->status = NFS3ERR_JUKEBOX;
2804 			goto err1;
2805 		}
2806 		VN_RELE(targvp);
2807 	}
2808 
2809 	if (!nbl_need_check(srcvp)) {
2810 		error = VOP_RENAME(fvp, args->from.name, tvp,
2811 		    args->to.name, cr, NULL, 0);
2812 	} else {
2813 		nbl_start_crit(srcvp, RW_READER);
2814 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
2815 			error = EACCES;
2816 		} else {
2817 			error = VOP_RENAME(fvp, args->from.name, tvp,
2818 			    args->to.name, cr, NULL, 0);
2819 		}
2820 		nbl_end_crit(srcvp);
2821 	}
2822 	if (error == 0)
2823 		vn_renamepath(tvp, srcvp, args->to.name,
2824 		    strlen(args->to.name));
2825 	VN_RELE(srcvp);
2826 	srcvp = NULL;
2827 
2828 #ifdef DEBUG
2829 	if (rfs3_do_post_op_attr) {
2830 		fava.va_mask = AT_ALL;
2831 		favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2832 		tava.va_mask = AT_ALL;
2833 		tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2834 	} else {
2835 		favap = NULL;
2836 		tavap = NULL;
2837 	}
2838 #else
2839 	fava.va_mask = AT_ALL;
2840 	favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2841 	tava.va_mask = AT_ALL;
2842 	tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2843 #endif
2844 
2845 	/*
2846 	 * Force modified data and metadata out to stable storage.
2847 	 */
2848 	(void) VOP_FSYNC(fvp, 0, cr, NULL);
2849 	(void) VOP_FSYNC(tvp, 0, cr, NULL);
2850 
2851 	if (error)
2852 		goto err;
2853 
2854 	resp->status = NFS3_OK;
2855 	vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2856 	vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2857 	goto out;
2858 
2859 err:
2860 	if (curthread->t_flag & T_WOULDBLOCK) {
2861 		curthread->t_flag &= ~T_WOULDBLOCK;
2862 		resp->status = NFS3ERR_JUKEBOX;
2863 	} else
2864 		resp->status = puterrno3(error);
2865 err1:
2866 	vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2867 	vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2868 out:
2869 	DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req,
2870 	    cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp);
2871 	if (fvp != NULL)
2872 		VN_RELE(fvp);
2873 	if (tvp != NULL)
2874 		VN_RELE(tvp);
2875 }
2876 
2877 void *
2878 rfs3_rename_getfh(RENAME3args *args)
2879 {
2880 
2881 	return (&args->from.dir);
2882 }
2883 
/*
 * rfs3_link - service an NFSv3 LINK request.
 *
 * Creates a new name, args->link.name, in the directory args->link.dir
 * for the existing file identified by args->file.  The directory must
 * belong to the export the request arrived on; otherwise NFS3ERR_XDEV
 * is returned.
 *
 *	args	file handle of the existing file, plus the directory file
 *		handle and name for the new link
 *	resp	reply: status, the file's attributes (file_attributes) and
 *		the directory's before/after attributes (linkdir_wcc) in
 *		either the resok or the resfail arm
 *	exi	export used to translate both file handles and for the
 *		read-only check
 *	req	RPC request; supplies the client label on labeled systems
 *	cr	credentials used for every vnode operation
 *
 * Nothing is returned; the outcome is reported through resp->status.
 * The "out" label derives the status from the errno in `error' (or from
 * T_WOULDBLOCK); "out1" is used when resp->status has already been set.
 * Both fire the done probe, drop whatever holds are outstanding and fill
 * in the failure arm of the reply.
 */
void
rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
	struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;
	vnode_t *dvp;
	struct vattr *vap;
	struct vattr va;
	struct vattr *bvap;
	struct vattr bva;
	struct vattr *avap;
	struct vattr ava;
	nfs_fh3	*fh3;
	struct exportinfo *to_exi;
	bslabel_t *clabel;

	vap = NULL;
	bvap = NULL;
	avap = NULL;
	dvp = NULL;

	vp = nfs3_fhtovp(&args->file, exi);

	DTRACE_NFSV3_4(op__link__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, LINK3args *, args);

	if (vp == NULL) {
		error = ESTALE;
		goto out;
	}

	/* Attributes of the file being linked, as of before the operation. */
#ifdef DEBUG
	if (rfs3_do_pre_op_attr) {
		va.va_mask = AT_ALL;
		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
	} else
		vap = NULL;
#else
	va.va_mask = AT_ALL;
	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
#endif

	/*
	 * The link directory must live in the same export.  to_exi is
	 * only compared by address after the release, never dereferenced.
	 */
	fh3 = &args->link.dir;
	to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
	if (to_exi == NULL) {
		resp->status = NFS3ERR_ACCES;
		goto out1;
	}
	exi_rele(to_exi);

	if (to_exi != exi) {
		resp->status = NFS3ERR_XDEV;
		goto out1;
	}

	if (is_system_labeled()) {
		clabel = req->rq_label;

		ASSERT(clabel != NULL);
		DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
				resp->status = NFS3ERR_ACCES;
				goto out1;
			}
		}
	}

	dvp = nfs3_fhtovp(&args->link.dir, exi);
	if (dvp == NULL) {
		error = ESTALE;
		goto out;
	}

	/* Attributes of the link directory before the operation. */
#ifdef DEBUG
	if (rfs3_do_pre_op_attr) {
		bva.va_mask = AT_ALL;
		bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
	} else
		bvap = NULL;
#else
	bva.va_mask = AT_ALL;
	bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
#endif

	if (dvp->v_type != VDIR) {
		resp->status = NFS3ERR_NOTDIR;
		goto out1;
	}

	if (args->link.name == nfs3nametoolong) {
		resp->status = NFS3ERR_NAMETOOLONG;
		goto out1;
	}

	if (args->link.name == NULL || *(args->link.name) == '\0') {
		resp->status = NFS3ERR_ACCES;
		goto out1;
	}

	if (rdonly(exi, req)) {
		resp->status = NFS3ERR_ROFS;
		goto out1;
	}

	/*
	 * Second label check, this time against the directory and using
	 * an equality rather than a dominance comparison.  clabel was set
	 * by the earlier is_system_labeled() block.
	 */
	if (is_system_labeled()) {
		DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK)) {
				resp->status = NFS3ERR_ACCES;
				goto out1;
			}
		}
	}

	error = VOP_LINK(dvp, vp, args->link.name, cr, NULL, 0);

	/*
	 * Re-read the attributes of the file and the directory whether or
	 * not the link succeeded; they are reported in either arm of the
	 * reply.
	 */
#ifdef DEBUG
	if (rfs3_do_post_op_attr) {
		va.va_mask = AT_ALL;
		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
		ava.va_mask = AT_ALL;
		avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
	} else {
		vap = NULL;
		avap = NULL;
	}
#else
	va.va_mask = AT_ALL;
	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
	ava.va_mask = AT_ALL;
	avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
#endif

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
	(void) VOP_FSYNC(dvp, 0, cr, NULL);

	if (error)
		goto out;

	VN_RELE(dvp);

	resp->status = NFS3_OK;
	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
	vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);

	DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, LINK3res *, resp);

	VN_RELE(vp);

	return;

out:
	/* Translate a would-block condition or the errno into a status. */
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
out1:
	DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, LINK3res *, resp);

	if (vp != NULL)
		VN_RELE(vp);
	if (dvp != NULL)
		VN_RELE(dvp);
	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
	vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
}
3062 
3063 void *
3064 rfs3_link_getfh(LINK3args *args)
3065 {
3066 
3067 	return (&args->file);
3068 }
3069 
/*
 * This macro defines the size of a response which contains attribute
 * information and one directory entry (whose length is specified by
 * the macro parameter).  If the incoming request is larger than this,
 * then we are guaranteed to be able to return at least one directory
 * entry if one exists.  Therefore, we do not need to check for
 * NFS3ERR_TOOSMALL if the requested size is larger than this.  If it
 * is not, then we need to check to make sure that this error does not
 * need to be returned.
 *
 * NFS3_READDIR_MIN_COUNT is comprised of the following:
 *
 * status - 1 * BYTES_PER_XDR_UNIT
 * attr. flag - 1 * BYTES_PER_XDR_UNIT
 * cookie verifier - 2 * BYTES_PER_XDR_UNIT
 * attributes  - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * end of list - 1 * BYTES_PER_XDR_UNIT
 * end of file - 1 * BYTES_PER_XDR_UNIT
 * Length of the entry name, rounded up to a multiple of BYTES_PER_XDR_UNIT
 */

#define	NFS3_READDIR_MIN_COUNT(length)	\
	((1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) * \
		BYTES_PER_XDR_UNIT + roundup((length), BYTES_PER_XDR_UNIT))
3098 
3099 /* ARGSUSED */
3100 void
3101 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3102 	struct svc_req *req, cred_t *cr)
3103 {
3104 	int error;
3105 	vnode_t *vp;
3106 	struct vattr *vap;
3107 	struct vattr va;
3108 	struct iovec iov;
3109 	struct uio uio;
3110 	char *data;
3111 	int iseof;
3112 	int bufsize;
3113 	int namlen;
3114 	uint_t count;
3115 
3116 	vap = NULL;
3117 
3118 	vp = nfs3_fhtovp(&args->dir, exi);
3119 
3120 	DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req,
3121 	    cred_t *, cr, vnode_t *, vp, READDIR3args *, args);
3122 
3123 	if (vp == NULL) {
3124 		error = ESTALE;
3125 		goto out;
3126 	}
3127 
3128 	if (is_system_labeled()) {
3129 		bslabel_t *clabel = req->rq_label;
3130 
3131 		ASSERT(clabel != NULL);
3132 		DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3133 		    "got client label from request(1)", struct svc_req *, req);
3134 
3135 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3136 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
3137 				resp->status = NFS3ERR_ACCES;
3138 				goto out1;
3139 			}
3140 		}
3141 	}
3142 
3143 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3144 
3145 #ifdef DEBUG
3146 	if (rfs3_do_pre_op_attr) {
3147 		va.va_mask = AT_ALL;
3148 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3149 	} else
3150 		vap = NULL;
3151 #else
3152 	va.va_mask = AT_ALL;
3153 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3154 #endif
3155 
3156 	if (vp->v_type != VDIR) {
3157 		resp->status = NFS3ERR_NOTDIR;
3158 		goto out1;
3159 	}
3160 
3161 	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3162 	if (error)
3163 		goto out;
3164 
3165 	/*
3166 	 * Now don't allow arbitrary count to alloc;
3167 	 * allow the maximum not to exceed rfs3_tsize()
3168 	 */
3169 	if (args->count > rfs3_tsize(req))
3170 		args->count = rfs3_tsize(req);
3171 
3172 	/*
3173 	 * Make sure that there is room to read at least one entry
3174 	 * if any are available.
3175 	 */
3176 	if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3177 		count = DIRENT64_RECLEN(MAXNAMELEN);
3178 	else
3179 		count = args->count;
3180 
3181 	data = kmem_alloc(count, KM_SLEEP);
3182 
3183 	iov.iov_base = data;
3184 	iov.iov_len = count;
3185 	uio.uio_iov = &iov;
3186 	uio.uio_iovcnt = 1;
3187 	uio.uio_segflg = UIO_SYSSPACE;
3188 	uio.uio_extflg = UIO_COPY_CACHED;
3189 	uio.uio_loffset = (offset_t)args->cookie;
3190 	uio.uio_resid = count;
3191 
3192 	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3193 
3194 #ifdef DEBUG
3195 	if (rfs3_do_post_op_attr) {
3196 		va.va_mask = AT_ALL;
3197 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3198 	} else
3199 		vap = NULL;
3200 #else
3201 	va.va_mask = AT_ALL;
3202 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3203 #endif
3204 
3205 	if (error) {
3206 		kmem_free(data, count);
3207 		goto out;
3208 	}
3209 
3210 	/*
3211 	 * If the count was not large enough to be able to guarantee
3212 	 * to be able to return at least one entry, then need to
3213 	 * check to see if NFS3ERR_TOOSMALL should be returned.
3214 	 */
3215 	if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3216 		/*
3217 		 * bufsize is used to keep track of the size of the response.
3218 		 * It is primed with:
3219 		 *	1 for the status +
3220 		 *	1 for the dir_attributes.attributes boolean +
3221 		 *	2 for the cookie verifier
3222 		 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3223 		 * to bytes.  If there are directory attributes to be
3224 		 * returned, then:
3225 		 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3226 		 * time BYTES_PER_XDR_UNIT is added to account for them.
3227 		 */
3228 		bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3229 		if (vap != NULL)
3230 			bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3231 		/*
3232 		 * An entry is composed of:
3233 		 *	1 for the true/false list indicator +
3234 		 *	2 for the fileid +
3235 		 *	1 for the length of the name +
3236 		 *	2 for the cookie +
3237 		 * all times BYTES_PER_XDR_UNIT to convert from
3238 		 * XDR units to bytes, plus the length of the name
3239 		 * rounded up to the nearest BYTES_PER_XDR_UNIT.
3240 		 */
3241 		if (count != uio.uio_resid) {
3242 			namlen = strlen(((struct dirent64 *)data)->d_name);
3243 			bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3244 			    roundup(namlen, BYTES_PER_XDR_UNIT);
3245 		}
3246 		/*
3247 		 * We need to check to see if the number of bytes left
3248 		 * to go into the buffer will actually fit into the
3249 		 * buffer.  This is calculated as the size of this
3250 		 * entry plus:
3251 		 *	1 for the true/false list indicator +
3252 		 *	1 for the eof indicator
3253 		 * times BYTES_PER_XDR_UNIT to convert from from
3254 		 * XDR units to bytes.
3255 		 */
3256 		bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3257 		if (bufsize > args->count) {
3258 			kmem_free(data, count);
3259 			resp->status = NFS3ERR_TOOSMALL;
3260 			goto out1;
3261 		}
3262 	}
3263 
3264 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3265 
3266 #if 0 /* notyet */
3267 	/*
3268 	 * Don't do this.  It causes local disk writes when just
3269 	 * reading the file and the overhead is deemed larger
3270 	 * than the benefit.
3271 	 */
3272 	/*
3273 	 * Force modified metadata out to stable storage.
3274 	 */
3275 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3276 #endif
3277 
3278 	resp->status = NFS3_OK;
3279 	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3280 	resp->resok.cookieverf = 0;
3281 	resp->resok.reply.entries = (entry3 *)data;
3282 	resp->resok.reply.eof = iseof;
3283 	resp->resok.size = count - uio.uio_resid;
3284 	resp->resok.count = args->count;
3285 	resp->resok.freecount = count;
3286 
3287 	DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3288 	    cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3289 
3290 	VN_RELE(vp);
3291 
3292 	return;
3293 
3294 out:
3295 	if (curthread->t_flag & T_WOULDBLOCK) {
3296 		curthread->t_flag &= ~T_WOULDBLOCK;
3297 		resp->status = NFS3ERR_JUKEBOX;
3298 	} else
3299 		resp->status = puterrno3(error);
3300 out1:
3301 	DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3302 	    cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3303 
3304 	if (vp != NULL) {
3305 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3306 		VN_RELE(vp);
3307 	}
3308 	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3309 }
3310 
3311 void *
3312 rfs3_readdir_getfh(READDIR3args *args)
3313 {
3314 
3315 	return (&args->dir);
3316 }
3317 
3318 void
3319 rfs3_readdir_free(READDIR3res *resp)
3320 {
3321 
3322 	if (resp->status == NFS3_OK)
3323 		kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3324 }
3325 
/*
 * nextdp(dp) yields a pointer to the dirent64 record that starts
 * dp->d_reclen bytes past the start of dp, i.e. the record following dp
 * in a packed buffer of entries.  Any definition of nextdp already in
 * effect is discarded first.
 */
#ifdef nextdp
#undef nextdp
#endif
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
3330 
/*
 * This macro computes the size of a response which contains
 * one directory entry including the attributes as well as file handle.
 * If the incoming request is larger than this, then we are guaranteed to be
 * able to return at least one more directory entry if one exists.
 *
 * NFS3_READDIRPLUS_ENTRY is made up of the following:
 *
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * attribute flag - 1 * BYTES_PER_XDR_UNIT
 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * status flag for file handle - 1 * BYTES_PER_XDR_UNIT
 * length of a file handle - 1 * BYTES_PER_XDR_UNIT
 * Maximum length of a file handle (NFS3_MAXFHSIZE)
 * Length of the entry name, rounded up to a multiple of BYTES_PER_XDR_UNIT
 *
 * The parameter is parenthesized so that an expression argument is
 * evaluated as a unit, matching NFS3_READDIR_MIN_COUNT.
 */
#define	NFS3_READDIRPLUS_ENTRY(namelen)	\
	((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
		BYTES_PER_XDR_UNIT + \
	NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
3354 
/*
 * Size, in bytes, of a single VOP_READDIR() request issued by
 * rfs3_readdirplus().  The value actually used for each read is further
 * limited to the space left in the entry buffer.
 */
static int rfs3_readdir_unit = MAXBSIZE;
3356 
3357 /* ARGSUSED */
3358 void
3359 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3360 	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
3361 {
3362 	int error;
3363 	vnode_t *vp;
3364 	struct vattr *vap;
3365 	struct vattr va;
3366 	struct iovec iov;
3367 	struct uio uio;
3368 	char *data;
3369 	int iseof;
3370 	struct dirent64 *dp;
3371 	vnode_t *nvp;
3372 	struct vattr *nvap;
3373 	struct vattr nva;
3374 	entryplus3_info *infop = NULL;
3375 	int size = 0;
3376 	int nents = 0;
3377 	int bufsize = 0;
3378 	int entrysize = 0;
3379 	int tofit = 0;
3380 	int rd_unit = rfs3_readdir_unit;
3381 	int prev_len;
3382 	int space_left;
3383 	int i;
3384 	uint_t *namlen = NULL;
3385 
3386 	vap = NULL;
3387 
3388 	vp = nfs3_fhtovp(&args->dir, exi);
3389 
3390 	DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req,
3391 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args);
3392 
3393 	if (vp == NULL) {
3394 		error = ESTALE;
3395 		goto out;
3396 	}
3397 
3398 	if (is_system_labeled()) {
3399 		bslabel_t *clabel = req->rq_label;
3400 
3401 		ASSERT(clabel != NULL);
3402 		DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3403 		    char *, "got client label from request(1)",
3404 		    struct svc_req *, req);
3405 
3406 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3407 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
3408 				resp->status = NFS3ERR_ACCES;
3409 				goto out1;
3410 			}
3411 		}
3412 	}
3413 
3414 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3415 
3416 #ifdef DEBUG
3417 	if (rfs3_do_pre_op_attr) {
3418 		va.va_mask = AT_ALL;
3419 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3420 	} else
3421 		vap = NULL;
3422 #else
3423 	va.va_mask = AT_ALL;
3424 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3425 #endif
3426 
3427 	if (vp->v_type != VDIR) {
3428 		error = ENOTDIR;
3429 		goto out;
3430 	}
3431 
3432 	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3433 	if (error)
3434 		goto out;
3435 
3436 	/*
3437 	 * Don't allow arbitrary counts for allocation
3438 	 */
3439 	if (args->maxcount > rfs3_tsize(req))
3440 		args->maxcount = rfs3_tsize(req);
3441 
3442 	/*
3443 	 * Make sure that there is room to read at least one entry
3444 	 * if any are available
3445 	 */
3446 	args->dircount = MIN(args->dircount, args->maxcount);
3447 
3448 	if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3449 		args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3450 
3451 	/*
3452 	 * This allocation relies on a minimum directory entry
3453 	 * being roughly 24 bytes.  Therefore, the namlen array
3454 	 * will have enough space based on the maximum number of
3455 	 * entries to read.
3456 	 */
3457 	namlen = kmem_alloc(args->dircount, KM_SLEEP);
3458 
3459 	space_left = args->dircount;
3460 	data = kmem_alloc(args->dircount, KM_SLEEP);
3461 	dp = (struct dirent64 *)data;
3462 	uio.uio_iov = &iov;
3463 	uio.uio_iovcnt = 1;
3464 	uio.uio_segflg = UIO_SYSSPACE;
3465 	uio.uio_extflg = UIO_COPY_CACHED;
3466 	uio.uio_loffset = (offset_t)args->cookie;
3467 
3468 	/*
3469 	 * bufsize is used to keep track of the size of the response as we
3470 	 * get post op attributes and filehandles for each entry.  This is
3471 	 * an optimization as the server may have read more entries than will
3472 	 * fit in the buffer specified by maxcount.  We stop calculating
3473 	 * post op attributes and filehandles once we have exceeded maxcount.
3474 	 * This will minimize the effect of truncation.
3475 	 *
3476 	 * It is primed with:
3477 	 *	1 for the status +
3478 	 *	1 for the dir_attributes.attributes boolean +
3479 	 *	2 for the cookie verifier
3480 	 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3481 	 * to bytes.  If there are directory attributes to be
3482 	 * returned, then:
3483 	 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3484 	 * time BYTES_PER_XDR_UNIT is added to account for them.
3485 	 */
3486 	bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3487 	if (vap != NULL)
3488 		bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3489 
3490 getmoredents:
3491 	/*
3492 	 * Here we make a check so that our read unit is not larger than
3493 	 * the space left in the buffer.
3494 	 */
3495 	rd_unit = MIN(rd_unit, space_left);
3496 	iov.iov_base = (char *)dp;
3497 	iov.iov_len = rd_unit;
3498 	uio.uio_resid = rd_unit;
3499 	prev_len = rd_unit;
3500 
3501 	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3502 
3503 	if (error) {
3504 		kmem_free(data, args->dircount);
3505 		goto out;
3506 	}
3507 
3508 	if (uio.uio_resid == prev_len && !iseof) {
3509 		if (nents == 0) {
3510 			kmem_free(data, args->dircount);
3511 			resp->status = NFS3ERR_TOOSMALL;
3512 			goto out1;
3513 		}
3514 
3515 		/*
3516 		 * We could not get any more entries, so get the attributes
3517 		 * and filehandle for the entries already obtained.
3518 		 */
3519 		goto good;
3520 	}
3521 
3522 	/*
3523 	 * We estimate the size of the response by assuming the
3524 	 * entry exists and attributes and filehandle are also valid
3525 	 */
3526 	for (size = prev_len - uio.uio_resid;
3527 	    size > 0;
3528 	    size -= dp->d_reclen, dp = nextdp(dp)) {
3529 
3530 		if (dp->d_ino == 0) {
3531 			nents++;
3532 			continue;
3533 		}
3534 
3535 		namlen[nents] = strlen(dp->d_name);
3536 		entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3537 
3538 		/*
3539 		 * We need to check to see if the number of bytes left
3540 		 * to go into the buffer will actually fit into the
3541 		 * buffer.  This is calculated as the size of this
3542 		 * entry plus:
3543 		 *	1 for the true/false list indicator +
3544 		 *	1 for the eof indicator
3545 		 * times BYTES_PER_XDR_UNIT to convert from XDR units
3546 		 * to bytes.
3547 		 *
3548 		 * Also check the dircount limit against the first entry read
3549 		 *
3550 		 */
3551 		tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3552 		if (bufsize + tofit > args->maxcount) {
3553 			/*
3554 			 * We make a check here to see if this was the
3555 			 * first entry being measured.  If so, then maxcount
3556 			 * was too small to begin with and so we need to
3557 			 * return with NFS3ERR_TOOSMALL.
3558 			 */
3559 			if (nents == 0) {
3560 				kmem_free(data, args->dircount);
3561 				resp->status = NFS3ERR_TOOSMALL;
3562 				goto out1;
3563 			}
3564 			iseof = FALSE;
3565 			goto good;
3566 		}
3567 		bufsize += entrysize;
3568 		nents++;
3569 	}
3570 
3571 	/*
3572 	 * If there is enough room to fit at least 1 more entry including
3573 	 * post op attributes and filehandle in the buffer AND that we haven't
3574 	 * exceeded dircount then go back and get some more.
3575 	 */
3576 	if (!iseof &&
3577 	    (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3578 		space_left -= (prev_len - uio.uio_resid);
3579 		if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3580 			goto getmoredents;
3581 
3582 		/* else, fall through */
3583 	}
3584 
3585 good:
3586 
3587 #ifdef DEBUG
3588 	if (rfs3_do_post_op_attr) {
3589 		va.va_mask = AT_ALL;
3590 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3591 	} else
3592 		vap = NULL;
3593 #else
3594 	va.va_mask = AT_ALL;
3595 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3596 #endif
3597 
3598 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3599 
3600 	infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3601 	resp->resok.infop = infop;
3602 
3603 	dp = (struct dirent64 *)data;
3604 	for (i = 0; i < nents; i++) {
3605 
3606 		if (dp->d_ino == 0) {
3607 			infop[i].attr.attributes = FALSE;
3608 			infop[i].fh.handle_follows = FALSE;
3609 			dp = nextdp(dp);
3610 			continue;
3611 		}
3612 
3613 		infop[i].namelen = namlen[i];
3614 
3615 		error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3616 		    NULL, NULL, NULL);
3617 		if (error) {
3618 			infop[i].attr.attributes = FALSE;
3619 			infop[i].fh.handle_follows = FALSE;
3620 			dp = nextdp(dp);
3621 			continue;
3622 		}
3623 
3624 #ifdef DEBUG
3625 		if (rfs3_do_post_op_attr) {
3626 			nva.va_mask = AT_ALL;
3627 			nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ?
3628 			    NULL : &nva;
3629 		} else
3630 			nvap = NULL;
3631 #else
3632 		nva.va_mask = AT_ALL;
3633 		nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3634 #endif
3635 		vattr_to_post_op_attr(nvap, &infop[i].attr);
3636 
3637 #ifdef DEBUG
3638 		if (!rfs3_do_post_op_fh3)
3639 			infop[i].fh.handle_follows = FALSE;
3640 		else {
3641 #endif
3642 		error = makefh3(&infop[i].fh.handle, nvp, exi);
3643 		if (!error)
3644 			infop[i].fh.handle_follows = TRUE;
3645 		else
3646 			infop[i].fh.handle_follows = FALSE;
3647 #ifdef DEBUG
3648 		}
3649 #endif
3650 
3651 		VN_RELE(nvp);
3652 		dp = nextdp(dp);
3653 	}
3654 
3655 #if 0 /* notyet */
3656 	/*
3657 	 * Don't do this.  It causes local disk writes when just
3658 	 * reading the file and the overhead is deemed larger
3659 	 * than the benefit.
3660 	 */
3661 	/*
3662 	 * Force modified metadata out to stable storage.
3663 	 */
3664 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3665 #endif
3666 
3667 	kmem_free(namlen, args->dircount);
3668 
3669 	resp->status = NFS3_OK;
3670 	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3671 	resp->resok.cookieverf = 0;
3672 	resp->resok.reply.entries = (entryplus3 *)data;
3673 	resp->resok.reply.eof = iseof;
3674 	resp->resok.size = nents;
3675 	resp->resok.count = args->dircount;
3676 	resp->resok.maxcount = args->maxcount;
3677 
3678 	DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3679 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3680 
3681 	VN_RELE(vp);
3682 
3683 	return;
3684 
3685 out:
3686 	if (curthread->t_flag & T_WOULDBLOCK) {
3687 		curthread->t_flag &= ~T_WOULDBLOCK;
3688 		resp->status = NFS3ERR_JUKEBOX;
3689 	} else
3690 		resp->status = puterrno3(error);
3691 out1:
3692 	DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3693 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3694 
3695 	if (vp != NULL) {
3696 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3697 		VN_RELE(vp);
3698 	}
3699 
3700 	if (namlen != NULL)
3701 		kmem_free(namlen, args->dircount);
3702 
3703 	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3704 }
3705 
3706 void *
3707 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3708 {
3709 
3710 	return (&args->dir);
3711 }
3712 
3713 void
3714 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3715 {
3716 
3717 	if (resp->status == NFS3_OK) {
3718 		kmem_free(resp->resok.reply.entries, resp->resok.count);
3719 		kmem_free(resp->resok.infop,
3720 		    resp->resok.size * sizeof (struct entryplus3_info));
3721 	}
3722 }
3723 
3724 /* ARGSUSED */
3725 void
3726 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3727 	struct svc_req *req, cred_t *cr)
3728 {
3729 	int error;
3730 	vnode_t *vp;
3731 	struct vattr *vap;
3732 	struct vattr va;
3733 	struct statvfs64 sb;
3734 
3735 	vap = NULL;
3736 
3737 	vp = nfs3_fhtovp(&args->fsroot, exi);
3738 
3739 	DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req,
3740 	    cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args);
3741 
3742 	if (vp == NULL) {
3743 		error = ESTALE;
3744 		goto out;
3745 	}
3746 
3747 	if (is_system_labeled()) {
3748 		bslabel_t *clabel = req->rq_label;
3749 
3750 		ASSERT(clabel != NULL);
3751 		DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3752 		    "got client label from request(1)", struct svc_req *, req);
3753 
3754 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3755 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
3756 				resp->status = NFS3ERR_ACCES;
3757 				goto out1;
3758 			}
3759 		}
3760 	}
3761 
3762 	error = VFS_STATVFS(vp->v_vfsp, &sb);
3763 
3764 #ifdef DEBUG
3765 	if (rfs3_do_post_op_attr) {
3766 		va.va_mask = AT_ALL;
3767 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3768 	} else
3769 		vap = NULL;
3770 #else
3771 	va.va_mask = AT_ALL;
3772 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3773 #endif
3774 
3775 	if (error)
3776 		goto out;
3777 
3778 	resp->status = NFS3_OK;
3779 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3780 	if (sb.f_blocks != (fsblkcnt64_t)-1)
3781 		resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3782 	else
3783 		resp->resok.tbytes = (size3)sb.f_blocks;
3784 	if (sb.f_bfree != (fsblkcnt64_t)-1)
3785 		resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3786 	else
3787 		resp->resok.fbytes = (size3)sb.f_bfree;
3788 	if (sb.f_bavail != (fsblkcnt64_t)-1)
3789 		resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3790 	else
3791 		resp->resok.abytes = (size3)sb.f_bavail;
3792 	resp->resok.tfiles = (size3)sb.f_files;
3793 	resp->resok.ffiles = (size3)sb.f_ffree;
3794 	resp->resok.afiles = (size3)sb.f_favail;
3795 	resp->resok.invarsec = 0;
3796 
3797 	DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3798 	    cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3799 	VN_RELE(vp);
3800 
3801 	return;
3802 
3803 out:
3804 	if (curthread->t_flag & T_WOULDBLOCK) {
3805 		curthread->t_flag &= ~T_WOULDBLOCK;
3806 		resp->status = NFS3ERR_JUKEBOX;
3807 	} else
3808 		resp->status = puterrno3(error);
3809 out1:
3810 	DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3811 	    cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3812 
3813 	if (vp != NULL)
3814 		VN_RELE(vp);
3815 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3816 }
3817 
3818 void *
3819 rfs3_fsstat_getfh(FSSTAT3args *args)
3820 {
3821 
3822 	return (&args->fsroot);
3823 }
3824 
3825 /* ARGSUSED */
3826 void
3827 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3828 	struct svc_req *req, cred_t *cr)
3829 {
3830 	vnode_t *vp;
3831 	struct vattr *vap;
3832 	struct vattr va;
3833 	uint32_t xfer_size;
3834 	ulong_t l = 0;
3835 	int error;
3836 
3837 	vp = nfs3_fhtovp(&args->fsroot, exi);
3838 
3839 	DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req,
3840 	    cred_t *, cr, vnode_t *, vp, FSINFO3args *, args);
3841 
3842 	if (vp == NULL) {
3843 		if (curthread->t_flag & T_WOULDBLOCK) {
3844 			curthread->t_flag &= ~T_WOULDBLOCK;
3845 			resp->status = NFS3ERR_JUKEBOX;
3846 		} else
3847 			resp->status = NFS3ERR_STALE;
3848 		vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3849 		goto out;
3850 	}
3851 
3852 	if (is_system_labeled()) {
3853 		bslabel_t *clabel = req->rq_label;
3854 
3855 		ASSERT(clabel != NULL);
3856 		DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3857 		    "got client label from request(1)", struct svc_req *, req);
3858 
3859 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3860 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
3861 				resp->status = NFS3ERR_STALE;
3862 				vattr_to_post_op_attr(NULL,
3863 				    &resp->resfail.obj_attributes);
3864 				goto out;
3865 			}
3866 		}
3867 	}
3868 
3869 #ifdef DEBUG
3870 	if (rfs3_do_post_op_attr) {
3871 		va.va_mask = AT_ALL;
3872 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3873 	} else
3874 		vap = NULL;
3875 #else
3876 	va.va_mask = AT_ALL;
3877 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3878 #endif
3879 
3880 	resp->status = NFS3_OK;
3881 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3882 	xfer_size = rfs3_tsize(req);
3883 	resp->resok.rtmax = xfer_size;
3884 	resp->resok.rtpref = xfer_size;
3885 	resp->resok.rtmult = DEV_BSIZE;
3886 	resp->resok.wtmax = xfer_size;
3887 	resp->resok.wtpref = xfer_size;
3888 	resp->resok.wtmult = DEV_BSIZE;
3889 	resp->resok.dtpref = MAXBSIZE;
3890 
3891 	/*
3892 	 * Large file spec: want maxfilesize based on limit of
3893 	 * underlying filesystem.  We can guess 2^31-1 if need be.
3894 	 */
3895 	error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3896 
3897 	if (!error && l != 0 && l <= 64)
3898 		resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3899 	else
3900 		resp->resok.maxfilesize = MAXOFF32_T;
3901 
3902 	resp->resok.time_delta.seconds = 0;
3903 	resp->resok.time_delta.nseconds = 1000;
3904 	resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3905 	    FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3906 
3907 	DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3908 	    cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp);
3909 
3910 	VN_RELE(vp);
3911 
3912 	return;
3913 
3914 out:
3915 	DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3916 	    cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp);
3917 	if (vp != NULL)
3918 		VN_RELE(vp);
3919 }
3920 
3921 void *
3922 rfs3_fsinfo_getfh(FSINFO3args *args)
3923 {
3924 
3925 	return (&args->fsroot);
3926 }
3927 
3928 /* ARGSUSED */
3929 void
3930 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3931 	struct svc_req *req, cred_t *cr)
3932 {
3933 	int error;
3934 	vnode_t *vp;
3935 	struct vattr *vap;
3936 	struct vattr va;
3937 	ulong_t val;
3938 
3939 	vap = NULL;
3940 
3941 	vp = nfs3_fhtovp(&args->object, exi);
3942 
3943 	DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req,
3944 	    cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args);
3945 
3946 	if (vp == NULL) {
3947 		error = ESTALE;
3948 		goto out;
3949 	}
3950 
3951 	if (is_system_labeled()) {
3952 		bslabel_t *clabel = req->rq_label;
3953 
3954 		ASSERT(clabel != NULL);
3955 		DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
3956 		    "got client label from request(1)", struct svc_req *, req);
3957 
3958 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3959 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
3960 				resp->status = NFS3ERR_ACCES;
3961 				goto out1;
3962 			}
3963 		}
3964 	}
3965 
3966 #ifdef DEBUG
3967 	if (rfs3_do_post_op_attr) {
3968 		va.va_mask = AT_ALL;
3969 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3970 	} else
3971 		vap = NULL;
3972 #else
3973 	va.va_mask = AT_ALL;
3974 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3975 #endif
3976 
3977 	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
3978 	if (error)
3979 		goto out;
3980 	resp->resok.info.link_max = (uint32)val;
3981 
3982 	error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
3983 	if (error)
3984 		goto out;
3985 	resp->resok.info.name_max = (uint32)val;
3986 
3987 	error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
3988 	if (error)
3989 		goto out;
3990 	if (val == 1)
3991 		resp->resok.info.no_trunc = TRUE;
3992 	else
3993 		resp->resok.info.no_trunc = FALSE;
3994 
3995 	error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
3996 	if (error)
3997 		goto out;
3998 	if (val == 1)
3999 		resp->resok.info.chown_restricted = TRUE;
4000 	else
4001 		resp->resok.info.chown_restricted = FALSE;
4002 
4003 	resp->status = NFS3_OK;
4004 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4005 	resp->resok.info.case_insensitive = FALSE;
4006 	resp->resok.info.case_preserving = TRUE;
4007 	DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4008 	    cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4009 	VN_RELE(vp);
4010 	return;
4011 
4012 out:
4013 	if (curthread->t_flag & T_WOULDBLOCK) {
4014 		curthread->t_flag &= ~T_WOULDBLOCK;
4015 		resp->status = NFS3ERR_JUKEBOX;
4016 	} else
4017 		resp->status = puterrno3(error);
4018 out1:
4019 	DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4020 	    cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4021 	if (vp != NULL)
4022 		VN_RELE(vp);
4023 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4024 }
4025 
4026 void *
4027 rfs3_pathconf_getfh(PATHCONF3args *args)
4028 {
4029 
4030 	return (&args->object);
4031 }
4032 
4033 void
4034 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4035 	struct svc_req *req, cred_t *cr)
4036 {
4037 	int error;
4038 	vnode_t *vp;
4039 	struct vattr *bvap;
4040 	struct vattr bva;
4041 	struct vattr *avap;
4042 	struct vattr ava;
4043 
4044 	bvap = NULL;
4045 	avap = NULL;
4046 
4047 	vp = nfs3_fhtovp(&args->file, exi);
4048 
4049 	DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req,
4050 	    cred_t *, cr, vnode_t *, vp, COMMIT3args *, args);
4051 
4052 	if (vp == NULL) {
4053 		error = ESTALE;
4054 		goto out;
4055 	}
4056 
4057 	bva.va_mask = AT_ALL;
4058 	error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4059 
4060 	/*
4061 	 * If we can't get the attributes, then we can't do the
4062 	 * right access checking.  So, we'll fail the request.
4063 	 */
4064 	if (error)
4065 		goto out;
4066 
4067 #ifdef DEBUG
4068 	if (rfs3_do_pre_op_attr)
4069 		bvap = &bva;
4070 	else
4071 		bvap = NULL;
4072 #else
4073 	bvap = &bva;
4074 #endif
4075 
4076 	if (rdonly(exi, req)) {
4077 		resp->status = NFS3ERR_ROFS;
4078 		goto out1;
4079 	}
4080 
4081 	if (vp->v_type != VREG) {
4082 		resp->status = NFS3ERR_INVAL;
4083 		goto out1;
4084 	}
4085 
4086 	if (is_system_labeled()) {
4087 		bslabel_t *clabel = req->rq_label;
4088 
4089 		ASSERT(clabel != NULL);
4090 		DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4091 		    "got client label from request(1)", struct svc_req *, req);
4092 
4093 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4094 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK)) {
4095 				resp->status = NFS3ERR_ACCES;
4096 				goto out1;
4097 			}
4098 		}
4099 	}
4100 
4101 	if (crgetuid(cr) != bva.va_uid &&
4102 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4103 		goto out;
4104 
4105 	error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr, NULL);
4106 	if (!error)
4107 		error = VOP_FSYNC(vp, FNODSYNC, cr, NULL);
4108 
4109 #ifdef DEBUG
4110 	if (rfs3_do_post_op_attr) {
4111 		ava.va_mask = AT_ALL;
4112 		avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4113 	} else
4114 		avap = NULL;
4115 #else
4116 	ava.va_mask = AT_ALL;
4117 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4118 #endif
4119 
4120 	if (error)
4121 		goto out;
4122 
4123 	resp->status = NFS3_OK;
4124 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4125 	resp->resok.verf = write3verf;
4126 
4127 	DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4128 	    cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4129 
4130 	VN_RELE(vp);
4131 
4132 	return;
4133 
4134 out:
4135 	if (curthread->t_flag & T_WOULDBLOCK) {
4136 		curthread->t_flag &= ~T_WOULDBLOCK;
4137 		resp->status = NFS3ERR_JUKEBOX;
4138 	} else
4139 		resp->status = puterrno3(error);
4140 out1:
4141 	DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4142 	    cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4143 
4144 	if (vp != NULL)
4145 		VN_RELE(vp);
4146 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4147 }
4148 
4149 void *
4150 rfs3_commit_getfh(COMMIT3args *args)
4151 {
4152 
4153 	return (&args->file);
4154 }
4155 
4156 static int
4157 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4158 {
4159 
4160 	vap->va_mask = 0;
4161 
4162 	if (sap->mode.set_it) {
4163 		vap->va_mode = (mode_t)sap->mode.mode;
4164 		vap->va_mask |= AT_MODE;
4165 	}
4166 	if (sap->uid.set_it) {
4167 		vap->va_uid = (uid_t)sap->uid.uid;
4168 		vap->va_mask |= AT_UID;
4169 	}
4170 	if (sap->gid.set_it) {
4171 		vap->va_gid = (gid_t)sap->gid.gid;
4172 		vap->va_mask |= AT_GID;
4173 	}
4174 	if (sap->size.set_it) {
4175 		if (sap->size.size > (size3)((u_longlong_t)-1))
4176 			return (EINVAL);
4177 		vap->va_size = sap->size.size;
4178 		vap->va_mask |= AT_SIZE;
4179 	}
4180 	if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4181 #ifndef _LP64
4182 		/* check time validity */
4183 		if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4184 			return (EOVERFLOW);
4185 #endif
4186 		/*
4187 		 * nfs protocol defines times as unsigned so don't extend sign,
4188 		 * unless sysadmin set nfs_allow_preepoch_time.
4189 		 */
4190 		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4191 		    sap->atime.atime.seconds);
4192 		vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4193 		vap->va_mask |= AT_ATIME;
4194 	} else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4195 		gethrestime(&vap->va_atime);
4196 		vap->va_mask |= AT_ATIME;
4197 	}
4198 	if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4199 #ifndef _LP64
4200 		/* check time validity */
4201 		if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4202 			return (EOVERFLOW);
4203 #endif
4204 		/*
4205 		 * nfs protocol defines times as unsigned so don't extend sign,
4206 		 * unless sysadmin set nfs_allow_preepoch_time.
4207 		 */
4208 		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4209 		    sap->mtime.mtime.seconds);
4210 		vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4211 		vap->va_mask |= AT_MTIME;
4212 	} else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4213 		gethrestime(&vap->va_mtime);
4214 		vap->va_mask |= AT_MTIME;
4215 	}
4216 
4217 	return (0);
4218 }
4219 
4220 static ftype3 vt_to_nf3[] = {
4221 	0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4222 };
4223 
4224 static int
4225 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4226 {
4227 
4228 	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4229 	/* Return error if time or size overflow */
4230 	if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4231 		return (EOVERFLOW);
4232 	}
4233 	fap->type = vt_to_nf3[vap->va_type];
4234 	fap->mode = (mode3)(vap->va_mode & MODEMASK);
4235 	fap->nlink = (uint32)vap->va_nlink;
4236 	if (vap->va_uid == UID_NOBODY)
4237 		fap->uid = (uid3)NFS_UID_NOBODY;
4238 	else
4239 		fap->uid = (uid3)vap->va_uid;
4240 	if (vap->va_gid == GID_NOBODY)
4241 		fap->gid = (gid3)NFS_GID_NOBODY;
4242 	else
4243 		fap->gid = (gid3)vap->va_gid;
4244 	fap->size = (size3)vap->va_size;
4245 	fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4246 	fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4247 	fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4248 	fap->fsid = (uint64)vap->va_fsid;
4249 	fap->fileid = (fileid3)vap->va_nodeid;
4250 	fap->atime.seconds = vap->va_atime.tv_sec;
4251 	fap->atime.nseconds = vap->va_atime.tv_nsec;
4252 	fap->mtime.seconds = vap->va_mtime.tv_sec;
4253 	fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4254 	fap->ctime.seconds = vap->va_ctime.tv_sec;
4255 	fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4256 	return (0);
4257 }
4258 
4259 static int
4260 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4261 {
4262 
4263 	/* Return error if time or size overflow */
4264 	if (!  (NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4265 	    NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4266 	    NFS3_SIZE_OK(vap->va_size))) {
4267 		return (EOVERFLOW);
4268 	}
4269 	wccap->size = (size3)vap->va_size;
4270 	wccap->mtime.seconds = vap->va_mtime.tv_sec;
4271 	wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4272 	wccap->ctime.seconds = vap->va_ctime.tv_sec;
4273 	wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4274 	return (0);
4275 }
4276 
4277 static void
4278 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4279 {
4280 
4281 	/* don't return attrs if time overflow */
4282 	if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4283 		poap->attributes = TRUE;
4284 	} else
4285 		poap->attributes = FALSE;
4286 }
4287 
4288 void
4289 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4290 {
4291 
4292 	/* don't return attrs if time overflow */
4293 	if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4294 		poap->attributes = TRUE;
4295 	} else
4296 		poap->attributes = FALSE;
4297 }
4298 
4299 static void
4300 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4301 {
4302 
4303 	vattr_to_pre_op_attr(bvap, &wccp->before);
4304 	vattr_to_post_op_attr(avap, &wccp->after);
4305 }
4306 
4307 void
4308 rfs3_srvrinit(void)
4309 {
4310 	struct rfs3_verf_overlay {
4311 		uint_t id; /* a "unique" identifier */
4312 		int ts; /* a unique timestamp */
4313 	} *verfp;
4314 	timestruc_t now;
4315 
4316 	/*
4317 	 * The following algorithm attempts to find a unique verifier
4318 	 * to be used as the write verifier returned from the server
4319 	 * to the client.  It is important that this verifier change
4320 	 * whenever the server reboots.  Of secondary importance, it
4321 	 * is important for the verifier to be unique between two
4322 	 * different servers.
4323 	 *
4324 	 * Thus, an attempt is made to use the system hostid and the
4325 	 * current time in seconds when the nfssrv kernel module is
4326 	 * loaded.  It is assumed that an NFS server will not be able
4327 	 * to boot and then to reboot in less than a second.  If the
4328 	 * hostid has not been set, then the current high resolution
4329 	 * time is used.  This will ensure different verifiers each
4330 	 * time the server reboots and minimize the chances that two
4331 	 * different servers will have the same verifier.
4332 	 */
4333 
4334 #ifndef	lint
4335 	/*
4336 	 * We ASSERT that this constant logic expression is
4337 	 * always true because in the past, it wasn't.
4338 	 */
4339 	ASSERT(sizeof (*verfp) <= sizeof (write3verf));
4340 #endif
4341 
4342 	gethrestime(&now);
4343 	verfp = (struct rfs3_verf_overlay *)&write3verf;
4344 	verfp->ts = (int)now.tv_sec;
4345 	verfp->id = (uint_t)nfs_atoi(hw_serial);
4346 
4347 	if (verfp->id == 0)
4348 		verfp->id = (uint_t)now.tv_nsec;
4349 
4350 	nfs3_srv_caller_id = fs_new_caller_id();
4351 
4352 }
4353 
/*
 * Intentionally empty: there is nothing to undo here.
 */
void
rfs3_srvrfini(void)
{
}
4359