xref: /titanic_52/usr/src/uts/common/fs/nfs/nfs3_srv.c (revision 8b2e16e76f55405c78218b9f08c6aefaf13c9e24)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28 
29 #pragma ident	"%Z%%M%	%I%	%E% SMI"
30 
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/cred.h>
35 #include <sys/buf.h>
36 #include <sys/vfs.h>
37 #include <sys/vnode.h>
38 #include <sys/uio.h>
39 #include <sys/errno.h>
40 #include <sys/sysmacros.h>
41 #include <sys/statvfs.h>
42 #include <sys/kmem.h>
43 #include <sys/dirent.h>
44 #include <sys/cmn_err.h>
45 #include <sys/debug.h>
46 #include <sys/systeminfo.h>
47 #include <sys/flock.h>
48 #include <sys/nbmlock.h>
49 #include <sys/policy.h>
50 #include <sys/sdt.h>
51 
52 #include <rpc/types.h>
53 #include <rpc/auth.h>
54 #include <rpc/svc.h>
55 
56 #include <nfs/nfs.h>
57 #include <nfs/export.h>
58 
59 #include <sys/strsubr.h>
60 
61 #include <sys/tsol/label.h>
62 #include <sys/tsol/tndb.h>
63 
64 #include <inet/ip.h>
65 #include <inet/ip6.h>
66 
67 /*
68  * These are the interface routines for the server side of the
69  * Network File System.  See the NFS version 3 protocol specification
70  * for a description of this interface.
71  */
72 
73 #ifdef DEBUG
74 int rfs3_do_pre_op_attr = 1;
75 int rfs3_do_post_op_attr = 1;
76 int rfs3_do_post_op_fh3 = 1;
77 #endif
78 
79 static writeverf3 write3verf;
80 
81 static int	sattr3_to_vattr(sattr3 *, struct vattr *);
82 static int	vattr_to_fattr3(struct vattr *, fattr3 *);
83 static int	vattr_to_wcc_attr(struct vattr *, wcc_attr *);
84 static void	vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
85 static void	vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
86 
87 u_longlong_t nfs3_srv_caller_id;
88 
89 /* ARGSUSED */
90 void
91 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
92 	struct svc_req *req, cred_t *cr)
93 {
94 	int error;
95 	vnode_t *vp;
96 	struct vattr va;
97 
98 	vp = nfs3_fhtovp(&args->object, exi);
99 
100 	DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
101 	    cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
102 
103 	if (vp == NULL) {
104 		error = ESTALE;
105 		goto out;
106 	}
107 
108 	va.va_mask = AT_ALL;
109 	error = rfs4_delegated_getattr(vp, &va, 0, cr);
110 
111 	if (!error) {
112 		/* overflow error if time or size is out of range */
113 		error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
114 		if (error)
115 			goto out;
116 		resp->status = NFS3_OK;
117 
118 		DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
119 		    cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
120 
121 		VN_RELE(vp);
122 
123 		return;
124 	}
125 
126 out:
127 	if (curthread->t_flag & T_WOULDBLOCK) {
128 		curthread->t_flag &= ~T_WOULDBLOCK;
129 		resp->status = NFS3ERR_JUKEBOX;
130 	} else
131 		resp->status = puterrno3(error);
132 
133 	DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
134 	    cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
135 
136 	if (vp != NULL)
137 		VN_RELE(vp);
138 }
139 
140 void *
141 rfs3_getattr_getfh(GETATTR3args *args)
142 {
143 
144 	return (&args->object);
145 }
146 
147 void
148 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
149 	struct svc_req *req, cred_t *cr)
150 {
151 	int error;
152 	vnode_t *vp;
153 	struct vattr *bvap;
154 	struct vattr bva;
155 	struct vattr *avap;
156 	struct vattr ava;
157 	int flag;
158 	int in_crit = 0;
159 	struct flock64 bf;
160 	caller_context_t ct;
161 
162 	bvap = NULL;
163 	avap = NULL;
164 
165 	vp = nfs3_fhtovp(&args->object, exi);
166 
167 	DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
168 	    cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);
169 
170 	if (vp == NULL) {
171 		error = ESTALE;
172 		goto out;
173 	}
174 
175 	error = sattr3_to_vattr(&args->new_attributes, &ava);
176 	if (error)
177 		goto out;
178 
179 	if (is_system_labeled()) {
180 		bslabel_t *clabel = req->rq_label;
181 
182 		ASSERT(clabel != NULL);
183 		DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
184 		    "got client label from request(1)", struct svc_req *, req);
185 
186 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
187 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK)) {
188 				resp->status = NFS3ERR_ACCES;
189 				goto out1;
190 			}
191 		}
192 	}
193 
194 	/*
195 	 * We need to specially handle size changes because of
196 	 * possible conflicting NBMAND locks. Get into critical
197 	 * region before VOP_GETATTR, so the size attribute is
198 	 * valid when checking conflicts.
199 	 *
200 	 * Also, check to see if the v4 side of the server has
201 	 * delegated this file.  If so, then we return JUKEBOX to
202 	 * allow the client to retrasmit its request.
203 	 */
204 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
205 		if (nbl_need_check(vp)) {
206 			nbl_start_crit(vp, RW_READER);
207 			in_crit = 1;
208 		}
209 	}
210 
211 	bva.va_mask = AT_ALL;
212 	error = rfs4_delegated_getattr(vp, &bva, 0, cr);
213 
214 	/*
215 	 * If we can't get the attributes, then we can't do the
216 	 * right access checking.  So, we'll fail the request.
217 	 */
218 	if (error)
219 		goto out;
220 
221 #ifdef DEBUG
222 	if (rfs3_do_pre_op_attr)
223 		bvap = &bva;
224 #else
225 	bvap = &bva;
226 #endif
227 
228 	if (rdonly(exi, req) || vn_is_readonly(vp)) {
229 		resp->status = NFS3ERR_ROFS;
230 		goto out1;
231 	}
232 
233 	if (args->guard.check &&
234 	    (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
235 	    args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
236 		resp->status = NFS3ERR_NOT_SYNC;
237 		goto out1;
238 	}
239 
240 	if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
241 		flag = ATTR_UTIME;
242 	else
243 		flag = 0;
244 
245 	/*
246 	 * If the filesystem is exported with nosuid, then mask off
247 	 * the setuid and setgid bits.
248 	 */
249 	if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
250 	    (exi->exi_export.ex_flags & EX_NOSUID))
251 		ava.va_mode &= ~(VSUID | VSGID);
252 
253 	ct.cc_sysid = 0;
254 	ct.cc_pid = 0;
255 	ct.cc_caller_id = nfs3_srv_caller_id;
256 	ct.cc_flags = CC_DONTBLOCK;
257 
258 	/*
259 	 * We need to specially handle size changes because it is
260 	 * possible for the client to create a file with modes
261 	 * which indicate read-only, but with the file opened for
262 	 * writing.  If the client then tries to set the size of
263 	 * the file, then the normal access checking done in
264 	 * VOP_SETATTR would prevent the client from doing so,
265 	 * although it should be legal for it to do so.  To get
266 	 * around this, we do the access checking for ourselves
267 	 * and then use VOP_SPACE which doesn't do the access
268 	 * checking which VOP_SETATTR does. VOP_SPACE can only
269 	 * operate on VREG files, let VOP_SETATTR handle the other
270 	 * extremely rare cases.
271 	 * Also the client should not be allowed to change the
272 	 * size of the file if there is a conflicting non-blocking
273 	 * mandatory lock in the region the change.
274 	 */
275 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
276 		if (in_crit) {
277 			u_offset_t offset;
278 			ssize_t length;
279 
280 			if (ava.va_size < bva.va_size) {
281 				offset = ava.va_size;
282 				length = bva.va_size - ava.va_size;
283 			} else {
284 				offset = bva.va_size;
285 				length = ava.va_size - bva.va_size;
286 			}
287 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
288 			    NULL)) {
289 				error = EACCES;
290 				goto out;
291 			}
292 		}
293 
294 		if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
295 			ava.va_mask &= ~AT_SIZE;
296 			bf.l_type = F_WRLCK;
297 			bf.l_whence = 0;
298 			bf.l_start = (off64_t)ava.va_size;
299 			bf.l_len = 0;
300 			bf.l_sysid = 0;
301 			bf.l_pid = 0;
302 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
303 			    (offset_t)ava.va_size, cr, &ct);
304 		}
305 	}
306 
307 	if (!error && ava.va_mask)
308 		error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
309 
310 	/* check if a monitor detected a delegation conflict */
311 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
312 		resp->status = NFS3ERR_JUKEBOX;
313 		goto out1;
314 	}
315 
316 #ifdef DEBUG
317 	if (rfs3_do_post_op_attr) {
318 		ava.va_mask = AT_ALL;
319 		avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
320 	} else
321 		avap = NULL;
322 #else
323 	ava.va_mask = AT_ALL;
324 	avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
325 #endif
326 
327 	/*
328 	 * Force modified metadata out to stable storage.
329 	 */
330 	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
331 
332 	if (error)
333 		goto out;
334 
335 	if (in_crit)
336 		nbl_end_crit(vp);
337 
338 	resp->status = NFS3_OK;
339 	vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
340 
341 	DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
342 	    cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
343 
344 	VN_RELE(vp);
345 
346 	return;
347 
348 out:
349 	if (curthread->t_flag & T_WOULDBLOCK) {
350 		curthread->t_flag &= ~T_WOULDBLOCK;
351 		resp->status = NFS3ERR_JUKEBOX;
352 	} else
353 		resp->status = puterrno3(error);
354 out1:
355 	DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
356 	    cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
357 
358 	if (vp != NULL) {
359 		if (in_crit)
360 			nbl_end_crit(vp);
361 		VN_RELE(vp);
362 	}
363 	vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
364 }
365 
366 void *
367 rfs3_setattr_getfh(SETATTR3args *args)
368 {
369 
370 	return (&args->object);
371 }
372 
373 /* ARGSUSED */
374 void
375 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
376 	struct svc_req *req, cred_t *cr)
377 {
378 	int error;
379 	vnode_t *vp;
380 	vnode_t *dvp;
381 	struct vattr *vap;
382 	struct vattr va;
383 	struct vattr *dvap;
384 	struct vattr dva;
385 	nfs_fh3 *fhp;
386 	struct sec_ol sec = {0, 0};
387 	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
388 
389 	dvap = NULL;
390 
391 	/*
392 	 * Allow lookups from the root - the default
393 	 * location of the public filehandle.
394 	 */
395 	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
396 		dvp = rootdir;
397 		VN_HOLD(dvp);
398 
399 		DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
400 		    cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
401 	} else {
402 		dvp = nfs3_fhtovp(&args->what.dir, exi);
403 
404 		DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
405 		    cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
406 
407 		if (dvp == NULL) {
408 			error = ESTALE;
409 			goto out;
410 		}
411 	}
412 
413 #ifdef DEBUG
414 	if (rfs3_do_pre_op_attr) {
415 		dva.va_mask = AT_ALL;
416 		dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
417 	}
418 #else
419 	dva.va_mask = AT_ALL;
420 	dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
421 #endif
422 
423 	if (args->what.name == nfs3nametoolong) {
424 		resp->status = NFS3ERR_NAMETOOLONG;
425 		goto out1;
426 	}
427 
428 	if (args->what.name == NULL || *(args->what.name) == '\0') {
429 		resp->status = NFS3ERR_ACCES;
430 		goto out1;
431 	}
432 
433 	fhp = &args->what.dir;
434 	if (strcmp(args->what.name, "..") == 0 &&
435 	    EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
436 		resp->status = NFS3ERR_NOENT;
437 		goto out1;
438 	}
439 
440 	/*
441 	 * If the public filehandle is used then allow
442 	 * a multi-component lookup
443 	 */
444 	if (PUBLIC_FH3(&args->what.dir)) {
445 		publicfh_flag = TRUE;
446 		error = rfs_publicfh_mclookup(args->what.name, dvp, cr, &vp,
447 		    &exi, &sec);
448 		if (error && exi != NULL)
449 			exi_rele(exi); /* See comment below Re: publicfh_flag */
450 		/*
451 		 * Since WebNFS may bypass MOUNT, we need to ensure this
452 		 * request didn't come from an unlabeled admin_low client.
453 		 */
454 		if (is_system_labeled() && error == 0) {
455 			struct sockaddr *ca;
456 			int		addr_type;
457 			void		*ipaddr;
458 			tsol_tpc_t	*tp;
459 
460 			ca = (struct sockaddr *)svc_getrpccaller(
461 			    req->rq_xprt)->buf;
462 			if (ca->sa_family == AF_INET) {
463 				addr_type = IPV4_VERSION;
464 				ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
465 			} else if (ca->sa_family == AF_INET6) {
466 				addr_type = IPV6_VERSION;
467 				ipaddr = &((struct sockaddr_in6 *)
468 				    ca)->sin6_addr;
469 			}
470 			tp = find_tpc(ipaddr, addr_type, B_FALSE);
471 			if (tp == NULL || tp->tpc_tp.tp_doi !=
472 			    l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
473 			    SUN_CIPSO) {
474 				if (exi != NULL)
475 					exi_rele(exi);
476 				VN_RELE(vp);
477 				resp->status = NFS3ERR_ACCES;
478 				error = 1;
479 			}
480 			if (tp != NULL)
481 				TPC_RELE(tp);
482 		}
483 	} else {
484 		error = VOP_LOOKUP(dvp, args->what.name, &vp,
485 		    NULL, 0, NULL, cr, NULL, NULL, NULL);
486 	}
487 
488 	if (is_system_labeled() && error == 0) {
489 		bslabel_t *clabel = req->rq_label;
490 
491 		ASSERT(clabel != NULL);
492 		DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
493 		    "got client label from request(1)", struct svc_req *, req);
494 
495 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
496 			if (!do_rfs_label_check(clabel, dvp,
497 			    DOMINANCE_CHECK)) {
498 				if (publicfh_flag && exi != NULL)
499 					exi_rele(exi);
500 				VN_RELE(vp);
501 				resp->status = NFS3ERR_ACCES;
502 				error = 1;
503 			}
504 		}
505 	}
506 
507 #ifdef DEBUG
508 	if (rfs3_do_post_op_attr) {
509 		dva.va_mask = AT_ALL;
510 		dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
511 	} else
512 		dvap = NULL;
513 #else
514 	dva.va_mask = AT_ALL;
515 	dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
516 #endif
517 
518 	if (error)
519 		goto out;
520 
521 	if (sec.sec_flags & SEC_QUERY) {
522 		error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
523 	} else {
524 		error = makefh3(&resp->resok.object, vp, exi);
525 		if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
526 			auth_weak = TRUE;
527 	}
528 
529 	if (error) {
530 		VN_RELE(vp);
531 		goto out;
532 	}
533 
534 	/*
535 	 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
536 	 * and have obtained a new exportinfo in exi which needs to be
537 	 * released. Note the the original exportinfo pointed to by exi
538 	 * will be released by the caller, common_dispatch.
539 	 */
540 	if (publicfh_flag)
541 		exi_rele(exi);
542 
543 #ifdef DEBUG
544 	if (rfs3_do_post_op_attr) {
545 		va.va_mask = AT_ALL;
546 		vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
547 	} else
548 		vap = NULL;
549 #else
550 	va.va_mask = AT_ALL;
551 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
552 #endif
553 
554 	VN_RELE(vp);
555 
556 	resp->status = NFS3_OK;
557 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
558 	vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
559 
560 	/*
561 	 * If it's public fh, no 0x81, and client's flavor is
562 	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
563 	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
564 	 */
565 	if (auth_weak)
566 		resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
567 
568 	DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
569 	    cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
570 	VN_RELE(dvp);
571 
572 	return;
573 
574 out:
575 	if (curthread->t_flag & T_WOULDBLOCK) {
576 		curthread->t_flag &= ~T_WOULDBLOCK;
577 		resp->status = NFS3ERR_JUKEBOX;
578 	} else
579 		resp->status = puterrno3(error);
580 out1:
581 	DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
582 	    cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
583 
584 	if (dvp != NULL)
585 		VN_RELE(dvp);
586 	vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
587 
588 }
589 
590 void *
591 rfs3_lookup_getfh(LOOKUP3args *args)
592 {
593 
594 	return (&args->what.dir);
595 }
596 
597 /* ARGSUSED */
598 void
599 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
600 	struct svc_req *req, cred_t *cr)
601 {
602 	int error;
603 	vnode_t *vp;
604 	struct vattr *vap;
605 	struct vattr va;
606 	int checkwriteperm;
607 	boolean_t dominant_label = B_FALSE;
608 	boolean_t equal_label = B_FALSE;
609 	boolean_t admin_low_client;
610 
611 	vap = NULL;
612 
613 	vp = nfs3_fhtovp(&args->object, exi);
614 
615 	DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
616 	    cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
617 
618 	if (vp == NULL) {
619 		error = ESTALE;
620 		goto out;
621 	}
622 
623 	/*
624 	 * If the file system is exported read only, it is not appropriate
625 	 * to check write permissions for regular files and directories.
626 	 * Special files are interpreted by the client, so the underlying
627 	 * permissions are sent back to the client for interpretation.
628 	 */
629 	if (rdonly(exi, req) && (vp->v_type == VREG || vp->v_type == VDIR))
630 		checkwriteperm = 0;
631 	else
632 		checkwriteperm = 1;
633 
634 	/*
635 	 * We need the mode so that we can correctly determine access
636 	 * permissions relative to a mandatory lock file.  Access to
637 	 * mandatory lock files is denied on the server, so it might
638 	 * as well be reflected to the server during the open.
639 	 */
640 	va.va_mask = AT_MODE;
641 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
642 	if (error)
643 		goto out;
644 
645 #ifdef DEBUG
646 	if (rfs3_do_post_op_attr)
647 		vap = &va;
648 #else
649 	vap = &va;
650 #endif
651 
652 	resp->resok.access = 0;
653 
654 	if (is_system_labeled()) {
655 		bslabel_t *clabel = req->rq_label;
656 
657 		ASSERT(clabel != NULL);
658 		DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
659 		    "got client label from request(1)", struct svc_req *, req);
660 
661 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
662 			if ((equal_label = do_rfs_label_check(clabel, vp,
663 			    EQUALITY_CHECK)) == B_FALSE) {
664 				dominant_label = do_rfs_label_check(clabel,
665 				    vp, DOMINANCE_CHECK);
666 			} else
667 				dominant_label = B_TRUE;
668 			admin_low_client = B_FALSE;
669 		} else
670 			admin_low_client = B_TRUE;
671 	}
672 
673 	if (args->access & ACCESS3_READ) {
674 		error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
675 		if (error) {
676 			if (curthread->t_flag & T_WOULDBLOCK)
677 				goto out;
678 		} else if (!MANDLOCK(vp, va.va_mode) &&
679 		    (!is_system_labeled() || admin_low_client ||
680 		    dominant_label))
681 			resp->resok.access |= ACCESS3_READ;
682 	}
683 	if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
684 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
685 		if (error) {
686 			if (curthread->t_flag & T_WOULDBLOCK)
687 				goto out;
688 		} else if (!is_system_labeled() || admin_low_client ||
689 		    dominant_label)
690 			resp->resok.access |= ACCESS3_LOOKUP;
691 	}
692 	if (checkwriteperm &&
693 	    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
694 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
695 		if (error) {
696 			if (curthread->t_flag & T_WOULDBLOCK)
697 				goto out;
698 		} else if (!MANDLOCK(vp, va.va_mode) &&
699 		    (!is_system_labeled() || admin_low_client || equal_label)) {
700 			resp->resok.access |=
701 			    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
702 		}
703 	}
704 	if (checkwriteperm &&
705 	    (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
706 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
707 		if (error) {
708 			if (curthread->t_flag & T_WOULDBLOCK)
709 				goto out;
710 		} else if (!is_system_labeled() || admin_low_client ||
711 		    equal_label)
712 			resp->resok.access |= ACCESS3_DELETE;
713 	}
714 	if (args->access & ACCESS3_EXECUTE) {
715 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
716 		if (error) {
717 			if (curthread->t_flag & T_WOULDBLOCK)
718 				goto out;
719 		} else if (!MANDLOCK(vp, va.va_mode) &&
720 		    (!is_system_labeled() || admin_low_client ||
721 		    dominant_label))
722 			resp->resok.access |= ACCESS3_EXECUTE;
723 	}
724 
725 #ifdef DEBUG
726 	if (rfs3_do_post_op_attr) {
727 		va.va_mask = AT_ALL;
728 		vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
729 	} else
730 		vap = NULL;
731 #else
732 	va.va_mask = AT_ALL;
733 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
734 #endif
735 
736 	resp->status = NFS3_OK;
737 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
738 
739 	DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
740 	    cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
741 
742 	VN_RELE(vp);
743 
744 	return;
745 
746 out:
747 	if (curthread->t_flag & T_WOULDBLOCK) {
748 		curthread->t_flag &= ~T_WOULDBLOCK;
749 		resp->status = NFS3ERR_JUKEBOX;
750 	} else
751 		resp->status = puterrno3(error);
752 	DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
753 	    cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
754 	if (vp != NULL)
755 		VN_RELE(vp);
756 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
757 }
758 
759 void *
760 rfs3_access_getfh(ACCESS3args *args)
761 {
762 
763 	return (&args->object);
764 }
765 
766 /* ARGSUSED */
767 void
768 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
769 	struct svc_req *req, cred_t *cr)
770 {
771 	int error;
772 	vnode_t *vp;
773 	struct vattr *vap;
774 	struct vattr va;
775 	struct iovec iov;
776 	struct uio uio;
777 	char *data;
778 
779 	vap = NULL;
780 
781 	vp = nfs3_fhtovp(&args->symlink, exi);
782 
783 	DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
784 	    cred_t *, cr, vnode_t *, vp, READLINK3args *, args);
785 
786 	if (vp == NULL) {
787 		error = ESTALE;
788 		goto out;
789 	}
790 
791 	va.va_mask = AT_ALL;
792 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
793 	if (error)
794 		goto out;
795 
796 #ifdef DEBUG
797 	if (rfs3_do_post_op_attr)
798 		vap = &va;
799 #else
800 	vap = &va;
801 #endif
802 
803 	if (vp->v_type != VLNK) {
804 		resp->status = NFS3ERR_INVAL;
805 		goto out1;
806 	}
807 
808 	if (MANDLOCK(vp, va.va_mode)) {
809 		resp->status = NFS3ERR_ACCES;
810 		goto out1;
811 	}
812 
813 	if (is_system_labeled()) {
814 		bslabel_t *clabel = req->rq_label;
815 
816 		ASSERT(clabel != NULL);
817 		DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
818 		    "got client label from request(1)", struct svc_req *, req);
819 
820 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
821 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
822 				resp->status = NFS3ERR_ACCES;
823 				goto out1;
824 			}
825 		}
826 	}
827 
828 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
829 
830 	iov.iov_base = data;
831 	iov.iov_len = MAXPATHLEN;
832 	uio.uio_iov = &iov;
833 	uio.uio_iovcnt = 1;
834 	uio.uio_segflg = UIO_SYSSPACE;
835 	uio.uio_extflg = UIO_COPY_CACHED;
836 	uio.uio_loffset = 0;
837 	uio.uio_resid = MAXPATHLEN;
838 
839 	error = VOP_READLINK(vp, &uio, cr, NULL);
840 
841 #ifdef DEBUG
842 	if (rfs3_do_post_op_attr) {
843 		va.va_mask = AT_ALL;
844 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
845 	} else
846 		vap = NULL;
847 #else
848 	va.va_mask = AT_ALL;
849 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
850 #endif
851 
852 #if 0 /* notyet */
853 	/*
854 	 * Don't do this.  It causes local disk writes when just
855 	 * reading the file and the overhead is deemed larger
856 	 * than the benefit.
857 	 */
858 	/*
859 	 * Force modified metadata out to stable storage.
860 	 */
861 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
862 #endif
863 
864 	if (error) {
865 		kmem_free(data, MAXPATHLEN + 1);
866 		goto out;
867 	}
868 
869 	resp->status = NFS3_OK;
870 	vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
871 	resp->resok.data = data;
872 	*(data + MAXPATHLEN - uio.uio_resid) = '\0';
873 
874 	DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
875 	    cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
876 	VN_RELE(vp);
877 
878 	return;
879 
880 out:
881 	if (curthread->t_flag & T_WOULDBLOCK) {
882 		curthread->t_flag &= ~T_WOULDBLOCK;
883 		resp->status = NFS3ERR_JUKEBOX;
884 	} else
885 		resp->status = puterrno3(error);
886 out1:
887 	DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
888 	    cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
889 	if (vp != NULL)
890 		VN_RELE(vp);
891 	vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
892 }
893 
894 void *
895 rfs3_readlink_getfh(READLINK3args *args)
896 {
897 
898 	return (&args->symlink);
899 }
900 
901 void
902 rfs3_readlink_free(READLINK3res *resp)
903 {
904 
905 	if (resp->status == NFS3_OK)
906 		kmem_free(resp->resok.data, MAXPATHLEN + 1);
907 }
908 
909 /* ARGSUSED */
910 void
911 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
912 	struct svc_req *req, cred_t *cr)
913 {
914 	int error;
915 	vnode_t *vp;
916 	struct vattr *vap;
917 	struct vattr va;
918 	struct iovec iov;
919 	struct uio uio;
920 	u_offset_t offset;
921 	mblk_t *mp;
922 	int alloc_err = 0;
923 	int in_crit = 0;
924 	int need_rwunlock = 0;
925 	caller_context_t ct;
926 
927 	vap = NULL;
928 
929 	vp = nfs3_fhtovp(&args->file, exi);
930 
931 	DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
932 	    cred_t *, cr, vnode_t *, vp, READ3args *, args);
933 
934 	if (vp == NULL) {
935 		error = ESTALE;
936 		goto out;
937 	}
938 
939 	if (is_system_labeled()) {
940 		bslabel_t *clabel = req->rq_label;
941 
942 		ASSERT(clabel != NULL);
943 		DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
944 		    "got client label from request(1)", struct svc_req *, req);
945 
946 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
947 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
948 				resp->status = NFS3ERR_ACCES;
949 				goto out1;
950 			}
951 		}
952 	}
953 
954 	ct.cc_sysid = 0;
955 	ct.cc_pid = 0;
956 	ct.cc_caller_id = nfs3_srv_caller_id;
957 	ct.cc_flags = CC_DONTBLOCK;
958 
959 	/*
960 	 * Enter the critical region before calling VOP_RWLOCK
961 	 * to avoid a deadlock with write requests.
962 	 */
963 	if (nbl_need_check(vp)) {
964 		nbl_start_crit(vp, RW_READER);
965 		in_crit = 1;
966 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
967 		    NULL)) {
968 			error = EACCES;
969 			goto out;
970 		}
971 	}
972 
973 	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
974 
975 	/* check if a monitor detected a delegation conflict */
976 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
977 		resp->status = NFS3ERR_JUKEBOX;
978 		goto out1;
979 	}
980 
981 	need_rwunlock = 1;
982 
983 	va.va_mask = AT_ALL;
984 	error = VOP_GETATTR(vp, &va, 0, cr, &ct);
985 
986 	/*
987 	 * If we can't get the attributes, then we can't do the
988 	 * right access checking.  So, we'll fail the request.
989 	 */
990 	if (error)
991 		goto out;
992 
993 #ifdef DEBUG
994 	if (rfs3_do_post_op_attr)
995 		vap = &va;
996 #else
997 	vap = &va;
998 #endif
999 
1000 	if (vp->v_type != VREG) {
1001 		resp->status = NFS3ERR_INVAL;
1002 		goto out1;
1003 	}
1004 
1005 	if (crgetuid(cr) != va.va_uid) {
1006 		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1007 		if (error) {
1008 			if (curthread->t_flag & T_WOULDBLOCK)
1009 				goto out;
1010 			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1011 			if (error)
1012 				goto out;
1013 		}
1014 	}
1015 
1016 	if (MANDLOCK(vp, va.va_mode)) {
1017 		resp->status = NFS3ERR_ACCES;
1018 		goto out1;
1019 	}
1020 
1021 	offset = args->offset;
1022 	if (offset >= va.va_size) {
1023 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1024 		if (in_crit)
1025 			nbl_end_crit(vp);
1026 		resp->status = NFS3_OK;
1027 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1028 		resp->resok.count = 0;
1029 		resp->resok.eof = TRUE;
1030 		resp->resok.data.data_len = 0;
1031 		resp->resok.data.data_val = NULL;
1032 		resp->resok.data.mp = NULL;
1033 		goto done;
1034 	}
1035 
1036 	if (args->count == 0) {
1037 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1038 		if (in_crit)
1039 			nbl_end_crit(vp);
1040 		resp->status = NFS3_OK;
1041 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1042 		resp->resok.count = 0;
1043 		resp->resok.eof = FALSE;
1044 		resp->resok.data.data_len = 0;
1045 		resp->resok.data.data_val = NULL;
1046 		resp->resok.data.mp = NULL;
1047 		goto done;
1048 	}
1049 
1050 	/*
1051 	 * do not allocate memory more the max. allowed
1052 	 * transfer size
1053 	 */
1054 	if (args->count > rfs3_tsize(req))
1055 		args->count = rfs3_tsize(req);
1056 
1057 	/*
1058 	 * mp will contain the data to be sent out in the read reply.
1059 	 * This will be freed after the reply has been sent out (by the
1060 	 * driver).
1061 	 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
1062 	 * that the call to xdrmblk_putmblk() never fails.
1063 	 */
1064 	mp = allocb_wait(RNDUP(args->count), BPRI_MED, STR_NOSIG, &alloc_err);
1065 	ASSERT(mp != NULL);
1066 	ASSERT(alloc_err == 0);
1067 
1068 	iov.iov_base = (caddr_t)mp->b_datap->db_base;
1069 	iov.iov_len = args->count;
1070 	uio.uio_iov = &iov;
1071 	uio.uio_iovcnt = 1;
1072 	uio.uio_segflg = UIO_SYSSPACE;
1073 	uio.uio_extflg = UIO_COPY_CACHED;
1074 	uio.uio_loffset = args->offset;
1075 	uio.uio_resid = args->count;
1076 
1077 	error = VOP_READ(vp, &uio, 0, cr, &ct);
1078 
1079 	if (error) {
1080 		freeb(mp);
1081 		/* check if a monitor detected a delegation conflict */
1082 		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1083 			resp->status = NFS3ERR_JUKEBOX;
1084 			goto out1;
1085 		}
1086 		goto out;
1087 	}
1088 
1089 	va.va_mask = AT_ALL;
1090 	error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1091 
1092 #ifdef DEBUG
1093 	if (rfs3_do_post_op_attr) {
1094 		if (error)
1095 			vap = NULL;
1096 		else
1097 			vap = &va;
1098 	} else
1099 		vap = NULL;
1100 #else
1101 	if (error)
1102 		vap = NULL;
1103 	else
1104 		vap = &va;
1105 #endif
1106 
1107 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1108 
1109 #if 0 /* notyet */
1110 	/*
1111 	 * Don't do this.  It causes local disk writes when just
1112 	 * reading the file and the overhead is deemed larger
1113 	 * than the benefit.
1114 	 */
1115 	/*
1116 	 * Force modified metadata out to stable storage.
1117 	 */
1118 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1119 #endif
1120 
1121 	if (in_crit)
1122 		nbl_end_crit(vp);
1123 
1124 	resp->status = NFS3_OK;
1125 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1126 	resp->resok.count = args->count - uio.uio_resid;
1127 	if (!error && offset + resp->resok.count == va.va_size)
1128 		resp->resok.eof = TRUE;
1129 	else
1130 		resp->resok.eof = FALSE;
1131 	resp->resok.data.data_len = resp->resok.count;
1132 	resp->resok.data.data_val = (char *)mp->b_datap->db_base;
1133 
1134 	resp->resok.data.mp = mp;
1135 
1136 	resp->resok.size = (uint_t)args->count;
1137 
1138 done:
1139 	DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1140 	    cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1141 
1142 	VN_RELE(vp);
1143 
1144 	return;
1145 
1146 out:
1147 	if (curthread->t_flag & T_WOULDBLOCK) {
1148 		curthread->t_flag &= ~T_WOULDBLOCK;
1149 		resp->status = NFS3ERR_JUKEBOX;
1150 	} else
1151 		resp->status = puterrno3(error);
1152 out1:
1153 	DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1154 	    cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1155 
1156 	if (vp != NULL) {
1157 		if (need_rwunlock)
1158 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1159 		if (in_crit)
1160 			nbl_end_crit(vp);
1161 		VN_RELE(vp);
1162 	}
1163 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1164 }
1165 
1166 void
1167 rfs3_read_free(READ3res *resp)
1168 {
1169 	mblk_t *mp;
1170 
1171 	if (resp->status == NFS3_OK) {
1172 		mp = resp->resok.data.mp;
1173 		if (mp != NULL)
1174 			freeb(mp);
1175 	}
1176 }
1177 
1178 void *
1179 rfs3_read_getfh(READ3args *args)
1180 {
1181 
1182 	return (&args->file);
1183 }
1184 
1185 #define	MAX_IOVECS	12
1186 
1187 #ifdef DEBUG
1188 static int rfs3_write_hits = 0;
1189 static int rfs3_write_misses = 0;
1190 #endif
1191 
1192 void
1193 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1194 	struct svc_req *req, cred_t *cr)
1195 {
1196 	int error;
1197 	vnode_t *vp;
1198 	struct vattr *bvap = NULL;
1199 	struct vattr bva;
1200 	struct vattr *avap = NULL;
1201 	struct vattr ava;
1202 	u_offset_t rlimit;
1203 	struct uio uio;
1204 	struct iovec iov[MAX_IOVECS];
1205 	mblk_t *m;
1206 	struct iovec *iovp;
1207 	int iovcnt;
1208 	int ioflag;
1209 	cred_t *savecred;
1210 	int in_crit = 0;
1211 	int rwlock_ret = -1;
1212 	caller_context_t ct;
1213 
1214 	vp = nfs3_fhtovp(&args->file, exi);
1215 
1216 	DTRACE_NFSV3_4(op__write__start, struct svc_req *, req,
1217 	    cred_t *, cr, vnode_t *, vp, WRITE3args *, args);
1218 
1219 	if (vp == NULL) {
1220 		error = ESTALE;
1221 		goto err;
1222 	}
1223 
1224 	if (is_system_labeled()) {
1225 		bslabel_t *clabel = req->rq_label;
1226 
1227 		ASSERT(clabel != NULL);
1228 		DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1229 		    "got client label from request(1)", struct svc_req *, req);
1230 
1231 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1232 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK)) {
1233 				resp->status = NFS3ERR_ACCES;
1234 				goto err1;
1235 			}
1236 		}
1237 	}
1238 
1239 	ct.cc_sysid = 0;
1240 	ct.cc_pid = 0;
1241 	ct.cc_caller_id = nfs3_srv_caller_id;
1242 	ct.cc_flags = CC_DONTBLOCK;
1243 
1244 	/*
1245 	 * We have to enter the critical region before calling VOP_RWLOCK
1246 	 * to avoid a deadlock with ufs.
1247 	 */
1248 	if (nbl_need_check(vp)) {
1249 		nbl_start_crit(vp, RW_READER);
1250 		in_crit = 1;
1251 		if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1252 		    NULL)) {
1253 			error = EACCES;
1254 			goto err;
1255 		}
1256 	}
1257 
1258 	rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1259 
1260 	/* check if a monitor detected a delegation conflict */
1261 	if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1262 		resp->status = NFS3ERR_JUKEBOX;
1263 		rwlock_ret = -1;
1264 		goto err1;
1265 	}
1266 
1267 
1268 	bva.va_mask = AT_ALL;
1269 	error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1270 
1271 	/*
1272 	 * If we can't get the attributes, then we can't do the
1273 	 * right access checking.  So, we'll fail the request.
1274 	 */
1275 	if (error)
1276 		goto err;
1277 
1278 	bvap = &bva;
1279 #ifdef DEBUG
1280 	if (!rfs3_do_pre_op_attr)
1281 		bvap = NULL;
1282 #endif
1283 	avap = bvap;
1284 
1285 	if (args->count != args->data.data_len) {
1286 		resp->status = NFS3ERR_INVAL;
1287 		goto err1;
1288 	}
1289 
1290 	if (rdonly(exi, req)) {
1291 		resp->status = NFS3ERR_ROFS;
1292 		goto err1;
1293 	}
1294 
1295 	if (vp->v_type != VREG) {
1296 		resp->status = NFS3ERR_INVAL;
1297 		goto err1;
1298 	}
1299 
1300 	if (crgetuid(cr) != bva.va_uid &&
1301 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1302 		goto err;
1303 
1304 	if (MANDLOCK(vp, bva.va_mode)) {
1305 		resp->status = NFS3ERR_ACCES;
1306 		goto err1;
1307 	}
1308 
1309 	if (args->count == 0) {
1310 		resp->status = NFS3_OK;
1311 		vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1312 		resp->resok.count = 0;
1313 		resp->resok.committed = args->stable;
1314 		resp->resok.verf = write3verf;
1315 		goto out;
1316 	}
1317 
1318 	if (args->mblk != NULL) {
1319 		iovcnt = 0;
1320 		for (m = args->mblk; m != NULL; m = m->b_cont)
1321 			iovcnt++;
1322 		if (iovcnt <= MAX_IOVECS) {
1323 #ifdef DEBUG
1324 			rfs3_write_hits++;
1325 #endif
1326 			iovp = iov;
1327 		} else {
1328 #ifdef DEBUG
1329 			rfs3_write_misses++;
1330 #endif
1331 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1332 		}
1333 		mblk_to_iov(args->mblk, iovcnt, iovp);
1334 	} else {
1335 		iovcnt = 1;
1336 		iovp = iov;
1337 		iovp->iov_base = args->data.data_val;
1338 		iovp->iov_len = args->count;
1339 	}
1340 
1341 	uio.uio_iov = iovp;
1342 	uio.uio_iovcnt = iovcnt;
1343 
1344 	uio.uio_segflg = UIO_SYSSPACE;
1345 	uio.uio_extflg = UIO_COPY_DEFAULT;
1346 	uio.uio_loffset = args->offset;
1347 	uio.uio_resid = args->count;
1348 	uio.uio_llimit = curproc->p_fsz_ctl;
1349 	rlimit = uio.uio_llimit - args->offset;
1350 	if (rlimit < (u_offset_t)uio.uio_resid)
1351 		uio.uio_resid = (int)rlimit;
1352 
1353 	if (args->stable == UNSTABLE)
1354 		ioflag = 0;
1355 	else if (args->stable == FILE_SYNC)
1356 		ioflag = FSYNC;
1357 	else if (args->stable == DATA_SYNC)
1358 		ioflag = FDSYNC;
1359 	else {
1360 		if (iovp != iov)
1361 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
1362 		resp->status = NFS3ERR_INVAL;
1363 		goto err1;
1364 	}
1365 
1366 	/*
1367 	 * We're changing creds because VM may fault and we need
1368 	 * the cred of the current thread to be used if quota
1369 	 * checking is enabled.
1370 	 */
1371 	savecred = curthread->t_cred;
1372 	curthread->t_cred = cr;
1373 	error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1374 	curthread->t_cred = savecred;
1375 
1376 	if (iovp != iov)
1377 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
1378 
1379 	/* check if a monitor detected a delegation conflict */
1380 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1381 		resp->status = NFS3ERR_JUKEBOX;
1382 		goto err1;
1383 	}
1384 
1385 	ava.va_mask = AT_ALL;
1386 	avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1387 
1388 #ifdef DEBUG
1389 	if (!rfs3_do_post_op_attr)
1390 		avap = NULL;
1391 #endif
1392 
1393 	if (error)
1394 		goto err;
1395 
1396 	/*
1397 	 * If we were unable to get the V_WRITELOCK_TRUE, then we
1398 	 * may not have accurate after attrs, so check if
1399 	 * we have both attributes, they have a non-zero va_seq, and
1400 	 * va_seq has changed by exactly one,
1401 	 * if not, turn off the before attr.
1402 	 */
1403 	if (rwlock_ret != V_WRITELOCK_TRUE) {
1404 		if (bvap == NULL || avap == NULL ||
1405 		    bvap->va_seq == 0 || avap->va_seq == 0 ||
1406 		    avap->va_seq != (bvap->va_seq + 1)) {
1407 			bvap = NULL;
1408 		}
1409 	}
1410 
1411 	resp->status = NFS3_OK;
1412 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1413 	resp->resok.count = args->count - uio.uio_resid;
1414 	resp->resok.committed = args->stable;
1415 	resp->resok.verf = write3verf;
1416 	goto out;
1417 
1418 err:
1419 	if (curthread->t_flag & T_WOULDBLOCK) {
1420 		curthread->t_flag &= ~T_WOULDBLOCK;
1421 		resp->status = NFS3ERR_JUKEBOX;
1422 	} else
1423 		resp->status = puterrno3(error);
1424 err1:
1425 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1426 out:
1427 	DTRACE_NFSV3_4(op__write__done, struct svc_req *, req,
1428 	    cred_t *, cr, vnode_t *, vp, WRITE3res *, resp);
1429 
1430 	if (vp != NULL) {
1431 		if (rwlock_ret != -1)
1432 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1433 		if (in_crit)
1434 			nbl_end_crit(vp);
1435 		VN_RELE(vp);
1436 	}
1437 }
1438 
1439 void *
1440 rfs3_write_getfh(WRITE3args *args)
1441 {
1442 
1443 	return (&args->file);
1444 }
1445 
1446 void
1447 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1448 	struct svc_req *req, cred_t *cr)
1449 {
1450 	int error;
1451 	int in_crit = 0;
1452 	vnode_t *vp;
1453 	vnode_t *tvp = NULL;
1454 	vnode_t *dvp;
1455 	struct vattr *vap;
1456 	struct vattr va;
1457 	struct vattr *dbvap;
1458 	struct vattr dbva;
1459 	struct vattr *davap;
1460 	struct vattr dava;
1461 	enum vcexcl excl;
1462 	nfstime3 *mtime;
1463 	len_t reqsize;
1464 	bool_t trunc;
1465 
1466 	dbvap = NULL;
1467 	davap = NULL;
1468 
1469 	dvp = nfs3_fhtovp(&args->where.dir, exi);
1470 
1471 	DTRACE_NFSV3_4(op__create__start, struct svc_req *, req,
1472 	    cred_t *, cr, vnode_t *, dvp, CREATE3args *, args);
1473 
1474 	if (dvp == NULL) {
1475 		error = ESTALE;
1476 		goto out;
1477 	}
1478 
1479 #ifdef DEBUG
1480 	if (rfs3_do_pre_op_attr) {
1481 		dbva.va_mask = AT_ALL;
1482 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1483 	} else
1484 		dbvap = NULL;
1485 #else
1486 	dbva.va_mask = AT_ALL;
1487 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1488 #endif
1489 	davap = dbvap;
1490 
1491 	if (args->where.name == nfs3nametoolong) {
1492 		resp->status = NFS3ERR_NAMETOOLONG;
1493 		goto out1;
1494 	}
1495 
1496 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1497 		resp->status = NFS3ERR_ACCES;
1498 		goto out1;
1499 	}
1500 
1501 	if (rdonly(exi, req)) {
1502 		resp->status = NFS3ERR_ROFS;
1503 		goto out1;
1504 	}
1505 
1506 	if (is_system_labeled()) {
1507 		bslabel_t *clabel = req->rq_label;
1508 
1509 		ASSERT(clabel != NULL);
1510 		DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1511 		    "got client label from request(1)", struct svc_req *, req);
1512 
1513 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1514 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK)) {
1515 				resp->status = NFS3ERR_ACCES;
1516 				goto out1;
1517 			}
1518 		}
1519 	}
1520 
1521 	if (args->how.mode == EXCLUSIVE) {
1522 		va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1523 		va.va_type = VREG;
1524 		va.va_mode = (mode_t)0;
1525 		/*
1526 		 * Ensure no time overflows and that types match
1527 		 */
1528 		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1529 		va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1530 		va.va_mtime.tv_nsec = mtime->nseconds;
1531 		excl = EXCL;
1532 	} else {
1533 		error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1534 		    &va);
1535 		if (error)
1536 			goto out;
1537 		va.va_mask |= AT_TYPE;
1538 		va.va_type = VREG;
1539 		if (args->how.mode == GUARDED)
1540 			excl = EXCL;
1541 		else {
1542 			excl = NONEXCL;
1543 
1544 			/*
1545 			 * During creation of file in non-exclusive mode
1546 			 * if size of file is being set then make sure
1547 			 * that if the file already exists that no conflicting
1548 			 * non-blocking mandatory locks exists in the region
1549 			 * being modified. If there are conflicting locks fail
1550 			 * the operation with EACCES.
1551 			 */
1552 			if (va.va_mask & AT_SIZE) {
1553 				struct vattr tva;
1554 
1555 				/*
1556 				 * Does file already exist?
1557 				 */
1558 				error = VOP_LOOKUP(dvp, args->where.name, &tvp,
1559 				    NULL, 0, NULL, cr, NULL, NULL, NULL);
1560 
1561 				/*
1562 				 * Check to see if the file has been delegated
1563 				 * to a v4 client.  If so, then begin recall of
1564 				 * the delegation and return JUKEBOX to allow
1565 				 * the client to retrasmit its request.
1566 				 */
1567 
1568 				trunc = va.va_size == 0;
1569 				if (!error &&
1570 				    rfs4_check_delegated(FWRITE, tvp, trunc)) {
1571 					resp->status = NFS3ERR_JUKEBOX;
1572 					goto out1;
1573 				}
1574 
1575 				/*
1576 				 * Check for NBMAND lock conflicts
1577 				 */
1578 				if (!error && nbl_need_check(tvp)) {
1579 					u_offset_t offset;
1580 					ssize_t len;
1581 
1582 					nbl_start_crit(tvp, RW_READER);
1583 					in_crit = 1;
1584 
1585 					tva.va_mask = AT_SIZE;
1586 					error = VOP_GETATTR(tvp, &tva, 0, cr,
1587 					    NULL);
1588 					/*
1589 					 * Can't check for conflicts, so return
1590 					 * error.
1591 					 */
1592 					if (error)
1593 						goto out;
1594 
1595 					offset = tva.va_size < va.va_size ?
1596 					    tva.va_size : va.va_size;
1597 					len = tva.va_size < va.va_size ?
1598 					    va.va_size - tva.va_size :
1599 					    tva.va_size - va.va_size;
1600 					if (nbl_conflict(tvp, NBL_WRITE,
1601 					    offset, len, 0, NULL)) {
1602 						error = EACCES;
1603 						goto out;
1604 					}
1605 				} else if (tvp) {
1606 					VN_RELE(tvp);
1607 					tvp = NULL;
1608 				}
1609 			}
1610 		}
1611 		if (va.va_mask & AT_SIZE)
1612 			reqsize = va.va_size;
1613 	}
1614 
1615 	/*
1616 	 * Must specify the mode.
1617 	 */
1618 	if (!(va.va_mask & AT_MODE)) {
1619 		resp->status = NFS3ERR_INVAL;
1620 		goto out1;
1621 	}
1622 
1623 	/*
1624 	 * If the filesystem is exported with nosuid, then mask off
1625 	 * the setuid and setgid bits.
1626 	 */
1627 	if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1628 		va.va_mode &= ~(VSUID | VSGID);
1629 
1630 tryagain:
1631 	/*
1632 	 * The file open mode used is VWRITE.  If the client needs
1633 	 * some other semantic, then it should do the access checking
1634 	 * itself.  It would have been nice to have the file open mode
1635 	 * passed as part of the arguments.
1636 	 */
1637 	error = VOP_CREATE(dvp, args->where.name, &va, excl, VWRITE,
1638 	    &vp, cr, 0, NULL, NULL);
1639 
1640 #ifdef DEBUG
1641 	if (rfs3_do_post_op_attr) {
1642 		dava.va_mask = AT_ALL;
1643 		davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1644 	} else
1645 		davap = NULL;
1646 #else
1647 	dava.va_mask = AT_ALL;
1648 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1649 #endif
1650 
1651 	if (error) {
1652 		/*
1653 		 * If we got something other than file already exists
1654 		 * then just return this error.  Otherwise, we got
1655 		 * EEXIST.  If we were doing a GUARDED create, then
1656 		 * just return this error.  Otherwise, we need to
1657 		 * make sure that this wasn't a duplicate of an
1658 		 * exclusive create request.
1659 		 *
1660 		 * The assumption is made that a non-exclusive create
1661 		 * request will never return EEXIST.
1662 		 */
1663 		if (error != EEXIST || args->how.mode == GUARDED)
1664 			goto out;
1665 		/*
1666 		 * Lookup the file so that we can get a vnode for it.
1667 		 */
1668 		error = VOP_LOOKUP(dvp, args->where.name, &vp, NULL, 0,
1669 		    NULL, cr, NULL, NULL, NULL);
1670 		if (error) {
1671 			/*
1672 			 * We couldn't find the file that we thought that
1673 			 * we just created.  So, we'll just try creating
1674 			 * it again.
1675 			 */
1676 			if (error == ENOENT)
1677 				goto tryagain;
1678 			goto out;
1679 		}
1680 
1681 		/*
1682 		 * If the file is delegated to a v4 client, go ahead
1683 		 * and initiate recall, this create is a hint that a
1684 		 * conflicting v3 open has occurred.
1685 		 */
1686 
1687 		if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1688 			VN_RELE(vp);
1689 			resp->status = NFS3ERR_JUKEBOX;
1690 			goto out1;
1691 		}
1692 
1693 		va.va_mask = AT_ALL;
1694 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1695 
1696 		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1697 		/* % with INT32_MAX to prevent overflows */
1698 		if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1699 		    vap->va_mtime.tv_sec !=
1700 		    (mtime->seconds % INT32_MAX) ||
1701 		    vap->va_mtime.tv_nsec != mtime->nseconds)) {
1702 			VN_RELE(vp);
1703 			error = EEXIST;
1704 			goto out;
1705 		}
1706 	} else {
1707 
1708 		if ((args->how.mode == UNCHECKED ||
1709 		    args->how.mode == GUARDED) &&
1710 		    args->how.createhow3_u.obj_attributes.size.set_it &&
1711 		    va.va_size == 0)
1712 			trunc = TRUE;
1713 		else
1714 			trunc = FALSE;
1715 
1716 		if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1717 			VN_RELE(vp);
1718 			resp->status = NFS3ERR_JUKEBOX;
1719 			goto out1;
1720 		}
1721 
1722 		va.va_mask = AT_ALL;
1723 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1724 
1725 		/*
1726 		 * We need to check to make sure that the file got
1727 		 * created to the indicated size.  If not, we do a
1728 		 * setattr to try to change the size, but we don't
1729 		 * try too hard.  This shouldn't a problem as most
1730 		 * clients will only specifiy a size of zero which
1731 		 * local file systems handle.  However, even if
1732 		 * the client does specify a non-zero size, it can
1733 		 * still recover by checking the size of the file
1734 		 * after it has created it and then issue a setattr
1735 		 * request of its own to set the size of the file.
1736 		 */
1737 		if (vap != NULL &&
1738 		    (args->how.mode == UNCHECKED ||
1739 		    args->how.mode == GUARDED) &&
1740 		    args->how.createhow3_u.obj_attributes.size.set_it &&
1741 		    vap->va_size != reqsize) {
1742 			va.va_mask = AT_SIZE;
1743 			va.va_size = reqsize;
1744 			(void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1745 			va.va_mask = AT_ALL;
1746 			vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1747 		}
1748 	}
1749 
1750 #ifdef DEBUG
1751 	if (!rfs3_do_post_op_attr)
1752 		vap = NULL;
1753 #endif
1754 
1755 #ifdef DEBUG
1756 	if (!rfs3_do_post_op_fh3)
1757 		resp->resok.obj.handle_follows = FALSE;
1758 	else {
1759 #endif
1760 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1761 	if (error)
1762 		resp->resok.obj.handle_follows = FALSE;
1763 	else
1764 		resp->resok.obj.handle_follows = TRUE;
1765 #ifdef DEBUG
1766 	}
1767 #endif
1768 
1769 	/*
1770 	 * Force modified data and metadata out to stable storage.
1771 	 */
1772 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1773 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
1774 
1775 	VN_RELE(vp);
1776 	if (tvp != NULL) {
1777 		if (in_crit)
1778 			nbl_end_crit(tvp);
1779 		VN_RELE(tvp);
1780 	}
1781 
1782 	resp->status = NFS3_OK;
1783 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1784 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1785 
1786 	DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1787 	    cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1788 
1789 	VN_RELE(dvp);
1790 	return;
1791 
1792 out:
1793 	if (curthread->t_flag & T_WOULDBLOCK) {
1794 		curthread->t_flag &= ~T_WOULDBLOCK;
1795 		resp->status = NFS3ERR_JUKEBOX;
1796 	} else
1797 		resp->status = puterrno3(error);
1798 out1:
1799 	DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1800 	    cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1801 
1802 	if (tvp != NULL) {
1803 		if (in_crit)
1804 			nbl_end_crit(tvp);
1805 		VN_RELE(tvp);
1806 	}
1807 	if (dvp != NULL)
1808 		VN_RELE(dvp);
1809 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1810 }
1811 
1812 void *
1813 rfs3_create_getfh(CREATE3args *args)
1814 {
1815 
1816 	return (&args->where.dir);
1817 }
1818 
1819 void
1820 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1821 	struct svc_req *req, cred_t *cr)
1822 {
1823 	int error;
1824 	vnode_t *vp = NULL;
1825 	vnode_t *dvp;
1826 	struct vattr *vap;
1827 	struct vattr va;
1828 	struct vattr *dbvap;
1829 	struct vattr dbva;
1830 	struct vattr *davap;
1831 	struct vattr dava;
1832 
1833 	dbvap = NULL;
1834 	davap = NULL;
1835 
1836 	dvp = nfs3_fhtovp(&args->where.dir, exi);
1837 
1838 	DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req,
1839 	    cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args);
1840 
1841 	if (dvp == NULL) {
1842 		error = ESTALE;
1843 		goto out;
1844 	}
1845 
1846 #ifdef DEBUG
1847 	if (rfs3_do_pre_op_attr) {
1848 		dbva.va_mask = AT_ALL;
1849 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1850 	} else
1851 		dbvap = NULL;
1852 #else
1853 	dbva.va_mask = AT_ALL;
1854 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1855 #endif
1856 	davap = dbvap;
1857 
1858 	if (args->where.name == nfs3nametoolong) {
1859 		resp->status = NFS3ERR_NAMETOOLONG;
1860 		goto out1;
1861 	}
1862 
1863 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1864 		resp->status = NFS3ERR_ACCES;
1865 		goto out1;
1866 	}
1867 
1868 	if (rdonly(exi, req)) {
1869 		resp->status = NFS3ERR_ROFS;
1870 		goto out1;
1871 	}
1872 
1873 	if (is_system_labeled()) {
1874 		bslabel_t *clabel = req->rq_label;
1875 
1876 		ASSERT(clabel != NULL);
1877 		DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1878 		    "got client label from request(1)", struct svc_req *, req);
1879 
1880 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1881 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK)) {
1882 				resp->status = NFS3ERR_ACCES;
1883 				goto out1;
1884 			}
1885 		}
1886 	}
1887 
1888 	error = sattr3_to_vattr(&args->attributes, &va);
1889 	if (error)
1890 		goto out;
1891 
1892 	if (!(va.va_mask & AT_MODE)) {
1893 		resp->status = NFS3ERR_INVAL;
1894 		goto out1;
1895 	}
1896 
1897 	va.va_mask |= AT_TYPE;
1898 	va.va_type = VDIR;
1899 
1900 	error = VOP_MKDIR(dvp, args->where.name, &va, &vp, cr, NULL, 0, NULL);
1901 
1902 #ifdef DEBUG
1903 	if (rfs3_do_post_op_attr) {
1904 		dava.va_mask = AT_ALL;
1905 		davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1906 	} else
1907 		davap = NULL;
1908 #else
1909 	dava.va_mask = AT_ALL;
1910 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1911 #endif
1912 
1913 	/*
1914 	 * Force modified data and metadata out to stable storage.
1915 	 */
1916 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
1917 
1918 	if (error)
1919 		goto out;
1920 
1921 #ifdef DEBUG
1922 	if (!rfs3_do_post_op_fh3)
1923 		resp->resok.obj.handle_follows = FALSE;
1924 	else {
1925 #endif
1926 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1927 	if (error)
1928 		resp->resok.obj.handle_follows = FALSE;
1929 	else
1930 		resp->resok.obj.handle_follows = TRUE;
1931 #ifdef DEBUG
1932 	}
1933 #endif
1934 
1935 #ifdef DEBUG
1936 	if (rfs3_do_post_op_attr) {
1937 		va.va_mask = AT_ALL;
1938 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1939 	} else
1940 		vap = NULL;
1941 #else
1942 	va.va_mask = AT_ALL;
1943 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1944 #endif
1945 
1946 	/*
1947 	 * Force modified data and metadata out to stable storage.
1948 	 */
1949 	(void) VOP_FSYNC(vp, 0, cr, NULL);
1950 
1951 	VN_RELE(vp);
1952 
1953 	resp->status = NFS3_OK;
1954 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1955 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1956 
1957 	DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
1958 	    cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
1959 	VN_RELE(dvp);
1960 
1961 	return;
1962 
1963 out:
1964 	if (curthread->t_flag & T_WOULDBLOCK) {
1965 		curthread->t_flag &= ~T_WOULDBLOCK;
1966 		resp->status = NFS3ERR_JUKEBOX;
1967 	} else
1968 		resp->status = puterrno3(error);
1969 out1:
1970 	DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
1971 	    cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
1972 	if (dvp != NULL)
1973 		VN_RELE(dvp);
1974 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1975 }
1976 
1977 void *
1978 rfs3_mkdir_getfh(MKDIR3args *args)
1979 {
1980 
1981 	return (&args->where.dir);
1982 }
1983 
1984 void
1985 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
1986 	struct svc_req *req, cred_t *cr)
1987 {
1988 	int error;
1989 	vnode_t *vp;
1990 	vnode_t *dvp;
1991 	struct vattr *vap;
1992 	struct vattr va;
1993 	struct vattr *dbvap;
1994 	struct vattr dbva;
1995 	struct vattr *davap;
1996 	struct vattr dava;
1997 
1998 	dbvap = NULL;
1999 	davap = NULL;
2000 
2001 	dvp = nfs3_fhtovp(&args->where.dir, exi);
2002 
2003 	DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req,
2004 	    cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args);
2005 
2006 	if (dvp == NULL) {
2007 		error = ESTALE;
2008 		goto err;
2009 	}
2010 
2011 #ifdef DEBUG
2012 	if (rfs3_do_pre_op_attr) {
2013 		dbva.va_mask = AT_ALL;
2014 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2015 	} else
2016 		dbvap = NULL;
2017 #else
2018 	dbva.va_mask = AT_ALL;
2019 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2020 #endif
2021 	davap = dbvap;
2022 
2023 	if (args->where.name == nfs3nametoolong) {
2024 		resp->status = NFS3ERR_NAMETOOLONG;
2025 		goto err1;
2026 	}
2027 
2028 	if (args->where.name == NULL || *(args->where.name) == '\0') {
2029 		resp->status = NFS3ERR_ACCES;
2030 		goto err1;
2031 	}
2032 
2033 	if (rdonly(exi, req)) {
2034 		resp->status = NFS3ERR_ROFS;
2035 		goto err1;
2036 	}
2037 
2038 	if (is_system_labeled()) {
2039 		bslabel_t *clabel = req->rq_label;
2040 
2041 		ASSERT(clabel != NULL);
2042 		DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2043 		    "got client label from request(1)", struct svc_req *, req);
2044 
2045 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2046 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK)) {
2047 				resp->status = NFS3ERR_ACCES;
2048 				goto err1;
2049 			}
2050 		}
2051 	}
2052 
2053 	error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2054 	if (error)
2055 		goto err;
2056 
2057 	if (!(va.va_mask & AT_MODE)) {
2058 		resp->status = NFS3ERR_INVAL;
2059 		goto err1;
2060 	}
2061 
2062 	if (args->symlink.symlink_data == nfs3nametoolong) {
2063 		resp->status = NFS3ERR_NAMETOOLONG;
2064 		goto err1;
2065 	}
2066 
2067 	va.va_mask |= AT_TYPE;
2068 	va.va_type = VLNK;
2069 
2070 	error = VOP_SYMLINK(dvp, args->where.name, &va,
2071 	    args->symlink.symlink_data, cr, NULL, 0);
2072 
2073 #ifdef DEBUG
2074 	if (rfs3_do_post_op_attr) {
2075 		dava.va_mask = AT_ALL;
2076 		davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2077 	} else
2078 		davap = NULL;
2079 #else
2080 	dava.va_mask = AT_ALL;
2081 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2082 #endif
2083 
2084 	if (error)
2085 		goto err;
2086 
2087 	error = VOP_LOOKUP(dvp, args->where.name, &vp, NULL, 0, NULL, cr,
2088 	    NULL, NULL, NULL);
2089 
2090 	/*
2091 	 * Force modified data and metadata out to stable storage.
2092 	 */
2093 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
2094 
2095 
2096 	resp->status = NFS3_OK;
2097 	if (error) {
2098 		resp->resok.obj.handle_follows = FALSE;
2099 		vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2100 		vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2101 		goto out;
2102 	}
2103 
2104 #ifdef DEBUG
2105 	if (!rfs3_do_post_op_fh3)
2106 		resp->resok.obj.handle_follows = FALSE;
2107 	else {
2108 #endif
2109 	error = makefh3(&resp->resok.obj.handle, vp, exi);
2110 	if (error)
2111 		resp->resok.obj.handle_follows = FALSE;
2112 	else
2113 		resp->resok.obj.handle_follows = TRUE;
2114 #ifdef DEBUG
2115 	}
2116 #endif
2117 
2118 #ifdef DEBUG
2119 	if (rfs3_do_post_op_attr) {
2120 		va.va_mask = AT_ALL;
2121 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2122 	} else
2123 		vap = NULL;
2124 #else
2125 	va.va_mask = AT_ALL;
2126 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2127 #endif
2128 
2129 	/*
2130 	 * Force modified data and metadata out to stable storage.
2131 	 */
2132 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2133 
2134 	VN_RELE(vp);
2135 
2136 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2137 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2138 	goto out;
2139 
2140 err:
2141 	if (curthread->t_flag & T_WOULDBLOCK) {
2142 		curthread->t_flag &= ~T_WOULDBLOCK;
2143 		resp->status = NFS3ERR_JUKEBOX;
2144 	} else
2145 		resp->status = puterrno3(error);
2146 err1:
2147 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2148 out:
2149 	DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req,
2150 	    cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp);
2151 
2152 	if (dvp != NULL)
2153 		VN_RELE(dvp);
2154 }
2155 
2156 void *
2157 rfs3_symlink_getfh(SYMLINK3args *args)
2158 {
2159 
2160 	return (&args->where.dir);
2161 }
2162 
2163 void
2164 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2165 	struct svc_req *req, cred_t *cr)
2166 {
2167 	int error;
2168 	vnode_t *vp;
2169 	vnode_t *realvp;
2170 	vnode_t *dvp;
2171 	struct vattr *vap;
2172 	struct vattr va;
2173 	struct vattr *dbvap;
2174 	struct vattr dbva;
2175 	struct vattr *davap;
2176 	struct vattr dava;
2177 	int mode;
2178 	enum vcexcl excl;
2179 
2180 	dbvap = NULL;
2181 	davap = NULL;
2182 
2183 	dvp = nfs3_fhtovp(&args->where.dir, exi);
2184 
2185 	DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req,
2186 	    cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args);
2187 
2188 	if (dvp == NULL) {
2189 		error = ESTALE;
2190 		goto out;
2191 	}
2192 
2193 #ifdef DEBUG
2194 	if (rfs3_do_pre_op_attr) {
2195 		dbva.va_mask = AT_ALL;
2196 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2197 	} else
2198 		dbvap = NULL;
2199 #else
2200 	dbva.va_mask = AT_ALL;
2201 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2202 #endif
2203 	davap = dbvap;
2204 
2205 	if (args->where.name == nfs3nametoolong) {
2206 		resp->status = NFS3ERR_NAMETOOLONG;
2207 		goto out1;
2208 	}
2209 
2210 	if (args->where.name == NULL || *(args->where.name) == '\0') {
2211 		resp->status = NFS3ERR_ACCES;
2212 		goto out1;
2213 	}
2214 
2215 	if (rdonly(exi, req)) {
2216 		resp->status = NFS3ERR_ROFS;
2217 		goto out1;
2218 	}
2219 
2220 	if (is_system_labeled()) {
2221 		bslabel_t *clabel = req->rq_label;
2222 
2223 		ASSERT(clabel != NULL);
2224 		DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2225 		    "got client label from request(1)", struct svc_req *, req);
2226 
2227 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2228 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK)) {
2229 				resp->status = NFS3ERR_ACCES;
2230 				goto out1;
2231 			}
2232 		}
2233 	}
2234 
2235 	switch (args->what.type) {
2236 	case NF3CHR:
2237 	case NF3BLK:
2238 		error = sattr3_to_vattr(
2239 		    &args->what.mknoddata3_u.device.dev_attributes, &va);
2240 		if (error)
2241 			goto out;
2242 		if (secpolicy_sys_devices(cr) != 0) {
2243 			resp->status = NFS3ERR_PERM;
2244 			goto out1;
2245 		}
2246 		if (args->what.type == NF3CHR)
2247 			va.va_type = VCHR;
2248 		else
2249 			va.va_type = VBLK;
2250 		va.va_rdev = makedevice(
2251 		    args->what.mknoddata3_u.device.spec.specdata1,
2252 		    args->what.mknoddata3_u.device.spec.specdata2);
2253 		va.va_mask |= AT_TYPE | AT_RDEV;
2254 		break;
2255 	case NF3SOCK:
2256 		error = sattr3_to_vattr(
2257 		    &args->what.mknoddata3_u.pipe_attributes, &va);
2258 		if (error)
2259 			goto out;
2260 		va.va_type = VSOCK;
2261 		va.va_mask |= AT_TYPE;
2262 		break;
2263 	case NF3FIFO:
2264 		error = sattr3_to_vattr(
2265 		    &args->what.mknoddata3_u.pipe_attributes, &va);
2266 		if (error)
2267 			goto out;
2268 		va.va_type = VFIFO;
2269 		va.va_mask |= AT_TYPE;
2270 		break;
2271 	default:
2272 		resp->status = NFS3ERR_BADTYPE;
2273 		goto out1;
2274 	}
2275 
2276 	/*
2277 	 * Must specify the mode.
2278 	 */
2279 	if (!(va.va_mask & AT_MODE)) {
2280 		resp->status = NFS3ERR_INVAL;
2281 		goto out1;
2282 	}
2283 
2284 	excl = EXCL;
2285 
2286 	mode = 0;
2287 
2288 	error = VOP_CREATE(dvp, args->where.name, &va, excl, mode,
2289 	    &vp, cr, 0, NULL, NULL);
2290 
2291 #ifdef DEBUG
2292 	if (rfs3_do_post_op_attr) {
2293 		dava.va_mask = AT_ALL;
2294 		davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2295 	} else
2296 		davap = NULL;
2297 #else
2298 	dava.va_mask = AT_ALL;
2299 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2300 #endif
2301 
2302 	/*
2303 	 * Force modified data and metadata out to stable storage.
2304 	 */
2305 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
2306 
2307 	if (error)
2308 		goto out;
2309 
2310 	resp->status = NFS3_OK;
2311 
2312 #ifdef DEBUG
2313 	if (!rfs3_do_post_op_fh3)
2314 		resp->resok.obj.handle_follows = FALSE;
2315 	else {
2316 #endif
2317 	error = makefh3(&resp->resok.obj.handle, vp, exi);
2318 	if (error)
2319 		resp->resok.obj.handle_follows = FALSE;
2320 	else
2321 		resp->resok.obj.handle_follows = TRUE;
2322 #ifdef DEBUG
2323 	}
2324 #endif
2325 
2326 #ifdef DEBUG
2327 	if (rfs3_do_post_op_attr) {
2328 		va.va_mask = AT_ALL;
2329 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2330 	} else
2331 		vap = NULL;
2332 #else
2333 	va.va_mask = AT_ALL;
2334 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2335 #endif
2336 
2337 	/*
2338 	 * Force modified metadata out to stable storage.
2339 	 *
2340 	 * if a underlying vp exists, pass it to VOP_FSYNC
2341 	 */
2342 	if (VOP_REALVP(vp, &realvp, NULL) == 0)
2343 		(void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2344 	else
2345 		(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2346 
2347 	VN_RELE(vp);
2348 
2349 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2350 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2351 	DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2352 	    cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2353 	VN_RELE(dvp);
2354 	return;
2355 
2356 out:
2357 	if (curthread->t_flag & T_WOULDBLOCK) {
2358 		curthread->t_flag &= ~T_WOULDBLOCK;
2359 		resp->status = NFS3ERR_JUKEBOX;
2360 	} else
2361 		resp->status = puterrno3(error);
2362 out1:
2363 	DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2364 	    cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2365 	if (dvp != NULL)
2366 		VN_RELE(dvp);
2367 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2368 }
2369 
2370 void *
2371 rfs3_mknod_getfh(MKNOD3args *args)
2372 {
2373 
2374 	return (&args->where.dir);
2375 }
2376 
2377 void
2378 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2379 	struct svc_req *req, cred_t *cr)
2380 {
2381 	int error = 0;
2382 	vnode_t *vp;
2383 	struct vattr *bvap;
2384 	struct vattr bva;
2385 	struct vattr *avap;
2386 	struct vattr ava;
2387 	vnode_t *targvp = NULL;
2388 
2389 	bvap = NULL;
2390 	avap = NULL;
2391 
2392 	vp = nfs3_fhtovp(&args->object.dir, exi);
2393 
2394 	DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req,
2395 	    cred_t *, cr, vnode_t *, vp, REMOVE3args *, args);
2396 
2397 	if (vp == NULL) {
2398 		error = ESTALE;
2399 		goto err;
2400 	}
2401 
2402 #ifdef DEBUG
2403 	if (rfs3_do_pre_op_attr) {
2404 		bva.va_mask = AT_ALL;
2405 		bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2406 	} else
2407 		bvap = NULL;
2408 #else
2409 	bva.va_mask = AT_ALL;
2410 	bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2411 #endif
2412 	avap = bvap;
2413 
2414 	if (vp->v_type != VDIR) {
2415 		resp->status = NFS3ERR_NOTDIR;
2416 		goto err1;
2417 	}
2418 
2419 	if (args->object.name == nfs3nametoolong) {
2420 		resp->status = NFS3ERR_NAMETOOLONG;
2421 		goto err1;
2422 	}
2423 
2424 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2425 		resp->status = NFS3ERR_ACCES;
2426 		goto err1;
2427 	}
2428 
2429 	if (rdonly(exi, req)) {
2430 		resp->status = NFS3ERR_ROFS;
2431 		goto err1;
2432 	}
2433 
2434 	if (is_system_labeled()) {
2435 		bslabel_t *clabel = req->rq_label;
2436 
2437 		ASSERT(clabel != NULL);
2438 		DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2439 		    "got client label from request(1)", struct svc_req *, req);
2440 
2441 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2442 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK)) {
2443 				resp->status = NFS3ERR_ACCES;
2444 				goto err1;
2445 			}
2446 		}
2447 	}
2448 
2449 	/*
2450 	 * Check for a conflict with a non-blocking mandatory share
2451 	 * reservation and V4 delegations
2452 	 */
2453 	error = VOP_LOOKUP(vp, args->object.name, &targvp, NULL, 0,
2454 	    NULL, cr, NULL, NULL, NULL);
2455 	if (error != 0)
2456 		goto err;
2457 
2458 	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2459 		resp->status = NFS3ERR_JUKEBOX;
2460 		goto err1;
2461 	}
2462 
2463 	if (!nbl_need_check(targvp)) {
2464 		error = VOP_REMOVE(vp, args->object.name, cr, NULL, 0);
2465 	} else {
2466 		nbl_start_crit(targvp, RW_READER);
2467 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2468 			error = EACCES;
2469 		} else {
2470 			error = VOP_REMOVE(vp, args->object.name, cr, NULL, 0);
2471 		}
2472 		nbl_end_crit(targvp);
2473 	}
2474 	VN_RELE(targvp);
2475 	targvp = NULL;
2476 
2477 #ifdef DEBUG
2478 	if (rfs3_do_post_op_attr) {
2479 		ava.va_mask = AT_ALL;
2480 		avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2481 	} else
2482 		avap = NULL;
2483 #else
2484 	ava.va_mask = AT_ALL;
2485 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2486 #endif
2487 
2488 	/*
2489 	 * Force modified data and metadata out to stable storage.
2490 	 */
2491 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2492 
2493 	if (error)
2494 		goto err;
2495 
2496 	resp->status = NFS3_OK;
2497 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2498 	goto out;
2499 
2500 err:
2501 	if (curthread->t_flag & T_WOULDBLOCK) {
2502 		curthread->t_flag &= ~T_WOULDBLOCK;
2503 		resp->status = NFS3ERR_JUKEBOX;
2504 	} else
2505 		resp->status = puterrno3(error);
2506 err1:
2507 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2508 out:
2509 	DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req,
2510 	    cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp);
2511 	if (vp != NULL)
2512 		VN_RELE(vp);
2513 }
2514 
2515 void *
2516 rfs3_remove_getfh(REMOVE3args *args)
2517 {
2518 
2519 	return (&args->object.dir);
2520 }
2521 
2522 void
2523 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2524 	struct svc_req *req, cred_t *cr)
2525 {
2526 	int error;
2527 	vnode_t *vp;
2528 	struct vattr *bvap;
2529 	struct vattr bva;
2530 	struct vattr *avap;
2531 	struct vattr ava;
2532 
2533 	bvap = NULL;
2534 	avap = NULL;
2535 
2536 	vp = nfs3_fhtovp(&args->object.dir, exi);
2537 
2538 	DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req,
2539 	    cred_t *, cr, vnode_t *, vp, RMDIR3args *, args);
2540 
2541 	if (vp == NULL) {
2542 		error = ESTALE;
2543 		goto err;
2544 	}
2545 
2546 #ifdef DEBUG
2547 	if (rfs3_do_pre_op_attr) {
2548 		bva.va_mask = AT_ALL;
2549 		bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2550 	} else
2551 		bvap = NULL;
2552 #else
2553 	bva.va_mask = AT_ALL;
2554 	bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2555 #endif
2556 	avap = bvap;
2557 
2558 	if (vp->v_type != VDIR) {
2559 		resp->status = NFS3ERR_NOTDIR;
2560 		goto err1;
2561 	}
2562 
2563 	if (args->object.name == nfs3nametoolong) {
2564 		resp->status = NFS3ERR_NAMETOOLONG;
2565 		goto err1;
2566 	}
2567 
2568 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2569 		resp->status = NFS3ERR_ACCES;
2570 		goto err1;
2571 	}
2572 
2573 	if (rdonly(exi, req)) {
2574 		resp->status = NFS3ERR_ROFS;
2575 		goto err1;
2576 	}
2577 
2578 	if (is_system_labeled()) {
2579 		bslabel_t *clabel = req->rq_label;
2580 
2581 		ASSERT(clabel != NULL);
2582 		DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2583 		    "got client label from request(1)", struct svc_req *, req);
2584 
2585 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2586 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK)) {
2587 				resp->status = NFS3ERR_ACCES;
2588 				goto err1;
2589 			}
2590 		}
2591 	}
2592 
2593 	error = VOP_RMDIR(vp, args->object.name, rootdir, cr, NULL, 0);
2594 
2595 #ifdef DEBUG
2596 	if (rfs3_do_post_op_attr) {
2597 		ava.va_mask = AT_ALL;
2598 		avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2599 	} else
2600 		avap = NULL;
2601 #else
2602 	ava.va_mask = AT_ALL;
2603 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2604 #endif
2605 
2606 	/*
2607 	 * Force modified data and metadata out to stable storage.
2608 	 */
2609 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2610 
2611 	if (error) {
2612 		/*
2613 		 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2614 		 * if the directory is not empty.  A System V NFS server
2615 		 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2616 		 * over the wire.
2617 		 */
2618 		if (error == EEXIST)
2619 			error = ENOTEMPTY;
2620 		goto err;
2621 	}
2622 
2623 	resp->status = NFS3_OK;
2624 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2625 	goto out;
2626 
2627 err:
2628 	if (curthread->t_flag & T_WOULDBLOCK) {
2629 		curthread->t_flag &= ~T_WOULDBLOCK;
2630 		resp->status = NFS3ERR_JUKEBOX;
2631 	} else
2632 		resp->status = puterrno3(error);
2633 err1:
2634 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2635 out:
2636 	DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req,
2637 	    cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp);
2638 	if (vp != NULL)
2639 		VN_RELE(vp);
2640 
2641 }
2642 
2643 void *
2644 rfs3_rmdir_getfh(RMDIR3args *args)
2645 {
2646 
2647 	return (&args->object.dir);
2648 }
2649 
2650 void
2651 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2652 	struct svc_req *req, cred_t *cr)
2653 {
2654 	int error = 0;
2655 	vnode_t *fvp;
2656 	vnode_t *tvp;
2657 	vnode_t *targvp;
2658 	struct vattr *fbvap;
2659 	struct vattr fbva;
2660 	struct vattr *favap;
2661 	struct vattr fava;
2662 	struct vattr *tbvap;
2663 	struct vattr tbva;
2664 	struct vattr *tavap;
2665 	struct vattr tava;
2666 	nfs_fh3 *fh3;
2667 	struct exportinfo *to_exi;
2668 	vnode_t *srcvp = NULL;
2669 	bslabel_t *clabel;
2670 
2671 	fbvap = NULL;
2672 	favap = NULL;
2673 	tbvap = NULL;
2674 	tavap = NULL;
2675 	tvp = NULL;
2676 
2677 	fvp = nfs3_fhtovp(&args->from.dir, exi);
2678 
2679 	DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req,
2680 	    cred_t *, cr, vnode_t *, fvp, RENAME3args *, args);
2681 
2682 	if (fvp == NULL) {
2683 		error = ESTALE;
2684 		goto err;
2685 	}
2686 
2687 	if (is_system_labeled()) {
2688 		clabel = req->rq_label;
2689 		ASSERT(clabel != NULL);
2690 		DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2691 		    "got client label from request(1)", struct svc_req *, req);
2692 
2693 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2694 			if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK)) {
2695 				resp->status = NFS3ERR_ACCES;
2696 				goto err1;
2697 			}
2698 		}
2699 	}
2700 
2701 #ifdef DEBUG
2702 	if (rfs3_do_pre_op_attr) {
2703 		fbva.va_mask = AT_ALL;
2704 		fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2705 	} else
2706 		fbvap = NULL;
2707 #else
2708 	fbva.va_mask = AT_ALL;
2709 	fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2710 #endif
2711 	favap = fbvap;
2712 
2713 	fh3 = &args->to.dir;
2714 	to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2715 	if (to_exi == NULL) {
2716 		resp->status = NFS3ERR_ACCES;
2717 		goto err1;
2718 	}
2719 	exi_rele(to_exi);
2720 
2721 	if (to_exi != exi) {
2722 		resp->status = NFS3ERR_XDEV;
2723 		goto err1;
2724 	}
2725 
2726 	tvp = nfs3_fhtovp(&args->to.dir, exi);
2727 	if (tvp == NULL) {
2728 		error = ESTALE;
2729 		goto err;
2730 	}
2731 
2732 #ifdef DEBUG
2733 	if (rfs3_do_pre_op_attr) {
2734 		tbva.va_mask = AT_ALL;
2735 		tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2736 	} else
2737 		tbvap = NULL;
2738 #else
2739 	tbva.va_mask = AT_ALL;
2740 	tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2741 #endif
2742 	tavap = tbvap;
2743 
2744 	if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2745 		resp->status = NFS3ERR_NOTDIR;
2746 		goto err1;
2747 	}
2748 
2749 	if (args->from.name == nfs3nametoolong ||
2750 	    args->to.name == nfs3nametoolong) {
2751 		resp->status = NFS3ERR_NAMETOOLONG;
2752 		goto err1;
2753 	}
2754 	if (args->from.name == NULL || *(args->from.name) == '\0' ||
2755 	    args->to.name == NULL || *(args->to.name) == '\0') {
2756 		resp->status = NFS3ERR_ACCES;
2757 		goto err1;
2758 	}
2759 
2760 	if (rdonly(exi, req)) {
2761 		resp->status = NFS3ERR_ROFS;
2762 		goto err1;
2763 	}
2764 
2765 	if (is_system_labeled()) {
2766 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2767 			if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK)) {
2768 				resp->status = NFS3ERR_ACCES;
2769 				goto err1;
2770 			}
2771 		}
2772 	}
2773 
2774 	/*
2775 	 * Check for a conflict with a non-blocking mandatory share
2776 	 * reservation or V4 delegations.
2777 	 */
2778 	error = VOP_LOOKUP(fvp, args->from.name, &srcvp, NULL, 0,
2779 	    NULL, cr, NULL, NULL, NULL);
2780 	if (error != 0)
2781 		goto err;
2782 
2783 	/*
2784 	 * If we rename a delegated file we should recall the
2785 	 * delegation, since future opens should fail or would
2786 	 * refer to a new file.
2787 	 */
2788 	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2789 		resp->status = NFS3ERR_JUKEBOX;
2790 		goto err1;
2791 	}
2792 
2793 	/*
2794 	 * Check for renaming over a delegated file.  Check rfs4_deleg_policy
2795 	 * first to avoid VOP_LOOKUP if possible.
2796 	 */
2797 	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2798 	    VOP_LOOKUP(tvp, args->to.name, &targvp, NULL, 0, NULL, cr,
2799 	    NULL, NULL, NULL) == 0) {
2800 
2801 		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2802 			VN_RELE(targvp);
2803 			resp->status = NFS3ERR_JUKEBOX;
2804 			goto err1;
2805 		}
2806 		VN_RELE(targvp);
2807 	}
2808 
2809 	if (!nbl_need_check(srcvp)) {
2810 		error = VOP_RENAME(fvp, args->from.name, tvp,
2811 		    args->to.name, cr, NULL, 0);
2812 	} else {
2813 		nbl_start_crit(srcvp, RW_READER);
2814 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
2815 			error = EACCES;
2816 		} else {
2817 			error = VOP_RENAME(fvp, args->from.name, tvp,
2818 			    args->to.name, cr, NULL, 0);
2819 		}
2820 		nbl_end_crit(srcvp);
2821 	}
2822 	if (error == 0) {
2823 		char *tmp;
2824 
2825 		/* fix the path name for the renamed file */
2826 		mutex_enter(&srcvp->v_lock);
2827 		tmp = srcvp->v_path;
2828 		srcvp->v_path = NULL;
2829 		mutex_exit(&srcvp->v_lock);
2830 		vn_setpath(rootdir, tvp, srcvp, args->to.name,
2831 		    strlen(args->to.name));
2832 		if (tmp != NULL)
2833 			kmem_free(tmp, strlen(tmp) + 1);
2834 	}
2835 	VN_RELE(srcvp);
2836 	srcvp = NULL;
2837 
2838 #ifdef DEBUG
2839 	if (rfs3_do_post_op_attr) {
2840 		fava.va_mask = AT_ALL;
2841 		favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2842 		tava.va_mask = AT_ALL;
2843 		tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2844 	} else {
2845 		favap = NULL;
2846 		tavap = NULL;
2847 	}
2848 #else
2849 	fava.va_mask = AT_ALL;
2850 	favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2851 	tava.va_mask = AT_ALL;
2852 	tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2853 #endif
2854 
2855 	/*
2856 	 * Force modified data and metadata out to stable storage.
2857 	 */
2858 	(void) VOP_FSYNC(fvp, 0, cr, NULL);
2859 	(void) VOP_FSYNC(tvp, 0, cr, NULL);
2860 
2861 	if (error)
2862 		goto err;
2863 
2864 	resp->status = NFS3_OK;
2865 	vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2866 	vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2867 	goto out;
2868 
2869 err:
2870 	if (curthread->t_flag & T_WOULDBLOCK) {
2871 		curthread->t_flag &= ~T_WOULDBLOCK;
2872 		resp->status = NFS3ERR_JUKEBOX;
2873 	} else
2874 		resp->status = puterrno3(error);
2875 err1:
2876 	vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2877 	vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2878 out:
2879 	DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req,
2880 	    cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp);
2881 	if (fvp != NULL)
2882 		VN_RELE(fvp);
2883 	if (tvp != NULL)
2884 		VN_RELE(tvp);
2885 }
2886 
2887 void *
2888 rfs3_rename_getfh(RENAME3args *args)
2889 {
2890 
2891 	return (&args->from.dir);
2892 }
2893 
2894 void
2895 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2896 	struct svc_req *req, cred_t *cr)
2897 {
2898 	int error;
2899 	vnode_t *vp;
2900 	vnode_t *dvp;
2901 	struct vattr *vap;
2902 	struct vattr va;
2903 	struct vattr *bvap;
2904 	struct vattr bva;
2905 	struct vattr *avap;
2906 	struct vattr ava;
2907 	nfs_fh3	*fh3;
2908 	struct exportinfo *to_exi;
2909 	bslabel_t *clabel;
2910 
2911 	vap = NULL;
2912 	bvap = NULL;
2913 	avap = NULL;
2914 	dvp = NULL;
2915 
2916 	vp = nfs3_fhtovp(&args->file, exi);
2917 
2918 	DTRACE_NFSV3_4(op__link__start, struct svc_req *, req,
2919 	    cred_t *, cr, vnode_t *, vp, LINK3args *, args);
2920 
2921 	if (vp == NULL) {
2922 		error = ESTALE;
2923 		goto out;
2924 	}
2925 
2926 #ifdef DEBUG
2927 	if (rfs3_do_pre_op_attr) {
2928 		va.va_mask = AT_ALL;
2929 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2930 	} else
2931 		vap = NULL;
2932 #else
2933 	va.va_mask = AT_ALL;
2934 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2935 #endif
2936 
2937 	fh3 = &args->link.dir;
2938 	to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2939 	if (to_exi == NULL) {
2940 		resp->status = NFS3ERR_ACCES;
2941 		goto out1;
2942 	}
2943 	exi_rele(to_exi);
2944 
2945 	if (to_exi != exi) {
2946 		resp->status = NFS3ERR_XDEV;
2947 		goto out1;
2948 	}
2949 
2950 	if (is_system_labeled()) {
2951 		clabel = req->rq_label;
2952 
2953 		ASSERT(clabel != NULL);
2954 		DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
2955 		    "got client label from request(1)", struct svc_req *, req);
2956 
2957 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2958 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
2959 				resp->status = NFS3ERR_ACCES;
2960 				goto out1;
2961 			}
2962 		}
2963 	}
2964 
2965 	dvp = nfs3_fhtovp(&args->link.dir, exi);
2966 	if (dvp == NULL) {
2967 		error = ESTALE;
2968 		goto out;
2969 	}
2970 
2971 #ifdef DEBUG
2972 	if (rfs3_do_pre_op_attr) {
2973 		bva.va_mask = AT_ALL;
2974 		bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
2975 	} else
2976 		bvap = NULL;
2977 #else
2978 	bva.va_mask = AT_ALL;
2979 	bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
2980 #endif
2981 
2982 	if (dvp->v_type != VDIR) {
2983 		resp->status = NFS3ERR_NOTDIR;
2984 		goto out1;
2985 	}
2986 
2987 	if (args->link.name == nfs3nametoolong) {
2988 		resp->status = NFS3ERR_NAMETOOLONG;
2989 		goto out1;
2990 	}
2991 
2992 	if (args->link.name == NULL || *(args->link.name) == '\0') {
2993 		resp->status = NFS3ERR_ACCES;
2994 		goto out1;
2995 	}
2996 
2997 	if (rdonly(exi, req)) {
2998 		resp->status = NFS3ERR_ROFS;
2999 		goto out1;
3000 	}
3001 
3002 	if (is_system_labeled()) {
3003 		DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
3004 		    "got client label from request(1)", struct svc_req *, req);
3005 
3006 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3007 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK)) {
3008 				resp->status = NFS3ERR_ACCES;
3009 				goto out1;
3010 			}
3011 		}
3012 	}
3013 
3014 	error = VOP_LINK(dvp, vp, args->link.name, cr, NULL, 0);
3015 
3016 #ifdef DEBUG
3017 	if (rfs3_do_post_op_attr) {
3018 		va.va_mask = AT_ALL;
3019 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3020 		ava.va_mask = AT_ALL;
3021 		avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3022 	} else {
3023 		vap = NULL;
3024 		avap = NULL;
3025 	}
3026 #else
3027 	va.va_mask = AT_ALL;
3028 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3029 	ava.va_mask = AT_ALL;
3030 	avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3031 #endif
3032 
3033 	/*
3034 	 * Force modified data and metadata out to stable storage.
3035 	 */
3036 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3037 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
3038 
3039 	if (error)
3040 		goto out;
3041 
3042 	VN_RELE(dvp);
3043 
3044 	resp->status = NFS3_OK;
3045 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3046 	vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3047 
3048 	DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3049 	    cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3050 
3051 	VN_RELE(vp);
3052 
3053 	return;
3054 
3055 out:
3056 	if (curthread->t_flag & T_WOULDBLOCK) {
3057 		curthread->t_flag &= ~T_WOULDBLOCK;
3058 		resp->status = NFS3ERR_JUKEBOX;
3059 	} else
3060 		resp->status = puterrno3(error);
3061 out1:
3062 	DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3063 	    cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3064 
3065 	if (vp != NULL)
3066 		VN_RELE(vp);
3067 	if (dvp != NULL)
3068 		VN_RELE(dvp);
3069 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3070 	vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3071 }
3072 
3073 void *
3074 rfs3_link_getfh(LINK3args *args)
3075 {
3076 
3077 	return (&args->file);
3078 }
3079 
/*
 * NFS3_READDIR_MIN_COUNT(length) is the size, in bytes, of a READDIR
 * response that carries attribute information and exactly one directory
 * entry whose name is `length' bytes long.  If the count supplied in the
 * request is at least this large, then we are guaranteed to be able to
 * return at least one directory entry if one exists, and no check for
 * NFS3ERR_TOOSMALL is needed.  If the requested size is smaller than
 * this, the caller must check whether that error has to be returned.
 *
 * The response is made up of the following, in XDR units:
 *
 *	status			1
 *	attributes flag		1
 *	cookie verifier		2
 *	attributes		NFS3_SIZEOF_FATTR3
 *	entry-follows boolean	1
 *	file id			2
 *	name length		1
 *	cookie			2
 *	end of list		1
 *	end of file		1
 *
 * multiplied by BYTES_PER_XDR_UNIT, plus the length of the name rounded
 * up to a multiple of BYTES_PER_XDR_UNIT.
 */

#define	NFS3_READDIR_MIN_COUNT(length)	\
	(roundup((length), BYTES_PER_XDR_UNIT) + \
	    (1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) * \
	    BYTES_PER_XDR_UNIT)
3108 
3109 /* ARGSUSED */
3110 void
3111 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3112 	struct svc_req *req, cred_t *cr)
3113 {
3114 	int error;
3115 	vnode_t *vp;
3116 	struct vattr *vap;
3117 	struct vattr va;
3118 	struct iovec iov;
3119 	struct uio uio;
3120 	char *data;
3121 	int iseof;
3122 	int bufsize;
3123 	int namlen;
3124 	uint_t count;
3125 
3126 	vap = NULL;
3127 
3128 	vp = nfs3_fhtovp(&args->dir, exi);
3129 
3130 	DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req,
3131 	    cred_t *, cr, vnode_t *, vp, READDIR3args *, args);
3132 
3133 	if (vp == NULL) {
3134 		error = ESTALE;
3135 		goto out;
3136 	}
3137 
3138 	if (is_system_labeled()) {
3139 		bslabel_t *clabel = req->rq_label;
3140 
3141 		ASSERT(clabel != NULL);
3142 		DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3143 		    "got client label from request(1)", struct svc_req *, req);
3144 
3145 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3146 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
3147 				resp->status = NFS3ERR_ACCES;
3148 				goto out1;
3149 			}
3150 		}
3151 	}
3152 
3153 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3154 
3155 #ifdef DEBUG
3156 	if (rfs3_do_pre_op_attr) {
3157 		va.va_mask = AT_ALL;
3158 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3159 	} else
3160 		vap = NULL;
3161 #else
3162 	va.va_mask = AT_ALL;
3163 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3164 #endif
3165 
3166 	if (vp->v_type != VDIR) {
3167 		resp->status = NFS3ERR_NOTDIR;
3168 		goto out1;
3169 	}
3170 
3171 	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3172 	if (error)
3173 		goto out;
3174 
3175 	/*
3176 	 * Now don't allow arbitrary count to alloc;
3177 	 * allow the maximum not to exceed rfs3_tsize()
3178 	 */
3179 	if (args->count > rfs3_tsize(req))
3180 		args->count = rfs3_tsize(req);
3181 
3182 	/*
3183 	 * Make sure that there is room to read at least one entry
3184 	 * if any are available.
3185 	 */
3186 	if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3187 		count = DIRENT64_RECLEN(MAXNAMELEN);
3188 	else
3189 		count = args->count;
3190 
3191 	data = kmem_alloc(count, KM_SLEEP);
3192 
3193 	iov.iov_base = data;
3194 	iov.iov_len = count;
3195 	uio.uio_iov = &iov;
3196 	uio.uio_iovcnt = 1;
3197 	uio.uio_segflg = UIO_SYSSPACE;
3198 	uio.uio_extflg = UIO_COPY_CACHED;
3199 	uio.uio_loffset = (offset_t)args->cookie;
3200 	uio.uio_resid = count;
3201 
3202 	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3203 
3204 #ifdef DEBUG
3205 	if (rfs3_do_post_op_attr) {
3206 		va.va_mask = AT_ALL;
3207 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3208 	} else
3209 		vap = NULL;
3210 #else
3211 	va.va_mask = AT_ALL;
3212 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3213 #endif
3214 
3215 	if (error) {
3216 		kmem_free(data, count);
3217 		goto out;
3218 	}
3219 
3220 	/*
3221 	 * If the count was not large enough to be able to guarantee
3222 	 * to be able to return at least one entry, then need to
3223 	 * check to see if NFS3ERR_TOOSMALL should be returned.
3224 	 */
3225 	if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3226 		/*
3227 		 * bufsize is used to keep track of the size of the response.
3228 		 * It is primed with:
3229 		 *	1 for the status +
3230 		 *	1 for the dir_attributes.attributes boolean +
3231 		 *	2 for the cookie verifier
3232 		 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3233 		 * to bytes.  If there are directory attributes to be
3234 		 * returned, then:
3235 		 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3236 		 * time BYTES_PER_XDR_UNIT is added to account for them.
3237 		 */
3238 		bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3239 		if (vap != NULL)
3240 			bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3241 		/*
3242 		 * An entry is composed of:
3243 		 *	1 for the true/false list indicator +
3244 		 *	2 for the fileid +
3245 		 *	1 for the length of the name +
3246 		 *	2 for the cookie +
3247 		 * all times BYTES_PER_XDR_UNIT to convert from
3248 		 * XDR units to bytes, plus the length of the name
3249 		 * rounded up to the nearest BYTES_PER_XDR_UNIT.
3250 		 */
3251 		if (count != uio.uio_resid) {
3252 			namlen = strlen(((struct dirent64 *)data)->d_name);
3253 			bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3254 			    roundup(namlen, BYTES_PER_XDR_UNIT);
3255 		}
3256 		/*
3257 		 * We need to check to see if the number of bytes left
3258 		 * to go into the buffer will actually fit into the
3259 		 * buffer.  This is calculated as the size of this
3260 		 * entry plus:
3261 		 *	1 for the true/false list indicator +
3262 		 *	1 for the eof indicator
3263 		 * times BYTES_PER_XDR_UNIT to convert from from
3264 		 * XDR units to bytes.
3265 		 */
3266 		bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3267 		if (bufsize > args->count) {
3268 			kmem_free(data, count);
3269 			resp->status = NFS3ERR_TOOSMALL;
3270 			goto out1;
3271 		}
3272 	}
3273 
3274 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3275 
3276 #if 0 /* notyet */
3277 	/*
3278 	 * Don't do this.  It causes local disk writes when just
3279 	 * reading the file and the overhead is deemed larger
3280 	 * than the benefit.
3281 	 */
3282 	/*
3283 	 * Force modified metadata out to stable storage.
3284 	 */
3285 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3286 #endif
3287 
3288 	resp->status = NFS3_OK;
3289 	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3290 	resp->resok.cookieverf = 0;
3291 	resp->resok.reply.entries = (entry3 *)data;
3292 	resp->resok.reply.eof = iseof;
3293 	resp->resok.size = count - uio.uio_resid;
3294 	resp->resok.count = args->count;
3295 	resp->resok.freecount = count;
3296 
3297 	DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3298 	    cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3299 
3300 	VN_RELE(vp);
3301 
3302 	return;
3303 
3304 out:
3305 	if (curthread->t_flag & T_WOULDBLOCK) {
3306 		curthread->t_flag &= ~T_WOULDBLOCK;
3307 		resp->status = NFS3ERR_JUKEBOX;
3308 	} else
3309 		resp->status = puterrno3(error);
3310 out1:
3311 	DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3312 	    cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3313 
3314 	if (vp != NULL) {
3315 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3316 		VN_RELE(vp);
3317 	}
3318 	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3319 }
3320 
3321 void *
3322 rfs3_readdir_getfh(READDIR3args *args)
3323 {
3324 
3325 	return (&args->dir);
3326 }
3327 
3328 void
3329 rfs3_readdir_free(READDIR3res *resp)
3330 {
3331 
3332 	if (resp->status == NFS3_OK)
3333 		kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3334 }
3335 
/*
 * nextdp(dp) yields a pointer to the struct dirent64 that starts
 * dp->d_reclen bytes after dp.  Any earlier definition of nextdp is
 * discarded first (#undef of an undefined name is a no-op).
 */
#undef nextdp
#define	nextdp(dp)	\
	((struct dirent64 *)&((char *)(dp))[(dp)->d_reclen])
3340 
/*
 * This macro computes the size, in bytes, of the part of a READDIRPLUS
 * response that describes one directory entry, including its attributes
 * and file handle.  If the space remaining in the response is larger than
 * this, then we are guaranteed to be able to return at least one more
 * directory entry if one exists.
 *
 * NFS3_READDIRPLUS_ENTRY is made up of the following:
 *
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * attribute flag - 1 * BYTES_PER_XDR_UNIT
 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * status flag for file handle - 1 * BYTES_PER_XDR_UNIT
 * length of a file handle - 1 * BYTES_PER_XDR_UNIT
 * Maximum length of a file handle (NFS3_MAXFHSIZE)
 * name length of the entry, rounded up to a multiple of BYTES_PER_XDR_UNIT
 *
 * The namelen argument is parenthesized so that an arbitrary expression
 * can be passed safely, matching NFS3_READDIR_MIN_COUNT().
 */
#define	NFS3_READDIRPLUS_ENTRY(namelen)	\
	((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
		BYTES_PER_XDR_UNIT + \
	NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
3364 
/*
 * Initial size of each VOP_READDIR() request issued by rfs3_readdirplus().
 * That function copies this value into a local and further limits it to
 * the space left in its buffer before every read.
 */
static int rfs3_readdir_unit = MAXBSIZE;
3366 
3367 /* ARGSUSED */
3368 void
3369 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3370 	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
3371 {
3372 	int error;
3373 	vnode_t *vp;
3374 	struct vattr *vap;
3375 	struct vattr va;
3376 	struct iovec iov;
3377 	struct uio uio;
3378 	char *data;
3379 	int iseof;
3380 	struct dirent64 *dp;
3381 	vnode_t *nvp;
3382 	struct vattr *nvap;
3383 	struct vattr nva;
3384 	entryplus3_info *infop = NULL;
3385 	int size = 0;
3386 	int nents = 0;
3387 	int bufsize = 0;
3388 	int entrysize = 0;
3389 	int tofit = 0;
3390 	int rd_unit = rfs3_readdir_unit;
3391 	int prev_len;
3392 	int space_left;
3393 	int i;
3394 	uint_t *namlen = NULL;
3395 
3396 	vap = NULL;
3397 
3398 	vp = nfs3_fhtovp(&args->dir, exi);
3399 
3400 	DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req,
3401 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args);
3402 
3403 	if (vp == NULL) {
3404 		error = ESTALE;
3405 		goto out;
3406 	}
3407 
3408 	if (is_system_labeled()) {
3409 		bslabel_t *clabel = req->rq_label;
3410 
3411 		ASSERT(clabel != NULL);
3412 		DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3413 		    char *, "got client label from request(1)",
3414 		    struct svc_req *, req);
3415 
3416 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3417 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
3418 				resp->status = NFS3ERR_ACCES;
3419 				goto out1;
3420 			}
3421 		}
3422 	}
3423 
3424 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3425 
3426 #ifdef DEBUG
3427 	if (rfs3_do_pre_op_attr) {
3428 		va.va_mask = AT_ALL;
3429 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3430 	} else
3431 		vap = NULL;
3432 #else
3433 	va.va_mask = AT_ALL;
3434 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3435 #endif
3436 
3437 	if (vp->v_type != VDIR) {
3438 		error = ENOTDIR;
3439 		goto out;
3440 	}
3441 
3442 	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3443 	if (error)
3444 		goto out;
3445 
3446 	/*
3447 	 * Don't allow arbitrary counts for allocation
3448 	 */
3449 	if (args->maxcount > rfs3_tsize(req))
3450 		args->maxcount = rfs3_tsize(req);
3451 
3452 	/*
3453 	 * Make sure that there is room to read at least one entry
3454 	 * if any are available
3455 	 */
3456 	args->dircount = MIN(args->dircount, args->maxcount);
3457 
3458 	if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3459 		args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3460 
3461 	/*
3462 	 * This allocation relies on a minimum directory entry
3463 	 * being roughly 24 bytes.  Therefore, the namlen array
3464 	 * will have enough space based on the maximum number of
3465 	 * entries to read.
3466 	 */
3467 	namlen = kmem_alloc(args->dircount, KM_SLEEP);
3468 
3469 	space_left = args->dircount;
3470 	data = kmem_alloc(args->dircount, KM_SLEEP);
3471 	dp = (struct dirent64 *)data;
3472 	uio.uio_iov = &iov;
3473 	uio.uio_iovcnt = 1;
3474 	uio.uio_segflg = UIO_SYSSPACE;
3475 	uio.uio_extflg = UIO_COPY_CACHED;
3476 	uio.uio_loffset = (offset_t)args->cookie;
3477 
3478 	/*
3479 	 * bufsize is used to keep track of the size of the response as we
3480 	 * get post op attributes and filehandles for each entry.  This is
3481 	 * an optimization as the server may have read more entries than will
3482 	 * fit in the buffer specified by maxcount.  We stop calculating
3483 	 * post op attributes and filehandles once we have exceeded maxcount.
3484 	 * This will minimize the effect of truncation.
3485 	 *
3486 	 * It is primed with:
3487 	 *	1 for the status +
3488 	 *	1 for the dir_attributes.attributes boolean +
3489 	 *	2 for the cookie verifier
3490 	 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3491 	 * to bytes.  If there are directory attributes to be
3492 	 * returned, then:
3493 	 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3494 	 * time BYTES_PER_XDR_UNIT is added to account for them.
3495 	 */
3496 	bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3497 	if (vap != NULL)
3498 		bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3499 
3500 getmoredents:
3501 	/*
3502 	 * Here we make a check so that our read unit is not larger than
3503 	 * the space left in the buffer.
3504 	 */
3505 	rd_unit = MIN(rd_unit, space_left);
3506 	iov.iov_base = (char *)dp;
3507 	iov.iov_len = rd_unit;
3508 	uio.uio_resid = rd_unit;
3509 	prev_len = rd_unit;
3510 
3511 	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3512 
3513 	if (error) {
3514 		kmem_free(data, args->dircount);
3515 		goto out;
3516 	}
3517 
3518 	if (uio.uio_resid == prev_len && !iseof) {
3519 		if (nents == 0) {
3520 			kmem_free(data, args->dircount);
3521 			resp->status = NFS3ERR_TOOSMALL;
3522 			goto out1;
3523 		}
3524 
3525 		/*
3526 		 * We could not get any more entries, so get the attributes
3527 		 * and filehandle for the entries already obtained.
3528 		 */
3529 		goto good;
3530 	}
3531 
3532 	/*
3533 	 * We estimate the size of the response by assuming the
3534 	 * entry exists and attributes and filehandle are also valid
3535 	 */
3536 	for (size = prev_len - uio.uio_resid;
3537 	    size > 0;
3538 	    size -= dp->d_reclen, dp = nextdp(dp)) {
3539 
3540 		if (dp->d_ino == 0) {
3541 			nents++;
3542 			continue;
3543 		}
3544 
3545 		namlen[nents] = strlen(dp->d_name);
3546 		entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3547 
3548 		/*
3549 		 * We need to check to see if the number of bytes left
3550 		 * to go into the buffer will actually fit into the
3551 		 * buffer.  This is calculated as the size of this
3552 		 * entry plus:
3553 		 *	1 for the true/false list indicator +
3554 		 *	1 for the eof indicator
3555 		 * times BYTES_PER_XDR_UNIT to convert from XDR units
3556 		 * to bytes.
3557 		 *
3558 		 * Also check the dircount limit against the first entry read
3559 		 *
3560 		 */
3561 		tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3562 		if (bufsize + tofit > args->maxcount) {
3563 			/*
3564 			 * We make a check here to see if this was the
3565 			 * first entry being measured.  If so, then maxcount
3566 			 * was too small to begin with and so we need to
3567 			 * return with NFS3ERR_TOOSMALL.
3568 			 */
3569 			if (nents == 0) {
3570 				kmem_free(data, args->dircount);
3571 				resp->status = NFS3ERR_TOOSMALL;
3572 				goto out1;
3573 			}
3574 			iseof = FALSE;
3575 			goto good;
3576 		}
3577 		bufsize += entrysize;
3578 		nents++;
3579 	}
3580 
3581 	/*
3582 	 * If there is enough room to fit at least 1 more entry including
3583 	 * post op attributes and filehandle in the buffer AND that we haven't
3584 	 * exceeded dircount then go back and get some more.
3585 	 */
3586 	if (!iseof &&
3587 	    (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3588 		space_left -= (prev_len - uio.uio_resid);
3589 		if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3590 			goto getmoredents;
3591 
3592 		/* else, fall through */
3593 	}
3594 
3595 good:
3596 
3597 #ifdef DEBUG
3598 	if (rfs3_do_post_op_attr) {
3599 		va.va_mask = AT_ALL;
3600 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3601 	} else
3602 		vap = NULL;
3603 #else
3604 	va.va_mask = AT_ALL;
3605 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3606 #endif
3607 
3608 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3609 
3610 	infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3611 	resp->resok.infop = infop;
3612 
3613 	dp = (struct dirent64 *)data;
3614 	for (i = 0; i < nents; i++) {
3615 
3616 		if (dp->d_ino == 0) {
3617 			infop[i].attr.attributes = FALSE;
3618 			infop[i].fh.handle_follows = FALSE;
3619 			dp = nextdp(dp);
3620 			continue;
3621 		}
3622 
3623 		infop[i].namelen = namlen[i];
3624 
3625 		error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3626 		    NULL, NULL, NULL);
3627 		if (error) {
3628 			infop[i].attr.attributes = FALSE;
3629 			infop[i].fh.handle_follows = FALSE;
3630 			dp = nextdp(dp);
3631 			continue;
3632 		}
3633 
3634 #ifdef DEBUG
3635 		if (rfs3_do_post_op_attr) {
3636 			nva.va_mask = AT_ALL;
3637 			nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ?
3638 			    NULL : &nva;
3639 		} else
3640 			nvap = NULL;
3641 #else
3642 		nva.va_mask = AT_ALL;
3643 		nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3644 #endif
3645 		vattr_to_post_op_attr(nvap, &infop[i].attr);
3646 
3647 #ifdef DEBUG
3648 		if (!rfs3_do_post_op_fh3)
3649 			infop[i].fh.handle_follows = FALSE;
3650 		else {
3651 #endif
3652 		error = makefh3(&infop[i].fh.handle, nvp, exi);
3653 		if (!error)
3654 			infop[i].fh.handle_follows = TRUE;
3655 		else
3656 			infop[i].fh.handle_follows = FALSE;
3657 #ifdef DEBUG
3658 		}
3659 #endif
3660 
3661 		VN_RELE(nvp);
3662 		dp = nextdp(dp);
3663 	}
3664 
3665 #if 0 /* notyet */
3666 	/*
3667 	 * Don't do this.  It causes local disk writes when just
3668 	 * reading the file and the overhead is deemed larger
3669 	 * than the benefit.
3670 	 */
3671 	/*
3672 	 * Force modified metadata out to stable storage.
3673 	 */
3674 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3675 #endif
3676 
3677 	kmem_free(namlen, args->dircount);
3678 
3679 	resp->status = NFS3_OK;
3680 	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3681 	resp->resok.cookieverf = 0;
3682 	resp->resok.reply.entries = (entryplus3 *)data;
3683 	resp->resok.reply.eof = iseof;
3684 	resp->resok.size = nents;
3685 	resp->resok.count = args->dircount;
3686 	resp->resok.maxcount = args->maxcount;
3687 
3688 	DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3689 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3690 
3691 	VN_RELE(vp);
3692 
3693 	return;
3694 
3695 out:
3696 	if (curthread->t_flag & T_WOULDBLOCK) {
3697 		curthread->t_flag &= ~T_WOULDBLOCK;
3698 		resp->status = NFS3ERR_JUKEBOX;
3699 	} else
3700 		resp->status = puterrno3(error);
3701 out1:
3702 	DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3703 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3704 
3705 	if (vp != NULL) {
3706 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3707 		VN_RELE(vp);
3708 	}
3709 
3710 	if (namlen != NULL)
3711 		kmem_free(namlen, args->dircount);
3712 
3713 	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3714 }
3715 
3716 void *
3717 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3718 {
3719 
3720 	return (&args->dir);
3721 }
3722 
3723 void
3724 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3725 {
3726 
3727 	if (resp->status == NFS3_OK) {
3728 		kmem_free(resp->resok.reply.entries, resp->resok.count);
3729 		kmem_free(resp->resok.infop,
3730 		    resp->resok.size * sizeof (struct entryplus3_info));
3731 	}
3732 }
3733 
3734 /* ARGSUSED */
3735 void
3736 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3737 	struct svc_req *req, cred_t *cr)
3738 {
3739 	int error;
3740 	vnode_t *vp;
3741 	struct vattr *vap;
3742 	struct vattr va;
3743 	struct statvfs64 sb;
3744 
3745 	vap = NULL;
3746 
3747 	vp = nfs3_fhtovp(&args->fsroot, exi);
3748 
3749 	DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req,
3750 	    cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args);
3751 
3752 	if (vp == NULL) {
3753 		error = ESTALE;
3754 		goto out;
3755 	}
3756 
3757 	if (is_system_labeled()) {
3758 		bslabel_t *clabel = req->rq_label;
3759 
3760 		ASSERT(clabel != NULL);
3761 		DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3762 		    "got client label from request(1)", struct svc_req *, req);
3763 
3764 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3765 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
3766 				resp->status = NFS3ERR_ACCES;
3767 				goto out1;
3768 			}
3769 		}
3770 	}
3771 
3772 	error = VFS_STATVFS(vp->v_vfsp, &sb);
3773 
3774 #ifdef DEBUG
3775 	if (rfs3_do_post_op_attr) {
3776 		va.va_mask = AT_ALL;
3777 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3778 	} else
3779 		vap = NULL;
3780 #else
3781 	va.va_mask = AT_ALL;
3782 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3783 #endif
3784 
3785 	if (error)
3786 		goto out;
3787 
3788 	resp->status = NFS3_OK;
3789 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3790 	if (sb.f_blocks != (fsblkcnt64_t)-1)
3791 		resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3792 	else
3793 		resp->resok.tbytes = (size3)sb.f_blocks;
3794 	if (sb.f_bfree != (fsblkcnt64_t)-1)
3795 		resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3796 	else
3797 		resp->resok.fbytes = (size3)sb.f_bfree;
3798 	if (sb.f_bavail != (fsblkcnt64_t)-1)
3799 		resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3800 	else
3801 		resp->resok.abytes = (size3)sb.f_bavail;
3802 	resp->resok.tfiles = (size3)sb.f_files;
3803 	resp->resok.ffiles = (size3)sb.f_ffree;
3804 	resp->resok.afiles = (size3)sb.f_favail;
3805 	resp->resok.invarsec = 0;
3806 
3807 	DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3808 	    cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3809 	VN_RELE(vp);
3810 
3811 	return;
3812 
3813 out:
3814 	if (curthread->t_flag & T_WOULDBLOCK) {
3815 		curthread->t_flag &= ~T_WOULDBLOCK;
3816 		resp->status = NFS3ERR_JUKEBOX;
3817 	} else
3818 		resp->status = puterrno3(error);
3819 out1:
3820 	DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3821 	    cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3822 
3823 	if (vp != NULL)
3824 		VN_RELE(vp);
3825 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3826 }
3827 
3828 void *
3829 rfs3_fsstat_getfh(FSSTAT3args *args)
3830 {
3831 
3832 	return (&args->fsroot);
3833 }
3834 
3835 /* ARGSUSED */
3836 void
3837 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3838 	struct svc_req *req, cred_t *cr)
3839 {
3840 	vnode_t *vp;
3841 	struct vattr *vap;
3842 	struct vattr va;
3843 	uint32_t xfer_size;
3844 	ulong_t l = 0;
3845 	int error;
3846 
3847 	vp = nfs3_fhtovp(&args->fsroot, exi);
3848 
3849 	DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req,
3850 	    cred_t *, cr, vnode_t *, vp, FSINFO3args *, args);
3851 
3852 	if (vp == NULL) {
3853 		if (curthread->t_flag & T_WOULDBLOCK) {
3854 			curthread->t_flag &= ~T_WOULDBLOCK;
3855 			resp->status = NFS3ERR_JUKEBOX;
3856 		} else
3857 			resp->status = NFS3ERR_STALE;
3858 		vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3859 		goto out;
3860 	}
3861 
3862 	if (is_system_labeled()) {
3863 		bslabel_t *clabel = req->rq_label;
3864 
3865 		ASSERT(clabel != NULL);
3866 		DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3867 		    "got client label from request(1)", struct svc_req *, req);
3868 
3869 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3870 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
3871 				resp->status = NFS3ERR_STALE;
3872 				vattr_to_post_op_attr(NULL,
3873 				    &resp->resfail.obj_attributes);
3874 				goto out;
3875 			}
3876 		}
3877 	}
3878 
3879 #ifdef DEBUG
3880 	if (rfs3_do_post_op_attr) {
3881 		va.va_mask = AT_ALL;
3882 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3883 	} else
3884 		vap = NULL;
3885 #else
3886 	va.va_mask = AT_ALL;
3887 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3888 #endif
3889 
3890 	resp->status = NFS3_OK;
3891 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3892 	xfer_size = rfs3_tsize(req);
3893 	resp->resok.rtmax = xfer_size;
3894 	resp->resok.rtpref = xfer_size;
3895 	resp->resok.rtmult = DEV_BSIZE;
3896 	resp->resok.wtmax = xfer_size;
3897 	resp->resok.wtpref = xfer_size;
3898 	resp->resok.wtmult = DEV_BSIZE;
3899 	resp->resok.dtpref = MAXBSIZE;
3900 
3901 	/*
3902 	 * Large file spec: want maxfilesize based on limit of
3903 	 * underlying filesystem.  We can guess 2^31-1 if need be.
3904 	 */
3905 	error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3906 
3907 	if (!error && l != 0 && l <= 64)
3908 		resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3909 	else
3910 		resp->resok.maxfilesize = MAXOFF32_T;
3911 
3912 	resp->resok.time_delta.seconds = 0;
3913 	resp->resok.time_delta.nseconds = 1000;
3914 	resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3915 	    FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3916 
3917 	DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3918 	    cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp);
3919 
3920 	VN_RELE(vp);
3921 
3922 	return;
3923 
3924 out:
3925 	DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3926 	    cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp);
3927 	if (vp != NULL)
3928 		VN_RELE(vp);
3929 }
3930 
3931 void *
3932 rfs3_fsinfo_getfh(FSINFO3args *args)
3933 {
3934 
3935 	return (&args->fsroot);
3936 }
3937 
3938 /* ARGSUSED */
3939 void
3940 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3941 	struct svc_req *req, cred_t *cr)
3942 {
3943 	int error;
3944 	vnode_t *vp;
3945 	struct vattr *vap;
3946 	struct vattr va;
3947 	ulong_t val;
3948 
3949 	vap = NULL;
3950 
3951 	vp = nfs3_fhtovp(&args->object, exi);
3952 
3953 	DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req,
3954 	    cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args);
3955 
3956 	if (vp == NULL) {
3957 		error = ESTALE;
3958 		goto out;
3959 	}
3960 
3961 	if (is_system_labeled()) {
3962 		bslabel_t *clabel = req->rq_label;
3963 
3964 		ASSERT(clabel != NULL);
3965 		DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
3966 		    "got client label from request(1)", struct svc_req *, req);
3967 
3968 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3969 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK)) {
3970 				resp->status = NFS3ERR_ACCES;
3971 				goto out1;
3972 			}
3973 		}
3974 	}
3975 
3976 #ifdef DEBUG
3977 	if (rfs3_do_post_op_attr) {
3978 		va.va_mask = AT_ALL;
3979 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3980 	} else
3981 		vap = NULL;
3982 #else
3983 	va.va_mask = AT_ALL;
3984 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3985 #endif
3986 
3987 	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
3988 	if (error)
3989 		goto out;
3990 	resp->resok.info.link_max = (uint32)val;
3991 
3992 	error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
3993 	if (error)
3994 		goto out;
3995 	resp->resok.info.name_max = (uint32)val;
3996 
3997 	error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
3998 	if (error)
3999 		goto out;
4000 	if (val == 1)
4001 		resp->resok.info.no_trunc = TRUE;
4002 	else
4003 		resp->resok.info.no_trunc = FALSE;
4004 
4005 	error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
4006 	if (error)
4007 		goto out;
4008 	if (val == 1)
4009 		resp->resok.info.chown_restricted = TRUE;
4010 	else
4011 		resp->resok.info.chown_restricted = FALSE;
4012 
4013 	resp->status = NFS3_OK;
4014 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4015 	resp->resok.info.case_insensitive = FALSE;
4016 	resp->resok.info.case_preserving = TRUE;
4017 	DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4018 	    cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4019 	VN_RELE(vp);
4020 	return;
4021 
4022 out:
4023 	if (curthread->t_flag & T_WOULDBLOCK) {
4024 		curthread->t_flag &= ~T_WOULDBLOCK;
4025 		resp->status = NFS3ERR_JUKEBOX;
4026 	} else
4027 		resp->status = puterrno3(error);
4028 out1:
4029 	DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4030 	    cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4031 	if (vp != NULL)
4032 		VN_RELE(vp);
4033 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4034 }
4035 
4036 void *
4037 rfs3_pathconf_getfh(PATHCONF3args *args)
4038 {
4039 
4040 	return (&args->object);
4041 }
4042 
4043 void
4044 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4045 	struct svc_req *req, cred_t *cr)
4046 {
4047 	int error;
4048 	vnode_t *vp;
4049 	struct vattr *bvap;
4050 	struct vattr bva;
4051 	struct vattr *avap;
4052 	struct vattr ava;
4053 
4054 	bvap = NULL;
4055 	avap = NULL;
4056 
4057 	vp = nfs3_fhtovp(&args->file, exi);
4058 
4059 	DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req,
4060 	    cred_t *, cr, vnode_t *, vp, COMMIT3args *, args);
4061 
4062 	if (vp == NULL) {
4063 		error = ESTALE;
4064 		goto out;
4065 	}
4066 
4067 	bva.va_mask = AT_ALL;
4068 	error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4069 
4070 	/*
4071 	 * If we can't get the attributes, then we can't do the
4072 	 * right access checking.  So, we'll fail the request.
4073 	 */
4074 	if (error)
4075 		goto out;
4076 
4077 #ifdef DEBUG
4078 	if (rfs3_do_pre_op_attr)
4079 		bvap = &bva;
4080 	else
4081 		bvap = NULL;
4082 #else
4083 	bvap = &bva;
4084 #endif
4085 
4086 	if (rdonly(exi, req)) {
4087 		resp->status = NFS3ERR_ROFS;
4088 		goto out1;
4089 	}
4090 
4091 	if (vp->v_type != VREG) {
4092 		resp->status = NFS3ERR_INVAL;
4093 		goto out1;
4094 	}
4095 
4096 	if (is_system_labeled()) {
4097 		bslabel_t *clabel = req->rq_label;
4098 
4099 		ASSERT(clabel != NULL);
4100 		DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4101 		    "got client label from request(1)", struct svc_req *, req);
4102 
4103 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4104 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK)) {
4105 				resp->status = NFS3ERR_ACCES;
4106 				goto out1;
4107 			}
4108 		}
4109 	}
4110 
4111 	if (crgetuid(cr) != bva.va_uid &&
4112 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4113 		goto out;
4114 
4115 	error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr, NULL);
4116 	if (!error)
4117 		error = VOP_FSYNC(vp, FNODSYNC, cr, NULL);
4118 
4119 #ifdef DEBUG
4120 	if (rfs3_do_post_op_attr) {
4121 		ava.va_mask = AT_ALL;
4122 		avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4123 	} else
4124 		avap = NULL;
4125 #else
4126 	ava.va_mask = AT_ALL;
4127 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4128 #endif
4129 
4130 	if (error)
4131 		goto out;
4132 
4133 	resp->status = NFS3_OK;
4134 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4135 	resp->resok.verf = write3verf;
4136 
4137 	DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4138 	    cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4139 
4140 	VN_RELE(vp);
4141 
4142 	return;
4143 
4144 out:
4145 	if (curthread->t_flag & T_WOULDBLOCK) {
4146 		curthread->t_flag &= ~T_WOULDBLOCK;
4147 		resp->status = NFS3ERR_JUKEBOX;
4148 	} else
4149 		resp->status = puterrno3(error);
4150 out1:
4151 	DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4152 	    cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4153 
4154 	if (vp != NULL)
4155 		VN_RELE(vp);
4156 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4157 }
4158 
4159 void *
4160 rfs3_commit_getfh(COMMIT3args *args)
4161 {
4162 
4163 	return (&args->file);
4164 }
4165 
4166 static int
4167 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4168 {
4169 
4170 	vap->va_mask = 0;
4171 
4172 	if (sap->mode.set_it) {
4173 		vap->va_mode = (mode_t)sap->mode.mode;
4174 		vap->va_mask |= AT_MODE;
4175 	}
4176 	if (sap->uid.set_it) {
4177 		vap->va_uid = (uid_t)sap->uid.uid;
4178 		vap->va_mask |= AT_UID;
4179 	}
4180 	if (sap->gid.set_it) {
4181 		vap->va_gid = (gid_t)sap->gid.gid;
4182 		vap->va_mask |= AT_GID;
4183 	}
4184 	if (sap->size.set_it) {
4185 		if (sap->size.size > (size3)((u_longlong_t)-1))
4186 			return (EINVAL);
4187 		vap->va_size = sap->size.size;
4188 		vap->va_mask |= AT_SIZE;
4189 	}
4190 	if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4191 #ifndef _LP64
4192 		/* check time validity */
4193 		if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4194 			return (EOVERFLOW);
4195 #endif
4196 		/*
4197 		 * nfs protocol defines times as unsigned so don't extend sign,
4198 		 * unless sysadmin set nfs_allow_preepoch_time.
4199 		 */
4200 		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4201 		    sap->atime.atime.seconds);
4202 		vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4203 		vap->va_mask |= AT_ATIME;
4204 	} else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4205 		gethrestime(&vap->va_atime);
4206 		vap->va_mask |= AT_ATIME;
4207 	}
4208 	if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4209 #ifndef _LP64
4210 		/* check time validity */
4211 		if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4212 			return (EOVERFLOW);
4213 #endif
4214 		/*
4215 		 * nfs protocol defines times as unsigned so don't extend sign,
4216 		 * unless sysadmin set nfs_allow_preepoch_time.
4217 		 */
4218 		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4219 		    sap->mtime.mtime.seconds);
4220 		vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4221 		vap->va_mask |= AT_MTIME;
4222 	} else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4223 		gethrestime(&vap->va_mtime);
4224 		vap->va_mask |= AT_MTIME;
4225 	}
4226 
4227 	return (0);
4228 }
4229 
4230 static ftype3 vt_to_nf3[] = {
4231 	0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4232 };
4233 
4234 static int
4235 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4236 {
4237 
4238 	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4239 	/* Return error if time or size overflow */
4240 	if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4241 		return (EOVERFLOW);
4242 	}
4243 	fap->type = vt_to_nf3[vap->va_type];
4244 	fap->mode = (mode3)(vap->va_mode & MODEMASK);
4245 	fap->nlink = (uint32)vap->va_nlink;
4246 	if (vap->va_uid == UID_NOBODY)
4247 		fap->uid = (uid3)NFS_UID_NOBODY;
4248 	else
4249 		fap->uid = (uid3)vap->va_uid;
4250 	if (vap->va_gid == GID_NOBODY)
4251 		fap->gid = (gid3)NFS_GID_NOBODY;
4252 	else
4253 		fap->gid = (gid3)vap->va_gid;
4254 	fap->size = (size3)vap->va_size;
4255 	fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4256 	fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4257 	fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4258 	fap->fsid = (uint64)vap->va_fsid;
4259 	fap->fileid = (fileid3)vap->va_nodeid;
4260 	fap->atime.seconds = vap->va_atime.tv_sec;
4261 	fap->atime.nseconds = vap->va_atime.tv_nsec;
4262 	fap->mtime.seconds = vap->va_mtime.tv_sec;
4263 	fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4264 	fap->ctime.seconds = vap->va_ctime.tv_sec;
4265 	fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4266 	return (0);
4267 }
4268 
4269 static int
4270 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4271 {
4272 
4273 	/* Return error if time or size overflow */
4274 	if (!  (NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4275 	    NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4276 	    NFS3_SIZE_OK(vap->va_size))) {
4277 		return (EOVERFLOW);
4278 	}
4279 	wccap->size = (size3)vap->va_size;
4280 	wccap->mtime.seconds = vap->va_mtime.tv_sec;
4281 	wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4282 	wccap->ctime.seconds = vap->va_ctime.tv_sec;
4283 	wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4284 	return (0);
4285 }
4286 
4287 static void
4288 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4289 {
4290 
4291 	/* don't return attrs if time overflow */
4292 	if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4293 		poap->attributes = TRUE;
4294 	} else
4295 		poap->attributes = FALSE;
4296 }
4297 
4298 void
4299 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4300 {
4301 
4302 	/* don't return attrs if time overflow */
4303 	if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4304 		poap->attributes = TRUE;
4305 	} else
4306 		poap->attributes = FALSE;
4307 }
4308 
4309 static void
4310 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4311 {
4312 
4313 	vattr_to_pre_op_attr(bvap, &wccp->before);
4314 	vattr_to_post_op_attr(avap, &wccp->after);
4315 }
4316 
4317 void
4318 rfs3_srvrinit(void)
4319 {
4320 	struct rfs3_verf_overlay {
4321 		uint_t id; /* a "unique" identifier */
4322 		int ts; /* a unique timestamp */
4323 	} *verfp;
4324 	timestruc_t now;
4325 
4326 	/*
4327 	 * The following algorithm attempts to find a unique verifier
4328 	 * to be used as the write verifier returned from the server
4329 	 * to the client.  It is important that this verifier change
4330 	 * whenever the server reboots.  Of secondary importance, it
4331 	 * is important for the verifier to be unique between two
4332 	 * different servers.
4333 	 *
4334 	 * Thus, an attempt is made to use the system hostid and the
4335 	 * current time in seconds when the nfssrv kernel module is
4336 	 * loaded.  It is assumed that an NFS server will not be able
4337 	 * to boot and then to reboot in less than a second.  If the
4338 	 * hostid has not been set, then the current high resolution
4339 	 * time is used.  This will ensure different verifiers each
4340 	 * time the server reboots and minimize the chances that two
4341 	 * different servers will have the same verifier.
4342 	 */
4343 
4344 #ifndef	lint
4345 	/*
4346 	 * We ASSERT that this constant logic expression is
4347 	 * always true because in the past, it wasn't.
4348 	 */
4349 	ASSERT(sizeof (*verfp) <= sizeof (write3verf));
4350 #endif
4351 
4352 	gethrestime(&now);
4353 	verfp = (struct rfs3_verf_overlay *)&write3verf;
4354 	verfp->ts = (int)now.tv_sec;
4355 	verfp->id = (uint_t)nfs_atoi(hw_serial);
4356 
4357 	if (verfp->id == 0)
4358 		verfp->id = (uint_t)now.tv_nsec;
4359 
4360 	nfs3_srv_caller_id = fs_new_caller_id();
4361 
4362 }
4363 
/*
 * Counterpart of rfs3_srvrinit().  Intentionally empty.
 */
void
rfs3_srvrfini(void)
{
	/* Nothing to do */
}
4369