xref: /titanic_52/usr/src/uts/common/fs/nfs/nfs3_srv.c (revision 799823bbed51a695d01e13511bbb1369980bb714)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
24  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
25  * Copyright (c) 2013 by Delphix. All rights reserved.
26  */
27 
28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
30 
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/cred.h>
35 #include <sys/buf.h>
36 #include <sys/vfs.h>
37 #include <sys/vnode.h>
38 #include <sys/uio.h>
39 #include <sys/errno.h>
40 #include <sys/sysmacros.h>
41 #include <sys/statvfs.h>
42 #include <sys/kmem.h>
43 #include <sys/dirent.h>
44 #include <sys/cmn_err.h>
45 #include <sys/debug.h>
46 #include <sys/systeminfo.h>
47 #include <sys/flock.h>
48 #include <sys/nbmlock.h>
49 #include <sys/policy.h>
50 #include <sys/sdt.h>
51 
52 #include <rpc/types.h>
53 #include <rpc/auth.h>
54 #include <rpc/svc.h>
55 #include <rpc/rpc_rdma.h>
56 
57 #include <nfs/nfs.h>
58 #include <nfs/export.h>
59 #include <nfs/nfs_cmd.h>
60 
61 #include <sys/strsubr.h>
62 #include <sys/tsol/label.h>
63 #include <sys/tsol/tndb.h>
64 
65 #include <sys/zone.h>
66 
67 #include <inet/ip.h>
68 #include <inet/ip6.h>
69 
70 /*
71  * These are the interface routines for the server side of the
72  * Network File System.  See the NFS version 3 protocol specification
73  * for a description of this interface.
74  */
75 
76 static writeverf3 write3verf;
77 
78 static int	sattr3_to_vattr(sattr3 *, struct vattr *);
79 static int	vattr_to_fattr3(struct vattr *, fattr3 *);
80 static int	vattr_to_wcc_attr(struct vattr *, wcc_attr *);
81 static void	vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
82 static void	vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
83 static int	rdma_setup_read_data3(READ3args *, READ3resok *);
84 
85 extern int nfs_loaned_buffers;
86 
87 u_longlong_t nfs3_srv_caller_id;
88 
89 /* ARGSUSED */
90 void
91 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
92     struct svc_req *req, cred_t *cr, bool_t ro)
93 {
94 	int error;
95 	vnode_t *vp;
96 	struct vattr va;
97 
98 	vp = nfs3_fhtovp(&args->object, exi);
99 
100 	DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
101 	    cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
102 
103 	if (vp == NULL) {
104 		error = ESTALE;
105 		goto out;
106 	}
107 
108 	va.va_mask = AT_ALL;
109 	error = rfs4_delegated_getattr(vp, &va, 0, cr);
110 
111 	if (!error) {
112 		/* Lie about the object type for a referral */
113 		if (vn_is_nfs_reparse(vp, cr))
114 			va.va_type = VLNK;
115 
116 		/* overflow error if time or size is out of range */
117 		error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
118 		if (error)
119 			goto out;
120 		resp->status = NFS3_OK;
121 
122 		DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
123 		    cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
124 
125 		VN_RELE(vp);
126 
127 		return;
128 	}
129 
130 out:
131 	if (curthread->t_flag & T_WOULDBLOCK) {
132 		curthread->t_flag &= ~T_WOULDBLOCK;
133 		resp->status = NFS3ERR_JUKEBOX;
134 	} else
135 		resp->status = puterrno3(error);
136 
137 	DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
138 	    cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
139 
140 	if (vp != NULL)
141 		VN_RELE(vp);
142 }
143 
144 void *
145 rfs3_getattr_getfh(GETATTR3args *args)
146 {
147 
148 	return (&args->object);
149 }
150 
151 void
152 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
153     struct svc_req *req, cred_t *cr, bool_t ro)
154 {
155 	int error;
156 	vnode_t *vp;
157 	struct vattr *bvap;
158 	struct vattr bva;
159 	struct vattr *avap;
160 	struct vattr ava;
161 	int flag;
162 	int in_crit = 0;
163 	struct flock64 bf;
164 	caller_context_t ct;
165 
166 	bvap = NULL;
167 	avap = NULL;
168 
169 	vp = nfs3_fhtovp(&args->object, exi);
170 
171 	DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
172 	    cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);
173 
174 	if (vp == NULL) {
175 		error = ESTALE;
176 		goto out;
177 	}
178 
179 	error = sattr3_to_vattr(&args->new_attributes, &ava);
180 	if (error)
181 		goto out;
182 
183 	if (is_system_labeled()) {
184 		bslabel_t *clabel = req->rq_label;
185 
186 		ASSERT(clabel != NULL);
187 		DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
188 		    "got client label from request(1)", struct svc_req *, req);
189 
190 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
191 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
192 			    exi)) {
193 				resp->status = NFS3ERR_ACCES;
194 				goto out1;
195 			}
196 		}
197 	}
198 
199 	/*
200 	 * We need to specially handle size changes because of
201 	 * possible conflicting NBMAND locks. Get into critical
202 	 * region before VOP_GETATTR, so the size attribute is
203 	 * valid when checking conflicts.
204 	 *
205 	 * Also, check to see if the v4 side of the server has
206 	 * delegated this file.  If so, then we return JUKEBOX to
207 	 * allow the client to retrasmit its request.
208 	 */
209 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
210 		if (nbl_need_check(vp)) {
211 			nbl_start_crit(vp, RW_READER);
212 			in_crit = 1;
213 		}
214 	}
215 
216 	bva.va_mask = AT_ALL;
217 	error = rfs4_delegated_getattr(vp, &bva, 0, cr);
218 
219 	/*
220 	 * If we can't get the attributes, then we can't do the
221 	 * right access checking.  So, we'll fail the request.
222 	 */
223 	if (error)
224 		goto out;
225 
226 	bvap = &bva;
227 
228 	if (rdonly(ro, vp)) {
229 		resp->status = NFS3ERR_ROFS;
230 		goto out1;
231 	}
232 
233 	if (args->guard.check &&
234 	    (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
235 	    args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
236 		resp->status = NFS3ERR_NOT_SYNC;
237 		goto out1;
238 	}
239 
240 	if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
241 		flag = ATTR_UTIME;
242 	else
243 		flag = 0;
244 
245 	/*
246 	 * If the filesystem is exported with nosuid, then mask off
247 	 * the setuid and setgid bits.
248 	 */
249 	if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
250 	    (exi->exi_export.ex_flags & EX_NOSUID))
251 		ava.va_mode &= ~(VSUID | VSGID);
252 
253 	ct.cc_sysid = 0;
254 	ct.cc_pid = 0;
255 	ct.cc_caller_id = nfs3_srv_caller_id;
256 	ct.cc_flags = CC_DONTBLOCK;
257 
258 	/*
259 	 * We need to specially handle size changes because it is
260 	 * possible for the client to create a file with modes
261 	 * which indicate read-only, but with the file opened for
262 	 * writing.  If the client then tries to set the size of
263 	 * the file, then the normal access checking done in
264 	 * VOP_SETATTR would prevent the client from doing so,
265 	 * although it should be legal for it to do so.  To get
266 	 * around this, we do the access checking for ourselves
267 	 * and then use VOP_SPACE which doesn't do the access
268 	 * checking which VOP_SETATTR does. VOP_SPACE can only
269 	 * operate on VREG files, let VOP_SETATTR handle the other
270 	 * extremely rare cases.
271 	 * Also the client should not be allowed to change the
272 	 * size of the file if there is a conflicting non-blocking
273 	 * mandatory lock in the region the change.
274 	 */
275 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
276 		if (in_crit) {
277 			u_offset_t offset;
278 			ssize_t length;
279 
280 			if (ava.va_size < bva.va_size) {
281 				offset = ava.va_size;
282 				length = bva.va_size - ava.va_size;
283 			} else {
284 				offset = bva.va_size;
285 				length = ava.va_size - bva.va_size;
286 			}
287 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
288 			    NULL)) {
289 				error = EACCES;
290 				goto out;
291 			}
292 		}
293 
294 		if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
295 			ava.va_mask &= ~AT_SIZE;
296 			bf.l_type = F_WRLCK;
297 			bf.l_whence = 0;
298 			bf.l_start = (off64_t)ava.va_size;
299 			bf.l_len = 0;
300 			bf.l_sysid = 0;
301 			bf.l_pid = 0;
302 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
303 			    (offset_t)ava.va_size, cr, &ct);
304 		}
305 	}
306 
307 	if (!error && ava.va_mask)
308 		error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
309 
310 	/* check if a monitor detected a delegation conflict */
311 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
312 		resp->status = NFS3ERR_JUKEBOX;
313 		goto out1;
314 	}
315 
316 	ava.va_mask = AT_ALL;
317 	avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
318 
319 	/*
320 	 * Force modified metadata out to stable storage.
321 	 */
322 	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
323 
324 	if (error)
325 		goto out;
326 
327 	if (in_crit)
328 		nbl_end_crit(vp);
329 
330 	resp->status = NFS3_OK;
331 	vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
332 
333 	DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
334 	    cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
335 
336 	VN_RELE(vp);
337 
338 	return;
339 
340 out:
341 	if (curthread->t_flag & T_WOULDBLOCK) {
342 		curthread->t_flag &= ~T_WOULDBLOCK;
343 		resp->status = NFS3ERR_JUKEBOX;
344 	} else
345 		resp->status = puterrno3(error);
346 out1:
347 	DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
348 	    cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
349 
350 	if (vp != NULL) {
351 		if (in_crit)
352 			nbl_end_crit(vp);
353 		VN_RELE(vp);
354 	}
355 	vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
356 }
357 
358 void *
359 rfs3_setattr_getfh(SETATTR3args *args)
360 {
361 
362 	return (&args->object);
363 }
364 
365 /* ARGSUSED */
366 void
367 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
368     struct svc_req *req, cred_t *cr, bool_t ro)
369 {
370 	int error;
371 	vnode_t *vp;
372 	vnode_t *dvp;
373 	struct vattr *vap;
374 	struct vattr va;
375 	struct vattr *dvap;
376 	struct vattr dva;
377 	nfs_fh3 *fhp;
378 	struct sec_ol sec = {0, 0};
379 	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
380 	struct sockaddr *ca;
381 	char *name = NULL;
382 
383 	dvap = NULL;
384 
385 	/*
386 	 * Allow lookups from the root - the default
387 	 * location of the public filehandle.
388 	 */
389 	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
390 		dvp = rootdir;
391 		VN_HOLD(dvp);
392 
393 		DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
394 		    cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
395 	} else {
396 		dvp = nfs3_fhtovp(&args->what.dir, exi);
397 
398 		DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
399 		    cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
400 
401 		if (dvp == NULL) {
402 			error = ESTALE;
403 			goto out;
404 		}
405 	}
406 
407 	dva.va_mask = AT_ALL;
408 	dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
409 
410 	if (args->what.name == nfs3nametoolong) {
411 		resp->status = NFS3ERR_NAMETOOLONG;
412 		goto out1;
413 	}
414 
415 	if (args->what.name == NULL || *(args->what.name) == '\0') {
416 		resp->status = NFS3ERR_ACCES;
417 		goto out1;
418 	}
419 
420 	fhp = &args->what.dir;
421 	if (strcmp(args->what.name, "..") == 0 &&
422 	    EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
423 		resp->status = NFS3ERR_NOENT;
424 		goto out1;
425 	}
426 
427 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
428 	name = nfscmd_convname(ca, exi, args->what.name,
429 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
430 
431 	if (name == NULL) {
432 		resp->status = NFS3ERR_ACCES;
433 		goto out1;
434 	}
435 
436 	/*
437 	 * If the public filehandle is used then allow
438 	 * a multi-component lookup
439 	 */
440 	if (PUBLIC_FH3(&args->what.dir)) {
441 		publicfh_flag = TRUE;
442 		error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
443 		    &exi, &sec);
444 		if (error && exi != NULL)
445 			exi_rele(exi); /* See comment below Re: publicfh_flag */
446 		/*
447 		 * Since WebNFS may bypass MOUNT, we need to ensure this
448 		 * request didn't come from an unlabeled admin_low client.
449 		 */
450 		if (is_system_labeled() && error == 0) {
451 			int		addr_type;
452 			void		*ipaddr;
453 			tsol_tpc_t	*tp;
454 
455 			if (ca->sa_family == AF_INET) {
456 				addr_type = IPV4_VERSION;
457 				ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
458 			} else if (ca->sa_family == AF_INET6) {
459 				addr_type = IPV6_VERSION;
460 				ipaddr = &((struct sockaddr_in6 *)
461 				    ca)->sin6_addr;
462 			}
463 			tp = find_tpc(ipaddr, addr_type, B_FALSE);
464 			if (tp == NULL || tp->tpc_tp.tp_doi !=
465 			    l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
466 			    SUN_CIPSO) {
467 				if (exi != NULL)
468 					exi_rele(exi);
469 				VN_RELE(vp);
470 				error = EACCES;
471 			}
472 			if (tp != NULL)
473 				TPC_RELE(tp);
474 		}
475 	} else {
476 		error = VOP_LOOKUP(dvp, name, &vp,
477 		    NULL, 0, NULL, cr, NULL, NULL, NULL);
478 	}
479 
480 	if (name != args->what.name)
481 		kmem_free(name, MAXPATHLEN + 1);
482 
483 	if (is_system_labeled() && error == 0) {
484 		bslabel_t *clabel = req->rq_label;
485 
486 		ASSERT(clabel != NULL);
487 		DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
488 		    "got client label from request(1)", struct svc_req *, req);
489 
490 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
491 			if (!do_rfs_label_check(clabel, dvp,
492 			    DOMINANCE_CHECK, exi)) {
493 				if (publicfh_flag && exi != NULL)
494 					exi_rele(exi);
495 				VN_RELE(vp);
496 				error = EACCES;
497 			}
498 		}
499 	}
500 
501 	dva.va_mask = AT_ALL;
502 	dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
503 
504 	if (error)
505 		goto out;
506 
507 	if (sec.sec_flags & SEC_QUERY) {
508 		error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
509 	} else {
510 		error = makefh3(&resp->resok.object, vp, exi);
511 		if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
512 			auth_weak = TRUE;
513 	}
514 
515 	/*
516 	 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
517 	 * and have obtained a new exportinfo in exi which needs to be
518 	 * released. Note that the original exportinfo pointed to by exi
519 	 * will be released by the caller, common_dispatch.
520 	 */
521 	if (publicfh_flag)
522 		exi_rele(exi);
523 
524 	if (error) {
525 		VN_RELE(vp);
526 		goto out;
527 	}
528 
529 	va.va_mask = AT_ALL;
530 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
531 
532 	VN_RELE(vp);
533 
534 	resp->status = NFS3_OK;
535 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
536 	vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
537 
538 	/*
539 	 * If it's public fh, no 0x81, and client's flavor is
540 	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
541 	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
542 	 */
543 	if (auth_weak)
544 		resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
545 
546 	DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
547 	    cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
548 	VN_RELE(dvp);
549 
550 	return;
551 
552 out:
553 	if (curthread->t_flag & T_WOULDBLOCK) {
554 		curthread->t_flag &= ~T_WOULDBLOCK;
555 		resp->status = NFS3ERR_JUKEBOX;
556 	} else
557 		resp->status = puterrno3(error);
558 out1:
559 	DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
560 	    cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
561 
562 	if (dvp != NULL)
563 		VN_RELE(dvp);
564 	vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
565 
566 }
567 
568 void *
569 rfs3_lookup_getfh(LOOKUP3args *args)
570 {
571 
572 	return (&args->what.dir);
573 }
574 
575 /* ARGSUSED */
576 void
577 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
578     struct svc_req *req, cred_t *cr, bool_t ro)
579 {
580 	int error;
581 	vnode_t *vp;
582 	struct vattr *vap;
583 	struct vattr va;
584 	int checkwriteperm;
585 	boolean_t dominant_label = B_FALSE;
586 	boolean_t equal_label = B_FALSE;
587 	boolean_t admin_low_client;
588 
589 	vap = NULL;
590 
591 	vp = nfs3_fhtovp(&args->object, exi);
592 
593 	DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
594 	    cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
595 
596 	if (vp == NULL) {
597 		error = ESTALE;
598 		goto out;
599 	}
600 
601 	/*
602 	 * If the file system is exported read only, it is not appropriate
603 	 * to check write permissions for regular files and directories.
604 	 * Special files are interpreted by the client, so the underlying
605 	 * permissions are sent back to the client for interpretation.
606 	 */
607 	if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR))
608 		checkwriteperm = 0;
609 	else
610 		checkwriteperm = 1;
611 
612 	/*
613 	 * We need the mode so that we can correctly determine access
614 	 * permissions relative to a mandatory lock file.  Access to
615 	 * mandatory lock files is denied on the server, so it might
616 	 * as well be reflected to the server during the open.
617 	 */
618 	va.va_mask = AT_MODE;
619 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
620 	if (error)
621 		goto out;
622 
623 	vap = &va;
624 
625 	resp->resok.access = 0;
626 
627 	if (is_system_labeled()) {
628 		bslabel_t *clabel = req->rq_label;
629 
630 		ASSERT(clabel != NULL);
631 		DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
632 		    "got client label from request(1)", struct svc_req *, req);
633 
634 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
635 			if ((equal_label = do_rfs_label_check(clabel, vp,
636 			    EQUALITY_CHECK, exi)) == B_FALSE) {
637 				dominant_label = do_rfs_label_check(clabel,
638 				    vp, DOMINANCE_CHECK, exi);
639 			} else
640 				dominant_label = B_TRUE;
641 			admin_low_client = B_FALSE;
642 		} else
643 			admin_low_client = B_TRUE;
644 	}
645 
646 	if (args->access & ACCESS3_READ) {
647 		error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
648 		if (error) {
649 			if (curthread->t_flag & T_WOULDBLOCK)
650 				goto out;
651 		} else if (!MANDLOCK(vp, va.va_mode) &&
652 		    (!is_system_labeled() || admin_low_client ||
653 		    dominant_label))
654 			resp->resok.access |= ACCESS3_READ;
655 	}
656 	if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
657 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
658 		if (error) {
659 			if (curthread->t_flag & T_WOULDBLOCK)
660 				goto out;
661 		} else if (!is_system_labeled() || admin_low_client ||
662 		    dominant_label)
663 			resp->resok.access |= ACCESS3_LOOKUP;
664 	}
665 	if (checkwriteperm &&
666 	    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
667 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
668 		if (error) {
669 			if (curthread->t_flag & T_WOULDBLOCK)
670 				goto out;
671 		} else if (!MANDLOCK(vp, va.va_mode) &&
672 		    (!is_system_labeled() || admin_low_client || equal_label)) {
673 			resp->resok.access |=
674 			    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
675 		}
676 	}
677 	if (checkwriteperm &&
678 	    (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
679 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
680 		if (error) {
681 			if (curthread->t_flag & T_WOULDBLOCK)
682 				goto out;
683 		} else if (!is_system_labeled() || admin_low_client ||
684 		    equal_label)
685 			resp->resok.access |= ACCESS3_DELETE;
686 	}
687 	if (args->access & ACCESS3_EXECUTE) {
688 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
689 		if (error) {
690 			if (curthread->t_flag & T_WOULDBLOCK)
691 				goto out;
692 		} else if (!MANDLOCK(vp, va.va_mode) &&
693 		    (!is_system_labeled() || admin_low_client ||
694 		    dominant_label))
695 			resp->resok.access |= ACCESS3_EXECUTE;
696 	}
697 
698 	va.va_mask = AT_ALL;
699 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
700 
701 	resp->status = NFS3_OK;
702 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
703 
704 	DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
705 	    cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
706 
707 	VN_RELE(vp);
708 
709 	return;
710 
711 out:
712 	if (curthread->t_flag & T_WOULDBLOCK) {
713 		curthread->t_flag &= ~T_WOULDBLOCK;
714 		resp->status = NFS3ERR_JUKEBOX;
715 	} else
716 		resp->status = puterrno3(error);
717 	DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
718 	    cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
719 	if (vp != NULL)
720 		VN_RELE(vp);
721 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
722 }
723 
724 void *
725 rfs3_access_getfh(ACCESS3args *args)
726 {
727 
728 	return (&args->object);
729 }
730 
731 /* ARGSUSED */
732 void
733 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
734     struct svc_req *req, cred_t *cr, bool_t ro)
735 {
736 	int error;
737 	vnode_t *vp;
738 	struct vattr *vap;
739 	struct vattr va;
740 	struct iovec iov;
741 	struct uio uio;
742 	char *data;
743 	struct sockaddr *ca;
744 	char *name = NULL;
745 	int is_referral = 0;
746 
747 	vap = NULL;
748 
749 	vp = nfs3_fhtovp(&args->symlink, exi);
750 
751 	DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
752 	    cred_t *, cr, vnode_t *, vp, READLINK3args *, args);
753 
754 	if (vp == NULL) {
755 		error = ESTALE;
756 		goto out;
757 	}
758 
759 	va.va_mask = AT_ALL;
760 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
761 	if (error)
762 		goto out;
763 
764 	vap = &va;
765 
766 	/* We lied about the object type for a referral */
767 	if (vn_is_nfs_reparse(vp, cr))
768 		is_referral = 1;
769 
770 	if (vp->v_type != VLNK && !is_referral) {
771 		resp->status = NFS3ERR_INVAL;
772 		goto out1;
773 	}
774 
775 	if (MANDLOCK(vp, va.va_mode)) {
776 		resp->status = NFS3ERR_ACCES;
777 		goto out1;
778 	}
779 
780 	if (is_system_labeled()) {
781 		bslabel_t *clabel = req->rq_label;
782 
783 		ASSERT(clabel != NULL);
784 		DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
785 		    "got client label from request(1)", struct svc_req *, req);
786 
787 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
788 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
789 			    exi)) {
790 				resp->status = NFS3ERR_ACCES;
791 				goto out1;
792 			}
793 		}
794 	}
795 
796 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
797 
798 	if (is_referral) {
799 		char *s;
800 		size_t strsz;
801 
802 		/* Get an artificial symlink based on a referral */
803 		s = build_symlink(vp, cr, &strsz);
804 		global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
805 		DTRACE_PROBE2(nfs3serv__func__referral__reflink,
806 		    vnode_t *, vp, char *, s);
807 		if (s == NULL)
808 			error = EINVAL;
809 		else {
810 			error = 0;
811 			(void) strlcpy(data, s, MAXPATHLEN + 1);
812 			kmem_free(s, strsz);
813 		}
814 
815 	} else {
816 
817 		iov.iov_base = data;
818 		iov.iov_len = MAXPATHLEN;
819 		uio.uio_iov = &iov;
820 		uio.uio_iovcnt = 1;
821 		uio.uio_segflg = UIO_SYSSPACE;
822 		uio.uio_extflg = UIO_COPY_CACHED;
823 		uio.uio_loffset = 0;
824 		uio.uio_resid = MAXPATHLEN;
825 
826 		error = VOP_READLINK(vp, &uio, cr, NULL);
827 
828 		if (!error)
829 			*(data + MAXPATHLEN - uio.uio_resid) = '\0';
830 	}
831 
832 	va.va_mask = AT_ALL;
833 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
834 
835 	/* Lie about object type again just to be consistent */
836 	if (is_referral && vap != NULL)
837 		vap->va_type = VLNK;
838 
839 #if 0 /* notyet */
840 	/*
841 	 * Don't do this.  It causes local disk writes when just
842 	 * reading the file and the overhead is deemed larger
843 	 * than the benefit.
844 	 */
845 	/*
846 	 * Force modified metadata out to stable storage.
847 	 */
848 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
849 #endif
850 
851 	if (error) {
852 		kmem_free(data, MAXPATHLEN + 1);
853 		goto out;
854 	}
855 
856 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
857 	name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
858 	    MAXPATHLEN + 1);
859 
860 	if (name == NULL) {
861 		/*
862 		 * Even though the conversion failed, we return
863 		 * something. We just don't translate it.
864 		 */
865 		name = data;
866 	}
867 
868 	resp->status = NFS3_OK;
869 	vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
870 	resp->resok.data = name;
871 
872 	DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
873 	    cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
874 	VN_RELE(vp);
875 
876 	if (name != data)
877 		kmem_free(data, MAXPATHLEN + 1);
878 
879 	return;
880 
881 out:
882 	if (curthread->t_flag & T_WOULDBLOCK) {
883 		curthread->t_flag &= ~T_WOULDBLOCK;
884 		resp->status = NFS3ERR_JUKEBOX;
885 	} else
886 		resp->status = puterrno3(error);
887 out1:
888 	DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
889 	    cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
890 	if (vp != NULL)
891 		VN_RELE(vp);
892 	vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
893 }
894 
895 void *
896 rfs3_readlink_getfh(READLINK3args *args)
897 {
898 
899 	return (&args->symlink);
900 }
901 
902 void
903 rfs3_readlink_free(READLINK3res *resp)
904 {
905 
906 	if (resp->status == NFS3_OK)
907 		kmem_free(resp->resok.data, MAXPATHLEN + 1);
908 }
909 
910 /*
911  * Server routine to handle read
912  * May handle RDMA data as well as mblks
913  */
914 /* ARGSUSED */
915 void
916 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
917     struct svc_req *req, cred_t *cr, bool_t ro)
918 {
919 	int error;
920 	vnode_t *vp;
921 	struct vattr *vap;
922 	struct vattr va;
923 	struct iovec iov, *iovp = NULL;
924 	int iovcnt;
925 	struct uio uio;
926 	u_offset_t offset;
927 	mblk_t *mp = NULL;
928 	int in_crit = 0;
929 	int need_rwunlock = 0;
930 	caller_context_t ct;
931 	int rdma_used = 0;
932 	int loaned_buffers;
933 	struct uio *uiop;
934 
935 	vap = NULL;
936 
937 	vp = nfs3_fhtovp(&args->file, exi);
938 
939 	DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
940 	    cred_t *, cr, vnode_t *, vp, READ3args *, args);
941 
942 	if (vp == NULL) {
943 		error = ESTALE;
944 		goto out;
945 	}
946 
947 	if (args->wlist) {
948 		if (args->count > clist_len(args->wlist)) {
949 			error = EINVAL;
950 			goto out;
951 		}
952 		rdma_used = 1;
953 	}
954 
955 	/* use loaned buffers for TCP */
956 	loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
957 
958 	if (is_system_labeled()) {
959 		bslabel_t *clabel = req->rq_label;
960 
961 		ASSERT(clabel != NULL);
962 		DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
963 		    "got client label from request(1)", struct svc_req *, req);
964 
965 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
966 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
967 			    exi)) {
968 				resp->status = NFS3ERR_ACCES;
969 				goto out1;
970 			}
971 		}
972 	}
973 
974 	ct.cc_sysid = 0;
975 	ct.cc_pid = 0;
976 	ct.cc_caller_id = nfs3_srv_caller_id;
977 	ct.cc_flags = CC_DONTBLOCK;
978 
979 	/*
980 	 * Enter the critical region before calling VOP_RWLOCK
981 	 * to avoid a deadlock with write requests.
982 	 */
983 	if (nbl_need_check(vp)) {
984 		nbl_start_crit(vp, RW_READER);
985 		in_crit = 1;
986 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
987 		    NULL)) {
988 			error = EACCES;
989 			goto out;
990 		}
991 	}
992 
993 	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
994 
995 	/* check if a monitor detected a delegation conflict */
996 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
997 		resp->status = NFS3ERR_JUKEBOX;
998 		goto out1;
999 	}
1000 
1001 	need_rwunlock = 1;
1002 
1003 	va.va_mask = AT_ALL;
1004 	error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1005 
1006 	/*
1007 	 * If we can't get the attributes, then we can't do the
1008 	 * right access checking.  So, we'll fail the request.
1009 	 */
1010 	if (error)
1011 		goto out;
1012 
1013 	vap = &va;
1014 
1015 	if (vp->v_type != VREG) {
1016 		resp->status = NFS3ERR_INVAL;
1017 		goto out1;
1018 	}
1019 
1020 	if (crgetuid(cr) != va.va_uid) {
1021 		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1022 		if (error) {
1023 			if (curthread->t_flag & T_WOULDBLOCK)
1024 				goto out;
1025 			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1026 			if (error)
1027 				goto out;
1028 		}
1029 	}
1030 
1031 	if (MANDLOCK(vp, va.va_mode)) {
1032 		resp->status = NFS3ERR_ACCES;
1033 		goto out1;
1034 	}
1035 
1036 	offset = args->offset;
1037 	if (offset >= va.va_size) {
1038 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1039 		if (in_crit)
1040 			nbl_end_crit(vp);
1041 		resp->status = NFS3_OK;
1042 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1043 		resp->resok.count = 0;
1044 		resp->resok.eof = TRUE;
1045 		resp->resok.data.data_len = 0;
1046 		resp->resok.data.data_val = NULL;
1047 		resp->resok.data.mp = NULL;
1048 		/* RDMA */
1049 		resp->resok.wlist = args->wlist;
1050 		resp->resok.wlist_len = resp->resok.count;
1051 		if (resp->resok.wlist)
1052 			clist_zero_len(resp->resok.wlist);
1053 		goto done;
1054 	}
1055 
1056 	if (args->count == 0) {
1057 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1058 		if (in_crit)
1059 			nbl_end_crit(vp);
1060 		resp->status = NFS3_OK;
1061 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1062 		resp->resok.count = 0;
1063 		resp->resok.eof = FALSE;
1064 		resp->resok.data.data_len = 0;
1065 		resp->resok.data.data_val = NULL;
1066 		resp->resok.data.mp = NULL;
1067 		/* RDMA */
1068 		resp->resok.wlist = args->wlist;
1069 		resp->resok.wlist_len = resp->resok.count;
1070 		if (resp->resok.wlist)
1071 			clist_zero_len(resp->resok.wlist);
1072 		goto done;
1073 	}
1074 
1075 	/*
1076 	 * do not allocate memory more the max. allowed
1077 	 * transfer size
1078 	 */
1079 	if (args->count > rfs3_tsize(req))
1080 		args->count = rfs3_tsize(req);
1081 
1082 	if (loaned_buffers) {
1083 		uiop = (uio_t *)rfs_setup_xuio(vp);
1084 		ASSERT(uiop != NULL);
1085 		uiop->uio_segflg = UIO_SYSSPACE;
1086 		uiop->uio_loffset = args->offset;
1087 		uiop->uio_resid = args->count;
1088 
1089 		/* Jump to do the read if successful */
1090 		if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1091 			/*
1092 			 * Need to hold the vnode until after VOP_RETZCBUF()
1093 			 * is called.
1094 			 */
1095 			VN_HOLD(vp);
1096 			goto doio_read;
1097 		}
1098 
1099 		DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1100 		    uiop->uio_loffset, int, uiop->uio_resid);
1101 
1102 		uiop->uio_extflg = 0;
1103 		/* failure to setup for zero copy */
1104 		rfs_free_xuio((void *)uiop);
1105 		loaned_buffers = 0;
1106 	}
1107 
1108 	/*
1109 	 * If returning data via RDMA Write, then grab the chunk list.
1110 	 * If we aren't returning READ data w/RDMA_WRITE, then grab
1111 	 * a mblk.
1112 	 */
1113 	if (rdma_used) {
1114 		(void) rdma_get_wchunk(req, &iov, args->wlist);
1115 		uio.uio_iov = &iov;
1116 		uio.uio_iovcnt = 1;
1117 	} else {
1118 		/*
1119 		 * mp will contain the data to be sent out in the read reply.
1120 		 * For UDP, this will be freed after the reply has been sent
1121 		 * out by the driver.  For TCP, it will be freed after the last
1122 		 * segment associated with the reply has been ACKed by the
1123 		 * client.
1124 		 */
1125 		mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
1126 		uio.uio_iov = iovp;
1127 		uio.uio_iovcnt = iovcnt;
1128 	}
1129 
1130 	uio.uio_segflg = UIO_SYSSPACE;
1131 	uio.uio_extflg = UIO_COPY_CACHED;
1132 	uio.uio_loffset = args->offset;
1133 	uio.uio_resid = args->count;
1134 	uiop = &uio;
1135 
1136 doio_read:
1137 	error = VOP_READ(vp, uiop, 0, cr, &ct);
1138 
1139 	if (error) {
1140 		if (mp)
1141 			freemsg(mp);
1142 		/* check if a monitor detected a delegation conflict */
1143 		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1144 			resp->status = NFS3ERR_JUKEBOX;
1145 			goto out1;
1146 		}
1147 		goto out;
1148 	}
1149 
1150 	/* make mblk using zc buffers */
1151 	if (loaned_buffers) {
1152 		mp = uio_to_mblk(uiop);
1153 		ASSERT(mp != NULL);
1154 	}
1155 
1156 	va.va_mask = AT_ALL;
1157 	error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1158 
1159 	if (error)
1160 		vap = NULL;
1161 	else
1162 		vap = &va;
1163 
1164 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1165 
1166 	if (in_crit)
1167 		nbl_end_crit(vp);
1168 
1169 	resp->status = NFS3_OK;
1170 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1171 	resp->resok.count = args->count - uiop->uio_resid;
1172 	if (!error && offset + resp->resok.count == va.va_size)
1173 		resp->resok.eof = TRUE;
1174 	else
1175 		resp->resok.eof = FALSE;
1176 	resp->resok.data.data_len = resp->resok.count;
1177 
1178 	if (mp)
1179 		rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1180 
1181 	resp->resok.data.mp = mp;
1182 	resp->resok.size = (uint_t)args->count;
1183 
1184 	if (rdma_used) {
1185 		resp->resok.data.data_val = (caddr_t)iov.iov_base;
1186 		if (!rdma_setup_read_data3(args, &(resp->resok))) {
1187 			resp->status = NFS3ERR_INVAL;
1188 		}
1189 	} else {
1190 		resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1191 		(resp->resok).wlist = NULL;
1192 	}
1193 
1194 done:
1195 	DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1196 	    cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1197 
1198 	VN_RELE(vp);
1199 
1200 	if (iovp != NULL)
1201 		kmem_free(iovp, iovcnt * sizeof (struct iovec));
1202 
1203 	return;
1204 
1205 out:
1206 	if (curthread->t_flag & T_WOULDBLOCK) {
1207 		curthread->t_flag &= ~T_WOULDBLOCK;
1208 		resp->status = NFS3ERR_JUKEBOX;
1209 	} else
1210 		resp->status = puterrno3(error);
1211 out1:
1212 	DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1213 	    cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1214 
1215 	if (vp != NULL) {
1216 		if (need_rwunlock)
1217 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1218 		if (in_crit)
1219 			nbl_end_crit(vp);
1220 		VN_RELE(vp);
1221 	}
1222 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1223 
1224 	if (iovp != NULL)
1225 		kmem_free(iovp, iovcnt * sizeof (struct iovec));
1226 }
1227 
1228 void
1229 rfs3_read_free(READ3res *resp)
1230 {
1231 	mblk_t *mp;
1232 
1233 	if (resp->status == NFS3_OK) {
1234 		mp = resp->resok.data.mp;
1235 		if (mp != NULL)
1236 			freemsg(mp);
1237 	}
1238 }
1239 
1240 void *
1241 rfs3_read_getfh(READ3args *args)
1242 {
1243 
1244 	return (&args->file);
1245 }
1246 
/*
 * Size of the on-stack iovec array used by rfs3_write().  An mblk chain
 * with more links than this gets a kmem_alloc()ed iovec array instead.
 */
#define	MAX_IOVECS	12

#ifdef DEBUG
/*
 * DEBUG-only counters bumped by rfs3_write(): "hits" when the mblk chain
 * fit in the on-stack array, "misses" when an array had to be allocated.
 * Static storage starts out zeroed.
 */
static int rfs3_write_hits;
static int rfs3_write_misses;
#endif
1253 
1254 void
1255 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1256     struct svc_req *req, cred_t *cr, bool_t ro)
1257 {
1258 	int error;
1259 	vnode_t *vp;
1260 	struct vattr *bvap = NULL;
1261 	struct vattr bva;
1262 	struct vattr *avap = NULL;
1263 	struct vattr ava;
1264 	u_offset_t rlimit;
1265 	struct uio uio;
1266 	struct iovec iov[MAX_IOVECS];
1267 	mblk_t *m;
1268 	struct iovec *iovp;
1269 	int iovcnt;
1270 	int ioflag;
1271 	cred_t *savecred;
1272 	int in_crit = 0;
1273 	int rwlock_ret = -1;
1274 	caller_context_t ct;
1275 
1276 	vp = nfs3_fhtovp(&args->file, exi);
1277 
1278 	DTRACE_NFSV3_4(op__write__start, struct svc_req *, req,
1279 	    cred_t *, cr, vnode_t *, vp, WRITE3args *, args);
1280 
1281 	if (vp == NULL) {
1282 		error = ESTALE;
1283 		goto err;
1284 	}
1285 
1286 	if (is_system_labeled()) {
1287 		bslabel_t *clabel = req->rq_label;
1288 
1289 		ASSERT(clabel != NULL);
1290 		DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1291 		    "got client label from request(1)", struct svc_req *, req);
1292 
1293 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1294 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1295 			    exi)) {
1296 				resp->status = NFS3ERR_ACCES;
1297 				goto err1;
1298 			}
1299 		}
1300 	}
1301 
1302 	ct.cc_sysid = 0;
1303 	ct.cc_pid = 0;
1304 	ct.cc_caller_id = nfs3_srv_caller_id;
1305 	ct.cc_flags = CC_DONTBLOCK;
1306 
1307 	/*
1308 	 * We have to enter the critical region before calling VOP_RWLOCK
1309 	 * to avoid a deadlock with ufs.
1310 	 */
1311 	if (nbl_need_check(vp)) {
1312 		nbl_start_crit(vp, RW_READER);
1313 		in_crit = 1;
1314 		if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1315 		    NULL)) {
1316 			error = EACCES;
1317 			goto err;
1318 		}
1319 	}
1320 
1321 	rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1322 
1323 	/* check if a monitor detected a delegation conflict */
1324 	if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1325 		resp->status = NFS3ERR_JUKEBOX;
1326 		rwlock_ret = -1;
1327 		goto err1;
1328 	}
1329 
1330 
1331 	bva.va_mask = AT_ALL;
1332 	error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1333 
1334 	/*
1335 	 * If we can't get the attributes, then we can't do the
1336 	 * right access checking.  So, we'll fail the request.
1337 	 */
1338 	if (error)
1339 		goto err;
1340 
1341 	bvap = &bva;
1342 	avap = bvap;
1343 
1344 	if (args->count != args->data.data_len) {
1345 		resp->status = NFS3ERR_INVAL;
1346 		goto err1;
1347 	}
1348 
1349 	if (rdonly(ro, vp)) {
1350 		resp->status = NFS3ERR_ROFS;
1351 		goto err1;
1352 	}
1353 
1354 	if (vp->v_type != VREG) {
1355 		resp->status = NFS3ERR_INVAL;
1356 		goto err1;
1357 	}
1358 
1359 	if (crgetuid(cr) != bva.va_uid &&
1360 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1361 		goto err;
1362 
1363 	if (MANDLOCK(vp, bva.va_mode)) {
1364 		resp->status = NFS3ERR_ACCES;
1365 		goto err1;
1366 	}
1367 
1368 	if (args->count == 0) {
1369 		resp->status = NFS3_OK;
1370 		vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1371 		resp->resok.count = 0;
1372 		resp->resok.committed = args->stable;
1373 		resp->resok.verf = write3verf;
1374 		goto out;
1375 	}
1376 
1377 	if (args->mblk != NULL) {
1378 		iovcnt = 0;
1379 		for (m = args->mblk; m != NULL; m = m->b_cont)
1380 			iovcnt++;
1381 		if (iovcnt <= MAX_IOVECS) {
1382 #ifdef DEBUG
1383 			rfs3_write_hits++;
1384 #endif
1385 			iovp = iov;
1386 		} else {
1387 #ifdef DEBUG
1388 			rfs3_write_misses++;
1389 #endif
1390 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1391 		}
1392 		mblk_to_iov(args->mblk, iovcnt, iovp);
1393 
1394 	} else if (args->rlist != NULL) {
1395 		iovcnt = 1;
1396 		iovp = iov;
1397 		iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1398 		iovp->iov_len = args->count;
1399 	} else {
1400 		iovcnt = 1;
1401 		iovp = iov;
1402 		iovp->iov_base = args->data.data_val;
1403 		iovp->iov_len = args->count;
1404 	}
1405 
1406 	uio.uio_iov = iovp;
1407 	uio.uio_iovcnt = iovcnt;
1408 
1409 	uio.uio_segflg = UIO_SYSSPACE;
1410 	uio.uio_extflg = UIO_COPY_DEFAULT;
1411 	uio.uio_loffset = args->offset;
1412 	uio.uio_resid = args->count;
1413 	uio.uio_llimit = curproc->p_fsz_ctl;
1414 	rlimit = uio.uio_llimit - args->offset;
1415 	if (rlimit < (u_offset_t)uio.uio_resid)
1416 		uio.uio_resid = (int)rlimit;
1417 
1418 	if (args->stable == UNSTABLE)
1419 		ioflag = 0;
1420 	else if (args->stable == FILE_SYNC)
1421 		ioflag = FSYNC;
1422 	else if (args->stable == DATA_SYNC)
1423 		ioflag = FDSYNC;
1424 	else {
1425 		if (iovp != iov)
1426 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
1427 		resp->status = NFS3ERR_INVAL;
1428 		goto err1;
1429 	}
1430 
1431 	/*
1432 	 * We're changing creds because VM may fault and we need
1433 	 * the cred of the current thread to be used if quota
1434 	 * checking is enabled.
1435 	 */
1436 	savecred = curthread->t_cred;
1437 	curthread->t_cred = cr;
1438 	error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1439 	curthread->t_cred = savecred;
1440 
1441 	if (iovp != iov)
1442 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
1443 
1444 	/* check if a monitor detected a delegation conflict */
1445 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1446 		resp->status = NFS3ERR_JUKEBOX;
1447 		goto err1;
1448 	}
1449 
1450 	ava.va_mask = AT_ALL;
1451 	avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1452 
1453 	if (error)
1454 		goto err;
1455 
1456 	/*
1457 	 * If we were unable to get the V_WRITELOCK_TRUE, then we
1458 	 * may not have accurate after attrs, so check if
1459 	 * we have both attributes, they have a non-zero va_seq, and
1460 	 * va_seq has changed by exactly one,
1461 	 * if not, turn off the before attr.
1462 	 */
1463 	if (rwlock_ret != V_WRITELOCK_TRUE) {
1464 		if (bvap == NULL || avap == NULL ||
1465 		    bvap->va_seq == 0 || avap->va_seq == 0 ||
1466 		    avap->va_seq != (bvap->va_seq + 1)) {
1467 			bvap = NULL;
1468 		}
1469 	}
1470 
1471 	resp->status = NFS3_OK;
1472 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1473 	resp->resok.count = args->count - uio.uio_resid;
1474 	resp->resok.committed = args->stable;
1475 	resp->resok.verf = write3verf;
1476 	goto out;
1477 
1478 err:
1479 	if (curthread->t_flag & T_WOULDBLOCK) {
1480 		curthread->t_flag &= ~T_WOULDBLOCK;
1481 		resp->status = NFS3ERR_JUKEBOX;
1482 	} else
1483 		resp->status = puterrno3(error);
1484 err1:
1485 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1486 out:
1487 	DTRACE_NFSV3_4(op__write__done, struct svc_req *, req,
1488 	    cred_t *, cr, vnode_t *, vp, WRITE3res *, resp);
1489 
1490 	if (vp != NULL) {
1491 		if (rwlock_ret != -1)
1492 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1493 		if (in_crit)
1494 			nbl_end_crit(vp);
1495 		VN_RELE(vp);
1496 	}
1497 }
1498 
1499 void *
1500 rfs3_write_getfh(WRITE3args *args)
1501 {
1502 
1503 	return (&args->file);
1504 }
1505 
1506 void
1507 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1508     struct svc_req *req, cred_t *cr, bool_t ro)
1509 {
1510 	int error;
1511 	int in_crit = 0;
1512 	vnode_t *vp;
1513 	vnode_t *tvp = NULL;
1514 	vnode_t *dvp;
1515 	struct vattr *vap;
1516 	struct vattr va;
1517 	struct vattr *dbvap;
1518 	struct vattr dbva;
1519 	struct vattr *davap;
1520 	struct vattr dava;
1521 	enum vcexcl excl;
1522 	nfstime3 *mtime;
1523 	len_t reqsize;
1524 	bool_t trunc;
1525 	struct sockaddr *ca;
1526 	char *name = NULL;
1527 
1528 	dbvap = NULL;
1529 	davap = NULL;
1530 
1531 	dvp = nfs3_fhtovp(&args->where.dir, exi);
1532 
1533 	DTRACE_NFSV3_4(op__create__start, struct svc_req *, req,
1534 	    cred_t *, cr, vnode_t *, dvp, CREATE3args *, args);
1535 
1536 	if (dvp == NULL) {
1537 		error = ESTALE;
1538 		goto out;
1539 	}
1540 
1541 	dbva.va_mask = AT_ALL;
1542 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1543 	davap = dbvap;
1544 
1545 	if (args->where.name == nfs3nametoolong) {
1546 		resp->status = NFS3ERR_NAMETOOLONG;
1547 		goto out1;
1548 	}
1549 
1550 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1551 		resp->status = NFS3ERR_ACCES;
1552 		goto out1;
1553 	}
1554 
1555 	if (rdonly(ro, dvp)) {
1556 		resp->status = NFS3ERR_ROFS;
1557 		goto out1;
1558 	}
1559 
1560 	if (is_system_labeled()) {
1561 		bslabel_t *clabel = req->rq_label;
1562 
1563 		ASSERT(clabel != NULL);
1564 		DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1565 		    "got client label from request(1)", struct svc_req *, req);
1566 
1567 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1568 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1569 			    exi)) {
1570 				resp->status = NFS3ERR_ACCES;
1571 				goto out1;
1572 			}
1573 		}
1574 	}
1575 
1576 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1577 	name = nfscmd_convname(ca, exi, args->where.name,
1578 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1579 
1580 	if (name == NULL) {
1581 		/* This is really a Solaris EILSEQ */
1582 		resp->status = NFS3ERR_INVAL;
1583 		goto out1;
1584 	}
1585 
1586 	if (args->how.mode == EXCLUSIVE) {
1587 		va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1588 		va.va_type = VREG;
1589 		va.va_mode = (mode_t)0;
1590 		/*
1591 		 * Ensure no time overflows and that types match
1592 		 */
1593 		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1594 		va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1595 		va.va_mtime.tv_nsec = mtime->nseconds;
1596 		excl = EXCL;
1597 	} else {
1598 		error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1599 		    &va);
1600 		if (error)
1601 			goto out;
1602 		va.va_mask |= AT_TYPE;
1603 		va.va_type = VREG;
1604 		if (args->how.mode == GUARDED)
1605 			excl = EXCL;
1606 		else {
1607 			excl = NONEXCL;
1608 
1609 			/*
1610 			 * During creation of file in non-exclusive mode
1611 			 * if size of file is being set then make sure
1612 			 * that if the file already exists that no conflicting
1613 			 * non-blocking mandatory locks exists in the region
1614 			 * being modified. If there are conflicting locks fail
1615 			 * the operation with EACCES.
1616 			 */
1617 			if (va.va_mask & AT_SIZE) {
1618 				struct vattr tva;
1619 
1620 				/*
1621 				 * Does file already exist?
1622 				 */
1623 				error = VOP_LOOKUP(dvp, name, &tvp,
1624 				    NULL, 0, NULL, cr, NULL, NULL, NULL);
1625 
1626 				/*
1627 				 * Check to see if the file has been delegated
1628 				 * to a v4 client.  If so, then begin recall of
1629 				 * the delegation and return JUKEBOX to allow
1630 				 * the client to retrasmit its request.
1631 				 */
1632 
1633 				trunc = va.va_size == 0;
1634 				if (!error &&
1635 				    rfs4_check_delegated(FWRITE, tvp, trunc)) {
1636 					resp->status = NFS3ERR_JUKEBOX;
1637 					goto out1;
1638 				}
1639 
1640 				/*
1641 				 * Check for NBMAND lock conflicts
1642 				 */
1643 				if (!error && nbl_need_check(tvp)) {
1644 					u_offset_t offset;
1645 					ssize_t len;
1646 
1647 					nbl_start_crit(tvp, RW_READER);
1648 					in_crit = 1;
1649 
1650 					tva.va_mask = AT_SIZE;
1651 					error = VOP_GETATTR(tvp, &tva, 0, cr,
1652 					    NULL);
1653 					/*
1654 					 * Can't check for conflicts, so return
1655 					 * error.
1656 					 */
1657 					if (error)
1658 						goto out;
1659 
1660 					offset = tva.va_size < va.va_size ?
1661 					    tva.va_size : va.va_size;
1662 					len = tva.va_size < va.va_size ?
1663 					    va.va_size - tva.va_size :
1664 					    tva.va_size - va.va_size;
1665 					if (nbl_conflict(tvp, NBL_WRITE,
1666 					    offset, len, 0, NULL)) {
1667 						error = EACCES;
1668 						goto out;
1669 					}
1670 				} else if (tvp) {
1671 					VN_RELE(tvp);
1672 					tvp = NULL;
1673 				}
1674 			}
1675 		}
1676 		if (va.va_mask & AT_SIZE)
1677 			reqsize = va.va_size;
1678 	}
1679 
1680 	/*
1681 	 * Must specify the mode.
1682 	 */
1683 	if (!(va.va_mask & AT_MODE)) {
1684 		resp->status = NFS3ERR_INVAL;
1685 		goto out1;
1686 	}
1687 
1688 	/*
1689 	 * If the filesystem is exported with nosuid, then mask off
1690 	 * the setuid and setgid bits.
1691 	 */
1692 	if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1693 		va.va_mode &= ~(VSUID | VSGID);
1694 
1695 tryagain:
1696 	/*
1697 	 * The file open mode used is VWRITE.  If the client needs
1698 	 * some other semantic, then it should do the access checking
1699 	 * itself.  It would have been nice to have the file open mode
1700 	 * passed as part of the arguments.
1701 	 */
1702 	error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1703 	    &vp, cr, 0, NULL, NULL);
1704 
1705 	dava.va_mask = AT_ALL;
1706 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1707 
1708 	if (error) {
1709 		/*
1710 		 * If we got something other than file already exists
1711 		 * then just return this error.  Otherwise, we got
1712 		 * EEXIST.  If we were doing a GUARDED create, then
1713 		 * just return this error.  Otherwise, we need to
1714 		 * make sure that this wasn't a duplicate of an
1715 		 * exclusive create request.
1716 		 *
1717 		 * The assumption is made that a non-exclusive create
1718 		 * request will never return EEXIST.
1719 		 */
1720 		if (error != EEXIST || args->how.mode == GUARDED)
1721 			goto out;
1722 		/*
1723 		 * Lookup the file so that we can get a vnode for it.
1724 		 */
1725 		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1726 		    NULL, cr, NULL, NULL, NULL);
1727 		if (error) {
1728 			/*
1729 			 * We couldn't find the file that we thought that
1730 			 * we just created.  So, we'll just try creating
1731 			 * it again.
1732 			 */
1733 			if (error == ENOENT)
1734 				goto tryagain;
1735 			goto out;
1736 		}
1737 
1738 		/*
1739 		 * If the file is delegated to a v4 client, go ahead
1740 		 * and initiate recall, this create is a hint that a
1741 		 * conflicting v3 open has occurred.
1742 		 */
1743 
1744 		if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1745 			VN_RELE(vp);
1746 			resp->status = NFS3ERR_JUKEBOX;
1747 			goto out1;
1748 		}
1749 
1750 		va.va_mask = AT_ALL;
1751 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1752 
1753 		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1754 		/* % with INT32_MAX to prevent overflows */
1755 		if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1756 		    vap->va_mtime.tv_sec !=
1757 		    (mtime->seconds % INT32_MAX) ||
1758 		    vap->va_mtime.tv_nsec != mtime->nseconds)) {
1759 			VN_RELE(vp);
1760 			error = EEXIST;
1761 			goto out;
1762 		}
1763 	} else {
1764 
1765 		if ((args->how.mode == UNCHECKED ||
1766 		    args->how.mode == GUARDED) &&
1767 		    args->how.createhow3_u.obj_attributes.size.set_it &&
1768 		    va.va_size == 0)
1769 			trunc = TRUE;
1770 		else
1771 			trunc = FALSE;
1772 
1773 		if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1774 			VN_RELE(vp);
1775 			resp->status = NFS3ERR_JUKEBOX;
1776 			goto out1;
1777 		}
1778 
1779 		va.va_mask = AT_ALL;
1780 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1781 
1782 		/*
1783 		 * We need to check to make sure that the file got
1784 		 * created to the indicated size.  If not, we do a
1785 		 * setattr to try to change the size, but we don't
1786 		 * try too hard.  This shouldn't a problem as most
1787 		 * clients will only specifiy a size of zero which
1788 		 * local file systems handle.  However, even if
1789 		 * the client does specify a non-zero size, it can
1790 		 * still recover by checking the size of the file
1791 		 * after it has created it and then issue a setattr
1792 		 * request of its own to set the size of the file.
1793 		 */
1794 		if (vap != NULL &&
1795 		    (args->how.mode == UNCHECKED ||
1796 		    args->how.mode == GUARDED) &&
1797 		    args->how.createhow3_u.obj_attributes.size.set_it &&
1798 		    vap->va_size != reqsize) {
1799 			va.va_mask = AT_SIZE;
1800 			va.va_size = reqsize;
1801 			(void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1802 			va.va_mask = AT_ALL;
1803 			vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1804 		}
1805 	}
1806 
1807 	if (name != args->where.name)
1808 		kmem_free(name, MAXPATHLEN + 1);
1809 
1810 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1811 	if (error)
1812 		resp->resok.obj.handle_follows = FALSE;
1813 	else
1814 		resp->resok.obj.handle_follows = TRUE;
1815 
1816 	/*
1817 	 * Force modified data and metadata out to stable storage.
1818 	 */
1819 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1820 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
1821 
1822 	VN_RELE(vp);
1823 	if (tvp != NULL) {
1824 		if (in_crit)
1825 			nbl_end_crit(tvp);
1826 		VN_RELE(tvp);
1827 	}
1828 
1829 	resp->status = NFS3_OK;
1830 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1831 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1832 
1833 	DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1834 	    cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1835 
1836 	VN_RELE(dvp);
1837 	return;
1838 
1839 out:
1840 	if (curthread->t_flag & T_WOULDBLOCK) {
1841 		curthread->t_flag &= ~T_WOULDBLOCK;
1842 		resp->status = NFS3ERR_JUKEBOX;
1843 	} else
1844 		resp->status = puterrno3(error);
1845 out1:
1846 	DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1847 	    cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1848 
1849 	if (name != NULL && name != args->where.name)
1850 		kmem_free(name, MAXPATHLEN + 1);
1851 
1852 	if (tvp != NULL) {
1853 		if (in_crit)
1854 			nbl_end_crit(tvp);
1855 		VN_RELE(tvp);
1856 	}
1857 	if (dvp != NULL)
1858 		VN_RELE(dvp);
1859 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1860 }
1861 
1862 void *
1863 rfs3_create_getfh(CREATE3args *args)
1864 {
1865 
1866 	return (&args->where.dir);
1867 }
1868 
1869 void
1870 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1871     struct svc_req *req, cred_t *cr, bool_t ro)
1872 {
1873 	int error;
1874 	vnode_t *vp = NULL;
1875 	vnode_t *dvp;
1876 	struct vattr *vap;
1877 	struct vattr va;
1878 	struct vattr *dbvap;
1879 	struct vattr dbva;
1880 	struct vattr *davap;
1881 	struct vattr dava;
1882 	struct sockaddr *ca;
1883 	char *name = NULL;
1884 
1885 	dbvap = NULL;
1886 	davap = NULL;
1887 
1888 	dvp = nfs3_fhtovp(&args->where.dir, exi);
1889 
1890 	DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req,
1891 	    cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args);
1892 
1893 	if (dvp == NULL) {
1894 		error = ESTALE;
1895 		goto out;
1896 	}
1897 
1898 	dbva.va_mask = AT_ALL;
1899 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1900 	davap = dbvap;
1901 
1902 	if (args->where.name == nfs3nametoolong) {
1903 		resp->status = NFS3ERR_NAMETOOLONG;
1904 		goto out1;
1905 	}
1906 
1907 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1908 		resp->status = NFS3ERR_ACCES;
1909 		goto out1;
1910 	}
1911 
1912 	if (rdonly(ro, dvp)) {
1913 		resp->status = NFS3ERR_ROFS;
1914 		goto out1;
1915 	}
1916 
1917 	if (is_system_labeled()) {
1918 		bslabel_t *clabel = req->rq_label;
1919 
1920 		ASSERT(clabel != NULL);
1921 		DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1922 		    "got client label from request(1)", struct svc_req *, req);
1923 
1924 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1925 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1926 			    exi)) {
1927 				resp->status = NFS3ERR_ACCES;
1928 				goto out1;
1929 			}
1930 		}
1931 	}
1932 
1933 	error = sattr3_to_vattr(&args->attributes, &va);
1934 	if (error)
1935 		goto out;
1936 
1937 	if (!(va.va_mask & AT_MODE)) {
1938 		resp->status = NFS3ERR_INVAL;
1939 		goto out1;
1940 	}
1941 
1942 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1943 	name = nfscmd_convname(ca, exi, args->where.name,
1944 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1945 
1946 	if (name == NULL) {
1947 		resp->status = NFS3ERR_INVAL;
1948 		goto out1;
1949 	}
1950 
1951 	va.va_mask |= AT_TYPE;
1952 	va.va_type = VDIR;
1953 
1954 	error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
1955 
1956 	if (name != args->where.name)
1957 		kmem_free(name, MAXPATHLEN + 1);
1958 
1959 	dava.va_mask = AT_ALL;
1960 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1961 
1962 	/*
1963 	 * Force modified data and metadata out to stable storage.
1964 	 */
1965 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
1966 
1967 	if (error)
1968 		goto out;
1969 
1970 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1971 	if (error)
1972 		resp->resok.obj.handle_follows = FALSE;
1973 	else
1974 		resp->resok.obj.handle_follows = TRUE;
1975 
1976 	va.va_mask = AT_ALL;
1977 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1978 
1979 	/*
1980 	 * Force modified data and metadata out to stable storage.
1981 	 */
1982 	(void) VOP_FSYNC(vp, 0, cr, NULL);
1983 
1984 	VN_RELE(vp);
1985 
1986 	resp->status = NFS3_OK;
1987 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1988 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1989 
1990 	DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
1991 	    cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
1992 	VN_RELE(dvp);
1993 
1994 	return;
1995 
1996 out:
1997 	if (curthread->t_flag & T_WOULDBLOCK) {
1998 		curthread->t_flag &= ~T_WOULDBLOCK;
1999 		resp->status = NFS3ERR_JUKEBOX;
2000 	} else
2001 		resp->status = puterrno3(error);
2002 out1:
2003 	DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
2004 	    cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
2005 	if (dvp != NULL)
2006 		VN_RELE(dvp);
2007 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2008 }
2009 
2010 void *
2011 rfs3_mkdir_getfh(MKDIR3args *args)
2012 {
2013 
2014 	return (&args->where.dir);
2015 }
2016 
2017 void
2018 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2019     struct svc_req *req, cred_t *cr, bool_t ro)
2020 {
2021 	int error;
2022 	vnode_t *vp;
2023 	vnode_t *dvp;
2024 	struct vattr *vap;
2025 	struct vattr va;
2026 	struct vattr *dbvap;
2027 	struct vattr dbva;
2028 	struct vattr *davap;
2029 	struct vattr dava;
2030 	struct sockaddr *ca;
2031 	char *name = NULL;
2032 	char *symdata = NULL;
2033 
2034 	dbvap = NULL;
2035 	davap = NULL;
2036 
2037 	dvp = nfs3_fhtovp(&args->where.dir, exi);
2038 
2039 	DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req,
2040 	    cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args);
2041 
2042 	if (dvp == NULL) {
2043 		error = ESTALE;
2044 		goto err;
2045 	}
2046 
2047 	dbva.va_mask = AT_ALL;
2048 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2049 	davap = dbvap;
2050 
2051 	if (args->where.name == nfs3nametoolong) {
2052 		resp->status = NFS3ERR_NAMETOOLONG;
2053 		goto err1;
2054 	}
2055 
2056 	if (args->where.name == NULL || *(args->where.name) == '\0') {
2057 		resp->status = NFS3ERR_ACCES;
2058 		goto err1;
2059 	}
2060 
2061 	if (rdonly(ro, dvp)) {
2062 		resp->status = NFS3ERR_ROFS;
2063 		goto err1;
2064 	}
2065 
2066 	if (is_system_labeled()) {
2067 		bslabel_t *clabel = req->rq_label;
2068 
2069 		ASSERT(clabel != NULL);
2070 		DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2071 		    "got client label from request(1)", struct svc_req *, req);
2072 
2073 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2074 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2075 			    exi)) {
2076 				resp->status = NFS3ERR_ACCES;
2077 				goto err1;
2078 			}
2079 		}
2080 	}
2081 
2082 	error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2083 	if (error)
2084 		goto err;
2085 
2086 	if (!(va.va_mask & AT_MODE)) {
2087 		resp->status = NFS3ERR_INVAL;
2088 		goto err1;
2089 	}
2090 
2091 	if (args->symlink.symlink_data == nfs3nametoolong) {
2092 		resp->status = NFS3ERR_NAMETOOLONG;
2093 		goto err1;
2094 	}
2095 
2096 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2097 	name = nfscmd_convname(ca, exi, args->where.name,
2098 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2099 
2100 	if (name == NULL) {
2101 		/* This is really a Solaris EILSEQ */
2102 		resp->status = NFS3ERR_INVAL;
2103 		goto err1;
2104 	}
2105 
2106 	symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2107 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2108 	if (symdata == NULL) {
2109 		/* This is really a Solaris EILSEQ */
2110 		resp->status = NFS3ERR_INVAL;
2111 		goto err1;
2112 	}
2113 
2114 
2115 	va.va_mask |= AT_TYPE;
2116 	va.va_type = VLNK;
2117 
2118 	error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2119 
2120 	dava.va_mask = AT_ALL;
2121 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2122 
2123 	if (error)
2124 		goto err;
2125 
2126 	error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2127 	    NULL, NULL, NULL);
2128 
2129 	/*
2130 	 * Force modified data and metadata out to stable storage.
2131 	 */
2132 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
2133 
2134 
2135 	resp->status = NFS3_OK;
2136 	if (error) {
2137 		resp->resok.obj.handle_follows = FALSE;
2138 		vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2139 		vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2140 		goto out;
2141 	}
2142 
2143 	error = makefh3(&resp->resok.obj.handle, vp, exi);
2144 	if (error)
2145 		resp->resok.obj.handle_follows = FALSE;
2146 	else
2147 		resp->resok.obj.handle_follows = TRUE;
2148 
2149 	va.va_mask = AT_ALL;
2150 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2151 
2152 	/*
2153 	 * Force modified data and metadata out to stable storage.
2154 	 */
2155 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2156 
2157 	VN_RELE(vp);
2158 
2159 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2160 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2161 	goto out;
2162 
2163 err:
2164 	if (curthread->t_flag & T_WOULDBLOCK) {
2165 		curthread->t_flag &= ~T_WOULDBLOCK;
2166 		resp->status = NFS3ERR_JUKEBOX;
2167 	} else
2168 		resp->status = puterrno3(error);
2169 err1:
2170 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2171 out:
2172 	if (name != NULL && name != args->where.name)
2173 		kmem_free(name, MAXPATHLEN + 1);
2174 	if (symdata != NULL && symdata != args->symlink.symlink_data)
2175 		kmem_free(symdata, MAXPATHLEN + 1);
2176 
2177 	DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req,
2178 	    cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp);
2179 
2180 	if (dvp != NULL)
2181 		VN_RELE(dvp);
2182 }
2183 
2184 void *
2185 rfs3_symlink_getfh(SYMLINK3args *args)
2186 {
2187 
2188 	return (&args->where.dir);
2189 }
2190 
2191 void
2192 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2193     struct svc_req *req, cred_t *cr, bool_t ro)
2194 {
2195 	int error;
2196 	vnode_t *vp;
2197 	vnode_t *realvp;
2198 	vnode_t *dvp;
2199 	struct vattr *vap;
2200 	struct vattr va;
2201 	struct vattr *dbvap;
2202 	struct vattr dbva;
2203 	struct vattr *davap;
2204 	struct vattr dava;
2205 	int mode;
2206 	enum vcexcl excl;
2207 	struct sockaddr *ca;
2208 	char *name = NULL;
2209 
2210 	dbvap = NULL;
2211 	davap = NULL;
2212 
2213 	dvp = nfs3_fhtovp(&args->where.dir, exi);
2214 
2215 	DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req,
2216 	    cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args);
2217 
2218 	if (dvp == NULL) {
2219 		error = ESTALE;
2220 		goto out;
2221 	}
2222 
2223 	dbva.va_mask = AT_ALL;
2224 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2225 	davap = dbvap;
2226 
2227 	if (args->where.name == nfs3nametoolong) {
2228 		resp->status = NFS3ERR_NAMETOOLONG;
2229 		goto out1;
2230 	}
2231 
2232 	if (args->where.name == NULL || *(args->where.name) == '\0') {
2233 		resp->status = NFS3ERR_ACCES;
2234 		goto out1;
2235 	}
2236 
2237 	if (rdonly(ro, dvp)) {
2238 		resp->status = NFS3ERR_ROFS;
2239 		goto out1;
2240 	}
2241 
2242 	if (is_system_labeled()) {
2243 		bslabel_t *clabel = req->rq_label;
2244 
2245 		ASSERT(clabel != NULL);
2246 		DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2247 		    "got client label from request(1)", struct svc_req *, req);
2248 
2249 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2250 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2251 			    exi)) {
2252 				resp->status = NFS3ERR_ACCES;
2253 				goto out1;
2254 			}
2255 		}
2256 	}
2257 
2258 	switch (args->what.type) {
2259 	case NF3CHR:
2260 	case NF3BLK:
2261 		error = sattr3_to_vattr(
2262 		    &args->what.mknoddata3_u.device.dev_attributes, &va);
2263 		if (error)
2264 			goto out;
2265 		if (secpolicy_sys_devices(cr) != 0) {
2266 			resp->status = NFS3ERR_PERM;
2267 			goto out1;
2268 		}
2269 		if (args->what.type == NF3CHR)
2270 			va.va_type = VCHR;
2271 		else
2272 			va.va_type = VBLK;
2273 		va.va_rdev = makedevice(
2274 		    args->what.mknoddata3_u.device.spec.specdata1,
2275 		    args->what.mknoddata3_u.device.spec.specdata2);
2276 		va.va_mask |= AT_TYPE | AT_RDEV;
2277 		break;
2278 	case NF3SOCK:
2279 		error = sattr3_to_vattr(
2280 		    &args->what.mknoddata3_u.pipe_attributes, &va);
2281 		if (error)
2282 			goto out;
2283 		va.va_type = VSOCK;
2284 		va.va_mask |= AT_TYPE;
2285 		break;
2286 	case NF3FIFO:
2287 		error = sattr3_to_vattr(
2288 		    &args->what.mknoddata3_u.pipe_attributes, &va);
2289 		if (error)
2290 			goto out;
2291 		va.va_type = VFIFO;
2292 		va.va_mask |= AT_TYPE;
2293 		break;
2294 	default:
2295 		resp->status = NFS3ERR_BADTYPE;
2296 		goto out1;
2297 	}
2298 
2299 	/*
2300 	 * Must specify the mode.
2301 	 */
2302 	if (!(va.va_mask & AT_MODE)) {
2303 		resp->status = NFS3ERR_INVAL;
2304 		goto out1;
2305 	}
2306 
2307 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2308 	name = nfscmd_convname(ca, exi, args->where.name,
2309 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2310 
2311 	if (name == NULL) {
2312 		resp->status = NFS3ERR_INVAL;
2313 		goto out1;
2314 	}
2315 
2316 	excl = EXCL;
2317 
2318 	mode = 0;
2319 
2320 	error = VOP_CREATE(dvp, name, &va, excl, mode,
2321 	    &vp, cr, 0, NULL, NULL);
2322 
2323 	if (name != args->where.name)
2324 		kmem_free(name, MAXPATHLEN + 1);
2325 
2326 	dava.va_mask = AT_ALL;
2327 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2328 
2329 	/*
2330 	 * Force modified data and metadata out to stable storage.
2331 	 */
2332 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
2333 
2334 	if (error)
2335 		goto out;
2336 
2337 	resp->status = NFS3_OK;
2338 
2339 	error = makefh3(&resp->resok.obj.handle, vp, exi);
2340 	if (error)
2341 		resp->resok.obj.handle_follows = FALSE;
2342 	else
2343 		resp->resok.obj.handle_follows = TRUE;
2344 
2345 	va.va_mask = AT_ALL;
2346 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2347 
2348 	/*
2349 	 * Force modified metadata out to stable storage.
2350 	 *
2351 	 * if a underlying vp exists, pass it to VOP_FSYNC
2352 	 */
2353 	if (VOP_REALVP(vp, &realvp, NULL) == 0)
2354 		(void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2355 	else
2356 		(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2357 
2358 	VN_RELE(vp);
2359 
2360 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2361 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2362 	DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2363 	    cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2364 	VN_RELE(dvp);
2365 	return;
2366 
2367 out:
2368 	if (curthread->t_flag & T_WOULDBLOCK) {
2369 		curthread->t_flag &= ~T_WOULDBLOCK;
2370 		resp->status = NFS3ERR_JUKEBOX;
2371 	} else
2372 		resp->status = puterrno3(error);
2373 out1:
2374 	DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2375 	    cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2376 	if (dvp != NULL)
2377 		VN_RELE(dvp);
2378 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2379 }
2380 
2381 void *
2382 rfs3_mknod_getfh(MKNOD3args *args)
2383 {
2384 
2385 	return (&args->where.dir);
2386 }
2387 
2388 void
2389 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2390     struct svc_req *req, cred_t *cr, bool_t ro)
2391 {
2392 	int error = 0;
2393 	vnode_t *vp;
2394 	struct vattr *bvap;
2395 	struct vattr bva;
2396 	struct vattr *avap;
2397 	struct vattr ava;
2398 	vnode_t *targvp = NULL;
2399 	struct sockaddr *ca;
2400 	char *name = NULL;
2401 
2402 	bvap = NULL;
2403 	avap = NULL;
2404 
2405 	vp = nfs3_fhtovp(&args->object.dir, exi);
2406 
2407 	DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req,
2408 	    cred_t *, cr, vnode_t *, vp, REMOVE3args *, args);
2409 
2410 	if (vp == NULL) {
2411 		error = ESTALE;
2412 		goto err;
2413 	}
2414 
2415 	bva.va_mask = AT_ALL;
2416 	bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2417 	avap = bvap;
2418 
2419 	if (vp->v_type != VDIR) {
2420 		resp->status = NFS3ERR_NOTDIR;
2421 		goto err1;
2422 	}
2423 
2424 	if (args->object.name == nfs3nametoolong) {
2425 		resp->status = NFS3ERR_NAMETOOLONG;
2426 		goto err1;
2427 	}
2428 
2429 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2430 		resp->status = NFS3ERR_ACCES;
2431 		goto err1;
2432 	}
2433 
2434 	if (rdonly(ro, vp)) {
2435 		resp->status = NFS3ERR_ROFS;
2436 		goto err1;
2437 	}
2438 
2439 	if (is_system_labeled()) {
2440 		bslabel_t *clabel = req->rq_label;
2441 
2442 		ASSERT(clabel != NULL);
2443 		DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2444 		    "got client label from request(1)", struct svc_req *, req);
2445 
2446 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2447 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2448 			    exi)) {
2449 				resp->status = NFS3ERR_ACCES;
2450 				goto err1;
2451 			}
2452 		}
2453 	}
2454 
2455 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2456 	name = nfscmd_convname(ca, exi, args->object.name,
2457 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2458 
2459 	if (name == NULL) {
2460 		resp->status = NFS3ERR_INVAL;
2461 		goto err1;
2462 	}
2463 
2464 	/*
2465 	 * Check for a conflict with a non-blocking mandatory share
2466 	 * reservation and V4 delegations
2467 	 */
2468 	error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2469 	    NULL, cr, NULL, NULL, NULL);
2470 	if (error != 0)
2471 		goto err;
2472 
2473 	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2474 		resp->status = NFS3ERR_JUKEBOX;
2475 		goto err1;
2476 	}
2477 
2478 	if (!nbl_need_check(targvp)) {
2479 		error = VOP_REMOVE(vp, name, cr, NULL, 0);
2480 	} else {
2481 		nbl_start_crit(targvp, RW_READER);
2482 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2483 			error = EACCES;
2484 		} else {
2485 			error = VOP_REMOVE(vp, name, cr, NULL, 0);
2486 		}
2487 		nbl_end_crit(targvp);
2488 	}
2489 	VN_RELE(targvp);
2490 	targvp = NULL;
2491 
2492 	ava.va_mask = AT_ALL;
2493 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2494 
2495 	/*
2496 	 * Force modified data and metadata out to stable storage.
2497 	 */
2498 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2499 
2500 	if (error)
2501 		goto err;
2502 
2503 	resp->status = NFS3_OK;
2504 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2505 	goto out;
2506 
2507 err:
2508 	if (curthread->t_flag & T_WOULDBLOCK) {
2509 		curthread->t_flag &= ~T_WOULDBLOCK;
2510 		resp->status = NFS3ERR_JUKEBOX;
2511 	} else
2512 		resp->status = puterrno3(error);
2513 err1:
2514 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2515 out:
2516 	DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req,
2517 	    cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp);
2518 
2519 	if (name != NULL && name != args->object.name)
2520 		kmem_free(name, MAXPATHLEN + 1);
2521 
2522 	if (vp != NULL)
2523 		VN_RELE(vp);
2524 }
2525 
2526 void *
2527 rfs3_remove_getfh(REMOVE3args *args)
2528 {
2529 
2530 	return (&args->object.dir);
2531 }
2532 
2533 void
2534 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2535     struct svc_req *req, cred_t *cr, bool_t ro)
2536 {
2537 	int error;
2538 	vnode_t *vp;
2539 	struct vattr *bvap;
2540 	struct vattr bva;
2541 	struct vattr *avap;
2542 	struct vattr ava;
2543 	struct sockaddr *ca;
2544 	char *name = NULL;
2545 
2546 	bvap = NULL;
2547 	avap = NULL;
2548 
2549 	vp = nfs3_fhtovp(&args->object.dir, exi);
2550 
2551 	DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req,
2552 	    cred_t *, cr, vnode_t *, vp, RMDIR3args *, args);
2553 
2554 	if (vp == NULL) {
2555 		error = ESTALE;
2556 		goto err;
2557 	}
2558 
2559 	bva.va_mask = AT_ALL;
2560 	bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2561 	avap = bvap;
2562 
2563 	if (vp->v_type != VDIR) {
2564 		resp->status = NFS3ERR_NOTDIR;
2565 		goto err1;
2566 	}
2567 
2568 	if (args->object.name == nfs3nametoolong) {
2569 		resp->status = NFS3ERR_NAMETOOLONG;
2570 		goto err1;
2571 	}
2572 
2573 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2574 		resp->status = NFS3ERR_ACCES;
2575 		goto err1;
2576 	}
2577 
2578 	if (rdonly(ro, vp)) {
2579 		resp->status = NFS3ERR_ROFS;
2580 		goto err1;
2581 	}
2582 
2583 	if (is_system_labeled()) {
2584 		bslabel_t *clabel = req->rq_label;
2585 
2586 		ASSERT(clabel != NULL);
2587 		DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2588 		    "got client label from request(1)", struct svc_req *, req);
2589 
2590 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2591 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2592 			    exi)) {
2593 				resp->status = NFS3ERR_ACCES;
2594 				goto err1;
2595 			}
2596 		}
2597 	}
2598 
2599 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2600 	name = nfscmd_convname(ca, exi, args->object.name,
2601 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2602 
2603 	if (name == NULL) {
2604 		resp->status = NFS3ERR_INVAL;
2605 		goto err1;
2606 	}
2607 
2608 	error = VOP_RMDIR(vp, name, rootdir, cr, NULL, 0);
2609 
2610 	if (name != args->object.name)
2611 		kmem_free(name, MAXPATHLEN + 1);
2612 
2613 	ava.va_mask = AT_ALL;
2614 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2615 
2616 	/*
2617 	 * Force modified data and metadata out to stable storage.
2618 	 */
2619 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2620 
2621 	if (error) {
2622 		/*
2623 		 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2624 		 * if the directory is not empty.  A System V NFS server
2625 		 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2626 		 * over the wire.
2627 		 */
2628 		if (error == EEXIST)
2629 			error = ENOTEMPTY;
2630 		goto err;
2631 	}
2632 
2633 	resp->status = NFS3_OK;
2634 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2635 	goto out;
2636 
2637 err:
2638 	if (curthread->t_flag & T_WOULDBLOCK) {
2639 		curthread->t_flag &= ~T_WOULDBLOCK;
2640 		resp->status = NFS3ERR_JUKEBOX;
2641 	} else
2642 		resp->status = puterrno3(error);
2643 err1:
2644 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2645 out:
2646 	DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req,
2647 	    cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp);
2648 	if (vp != NULL)
2649 		VN_RELE(vp);
2650 
2651 }
2652 
2653 void *
2654 rfs3_rmdir_getfh(RMDIR3args *args)
2655 {
2656 
2657 	return (&args->object.dir);
2658 }
2659 
2660 void
2661 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2662     struct svc_req *req, cred_t *cr, bool_t ro)
2663 {
2664 	int error = 0;
2665 	vnode_t *fvp;
2666 	vnode_t *tvp;
2667 	vnode_t *targvp;
2668 	struct vattr *fbvap;
2669 	struct vattr fbva;
2670 	struct vattr *favap;
2671 	struct vattr fava;
2672 	struct vattr *tbvap;
2673 	struct vattr tbva;
2674 	struct vattr *tavap;
2675 	struct vattr tava;
2676 	nfs_fh3 *fh3;
2677 	struct exportinfo *to_exi;
2678 	vnode_t *srcvp = NULL;
2679 	bslabel_t *clabel;
2680 	struct sockaddr *ca;
2681 	char *name = NULL;
2682 	char *toname = NULL;
2683 
2684 	fbvap = NULL;
2685 	favap = NULL;
2686 	tbvap = NULL;
2687 	tavap = NULL;
2688 	tvp = NULL;
2689 
2690 	fvp = nfs3_fhtovp(&args->from.dir, exi);
2691 
2692 	DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req,
2693 	    cred_t *, cr, vnode_t *, fvp, RENAME3args *, args);
2694 
2695 	if (fvp == NULL) {
2696 		error = ESTALE;
2697 		goto err;
2698 	}
2699 
2700 	if (is_system_labeled()) {
2701 		clabel = req->rq_label;
2702 		ASSERT(clabel != NULL);
2703 		DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2704 		    "got client label from request(1)", struct svc_req *, req);
2705 
2706 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2707 			if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2708 			    exi)) {
2709 				resp->status = NFS3ERR_ACCES;
2710 				goto err1;
2711 			}
2712 		}
2713 	}
2714 
2715 	fbva.va_mask = AT_ALL;
2716 	fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2717 	favap = fbvap;
2718 
2719 	fh3 = &args->to.dir;
2720 	to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2721 	if (to_exi == NULL) {
2722 		resp->status = NFS3ERR_ACCES;
2723 		goto err1;
2724 	}
2725 	exi_rele(to_exi);
2726 
2727 	if (to_exi != exi) {
2728 		resp->status = NFS3ERR_XDEV;
2729 		goto err1;
2730 	}
2731 
2732 	tvp = nfs3_fhtovp(&args->to.dir, exi);
2733 	if (tvp == NULL) {
2734 		error = ESTALE;
2735 		goto err;
2736 	}
2737 
2738 	tbva.va_mask = AT_ALL;
2739 	tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2740 	tavap = tbvap;
2741 
2742 	if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2743 		resp->status = NFS3ERR_NOTDIR;
2744 		goto err1;
2745 	}
2746 
2747 	if (args->from.name == nfs3nametoolong ||
2748 	    args->to.name == nfs3nametoolong) {
2749 		resp->status = NFS3ERR_NAMETOOLONG;
2750 		goto err1;
2751 	}
2752 	if (args->from.name == NULL || *(args->from.name) == '\0' ||
2753 	    args->to.name == NULL || *(args->to.name) == '\0') {
2754 		resp->status = NFS3ERR_ACCES;
2755 		goto err1;
2756 	}
2757 
2758 	if (rdonly(ro, tvp)) {
2759 		resp->status = NFS3ERR_ROFS;
2760 		goto err1;
2761 	}
2762 
2763 	if (is_system_labeled()) {
2764 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2765 			if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2766 			    exi)) {
2767 				resp->status = NFS3ERR_ACCES;
2768 				goto err1;
2769 			}
2770 		}
2771 	}
2772 
2773 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2774 	name = nfscmd_convname(ca, exi, args->from.name,
2775 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2776 
2777 	if (name == NULL) {
2778 		resp->status = NFS3ERR_INVAL;
2779 		goto err1;
2780 	}
2781 
2782 	toname = nfscmd_convname(ca, exi, args->to.name,
2783 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2784 
2785 	if (toname == NULL) {
2786 		resp->status = NFS3ERR_INVAL;
2787 		goto err1;
2788 	}
2789 
2790 	/*
2791 	 * Check for a conflict with a non-blocking mandatory share
2792 	 * reservation or V4 delegations.
2793 	 */
2794 	error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2795 	    NULL, cr, NULL, NULL, NULL);
2796 	if (error != 0)
2797 		goto err;
2798 
2799 	/*
2800 	 * If we rename a delegated file we should recall the
2801 	 * delegation, since future opens should fail or would
2802 	 * refer to a new file.
2803 	 */
2804 	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2805 		resp->status = NFS3ERR_JUKEBOX;
2806 		goto err1;
2807 	}
2808 
2809 	/*
2810 	 * Check for renaming over a delegated file.  Check rfs4_deleg_policy
2811 	 * first to avoid VOP_LOOKUP if possible.
2812 	 */
2813 	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2814 	    VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2815 	    NULL, NULL, NULL) == 0) {
2816 
2817 		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2818 			VN_RELE(targvp);
2819 			resp->status = NFS3ERR_JUKEBOX;
2820 			goto err1;
2821 		}
2822 		VN_RELE(targvp);
2823 	}
2824 
2825 	if (!nbl_need_check(srcvp)) {
2826 		error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2827 	} else {
2828 		nbl_start_crit(srcvp, RW_READER);
2829 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2830 			error = EACCES;
2831 		else
2832 			error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2833 		nbl_end_crit(srcvp);
2834 	}
2835 	if (error == 0)
2836 		vn_renamepath(tvp, srcvp, args->to.name,
2837 		    strlen(args->to.name));
2838 	VN_RELE(srcvp);
2839 	srcvp = NULL;
2840 
2841 	fava.va_mask = AT_ALL;
2842 	favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2843 	tava.va_mask = AT_ALL;
2844 	tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2845 
2846 	/*
2847 	 * Force modified data and metadata out to stable storage.
2848 	 */
2849 	(void) VOP_FSYNC(fvp, 0, cr, NULL);
2850 	(void) VOP_FSYNC(tvp, 0, cr, NULL);
2851 
2852 	if (error)
2853 		goto err;
2854 
2855 	resp->status = NFS3_OK;
2856 	vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2857 	vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2858 	goto out;
2859 
2860 err:
2861 	if (curthread->t_flag & T_WOULDBLOCK) {
2862 		curthread->t_flag &= ~T_WOULDBLOCK;
2863 		resp->status = NFS3ERR_JUKEBOX;
2864 	} else {
2865 		resp->status = puterrno3(error);
2866 	}
2867 err1:
2868 	vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2869 	vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2870 
2871 out:
2872 	if (name != NULL && name != args->from.name)
2873 		kmem_free(name, MAXPATHLEN + 1);
2874 	if (toname != NULL && toname != args->to.name)
2875 		kmem_free(toname, MAXPATHLEN + 1);
2876 
2877 	DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req,
2878 	    cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp);
2879 	if (fvp != NULL)
2880 		VN_RELE(fvp);
2881 	if (tvp != NULL)
2882 		VN_RELE(tvp);
2883 }
2884 
2885 void *
2886 rfs3_rename_getfh(RENAME3args *args)
2887 {
2888 
2889 	return (&args->from.dir);
2890 }
2891 
2892 void
2893 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2894     struct svc_req *req, cred_t *cr, bool_t ro)
2895 {
2896 	int error;
2897 	vnode_t *vp;
2898 	vnode_t *dvp;
2899 	struct vattr *vap;
2900 	struct vattr va;
2901 	struct vattr *bvap;
2902 	struct vattr bva;
2903 	struct vattr *avap;
2904 	struct vattr ava;
2905 	nfs_fh3	*fh3;
2906 	struct exportinfo *to_exi;
2907 	bslabel_t *clabel;
2908 	struct sockaddr *ca;
2909 	char *name = NULL;
2910 
2911 	vap = NULL;
2912 	bvap = NULL;
2913 	avap = NULL;
2914 	dvp = NULL;
2915 
2916 	vp = nfs3_fhtovp(&args->file, exi);
2917 
2918 	DTRACE_NFSV3_4(op__link__start, struct svc_req *, req,
2919 	    cred_t *, cr, vnode_t *, vp, LINK3args *, args);
2920 
2921 	if (vp == NULL) {
2922 		error = ESTALE;
2923 		goto out;
2924 	}
2925 
2926 	va.va_mask = AT_ALL;
2927 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2928 
2929 	fh3 = &args->link.dir;
2930 	to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2931 	if (to_exi == NULL) {
2932 		resp->status = NFS3ERR_ACCES;
2933 		goto out1;
2934 	}
2935 	exi_rele(to_exi);
2936 
2937 	if (to_exi != exi) {
2938 		resp->status = NFS3ERR_XDEV;
2939 		goto out1;
2940 	}
2941 
2942 	if (is_system_labeled()) {
2943 		clabel = req->rq_label;
2944 
2945 		ASSERT(clabel != NULL);
2946 		DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
2947 		    "got client label from request(1)", struct svc_req *, req);
2948 
2949 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2950 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2951 			    exi)) {
2952 				resp->status = NFS3ERR_ACCES;
2953 				goto out1;
2954 			}
2955 		}
2956 	}
2957 
2958 	dvp = nfs3_fhtovp(&args->link.dir, exi);
2959 	if (dvp == NULL) {
2960 		error = ESTALE;
2961 		goto out;
2962 	}
2963 
2964 	bva.va_mask = AT_ALL;
2965 	bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
2966 
2967 	if (dvp->v_type != VDIR) {
2968 		resp->status = NFS3ERR_NOTDIR;
2969 		goto out1;
2970 	}
2971 
2972 	if (args->link.name == nfs3nametoolong) {
2973 		resp->status = NFS3ERR_NAMETOOLONG;
2974 		goto out1;
2975 	}
2976 
2977 	if (args->link.name == NULL || *(args->link.name) == '\0') {
2978 		resp->status = NFS3ERR_ACCES;
2979 		goto out1;
2980 	}
2981 
2982 	if (rdonly(ro, dvp)) {
2983 		resp->status = NFS3ERR_ROFS;
2984 		goto out1;
2985 	}
2986 
2987 	if (is_system_labeled()) {
2988 		DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
2989 		    "got client label from request(1)", struct svc_req *, req);
2990 
2991 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2992 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2993 			    exi)) {
2994 				resp->status = NFS3ERR_ACCES;
2995 				goto out1;
2996 			}
2997 		}
2998 	}
2999 
3000 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3001 	name = nfscmd_convname(ca, exi, args->link.name,
3002 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3003 
3004 	if (name == NULL) {
3005 		resp->status = NFS3ERR_SERVERFAULT;
3006 		goto out1;
3007 	}
3008 
3009 	error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3010 
3011 	va.va_mask = AT_ALL;
3012 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3013 	ava.va_mask = AT_ALL;
3014 	avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3015 
3016 	/*
3017 	 * Force modified data and metadata out to stable storage.
3018 	 */
3019 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3020 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
3021 
3022 	if (error)
3023 		goto out;
3024 
3025 	VN_RELE(dvp);
3026 
3027 	resp->status = NFS3_OK;
3028 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3029 	vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3030 
3031 	DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3032 	    cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3033 
3034 	VN_RELE(vp);
3035 
3036 	return;
3037 
3038 out:
3039 	if (curthread->t_flag & T_WOULDBLOCK) {
3040 		curthread->t_flag &= ~T_WOULDBLOCK;
3041 		resp->status = NFS3ERR_JUKEBOX;
3042 	} else
3043 		resp->status = puterrno3(error);
3044 out1:
3045 	if (name != NULL && name != args->link.name)
3046 		kmem_free(name, MAXPATHLEN + 1);
3047 
3048 	DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3049 	    cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3050 
3051 	if (vp != NULL)
3052 		VN_RELE(vp);
3053 	if (dvp != NULL)
3054 		VN_RELE(dvp);
3055 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3056 	vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3057 }
3058 
3059 void *
3060 rfs3_link_getfh(LINK3args *args)
3061 {
3062 
3063 	return (&args->file);
3064 }
3065 
/*
 * Size in bytes of a READDIR response that carries directory attributes
 * plus a single directory entry whose name is `length' bytes long.  A
 * request whose count is larger than this is guaranteed to have room for
 * at least one directory entry if one exists, so NFS3ERR_TOOSMALL need
 * not be considered for it.  A request with a smaller count has to be
 * checked to see whether that error must be returned.
 *
 * The fixed part of NFS3_READDIR_MIN_COUNT, in XDR units, is:
 *
 *	status				1
 *	attribute flag			1
 *	cookie verifier			2
 *	attributes			NFS3_SIZEOF_FATTR3
 *	entry-follows boolean		1
 *	file id				2
 *	directory name length		1
 *	cookie				2
 *	end of list			1
 *	end of file			1
 *
 * each multiplied by BYTES_PER_XDR_UNIT.  To that is added the length of
 * the name rounded up to a multiple of BYTES_PER_XDR_UNIT.
 */

#define	NFS3_READDIR_MIN_COUNT(length)	\
	(roundup((length), BYTES_PER_XDR_UNIT) + \
	    (1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) * \
	    BYTES_PER_XDR_UNIT)
3094 
3095 /* ARGSUSED */
3096 void
3097 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3098     struct svc_req *req, cred_t *cr, bool_t ro)
3099 {
3100 	int error;
3101 	vnode_t *vp;
3102 	struct vattr *vap;
3103 	struct vattr va;
3104 	struct iovec iov;
3105 	struct uio uio;
3106 	char *data;
3107 	int iseof;
3108 	int bufsize;
3109 	int namlen;
3110 	uint_t count;
3111 	struct sockaddr *ca;
3112 
3113 	vap = NULL;
3114 
3115 	vp = nfs3_fhtovp(&args->dir, exi);
3116 
3117 	DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req,
3118 	    cred_t *, cr, vnode_t *, vp, READDIR3args *, args);
3119 
3120 	if (vp == NULL) {
3121 		error = ESTALE;
3122 		goto out;
3123 	}
3124 
3125 	if (is_system_labeled()) {
3126 		bslabel_t *clabel = req->rq_label;
3127 
3128 		ASSERT(clabel != NULL);
3129 		DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3130 		    "got client label from request(1)", struct svc_req *, req);
3131 
3132 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3133 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3134 			    exi)) {
3135 				resp->status = NFS3ERR_ACCES;
3136 				goto out1;
3137 			}
3138 		}
3139 	}
3140 
3141 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3142 
3143 	va.va_mask = AT_ALL;
3144 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3145 
3146 	if (vp->v_type != VDIR) {
3147 		resp->status = NFS3ERR_NOTDIR;
3148 		goto out1;
3149 	}
3150 
3151 	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3152 	if (error)
3153 		goto out;
3154 
3155 	/*
3156 	 * Now don't allow arbitrary count to alloc;
3157 	 * allow the maximum not to exceed rfs3_tsize()
3158 	 */
3159 	if (args->count > rfs3_tsize(req))
3160 		args->count = rfs3_tsize(req);
3161 
3162 	/*
3163 	 * Make sure that there is room to read at least one entry
3164 	 * if any are available.
3165 	 */
3166 	if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3167 		count = DIRENT64_RECLEN(MAXNAMELEN);
3168 	else
3169 		count = args->count;
3170 
3171 	data = kmem_alloc(count, KM_SLEEP);
3172 
3173 	iov.iov_base = data;
3174 	iov.iov_len = count;
3175 	uio.uio_iov = &iov;
3176 	uio.uio_iovcnt = 1;
3177 	uio.uio_segflg = UIO_SYSSPACE;
3178 	uio.uio_extflg = UIO_COPY_CACHED;
3179 	uio.uio_loffset = (offset_t)args->cookie;
3180 	uio.uio_resid = count;
3181 
3182 	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3183 
3184 	va.va_mask = AT_ALL;
3185 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3186 
3187 	if (error) {
3188 		kmem_free(data, count);
3189 		goto out;
3190 	}
3191 
3192 	/*
3193 	 * If the count was not large enough to be able to guarantee
3194 	 * to be able to return at least one entry, then need to
3195 	 * check to see if NFS3ERR_TOOSMALL should be returned.
3196 	 */
3197 	if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3198 		/*
3199 		 * bufsize is used to keep track of the size of the response.
3200 		 * It is primed with:
3201 		 *	1 for the status +
3202 		 *	1 for the dir_attributes.attributes boolean +
3203 		 *	2 for the cookie verifier
3204 		 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3205 		 * to bytes.  If there are directory attributes to be
3206 		 * returned, then:
3207 		 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3208 		 * time BYTES_PER_XDR_UNIT is added to account for them.
3209 		 */
3210 		bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3211 		if (vap != NULL)
3212 			bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3213 		/*
3214 		 * An entry is composed of:
3215 		 *	1 for the true/false list indicator +
3216 		 *	2 for the fileid +
3217 		 *	1 for the length of the name +
3218 		 *	2 for the cookie +
3219 		 * all times BYTES_PER_XDR_UNIT to convert from
3220 		 * XDR units to bytes, plus the length of the name
3221 		 * rounded up to the nearest BYTES_PER_XDR_UNIT.
3222 		 */
3223 		if (count != uio.uio_resid) {
3224 			namlen = strlen(((struct dirent64 *)data)->d_name);
3225 			bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3226 			    roundup(namlen, BYTES_PER_XDR_UNIT);
3227 		}
3228 		/*
3229 		 * We need to check to see if the number of bytes left
3230 		 * to go into the buffer will actually fit into the
3231 		 * buffer.  This is calculated as the size of this
3232 		 * entry plus:
3233 		 *	1 for the true/false list indicator +
3234 		 *	1 for the eof indicator
3235 		 * times BYTES_PER_XDR_UNIT to convert from from
3236 		 * XDR units to bytes.
3237 		 */
3238 		bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3239 		if (bufsize > args->count) {
3240 			kmem_free(data, count);
3241 			resp->status = NFS3ERR_TOOSMALL;
3242 			goto out1;
3243 		}
3244 	}
3245 
3246 	/*
3247 	 * Have a valid readir buffer for the native character
3248 	 * set. Need to check if a conversion is necessary and
3249 	 * potentially rewrite the whole buffer. Note that if the
3250 	 * conversion expands names enough, the structure may not
3251 	 * fit. In this case, we need to drop entries until if fits
3252 	 * and patch the counts in order that the next readdir will
3253 	 * get the correct entries.
3254 	 */
3255 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3256 	data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3257 
3258 
3259 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3260 
3261 #if 0 /* notyet */
3262 	/*
3263 	 * Don't do this.  It causes local disk writes when just
3264 	 * reading the file and the overhead is deemed larger
3265 	 * than the benefit.
3266 	 */
3267 	/*
3268 	 * Force modified metadata out to stable storage.
3269 	 */
3270 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3271 #endif
3272 
3273 	resp->status = NFS3_OK;
3274 	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3275 	resp->resok.cookieverf = 0;
3276 	resp->resok.reply.entries = (entry3 *)data;
3277 	resp->resok.reply.eof = iseof;
3278 	resp->resok.size = count - uio.uio_resid;
3279 	resp->resok.count = args->count;
3280 	resp->resok.freecount = count;
3281 
3282 	DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3283 	    cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3284 
3285 	VN_RELE(vp);
3286 
3287 	return;
3288 
3289 out:
3290 	if (curthread->t_flag & T_WOULDBLOCK) {
3291 		curthread->t_flag &= ~T_WOULDBLOCK;
3292 		resp->status = NFS3ERR_JUKEBOX;
3293 	} else
3294 		resp->status = puterrno3(error);
3295 out1:
3296 	DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3297 	    cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3298 
3299 	if (vp != NULL) {
3300 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3301 		VN_RELE(vp);
3302 	}
3303 	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3304 }
3305 
3306 void *
3307 rfs3_readdir_getfh(READDIR3args *args)
3308 {
3309 
3310 	return (&args->dir);
3311 }
3312 
3313 void
3314 rfs3_readdir_free(READDIR3res *resp)
3315 {
3316 
3317 	if (resp->status == NFS3_OK)
3318 		kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3319 }
3320 
/*
 * nextdp(dp) yields a pointer to the dirent64 record that starts
 * dp->d_reclen bytes after dp.  Any definition of nextdp already in
 * effect is discarded first.
 */
#if defined(nextdp)
#undef nextdp
#endif
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
3325 
/*
 * This macro computes the size in bytes of the part of a READDIRPLUS
 * response that holds one directory entry, including its attributes and
 * its file handle.  If the space remaining in the reply is larger than
 * this, then we are guaranteed to be able to return at least one more
 * directory entry if one exists.
 *
 * NFS3_READDIRPLUS_ENTRY is made up of the following:
 *
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * attribute flag - 1 * BYTES_PER_XDR_UNIT
 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * status flag for the file handle - 1 * BYTES_PER_XDR_UNIT
 * length of a file handle - 1 * BYTES_PER_XDR_UNIT
 * maximum length of a file handle (NFS3_MAXFHSIZE)
 * name length of the entry rounded up to a multiple of BYTES_PER_XDR_UNIT
 *
 * The namelen argument is parenthesized in the expansion so that any
 * expression may be passed safely, matching NFS3_READDIR_MIN_COUNT.
 */
#define	NFS3_READDIRPLUS_ENTRY(namelen)	\
	((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
		BYTES_PER_XDR_UNIT + \
	NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
3349 
/* Initial value of rd_unit in rfs3_readdirplus(); set to MAXBSIZE here. */
static int rfs3_readdir_unit = MAXBSIZE;
3351 
3352 /* ARGSUSED */
3353 void
3354 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3355     struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
3356 {
3357 	int error;
3358 	vnode_t *vp;
3359 	struct vattr *vap;
3360 	struct vattr va;
3361 	struct iovec iov;
3362 	struct uio uio;
3363 	char *data;
3364 	int iseof;
3365 	struct dirent64 *dp;
3366 	vnode_t *nvp;
3367 	struct vattr *nvap;
3368 	struct vattr nva;
3369 	entryplus3_info *infop = NULL;
3370 	int size = 0;
3371 	int nents = 0;
3372 	int bufsize = 0;
3373 	int entrysize = 0;
3374 	int tofit = 0;
3375 	int rd_unit = rfs3_readdir_unit;
3376 	int prev_len;
3377 	int space_left;
3378 	int i;
3379 	uint_t *namlen = NULL;
3380 	char *ndata = NULL;
3381 	struct sockaddr *ca;
3382 	size_t ret;
3383 
3384 	vap = NULL;
3385 
3386 	vp = nfs3_fhtovp(&args->dir, exi);
3387 
3388 	DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req,
3389 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args);
3390 
3391 	if (vp == NULL) {
3392 		error = ESTALE;
3393 		goto out;
3394 	}
3395 
3396 	if (is_system_labeled()) {
3397 		bslabel_t *clabel = req->rq_label;
3398 
3399 		ASSERT(clabel != NULL);
3400 		DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3401 		    char *, "got client label from request(1)",
3402 		    struct svc_req *, req);
3403 
3404 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3405 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3406 			    exi)) {
3407 				resp->status = NFS3ERR_ACCES;
3408 				goto out1;
3409 			}
3410 		}
3411 	}
3412 
3413 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3414 
3415 	va.va_mask = AT_ALL;
3416 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3417 
3418 	if (vp->v_type != VDIR) {
3419 		error = ENOTDIR;
3420 		goto out;
3421 	}
3422 
3423 	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3424 	if (error)
3425 		goto out;
3426 
3427 	/*
3428 	 * Don't allow arbitrary counts for allocation
3429 	 */
3430 	if (args->maxcount > rfs3_tsize(req))
3431 		args->maxcount = rfs3_tsize(req);
3432 
3433 	/*
3434 	 * Make sure that there is room to read at least one entry
3435 	 * if any are available
3436 	 */
3437 	args->dircount = MIN(args->dircount, args->maxcount);
3438 
3439 	if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3440 		args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3441 
3442 	/*
3443 	 * This allocation relies on a minimum directory entry
3444 	 * being roughly 24 bytes.  Therefore, the namlen array
3445 	 * will have enough space based on the maximum number of
3446 	 * entries to read.
3447 	 */
3448 	namlen = kmem_alloc(args->dircount, KM_SLEEP);
3449 
3450 	space_left = args->dircount;
3451 	data = kmem_alloc(args->dircount, KM_SLEEP);
3452 	dp = (struct dirent64 *)data;
3453 	uio.uio_iov = &iov;
3454 	uio.uio_iovcnt = 1;
3455 	uio.uio_segflg = UIO_SYSSPACE;
3456 	uio.uio_extflg = UIO_COPY_CACHED;
3457 	uio.uio_loffset = (offset_t)args->cookie;
3458 
3459 	/*
3460 	 * bufsize is used to keep track of the size of the response as we
3461 	 * get post op attributes and filehandles for each entry.  This is
3462 	 * an optimization as the server may have read more entries than will
3463 	 * fit in the buffer specified by maxcount.  We stop calculating
3464 	 * post op attributes and filehandles once we have exceeded maxcount.
3465 	 * This will minimize the effect of truncation.
3466 	 *
3467 	 * It is primed with:
3468 	 *	1 for the status +
3469 	 *	1 for the dir_attributes.attributes boolean +
3470 	 *	2 for the cookie verifier
3471 	 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3472 	 * to bytes.  If there are directory attributes to be
3473 	 * returned, then:
3474 	 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3475 	 * time BYTES_PER_XDR_UNIT is added to account for them.
3476 	 */
3477 	bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3478 	if (vap != NULL)
3479 		bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3480 
3481 getmoredents:
3482 	/*
3483 	 * Here we make a check so that our read unit is not larger than
3484 	 * the space left in the buffer.
3485 	 */
3486 	rd_unit = MIN(rd_unit, space_left);
3487 	iov.iov_base = (char *)dp;
3488 	iov.iov_len = rd_unit;
3489 	uio.uio_resid = rd_unit;
3490 	prev_len = rd_unit;
3491 
3492 	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3493 
3494 	if (error) {
3495 		kmem_free(data, args->dircount);
3496 		goto out;
3497 	}
3498 
3499 	if (uio.uio_resid == prev_len && !iseof) {
3500 		if (nents == 0) {
3501 			kmem_free(data, args->dircount);
3502 			resp->status = NFS3ERR_TOOSMALL;
3503 			goto out1;
3504 		}
3505 
3506 		/*
3507 		 * We could not get any more entries, so get the attributes
3508 		 * and filehandle for the entries already obtained.
3509 		 */
3510 		goto good;
3511 	}
3512 
3513 	/*
3514 	 * We estimate the size of the response by assuming the
3515 	 * entry exists and attributes and filehandle are also valid
3516 	 */
3517 	for (size = prev_len - uio.uio_resid;
3518 	    size > 0;
3519 	    size -= dp->d_reclen, dp = nextdp(dp)) {
3520 
3521 		if (dp->d_ino == 0) {
3522 			nents++;
3523 			continue;
3524 		}
3525 
3526 		namlen[nents] = strlen(dp->d_name);
3527 		entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3528 
3529 		/*
3530 		 * We need to check to see if the number of bytes left
3531 		 * to go into the buffer will actually fit into the
3532 		 * buffer.  This is calculated as the size of this
3533 		 * entry plus:
3534 		 *	1 for the true/false list indicator +
3535 		 *	1 for the eof indicator
3536 		 * times BYTES_PER_XDR_UNIT to convert from XDR units
3537 		 * to bytes.
3538 		 *
3539 		 * Also check the dircount limit against the first entry read
3540 		 *
3541 		 */
3542 		tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3543 		if (bufsize + tofit > args->maxcount) {
3544 			/*
3545 			 * We make a check here to see if this was the
3546 			 * first entry being measured.  If so, then maxcount
3547 			 * was too small to begin with and so we need to
3548 			 * return with NFS3ERR_TOOSMALL.
3549 			 */
3550 			if (nents == 0) {
3551 				kmem_free(data, args->dircount);
3552 				resp->status = NFS3ERR_TOOSMALL;
3553 				goto out1;
3554 			}
3555 			iseof = FALSE;
3556 			goto good;
3557 		}
3558 		bufsize += entrysize;
3559 		nents++;
3560 	}
3561 
3562 	/*
3563 	 * If there is enough room to fit at least 1 more entry including
3564 	 * post op attributes and filehandle in the buffer AND that we haven't
3565 	 * exceeded dircount then go back and get some more.
3566 	 */
3567 	if (!iseof &&
3568 	    (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3569 		space_left -= (prev_len - uio.uio_resid);
3570 		if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3571 			goto getmoredents;
3572 
3573 		/* else, fall through */
3574 	}
3575 good:
3576 	va.va_mask = AT_ALL;
3577 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3578 
3579 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3580 
3581 	infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3582 	resp->resok.infop = infop;
3583 
3584 	dp = (struct dirent64 *)data;
3585 	for (i = 0; i < nents; i++) {
3586 
3587 		if (dp->d_ino == 0) {
3588 			infop[i].attr.attributes = FALSE;
3589 			infop[i].fh.handle_follows = FALSE;
3590 			dp = nextdp(dp);
3591 			continue;
3592 		}
3593 
3594 		infop[i].namelen = namlen[i];
3595 
3596 		error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3597 		    NULL, NULL, NULL);
3598 		if (error) {
3599 			infop[i].attr.attributes = FALSE;
3600 			infop[i].fh.handle_follows = FALSE;
3601 			dp = nextdp(dp);
3602 			continue;
3603 		}
3604 
3605 		nva.va_mask = AT_ALL;
3606 		nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3607 
3608 		/* Lie about the object type for a referral */
3609 		if (vn_is_nfs_reparse(nvp, cr))
3610 			nvap->va_type = VLNK;
3611 
3612 		vattr_to_post_op_attr(nvap, &infop[i].attr);
3613 
3614 		error = makefh3(&infop[i].fh.handle, nvp, exi);
3615 		if (!error)
3616 			infop[i].fh.handle_follows = TRUE;
3617 		else
3618 			infop[i].fh.handle_follows = FALSE;
3619 
3620 		VN_RELE(nvp);
3621 		dp = nextdp(dp);
3622 	}
3623 
3624 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3625 	ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3626 	if (ndata == NULL)
3627 		ndata = data;
3628 
3629 	if (ret > 0) {
3630 		/*
3631 		 * We had to drop one or more entries in order to fit
3632 		 * during the character conversion.  We need to patch
3633 		 * up the size and eof info.
3634 		 */
3635 		if (iseof)
3636 			iseof = FALSE;
3637 
3638 		ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3639 		    nents, ret);
3640 	}
3641 
3642 
3643 #if 0 /* notyet */
3644 	/*
3645 	 * Don't do this.  It causes local disk writes when just
3646 	 * reading the file and the overhead is deemed larger
3647 	 * than the benefit.
3648 	 */
3649 	/*
3650 	 * Force modified metadata out to stable storage.
3651 	 */
3652 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3653 #endif
3654 
3655 	kmem_free(namlen, args->dircount);
3656 
3657 	resp->status = NFS3_OK;
3658 	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3659 	resp->resok.cookieverf = 0;
3660 	resp->resok.reply.entries = (entryplus3 *)ndata;
3661 	resp->resok.reply.eof = iseof;
3662 	resp->resok.size = nents;
3663 	resp->resok.count = args->dircount - ret;
3664 	resp->resok.maxcount = args->maxcount;
3665 
3666 	DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3667 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3668 	if (ndata != data)
3669 		kmem_free(data, args->dircount);
3670 
3671 
3672 	VN_RELE(vp);
3673 
3674 	return;
3675 
3676 out:
3677 	if (curthread->t_flag & T_WOULDBLOCK) {
3678 		curthread->t_flag &= ~T_WOULDBLOCK;
3679 		resp->status = NFS3ERR_JUKEBOX;
3680 	} else {
3681 		resp->status = puterrno3(error);
3682 	}
3683 out1:
3684 	DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3685 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3686 
3687 	if (vp != NULL) {
3688 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3689 		VN_RELE(vp);
3690 	}
3691 
3692 	if (namlen != NULL)
3693 		kmem_free(namlen, args->dircount);
3694 
3695 	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3696 }
3697 
3698 void *
3699 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3700 {
3701 
3702 	return (&args->dir);
3703 }
3704 
3705 void
3706 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3707 {
3708 
3709 	if (resp->status == NFS3_OK) {
3710 		kmem_free(resp->resok.reply.entries, resp->resok.count);
3711 		kmem_free(resp->resok.infop,
3712 		    resp->resok.size * sizeof (struct entryplus3_info));
3713 	}
3714 }
3715 
3716 /* ARGSUSED */
3717 void
3718 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3719     struct svc_req *req, cred_t *cr, bool_t ro)
3720 {
3721 	int error;
3722 	vnode_t *vp;
3723 	struct vattr *vap;
3724 	struct vattr va;
3725 	struct statvfs64 sb;
3726 
3727 	vap = NULL;
3728 
3729 	vp = nfs3_fhtovp(&args->fsroot, exi);
3730 
3731 	DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req,
3732 	    cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args);
3733 
3734 	if (vp == NULL) {
3735 		error = ESTALE;
3736 		goto out;
3737 	}
3738 
3739 	if (is_system_labeled()) {
3740 		bslabel_t *clabel = req->rq_label;
3741 
3742 		ASSERT(clabel != NULL);
3743 		DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3744 		    "got client label from request(1)", struct svc_req *, req);
3745 
3746 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3747 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3748 			    exi)) {
3749 				resp->status = NFS3ERR_ACCES;
3750 				goto out1;
3751 			}
3752 		}
3753 	}
3754 
3755 	error = VFS_STATVFS(vp->v_vfsp, &sb);
3756 
3757 	va.va_mask = AT_ALL;
3758 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3759 
3760 	if (error)
3761 		goto out;
3762 
3763 	resp->status = NFS3_OK;
3764 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3765 	if (sb.f_blocks != (fsblkcnt64_t)-1)
3766 		resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3767 	else
3768 		resp->resok.tbytes = (size3)sb.f_blocks;
3769 	if (sb.f_bfree != (fsblkcnt64_t)-1)
3770 		resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3771 	else
3772 		resp->resok.fbytes = (size3)sb.f_bfree;
3773 	if (sb.f_bavail != (fsblkcnt64_t)-1)
3774 		resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3775 	else
3776 		resp->resok.abytes = (size3)sb.f_bavail;
3777 	resp->resok.tfiles = (size3)sb.f_files;
3778 	resp->resok.ffiles = (size3)sb.f_ffree;
3779 	resp->resok.afiles = (size3)sb.f_favail;
3780 	resp->resok.invarsec = 0;
3781 
3782 	DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3783 	    cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3784 	VN_RELE(vp);
3785 
3786 	return;
3787 
3788 out:
3789 	if (curthread->t_flag & T_WOULDBLOCK) {
3790 		curthread->t_flag &= ~T_WOULDBLOCK;
3791 		resp->status = NFS3ERR_JUKEBOX;
3792 	} else
3793 		resp->status = puterrno3(error);
3794 out1:
3795 	DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3796 	    cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3797 
3798 	if (vp != NULL)
3799 		VN_RELE(vp);
3800 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3801 }
3802 
3803 void *
3804 rfs3_fsstat_getfh(FSSTAT3args *args)
3805 {
3806 
3807 	return (&args->fsroot);
3808 }
3809 
3810 /* ARGSUSED */
3811 void
3812 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3813     struct svc_req *req, cred_t *cr, bool_t ro)
3814 {
3815 	vnode_t *vp;
3816 	struct vattr *vap;
3817 	struct vattr va;
3818 	uint32_t xfer_size;
3819 	ulong_t l = 0;
3820 	int error;
3821 
3822 	vp = nfs3_fhtovp(&args->fsroot, exi);
3823 
3824 	DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req,
3825 	    cred_t *, cr, vnode_t *, vp, FSINFO3args *, args);
3826 
3827 	if (vp == NULL) {
3828 		if (curthread->t_flag & T_WOULDBLOCK) {
3829 			curthread->t_flag &= ~T_WOULDBLOCK;
3830 			resp->status = NFS3ERR_JUKEBOX;
3831 		} else
3832 			resp->status = NFS3ERR_STALE;
3833 		vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3834 		goto out;
3835 	}
3836 
3837 	if (is_system_labeled()) {
3838 		bslabel_t *clabel = req->rq_label;
3839 
3840 		ASSERT(clabel != NULL);
3841 		DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3842 		    "got client label from request(1)", struct svc_req *, req);
3843 
3844 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3845 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3846 			    exi)) {
3847 				resp->status = NFS3ERR_STALE;
3848 				vattr_to_post_op_attr(NULL,
3849 				    &resp->resfail.obj_attributes);
3850 				goto out;
3851 			}
3852 		}
3853 	}
3854 
3855 	va.va_mask = AT_ALL;
3856 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3857 
3858 	resp->status = NFS3_OK;
3859 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3860 	xfer_size = rfs3_tsize(req);
3861 	resp->resok.rtmax = xfer_size;
3862 	resp->resok.rtpref = xfer_size;
3863 	resp->resok.rtmult = DEV_BSIZE;
3864 	resp->resok.wtmax = xfer_size;
3865 	resp->resok.wtpref = xfer_size;
3866 	resp->resok.wtmult = DEV_BSIZE;
3867 	resp->resok.dtpref = MAXBSIZE;
3868 
3869 	/*
3870 	 * Large file spec: want maxfilesize based on limit of
3871 	 * underlying filesystem.  We can guess 2^31-1 if need be.
3872 	 */
3873 	error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3874 	if (error) {
3875 		resp->status = puterrno3(error);
3876 		goto out;
3877 	}
3878 
3879 	/*
3880 	 * If the underlying file system does not support _PC_FILESIZEBITS,
3881 	 * return a reasonable default. Note that error code on VOP_PATHCONF
3882 	 * will be 0, even if the underlying file system does not support
3883 	 * _PC_FILESIZEBITS.
3884 	 */
3885 	if (l == (ulong_t)-1) {
3886 		resp->resok.maxfilesize = MAXOFF32_T;
3887 	} else {
3888 		if (l >= (sizeof (uint64_t) * 8))
3889 			resp->resok.maxfilesize = INT64_MAX;
3890 		else
3891 			resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3892 	}
3893 
3894 	resp->resok.time_delta.seconds = 0;
3895 	resp->resok.time_delta.nseconds = 1000;
3896 	resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3897 	    FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3898 
3899 	DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3900 	    cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp);
3901 
3902 	VN_RELE(vp);
3903 
3904 	return;
3905 
3906 out:
3907 	DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3908 	    cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp);
3909 	if (vp != NULL)
3910 		VN_RELE(vp);
3911 }
3912 
3913 void *
3914 rfs3_fsinfo_getfh(FSINFO3args *args)
3915 {
3916 	return (&args->fsroot);
3917 }
3918 
3919 /* ARGSUSED */
3920 void
3921 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3922     struct svc_req *req, cred_t *cr, bool_t ro)
3923 {
3924 	int error;
3925 	vnode_t *vp;
3926 	struct vattr *vap;
3927 	struct vattr va;
3928 	ulong_t val;
3929 
3930 	vap = NULL;
3931 
3932 	vp = nfs3_fhtovp(&args->object, exi);
3933 
3934 	DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req,
3935 	    cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args);
3936 
3937 	if (vp == NULL) {
3938 		error = ESTALE;
3939 		goto out;
3940 	}
3941 
3942 	if (is_system_labeled()) {
3943 		bslabel_t *clabel = req->rq_label;
3944 
3945 		ASSERT(clabel != NULL);
3946 		DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
3947 		    "got client label from request(1)", struct svc_req *, req);
3948 
3949 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3950 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3951 			    exi)) {
3952 				resp->status = NFS3ERR_ACCES;
3953 				goto out1;
3954 			}
3955 		}
3956 	}
3957 
3958 	va.va_mask = AT_ALL;
3959 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3960 
3961 	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
3962 	if (error)
3963 		goto out;
3964 	resp->resok.info.link_max = (uint32)val;
3965 
3966 	error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
3967 	if (error)
3968 		goto out;
3969 	resp->resok.info.name_max = (uint32)val;
3970 
3971 	error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
3972 	if (error)
3973 		goto out;
3974 	if (val == 1)
3975 		resp->resok.info.no_trunc = TRUE;
3976 	else
3977 		resp->resok.info.no_trunc = FALSE;
3978 
3979 	error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
3980 	if (error)
3981 		goto out;
3982 	if (val == 1)
3983 		resp->resok.info.chown_restricted = TRUE;
3984 	else
3985 		resp->resok.info.chown_restricted = FALSE;
3986 
3987 	resp->status = NFS3_OK;
3988 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3989 	resp->resok.info.case_insensitive = FALSE;
3990 	resp->resok.info.case_preserving = TRUE;
3991 	DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
3992 	    cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
3993 	VN_RELE(vp);
3994 	return;
3995 
3996 out:
3997 	if (curthread->t_flag & T_WOULDBLOCK) {
3998 		curthread->t_flag &= ~T_WOULDBLOCK;
3999 		resp->status = NFS3ERR_JUKEBOX;
4000 	} else
4001 		resp->status = puterrno3(error);
4002 out1:
4003 	DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4004 	    cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4005 	if (vp != NULL)
4006 		VN_RELE(vp);
4007 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4008 }
4009 
4010 void *
4011 rfs3_pathconf_getfh(PATHCONF3args *args)
4012 {
4013 
4014 	return (&args->object);
4015 }
4016 
4017 void
4018 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4019     struct svc_req *req, cred_t *cr, bool_t ro)
4020 {
4021 	int error;
4022 	vnode_t *vp;
4023 	struct vattr *bvap;
4024 	struct vattr bva;
4025 	struct vattr *avap;
4026 	struct vattr ava;
4027 
4028 	bvap = NULL;
4029 	avap = NULL;
4030 
4031 	vp = nfs3_fhtovp(&args->file, exi);
4032 
4033 	DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req,
4034 	    cred_t *, cr, vnode_t *, vp, COMMIT3args *, args);
4035 
4036 	if (vp == NULL) {
4037 		error = ESTALE;
4038 		goto out;
4039 	}
4040 
4041 	bva.va_mask = AT_ALL;
4042 	error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4043 
4044 	/*
4045 	 * If we can't get the attributes, then we can't do the
4046 	 * right access checking.  So, we'll fail the request.
4047 	 */
4048 	if (error)
4049 		goto out;
4050 
4051 	bvap = &bva;
4052 
4053 	if (rdonly(ro, vp)) {
4054 		resp->status = NFS3ERR_ROFS;
4055 		goto out1;
4056 	}
4057 
4058 	if (vp->v_type != VREG) {
4059 		resp->status = NFS3ERR_INVAL;
4060 		goto out1;
4061 	}
4062 
4063 	if (is_system_labeled()) {
4064 		bslabel_t *clabel = req->rq_label;
4065 
4066 		ASSERT(clabel != NULL);
4067 		DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4068 		    "got client label from request(1)", struct svc_req *, req);
4069 
4070 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4071 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4072 			    exi)) {
4073 				resp->status = NFS3ERR_ACCES;
4074 				goto out1;
4075 			}
4076 		}
4077 	}
4078 
4079 	if (crgetuid(cr) != bva.va_uid &&
4080 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4081 		goto out;
4082 
4083 	error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4084 
4085 	ava.va_mask = AT_ALL;
4086 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4087 
4088 	if (error)
4089 		goto out;
4090 
4091 	resp->status = NFS3_OK;
4092 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4093 	resp->resok.verf = write3verf;
4094 
4095 	DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4096 	    cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4097 
4098 	VN_RELE(vp);
4099 
4100 	return;
4101 
4102 out:
4103 	if (curthread->t_flag & T_WOULDBLOCK) {
4104 		curthread->t_flag &= ~T_WOULDBLOCK;
4105 		resp->status = NFS3ERR_JUKEBOX;
4106 	} else
4107 		resp->status = puterrno3(error);
4108 out1:
4109 	DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4110 	    cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4111 
4112 	if (vp != NULL)
4113 		VN_RELE(vp);
4114 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4115 }
4116 
4117 void *
4118 rfs3_commit_getfh(COMMIT3args *args)
4119 {
4120 
4121 	return (&args->file);
4122 }
4123 
4124 static int
4125 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4126 {
4127 
4128 	vap->va_mask = 0;
4129 
4130 	if (sap->mode.set_it) {
4131 		vap->va_mode = (mode_t)sap->mode.mode;
4132 		vap->va_mask |= AT_MODE;
4133 	}
4134 	if (sap->uid.set_it) {
4135 		vap->va_uid = (uid_t)sap->uid.uid;
4136 		vap->va_mask |= AT_UID;
4137 	}
4138 	if (sap->gid.set_it) {
4139 		vap->va_gid = (gid_t)sap->gid.gid;
4140 		vap->va_mask |= AT_GID;
4141 	}
4142 	if (sap->size.set_it) {
4143 		if (sap->size.size > (size3)((u_longlong_t)-1))
4144 			return (EINVAL);
4145 		vap->va_size = sap->size.size;
4146 		vap->va_mask |= AT_SIZE;
4147 	}
4148 	if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4149 #ifndef _LP64
4150 		/* check time validity */
4151 		if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4152 			return (EOVERFLOW);
4153 #endif
4154 		/*
4155 		 * nfs protocol defines times as unsigned so don't extend sign,
4156 		 * unless sysadmin set nfs_allow_preepoch_time.
4157 		 */
4158 		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4159 		    sap->atime.atime.seconds);
4160 		vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4161 		vap->va_mask |= AT_ATIME;
4162 	} else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4163 		gethrestime(&vap->va_atime);
4164 		vap->va_mask |= AT_ATIME;
4165 	}
4166 	if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4167 #ifndef _LP64
4168 		/* check time validity */
4169 		if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4170 			return (EOVERFLOW);
4171 #endif
4172 		/*
4173 		 * nfs protocol defines times as unsigned so don't extend sign,
4174 		 * unless sysadmin set nfs_allow_preepoch_time.
4175 		 */
4176 		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4177 		    sap->mtime.mtime.seconds);
4178 		vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4179 		vap->va_mask |= AT_MTIME;
4180 	} else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4181 		gethrestime(&vap->va_mtime);
4182 		vap->va_mask |= AT_MTIME;
4183 	}
4184 
4185 	return (0);
4186 }
4187 
4188 static ftype3 vt_to_nf3[] = {
4189 	0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4190 };
4191 
4192 static int
4193 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4194 {
4195 
4196 	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4197 	/* Return error if time or size overflow */
4198 	if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4199 		return (EOVERFLOW);
4200 	}
4201 	fap->type = vt_to_nf3[vap->va_type];
4202 	fap->mode = (mode3)(vap->va_mode & MODEMASK);
4203 	fap->nlink = (uint32)vap->va_nlink;
4204 	if (vap->va_uid == UID_NOBODY)
4205 		fap->uid = (uid3)NFS_UID_NOBODY;
4206 	else
4207 		fap->uid = (uid3)vap->va_uid;
4208 	if (vap->va_gid == GID_NOBODY)
4209 		fap->gid = (gid3)NFS_GID_NOBODY;
4210 	else
4211 		fap->gid = (gid3)vap->va_gid;
4212 	fap->size = (size3)vap->va_size;
4213 	fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4214 	fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4215 	fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4216 	fap->fsid = (uint64)vap->va_fsid;
4217 	fap->fileid = (fileid3)vap->va_nodeid;
4218 	fap->atime.seconds = vap->va_atime.tv_sec;
4219 	fap->atime.nseconds = vap->va_atime.tv_nsec;
4220 	fap->mtime.seconds = vap->va_mtime.tv_sec;
4221 	fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4222 	fap->ctime.seconds = vap->va_ctime.tv_sec;
4223 	fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4224 	return (0);
4225 }
4226 
4227 static int
4228 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4229 {
4230 
4231 	/* Return error if time or size overflow */
4232 	if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4233 	    NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4234 	    NFS3_SIZE_OK(vap->va_size))) {
4235 		return (EOVERFLOW);
4236 	}
4237 	wccap->size = (size3)vap->va_size;
4238 	wccap->mtime.seconds = vap->va_mtime.tv_sec;
4239 	wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4240 	wccap->ctime.seconds = vap->va_ctime.tv_sec;
4241 	wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4242 	return (0);
4243 }
4244 
4245 static void
4246 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4247 {
4248 
4249 	/* don't return attrs if time overflow */
4250 	if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4251 		poap->attributes = TRUE;
4252 	} else
4253 		poap->attributes = FALSE;
4254 }
4255 
4256 void
4257 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4258 {
4259 
4260 	/* don't return attrs if time overflow */
4261 	if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4262 		poap->attributes = TRUE;
4263 	} else
4264 		poap->attributes = FALSE;
4265 }
4266 
4267 static void
4268 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4269 {
4270 
4271 	vattr_to_pre_op_attr(bvap, &wccp->before);
4272 	vattr_to_post_op_attr(avap, &wccp->after);
4273 }
4274 
4275 void
4276 rfs3_srvrinit(void)
4277 {
4278 	struct rfs3_verf_overlay {
4279 		uint_t id; /* a "unique" identifier */
4280 		int ts; /* a unique timestamp */
4281 	} *verfp;
4282 	timestruc_t now;
4283 
4284 	/*
4285 	 * The following algorithm attempts to find a unique verifier
4286 	 * to be used as the write verifier returned from the server
4287 	 * to the client.  It is important that this verifier change
4288 	 * whenever the server reboots.  Of secondary importance, it
4289 	 * is important for the verifier to be unique between two
4290 	 * different servers.
4291 	 *
4292 	 * Thus, an attempt is made to use the system hostid and the
4293 	 * current time in seconds when the nfssrv kernel module is
4294 	 * loaded.  It is assumed that an NFS server will not be able
4295 	 * to boot and then to reboot in less than a second.  If the
4296 	 * hostid has not been set, then the current high resolution
4297 	 * time is used.  This will ensure different verifiers each
4298 	 * time the server reboots and minimize the chances that two
4299 	 * different servers will have the same verifier.
4300 	 */
4301 
4302 #ifndef	lint
4303 	/*
4304 	 * We ASSERT that this constant logic expression is
4305 	 * always true because in the past, it wasn't.
4306 	 */
4307 	ASSERT(sizeof (*verfp) <= sizeof (write3verf));
4308 #endif
4309 
4310 	gethrestime(&now);
4311 	verfp = (struct rfs3_verf_overlay *)&write3verf;
4312 	verfp->ts = (int)now.tv_sec;
4313 	verfp->id = zone_get_hostid(NULL);
4314 
4315 	if (verfp->id == 0)
4316 		verfp->id = (uint_t)now.tv_nsec;
4317 
4318 	nfs3_srv_caller_id = fs_new_caller_id();
4319 
4320 }
4321 
4322 static int
4323 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4324 {
4325 	struct clist	*wcl;
4326 	int		wlist_len;
4327 	count3		count = rok->count;
4328 
4329 	wcl = args->wlist;
4330 	if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
4331 		return (FALSE);
4332 	}
4333 
4334 	wcl = args->wlist;
4335 	rok->wlist_len = wlist_len;
4336 	rok->wlist = wcl;
4337 	return (TRUE);
4338 }
4339 
/*
 * Counterpart of rfs3_srvrinit(); currently has no work to do.
 */
void
rfs3_srvrfini(void)
{
}
4345