xref: /titanic_51/usr/src/uts/common/fs/nfs/nfs3_srv.c (revision b49b27dcb66b2c7f4a23f7bc158e2dde5cd79030)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
24  * Copyright (c) 2013 by Delphix. All rights reserved.
25  */
26 
27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29 
30 #include <sys/param.h>
31 #include <sys/types.h>
32 #include <sys/systm.h>
33 #include <sys/cred.h>
34 #include <sys/buf.h>
35 #include <sys/vfs.h>
36 #include <sys/vnode.h>
37 #include <sys/uio.h>
38 #include <sys/errno.h>
39 #include <sys/sysmacros.h>
40 #include <sys/statvfs.h>
41 #include <sys/kmem.h>
42 #include <sys/dirent.h>
43 #include <sys/cmn_err.h>
44 #include <sys/debug.h>
45 #include <sys/systeminfo.h>
46 #include <sys/flock.h>
47 #include <sys/nbmlock.h>
48 #include <sys/policy.h>
49 #include <sys/sdt.h>
50 
51 #include <rpc/types.h>
52 #include <rpc/auth.h>
53 #include <rpc/svc.h>
54 #include <rpc/rpc_rdma.h>
55 
56 #include <nfs/nfs.h>
57 #include <nfs/export.h>
58 #include <nfs/nfs_cmd.h>
59 
60 #include <sys/strsubr.h>
61 #include <sys/tsol/label.h>
62 #include <sys/tsol/tndb.h>
63 
64 #include <sys/zone.h>
65 
66 #include <inet/ip.h>
67 #include <inet/ip6.h>
68 
69 /*
70  * These are the interface routines for the server side of the
71  * Network File System.  See the NFS version 3 protocol specification
72  * for a description of this interface.
73  */
74 
75 static writeverf3 write3verf;
76 
77 static int	sattr3_to_vattr(sattr3 *, struct vattr *);
78 static int	vattr_to_fattr3(struct vattr *, fattr3 *);
79 static int	vattr_to_wcc_attr(struct vattr *, wcc_attr *);
80 static void	vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
81 static void	vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
82 static int	rdma_setup_read_data3(READ3args *, READ3resok *);
83 
84 extern int nfs_loaned_buffers;
85 
86 u_longlong_t nfs3_srv_caller_id;
87 
88 /* ARGSUSED */
89 void
90 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
91 	struct svc_req *req, cred_t *cr)
92 {
93 	int error;
94 	vnode_t *vp;
95 	struct vattr va;
96 
97 	vp = nfs3_fhtovp(&args->object, exi);
98 
99 	DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
100 	    cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
101 
102 	if (vp == NULL) {
103 		error = ESTALE;
104 		goto out;
105 	}
106 
107 	va.va_mask = AT_ALL;
108 	error = rfs4_delegated_getattr(vp, &va, 0, cr);
109 
110 	if (!error) {
111 		/* Lie about the object type for a referral */
112 		if (vn_is_nfs_reparse(vp, cr))
113 			va.va_type = VLNK;
114 
115 		/* overflow error if time or size is out of range */
116 		error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
117 		if (error)
118 			goto out;
119 		resp->status = NFS3_OK;
120 
121 		DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
122 		    cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
123 
124 		VN_RELE(vp);
125 
126 		return;
127 	}
128 
129 out:
130 	if (curthread->t_flag & T_WOULDBLOCK) {
131 		curthread->t_flag &= ~T_WOULDBLOCK;
132 		resp->status = NFS3ERR_JUKEBOX;
133 	} else
134 		resp->status = puterrno3(error);
135 
136 	DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
137 	    cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
138 
139 	if (vp != NULL)
140 		VN_RELE(vp);
141 }
142 
143 void *
144 rfs3_getattr_getfh(GETATTR3args *args)
145 {
146 
147 	return (&args->object);
148 }
149 
150 void
151 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
152 	struct svc_req *req, cred_t *cr)
153 {
154 	int error;
155 	vnode_t *vp;
156 	struct vattr *bvap;
157 	struct vattr bva;
158 	struct vattr *avap;
159 	struct vattr ava;
160 	int flag;
161 	int in_crit = 0;
162 	struct flock64 bf;
163 	caller_context_t ct;
164 
165 	bvap = NULL;
166 	avap = NULL;
167 
168 	vp = nfs3_fhtovp(&args->object, exi);
169 
170 	DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
171 	    cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);
172 
173 	if (vp == NULL) {
174 		error = ESTALE;
175 		goto out;
176 	}
177 
178 	error = sattr3_to_vattr(&args->new_attributes, &ava);
179 	if (error)
180 		goto out;
181 
182 	if (is_system_labeled()) {
183 		bslabel_t *clabel = req->rq_label;
184 
185 		ASSERT(clabel != NULL);
186 		DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
187 		    "got client label from request(1)", struct svc_req *, req);
188 
189 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
190 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
191 			    exi)) {
192 				resp->status = NFS3ERR_ACCES;
193 				goto out1;
194 			}
195 		}
196 	}
197 
198 	/*
199 	 * We need to specially handle size changes because of
200 	 * possible conflicting NBMAND locks. Get into critical
201 	 * region before VOP_GETATTR, so the size attribute is
202 	 * valid when checking conflicts.
203 	 *
204 	 * Also, check to see if the v4 side of the server has
205 	 * delegated this file.  If so, then we return JUKEBOX to
206 	 * allow the client to retrasmit its request.
207 	 */
208 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
209 		if (nbl_need_check(vp)) {
210 			nbl_start_crit(vp, RW_READER);
211 			in_crit = 1;
212 		}
213 	}
214 
215 	bva.va_mask = AT_ALL;
216 	error = rfs4_delegated_getattr(vp, &bva, 0, cr);
217 
218 	/*
219 	 * If we can't get the attributes, then we can't do the
220 	 * right access checking.  So, we'll fail the request.
221 	 */
222 	if (error)
223 		goto out;
224 
225 	bvap = &bva;
226 
227 	if (rdonly(exi, req) || vn_is_readonly(vp)) {
228 		resp->status = NFS3ERR_ROFS;
229 		goto out1;
230 	}
231 
232 	if (args->guard.check &&
233 	    (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
234 	    args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
235 		resp->status = NFS3ERR_NOT_SYNC;
236 		goto out1;
237 	}
238 
239 	if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
240 		flag = ATTR_UTIME;
241 	else
242 		flag = 0;
243 
244 	/*
245 	 * If the filesystem is exported with nosuid, then mask off
246 	 * the setuid and setgid bits.
247 	 */
248 	if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
249 	    (exi->exi_export.ex_flags & EX_NOSUID))
250 		ava.va_mode &= ~(VSUID | VSGID);
251 
252 	ct.cc_sysid = 0;
253 	ct.cc_pid = 0;
254 	ct.cc_caller_id = nfs3_srv_caller_id;
255 	ct.cc_flags = CC_DONTBLOCK;
256 
257 	/*
258 	 * We need to specially handle size changes because it is
259 	 * possible for the client to create a file with modes
260 	 * which indicate read-only, but with the file opened for
261 	 * writing.  If the client then tries to set the size of
262 	 * the file, then the normal access checking done in
263 	 * VOP_SETATTR would prevent the client from doing so,
264 	 * although it should be legal for it to do so.  To get
265 	 * around this, we do the access checking for ourselves
266 	 * and then use VOP_SPACE which doesn't do the access
267 	 * checking which VOP_SETATTR does. VOP_SPACE can only
268 	 * operate on VREG files, let VOP_SETATTR handle the other
269 	 * extremely rare cases.
270 	 * Also the client should not be allowed to change the
271 	 * size of the file if there is a conflicting non-blocking
272 	 * mandatory lock in the region the change.
273 	 */
274 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
275 		if (in_crit) {
276 			u_offset_t offset;
277 			ssize_t length;
278 
279 			if (ava.va_size < bva.va_size) {
280 				offset = ava.va_size;
281 				length = bva.va_size - ava.va_size;
282 			} else {
283 				offset = bva.va_size;
284 				length = ava.va_size - bva.va_size;
285 			}
286 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
287 			    NULL)) {
288 				error = EACCES;
289 				goto out;
290 			}
291 		}
292 
293 		if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
294 			ava.va_mask &= ~AT_SIZE;
295 			bf.l_type = F_WRLCK;
296 			bf.l_whence = 0;
297 			bf.l_start = (off64_t)ava.va_size;
298 			bf.l_len = 0;
299 			bf.l_sysid = 0;
300 			bf.l_pid = 0;
301 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
302 			    (offset_t)ava.va_size, cr, &ct);
303 		}
304 	}
305 
306 	if (!error && ava.va_mask)
307 		error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
308 
309 	/* check if a monitor detected a delegation conflict */
310 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
311 		resp->status = NFS3ERR_JUKEBOX;
312 		goto out1;
313 	}
314 
315 	ava.va_mask = AT_ALL;
316 	avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
317 
318 	/*
319 	 * Force modified metadata out to stable storage.
320 	 */
321 	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
322 
323 	if (error)
324 		goto out;
325 
326 	if (in_crit)
327 		nbl_end_crit(vp);
328 
329 	resp->status = NFS3_OK;
330 	vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
331 
332 	DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
333 	    cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
334 
335 	VN_RELE(vp);
336 
337 	return;
338 
339 out:
340 	if (curthread->t_flag & T_WOULDBLOCK) {
341 		curthread->t_flag &= ~T_WOULDBLOCK;
342 		resp->status = NFS3ERR_JUKEBOX;
343 	} else
344 		resp->status = puterrno3(error);
345 out1:
346 	DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
347 	    cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
348 
349 	if (vp != NULL) {
350 		if (in_crit)
351 			nbl_end_crit(vp);
352 		VN_RELE(vp);
353 	}
354 	vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
355 }
356 
357 void *
358 rfs3_setattr_getfh(SETATTR3args *args)
359 {
360 
361 	return (&args->object);
362 }
363 
364 /* ARGSUSED */
365 void
366 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
367 	struct svc_req *req, cred_t *cr)
368 {
369 	int error;
370 	vnode_t *vp;
371 	vnode_t *dvp;
372 	struct vattr *vap;
373 	struct vattr va;
374 	struct vattr *dvap;
375 	struct vattr dva;
376 	nfs_fh3 *fhp;
377 	struct sec_ol sec = {0, 0};
378 	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
379 	struct sockaddr *ca;
380 	char *name = NULL;
381 
382 	dvap = NULL;
383 
384 	/*
385 	 * Allow lookups from the root - the default
386 	 * location of the public filehandle.
387 	 */
388 	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
389 		dvp = rootdir;
390 		VN_HOLD(dvp);
391 
392 		DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
393 		    cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
394 	} else {
395 		dvp = nfs3_fhtovp(&args->what.dir, exi);
396 
397 		DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
398 		    cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
399 
400 		if (dvp == NULL) {
401 			error = ESTALE;
402 			goto out;
403 		}
404 	}
405 
406 	dva.va_mask = AT_ALL;
407 	dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
408 
409 	if (args->what.name == nfs3nametoolong) {
410 		resp->status = NFS3ERR_NAMETOOLONG;
411 		goto out1;
412 	}
413 
414 	if (args->what.name == NULL || *(args->what.name) == '\0') {
415 		resp->status = NFS3ERR_ACCES;
416 		goto out1;
417 	}
418 
419 	fhp = &args->what.dir;
420 	if (strcmp(args->what.name, "..") == 0 &&
421 	    EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
422 		resp->status = NFS3ERR_NOENT;
423 		goto out1;
424 	}
425 
426 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
427 	name = nfscmd_convname(ca, exi, args->what.name,
428 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
429 
430 	if (name == NULL) {
431 		resp->status = NFS3ERR_ACCES;
432 		goto out1;
433 	}
434 
435 	/*
436 	 * If the public filehandle is used then allow
437 	 * a multi-component lookup
438 	 */
439 	if (PUBLIC_FH3(&args->what.dir)) {
440 		publicfh_flag = TRUE;
441 		error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
442 		    &exi, &sec);
443 		if (error && exi != NULL)
444 			exi_rele(exi); /* See comment below Re: publicfh_flag */
445 		/*
446 		 * Since WebNFS may bypass MOUNT, we need to ensure this
447 		 * request didn't come from an unlabeled admin_low client.
448 		 */
449 		if (is_system_labeled() && error == 0) {
450 			int		addr_type;
451 			void		*ipaddr;
452 			tsol_tpc_t	*tp;
453 
454 			if (ca->sa_family == AF_INET) {
455 				addr_type = IPV4_VERSION;
456 				ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
457 			} else if (ca->sa_family == AF_INET6) {
458 				addr_type = IPV6_VERSION;
459 				ipaddr = &((struct sockaddr_in6 *)
460 				    ca)->sin6_addr;
461 			}
462 			tp = find_tpc(ipaddr, addr_type, B_FALSE);
463 			if (tp == NULL || tp->tpc_tp.tp_doi !=
464 			    l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
465 			    SUN_CIPSO) {
466 				if (exi != NULL)
467 					exi_rele(exi);
468 				VN_RELE(vp);
469 				resp->status = NFS3ERR_ACCES;
470 				error = 1;
471 			}
472 			if (tp != NULL)
473 				TPC_RELE(tp);
474 		}
475 	} else {
476 		error = VOP_LOOKUP(dvp, name, &vp,
477 		    NULL, 0, NULL, cr, NULL, NULL, NULL);
478 	}
479 
480 	if (name != args->what.name)
481 		kmem_free(name, MAXPATHLEN + 1);
482 
483 	if (is_system_labeled() && error == 0) {
484 		bslabel_t *clabel = req->rq_label;
485 
486 		ASSERT(clabel != NULL);
487 		DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
488 		    "got client label from request(1)", struct svc_req *, req);
489 
490 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
491 			if (!do_rfs_label_check(clabel, dvp,
492 			    DOMINANCE_CHECK, exi)) {
493 				if (publicfh_flag && exi != NULL)
494 					exi_rele(exi);
495 				VN_RELE(vp);
496 				resp->status = NFS3ERR_ACCES;
497 				error = 1;
498 			}
499 		}
500 	}
501 
502 	dva.va_mask = AT_ALL;
503 	dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
504 
505 	if (error)
506 		goto out;
507 
508 	if (sec.sec_flags & SEC_QUERY) {
509 		error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
510 	} else {
511 		error = makefh3(&resp->resok.object, vp, exi);
512 		if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
513 			auth_weak = TRUE;
514 	}
515 
516 	/*
517 	 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
518 	 * and have obtained a new exportinfo in exi which needs to be
519 	 * released. Note that the original exportinfo pointed to by exi
520 	 * will be released by the caller, common_dispatch.
521 	 */
522 	if (publicfh_flag)
523 		exi_rele(exi);
524 
525 	if (error) {
526 		VN_RELE(vp);
527 		goto out;
528 	}
529 
530 	va.va_mask = AT_ALL;
531 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
532 
533 	VN_RELE(vp);
534 
535 	resp->status = NFS3_OK;
536 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
537 	vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
538 
539 	/*
540 	 * If it's public fh, no 0x81, and client's flavor is
541 	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
542 	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
543 	 */
544 	if (auth_weak)
545 		resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
546 
547 	DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
548 	    cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
549 	VN_RELE(dvp);
550 
551 	return;
552 
553 out:
554 	if (curthread->t_flag & T_WOULDBLOCK) {
555 		curthread->t_flag &= ~T_WOULDBLOCK;
556 		resp->status = NFS3ERR_JUKEBOX;
557 	} else
558 		resp->status = puterrno3(error);
559 out1:
560 	DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
561 	    cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
562 
563 	if (dvp != NULL)
564 		VN_RELE(dvp);
565 	vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
566 
567 }
568 
569 void *
570 rfs3_lookup_getfh(LOOKUP3args *args)
571 {
572 
573 	return (&args->what.dir);
574 }
575 
576 /* ARGSUSED */
577 void
578 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
579 	struct svc_req *req, cred_t *cr)
580 {
581 	int error;
582 	vnode_t *vp;
583 	struct vattr *vap;
584 	struct vattr va;
585 	int checkwriteperm;
586 	boolean_t dominant_label = B_FALSE;
587 	boolean_t equal_label = B_FALSE;
588 	boolean_t admin_low_client;
589 
590 	vap = NULL;
591 
592 	vp = nfs3_fhtovp(&args->object, exi);
593 
594 	DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
595 	    cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
596 
597 	if (vp == NULL) {
598 		error = ESTALE;
599 		goto out;
600 	}
601 
602 	/*
603 	 * If the file system is exported read only, it is not appropriate
604 	 * to check write permissions for regular files and directories.
605 	 * Special files are interpreted by the client, so the underlying
606 	 * permissions are sent back to the client for interpretation.
607 	 */
608 	if (rdonly(exi, req) && (vp->v_type == VREG || vp->v_type == VDIR))
609 		checkwriteperm = 0;
610 	else
611 		checkwriteperm = 1;
612 
613 	/*
614 	 * We need the mode so that we can correctly determine access
615 	 * permissions relative to a mandatory lock file.  Access to
616 	 * mandatory lock files is denied on the server, so it might
617 	 * as well be reflected to the server during the open.
618 	 */
619 	va.va_mask = AT_MODE;
620 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
621 	if (error)
622 		goto out;
623 
624 	vap = &va;
625 
626 	resp->resok.access = 0;
627 
628 	if (is_system_labeled()) {
629 		bslabel_t *clabel = req->rq_label;
630 
631 		ASSERT(clabel != NULL);
632 		DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
633 		    "got client label from request(1)", struct svc_req *, req);
634 
635 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
636 			if ((equal_label = do_rfs_label_check(clabel, vp,
637 			    EQUALITY_CHECK, exi)) == B_FALSE) {
638 				dominant_label = do_rfs_label_check(clabel,
639 				    vp, DOMINANCE_CHECK, exi);
640 			} else
641 				dominant_label = B_TRUE;
642 			admin_low_client = B_FALSE;
643 		} else
644 			admin_low_client = B_TRUE;
645 	}
646 
647 	if (args->access & ACCESS3_READ) {
648 		error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
649 		if (error) {
650 			if (curthread->t_flag & T_WOULDBLOCK)
651 				goto out;
652 		} else if (!MANDLOCK(vp, va.va_mode) &&
653 		    (!is_system_labeled() || admin_low_client ||
654 		    dominant_label))
655 			resp->resok.access |= ACCESS3_READ;
656 	}
657 	if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
658 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
659 		if (error) {
660 			if (curthread->t_flag & T_WOULDBLOCK)
661 				goto out;
662 		} else if (!is_system_labeled() || admin_low_client ||
663 		    dominant_label)
664 			resp->resok.access |= ACCESS3_LOOKUP;
665 	}
666 	if (checkwriteperm &&
667 	    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
668 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
669 		if (error) {
670 			if (curthread->t_flag & T_WOULDBLOCK)
671 				goto out;
672 		} else if (!MANDLOCK(vp, va.va_mode) &&
673 		    (!is_system_labeled() || admin_low_client || equal_label)) {
674 			resp->resok.access |=
675 			    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
676 		}
677 	}
678 	if (checkwriteperm &&
679 	    (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
680 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
681 		if (error) {
682 			if (curthread->t_flag & T_WOULDBLOCK)
683 				goto out;
684 		} else if (!is_system_labeled() || admin_low_client ||
685 		    equal_label)
686 			resp->resok.access |= ACCESS3_DELETE;
687 	}
688 	if (args->access & ACCESS3_EXECUTE) {
689 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
690 		if (error) {
691 			if (curthread->t_flag & T_WOULDBLOCK)
692 				goto out;
693 		} else if (!MANDLOCK(vp, va.va_mode) &&
694 		    (!is_system_labeled() || admin_low_client ||
695 		    dominant_label))
696 			resp->resok.access |= ACCESS3_EXECUTE;
697 	}
698 
699 	va.va_mask = AT_ALL;
700 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
701 
702 	resp->status = NFS3_OK;
703 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
704 
705 	DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
706 	    cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
707 
708 	VN_RELE(vp);
709 
710 	return;
711 
712 out:
713 	if (curthread->t_flag & T_WOULDBLOCK) {
714 		curthread->t_flag &= ~T_WOULDBLOCK;
715 		resp->status = NFS3ERR_JUKEBOX;
716 	} else
717 		resp->status = puterrno3(error);
718 	DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
719 	    cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
720 	if (vp != NULL)
721 		VN_RELE(vp);
722 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
723 }
724 
725 void *
726 rfs3_access_getfh(ACCESS3args *args)
727 {
728 
729 	return (&args->object);
730 }
731 
732 /* ARGSUSED */
733 void
734 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
735 	struct svc_req *req, cred_t *cr)
736 {
737 	int error;
738 	vnode_t *vp;
739 	struct vattr *vap;
740 	struct vattr va;
741 	struct iovec iov;
742 	struct uio uio;
743 	char *data;
744 	struct sockaddr *ca;
745 	char *name = NULL;
746 	int is_referral = 0;
747 
748 	vap = NULL;
749 
750 	vp = nfs3_fhtovp(&args->symlink, exi);
751 
752 	DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
753 	    cred_t *, cr, vnode_t *, vp, READLINK3args *, args);
754 
755 	if (vp == NULL) {
756 		error = ESTALE;
757 		goto out;
758 	}
759 
760 	va.va_mask = AT_ALL;
761 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
762 	if (error)
763 		goto out;
764 
765 	vap = &va;
766 
767 	/* We lied about the object type for a referral */
768 	if (vn_is_nfs_reparse(vp, cr))
769 		is_referral = 1;
770 
771 	if (vp->v_type != VLNK && !is_referral) {
772 		resp->status = NFS3ERR_INVAL;
773 		goto out1;
774 	}
775 
776 	if (MANDLOCK(vp, va.va_mode)) {
777 		resp->status = NFS3ERR_ACCES;
778 		goto out1;
779 	}
780 
781 	if (is_system_labeled()) {
782 		bslabel_t *clabel = req->rq_label;
783 
784 		ASSERT(clabel != NULL);
785 		DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
786 		    "got client label from request(1)", struct svc_req *, req);
787 
788 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
789 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
790 			    exi)) {
791 				resp->status = NFS3ERR_ACCES;
792 				goto out1;
793 			}
794 		}
795 	}
796 
797 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
798 
799 	if (is_referral) {
800 		char *s;
801 		size_t strsz;
802 
803 		/* Get an artificial symlink based on a referral */
804 		s = build_symlink(vp, cr, &strsz);
805 		global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
806 		DTRACE_PROBE2(nfs3serv__func__referral__reflink,
807 		    vnode_t *, vp, char *, s);
808 		if (s == NULL)
809 			error = EINVAL;
810 		else {
811 			error = 0;
812 			(void) strlcpy(data, s, MAXPATHLEN + 1);
813 			kmem_free(s, strsz);
814 		}
815 
816 	} else {
817 
818 		iov.iov_base = data;
819 		iov.iov_len = MAXPATHLEN;
820 		uio.uio_iov = &iov;
821 		uio.uio_iovcnt = 1;
822 		uio.uio_segflg = UIO_SYSSPACE;
823 		uio.uio_extflg = UIO_COPY_CACHED;
824 		uio.uio_loffset = 0;
825 		uio.uio_resid = MAXPATHLEN;
826 
827 		error = VOP_READLINK(vp, &uio, cr, NULL);
828 
829 		if (!error)
830 			*(data + MAXPATHLEN - uio.uio_resid) = '\0';
831 	}
832 
833 	va.va_mask = AT_ALL;
834 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
835 
836 	/* Lie about object type again just to be consistent */
837 	if (is_referral && vap != NULL)
838 		vap->va_type = VLNK;
839 
840 #if 0 /* notyet */
841 	/*
842 	 * Don't do this.  It causes local disk writes when just
843 	 * reading the file and the overhead is deemed larger
844 	 * than the benefit.
845 	 */
846 	/*
847 	 * Force modified metadata out to stable storage.
848 	 */
849 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
850 #endif
851 
852 	if (error) {
853 		kmem_free(data, MAXPATHLEN + 1);
854 		goto out;
855 	}
856 
857 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
858 	name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
859 	    MAXPATHLEN + 1);
860 
861 	if (name == NULL) {
862 		/*
863 		 * Even though the conversion failed, we return
864 		 * something. We just don't translate it.
865 		 */
866 		name = data;
867 	}
868 
869 	resp->status = NFS3_OK;
870 	vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
871 	resp->resok.data = name;
872 
873 	DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
874 	    cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
875 	VN_RELE(vp);
876 
877 	if (name != data)
878 		kmem_free(data, MAXPATHLEN + 1);
879 
880 	return;
881 
882 out:
883 	if (curthread->t_flag & T_WOULDBLOCK) {
884 		curthread->t_flag &= ~T_WOULDBLOCK;
885 		resp->status = NFS3ERR_JUKEBOX;
886 	} else
887 		resp->status = puterrno3(error);
888 out1:
889 	DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
890 	    cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
891 	if (vp != NULL)
892 		VN_RELE(vp);
893 	vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
894 }
895 
896 void *
897 rfs3_readlink_getfh(READLINK3args *args)
898 {
899 
900 	return (&args->symlink);
901 }
902 
903 void
904 rfs3_readlink_free(READLINK3res *resp)
905 {
906 
907 	if (resp->status == NFS3_OK)
908 		kmem_free(resp->resok.data, MAXPATHLEN + 1);
909 }
910 
911 /*
912  * Server routine to handle read
913  * May handle RDMA data as well as mblks
914  */
915 /* ARGSUSED */
916 void
917 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
918 	struct svc_req *req, cred_t *cr)
919 {
920 	int error;
921 	vnode_t *vp;
922 	struct vattr *vap;
923 	struct vattr va;
924 	struct iovec iov, *iovp = NULL;
925 	int iovcnt;
926 	struct uio uio;
927 	u_offset_t offset;
928 	mblk_t *mp = NULL;
929 	int in_crit = 0;
930 	int need_rwunlock = 0;
931 	caller_context_t ct;
932 	int rdma_used = 0;
933 	int loaned_buffers;
934 	struct uio *uiop;
935 
936 	vap = NULL;
937 
938 	vp = nfs3_fhtovp(&args->file, exi);
939 
940 	DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
941 	    cred_t *, cr, vnode_t *, vp, READ3args *, args);
942 
943 	if (vp == NULL) {
944 		error = ESTALE;
945 		goto out;
946 	}
947 
948 	if (args->wlist) {
949 		if (args->count > clist_len(args->wlist)) {
950 			error = EINVAL;
951 			goto out;
952 		}
953 		rdma_used = 1;
954 	}
955 
956 	/* use loaned buffers for TCP */
957 	loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
958 
959 	if (is_system_labeled()) {
960 		bslabel_t *clabel = req->rq_label;
961 
962 		ASSERT(clabel != NULL);
963 		DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
964 		    "got client label from request(1)", struct svc_req *, req);
965 
966 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
967 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
968 			    exi)) {
969 				resp->status = NFS3ERR_ACCES;
970 				goto out1;
971 			}
972 		}
973 	}
974 
975 	ct.cc_sysid = 0;
976 	ct.cc_pid = 0;
977 	ct.cc_caller_id = nfs3_srv_caller_id;
978 	ct.cc_flags = CC_DONTBLOCK;
979 
980 	/*
981 	 * Enter the critical region before calling VOP_RWLOCK
982 	 * to avoid a deadlock with write requests.
983 	 */
984 	if (nbl_need_check(vp)) {
985 		nbl_start_crit(vp, RW_READER);
986 		in_crit = 1;
987 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
988 		    NULL)) {
989 			error = EACCES;
990 			goto out;
991 		}
992 	}
993 
994 	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
995 
996 	/* check if a monitor detected a delegation conflict */
997 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
998 		resp->status = NFS3ERR_JUKEBOX;
999 		goto out1;
1000 	}
1001 
1002 	need_rwunlock = 1;
1003 
1004 	va.va_mask = AT_ALL;
1005 	error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1006 
1007 	/*
1008 	 * If we can't get the attributes, then we can't do the
1009 	 * right access checking.  So, we'll fail the request.
1010 	 */
1011 	if (error)
1012 		goto out;
1013 
1014 	vap = &va;
1015 
1016 	if (vp->v_type != VREG) {
1017 		resp->status = NFS3ERR_INVAL;
1018 		goto out1;
1019 	}
1020 
1021 	if (crgetuid(cr) != va.va_uid) {
1022 		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1023 		if (error) {
1024 			if (curthread->t_flag & T_WOULDBLOCK)
1025 				goto out;
1026 			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1027 			if (error)
1028 				goto out;
1029 		}
1030 	}
1031 
1032 	if (MANDLOCK(vp, va.va_mode)) {
1033 		resp->status = NFS3ERR_ACCES;
1034 		goto out1;
1035 	}
1036 
1037 	offset = args->offset;
1038 	if (offset >= va.va_size) {
1039 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1040 		if (in_crit)
1041 			nbl_end_crit(vp);
1042 		resp->status = NFS3_OK;
1043 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1044 		resp->resok.count = 0;
1045 		resp->resok.eof = TRUE;
1046 		resp->resok.data.data_len = 0;
1047 		resp->resok.data.data_val = NULL;
1048 		resp->resok.data.mp = NULL;
1049 		/* RDMA */
1050 		resp->resok.wlist = args->wlist;
1051 		resp->resok.wlist_len = resp->resok.count;
1052 		if (resp->resok.wlist)
1053 			clist_zero_len(resp->resok.wlist);
1054 		goto done;
1055 	}
1056 
1057 	if (args->count == 0) {
1058 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1059 		if (in_crit)
1060 			nbl_end_crit(vp);
1061 		resp->status = NFS3_OK;
1062 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1063 		resp->resok.count = 0;
1064 		resp->resok.eof = FALSE;
1065 		resp->resok.data.data_len = 0;
1066 		resp->resok.data.data_val = NULL;
1067 		resp->resok.data.mp = NULL;
1068 		/* RDMA */
1069 		resp->resok.wlist = args->wlist;
1070 		resp->resok.wlist_len = resp->resok.count;
1071 		if (resp->resok.wlist)
1072 			clist_zero_len(resp->resok.wlist);
1073 		goto done;
1074 	}
1075 
1076 	/*
1077 	 * do not allocate memory more the max. allowed
1078 	 * transfer size
1079 	 */
1080 	if (args->count > rfs3_tsize(req))
1081 		args->count = rfs3_tsize(req);
1082 
1083 	if (loaned_buffers) {
1084 		uiop = (uio_t *)rfs_setup_xuio(vp);
1085 		ASSERT(uiop != NULL);
1086 		uiop->uio_segflg = UIO_SYSSPACE;
1087 		uiop->uio_loffset = args->offset;
1088 		uiop->uio_resid = args->count;
1089 
1090 		/* Jump to do the read if successful */
1091 		if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1092 			/*
1093 			 * Need to hold the vnode until after VOP_RETZCBUF()
1094 			 * is called.
1095 			 */
1096 			VN_HOLD(vp);
1097 			goto doio_read;
1098 		}
1099 
1100 		DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1101 		    uiop->uio_loffset, int, uiop->uio_resid);
1102 
1103 		uiop->uio_extflg = 0;
1104 		/* failure to setup for zero copy */
1105 		rfs_free_xuio((void *)uiop);
1106 		loaned_buffers = 0;
1107 	}
1108 
1109 	/*
1110 	 * If returning data via RDMA Write, then grab the chunk list.
1111 	 * If we aren't returning READ data w/RDMA_WRITE, then grab
1112 	 * a mblk.
1113 	 */
1114 	if (rdma_used) {
1115 		(void) rdma_get_wchunk(req, &iov, args->wlist);
1116 		uio.uio_iov = &iov;
1117 		uio.uio_iovcnt = 1;
1118 	} else {
1119 		/*
1120 		 * mp will contain the data to be sent out in the read reply.
1121 		 * For UDP, this will be freed after the reply has been sent
1122 		 * out by the driver.  For TCP, it will be freed after the last
1123 		 * segment associated with the reply has been ACKed by the
1124 		 * client.
1125 		 */
1126 		mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
1127 		uio.uio_iov = iovp;
1128 		uio.uio_iovcnt = iovcnt;
1129 	}
1130 
1131 	uio.uio_segflg = UIO_SYSSPACE;
1132 	uio.uio_extflg = UIO_COPY_CACHED;
1133 	uio.uio_loffset = args->offset;
1134 	uio.uio_resid = args->count;
1135 	uiop = &uio;
1136 
1137 doio_read:
1138 	error = VOP_READ(vp, uiop, 0, cr, &ct);
1139 
1140 	if (error) {
1141 		if (mp)
1142 			freemsg(mp);
1143 		/* check if a monitor detected a delegation conflict */
1144 		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1145 			resp->status = NFS3ERR_JUKEBOX;
1146 			goto out1;
1147 		}
1148 		goto out;
1149 	}
1150 
1151 	/* make mblk using zc buffers */
1152 	if (loaned_buffers) {
1153 		mp = uio_to_mblk(uiop);
1154 		ASSERT(mp != NULL);
1155 	}
1156 
1157 	va.va_mask = AT_ALL;
1158 	error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1159 
1160 	if (error)
1161 		vap = NULL;
1162 	else
1163 		vap = &va;
1164 
1165 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1166 
1167 	if (in_crit)
1168 		nbl_end_crit(vp);
1169 
1170 	resp->status = NFS3_OK;
1171 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1172 	resp->resok.count = args->count - uiop->uio_resid;
1173 	if (!error && offset + resp->resok.count == va.va_size)
1174 		resp->resok.eof = TRUE;
1175 	else
1176 		resp->resok.eof = FALSE;
1177 	resp->resok.data.data_len = resp->resok.count;
1178 
1179 	if (mp)
1180 		rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1181 
1182 	resp->resok.data.mp = mp;
1183 	resp->resok.size = (uint_t)args->count;
1184 
1185 	if (rdma_used) {
1186 		resp->resok.data.data_val = (caddr_t)iov.iov_base;
1187 		if (!rdma_setup_read_data3(args, &(resp->resok))) {
1188 			resp->status = NFS3ERR_INVAL;
1189 		}
1190 	} else {
1191 		resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1192 		(resp->resok).wlist = NULL;
1193 	}
1194 
1195 done:
1196 	DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1197 	    cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1198 
1199 	VN_RELE(vp);
1200 
1201 	if (iovp != NULL)
1202 		kmem_free(iovp, iovcnt * sizeof (struct iovec));
1203 
1204 	return;
1205 
1206 out:
1207 	if (curthread->t_flag & T_WOULDBLOCK) {
1208 		curthread->t_flag &= ~T_WOULDBLOCK;
1209 		resp->status = NFS3ERR_JUKEBOX;
1210 	} else
1211 		resp->status = puterrno3(error);
1212 out1:
1213 	DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1214 	    cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1215 
1216 	if (vp != NULL) {
1217 		if (need_rwunlock)
1218 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1219 		if (in_crit)
1220 			nbl_end_crit(vp);
1221 		VN_RELE(vp);
1222 	}
1223 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1224 
1225 	if (iovp != NULL)
1226 		kmem_free(iovp, iovcnt * sizeof (struct iovec));
1227 }
1228 
1229 void
1230 rfs3_read_free(READ3res *resp)
1231 {
1232 	/* Nothing to release unless the reply was built as NFS3_OK. */
1233 	if (resp->status != NFS3_OK)
1234 		return;
1235 
1236 	/* Free the data mblk chain attached to the reply, if any. */
1237 	if (resp->resok.data.mp != NULL)
1238 		freemsg(resp->resok.data.mp);
1239 }
1240 
1241 void *
1242 rfs3_read_getfh(READ3args *args)
1243 {
1244 	/* The file handle is the "file" member of the READ arguments. */
1245 	return ((void *)&args->file);
1246 }
1247 
/*
 * Size of the iovec array that rfs3_write() keeps on its stack.  A write
 * whose mblk chain has more segments than this gets a kmem_alloc()'d
 * iovec array instead.
 */
1248 #define	MAX_IOVECS	12
1249 
/*
 * DEBUG-only counters maintained by rfs3_write(): "hits" counts mblk
 * writes that fit in the on-stack iovec array, "misses" counts the ones
 * that needed an allocated array.
 */
1250 #ifdef DEBUG
1251 static int rfs3_write_hits = 0;
1252 static int rfs3_write_misses = 0;
1253 #endif
1254 
/*
 * rfs3_write - service an NFSv3 WRITE request.
 *
 * args	WRITE3 arguments: file handle, offset, count, stability level and
 *	the data (an mblk chain, an rlist chunk, or a flat data buffer).
 * resp	reply; resok is filled in on NFS3_OK, resfail otherwise.
 * exi	export the file handle is resolved against.
 * req	RPC request, used for the client label and the read-only check.
 * cr	credentials the write is performed with.
 *
 * A zero-length write succeeds without calling VOP_WRITE().  Delegation
 * conflicts reported through CC_WOULDBLOCK, and T_WOULDBLOCK set on the
 * current thread, are returned to the client as NFS3ERR_JUKEBOX.
 *
 * Exit labels: "err" maps error to a status, "err1" expects resp->status
 * to be set already and fills in the failure wcc data, "out" drops the
 * rwlock, the nbmand critical region and the vnode hold.
 */
1255 void
1256 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1257 	struct svc_req *req, cred_t *cr)
1258 {
1259 	int error;
1260 	vnode_t *vp;
1261 	struct vattr *bvap = NULL;
1262 	struct vattr bva;
1263 	struct vattr *avap = NULL;
1264 	struct vattr ava;
1265 	u_offset_t rlimit;
1266 	struct uio uio;
1267 	struct iovec iov[MAX_IOVECS];
1268 	mblk_t *m;
1269 	struct iovec *iovp;
1270 	int iovcnt;
1271 	int ioflag;
1272 	cred_t *savecred;
1273 	int in_crit = 0;
1274 	int rwlock_ret = -1;	/* -1: VOP_RWUNLOCK() is skipped at "out" */
1275 	caller_context_t ct;
1276 
1277 	vp = nfs3_fhtovp(&args->file, exi);
1278 
1279 	DTRACE_NFSV3_4(op__write__start, struct svc_req *, req,
1280 	    cred_t *, cr, vnode_t *, vp, WRITE3args *, args);
1281 
1282 	if (vp == NULL) {
1283 		error = ESTALE;
1284 		goto err;
1285 	}
1286 
1287 	if (is_system_labeled()) {
1288 		bslabel_t *clabel = req->rq_label;
1289 
1290 		ASSERT(clabel != NULL);
1291 		DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1292 		    "got client label from request(1)", struct svc_req *, req);
1293 
1294 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1295 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1296 			    exi)) {
1297 				resp->status = NFS3ERR_ACCES;
1298 				goto err1;
1299 			}
1300 		}
1301 	}
1302 
1303 	ct.cc_sysid = 0;
1304 	ct.cc_pid = 0;
1305 	ct.cc_caller_id = nfs3_srv_caller_id;
1306 	ct.cc_flags = CC_DONTBLOCK;
1307 
1308 	/*
1309 	 * We have to enter the critical region before calling VOP_RWLOCK
1310 	 * to avoid a deadlock with ufs.
1311 	 */
1312 	if (nbl_need_check(vp)) {
1313 		nbl_start_crit(vp, RW_READER);
1314 		in_crit = 1;
1315 		if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1316 		    NULL)) {
1317 			error = EACCES;
1318 			goto err;
1319 		}
1320 	}
1321 
1322 	rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1323 
1324 	/* check if a monitor detected a delegation conflict */
1325 	if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1326 		resp->status = NFS3ERR_JUKEBOX;
1327 		rwlock_ret = -1;
1328 		goto err1;
1329 	}
1330 
1331 
1332 	bva.va_mask = AT_ALL;
1333 	error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1334 
1335 	/*
1336 	 * If we can't get the attributes, then we can't do the
1337 	 * right access checking.  So, we'll fail the request.
1338 	 */
1339 	if (error)
1340 		goto err;
1341 
1342 	bvap = &bva;
1343 	avap = bvap;
1344 
1345 	if (args->count != args->data.data_len) {
1346 		resp->status = NFS3ERR_INVAL;
1347 		goto err1;
1348 	}
1349 
1350 	if (rdonly(exi, req)) {
1351 		resp->status = NFS3ERR_ROFS;
1352 		goto err1;
1353 	}
1354 
1355 	if (vp->v_type != VREG) {
1356 		resp->status = NFS3ERR_INVAL;
1357 		goto err1;
1358 	}
1359 
	/* VOP_ACCESS() is only consulted when the caller is not the owner. */
1360 	if (crgetuid(cr) != bva.va_uid &&
1361 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1362 		goto err;
1363 
1364 	if (MANDLOCK(vp, bva.va_mode)) {
1365 		resp->status = NFS3ERR_ACCES;
1366 		goto err1;
1367 	}
1368 
	/* A zero-length write is answered without calling VOP_WRITE(). */
1369 	if (args->count == 0) {
1370 		resp->status = NFS3_OK;
1371 		vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1372 		resp->resok.count = 0;
1373 		resp->resok.committed = args->stable;
1374 		resp->resok.verf = write3verf;
1375 		goto out;
1376 	}
1377 
	/*
	 * Describe the write data with an iovec array.  An mblk chain gets
	 * one iovec per mblk, using the on-stack array when the chain has
	 * at most MAX_IOVECS segments; the other two sources are a single
	 * buffer of args->count bytes.
	 */
1378 	if (args->mblk != NULL) {
1379 		iovcnt = 0;
1380 		for (m = args->mblk; m != NULL; m = m->b_cont)
1381 			iovcnt++;
1382 		if (iovcnt <= MAX_IOVECS) {
1383 #ifdef DEBUG
1384 			rfs3_write_hits++;
1385 #endif
1386 			iovp = iov;
1387 		} else {
1388 #ifdef DEBUG
1389 			rfs3_write_misses++;
1390 #endif
1391 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1392 		}
1393 		mblk_to_iov(args->mblk, iovcnt, iovp);
1394 
1395 	} else if (args->rlist != NULL) {
1396 		iovcnt = 1;
1397 		iovp = iov;
1398 		iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1399 		iovp->iov_len = args->count;
1400 	} else {
1401 		iovcnt = 1;
1402 		iovp = iov;
1403 		iovp->iov_base = args->data.data_val;
1404 		iovp->iov_len = args->count;
1405 	}
1406 
1407 	uio.uio_iov = iovp;
1408 	uio.uio_iovcnt = iovcnt;
1409 
1410 	uio.uio_segflg = UIO_SYSSPACE;
1411 	uio.uio_extflg = UIO_COPY_DEFAULT;
1412 	uio.uio_loffset = args->offset;
1413 	uio.uio_resid = args->count;
	/* Trim the transfer to what fits below uio_llimit (p_fsz_ctl). */
1414 	uio.uio_llimit = curproc->p_fsz_ctl;
1415 	rlimit = uio.uio_llimit - args->offset;
1416 	if (rlimit < (u_offset_t)uio.uio_resid)
1417 		uio.uio_resid = (int)rlimit;
1418 
	/* Map the requested stability level onto the VOP_WRITE() ioflag. */
1419 	if (args->stable == UNSTABLE)
1420 		ioflag = 0;
1421 	else if (args->stable == FILE_SYNC)
1422 		ioflag = FSYNC;
1423 	else if (args->stable == DATA_SYNC)
1424 		ioflag = FDSYNC;
1425 	else {
1426 		if (iovp != iov)
1427 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
1428 		resp->status = NFS3ERR_INVAL;
1429 		goto err1;
1430 	}
1431 
1432 	/*
1433 	 * We're changing creds because VM may fault and we need
1434 	 * the cred of the current thread to be used if quota
1435 	 * checking is enabled.
1436 	 */
1437 	savecred = curthread->t_cred;
1438 	curthread->t_cred = cr;
1439 	error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1440 	curthread->t_cred = savecred;
1441 
	/* Only an iovec array that was kmem_alloc()'d above is freed. */
1442 	if (iovp != iov)
1443 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
1444 
1445 	/* check if a monitor detected a delegation conflict */
1446 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1447 		resp->status = NFS3ERR_JUKEBOX;
1448 		goto err1;
1449 	}
1450 
	/*
	 * The after attributes are fetched before the write error is
	 * examined, so the failure reply can carry them as well.
	 */
1451 	ava.va_mask = AT_ALL;
1452 	avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1453 
1454 	if (error)
1455 		goto err;
1456 
1457 	/*
1458 	 * If we were unable to get the V_WRITELOCK_TRUE, then we
1459 	 * may not have accurate after attrs, so check if
1460 	 * we have both attributes, they have a non-zero va_seq, and
1461 	 * va_seq has changed by exactly one,
1462 	 * if not, turn off the before attr.
1463 	 */
1464 	if (rwlock_ret != V_WRITELOCK_TRUE) {
1465 		if (bvap == NULL || avap == NULL ||
1466 		    bvap->va_seq == 0 || avap->va_seq == 0 ||
1467 		    avap->va_seq != (bvap->va_seq + 1)) {
1468 			bvap = NULL;
1469 		}
1470 	}
1471 
1472 	resp->status = NFS3_OK;
1473 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1474 	resp->resok.count = args->count - uio.uio_resid;
1475 	resp->resok.committed = args->stable;
1476 	resp->resok.verf = write3verf;
1477 	goto out;
1478 
1479 err:
1480 	if (curthread->t_flag & T_WOULDBLOCK) {
1481 		curthread->t_flag &= ~T_WOULDBLOCK;
1482 		resp->status = NFS3ERR_JUKEBOX;
1483 	} else
1484 		resp->status = puterrno3(error);
1485 err1:
1486 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1487 out:
1488 	DTRACE_NFSV3_4(op__write__done, struct svc_req *, req,
1489 	    cred_t *, cr, vnode_t *, vp, WRITE3res *, resp);
1490 
1491 	if (vp != NULL) {
1492 		if (rwlock_ret != -1)
1493 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1494 		if (in_crit)
1495 			nbl_end_crit(vp);
1496 		VN_RELE(vp);
1497 	}
1498 }
1499 
1500 void *
1501 rfs3_write_getfh(WRITE3args *args)
1502 {
1503 	/* The file handle is the "file" member of the WRITE arguments. */
1504 	return ((void *)&args->file);
1505 }
1506 
/*
 * rfs3_create - service an NFSv3 CREATE request.
 *
 * Creates the regular file args->where.name in the directory identified
 * by args->where.dir, according to args->how.mode:
 *
 *   EXCLUSIVE	the verifier supplied by the client is passed to
 *		VOP_CREATE() as the requested mtime; if the file already
 *		exists the request only succeeds when the existing mtime
 *		matches that verifier.
 *   GUARDED	an existing file fails the request with the status for
 *		EEXIST.
 *   otherwise	a non-exclusive create using the supplied attributes.
 *
 * On success resp->resok carries the new file handle (when makefh3()
 * succeeds), the file attributes and the directory wcc data.  On failure
 * resp->resfail carries the directory wcc data.
 *
 * Exit labels: "out" maps error to a status; "out1" expects resp->status
 * to be set already.  Both free a converted name and release tvp and dvp.
 */
1507 void
1508 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1509 	struct svc_req *req, cred_t *cr)
1510 {
1511 	int error;
1512 	int in_crit = 0;
1513 	vnode_t *vp;
1514 	vnode_t *tvp = NULL;
1515 	vnode_t *dvp;
1516 	struct vattr *vap;
1517 	struct vattr va;
1518 	struct vattr *dbvap;
1519 	struct vattr dbva;
1520 	struct vattr *davap;
1521 	struct vattr dava;
1522 	enum vcexcl excl;
1523 	nfstime3 *mtime;
1524 	len_t reqsize;
1525 	bool_t trunc;
1526 	struct sockaddr *ca;
1527 	char *name = NULL;
1528 
1529 	dbvap = NULL;
1530 	davap = NULL;
1531 
1532 	dvp = nfs3_fhtovp(&args->where.dir, exi);
1533 
1534 	DTRACE_NFSV3_4(op__create__start, struct svc_req *, req,
1535 	    cred_t *, cr, vnode_t *, dvp, CREATE3args *, args);
1536 
1537 	if (dvp == NULL) {
1538 		error = ESTALE;
1539 		goto out;
1540 	}
1541 
1542 	dbva.va_mask = AT_ALL;
1543 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1544 	davap = dbvap;
1545 
1546 	if (args->where.name == nfs3nametoolong) {
1547 		resp->status = NFS3ERR_NAMETOOLONG;
1548 		goto out1;
1549 	}
1550 
1551 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1552 		resp->status = NFS3ERR_ACCES;
1553 		goto out1;
1554 	}
1555 
1556 	if (rdonly(exi, req)) {
1557 		resp->status = NFS3ERR_ROFS;
1558 		goto out1;
1559 	}
1560 
1561 	if (is_system_labeled()) {
1562 		bslabel_t *clabel = req->rq_label;
1563 
1564 		ASSERT(clabel != NULL);
1565 		DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1566 		    "got client label from request(1)", struct svc_req *, req);
1567 
1568 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1569 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1570 			    exi)) {
1571 				resp->status = NFS3ERR_ACCES;
1572 				goto out1;
1573 			}
1574 		}
1575 	}
1576 
1577 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1578 	name = nfscmd_convname(ca, exi, args->where.name,
1579 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1580 
1581 	if (name == NULL) {
1582 		/* This is really a Solaris EILSEQ */
1583 		resp->status = NFS3ERR_INVAL;
1584 		goto out1;
1585 	}
1586 
1587 	if (args->how.mode == EXCLUSIVE) {
1588 		va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1589 		va.va_type = VREG;
1590 		va.va_mode = (mode_t)0;
1591 		/*
1592 		 * Ensure no time overflows and that types match
1593 		 */
1594 		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1595 		va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1596 		va.va_mtime.tv_nsec = mtime->nseconds;
1597 		excl = EXCL;
1598 	} else {
1599 		error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1600 		    &va);
1601 		if (error)
1602 			goto out;
1603 		va.va_mask |= AT_TYPE;
1604 		va.va_type = VREG;
1605 		if (args->how.mode == GUARDED)
1606 			excl = EXCL;
1607 		else {
1608 			excl = NONEXCL;
1609 
1610 			/*
1611 			 * During creation of file in non-exclusive mode
1612 			 * if size of file is being set then make sure
1613 			 * that if the file already exists that no conflicting
1614 			 * non-blocking mandatory locks exists in the region
1615 			 * being modified. If there are conflicting locks fail
1616 			 * the operation with EACCES.
1617 			 */
1618 			if (va.va_mask & AT_SIZE) {
1619 				struct vattr tva;
1620 
1621 				/*
1622 				 * Does file already exist?
1623 				 */
1624 				error = VOP_LOOKUP(dvp, name, &tvp,
1625 				    NULL, 0, NULL, cr, NULL, NULL, NULL);
1626 
1627 				/*
1628 				 * Check to see if the file has been delegated
1629 				 * to a v4 client.  If so, then begin recall of
1630 				 * the delegation and return JUKEBOX to allow
1631 				 * the client to retransmit its request.
1632 				 */
1633 
1634 				trunc = va.va_size == 0;
1635 				if (!error &&
1636 				    rfs4_check_delegated(FWRITE, tvp, trunc)) {
1637 					resp->status = NFS3ERR_JUKEBOX;
1638 					goto out1;
1639 				}
1640 
1641 				/*
1642 				 * Check for NBMAND lock conflicts
1643 				 */
1644 				if (!error && nbl_need_check(tvp)) {
1645 					u_offset_t offset;
1646 					ssize_t len;
1647 
1648 					nbl_start_crit(tvp, RW_READER);
1649 					in_crit = 1;
1650 
1651 					tva.va_mask = AT_SIZE;
1652 					error = VOP_GETATTR(tvp, &tva, 0, cr,
1653 					    NULL);
1654 					/*
1655 					 * Can't check for conflicts, so return
1656 					 * error.
1657 					 */
1658 					if (error)
1659 						goto out;
1660 
					/*
					 * The region checked lies between the
					 * current and the requested size.
					 */
1661 					offset = tva.va_size < va.va_size ?
1662 					    tva.va_size : va.va_size;
1663 					len = tva.va_size < va.va_size ?
1664 					    va.va_size - tva.va_size :
1665 					    tva.va_size - va.va_size;
1666 					if (nbl_conflict(tvp, NBL_WRITE,
1667 					    offset, len, 0, NULL)) {
1668 						error = EACCES;
1669 						goto out;
1670 					}
1671 				} else if (tvp) {
1672 					VN_RELE(tvp);
1673 					tvp = NULL;
1674 				}
1675 			}
1676 		}
		/* Remember the requested size for the post-create check. */
1677 		if (va.va_mask & AT_SIZE)
1678 			reqsize = va.va_size;
1679 	}
1680 
1681 	/*
1682 	 * Must specify the mode.
1683 	 */
1684 	if (!(va.va_mask & AT_MODE)) {
1685 		resp->status = NFS3ERR_INVAL;
1686 		goto out1;
1687 	}
1688 
1689 	/*
1690 	 * If the filesystem is exported with nosuid, then mask off
1691 	 * the setuid and setgid bits.
1692 	 */
1693 	if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1694 		va.va_mode &= ~(VSUID | VSGID);
1695 
1696 tryagain:
1697 	/*
1698 	 * The file open mode used is VWRITE.  If the client needs
1699 	 * some other semantic, then it should do the access checking
1700 	 * itself.  It would have been nice to have the file open mode
1701 	 * passed as part of the arguments.
1702 	 */
1703 	error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1704 	    &vp, cr, 0, NULL, NULL);
1705 
1706 	dava.va_mask = AT_ALL;
1707 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1708 
1709 	if (error) {
1710 		/*
1711 		 * If we got something other than file already exists
1712 		 * then just return this error.  Otherwise, we got
1713 		 * EEXIST.  If we were doing a GUARDED create, then
1714 		 * just return this error.  Otherwise, we need to
1715 		 * make sure that this wasn't a duplicate of an
1716 		 * exclusive create request.
1717 		 *
1718 		 * The assumption is made that a non-exclusive create
1719 		 * request will never return EEXIST.
1720 		 */
1721 		if (error != EEXIST || args->how.mode == GUARDED)
1722 			goto out;
1723 		/*
1724 		 * Lookup the file so that we can get a vnode for it.
1725 		 */
1726 		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1727 		    NULL, cr, NULL, NULL, NULL);
1728 		if (error) {
1729 			/*
1730 			 * We couldn't find the file that we thought that
1731 			 * we just created.  So, we'll just try creating
1732 			 * it again.
1733 			 */
1734 			if (error == ENOENT)
1735 				goto tryagain;
1736 			goto out;
1737 		}
1738 
1739 		/*
1740 		 * If the file is delegated to a v4 client, go ahead
1741 		 * and initiate recall, this create is a hint that a
1742 		 * conflicting v3 open has occurred.
1743 		 */
1744 
1745 		if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1746 			VN_RELE(vp);
1747 			resp->status = NFS3ERR_JUKEBOX;
1748 			goto out1;
1749 		}
1750 
1751 		va.va_mask = AT_ALL;
1752 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1753 
1754 		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1755 		/* % with INT32_MAX to prevent overflows */
1756 		if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1757 		    vap->va_mtime.tv_sec !=
1758 		    (mtime->seconds % INT32_MAX) ||
1759 		    vap->va_mtime.tv_nsec != mtime->nseconds)) {
1760 			VN_RELE(vp);
1761 			error = EEXIST;
1762 			goto out;
1763 		}
1764 	} else {
1765 
1766 		if ((args->how.mode == UNCHECKED ||
1767 		    args->how.mode == GUARDED) &&
1768 		    args->how.createhow3_u.obj_attributes.size.set_it &&
1769 		    va.va_size == 0)
1770 			trunc = TRUE;
1771 		else
1772 			trunc = FALSE;
1773 
1774 		if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1775 			VN_RELE(vp);
1776 			resp->status = NFS3ERR_JUKEBOX;
1777 			goto out1;
1778 		}
1779 
1780 		va.va_mask = AT_ALL;
1781 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1782 
1783 		/*
1784 		 * We need to check to make sure that the file got
1785 		 * created to the indicated size.  If not, we do a
1786 		 * setattr to try to change the size, but we don't
1787 		 * try too hard.  This shouldn't be a problem as most
1788 		 * clients will only specify a size of zero which
1789 		 * local file systems handle.  However, even if
1790 		 * the client does specify a non-zero size, it can
1791 		 * still recover by checking the size of the file
1792 		 * after it has created it and then issue a setattr
1793 		 * request of its own to set the size of the file.
1794 		 */
1795 		if (vap != NULL &&
1796 		    (args->how.mode == UNCHECKED ||
1797 		    args->how.mode == GUARDED) &&
1798 		    args->how.createhow3_u.obj_attributes.size.set_it &&
1799 		    vap->va_size != reqsize) {
1800 			va.va_mask = AT_SIZE;
1801 			va.va_size = reqsize;
1802 			(void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1803 			va.va_mask = AT_ALL;
1804 			vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1805 		}
1806 	}
1807 
	/* The name is only freed when it is not the caller's own string. */
1808 	if (name != args->where.name)
1809 		kmem_free(name, MAXPATHLEN + 1);
1810 
1811 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1812 	if (error)
1813 		resp->resok.obj.handle_follows = FALSE;
1814 	else
1815 		resp->resok.obj.handle_follows = TRUE;
1816 
1817 	/*
1818 	 * Force modified data and metadata out to stable storage.
1819 	 */
1820 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1821 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
1822 
1823 	VN_RELE(vp);
1824 	if (tvp != NULL) {
1825 		if (in_crit)
1826 			nbl_end_crit(tvp);
1827 		VN_RELE(tvp);
1828 	}
1829 
1830 	resp->status = NFS3_OK;
1831 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1832 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1833 
1834 	DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1835 	    cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1836 
1837 	VN_RELE(dvp);
1838 	return;
1839 
1840 out:
1841 	if (curthread->t_flag & T_WOULDBLOCK) {
1842 		curthread->t_flag &= ~T_WOULDBLOCK;
1843 		resp->status = NFS3ERR_JUKEBOX;
1844 	} else
1845 		resp->status = puterrno3(error);
1846 out1:
1847 	DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1848 	    cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1849 
1850 	if (name != NULL && name != args->where.name)
1851 		kmem_free(name, MAXPATHLEN + 1);
1852 
1853 	if (tvp != NULL) {
1854 		if (in_crit)
1855 			nbl_end_crit(tvp);
1856 		VN_RELE(tvp);
1857 	}
1858 	if (dvp != NULL)
1859 		VN_RELE(dvp);
1860 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1861 }
1862 
1863 void *
1864 rfs3_create_getfh(CREATE3args *args)
1865 {
1866 	/* CREATE is keyed on the handle of the directory created in. */
1867 	return ((void *)&args->where.dir);
1868 }
1869 
/*
 * rfs3_mkdir - service an NFSv3 MKDIR request.
 *
 * Creates the directory args->where.name under the directory identified
 * by args->where.dir, using the attributes in args->attributes; a mode
 * must be supplied.  The parent directory is pushed out with VOP_FSYNC()
 * whether or not VOP_MKDIR() succeeded, and the new directory is pushed
 * out before the successful reply is built.
 *
 * On success resp->resok carries the new handle (when makefh3() succeeds),
 * the new directory's attributes and the parent's wcc data.  On failure
 * resp->resfail carries the parent's wcc data.
 *
 * Exit labels: "out" maps error to a status; "out1" expects resp->status
 * to be set already.
 */
1870 void
1871 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1872 	struct svc_req *req, cred_t *cr)
1873 {
1874 	int error;
1875 	vnode_t *vp = NULL;
1876 	vnode_t *dvp;
1877 	struct vattr *vap;
1878 	struct vattr va;
1879 	struct vattr *dbvap;
1880 	struct vattr dbva;
1881 	struct vattr *davap;
1882 	struct vattr dava;
1883 	struct sockaddr *ca;
1884 	char *name = NULL;
1885 
1886 	dbvap = NULL;
1887 	davap = NULL;
1888 
1889 	dvp = nfs3_fhtovp(&args->where.dir, exi);
1890 
1891 	DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req,
1892 	    cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args);
1893 
1894 	if (dvp == NULL) {
1895 		error = ESTALE;
1896 		goto out;
1897 	}
1898 
	/* Before attributes of the parent; also the default after attrs. */
1899 	dbva.va_mask = AT_ALL;
1900 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1901 	davap = dbvap;
1902 
1903 	if (args->where.name == nfs3nametoolong) {
1904 		resp->status = NFS3ERR_NAMETOOLONG;
1905 		goto out1;
1906 	}
1907 
1908 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1909 		resp->status = NFS3ERR_ACCES;
1910 		goto out1;
1911 	}
1912 
1913 	if (rdonly(exi, req)) {
1914 		resp->status = NFS3ERR_ROFS;
1915 		goto out1;
1916 	}
1917 
1918 	if (is_system_labeled()) {
1919 		bslabel_t *clabel = req->rq_label;
1920 
1921 		ASSERT(clabel != NULL);
1922 		DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1923 		    "got client label from request(1)", struct svc_req *, req);
1924 
1925 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1926 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1927 			    exi)) {
1928 				resp->status = NFS3ERR_ACCES;
1929 				goto out1;
1930 			}
1931 		}
1932 	}
1933 
1934 	error = sattr3_to_vattr(&args->attributes, &va);
1935 	if (error)
1936 		goto out;
1937 
	/* Must specify the mode. */
1938 	if (!(va.va_mask & AT_MODE)) {
1939 		resp->status = NFS3ERR_INVAL;
1940 		goto out1;
1941 	}
1942 
1943 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1944 	name = nfscmd_convname(ca, exi, args->where.name,
1945 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1946 
1947 	if (name == NULL) {
1948 		resp->status = NFS3ERR_INVAL;
1949 		goto out1;
1950 	}
1951 
1952 	va.va_mask |= AT_TYPE;
1953 	va.va_type = VDIR;
1954 
1955 	error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
1956 
	/*
	 * The name is not used past this point, so a converted copy is
	 * freed here and the exit paths below do not free it again.
	 */
1957 	if (name != args->where.name)
1958 		kmem_free(name, MAXPATHLEN + 1);
1959 
1960 	dava.va_mask = AT_ALL;
1961 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1962 
1963 	/*
1964 	 * Force modified data and metadata out to stable storage.
1965 	 */
1966 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
1967 
1968 	if (error)
1969 		goto out;
1970 
1971 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1972 	if (error)
1973 		resp->resok.obj.handle_follows = FALSE;
1974 	else
1975 		resp->resok.obj.handle_follows = TRUE;
1976 
1977 	va.va_mask = AT_ALL;
1978 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1979 
1980 	/*
1981 	 * Force modified data and metadata out to stable storage.
1982 	 */
1983 	(void) VOP_FSYNC(vp, 0, cr, NULL);
1984 
1985 	VN_RELE(vp);
1986 
1987 	resp->status = NFS3_OK;
1988 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1989 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1990 
1991 	DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
1992 	    cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
1993 	VN_RELE(dvp);
1994 
1995 	return;
1996 
1997 out:
1998 	if (curthread->t_flag & T_WOULDBLOCK) {
1999 		curthread->t_flag &= ~T_WOULDBLOCK;
2000 		resp->status = NFS3ERR_JUKEBOX;
2001 	} else
2002 		resp->status = puterrno3(error);
2003 out1:
2004 	DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
2005 	    cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
2006 	if (dvp != NULL)
2007 		VN_RELE(dvp);
2008 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2009 }
2010 
2011 void *
2012 rfs3_mkdir_getfh(MKDIR3args *args)
2013 {
2014 	/* MKDIR is keyed on the handle of the parent directory. */
2015 	return ((void *)&args->where.dir);
2016 }
2017 
/*
 * rfs3_symlink - service an NFSv3 SYMLINK request.
 *
 * Creates the symbolic link args->where.name in the directory identified
 * by args->where.dir, pointing at args->symlink.symlink_data, with the
 * attributes in args->symlink.symlink_attributes; a mode must be supplied.
 *
 * VOP_SYMLINK() does not hand back a vnode here, so the new link is looked
 * up afterwards to build the handle and attributes.  If that lookup fails
 * the reply is still NFS3_OK, just without a handle or attributes.
 *
 * Exit labels: "err" maps error to a status, "err1" expects resp->status
 * to be set already and fills in the failure wcc data, "out" frees the
 * converted strings and releases dvp.
 */
2018 void
2019 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2020 	struct svc_req *req, cred_t *cr)
2021 {
2022 	int error;
2023 	vnode_t *vp;
2024 	vnode_t *dvp;
2025 	struct vattr *vap;
2026 	struct vattr va;
2027 	struct vattr *dbvap;
2028 	struct vattr dbva;
2029 	struct vattr *davap;
2030 	struct vattr dava;
2031 	struct sockaddr *ca;
2032 	char *name = NULL;
2033 	char *symdata = NULL;
2034 
2035 	dbvap = NULL;
2036 	davap = NULL;
2037 
2038 	dvp = nfs3_fhtovp(&args->where.dir, exi);
2039 
2040 	DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req,
2041 	    cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args);
2042 
2043 	if (dvp == NULL) {
2044 		error = ESTALE;
2045 		goto err;
2046 	}
2047 
	/* Before attributes of the parent; also the default after attrs. */
2048 	dbva.va_mask = AT_ALL;
2049 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2050 	davap = dbvap;
2051 
2052 	if (args->where.name == nfs3nametoolong) {
2053 		resp->status = NFS3ERR_NAMETOOLONG;
2054 		goto err1;
2055 	}
2056 
2057 	if (args->where.name == NULL || *(args->where.name) == '\0') {
2058 		resp->status = NFS3ERR_ACCES;
2059 		goto err1;
2060 	}
2061 
2062 	if (rdonly(exi, req)) {
2063 		resp->status = NFS3ERR_ROFS;
2064 		goto err1;
2065 	}
2066 
2067 	if (is_system_labeled()) {
2068 		bslabel_t *clabel = req->rq_label;
2069 
2070 		ASSERT(clabel != NULL);
2071 		DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2072 		    "got client label from request(1)", struct svc_req *, req);
2073 
2074 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2075 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2076 			    exi)) {
2077 				resp->status = NFS3ERR_ACCES;
2078 				goto err1;
2079 			}
2080 		}
2081 	}
2082 
2083 	error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2084 	if (error)
2085 		goto err;
2086 
	/* Must specify the mode. */
2087 	if (!(va.va_mask & AT_MODE)) {
2088 		resp->status = NFS3ERR_INVAL;
2089 		goto err1;
2090 	}
2091 
2092 	if (args->symlink.symlink_data == nfs3nametoolong) {
2093 		resp->status = NFS3ERR_NAMETOOLONG;
2094 		goto err1;
2095 	}
2096 
2097 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2098 	name = nfscmd_convname(ca, exi, args->where.name,
2099 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2100 
2101 	if (name == NULL) {
2102 		/* This is really a Solaris EILSEQ */
2103 		resp->status = NFS3ERR_INVAL;
2104 		goto err1;
2105 	}
2106 
2107 	symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2108 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2109 	if (symdata == NULL) {
2110 		/* This is really a Solaris EILSEQ */
2111 		resp->status = NFS3ERR_INVAL;
2112 		goto err1;
2113 	}
2114 
2115 
2116 	va.va_mask |= AT_TYPE;
2117 	va.va_type = VLNK;
2118 
2119 	error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2120 
2121 	dava.va_mask = AT_ALL;
2122 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2123 
2124 	if (error)
2125 		goto err;
2126 
	/* Find the link that was just created to get a vnode for it. */
2127 	error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2128 	    NULL, NULL, NULL);
2129 
2130 	/*
2131 	 * Force modified data and metadata out to stable storage.
2132 	 */
2133 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
2134 
2135 
	/*
	 * The link exists at this point, so the request succeeds even if
	 * the lookup failed; the reply then has no handle or attributes.
	 */
2136 	resp->status = NFS3_OK;
2137 	if (error) {
2138 		resp->resok.obj.handle_follows = FALSE;
2139 		vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2140 		vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2141 		goto out;
2142 	}
2143 
2144 	error = makefh3(&resp->resok.obj.handle, vp, exi);
2145 	if (error)
2146 		resp->resok.obj.handle_follows = FALSE;
2147 	else
2148 		resp->resok.obj.handle_follows = TRUE;
2149 
2150 	va.va_mask = AT_ALL;
2151 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2152 
2153 	/*
2154 	 * Force modified data and metadata out to stable storage.
2155 	 */
2156 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2157 
2158 	VN_RELE(vp);
2159 
2160 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2161 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2162 	goto out;
2163 
2164 err:
2165 	if (curthread->t_flag & T_WOULDBLOCK) {
2166 		curthread->t_flag &= ~T_WOULDBLOCK;
2167 		resp->status = NFS3ERR_JUKEBOX;
2168 	} else
2169 		resp->status = puterrno3(error);
2170 err1:
2171 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2172 out:
	/* Strings are only freed when they are not the caller's own. */
2173 	if (name != NULL && name != args->where.name)
2174 		kmem_free(name, MAXPATHLEN + 1);
2175 	if (symdata != NULL && symdata != args->symlink.symlink_data)
2176 		kmem_free(symdata, MAXPATHLEN + 1);
2177 
2178 	DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req,
2179 	    cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp);
2180 
2181 	if (dvp != NULL)
2182 		VN_RELE(dvp);
2183 }
2184 
2185 void *
2186 rfs3_symlink_getfh(SYMLINK3args *args)
2187 {
2188 	/* SYMLINK is keyed on the handle of the containing directory. */
2189 	return ((void *)&args->where.dir);
2190 }
2191 
2192 void
2193 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2194 	struct svc_req *req, cred_t *cr)
2195 {
2196 	int error;
2197 	vnode_t *vp;
2198 	vnode_t *realvp;
2199 	vnode_t *dvp;
2200 	struct vattr *vap;
2201 	struct vattr va;
2202 	struct vattr *dbvap;
2203 	struct vattr dbva;
2204 	struct vattr *davap;
2205 	struct vattr dava;
2206 	int mode;
2207 	enum vcexcl excl;
2208 	struct sockaddr *ca;
2209 	char *name = NULL;
2210 
2211 	dbvap = NULL;
2212 	davap = NULL;
2213 
2214 	dvp = nfs3_fhtovp(&args->where.dir, exi);
2215 
2216 	DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req,
2217 	    cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args);
2218 
2219 	if (dvp == NULL) {
2220 		error = ESTALE;
2221 		goto out;
2222 	}
2223 
2224 	dbva.va_mask = AT_ALL;
2225 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2226 	davap = dbvap;
2227 
2228 	if (args->where.name == nfs3nametoolong) {
2229 		resp->status = NFS3ERR_NAMETOOLONG;
2230 		goto out1;
2231 	}
2232 
2233 	if (args->where.name == NULL || *(args->where.name) == '\0') {
2234 		resp->status = NFS3ERR_ACCES;
2235 		goto out1;
2236 	}
2237 
2238 	if (rdonly(exi, req)) {
2239 		resp->status = NFS3ERR_ROFS;
2240 		goto out1;
2241 	}
2242 
2243 	if (is_system_labeled()) {
2244 		bslabel_t *clabel = req->rq_label;
2245 
2246 		ASSERT(clabel != NULL);
2247 		DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2248 		    "got client label from request(1)", struct svc_req *, req);
2249 
2250 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2251 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2252 			    exi)) {
2253 				resp->status = NFS3ERR_ACCES;
2254 				goto out1;
2255 			}
2256 		}
2257 	}
2258 
2259 	switch (args->what.type) {
2260 	case NF3CHR:
2261 	case NF3BLK:
2262 		error = sattr3_to_vattr(
2263 		    &args->what.mknoddata3_u.device.dev_attributes, &va);
2264 		if (error)
2265 			goto out;
2266 		if (secpolicy_sys_devices(cr) != 0) {
2267 			resp->status = NFS3ERR_PERM;
2268 			goto out1;
2269 		}
2270 		if (args->what.type == NF3CHR)
2271 			va.va_type = VCHR;
2272 		else
2273 			va.va_type = VBLK;
2274 		va.va_rdev = makedevice(
2275 		    args->what.mknoddata3_u.device.spec.specdata1,
2276 		    args->what.mknoddata3_u.device.spec.specdata2);
2277 		va.va_mask |= AT_TYPE | AT_RDEV;
2278 		break;
2279 	case NF3SOCK:
2280 		error = sattr3_to_vattr(
2281 		    &args->what.mknoddata3_u.pipe_attributes, &va);
2282 		if (error)
2283 			goto out;
2284 		va.va_type = VSOCK;
2285 		va.va_mask |= AT_TYPE;
2286 		break;
2287 	case NF3FIFO:
2288 		error = sattr3_to_vattr(
2289 		    &args->what.mknoddata3_u.pipe_attributes, &va);
2290 		if (error)
2291 			goto out;
2292 		va.va_type = VFIFO;
2293 		va.va_mask |= AT_TYPE;
2294 		break;
2295 	default:
2296 		resp->status = NFS3ERR_BADTYPE;
2297 		goto out1;
2298 	}
2299 
2300 	/*
2301 	 * Must specify the mode.
2302 	 */
2303 	if (!(va.va_mask & AT_MODE)) {
2304 		resp->status = NFS3ERR_INVAL;
2305 		goto out1;
2306 	}
2307 
2308 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2309 	name = nfscmd_convname(ca, exi, args->where.name,
2310 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2311 
2312 	if (name == NULL) {
2313 		resp->status = NFS3ERR_INVAL;
2314 		goto out1;
2315 	}
2316 
2317 	excl = EXCL;
2318 
2319 	mode = 0;
2320 
2321 	error = VOP_CREATE(dvp, name, &va, excl, mode,
2322 	    &vp, cr, 0, NULL, NULL);
2323 
2324 	if (name != args->where.name)
2325 		kmem_free(name, MAXPATHLEN + 1);
2326 
2327 	dava.va_mask = AT_ALL;
2328 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2329 
2330 	/*
2331 	 * Force modified data and metadata out to stable storage.
2332 	 */
2333 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
2334 
2335 	if (error)
2336 		goto out;
2337 
2338 	resp->status = NFS3_OK;
2339 
2340 	error = makefh3(&resp->resok.obj.handle, vp, exi);
2341 	if (error)
2342 		resp->resok.obj.handle_follows = FALSE;
2343 	else
2344 		resp->resok.obj.handle_follows = TRUE;
2345 
2346 	va.va_mask = AT_ALL;
2347 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2348 
2349 	/*
2350 	 * Force modified metadata out to stable storage.
2351 	 *
2352 	 * if a underlying vp exists, pass it to VOP_FSYNC
2353 	 */
2354 	if (VOP_REALVP(vp, &realvp, NULL) == 0)
2355 		(void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2356 	else
2357 		(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2358 
2359 	VN_RELE(vp);
2360 
2361 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2362 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2363 	DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2364 	    cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2365 	VN_RELE(dvp);
2366 	return;
2367 
2368 out:
2369 	if (curthread->t_flag & T_WOULDBLOCK) {
2370 		curthread->t_flag &= ~T_WOULDBLOCK;
2371 		resp->status = NFS3ERR_JUKEBOX;
2372 	} else
2373 		resp->status = puterrno3(error);
2374 out1:
2375 	DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2376 	    cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2377 	if (dvp != NULL)
2378 		VN_RELE(dvp);
2379 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2380 }
2381 
2382 void *
2383 rfs3_mknod_getfh(MKNOD3args *args)
2384 {
2385 
2386 	return (&args->where.dir);
2387 }
2388 
2389 void
2390 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2391 	struct svc_req *req, cred_t *cr)
2392 {
2393 	int error = 0;
2394 	vnode_t *vp;
2395 	struct vattr *bvap;
2396 	struct vattr bva;
2397 	struct vattr *avap;
2398 	struct vattr ava;
2399 	vnode_t *targvp = NULL;
2400 	struct sockaddr *ca;
2401 	char *name = NULL;
2402 
2403 	bvap = NULL;
2404 	avap = NULL;
2405 
2406 	vp = nfs3_fhtovp(&args->object.dir, exi);
2407 
2408 	DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req,
2409 	    cred_t *, cr, vnode_t *, vp, REMOVE3args *, args);
2410 
2411 	if (vp == NULL) {
2412 		error = ESTALE;
2413 		goto err;
2414 	}
2415 
2416 	bva.va_mask = AT_ALL;
2417 	bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2418 	avap = bvap;
2419 
2420 	if (vp->v_type != VDIR) {
2421 		resp->status = NFS3ERR_NOTDIR;
2422 		goto err1;
2423 	}
2424 
2425 	if (args->object.name == nfs3nametoolong) {
2426 		resp->status = NFS3ERR_NAMETOOLONG;
2427 		goto err1;
2428 	}
2429 
2430 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2431 		resp->status = NFS3ERR_ACCES;
2432 		goto err1;
2433 	}
2434 
2435 	if (rdonly(exi, req)) {
2436 		resp->status = NFS3ERR_ROFS;
2437 		goto err1;
2438 	}
2439 
2440 	if (is_system_labeled()) {
2441 		bslabel_t *clabel = req->rq_label;
2442 
2443 		ASSERT(clabel != NULL);
2444 		DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2445 		    "got client label from request(1)", struct svc_req *, req);
2446 
2447 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2448 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2449 			    exi)) {
2450 				resp->status = NFS3ERR_ACCES;
2451 				goto err1;
2452 			}
2453 		}
2454 	}
2455 
2456 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2457 	name = nfscmd_convname(ca, exi, args->object.name,
2458 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2459 
2460 	if (name == NULL) {
2461 		resp->status = NFS3ERR_INVAL;
2462 		goto err1;
2463 	}
2464 
2465 	/*
2466 	 * Check for a conflict with a non-blocking mandatory share
2467 	 * reservation and V4 delegations
2468 	 */
2469 	error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2470 	    NULL, cr, NULL, NULL, NULL);
2471 	if (error != 0)
2472 		goto err;
2473 
2474 	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2475 		resp->status = NFS3ERR_JUKEBOX;
2476 		goto err1;
2477 	}
2478 
2479 	if (!nbl_need_check(targvp)) {
2480 		error = VOP_REMOVE(vp, name, cr, NULL, 0);
2481 	} else {
2482 		nbl_start_crit(targvp, RW_READER);
2483 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2484 			error = EACCES;
2485 		} else {
2486 			error = VOP_REMOVE(vp, name, cr, NULL, 0);
2487 		}
2488 		nbl_end_crit(targvp);
2489 	}
2490 	VN_RELE(targvp);
2491 	targvp = NULL;
2492 
2493 	ava.va_mask = AT_ALL;
2494 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2495 
2496 	/*
2497 	 * Force modified data and metadata out to stable storage.
2498 	 */
2499 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2500 
2501 	if (error)
2502 		goto err;
2503 
2504 	resp->status = NFS3_OK;
2505 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2506 	goto out;
2507 
2508 err:
2509 	if (curthread->t_flag & T_WOULDBLOCK) {
2510 		curthread->t_flag &= ~T_WOULDBLOCK;
2511 		resp->status = NFS3ERR_JUKEBOX;
2512 	} else
2513 		resp->status = puterrno3(error);
2514 err1:
2515 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2516 out:
2517 	DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req,
2518 	    cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp);
2519 
2520 	if (name != NULL && name != args->object.name)
2521 		kmem_free(name, MAXPATHLEN + 1);
2522 
2523 	if (vp != NULL)
2524 		VN_RELE(vp);
2525 }
2526 
2527 void *
2528 rfs3_remove_getfh(REMOVE3args *args)
2529 {
2530 
2531 	return (&args->object.dir);
2532 }
2533 
2534 void
2535 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2536 	struct svc_req *req, cred_t *cr)
2537 {
2538 	int error;
2539 	vnode_t *vp;
2540 	struct vattr *bvap;
2541 	struct vattr bva;
2542 	struct vattr *avap;
2543 	struct vattr ava;
2544 	struct sockaddr *ca;
2545 	char *name = NULL;
2546 
2547 	bvap = NULL;
2548 	avap = NULL;
2549 
2550 	vp = nfs3_fhtovp(&args->object.dir, exi);
2551 
2552 	DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req,
2553 	    cred_t *, cr, vnode_t *, vp, RMDIR3args *, args);
2554 
2555 	if (vp == NULL) {
2556 		error = ESTALE;
2557 		goto err;
2558 	}
2559 
2560 	bva.va_mask = AT_ALL;
2561 	bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2562 	avap = bvap;
2563 
2564 	if (vp->v_type != VDIR) {
2565 		resp->status = NFS3ERR_NOTDIR;
2566 		goto err1;
2567 	}
2568 
2569 	if (args->object.name == nfs3nametoolong) {
2570 		resp->status = NFS3ERR_NAMETOOLONG;
2571 		goto err1;
2572 	}
2573 
2574 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2575 		resp->status = NFS3ERR_ACCES;
2576 		goto err1;
2577 	}
2578 
2579 	if (rdonly(exi, req)) {
2580 		resp->status = NFS3ERR_ROFS;
2581 		goto err1;
2582 	}
2583 
2584 	if (is_system_labeled()) {
2585 		bslabel_t *clabel = req->rq_label;
2586 
2587 		ASSERT(clabel != NULL);
2588 		DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2589 		    "got client label from request(1)", struct svc_req *, req);
2590 
2591 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2592 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2593 			    exi)) {
2594 				resp->status = NFS3ERR_ACCES;
2595 				goto err1;
2596 			}
2597 		}
2598 	}
2599 
2600 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2601 	name = nfscmd_convname(ca, exi, args->object.name,
2602 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2603 
2604 	if (name == NULL) {
2605 		resp->status = NFS3ERR_INVAL;
2606 		goto err1;
2607 	}
2608 
2609 	error = VOP_RMDIR(vp, name, rootdir, cr, NULL, 0);
2610 
2611 	if (name != args->object.name)
2612 		kmem_free(name, MAXPATHLEN + 1);
2613 
2614 	ava.va_mask = AT_ALL;
2615 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2616 
2617 	/*
2618 	 * Force modified data and metadata out to stable storage.
2619 	 */
2620 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2621 
2622 	if (error) {
2623 		/*
2624 		 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2625 		 * if the directory is not empty.  A System V NFS server
2626 		 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2627 		 * over the wire.
2628 		 */
2629 		if (error == EEXIST)
2630 			error = ENOTEMPTY;
2631 		goto err;
2632 	}
2633 
2634 	resp->status = NFS3_OK;
2635 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2636 	goto out;
2637 
2638 err:
2639 	if (curthread->t_flag & T_WOULDBLOCK) {
2640 		curthread->t_flag &= ~T_WOULDBLOCK;
2641 		resp->status = NFS3ERR_JUKEBOX;
2642 	} else
2643 		resp->status = puterrno3(error);
2644 err1:
2645 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2646 out:
2647 	DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req,
2648 	    cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp);
2649 	if (vp != NULL)
2650 		VN_RELE(vp);
2651 
2652 }
2653 
2654 void *
2655 rfs3_rmdir_getfh(RMDIR3args *args)
2656 {
2657 
2658 	return (&args->object.dir);
2659 }
2660 
2661 void
2662 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2663 	struct svc_req *req, cred_t *cr)
2664 {
2665 	int error = 0;
2666 	vnode_t *fvp;
2667 	vnode_t *tvp;
2668 	vnode_t *targvp;
2669 	struct vattr *fbvap;
2670 	struct vattr fbva;
2671 	struct vattr *favap;
2672 	struct vattr fava;
2673 	struct vattr *tbvap;
2674 	struct vattr tbva;
2675 	struct vattr *tavap;
2676 	struct vattr tava;
2677 	nfs_fh3 *fh3;
2678 	struct exportinfo *to_exi;
2679 	vnode_t *srcvp = NULL;
2680 	bslabel_t *clabel;
2681 	struct sockaddr *ca;
2682 	char *name = NULL;
2683 	char *toname = NULL;
2684 
2685 	fbvap = NULL;
2686 	favap = NULL;
2687 	tbvap = NULL;
2688 	tavap = NULL;
2689 	tvp = NULL;
2690 
2691 	fvp = nfs3_fhtovp(&args->from.dir, exi);
2692 
2693 	DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req,
2694 	    cred_t *, cr, vnode_t *, fvp, RENAME3args *, args);
2695 
2696 	if (fvp == NULL) {
2697 		error = ESTALE;
2698 		goto err;
2699 	}
2700 
2701 	if (is_system_labeled()) {
2702 		clabel = req->rq_label;
2703 		ASSERT(clabel != NULL);
2704 		DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2705 		    "got client label from request(1)", struct svc_req *, req);
2706 
2707 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2708 			if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2709 			    exi)) {
2710 				resp->status = NFS3ERR_ACCES;
2711 				goto err1;
2712 			}
2713 		}
2714 	}
2715 
2716 	fbva.va_mask = AT_ALL;
2717 	fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2718 	favap = fbvap;
2719 
2720 	fh3 = &args->to.dir;
2721 	to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2722 	if (to_exi == NULL) {
2723 		resp->status = NFS3ERR_ACCES;
2724 		goto err1;
2725 	}
2726 	exi_rele(to_exi);
2727 
2728 	if (to_exi != exi) {
2729 		resp->status = NFS3ERR_XDEV;
2730 		goto err1;
2731 	}
2732 
2733 	tvp = nfs3_fhtovp(&args->to.dir, exi);
2734 	if (tvp == NULL) {
2735 		error = ESTALE;
2736 		goto err;
2737 	}
2738 
2739 	tbva.va_mask = AT_ALL;
2740 	tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2741 	tavap = tbvap;
2742 
2743 	if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2744 		resp->status = NFS3ERR_NOTDIR;
2745 		goto err1;
2746 	}
2747 
2748 	if (args->from.name == nfs3nametoolong ||
2749 	    args->to.name == nfs3nametoolong) {
2750 		resp->status = NFS3ERR_NAMETOOLONG;
2751 		goto err1;
2752 	}
2753 	if (args->from.name == NULL || *(args->from.name) == '\0' ||
2754 	    args->to.name == NULL || *(args->to.name) == '\0') {
2755 		resp->status = NFS3ERR_ACCES;
2756 		goto err1;
2757 	}
2758 
2759 	if (rdonly(exi, req)) {
2760 		resp->status = NFS3ERR_ROFS;
2761 		goto err1;
2762 	}
2763 
2764 	if (is_system_labeled()) {
2765 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2766 			if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2767 			    exi)) {
2768 				resp->status = NFS3ERR_ACCES;
2769 				goto err1;
2770 			}
2771 		}
2772 	}
2773 
2774 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2775 	name = nfscmd_convname(ca, exi, args->from.name,
2776 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2777 
2778 	if (name == NULL) {
2779 		resp->status = NFS3ERR_INVAL;
2780 		goto err1;
2781 	}
2782 
2783 	toname = nfscmd_convname(ca, exi, args->to.name,
2784 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2785 
2786 	if (toname == NULL) {
2787 		resp->status = NFS3ERR_INVAL;
2788 		goto err1;
2789 	}
2790 
2791 	/*
2792 	 * Check for a conflict with a non-blocking mandatory share
2793 	 * reservation or V4 delegations.
2794 	 */
2795 	error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2796 	    NULL, cr, NULL, NULL, NULL);
2797 	if (error != 0)
2798 		goto err;
2799 
2800 	/*
2801 	 * If we rename a delegated file we should recall the
2802 	 * delegation, since future opens should fail or would
2803 	 * refer to a new file.
2804 	 */
2805 	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2806 		resp->status = NFS3ERR_JUKEBOX;
2807 		goto err1;
2808 	}
2809 
2810 	/*
2811 	 * Check for renaming over a delegated file.  Check rfs4_deleg_policy
2812 	 * first to avoid VOP_LOOKUP if possible.
2813 	 */
2814 	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2815 	    VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2816 	    NULL, NULL, NULL) == 0) {
2817 
2818 		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2819 			VN_RELE(targvp);
2820 			resp->status = NFS3ERR_JUKEBOX;
2821 			goto err1;
2822 		}
2823 		VN_RELE(targvp);
2824 	}
2825 
2826 	if (!nbl_need_check(srcvp)) {
2827 		error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2828 	} else {
2829 		nbl_start_crit(srcvp, RW_READER);
2830 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2831 			error = EACCES;
2832 		else
2833 			error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2834 		nbl_end_crit(srcvp);
2835 	}
2836 	if (error == 0)
2837 		vn_renamepath(tvp, srcvp, args->to.name,
2838 		    strlen(args->to.name));
2839 	VN_RELE(srcvp);
2840 	srcvp = NULL;
2841 
2842 	fava.va_mask = AT_ALL;
2843 	favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2844 	tava.va_mask = AT_ALL;
2845 	tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2846 
2847 	/*
2848 	 * Force modified data and metadata out to stable storage.
2849 	 */
2850 	(void) VOP_FSYNC(fvp, 0, cr, NULL);
2851 	(void) VOP_FSYNC(tvp, 0, cr, NULL);
2852 
2853 	if (error)
2854 		goto err;
2855 
2856 	resp->status = NFS3_OK;
2857 	vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2858 	vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2859 	goto out;
2860 
2861 err:
2862 	if (curthread->t_flag & T_WOULDBLOCK) {
2863 		curthread->t_flag &= ~T_WOULDBLOCK;
2864 		resp->status = NFS3ERR_JUKEBOX;
2865 	} else {
2866 		resp->status = puterrno3(error);
2867 	}
2868 err1:
2869 	vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2870 	vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2871 
2872 out:
2873 	if (name != NULL && name != args->from.name)
2874 		kmem_free(name, MAXPATHLEN + 1);
2875 	if (toname != NULL && toname != args->to.name)
2876 		kmem_free(toname, MAXPATHLEN + 1);
2877 
2878 	DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req,
2879 	    cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp);
2880 	if (fvp != NULL)
2881 		VN_RELE(fvp);
2882 	if (tvp != NULL)
2883 		VN_RELE(tvp);
2884 }
2885 
2886 void *
2887 rfs3_rename_getfh(RENAME3args *args)
2888 {
2889 
2890 	return (&args->from.dir);
2891 }
2892 
2893 void
2894 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2895 	struct svc_req *req, cred_t *cr)
2896 {
2897 	int error;
2898 	vnode_t *vp;
2899 	vnode_t *dvp;
2900 	struct vattr *vap;
2901 	struct vattr va;
2902 	struct vattr *bvap;
2903 	struct vattr bva;
2904 	struct vattr *avap;
2905 	struct vattr ava;
2906 	nfs_fh3	*fh3;
2907 	struct exportinfo *to_exi;
2908 	bslabel_t *clabel;
2909 	struct sockaddr *ca;
2910 	char *name = NULL;
2911 
2912 	vap = NULL;
2913 	bvap = NULL;
2914 	avap = NULL;
2915 	dvp = NULL;
2916 
2917 	vp = nfs3_fhtovp(&args->file, exi);
2918 
2919 	DTRACE_NFSV3_4(op__link__start, struct svc_req *, req,
2920 	    cred_t *, cr, vnode_t *, vp, LINK3args *, args);
2921 
2922 	if (vp == NULL) {
2923 		error = ESTALE;
2924 		goto out;
2925 	}
2926 
2927 	va.va_mask = AT_ALL;
2928 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2929 
2930 	fh3 = &args->link.dir;
2931 	to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2932 	if (to_exi == NULL) {
2933 		resp->status = NFS3ERR_ACCES;
2934 		goto out1;
2935 	}
2936 	exi_rele(to_exi);
2937 
2938 	if (to_exi != exi) {
2939 		resp->status = NFS3ERR_XDEV;
2940 		goto out1;
2941 	}
2942 
2943 	if (is_system_labeled()) {
2944 		clabel = req->rq_label;
2945 
2946 		ASSERT(clabel != NULL);
2947 		DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
2948 		    "got client label from request(1)", struct svc_req *, req);
2949 
2950 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2951 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2952 			    exi)) {
2953 				resp->status = NFS3ERR_ACCES;
2954 				goto out1;
2955 			}
2956 		}
2957 	}
2958 
2959 	dvp = nfs3_fhtovp(&args->link.dir, exi);
2960 	if (dvp == NULL) {
2961 		error = ESTALE;
2962 		goto out;
2963 	}
2964 
2965 	bva.va_mask = AT_ALL;
2966 	bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
2967 
2968 	if (dvp->v_type != VDIR) {
2969 		resp->status = NFS3ERR_NOTDIR;
2970 		goto out1;
2971 	}
2972 
2973 	if (args->link.name == nfs3nametoolong) {
2974 		resp->status = NFS3ERR_NAMETOOLONG;
2975 		goto out1;
2976 	}
2977 
2978 	if (args->link.name == NULL || *(args->link.name) == '\0') {
2979 		resp->status = NFS3ERR_ACCES;
2980 		goto out1;
2981 	}
2982 
2983 	if (rdonly(exi, req)) {
2984 		resp->status = NFS3ERR_ROFS;
2985 		goto out1;
2986 	}
2987 
2988 	if (is_system_labeled()) {
2989 		DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
2990 		    "got client label from request(1)", struct svc_req *, req);
2991 
2992 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2993 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2994 			    exi)) {
2995 				resp->status = NFS3ERR_ACCES;
2996 				goto out1;
2997 			}
2998 		}
2999 	}
3000 
3001 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3002 	name = nfscmd_convname(ca, exi, args->link.name,
3003 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3004 
3005 	if (name == NULL) {
3006 		resp->status = NFS3ERR_SERVERFAULT;
3007 		goto out1;
3008 	}
3009 
3010 	error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3011 
3012 	va.va_mask = AT_ALL;
3013 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3014 	ava.va_mask = AT_ALL;
3015 	avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3016 
3017 	/*
3018 	 * Force modified data and metadata out to stable storage.
3019 	 */
3020 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3021 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
3022 
3023 	if (error)
3024 		goto out;
3025 
3026 	VN_RELE(dvp);
3027 
3028 	resp->status = NFS3_OK;
3029 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3030 	vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3031 
3032 	DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3033 	    cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3034 
3035 	VN_RELE(vp);
3036 
3037 	return;
3038 
3039 out:
3040 	if (curthread->t_flag & T_WOULDBLOCK) {
3041 		curthread->t_flag &= ~T_WOULDBLOCK;
3042 		resp->status = NFS3ERR_JUKEBOX;
3043 	} else
3044 		resp->status = puterrno3(error);
3045 out1:
3046 	if (name != NULL && name != args->link.name)
3047 		kmem_free(name, MAXPATHLEN + 1);
3048 
3049 	DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3050 	    cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3051 
3052 	if (vp != NULL)
3053 		VN_RELE(vp);
3054 	if (dvp != NULL)
3055 		VN_RELE(dvp);
3056 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3057 	vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3058 }
3059 
3060 void *
3061 rfs3_link_getfh(LINK3args *args)
3062 {
3063 
3064 	return (&args->file);
3065 }
3066 
/*
 * Size, in bytes, of a READDIR response that carries the directory
 * attributes plus exactly one directory entry whose name is `length'
 * bytes long.  If the count in the incoming request is larger than this,
 * we are guaranteed to be able to return at least one directory entry if
 * one exists, so no NFS3ERR_TOOSMALL check is needed.  If the requested
 * count is not larger than this, the caller must check whether
 * NFS3ERR_TOOSMALL has to be returned.
 *
 * The fixed part, in XDR units (each BYTES_PER_XDR_UNIT bytes), is:
 *
 *	status				1
 *	attributes flag			1
 *	cookie verifier			2
 *	attributes			NFS3_SIZEOF_FATTR3
 *	entry-follows boolean		1
 *	file id				2
 *	directory name length		1
 *	cookie				2
 *	end of list			1
 *	end of file			1
 *
 * to which the name length, rounded up to a whole XDR unit, is added.
 */

#define	NFS3_READDIR_MIN_COUNT(length)	\
	(BYTES_PER_XDR_UNIT * \
	    (1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) + \
	    roundup((length), BYTES_PER_XDR_UNIT))
3095 
3096 /* ARGSUSED */
3097 void
3098 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3099 	struct svc_req *req, cred_t *cr)
3100 {
3101 	int error;
3102 	vnode_t *vp;
3103 	struct vattr *vap;
3104 	struct vattr va;
3105 	struct iovec iov;
3106 	struct uio uio;
3107 	char *data;
3108 	int iseof;
3109 	int bufsize;
3110 	int namlen;
3111 	uint_t count;
3112 	struct sockaddr *ca;
3113 
3114 	vap = NULL;
3115 
3116 	vp = nfs3_fhtovp(&args->dir, exi);
3117 
3118 	DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req,
3119 	    cred_t *, cr, vnode_t *, vp, READDIR3args *, args);
3120 
3121 	if (vp == NULL) {
3122 		error = ESTALE;
3123 		goto out;
3124 	}
3125 
3126 	if (is_system_labeled()) {
3127 		bslabel_t *clabel = req->rq_label;
3128 
3129 		ASSERT(clabel != NULL);
3130 		DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3131 		    "got client label from request(1)", struct svc_req *, req);
3132 
3133 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3134 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3135 			    exi)) {
3136 				resp->status = NFS3ERR_ACCES;
3137 				goto out1;
3138 			}
3139 		}
3140 	}
3141 
3142 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3143 
3144 	va.va_mask = AT_ALL;
3145 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3146 
3147 	if (vp->v_type != VDIR) {
3148 		resp->status = NFS3ERR_NOTDIR;
3149 		goto out1;
3150 	}
3151 
3152 	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3153 	if (error)
3154 		goto out;
3155 
3156 	/*
3157 	 * Now don't allow arbitrary count to alloc;
3158 	 * allow the maximum not to exceed rfs3_tsize()
3159 	 */
3160 	if (args->count > rfs3_tsize(req))
3161 		args->count = rfs3_tsize(req);
3162 
3163 	/*
3164 	 * Make sure that there is room to read at least one entry
3165 	 * if any are available.
3166 	 */
3167 	if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3168 		count = DIRENT64_RECLEN(MAXNAMELEN);
3169 	else
3170 		count = args->count;
3171 
3172 	data = kmem_alloc(count, KM_SLEEP);
3173 
3174 	iov.iov_base = data;
3175 	iov.iov_len = count;
3176 	uio.uio_iov = &iov;
3177 	uio.uio_iovcnt = 1;
3178 	uio.uio_segflg = UIO_SYSSPACE;
3179 	uio.uio_extflg = UIO_COPY_CACHED;
3180 	uio.uio_loffset = (offset_t)args->cookie;
3181 	uio.uio_resid = count;
3182 
3183 	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3184 
3185 	va.va_mask = AT_ALL;
3186 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3187 
3188 	if (error) {
3189 		kmem_free(data, count);
3190 		goto out;
3191 	}
3192 
3193 	/*
3194 	 * If the count was not large enough to be able to guarantee
3195 	 * to be able to return at least one entry, then need to
3196 	 * check to see if NFS3ERR_TOOSMALL should be returned.
3197 	 */
3198 	if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3199 		/*
3200 		 * bufsize is used to keep track of the size of the response.
3201 		 * It is primed with:
3202 		 *	1 for the status +
3203 		 *	1 for the dir_attributes.attributes boolean +
3204 		 *	2 for the cookie verifier
3205 		 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3206 		 * to bytes.  If there are directory attributes to be
3207 		 * returned, then:
3208 		 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3209 		 * time BYTES_PER_XDR_UNIT is added to account for them.
3210 		 */
3211 		bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3212 		if (vap != NULL)
3213 			bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3214 		/*
3215 		 * An entry is composed of:
3216 		 *	1 for the true/false list indicator +
3217 		 *	2 for the fileid +
3218 		 *	1 for the length of the name +
3219 		 *	2 for the cookie +
3220 		 * all times BYTES_PER_XDR_UNIT to convert from
3221 		 * XDR units to bytes, plus the length of the name
3222 		 * rounded up to the nearest BYTES_PER_XDR_UNIT.
3223 		 */
3224 		if (count != uio.uio_resid) {
3225 			namlen = strlen(((struct dirent64 *)data)->d_name);
3226 			bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3227 			    roundup(namlen, BYTES_PER_XDR_UNIT);
3228 		}
3229 		/*
3230 		 * We need to check to see if the number of bytes left
3231 		 * to go into the buffer will actually fit into the
3232 		 * buffer.  This is calculated as the size of this
3233 		 * entry plus:
3234 		 *	1 for the true/false list indicator +
3235 		 *	1 for the eof indicator
3236 		 * times BYTES_PER_XDR_UNIT to convert from from
3237 		 * XDR units to bytes.
3238 		 */
3239 		bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3240 		if (bufsize > args->count) {
3241 			kmem_free(data, count);
3242 			resp->status = NFS3ERR_TOOSMALL;
3243 			goto out1;
3244 		}
3245 	}
3246 
3247 	/*
3248 	 * Have a valid readir buffer for the native character
3249 	 * set. Need to check if a conversion is necessary and
3250 	 * potentially rewrite the whole buffer. Note that if the
3251 	 * conversion expands names enough, the structure may not
3252 	 * fit. In this case, we need to drop entries until if fits
3253 	 * and patch the counts in order that the next readdir will
3254 	 * get the correct entries.
3255 	 */
3256 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3257 	data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3258 
3259 
3260 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3261 
3262 #if 0 /* notyet */
3263 	/*
3264 	 * Don't do this.  It causes local disk writes when just
3265 	 * reading the file and the overhead is deemed larger
3266 	 * than the benefit.
3267 	 */
3268 	/*
3269 	 * Force modified metadata out to stable storage.
3270 	 */
3271 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3272 #endif
3273 
3274 	resp->status = NFS3_OK;
3275 	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3276 	resp->resok.cookieverf = 0;
3277 	resp->resok.reply.entries = (entry3 *)data;
3278 	resp->resok.reply.eof = iseof;
3279 	resp->resok.size = count - uio.uio_resid;
3280 	resp->resok.count = args->count;
3281 	resp->resok.freecount = count;
3282 
3283 	DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3284 	    cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3285 
3286 	VN_RELE(vp);
3287 
3288 	return;
3289 
3290 out:
3291 	if (curthread->t_flag & T_WOULDBLOCK) {
3292 		curthread->t_flag &= ~T_WOULDBLOCK;
3293 		resp->status = NFS3ERR_JUKEBOX;
3294 	} else
3295 		resp->status = puterrno3(error);
3296 out1:
3297 	DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3298 	    cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3299 
3300 	if (vp != NULL) {
3301 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3302 		VN_RELE(vp);
3303 	}
3304 	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3305 }
3306 
3307 void *
3308 rfs3_readdir_getfh(READDIR3args *args)
3309 {
3310 
3311 	return (&args->dir);
3312 }
3313 
3314 void
3315 rfs3_readdir_free(READDIR3res *resp)
3316 {
3317 
3318 	if (resp->status == NFS3_OK)
3319 		kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3320 }
3321 
/*
 * Step from one dirent64 record to the next by advancing d_reclen bytes.
 * Any earlier definition of nextdp is discarded first.
 */
#undef nextdp
#define	nextdp(dp)	\
	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
3326 
/*
 * This macro computes the size of a response which contains
 * one directory entry including the attributes as well as file handle.
 * If the incoming request is larger than this, then we are guaranteed to be
 * able to return at least one more directory entry if one exists.
 *
 * NFS3_READDIRPLUS_ENTRY is made up of the following:
 *
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * attribute flag - 1 * BYTES_PER_XDR_UNIT
 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * status byte for file handle - 1 *  BYTES_PER_XDR_UNIT
 * length of a file handle - 1 * BYTES_PER_XDR_UNIT
 * Maximum length of a file handle (NFS3_MAXFHSIZE)
 * name length of the entry rounded up to a whole XDR unit
 *
 * The macro argument is parenthesized, matching NFS3_READDIR_MIN_COUNT,
 * so that an expression argument cannot be mis-grouped on expansion.
 */
#define	NFS3_READDIRPLUS_ENTRY(namelen)	\
	((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
		BYTES_PER_XDR_UNIT + \
	NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
3350 
/*
 * Value copied into rfs3_readdirplus()'s local rd_unit at the start of
 * each call; initialized to MAXBSIZE.
 * NOTE(review): presumably the size of each directory read issued by
 * rfs3_readdirplus() -- confirm against the rest of that function.
 */
static int rfs3_readdir_unit = MAXBSIZE;
3352 
3353 /* ARGSUSED */
3354 void
3355 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3356 	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
3357 {
3358 	int error;
3359 	vnode_t *vp;
3360 	struct vattr *vap;
3361 	struct vattr va;
3362 	struct iovec iov;
3363 	struct uio uio;
3364 	char *data;
3365 	int iseof;
3366 	struct dirent64 *dp;
3367 	vnode_t *nvp;
3368 	struct vattr *nvap;
3369 	struct vattr nva;
3370 	entryplus3_info *infop = NULL;
3371 	int size = 0;
3372 	int nents = 0;
3373 	int bufsize = 0;
3374 	int entrysize = 0;
3375 	int tofit = 0;
3376 	int rd_unit = rfs3_readdir_unit;
3377 	int prev_len;
3378 	int space_left;
3379 	int i;
3380 	uint_t *namlen = NULL;
3381 	char *ndata = NULL;
3382 	struct sockaddr *ca;
3383 	size_t ret;
3384 
3385 	vap = NULL;
3386 
3387 	vp = nfs3_fhtovp(&args->dir, exi);
3388 
3389 	DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req,
3390 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args);
3391 
3392 	if (vp == NULL) {
3393 		error = ESTALE;
3394 		goto out;
3395 	}
3396 
3397 	if (is_system_labeled()) {
3398 		bslabel_t *clabel = req->rq_label;
3399 
3400 		ASSERT(clabel != NULL);
3401 		DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3402 		    char *, "got client label from request(1)",
3403 		    struct svc_req *, req);
3404 
3405 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3406 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3407 			    exi)) {
3408 				resp->status = NFS3ERR_ACCES;
3409 				goto out1;
3410 			}
3411 		}
3412 	}
3413 
3414 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3415 
3416 	va.va_mask = AT_ALL;
3417 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3418 
3419 	if (vp->v_type != VDIR) {
3420 		error = ENOTDIR;
3421 		goto out;
3422 	}
3423 
3424 	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3425 	if (error)
3426 		goto out;
3427 
3428 	/*
3429 	 * Don't allow arbitrary counts for allocation
3430 	 */
3431 	if (args->maxcount > rfs3_tsize(req))
3432 		args->maxcount = rfs3_tsize(req);
3433 
3434 	/*
3435 	 * Make sure that there is room to read at least one entry
3436 	 * if any are available
3437 	 */
3438 	args->dircount = MIN(args->dircount, args->maxcount);
3439 
3440 	if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3441 		args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3442 
3443 	/*
3444 	 * This allocation relies on a minimum directory entry
3445 	 * being roughly 24 bytes.  Therefore, the namlen array
3446 	 * will have enough space based on the maximum number of
3447 	 * entries to read.
3448 	 */
3449 	namlen = kmem_alloc(args->dircount, KM_SLEEP);
3450 
3451 	space_left = args->dircount;
3452 	data = kmem_alloc(args->dircount, KM_SLEEP);
3453 	dp = (struct dirent64 *)data;
3454 	uio.uio_iov = &iov;
3455 	uio.uio_iovcnt = 1;
3456 	uio.uio_segflg = UIO_SYSSPACE;
3457 	uio.uio_extflg = UIO_COPY_CACHED;
3458 	uio.uio_loffset = (offset_t)args->cookie;
3459 
3460 	/*
3461 	 * bufsize is used to keep track of the size of the response as we
3462 	 * get post op attributes and filehandles for each entry.  This is
3463 	 * an optimization as the server may have read more entries than will
3464 	 * fit in the buffer specified by maxcount.  We stop calculating
3465 	 * post op attributes and filehandles once we have exceeded maxcount.
3466 	 * This will minimize the effect of truncation.
3467 	 *
3468 	 * It is primed with:
3469 	 *	1 for the status +
3470 	 *	1 for the dir_attributes.attributes boolean +
3471 	 *	2 for the cookie verifier
3472 	 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3473 	 * to bytes.  If there are directory attributes to be
3474 	 * returned, then:
3475 	 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3476 	 * time BYTES_PER_XDR_UNIT is added to account for them.
3477 	 */
3478 	bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3479 	if (vap != NULL)
3480 		bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3481 
3482 getmoredents:
3483 	/*
3484 	 * Here we make a check so that our read unit is not larger than
3485 	 * the space left in the buffer.
3486 	 */
3487 	rd_unit = MIN(rd_unit, space_left);
3488 	iov.iov_base = (char *)dp;
3489 	iov.iov_len = rd_unit;
3490 	uio.uio_resid = rd_unit;
3491 	prev_len = rd_unit;
3492 
3493 	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3494 
3495 	if (error) {
3496 		kmem_free(data, args->dircount);
3497 		goto out;
3498 	}
3499 
3500 	if (uio.uio_resid == prev_len && !iseof) {
3501 		if (nents == 0) {
3502 			kmem_free(data, args->dircount);
3503 			resp->status = NFS3ERR_TOOSMALL;
3504 			goto out1;
3505 		}
3506 
3507 		/*
3508 		 * We could not get any more entries, so get the attributes
3509 		 * and filehandle for the entries already obtained.
3510 		 */
3511 		goto good;
3512 	}
3513 
3514 	/*
3515 	 * We estimate the size of the response by assuming the
3516 	 * entry exists and attributes and filehandle are also valid
3517 	 */
3518 	for (size = prev_len - uio.uio_resid;
3519 	    size > 0;
3520 	    size -= dp->d_reclen, dp = nextdp(dp)) {
3521 
3522 		if (dp->d_ino == 0) {
3523 			nents++;
3524 			continue;
3525 		}
3526 
3527 		namlen[nents] = strlen(dp->d_name);
3528 		entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3529 
3530 		/*
3531 		 * We need to check to see if the number of bytes left
3532 		 * to go into the buffer will actually fit into the
3533 		 * buffer.  This is calculated as the size of this
3534 		 * entry plus:
3535 		 *	1 for the true/false list indicator +
3536 		 *	1 for the eof indicator
3537 		 * times BYTES_PER_XDR_UNIT to convert from XDR units
3538 		 * to bytes.
3539 		 *
3540 		 * Also check the dircount limit against the first entry read
3541 		 *
3542 		 */
3543 		tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3544 		if (bufsize + tofit > args->maxcount) {
3545 			/*
3546 			 * We make a check here to see if this was the
3547 			 * first entry being measured.  If so, then maxcount
3548 			 * was too small to begin with and so we need to
3549 			 * return with NFS3ERR_TOOSMALL.
3550 			 */
3551 			if (nents == 0) {
3552 				kmem_free(data, args->dircount);
3553 				resp->status = NFS3ERR_TOOSMALL;
3554 				goto out1;
3555 			}
3556 			iseof = FALSE;
3557 			goto good;
3558 		}
3559 		bufsize += entrysize;
3560 		nents++;
3561 	}
3562 
3563 	/*
3564 	 * If there is enough room to fit at least 1 more entry including
3565 	 * post op attributes and filehandle in the buffer AND that we haven't
3566 	 * exceeded dircount then go back and get some more.
3567 	 */
3568 	if (!iseof &&
3569 	    (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3570 		space_left -= (prev_len - uio.uio_resid);
3571 		if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3572 			goto getmoredents;
3573 
3574 		/* else, fall through */
3575 	}
3576 good:
3577 	va.va_mask = AT_ALL;
3578 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3579 
3580 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3581 
3582 	infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3583 	resp->resok.infop = infop;
3584 
3585 	dp = (struct dirent64 *)data;
3586 	for (i = 0; i < nents; i++) {
3587 
3588 		if (dp->d_ino == 0) {
3589 			infop[i].attr.attributes = FALSE;
3590 			infop[i].fh.handle_follows = FALSE;
3591 			dp = nextdp(dp);
3592 			continue;
3593 		}
3594 
3595 		infop[i].namelen = namlen[i];
3596 
3597 		error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3598 		    NULL, NULL, NULL);
3599 		if (error) {
3600 			infop[i].attr.attributes = FALSE;
3601 			infop[i].fh.handle_follows = FALSE;
3602 			dp = nextdp(dp);
3603 			continue;
3604 		}
3605 
3606 		nva.va_mask = AT_ALL;
3607 		nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3608 
3609 		/* Lie about the object type for a referral */
3610 		if (vn_is_nfs_reparse(nvp, cr))
3611 			nvap->va_type = VLNK;
3612 
3613 		vattr_to_post_op_attr(nvap, &infop[i].attr);
3614 
3615 		error = makefh3(&infop[i].fh.handle, nvp, exi);
3616 		if (!error)
3617 			infop[i].fh.handle_follows = TRUE;
3618 		else
3619 			infop[i].fh.handle_follows = FALSE;
3620 
3621 		VN_RELE(nvp);
3622 		dp = nextdp(dp);
3623 	}
3624 
3625 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3626 	ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3627 	if (ndata == NULL)
3628 		ndata = data;
3629 
3630 	if (ret > 0) {
3631 		/*
3632 		 * We had to drop one or more entries in order to fit
3633 		 * during the character conversion.  We need to patch
3634 		 * up the size and eof info.
3635 		 */
3636 		if (iseof)
3637 			iseof = FALSE;
3638 
3639 		ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3640 		    nents, ret);
3641 	}
3642 
3643 
3644 #if 0 /* notyet */
3645 	/*
3646 	 * Don't do this.  It causes local disk writes when just
3647 	 * reading the file and the overhead is deemed larger
3648 	 * than the benefit.
3649 	 */
3650 	/*
3651 	 * Force modified metadata out to stable storage.
3652 	 */
3653 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3654 #endif
3655 
3656 	kmem_free(namlen, args->dircount);
3657 
3658 	resp->status = NFS3_OK;
3659 	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3660 	resp->resok.cookieverf = 0;
3661 	resp->resok.reply.entries = (entryplus3 *)ndata;
3662 	resp->resok.reply.eof = iseof;
3663 	resp->resok.size = nents;
3664 	resp->resok.count = args->dircount - ret;
3665 	resp->resok.maxcount = args->maxcount;
3666 
3667 	DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3668 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3669 	if (ndata != data)
3670 		kmem_free(data, args->dircount);
3671 
3672 
3673 	VN_RELE(vp);
3674 
3675 	return;
3676 
3677 out:
3678 	if (curthread->t_flag & T_WOULDBLOCK) {
3679 		curthread->t_flag &= ~T_WOULDBLOCK;
3680 		resp->status = NFS3ERR_JUKEBOX;
3681 	} else {
3682 		resp->status = puterrno3(error);
3683 	}
3684 out1:
3685 	DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3686 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3687 
3688 	if (vp != NULL) {
3689 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3690 		VN_RELE(vp);
3691 	}
3692 
3693 	if (namlen != NULL)
3694 		kmem_free(namlen, args->dircount);
3695 
3696 	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3697 }
3698 
3699 void *
3700 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3701 {
3702 
3703 	return (&args->dir);
3704 }
3705 
3706 void
3707 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3708 {
3709 
3710 	if (resp->status == NFS3_OK) {
3711 		kmem_free(resp->resok.reply.entries, resp->resok.count);
3712 		kmem_free(resp->resok.infop,
3713 		    resp->resok.size * sizeof (struct entryplus3_info));
3714 	}
3715 }
3716 
3717 /* ARGSUSED */
3718 void
3719 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3720 	struct svc_req *req, cred_t *cr)
3721 {
3722 	int error;
3723 	vnode_t *vp;
3724 	struct vattr *vap;
3725 	struct vattr va;
3726 	struct statvfs64 sb;
3727 
3728 	vap = NULL;
3729 
3730 	vp = nfs3_fhtovp(&args->fsroot, exi);
3731 
3732 	DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req,
3733 	    cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args);
3734 
3735 	if (vp == NULL) {
3736 		error = ESTALE;
3737 		goto out;
3738 	}
3739 
3740 	if (is_system_labeled()) {
3741 		bslabel_t *clabel = req->rq_label;
3742 
3743 		ASSERT(clabel != NULL);
3744 		DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3745 		    "got client label from request(1)", struct svc_req *, req);
3746 
3747 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3748 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3749 			    exi)) {
3750 				resp->status = NFS3ERR_ACCES;
3751 				goto out1;
3752 			}
3753 		}
3754 	}
3755 
3756 	error = VFS_STATVFS(vp->v_vfsp, &sb);
3757 
3758 	va.va_mask = AT_ALL;
3759 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3760 
3761 	if (error)
3762 		goto out;
3763 
3764 	resp->status = NFS3_OK;
3765 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3766 	if (sb.f_blocks != (fsblkcnt64_t)-1)
3767 		resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3768 	else
3769 		resp->resok.tbytes = (size3)sb.f_blocks;
3770 	if (sb.f_bfree != (fsblkcnt64_t)-1)
3771 		resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3772 	else
3773 		resp->resok.fbytes = (size3)sb.f_bfree;
3774 	if (sb.f_bavail != (fsblkcnt64_t)-1)
3775 		resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3776 	else
3777 		resp->resok.abytes = (size3)sb.f_bavail;
3778 	resp->resok.tfiles = (size3)sb.f_files;
3779 	resp->resok.ffiles = (size3)sb.f_ffree;
3780 	resp->resok.afiles = (size3)sb.f_favail;
3781 	resp->resok.invarsec = 0;
3782 
3783 	DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3784 	    cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3785 	VN_RELE(vp);
3786 
3787 	return;
3788 
3789 out:
3790 	if (curthread->t_flag & T_WOULDBLOCK) {
3791 		curthread->t_flag &= ~T_WOULDBLOCK;
3792 		resp->status = NFS3ERR_JUKEBOX;
3793 	} else
3794 		resp->status = puterrno3(error);
3795 out1:
3796 	DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3797 	    cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3798 
3799 	if (vp != NULL)
3800 		VN_RELE(vp);
3801 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3802 }
3803 
3804 void *
3805 rfs3_fsstat_getfh(FSSTAT3args *args)
3806 {
3807 
3808 	return (&args->fsroot);
3809 }
3810 
3811 void
3812 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3813 	struct svc_req *req, cred_t *cr)
3814 {
3815 	vnode_t *vp;
3816 	struct vattr *vap;
3817 	struct vattr va;
3818 	uint32_t xfer_size;
3819 	ulong_t l = 0;
3820 	int error;
3821 
3822 	vp = nfs3_fhtovp(&args->fsroot, exi);
3823 
3824 	DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req,
3825 	    cred_t *, cr, vnode_t *, vp, FSINFO3args *, args);
3826 
3827 	if (vp == NULL) {
3828 		if (curthread->t_flag & T_WOULDBLOCK) {
3829 			curthread->t_flag &= ~T_WOULDBLOCK;
3830 			resp->status = NFS3ERR_JUKEBOX;
3831 		} else
3832 			resp->status = NFS3ERR_STALE;
3833 		vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3834 		goto out;
3835 	}
3836 
3837 	if (is_system_labeled()) {
3838 		bslabel_t *clabel = req->rq_label;
3839 
3840 		ASSERT(clabel != NULL);
3841 		DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3842 		    "got client label from request(1)", struct svc_req *, req);
3843 
3844 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3845 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3846 			    exi)) {
3847 				resp->status = NFS3ERR_STALE;
3848 				vattr_to_post_op_attr(NULL,
3849 				    &resp->resfail.obj_attributes);
3850 				goto out;
3851 			}
3852 		}
3853 	}
3854 
3855 	va.va_mask = AT_ALL;
3856 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3857 
3858 	resp->status = NFS3_OK;
3859 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3860 	xfer_size = rfs3_tsize(req);
3861 	resp->resok.rtmax = xfer_size;
3862 	resp->resok.rtpref = xfer_size;
3863 	resp->resok.rtmult = DEV_BSIZE;
3864 	resp->resok.wtmax = xfer_size;
3865 	resp->resok.wtpref = xfer_size;
3866 	resp->resok.wtmult = DEV_BSIZE;
3867 	resp->resok.dtpref = MAXBSIZE;
3868 
3869 	/*
3870 	 * Large file spec: want maxfilesize based on limit of
3871 	 * underlying filesystem.  We can guess 2^31-1 if need be.
3872 	 */
3873 	error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3874 	if (error) {
3875 		resp->status = puterrno3(error);
3876 		goto out;
3877 	}
3878 
3879 	/*
3880 	 * If the underlying file system does not support _PC_FILESIZEBITS,
3881 	 * return a reasonable default. Note that error code on VOP_PATHCONF
3882 	 * will be 0, even if the underlying file system does not support
3883 	 * _PC_FILESIZEBITS.
3884 	 */
3885 	if (l == (ulong_t)-1) {
3886 		resp->resok.maxfilesize = MAXOFF32_T;
3887 	} else {
3888 		if (l >= (sizeof (uint64_t) * 8))
3889 			resp->resok.maxfilesize = INT64_MAX;
3890 		else
3891 			resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3892 	}
3893 
3894 	resp->resok.time_delta.seconds = 0;
3895 	resp->resok.time_delta.nseconds = 1000;
3896 	resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3897 	    FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3898 
3899 	DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3900 	    cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp);
3901 
3902 	VN_RELE(vp);
3903 
3904 	return;
3905 
3906 out:
3907 	DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3908 	    cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp);
3909 	if (vp != NULL)
3910 		VN_RELE(vp);
3911 }
3912 
3913 void *
3914 rfs3_fsinfo_getfh(FSINFO3args *args)
3915 {
3916 	return (&args->fsroot);
3917 }
3918 
3919 /* ARGSUSED */
3920 void
3921 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3922 	struct svc_req *req, cred_t *cr)
3923 {
3924 	int error;
3925 	vnode_t *vp;
3926 	struct vattr *vap;
3927 	struct vattr va;
3928 	ulong_t val;
3929 
3930 	vap = NULL;
3931 
3932 	vp = nfs3_fhtovp(&args->object, exi);
3933 
3934 	DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req,
3935 	    cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args);
3936 
3937 	if (vp == NULL) {
3938 		error = ESTALE;
3939 		goto out;
3940 	}
3941 
3942 	if (is_system_labeled()) {
3943 		bslabel_t *clabel = req->rq_label;
3944 
3945 		ASSERT(clabel != NULL);
3946 		DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
3947 		    "got client label from request(1)", struct svc_req *, req);
3948 
3949 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3950 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3951 			    exi)) {
3952 				resp->status = NFS3ERR_ACCES;
3953 				goto out1;
3954 			}
3955 		}
3956 	}
3957 
3958 	va.va_mask = AT_ALL;
3959 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3960 
3961 	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
3962 	if (error)
3963 		goto out;
3964 	resp->resok.info.link_max = (uint32)val;
3965 
3966 	error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
3967 	if (error)
3968 		goto out;
3969 	resp->resok.info.name_max = (uint32)val;
3970 
3971 	error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
3972 	if (error)
3973 		goto out;
3974 	if (val == 1)
3975 		resp->resok.info.no_trunc = TRUE;
3976 	else
3977 		resp->resok.info.no_trunc = FALSE;
3978 
3979 	error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
3980 	if (error)
3981 		goto out;
3982 	if (val == 1)
3983 		resp->resok.info.chown_restricted = TRUE;
3984 	else
3985 		resp->resok.info.chown_restricted = FALSE;
3986 
3987 	resp->status = NFS3_OK;
3988 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3989 	resp->resok.info.case_insensitive = FALSE;
3990 	resp->resok.info.case_preserving = TRUE;
3991 	DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
3992 	    cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
3993 	VN_RELE(vp);
3994 	return;
3995 
3996 out:
3997 	if (curthread->t_flag & T_WOULDBLOCK) {
3998 		curthread->t_flag &= ~T_WOULDBLOCK;
3999 		resp->status = NFS3ERR_JUKEBOX;
4000 	} else
4001 		resp->status = puterrno3(error);
4002 out1:
4003 	DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4004 	    cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4005 	if (vp != NULL)
4006 		VN_RELE(vp);
4007 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4008 }
4009 
4010 void *
4011 rfs3_pathconf_getfh(PATHCONF3args *args)
4012 {
4013 
4014 	return (&args->object);
4015 }
4016 
4017 void
4018 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4019 	struct svc_req *req, cred_t *cr)
4020 {
4021 	int error;
4022 	vnode_t *vp;
4023 	struct vattr *bvap;
4024 	struct vattr bva;
4025 	struct vattr *avap;
4026 	struct vattr ava;
4027 
4028 	bvap = NULL;
4029 	avap = NULL;
4030 
4031 	vp = nfs3_fhtovp(&args->file, exi);
4032 
4033 	DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req,
4034 	    cred_t *, cr, vnode_t *, vp, COMMIT3args *, args);
4035 
4036 	if (vp == NULL) {
4037 		error = ESTALE;
4038 		goto out;
4039 	}
4040 
4041 	bva.va_mask = AT_ALL;
4042 	error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4043 
4044 	/*
4045 	 * If we can't get the attributes, then we can't do the
4046 	 * right access checking.  So, we'll fail the request.
4047 	 */
4048 	if (error)
4049 		goto out;
4050 
4051 	bvap = &bva;
4052 
4053 	if (rdonly(exi, req)) {
4054 		resp->status = NFS3ERR_ROFS;
4055 		goto out1;
4056 	}
4057 
4058 	if (vp->v_type != VREG) {
4059 		resp->status = NFS3ERR_INVAL;
4060 		goto out1;
4061 	}
4062 
4063 	if (is_system_labeled()) {
4064 		bslabel_t *clabel = req->rq_label;
4065 
4066 		ASSERT(clabel != NULL);
4067 		DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4068 		    "got client label from request(1)", struct svc_req *, req);
4069 
4070 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4071 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4072 			    exi)) {
4073 				resp->status = NFS3ERR_ACCES;
4074 				goto out1;
4075 			}
4076 		}
4077 	}
4078 
4079 	if (crgetuid(cr) != bva.va_uid &&
4080 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4081 		goto out;
4082 
4083 	error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4084 
4085 	ava.va_mask = AT_ALL;
4086 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4087 
4088 	if (error)
4089 		goto out;
4090 
4091 	resp->status = NFS3_OK;
4092 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4093 	resp->resok.verf = write3verf;
4094 
4095 	DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4096 	    cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4097 
4098 	VN_RELE(vp);
4099 
4100 	return;
4101 
4102 out:
4103 	if (curthread->t_flag & T_WOULDBLOCK) {
4104 		curthread->t_flag &= ~T_WOULDBLOCK;
4105 		resp->status = NFS3ERR_JUKEBOX;
4106 	} else
4107 		resp->status = puterrno3(error);
4108 out1:
4109 	DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4110 	    cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4111 
4112 	if (vp != NULL)
4113 		VN_RELE(vp);
4114 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4115 }
4116 
4117 void *
4118 rfs3_commit_getfh(COMMIT3args *args)
4119 {
4120 
4121 	return (&args->file);
4122 }
4123 
4124 static int
4125 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4126 {
4127 
4128 	vap->va_mask = 0;
4129 
4130 	if (sap->mode.set_it) {
4131 		vap->va_mode = (mode_t)sap->mode.mode;
4132 		vap->va_mask |= AT_MODE;
4133 	}
4134 	if (sap->uid.set_it) {
4135 		vap->va_uid = (uid_t)sap->uid.uid;
4136 		vap->va_mask |= AT_UID;
4137 	}
4138 	if (sap->gid.set_it) {
4139 		vap->va_gid = (gid_t)sap->gid.gid;
4140 		vap->va_mask |= AT_GID;
4141 	}
4142 	if (sap->size.set_it) {
4143 		if (sap->size.size > (size3)((u_longlong_t)-1))
4144 			return (EINVAL);
4145 		vap->va_size = sap->size.size;
4146 		vap->va_mask |= AT_SIZE;
4147 	}
4148 	if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4149 #ifndef _LP64
4150 		/* check time validity */
4151 		if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4152 			return (EOVERFLOW);
4153 #endif
4154 		/*
4155 		 * nfs protocol defines times as unsigned so don't extend sign,
4156 		 * unless sysadmin set nfs_allow_preepoch_time.
4157 		 */
4158 		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4159 		    sap->atime.atime.seconds);
4160 		vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4161 		vap->va_mask |= AT_ATIME;
4162 	} else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4163 		gethrestime(&vap->va_atime);
4164 		vap->va_mask |= AT_ATIME;
4165 	}
4166 	if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4167 #ifndef _LP64
4168 		/* check time validity */
4169 		if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4170 			return (EOVERFLOW);
4171 #endif
4172 		/*
4173 		 * nfs protocol defines times as unsigned so don't extend sign,
4174 		 * unless sysadmin set nfs_allow_preepoch_time.
4175 		 */
4176 		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4177 		    sap->mtime.mtime.seconds);
4178 		vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4179 		vap->va_mask |= AT_MTIME;
4180 	} else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4181 		gethrestime(&vap->va_mtime);
4182 		vap->va_mask |= AT_MTIME;
4183 	}
4184 
4185 	return (0);
4186 }
4187 
4188 static ftype3 vt_to_nf3[] = {
4189 	0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4190 };
4191 
4192 static int
4193 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4194 {
4195 
4196 	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4197 	/* Return error if time or size overflow */
4198 	if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4199 		return (EOVERFLOW);
4200 	}
4201 	fap->type = vt_to_nf3[vap->va_type];
4202 	fap->mode = (mode3)(vap->va_mode & MODEMASK);
4203 	fap->nlink = (uint32)vap->va_nlink;
4204 	if (vap->va_uid == UID_NOBODY)
4205 		fap->uid = (uid3)NFS_UID_NOBODY;
4206 	else
4207 		fap->uid = (uid3)vap->va_uid;
4208 	if (vap->va_gid == GID_NOBODY)
4209 		fap->gid = (gid3)NFS_GID_NOBODY;
4210 	else
4211 		fap->gid = (gid3)vap->va_gid;
4212 	fap->size = (size3)vap->va_size;
4213 	fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4214 	fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4215 	fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4216 	fap->fsid = (uint64)vap->va_fsid;
4217 	fap->fileid = (fileid3)vap->va_nodeid;
4218 	fap->atime.seconds = vap->va_atime.tv_sec;
4219 	fap->atime.nseconds = vap->va_atime.tv_nsec;
4220 	fap->mtime.seconds = vap->va_mtime.tv_sec;
4221 	fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4222 	fap->ctime.seconds = vap->va_ctime.tv_sec;
4223 	fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4224 	return (0);
4225 }
4226 
4227 static int
4228 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4229 {
4230 
4231 	/* Return error if time or size overflow */
4232 	if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4233 	    NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4234 	    NFS3_SIZE_OK(vap->va_size))) {
4235 		return (EOVERFLOW);
4236 	}
4237 	wccap->size = (size3)vap->va_size;
4238 	wccap->mtime.seconds = vap->va_mtime.tv_sec;
4239 	wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4240 	wccap->ctime.seconds = vap->va_ctime.tv_sec;
4241 	wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4242 	return (0);
4243 }
4244 
4245 static void
4246 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4247 {
4248 
4249 	/* don't return attrs if time overflow */
4250 	if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4251 		poap->attributes = TRUE;
4252 	} else
4253 		poap->attributes = FALSE;
4254 }
4255 
4256 void
4257 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4258 {
4259 
4260 	/* don't return attrs if time overflow */
4261 	if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4262 		poap->attributes = TRUE;
4263 	} else
4264 		poap->attributes = FALSE;
4265 }
4266 
4267 static void
4268 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4269 {
4270 
4271 	vattr_to_pre_op_attr(bvap, &wccp->before);
4272 	vattr_to_post_op_attr(avap, &wccp->after);
4273 }
4274 
4275 void
4276 rfs3_srvrinit(void)
4277 {
4278 	struct rfs3_verf_overlay {
4279 		uint_t id; /* a "unique" identifier */
4280 		int ts; /* a unique timestamp */
4281 	} *verfp;
4282 	timestruc_t now;
4283 
4284 	/*
4285 	 * The following algorithm attempts to find a unique verifier
4286 	 * to be used as the write verifier returned from the server
4287 	 * to the client.  It is important that this verifier change
4288 	 * whenever the server reboots.  Of secondary importance, it
4289 	 * is important for the verifier to be unique between two
4290 	 * different servers.
4291 	 *
4292 	 * Thus, an attempt is made to use the system hostid and the
4293 	 * current time in seconds when the nfssrv kernel module is
4294 	 * loaded.  It is assumed that an NFS server will not be able
4295 	 * to boot and then to reboot in less than a second.  If the
4296 	 * hostid has not been set, then the current high resolution
4297 	 * time is used.  This will ensure different verifiers each
4298 	 * time the server reboots and minimize the chances that two
4299 	 * different servers will have the same verifier.
4300 	 */
4301 
4302 #ifndef	lint
4303 	/*
4304 	 * We ASSERT that this constant logic expression is
4305 	 * always true because in the past, it wasn't.
4306 	 */
4307 	ASSERT(sizeof (*verfp) <= sizeof (write3verf));
4308 #endif
4309 
4310 	gethrestime(&now);
4311 	verfp = (struct rfs3_verf_overlay *)&write3verf;
4312 	verfp->ts = (int)now.tv_sec;
4313 	verfp->id = zone_get_hostid(NULL);
4314 
4315 	if (verfp->id == 0)
4316 		verfp->id = (uint_t)now.tv_nsec;
4317 
4318 	nfs3_srv_caller_id = fs_new_caller_id();
4319 
4320 }
4321 
4322 static int
4323 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4324 {
4325 	struct clist	*wcl;
4326 	int		wlist_len;
4327 	count3		count = rok->count;
4328 
4329 	wcl = args->wlist;
4330 	if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
4331 		return (FALSE);
4332 	}
4333 
4334 	wcl = args->wlist;
4335 	rok->wlist_len = wlist_len;
4336 	rok->wlist = wcl;
4337 	return (TRUE);
4338 }
4339 
/*
 * Teardown counterpart of rfs3_srvrinit().  Intentionally empty.
 */
void
rfs3_srvrfini(void)
{
	/* Nothing to do */
}
4345