xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs3_srv.c (revision a1cdd5a67f3bf3e60db3f3a77baef63640ad91a4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
24  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
25  * Copyright (c) 2013 by Delphix. All rights reserved.
26  */
27 
28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
30 
31 #include <sys/param.h>
32 #include <sys/types.h>
33 #include <sys/systm.h>
34 #include <sys/cred.h>
35 #include <sys/buf.h>
36 #include <sys/vfs.h>
37 #include <sys/vnode.h>
38 #include <sys/uio.h>
39 #include <sys/errno.h>
40 #include <sys/sysmacros.h>
41 #include <sys/statvfs.h>
42 #include <sys/kmem.h>
43 #include <sys/dirent.h>
44 #include <sys/cmn_err.h>
45 #include <sys/debug.h>
46 #include <sys/systeminfo.h>
47 #include <sys/flock.h>
48 #include <sys/nbmlock.h>
49 #include <sys/policy.h>
50 #include <sys/sdt.h>
51 
52 #include <rpc/types.h>
53 #include <rpc/auth.h>
54 #include <rpc/svc.h>
55 #include <rpc/rpc_rdma.h>
56 
57 #include <nfs/nfs.h>
58 #include <nfs/export.h>
59 #include <nfs/nfs_cmd.h>
60 
61 #include <sys/strsubr.h>
62 #include <sys/tsol/label.h>
63 #include <sys/tsol/tndb.h>
64 
65 #include <sys/zone.h>
66 
67 #include <inet/ip.h>
68 #include <inet/ip6.h>
69 
70 /*
71  * These are the interface routines for the server side of the
72  * Network File System.  See the NFS version 3 protocol specification
73  * for a description of this interface.
74  */
75 
/*
 * File-scope write verifier.
 * NOTE(review): no use is visible in this part of the file; presumably
 * it is the verifier returned in WRITE/COMMIT replies -- confirm.
 */
static writeverf3 write3verf;

/*
 * Forward declarations of file-local helpers that convert between the
 * kernel vattr representation and the NFSv3 attribute types, plus the
 * RDMA reply set-up helper used by rfs3_read().
 */
static int	sattr3_to_vattr(sattr3 *, struct vattr *);
static int	vattr_to_fattr3(struct vattr *, fattr3 *);
static int	vattr_to_wcc_attr(struct vattr *, wcc_attr *);
static void	vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
static void	vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
static int	rdma_setup_read_data3(READ3args *, READ3resok *);

/*
 * Defined elsewhere.  When non-zero, rfs3_read() tries to use loaned
 * (zero-copy) buffers for reads that are not returned via RDMA.
 */
extern int nfs_loaned_buffers;

/*
 * Value stored in cc_caller_id of the caller_context_t that this file
 * passes to VOP calls.
 */
u_longlong_t nfs3_srv_caller_id;
88 
89 /* ARGSUSED */
90 void
91 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
92     struct svc_req *req, cred_t *cr, bool_t ro)
93 {
94 	int error;
95 	vnode_t *vp;
96 	struct vattr va;
97 
98 	vp = nfs3_fhtovp(&args->object, exi);
99 
100 	DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
101 	    cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
102 
103 	if (vp == NULL) {
104 		error = ESTALE;
105 		goto out;
106 	}
107 
108 	va.va_mask = AT_ALL;
109 	error = rfs4_delegated_getattr(vp, &va, 0, cr);
110 
111 	if (!error) {
112 		/* Lie about the object type for a referral */
113 		if (vn_is_nfs_reparse(vp, cr))
114 			va.va_type = VLNK;
115 
116 		/* overflow error if time or size is out of range */
117 		error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
118 		if (error)
119 			goto out;
120 		resp->status = NFS3_OK;
121 
122 		DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
123 		    cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
124 
125 		VN_RELE(vp);
126 
127 		return;
128 	}
129 
130 out:
131 	if (curthread->t_flag & T_WOULDBLOCK) {
132 		curthread->t_flag &= ~T_WOULDBLOCK;
133 		resp->status = NFS3ERR_JUKEBOX;
134 	} else
135 		resp->status = puterrno3(error);
136 
137 	DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
138 	    cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
139 
140 	if (vp != NULL)
141 		VN_RELE(vp);
142 }
143 
144 void *
145 rfs3_getattr_getfh(GETATTR3args *args)
146 {
147 
148 	return (&args->object);
149 }
150 
151 void
152 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
153     struct svc_req *req, cred_t *cr, bool_t ro)
154 {
155 	int error;
156 	vnode_t *vp;
157 	struct vattr *bvap;
158 	struct vattr bva;
159 	struct vattr *avap;
160 	struct vattr ava;
161 	int flag;
162 	int in_crit = 0;
163 	struct flock64 bf;
164 	caller_context_t ct;
165 
166 	bvap = NULL;
167 	avap = NULL;
168 
169 	vp = nfs3_fhtovp(&args->object, exi);
170 
171 	DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
172 	    cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);
173 
174 	if (vp == NULL) {
175 		error = ESTALE;
176 		goto out;
177 	}
178 
179 	error = sattr3_to_vattr(&args->new_attributes, &ava);
180 	if (error)
181 		goto out;
182 
183 	if (is_system_labeled()) {
184 		bslabel_t *clabel = req->rq_label;
185 
186 		ASSERT(clabel != NULL);
187 		DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
188 		    "got client label from request(1)", struct svc_req *, req);
189 
190 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
191 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
192 			    exi)) {
193 				resp->status = NFS3ERR_ACCES;
194 				goto out1;
195 			}
196 		}
197 	}
198 
199 	/*
200 	 * We need to specially handle size changes because of
201 	 * possible conflicting NBMAND locks. Get into critical
202 	 * region before VOP_GETATTR, so the size attribute is
203 	 * valid when checking conflicts.
204 	 *
205 	 * Also, check to see if the v4 side of the server has
206 	 * delegated this file.  If so, then we return JUKEBOX to
207 	 * allow the client to retrasmit its request.
208 	 */
209 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
210 		if (nbl_need_check(vp)) {
211 			nbl_start_crit(vp, RW_READER);
212 			in_crit = 1;
213 		}
214 	}
215 
216 	bva.va_mask = AT_ALL;
217 	error = rfs4_delegated_getattr(vp, &bva, 0, cr);
218 
219 	/*
220 	 * If we can't get the attributes, then we can't do the
221 	 * right access checking.  So, we'll fail the request.
222 	 */
223 	if (error)
224 		goto out;
225 
226 	bvap = &bva;
227 
228 	if (rdonly(ro, vp)) {
229 		resp->status = NFS3ERR_ROFS;
230 		goto out1;
231 	}
232 
233 	if (args->guard.check &&
234 	    (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
235 	    args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
236 		resp->status = NFS3ERR_NOT_SYNC;
237 		goto out1;
238 	}
239 
240 	if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
241 		flag = ATTR_UTIME;
242 	else
243 		flag = 0;
244 
245 	/*
246 	 * If the filesystem is exported with nosuid, then mask off
247 	 * the setuid and setgid bits.
248 	 */
249 	if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
250 	    (exi->exi_export.ex_flags & EX_NOSUID))
251 		ava.va_mode &= ~(VSUID | VSGID);
252 
253 	ct.cc_sysid = 0;
254 	ct.cc_pid = 0;
255 	ct.cc_caller_id = nfs3_srv_caller_id;
256 	ct.cc_flags = CC_DONTBLOCK;
257 
258 	/*
259 	 * We need to specially handle size changes because it is
260 	 * possible for the client to create a file with modes
261 	 * which indicate read-only, but with the file opened for
262 	 * writing.  If the client then tries to set the size of
263 	 * the file, then the normal access checking done in
264 	 * VOP_SETATTR would prevent the client from doing so,
265 	 * although it should be legal for it to do so.  To get
266 	 * around this, we do the access checking for ourselves
267 	 * and then use VOP_SPACE which doesn't do the access
268 	 * checking which VOP_SETATTR does. VOP_SPACE can only
269 	 * operate on VREG files, let VOP_SETATTR handle the other
270 	 * extremely rare cases.
271 	 * Also the client should not be allowed to change the
272 	 * size of the file if there is a conflicting non-blocking
273 	 * mandatory lock in the region the change.
274 	 */
275 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
276 		if (in_crit) {
277 			u_offset_t offset;
278 			ssize_t length;
279 
280 			if (ava.va_size < bva.va_size) {
281 				offset = ava.va_size;
282 				length = bva.va_size - ava.va_size;
283 			} else {
284 				offset = bva.va_size;
285 				length = ava.va_size - bva.va_size;
286 			}
287 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
288 			    NULL)) {
289 				error = EACCES;
290 				goto out;
291 			}
292 		}
293 
294 		if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
295 			ava.va_mask &= ~AT_SIZE;
296 			bf.l_type = F_WRLCK;
297 			bf.l_whence = 0;
298 			bf.l_start = (off64_t)ava.va_size;
299 			bf.l_len = 0;
300 			bf.l_sysid = 0;
301 			bf.l_pid = 0;
302 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
303 			    (offset_t)ava.va_size, cr, &ct);
304 		}
305 	}
306 
307 	if (!error && ava.va_mask)
308 		error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
309 
310 	/* check if a monitor detected a delegation conflict */
311 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
312 		resp->status = NFS3ERR_JUKEBOX;
313 		goto out1;
314 	}
315 
316 	ava.va_mask = AT_ALL;
317 	avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
318 
319 	/*
320 	 * Force modified metadata out to stable storage.
321 	 */
322 	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
323 
324 	if (error)
325 		goto out;
326 
327 	if (in_crit)
328 		nbl_end_crit(vp);
329 
330 	resp->status = NFS3_OK;
331 	vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
332 
333 	DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
334 	    cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
335 
336 	VN_RELE(vp);
337 
338 	return;
339 
340 out:
341 	if (curthread->t_flag & T_WOULDBLOCK) {
342 		curthread->t_flag &= ~T_WOULDBLOCK;
343 		resp->status = NFS3ERR_JUKEBOX;
344 	} else
345 		resp->status = puterrno3(error);
346 out1:
347 	DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
348 	    cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
349 
350 	if (vp != NULL) {
351 		if (in_crit)
352 			nbl_end_crit(vp);
353 		VN_RELE(vp);
354 	}
355 	vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
356 }
357 
358 void *
359 rfs3_setattr_getfh(SETATTR3args *args)
360 {
361 
362 	return (&args->object);
363 }
364 
365 /* ARGSUSED */
366 void
367 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
368     struct svc_req *req, cred_t *cr, bool_t ro)
369 {
370 	int error;
371 	vnode_t *vp;
372 	vnode_t *dvp;
373 	struct vattr *vap;
374 	struct vattr va;
375 	struct vattr *dvap;
376 	struct vattr dva;
377 	nfs_fh3 *fhp;
378 	struct sec_ol sec = {0, 0};
379 	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
380 	struct sockaddr *ca;
381 	char *name = NULL;
382 
383 	dvap = NULL;
384 
385 	if (exi != NULL)
386 		exi_hold(exi);
387 
388 	/*
389 	 * Allow lookups from the root - the default
390 	 * location of the public filehandle.
391 	 */
392 	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
393 		dvp = rootdir;
394 		VN_HOLD(dvp);
395 
396 		DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
397 		    cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
398 	} else {
399 		dvp = nfs3_fhtovp(&args->what.dir, exi);
400 
401 		DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
402 		    cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
403 
404 		if (dvp == NULL) {
405 			error = ESTALE;
406 			goto out;
407 		}
408 	}
409 
410 	dva.va_mask = AT_ALL;
411 	dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
412 
413 	if (args->what.name == nfs3nametoolong) {
414 		resp->status = NFS3ERR_NAMETOOLONG;
415 		goto out1;
416 	}
417 
418 	if (args->what.name == NULL || *(args->what.name) == '\0') {
419 		resp->status = NFS3ERR_ACCES;
420 		goto out1;
421 	}
422 
423 	fhp = &args->what.dir;
424 	if (strcmp(args->what.name, "..") == 0 &&
425 	    EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
426 		if ((exi->exi_export.ex_flags & EX_NOHIDE) &&
427 		    (dvp->v_flag & VROOT)) {
428 			/*
429 			 * special case for ".." and 'nohide'exported root
430 			 */
431 			if (rfs_climb_crossmnt(&dvp, &exi, cr) != 0) {
432 				resp->status = NFS3ERR_ACCES;
433 				goto out1;
434 			}
435 		} else {
436 			resp->status = NFS3ERR_NOENT;
437 			goto out1;
438 		}
439 	}
440 
441 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
442 	name = nfscmd_convname(ca, exi, args->what.name,
443 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
444 
445 	if (name == NULL) {
446 		resp->status = NFS3ERR_ACCES;
447 		goto out1;
448 	}
449 
450 	/*
451 	 * If the public filehandle is used then allow
452 	 * a multi-component lookup
453 	 */
454 	if (PUBLIC_FH3(&args->what.dir)) {
455 		publicfh_flag = TRUE;
456 
457 		exi_rele(exi);
458 
459 		error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
460 		    &exi, &sec);
461 
462 		/*
463 		 * Since WebNFS may bypass MOUNT, we need to ensure this
464 		 * request didn't come from an unlabeled admin_low client.
465 		 */
466 		if (is_system_labeled() && error == 0) {
467 			int		addr_type;
468 			void		*ipaddr;
469 			tsol_tpc_t	*tp;
470 
471 			if (ca->sa_family == AF_INET) {
472 				addr_type = IPV4_VERSION;
473 				ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
474 			} else if (ca->sa_family == AF_INET6) {
475 				addr_type = IPV6_VERSION;
476 				ipaddr = &((struct sockaddr_in6 *)
477 				    ca)->sin6_addr;
478 			}
479 			tp = find_tpc(ipaddr, addr_type, B_FALSE);
480 			if (tp == NULL || tp->tpc_tp.tp_doi !=
481 			    l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
482 			    SUN_CIPSO) {
483 				VN_RELE(vp);
484 				error = EACCES;
485 			}
486 			if (tp != NULL)
487 				TPC_RELE(tp);
488 		}
489 	} else {
490 		error = VOP_LOOKUP(dvp, name, &vp,
491 		    NULL, 0, NULL, cr, NULL, NULL, NULL);
492 	}
493 
494 	if (name != args->what.name)
495 		kmem_free(name, MAXPATHLEN + 1);
496 
497 	if (error == 0 && vn_ismntpt(vp)) {
498 		error = rfs_cross_mnt(&vp, &exi);
499 		if (error)
500 			VN_RELE(vp);
501 	}
502 
503 	if (is_system_labeled() && error == 0) {
504 		bslabel_t *clabel = req->rq_label;
505 
506 		ASSERT(clabel != NULL);
507 		DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
508 		    "got client label from request(1)", struct svc_req *, req);
509 
510 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
511 			if (!do_rfs_label_check(clabel, dvp,
512 			    DOMINANCE_CHECK, exi)) {
513 				VN_RELE(vp);
514 				error = EACCES;
515 			}
516 		}
517 	}
518 
519 	dva.va_mask = AT_ALL;
520 	dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
521 
522 	if (error)
523 		goto out;
524 
525 	if (sec.sec_flags & SEC_QUERY) {
526 		error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
527 	} else {
528 		error = makefh3(&resp->resok.object, vp, exi);
529 		if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
530 			auth_weak = TRUE;
531 	}
532 
533 	if (error) {
534 		VN_RELE(vp);
535 		goto out;
536 	}
537 
538 	va.va_mask = AT_ALL;
539 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
540 
541 	exi_rele(exi);
542 	VN_RELE(vp);
543 
544 	resp->status = NFS3_OK;
545 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
546 	vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
547 
548 	/*
549 	 * If it's public fh, no 0x81, and client's flavor is
550 	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
551 	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
552 	 */
553 	if (auth_weak)
554 		resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
555 
556 	DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
557 	    cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
558 	VN_RELE(dvp);
559 
560 	return;
561 
562 out:
563 	if (curthread->t_flag & T_WOULDBLOCK) {
564 		curthread->t_flag &= ~T_WOULDBLOCK;
565 		resp->status = NFS3ERR_JUKEBOX;
566 	} else
567 		resp->status = puterrno3(error);
568 out1:
569 	if (exi != NULL)
570 		exi_rele(exi);
571 
572 	DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
573 	    cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
574 
575 	if (dvp != NULL)
576 		VN_RELE(dvp);
577 	vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
578 
579 }
580 
581 void *
582 rfs3_lookup_getfh(LOOKUP3args *args)
583 {
584 
585 	return (&args->what.dir);
586 }
587 
588 /* ARGSUSED */
589 void
590 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
591     struct svc_req *req, cred_t *cr, bool_t ro)
592 {
593 	int error;
594 	vnode_t *vp;
595 	struct vattr *vap;
596 	struct vattr va;
597 	int checkwriteperm;
598 	boolean_t dominant_label = B_FALSE;
599 	boolean_t equal_label = B_FALSE;
600 	boolean_t admin_low_client;
601 
602 	vap = NULL;
603 
604 	vp = nfs3_fhtovp(&args->object, exi);
605 
606 	DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
607 	    cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
608 
609 	if (vp == NULL) {
610 		error = ESTALE;
611 		goto out;
612 	}
613 
614 	/*
615 	 * If the file system is exported read only, it is not appropriate
616 	 * to check write permissions for regular files and directories.
617 	 * Special files are interpreted by the client, so the underlying
618 	 * permissions are sent back to the client for interpretation.
619 	 */
620 	if (rdonly(ro, vp) && (vp->v_type == VREG || vp->v_type == VDIR))
621 		checkwriteperm = 0;
622 	else
623 		checkwriteperm = 1;
624 
625 	/*
626 	 * We need the mode so that we can correctly determine access
627 	 * permissions relative to a mandatory lock file.  Access to
628 	 * mandatory lock files is denied on the server, so it might
629 	 * as well be reflected to the server during the open.
630 	 */
631 	va.va_mask = AT_MODE;
632 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
633 	if (error)
634 		goto out;
635 
636 	vap = &va;
637 
638 	resp->resok.access = 0;
639 
640 	if (is_system_labeled()) {
641 		bslabel_t *clabel = req->rq_label;
642 
643 		ASSERT(clabel != NULL);
644 		DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
645 		    "got client label from request(1)", struct svc_req *, req);
646 
647 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
648 			if ((equal_label = do_rfs_label_check(clabel, vp,
649 			    EQUALITY_CHECK, exi)) == B_FALSE) {
650 				dominant_label = do_rfs_label_check(clabel,
651 				    vp, DOMINANCE_CHECK, exi);
652 			} else
653 				dominant_label = B_TRUE;
654 			admin_low_client = B_FALSE;
655 		} else
656 			admin_low_client = B_TRUE;
657 	}
658 
659 	if (args->access & ACCESS3_READ) {
660 		error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
661 		if (error) {
662 			if (curthread->t_flag & T_WOULDBLOCK)
663 				goto out;
664 		} else if (!MANDLOCK(vp, va.va_mode) &&
665 		    (!is_system_labeled() || admin_low_client ||
666 		    dominant_label))
667 			resp->resok.access |= ACCESS3_READ;
668 	}
669 	if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
670 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
671 		if (error) {
672 			if (curthread->t_flag & T_WOULDBLOCK)
673 				goto out;
674 		} else if (!is_system_labeled() || admin_low_client ||
675 		    dominant_label)
676 			resp->resok.access |= ACCESS3_LOOKUP;
677 	}
678 	if (checkwriteperm &&
679 	    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
680 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
681 		if (error) {
682 			if (curthread->t_flag & T_WOULDBLOCK)
683 				goto out;
684 		} else if (!MANDLOCK(vp, va.va_mode) &&
685 		    (!is_system_labeled() || admin_low_client || equal_label)) {
686 			resp->resok.access |=
687 			    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
688 		}
689 	}
690 	if (checkwriteperm &&
691 	    (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
692 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
693 		if (error) {
694 			if (curthread->t_flag & T_WOULDBLOCK)
695 				goto out;
696 		} else if (!is_system_labeled() || admin_low_client ||
697 		    equal_label)
698 			resp->resok.access |= ACCESS3_DELETE;
699 	}
700 	if (args->access & ACCESS3_EXECUTE) {
701 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
702 		if (error) {
703 			if (curthread->t_flag & T_WOULDBLOCK)
704 				goto out;
705 		} else if (!MANDLOCK(vp, va.va_mode) &&
706 		    (!is_system_labeled() || admin_low_client ||
707 		    dominant_label))
708 			resp->resok.access |= ACCESS3_EXECUTE;
709 	}
710 
711 	va.va_mask = AT_ALL;
712 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
713 
714 	resp->status = NFS3_OK;
715 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
716 
717 	DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
718 	    cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
719 
720 	VN_RELE(vp);
721 
722 	return;
723 
724 out:
725 	if (curthread->t_flag & T_WOULDBLOCK) {
726 		curthread->t_flag &= ~T_WOULDBLOCK;
727 		resp->status = NFS3ERR_JUKEBOX;
728 	} else
729 		resp->status = puterrno3(error);
730 	DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
731 	    cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
732 	if (vp != NULL)
733 		VN_RELE(vp);
734 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
735 }
736 
737 void *
738 rfs3_access_getfh(ACCESS3args *args)
739 {
740 
741 	return (&args->object);
742 }
743 
744 /* ARGSUSED */
745 void
746 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
747     struct svc_req *req, cred_t *cr, bool_t ro)
748 {
749 	int error;
750 	vnode_t *vp;
751 	struct vattr *vap;
752 	struct vattr va;
753 	struct iovec iov;
754 	struct uio uio;
755 	char *data;
756 	struct sockaddr *ca;
757 	char *name = NULL;
758 	int is_referral = 0;
759 
760 	vap = NULL;
761 
762 	vp = nfs3_fhtovp(&args->symlink, exi);
763 
764 	DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
765 	    cred_t *, cr, vnode_t *, vp, READLINK3args *, args);
766 
767 	if (vp == NULL) {
768 		error = ESTALE;
769 		goto out;
770 	}
771 
772 	va.va_mask = AT_ALL;
773 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
774 	if (error)
775 		goto out;
776 
777 	vap = &va;
778 
779 	/* We lied about the object type for a referral */
780 	if (vn_is_nfs_reparse(vp, cr))
781 		is_referral = 1;
782 
783 	if (vp->v_type != VLNK && !is_referral) {
784 		resp->status = NFS3ERR_INVAL;
785 		goto out1;
786 	}
787 
788 	if (MANDLOCK(vp, va.va_mode)) {
789 		resp->status = NFS3ERR_ACCES;
790 		goto out1;
791 	}
792 
793 	if (is_system_labeled()) {
794 		bslabel_t *clabel = req->rq_label;
795 
796 		ASSERT(clabel != NULL);
797 		DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
798 		    "got client label from request(1)", struct svc_req *, req);
799 
800 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
801 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
802 			    exi)) {
803 				resp->status = NFS3ERR_ACCES;
804 				goto out1;
805 			}
806 		}
807 	}
808 
809 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
810 
811 	if (is_referral) {
812 		char *s;
813 		size_t strsz;
814 
815 		/* Get an artificial symlink based on a referral */
816 		s = build_symlink(vp, cr, &strsz);
817 		global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
818 		DTRACE_PROBE2(nfs3serv__func__referral__reflink,
819 		    vnode_t *, vp, char *, s);
820 		if (s == NULL)
821 			error = EINVAL;
822 		else {
823 			error = 0;
824 			(void) strlcpy(data, s, MAXPATHLEN + 1);
825 			kmem_free(s, strsz);
826 		}
827 
828 	} else {
829 
830 		iov.iov_base = data;
831 		iov.iov_len = MAXPATHLEN;
832 		uio.uio_iov = &iov;
833 		uio.uio_iovcnt = 1;
834 		uio.uio_segflg = UIO_SYSSPACE;
835 		uio.uio_extflg = UIO_COPY_CACHED;
836 		uio.uio_loffset = 0;
837 		uio.uio_resid = MAXPATHLEN;
838 
839 		error = VOP_READLINK(vp, &uio, cr, NULL);
840 
841 		if (!error)
842 			*(data + MAXPATHLEN - uio.uio_resid) = '\0';
843 	}
844 
845 	va.va_mask = AT_ALL;
846 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
847 
848 	/* Lie about object type again just to be consistent */
849 	if (is_referral && vap != NULL)
850 		vap->va_type = VLNK;
851 
852 #if 0 /* notyet */
853 	/*
854 	 * Don't do this.  It causes local disk writes when just
855 	 * reading the file and the overhead is deemed larger
856 	 * than the benefit.
857 	 */
858 	/*
859 	 * Force modified metadata out to stable storage.
860 	 */
861 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
862 #endif
863 
864 	if (error) {
865 		kmem_free(data, MAXPATHLEN + 1);
866 		goto out;
867 	}
868 
869 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
870 	name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
871 	    MAXPATHLEN + 1);
872 
873 	if (name == NULL) {
874 		/*
875 		 * Even though the conversion failed, we return
876 		 * something. We just don't translate it.
877 		 */
878 		name = data;
879 	}
880 
881 	resp->status = NFS3_OK;
882 	vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
883 	resp->resok.data = name;
884 
885 	DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
886 	    cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
887 	VN_RELE(vp);
888 
889 	if (name != data)
890 		kmem_free(data, MAXPATHLEN + 1);
891 
892 	return;
893 
894 out:
895 	if (curthread->t_flag & T_WOULDBLOCK) {
896 		curthread->t_flag &= ~T_WOULDBLOCK;
897 		resp->status = NFS3ERR_JUKEBOX;
898 	} else
899 		resp->status = puterrno3(error);
900 out1:
901 	DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
902 	    cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
903 	if (vp != NULL)
904 		VN_RELE(vp);
905 	vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
906 }
907 
908 void *
909 rfs3_readlink_getfh(READLINK3args *args)
910 {
911 
912 	return (&args->symlink);
913 }
914 
915 void
916 rfs3_readlink_free(READLINK3res *resp)
917 {
918 
919 	if (resp->status == NFS3_OK)
920 		kmem_free(resp->resok.data, MAXPATHLEN + 1);
921 }
922 
923 /*
924  * Server routine to handle read
925  * May handle RDMA data as well as mblks
926  */
927 /* ARGSUSED */
928 void
929 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
930     struct svc_req *req, cred_t *cr, bool_t ro)
931 {
932 	int error;
933 	vnode_t *vp;
934 	struct vattr *vap;
935 	struct vattr va;
936 	struct iovec iov, *iovp = NULL;
937 	int iovcnt;
938 	struct uio uio;
939 	u_offset_t offset;
940 	mblk_t *mp = NULL;
941 	int in_crit = 0;
942 	int need_rwunlock = 0;
943 	caller_context_t ct;
944 	int rdma_used = 0;
945 	int loaned_buffers;
946 	struct uio *uiop;
947 
948 	vap = NULL;
949 
950 	vp = nfs3_fhtovp(&args->file, exi);
951 
952 	DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
953 	    cred_t *, cr, vnode_t *, vp, READ3args *, args);
954 
955 	if (vp == NULL) {
956 		error = ESTALE;
957 		goto out;
958 	}
959 
960 	if (args->wlist) {
961 		if (args->count > clist_len(args->wlist)) {
962 			error = EINVAL;
963 			goto out;
964 		}
965 		rdma_used = 1;
966 	}
967 
968 	/* use loaned buffers for TCP */
969 	loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
970 
971 	if (is_system_labeled()) {
972 		bslabel_t *clabel = req->rq_label;
973 
974 		ASSERT(clabel != NULL);
975 		DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
976 		    "got client label from request(1)", struct svc_req *, req);
977 
978 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
979 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
980 			    exi)) {
981 				resp->status = NFS3ERR_ACCES;
982 				goto out1;
983 			}
984 		}
985 	}
986 
987 	ct.cc_sysid = 0;
988 	ct.cc_pid = 0;
989 	ct.cc_caller_id = nfs3_srv_caller_id;
990 	ct.cc_flags = CC_DONTBLOCK;
991 
992 	/*
993 	 * Enter the critical region before calling VOP_RWLOCK
994 	 * to avoid a deadlock with write requests.
995 	 */
996 	if (nbl_need_check(vp)) {
997 		nbl_start_crit(vp, RW_READER);
998 		in_crit = 1;
999 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
1000 		    NULL)) {
1001 			error = EACCES;
1002 			goto out;
1003 		}
1004 	}
1005 
1006 	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
1007 
1008 	/* check if a monitor detected a delegation conflict */
1009 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1010 		resp->status = NFS3ERR_JUKEBOX;
1011 		goto out1;
1012 	}
1013 
1014 	need_rwunlock = 1;
1015 
1016 	va.va_mask = AT_ALL;
1017 	error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1018 
1019 	/*
1020 	 * If we can't get the attributes, then we can't do the
1021 	 * right access checking.  So, we'll fail the request.
1022 	 */
1023 	if (error)
1024 		goto out;
1025 
1026 	vap = &va;
1027 
1028 	if (vp->v_type != VREG) {
1029 		resp->status = NFS3ERR_INVAL;
1030 		goto out1;
1031 	}
1032 
1033 	if (crgetuid(cr) != va.va_uid) {
1034 		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1035 		if (error) {
1036 			if (curthread->t_flag & T_WOULDBLOCK)
1037 				goto out;
1038 			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1039 			if (error)
1040 				goto out;
1041 		}
1042 	}
1043 
1044 	if (MANDLOCK(vp, va.va_mode)) {
1045 		resp->status = NFS3ERR_ACCES;
1046 		goto out1;
1047 	}
1048 
1049 	offset = args->offset;
1050 	if (offset >= va.va_size) {
1051 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1052 		if (in_crit)
1053 			nbl_end_crit(vp);
1054 		resp->status = NFS3_OK;
1055 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1056 		resp->resok.count = 0;
1057 		resp->resok.eof = TRUE;
1058 		resp->resok.data.data_len = 0;
1059 		resp->resok.data.data_val = NULL;
1060 		resp->resok.data.mp = NULL;
1061 		/* RDMA */
1062 		resp->resok.wlist = args->wlist;
1063 		resp->resok.wlist_len = resp->resok.count;
1064 		if (resp->resok.wlist)
1065 			clist_zero_len(resp->resok.wlist);
1066 		goto done;
1067 	}
1068 
1069 	if (args->count == 0) {
1070 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1071 		if (in_crit)
1072 			nbl_end_crit(vp);
1073 		resp->status = NFS3_OK;
1074 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1075 		resp->resok.count = 0;
1076 		resp->resok.eof = FALSE;
1077 		resp->resok.data.data_len = 0;
1078 		resp->resok.data.data_val = NULL;
1079 		resp->resok.data.mp = NULL;
1080 		/* RDMA */
1081 		resp->resok.wlist = args->wlist;
1082 		resp->resok.wlist_len = resp->resok.count;
1083 		if (resp->resok.wlist)
1084 			clist_zero_len(resp->resok.wlist);
1085 		goto done;
1086 	}
1087 
1088 	/*
1089 	 * do not allocate memory more the max. allowed
1090 	 * transfer size
1091 	 */
1092 	if (args->count > rfs3_tsize(req))
1093 		args->count = rfs3_tsize(req);
1094 
1095 	if (loaned_buffers) {
1096 		uiop = (uio_t *)rfs_setup_xuio(vp);
1097 		ASSERT(uiop != NULL);
1098 		uiop->uio_segflg = UIO_SYSSPACE;
1099 		uiop->uio_loffset = args->offset;
1100 		uiop->uio_resid = args->count;
1101 
1102 		/* Jump to do the read if successful */
1103 		if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1104 			/*
1105 			 * Need to hold the vnode until after VOP_RETZCBUF()
1106 			 * is called.
1107 			 */
1108 			VN_HOLD(vp);
1109 			goto doio_read;
1110 		}
1111 
1112 		DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1113 		    uiop->uio_loffset, int, uiop->uio_resid);
1114 
1115 		uiop->uio_extflg = 0;
1116 		/* failure to setup for zero copy */
1117 		rfs_free_xuio((void *)uiop);
1118 		loaned_buffers = 0;
1119 	}
1120 
1121 	/*
1122 	 * If returning data via RDMA Write, then grab the chunk list.
1123 	 * If we aren't returning READ data w/RDMA_WRITE, then grab
1124 	 * a mblk.
1125 	 */
1126 	if (rdma_used) {
1127 		(void) rdma_get_wchunk(req, &iov, args->wlist);
1128 		uio.uio_iov = &iov;
1129 		uio.uio_iovcnt = 1;
1130 	} else {
1131 		/*
1132 		 * mp will contain the data to be sent out in the read reply.
1133 		 * For UDP, this will be freed after the reply has been sent
1134 		 * out by the driver.  For TCP, it will be freed after the last
1135 		 * segment associated with the reply has been ACKed by the
1136 		 * client.
1137 		 */
1138 		mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
1139 		uio.uio_iov = iovp;
1140 		uio.uio_iovcnt = iovcnt;
1141 	}
1142 
1143 	uio.uio_segflg = UIO_SYSSPACE;
1144 	uio.uio_extflg = UIO_COPY_CACHED;
1145 	uio.uio_loffset = args->offset;
1146 	uio.uio_resid = args->count;
1147 	uiop = &uio;
1148 
1149 doio_read:
1150 	error = VOP_READ(vp, uiop, 0, cr, &ct);
1151 
1152 	if (error) {
1153 		if (mp)
1154 			freemsg(mp);
1155 		/* check if a monitor detected a delegation conflict */
1156 		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1157 			resp->status = NFS3ERR_JUKEBOX;
1158 			goto out1;
1159 		}
1160 		goto out;
1161 	}
1162 
1163 	/* make mblk using zc buffers */
1164 	if (loaned_buffers) {
1165 		mp = uio_to_mblk(uiop);
1166 		ASSERT(mp != NULL);
1167 	}
1168 
1169 	va.va_mask = AT_ALL;
1170 	error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1171 
1172 	if (error)
1173 		vap = NULL;
1174 	else
1175 		vap = &va;
1176 
1177 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1178 
1179 	if (in_crit)
1180 		nbl_end_crit(vp);
1181 
1182 	resp->status = NFS3_OK;
1183 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1184 	resp->resok.count = args->count - uiop->uio_resid;
1185 	if (!error && offset + resp->resok.count == va.va_size)
1186 		resp->resok.eof = TRUE;
1187 	else
1188 		resp->resok.eof = FALSE;
1189 	resp->resok.data.data_len = resp->resok.count;
1190 
1191 	if (mp)
1192 		rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1193 
1194 	resp->resok.data.mp = mp;
1195 	resp->resok.size = (uint_t)args->count;
1196 
1197 	if (rdma_used) {
1198 		resp->resok.data.data_val = (caddr_t)iov.iov_base;
1199 		if (!rdma_setup_read_data3(args, &(resp->resok))) {
1200 			resp->status = NFS3ERR_INVAL;
1201 		}
1202 	} else {
1203 		resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1204 		(resp->resok).wlist = NULL;
1205 	}
1206 
1207 done:
1208 	DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1209 	    cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1210 
1211 	VN_RELE(vp);
1212 
1213 	if (iovp != NULL)
1214 		kmem_free(iovp, iovcnt * sizeof (struct iovec));
1215 
1216 	return;
1217 
1218 out:
1219 	if (curthread->t_flag & T_WOULDBLOCK) {
1220 		curthread->t_flag &= ~T_WOULDBLOCK;
1221 		resp->status = NFS3ERR_JUKEBOX;
1222 	} else
1223 		resp->status = puterrno3(error);
1224 out1:
1225 	DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1226 	    cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1227 
1228 	if (vp != NULL) {
1229 		if (need_rwunlock)
1230 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1231 		if (in_crit)
1232 			nbl_end_crit(vp);
1233 		VN_RELE(vp);
1234 	}
1235 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1236 
1237 	if (iovp != NULL)
1238 		kmem_free(iovp, iovcnt * sizeof (struct iovec));
1239 }
1240 
1241 void
1242 rfs3_read_free(READ3res *resp)
1243 {
1244 	mblk_t *mp;
1245 
1246 	if (resp->status == NFS3_OK) {
1247 		mp = resp->resok.data.mp;
1248 		if (mp != NULL)
1249 			freemsg(mp);
1250 	}
1251 }
1252 
1253 void *
1254 rfs3_read_getfh(READ3args *args)
1255 {
1256 
1257 	return (&args->file);
1258 }
1259 
/*
 * Number of iovec entries rfs3_write() keeps on its stack.  When the
 * incoming mblk chain has more segments than this, rfs3_write() obtains
 * the iovec array from kmem_alloc() instead.
 */
#define	MAX_IOVECS	12

#ifdef DEBUG
/*
 * DEBUG-only counters maintained by rfs3_write(): "hits" counts mblk
 * chains that fit in the on-stack iovec array, "misses" counts those
 * that needed an allocated array.
 */
static int rfs3_write_hits = 0;
static int rfs3_write_misses = 0;
#endif
1266 
1267 void
1268 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1269     struct svc_req *req, cred_t *cr, bool_t ro)
1270 {
1271 	int error;
1272 	vnode_t *vp;
1273 	struct vattr *bvap = NULL;
1274 	struct vattr bva;
1275 	struct vattr *avap = NULL;
1276 	struct vattr ava;
1277 	u_offset_t rlimit;
1278 	struct uio uio;
1279 	struct iovec iov[MAX_IOVECS];
1280 	mblk_t *m;
1281 	struct iovec *iovp;
1282 	int iovcnt;
1283 	int ioflag;
1284 	cred_t *savecred;
1285 	int in_crit = 0;
1286 	int rwlock_ret = -1;
1287 	caller_context_t ct;
1288 
1289 	vp = nfs3_fhtovp(&args->file, exi);
1290 
1291 	DTRACE_NFSV3_4(op__write__start, struct svc_req *, req,
1292 	    cred_t *, cr, vnode_t *, vp, WRITE3args *, args);
1293 
1294 	if (vp == NULL) {
1295 		error = ESTALE;
1296 		goto err;
1297 	}
1298 
1299 	if (is_system_labeled()) {
1300 		bslabel_t *clabel = req->rq_label;
1301 
1302 		ASSERT(clabel != NULL);
1303 		DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1304 		    "got client label from request(1)", struct svc_req *, req);
1305 
1306 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1307 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1308 			    exi)) {
1309 				resp->status = NFS3ERR_ACCES;
1310 				goto err1;
1311 			}
1312 		}
1313 	}
1314 
1315 	ct.cc_sysid = 0;
1316 	ct.cc_pid = 0;
1317 	ct.cc_caller_id = nfs3_srv_caller_id;
1318 	ct.cc_flags = CC_DONTBLOCK;
1319 
1320 	/*
1321 	 * We have to enter the critical region before calling VOP_RWLOCK
1322 	 * to avoid a deadlock with ufs.
1323 	 */
1324 	if (nbl_need_check(vp)) {
1325 		nbl_start_crit(vp, RW_READER);
1326 		in_crit = 1;
1327 		if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1328 		    NULL)) {
1329 			error = EACCES;
1330 			goto err;
1331 		}
1332 	}
1333 
1334 	rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1335 
1336 	/* check if a monitor detected a delegation conflict */
1337 	if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1338 		resp->status = NFS3ERR_JUKEBOX;
1339 		rwlock_ret = -1;
1340 		goto err1;
1341 	}
1342 
1343 
1344 	bva.va_mask = AT_ALL;
1345 	error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1346 
1347 	/*
1348 	 * If we can't get the attributes, then we can't do the
1349 	 * right access checking.  So, we'll fail the request.
1350 	 */
1351 	if (error)
1352 		goto err;
1353 
1354 	bvap = &bva;
1355 	avap = bvap;
1356 
1357 	if (args->count != args->data.data_len) {
1358 		resp->status = NFS3ERR_INVAL;
1359 		goto err1;
1360 	}
1361 
1362 	if (rdonly(ro, vp)) {
1363 		resp->status = NFS3ERR_ROFS;
1364 		goto err1;
1365 	}
1366 
1367 	if (vp->v_type != VREG) {
1368 		resp->status = NFS3ERR_INVAL;
1369 		goto err1;
1370 	}
1371 
1372 	if (crgetuid(cr) != bva.va_uid &&
1373 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1374 		goto err;
1375 
1376 	if (MANDLOCK(vp, bva.va_mode)) {
1377 		resp->status = NFS3ERR_ACCES;
1378 		goto err1;
1379 	}
1380 
1381 	if (args->count == 0) {
1382 		resp->status = NFS3_OK;
1383 		vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1384 		resp->resok.count = 0;
1385 		resp->resok.committed = args->stable;
1386 		resp->resok.verf = write3verf;
1387 		goto out;
1388 	}
1389 
1390 	if (args->mblk != NULL) {
1391 		iovcnt = 0;
1392 		for (m = args->mblk; m != NULL; m = m->b_cont)
1393 			iovcnt++;
1394 		if (iovcnt <= MAX_IOVECS) {
1395 #ifdef DEBUG
1396 			rfs3_write_hits++;
1397 #endif
1398 			iovp = iov;
1399 		} else {
1400 #ifdef DEBUG
1401 			rfs3_write_misses++;
1402 #endif
1403 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1404 		}
1405 		mblk_to_iov(args->mblk, iovcnt, iovp);
1406 
1407 	} else if (args->rlist != NULL) {
1408 		iovcnt = 1;
1409 		iovp = iov;
1410 		iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1411 		iovp->iov_len = args->count;
1412 	} else {
1413 		iovcnt = 1;
1414 		iovp = iov;
1415 		iovp->iov_base = args->data.data_val;
1416 		iovp->iov_len = args->count;
1417 	}
1418 
1419 	uio.uio_iov = iovp;
1420 	uio.uio_iovcnt = iovcnt;
1421 
1422 	uio.uio_segflg = UIO_SYSSPACE;
1423 	uio.uio_extflg = UIO_COPY_DEFAULT;
1424 	uio.uio_loffset = args->offset;
1425 	uio.uio_resid = args->count;
1426 	uio.uio_llimit = curproc->p_fsz_ctl;
1427 	rlimit = uio.uio_llimit - args->offset;
1428 	if (rlimit < (u_offset_t)uio.uio_resid)
1429 		uio.uio_resid = (int)rlimit;
1430 
1431 	if (args->stable == UNSTABLE)
1432 		ioflag = 0;
1433 	else if (args->stable == FILE_SYNC)
1434 		ioflag = FSYNC;
1435 	else if (args->stable == DATA_SYNC)
1436 		ioflag = FDSYNC;
1437 	else {
1438 		if (iovp != iov)
1439 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
1440 		resp->status = NFS3ERR_INVAL;
1441 		goto err1;
1442 	}
1443 
1444 	/*
1445 	 * We're changing creds because VM may fault and we need
1446 	 * the cred of the current thread to be used if quota
1447 	 * checking is enabled.
1448 	 */
1449 	savecred = curthread->t_cred;
1450 	curthread->t_cred = cr;
1451 	error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1452 	curthread->t_cred = savecred;
1453 
1454 	if (iovp != iov)
1455 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
1456 
1457 	/* check if a monitor detected a delegation conflict */
1458 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1459 		resp->status = NFS3ERR_JUKEBOX;
1460 		goto err1;
1461 	}
1462 
1463 	ava.va_mask = AT_ALL;
1464 	avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1465 
1466 	if (error)
1467 		goto err;
1468 
1469 	/*
1470 	 * If we were unable to get the V_WRITELOCK_TRUE, then we
1471 	 * may not have accurate after attrs, so check if
1472 	 * we have both attributes, they have a non-zero va_seq, and
1473 	 * va_seq has changed by exactly one,
1474 	 * if not, turn off the before attr.
1475 	 */
1476 	if (rwlock_ret != V_WRITELOCK_TRUE) {
1477 		if (bvap == NULL || avap == NULL ||
1478 		    bvap->va_seq == 0 || avap->va_seq == 0 ||
1479 		    avap->va_seq != (bvap->va_seq + 1)) {
1480 			bvap = NULL;
1481 		}
1482 	}
1483 
1484 	resp->status = NFS3_OK;
1485 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1486 	resp->resok.count = args->count - uio.uio_resid;
1487 	resp->resok.committed = args->stable;
1488 	resp->resok.verf = write3verf;
1489 	goto out;
1490 
1491 err:
1492 	if (curthread->t_flag & T_WOULDBLOCK) {
1493 		curthread->t_flag &= ~T_WOULDBLOCK;
1494 		resp->status = NFS3ERR_JUKEBOX;
1495 	} else
1496 		resp->status = puterrno3(error);
1497 err1:
1498 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1499 out:
1500 	DTRACE_NFSV3_4(op__write__done, struct svc_req *, req,
1501 	    cred_t *, cr, vnode_t *, vp, WRITE3res *, resp);
1502 
1503 	if (vp != NULL) {
1504 		if (rwlock_ret != -1)
1505 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1506 		if (in_crit)
1507 			nbl_end_crit(vp);
1508 		VN_RELE(vp);
1509 	}
1510 }
1511 
1512 void *
1513 rfs3_write_getfh(WRITE3args *args)
1514 {
1515 
1516 	return (&args->file);
1517 }
1518 
/*
 * NFSv3 CREATE: create the regular file args->where.name in the directory
 * named by args->where.dir, honoring the UNCHECKED / GUARDED / EXCLUSIVE
 * creation mode in args->how.mode.
 *
 * args	- decoded CREATE3 arguments.
 * resp	- reply to fill in; resok on success, resfail otherwise.
 * exi	- export the directory file handle is resolved against.
 * req	- RPC request (client label, caller address, DTrace probes).
 * cr	- credentials to perform the operation with.
 * ro	- read-only indication passed through to rdonly().
 *
 * The reply carries weak cache consistency data for the directory built
 * from the attributes fetched before (dbva) and after (dava) the create.
 * The success path releases its holds inline and returns; every failure
 * path funnels through "out" (errno to translate) or "out1" (status
 * already set), which free the converted name and drop the tvp/dvp holds.
 */
void
rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	int in_crit = 0;
	vnode_t *vp;
	vnode_t *tvp = NULL;
	vnode_t *dvp;
	struct vattr *vap;
	struct vattr va;
	struct vattr *dbvap;
	struct vattr dbva;
	struct vattr *davap;
	struct vattr dava;
	enum vcexcl excl;
	nfstime3 *mtime;
	len_t reqsize;
	bool_t trunc;
	struct sockaddr *ca;
	char *name = NULL;

	dbvap = NULL;
	davap = NULL;

	dvp = nfs3_fhtovp(&args->where.dir, exi);

	DTRACE_NFSV3_4(op__create__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, CREATE3args *, args);

	if (dvp == NULL) {
		error = ESTALE;
		goto out;
	}

	/* pre-op directory attributes; also used as post-op until the create */
	dbva.va_mask = AT_ALL;
	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
	davap = dbvap;

	if (args->where.name == nfs3nametoolong) {
		resp->status = NFS3ERR_NAMETOOLONG;
		goto out1;
	}

	if (args->where.name == NULL || *(args->where.name) == '\0') {
		resp->status = NFS3ERR_ACCES;
		goto out1;
	}

	if (rdonly(ro, dvp)) {
		resp->status = NFS3ERR_ROFS;
		goto out1;
	}

	if (is_system_labeled()) {
		bslabel_t *clabel = req->rq_label;

		ASSERT(clabel != NULL);
		DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
			    exi)) {
				resp->status = NFS3ERR_ACCES;
				goto out1;
			}
		}
	}

	/*
	 * Convert the client-supplied name.  The result is freed below only
	 * when it is a different buffer from args->where.name.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, args->where.name,
	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);

	if (name == NULL) {
		/* This is really a Solaris EILSEQ */
		resp->status = NFS3ERR_INVAL;
		goto out1;
	}

	if (args->how.mode == EXCLUSIVE) {
		/*
		 * The client's create verifier is stored in the new file's
		 * mtime so that a retransmitted request can be recognized
		 * in the EEXIST handling below.
		 */
		va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
		va.va_type = VREG;
		va.va_mode = (mode_t)0;
		/*
		 * Ensure no time overflows and that types match
		 */
		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
		va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
		va.va_mtime.tv_nsec = mtime->nseconds;
		excl = EXCL;
	} else {
		error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
		    &va);
		if (error)
			goto out;
		va.va_mask |= AT_TYPE;
		va.va_type = VREG;
		if (args->how.mode == GUARDED)
			excl = EXCL;
		else {
			excl = NONEXCL;

			/*
			 * During creation of file in non-exclusive mode
			 * if size of file is being set then make sure
			 * that if the file already exists that no conflicting
			 * non-blocking mandatory locks exists in the region
			 * being modified. If there are conflicting locks fail
			 * the operation with EACCES.
			 */
			if (va.va_mask & AT_SIZE) {
				struct vattr tva;

				/*
				 * Does file already exist?
				 */
				error = VOP_LOOKUP(dvp, name, &tvp,
				    NULL, 0, NULL, cr, NULL, NULL, NULL);

				/*
				 * Check to see if the file has been delegated
				 * to a v4 client.  If so, then begin recall of
				 * the delegation and return JUKEBOX to allow
				 * the client to retransmit its request.
				 */

				trunc = va.va_size == 0;
				if (!error &&
				    rfs4_check_delegated(FWRITE, tvp, trunc)) {
					resp->status = NFS3ERR_JUKEBOX;
					goto out1;
				}

				/*
				 * Check for NBMAND lock conflicts
				 */
				if (!error && nbl_need_check(tvp)) {
					u_offset_t offset;
					ssize_t len;

					/*
					 * The critical region stays entered
					 * (and tvp stays held) until the exit
					 * paths call nbl_end_crit().
					 */
					nbl_start_crit(tvp, RW_READER);
					in_crit = 1;

					tva.va_mask = AT_SIZE;
					error = VOP_GETATTR(tvp, &tva, 0, cr,
					    NULL);
					/*
					 * Can't check for conflicts, so return
					 * error.
					 */
					if (error)
						goto out;

					/*
					 * The affected region lies between
					 * the current and the requested size.
					 */
					offset = tva.va_size < va.va_size ?
					    tva.va_size : va.va_size;
					len = tva.va_size < va.va_size ?
					    va.va_size - tva.va_size :
					    tva.va_size - va.va_size;
					if (nbl_conflict(tvp, NBL_WRITE,
					    offset, len, 0, NULL)) {
						error = EACCES;
						goto out;
					}
				} else if (tvp) {
					VN_RELE(tvp);
					tvp = NULL;
				}
			}
		}
		/* remember the requested size for the check after the create */
		if (va.va_mask & AT_SIZE)
			reqsize = va.va_size;
	}

	/*
	 * Must specify the mode.
	 */
	if (!(va.va_mask & AT_MODE)) {
		resp->status = NFS3ERR_INVAL;
		goto out1;
	}

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

tryagain:
	/*
	 * The file open mode used is VWRITE.  If the client needs
	 * some other semantic, then it should do the access checking
	 * itself.  It would have been nice to have the file open mode
	 * passed as part of the arguments.
	 */
	error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
	    &vp, cr, 0, NULL, NULL);

	/* post-op directory attributes, fetched whether or not create failed */
	dava.va_mask = AT_ALL;
	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;

	if (error) {
		/*
		 * If we got something other than file already exists
		 * then just return this error.  Otherwise, we got
		 * EEXIST.  If we were doing a GUARDED create, then
		 * just return this error.  Otherwise, we need to
		 * make sure that this wasn't a duplicate of an
		 * exclusive create request.
		 *
		 * The assumption is made that a non-exclusive create
		 * request will never return EEXIST.
		 */
		if (error != EEXIST || args->how.mode == GUARDED)
			goto out;
		/*
		 * Lookup the file so that we can get a vnode for it.
		 */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
		    NULL, cr, NULL, NULL, NULL);
		if (error) {
			/*
			 * We couldn't find the file that we thought that
			 * we just created.  So, we'll just try creating
			 * it again.
			 */
			if (error == ENOENT)
				goto tryagain;
			goto out;
		}

		/*
		 * If the file is delegated to a v4 client, go ahead
		 * and initiate recall, this create is a hint that a
		 * conflicting v3 open has occurred.
		 */

		if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
			VN_RELE(vp);
			resp->status = NFS3ERR_JUKEBOX;
			goto out1;
		}

		va.va_mask = AT_ALL;
		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;

		/*
		 * For an EXCLUSIVE create the existing file is accepted only
		 * if its mtime matches the verifier in this request.
		 */
		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
		/* % with INT32_MAX to prevent overflows */
		if (args->how.mode == EXCLUSIVE && (vap == NULL ||
		    vap->va_mtime.tv_sec !=
		    (mtime->seconds % INT32_MAX) ||
		    vap->va_mtime.tv_nsec != mtime->nseconds)) {
			VN_RELE(vp);
			error = EEXIST;
			goto out;
		}
	} else {

		if ((args->how.mode == UNCHECKED ||
		    args->how.mode == GUARDED) &&
		    args->how.createhow3_u.obj_attributes.size.set_it &&
		    va.va_size == 0)
			trunc = TRUE;
		else
			trunc = FALSE;

		if (rfs4_check_delegated(FWRITE, vp, trunc)) {
			VN_RELE(vp);
			resp->status = NFS3ERR_JUKEBOX;
			goto out1;
		}

		va.va_mask = AT_ALL;
		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;

		/*
		 * We need to check to make sure that the file got
		 * created to the indicated size.  If not, we do a
		 * setattr to try to change the size, but we don't
		 * try too hard.  This shouldn't be a problem as most
		 * clients will only specify a size of zero which
		 * local file systems handle.  However, even if
		 * the client does specify a non-zero size, it can
		 * still recover by checking the size of the file
		 * after it has created it and then issue a setattr
		 * request of its own to set the size of the file.
		 */
		if (vap != NULL &&
		    (args->how.mode == UNCHECKED ||
		    args->how.mode == GUARDED) &&
		    args->how.createhow3_u.obj_attributes.size.set_it &&
		    vap->va_size != reqsize) {
			va.va_mask = AT_SIZE;
			va.va_size = reqsize;
			(void) VOP_SETATTR(vp, &va, 0, cr, NULL);
			va.va_mask = AT_ALL;
			vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
		}
	}

	/* success path: the converted name is no longer needed */
	if (name != args->where.name)
		kmem_free(name, MAXPATHLEN + 1);

	/* a failure to build the handle is reported as "no handle follows" */
	error = makefh3(&resp->resok.obj.handle, vp, exi);
	if (error)
		resp->resok.obj.handle_follows = FALSE;
	else
		resp->resok.obj.handle_follows = TRUE;

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
	(void) VOP_FSYNC(dvp, 0, cr, NULL);

	VN_RELE(vp);
	if (tvp != NULL) {
		if (in_crit)
			nbl_end_crit(tvp);
		VN_RELE(tvp);
	}

	resp->status = NFS3_OK;
	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);

	DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);

	VN_RELE(dvp);
	return;

out:
	/* errno-style failure: translate it into an NFSv3 status */
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
out1:
	/* resp->status is already set; release everything still held */
	DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);

	if (name != NULL && name != args->where.name)
		kmem_free(name, MAXPATHLEN + 1);

	if (tvp != NULL) {
		if (in_crit)
			nbl_end_crit(tvp);
		VN_RELE(tvp);
	}
	if (dvp != NULL)
		VN_RELE(dvp);
	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
}
1874 
1875 void *
1876 rfs3_create_getfh(CREATE3args *args)
1877 {
1878 
1879 	return (&args->where.dir);
1880 }
1881 
/*
 * NFSv3 MKDIR: create the directory args->where.name in the directory
 * named by args->where.dir with the attributes in args->attributes.
 *
 * args	- decoded MKDIR3 arguments.
 * resp	- reply to fill in; resok on success, resfail otherwise.
 * exi	- export the directory file handle is resolved against.
 * req	- RPC request (client label, caller address, DTrace probes).
 * cr	- credentials to perform the operation with.
 * ro	- read-only indication passed through to rdonly().
 *
 * The reply carries weak cache consistency data for the parent directory
 * built from the attributes fetched before (dbva) and after (dava) the
 * mkdir.  Failure paths go through "out" (errno to translate) or "out1"
 * (status already set).
 */
void
rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp = NULL;
	vnode_t *dvp;
	struct vattr *vap;
	struct vattr va;
	struct vattr *dbvap;
	struct vattr dbva;
	struct vattr *davap;
	struct vattr dava;
	struct sockaddr *ca;
	char *name = NULL;

	dbvap = NULL;
	davap = NULL;

	dvp = nfs3_fhtovp(&args->where.dir, exi);

	DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args);

	if (dvp == NULL) {
		error = ESTALE;
		goto out;
	}

	/* pre-op directory attributes; also used as post-op until the mkdir */
	dbva.va_mask = AT_ALL;
	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
	davap = dbvap;

	if (args->where.name == nfs3nametoolong) {
		resp->status = NFS3ERR_NAMETOOLONG;
		goto out1;
	}

	if (args->where.name == NULL || *(args->where.name) == '\0') {
		resp->status = NFS3ERR_ACCES;
		goto out1;
	}

	if (rdonly(ro, dvp)) {
		resp->status = NFS3ERR_ROFS;
		goto out1;
	}

	if (is_system_labeled()) {
		bslabel_t *clabel = req->rq_label;

		ASSERT(clabel != NULL);
		DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
			    exi)) {
				resp->status = NFS3ERR_ACCES;
				goto out1;
			}
		}
	}

	error = sattr3_to_vattr(&args->attributes, &va);
	if (error)
		goto out;

	/* the client must supply a mode for the new directory */
	if (!(va.va_mask & AT_MODE)) {
		resp->status = NFS3ERR_INVAL;
		goto out1;
	}

	/*
	 * Convert the client-supplied name.  The result is freed right after
	 * VOP_MKDIR() when it is a different buffer from args->where.name,
	 * which is why the exit labels below do not free it.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, args->where.name,
	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);

	if (name == NULL) {
		resp->status = NFS3ERR_INVAL;
		goto out1;
	}

	va.va_mask |= AT_TYPE;
	va.va_type = VDIR;

	error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);

	if (name != args->where.name)
		kmem_free(name, MAXPATHLEN + 1);

	/* post-op directory attributes, fetched whether or not mkdir failed */
	dava.va_mask = AT_ALL;
	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(dvp, 0, cr, NULL);

	if (error)
		goto out;

	/* a failure to build the handle is reported as "no handle follows" */
	error = makefh3(&resp->resok.obj.handle, vp, exi);
	if (error)
		resp->resok.obj.handle_follows = FALSE;
	else
		resp->resok.obj.handle_follows = TRUE;

	va.va_mask = AT_ALL;
	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	resp->status = NFS3_OK;
	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);

	DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
	VN_RELE(dvp);

	return;

out:
	/* errno-style failure: translate it into an NFSv3 status */
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
out1:
	/* resp->status is already set; drop the directory hold */
	DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
	if (dvp != NULL)
		VN_RELE(dvp);
	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
}
2022 
2023 void *
2024 rfs3_mkdir_getfh(MKDIR3args *args)
2025 {
2026 
2027 	return (&args->where.dir);
2028 }
2029 
/*
 * NFSv3 SYMLINK: create the symbolic link args->where.name in the
 * directory named by args->where.dir, pointing at
 * args->symlink.symlink_data.
 *
 * args	- decoded SYMLINK3 arguments.
 * resp	- reply to fill in; resok on success, resfail otherwise.
 * exi	- export the directory file handle is resolved against.
 * req	- RPC request (client label, caller address, DTrace probes).
 * cr	- credentials to perform the operation with.
 * ro	- read-only indication passed through to rdonly().
 *
 * The reply carries weak cache consistency data for the directory built
 * from the attributes fetched before (dbva) and after (dava) the
 * operation.  All paths, success included, end at "out", which frees the
 * converted strings and drops the directory hold.
 */
void
rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	vnode_t *dvp;
	struct vattr *vap;
	struct vattr va;
	struct vattr *dbvap;
	struct vattr dbva;
	struct vattr *davap;
	struct vattr dava;
	struct sockaddr *ca;
	char *name = NULL;
	char *symdata = NULL;

	dbvap = NULL;
	davap = NULL;

	dvp = nfs3_fhtovp(&args->where.dir, exi);

	DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args);

	if (dvp == NULL) {
		error = ESTALE;
		goto err;
	}

	/* pre-op directory attributes; also used as post-op until the create */
	dbva.va_mask = AT_ALL;
	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
	davap = dbvap;

	if (args->where.name == nfs3nametoolong) {
		resp->status = NFS3ERR_NAMETOOLONG;
		goto err1;
	}

	if (args->where.name == NULL || *(args->where.name) == '\0') {
		resp->status = NFS3ERR_ACCES;
		goto err1;
	}

	if (rdonly(ro, dvp)) {
		resp->status = NFS3ERR_ROFS;
		goto err1;
	}

	if (is_system_labeled()) {
		bslabel_t *clabel = req->rq_label;

		ASSERT(clabel != NULL);
		DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
			    exi)) {
				resp->status = NFS3ERR_ACCES;
				goto err1;
			}
		}
	}

	error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
	if (error)
		goto err;

	/* the client must supply a mode for the new link */
	if (!(va.va_mask & AT_MODE)) {
		resp->status = NFS3ERR_INVAL;
		goto err1;
	}

	if (args->symlink.symlink_data == nfs3nametoolong) {
		resp->status = NFS3ERR_NAMETOOLONG;
		goto err1;
	}

	/*
	 * Convert both the link name and the link contents.  Each result is
	 * freed at "out" only when it is a different buffer from the
	 * corresponding argument string.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, args->where.name,
	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);

	if (name == NULL) {
		/* This is really a Solaris EILSEQ */
		resp->status = NFS3ERR_INVAL;
		goto err1;
	}

	symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
	if (symdata == NULL) {
		/* This is really a Solaris EILSEQ */
		resp->status = NFS3ERR_INVAL;
		goto err1;
	}


	va.va_mask |= AT_TYPE;
	va.va_type = VLNK;

	error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);

	/* post-op directory attributes, fetched whether or not symlink failed */
	dava.va_mask = AT_ALL;
	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;

	if (error)
		goto err;

	/* VOP_SYMLINK() returns no vnode here, so look the new link up */
	error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL);

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(dvp, 0, cr, NULL);


	/*
	 * The link exists at this point, so the reply is a success even if
	 * the lookup failed; in that case no handle or attributes are sent.
	 */
	resp->status = NFS3_OK;
	if (error) {
		resp->resok.obj.handle_follows = FALSE;
		vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
		vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
		goto out;
	}

	/* a failure to build the handle is reported as "no handle follows" */
	error = makefh3(&resp->resok.obj.handle, vp, exi);
	if (error)
		resp->resok.obj.handle_follows = FALSE;
	else
		resp->resok.obj.handle_follows = TRUE;

	va.va_mask = AT_ALL;
	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, 0, cr, NULL);

	VN_RELE(vp);

	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
	goto out;

err:
	/* errno-style failure: translate it into an NFSv3 status */
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
err1:
	/* resp->status is already set; attach the failure wcc data */
	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
out:
	if (name != NULL && name != args->where.name)
		kmem_free(name, MAXPATHLEN + 1);
	if (symdata != NULL && symdata != args->symlink.symlink_data)
		kmem_free(symdata, MAXPATHLEN + 1);

	DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp);

	if (dvp != NULL)
		VN_RELE(dvp);
}
2196 
2197 void *
2198 rfs3_symlink_getfh(SYMLINK3args *args)
2199 {
2200 
2201 	return (&args->where.dir);
2202 }
2203 
2204 void
2205 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2206     struct svc_req *req, cred_t *cr, bool_t ro)
2207 {
2208 	int error;
2209 	vnode_t *vp;
2210 	vnode_t *realvp;
2211 	vnode_t *dvp;
2212 	struct vattr *vap;
2213 	struct vattr va;
2214 	struct vattr *dbvap;
2215 	struct vattr dbva;
2216 	struct vattr *davap;
2217 	struct vattr dava;
2218 	int mode;
2219 	enum vcexcl excl;
2220 	struct sockaddr *ca;
2221 	char *name = NULL;
2222 
2223 	dbvap = NULL;
2224 	davap = NULL;
2225 
2226 	dvp = nfs3_fhtovp(&args->where.dir, exi);
2227 
2228 	DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req,
2229 	    cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args);
2230 
2231 	if (dvp == NULL) {
2232 		error = ESTALE;
2233 		goto out;
2234 	}
2235 
2236 	dbva.va_mask = AT_ALL;
2237 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2238 	davap = dbvap;
2239 
2240 	if (args->where.name == nfs3nametoolong) {
2241 		resp->status = NFS3ERR_NAMETOOLONG;
2242 		goto out1;
2243 	}
2244 
2245 	if (args->where.name == NULL || *(args->where.name) == '\0') {
2246 		resp->status = NFS3ERR_ACCES;
2247 		goto out1;
2248 	}
2249 
2250 	if (rdonly(ro, dvp)) {
2251 		resp->status = NFS3ERR_ROFS;
2252 		goto out1;
2253 	}
2254 
2255 	if (is_system_labeled()) {
2256 		bslabel_t *clabel = req->rq_label;
2257 
2258 		ASSERT(clabel != NULL);
2259 		DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2260 		    "got client label from request(1)", struct svc_req *, req);
2261 
2262 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2263 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2264 			    exi)) {
2265 				resp->status = NFS3ERR_ACCES;
2266 				goto out1;
2267 			}
2268 		}
2269 	}
2270 
2271 	switch (args->what.type) {
2272 	case NF3CHR:
2273 	case NF3BLK:
2274 		error = sattr3_to_vattr(
2275 		    &args->what.mknoddata3_u.device.dev_attributes, &va);
2276 		if (error)
2277 			goto out;
2278 		if (secpolicy_sys_devices(cr) != 0) {
2279 			resp->status = NFS3ERR_PERM;
2280 			goto out1;
2281 		}
2282 		if (args->what.type == NF3CHR)
2283 			va.va_type = VCHR;
2284 		else
2285 			va.va_type = VBLK;
2286 		va.va_rdev = makedevice(
2287 		    args->what.mknoddata3_u.device.spec.specdata1,
2288 		    args->what.mknoddata3_u.device.spec.specdata2);
2289 		va.va_mask |= AT_TYPE | AT_RDEV;
2290 		break;
2291 	case NF3SOCK:
2292 		error = sattr3_to_vattr(
2293 		    &args->what.mknoddata3_u.pipe_attributes, &va);
2294 		if (error)
2295 			goto out;
2296 		va.va_type = VSOCK;
2297 		va.va_mask |= AT_TYPE;
2298 		break;
2299 	case NF3FIFO:
2300 		error = sattr3_to_vattr(
2301 		    &args->what.mknoddata3_u.pipe_attributes, &va);
2302 		if (error)
2303 			goto out;
2304 		va.va_type = VFIFO;
2305 		va.va_mask |= AT_TYPE;
2306 		break;
2307 	default:
2308 		resp->status = NFS3ERR_BADTYPE;
2309 		goto out1;
2310 	}
2311 
2312 	/*
2313 	 * Must specify the mode.
2314 	 */
2315 	if (!(va.va_mask & AT_MODE)) {
2316 		resp->status = NFS3ERR_INVAL;
2317 		goto out1;
2318 	}
2319 
2320 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2321 	name = nfscmd_convname(ca, exi, args->where.name,
2322 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2323 
2324 	if (name == NULL) {
2325 		resp->status = NFS3ERR_INVAL;
2326 		goto out1;
2327 	}
2328 
2329 	excl = EXCL;
2330 
2331 	mode = 0;
2332 
2333 	error = VOP_CREATE(dvp, name, &va, excl, mode,
2334 	    &vp, cr, 0, NULL, NULL);
2335 
2336 	if (name != args->where.name)
2337 		kmem_free(name, MAXPATHLEN + 1);
2338 
2339 	dava.va_mask = AT_ALL;
2340 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2341 
2342 	/*
2343 	 * Force modified data and metadata out to stable storage.
2344 	 */
2345 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
2346 
2347 	if (error)
2348 		goto out;
2349 
2350 	resp->status = NFS3_OK;
2351 
2352 	error = makefh3(&resp->resok.obj.handle, vp, exi);
2353 	if (error)
2354 		resp->resok.obj.handle_follows = FALSE;
2355 	else
2356 		resp->resok.obj.handle_follows = TRUE;
2357 
2358 	va.va_mask = AT_ALL;
2359 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2360 
2361 	/*
2362 	 * Force modified metadata out to stable storage.
2363 	 *
2364 	 * if a underlying vp exists, pass it to VOP_FSYNC
2365 	 */
2366 	if (VOP_REALVP(vp, &realvp, NULL) == 0)
2367 		(void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2368 	else
2369 		(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2370 
2371 	VN_RELE(vp);
2372 
2373 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2374 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2375 	DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2376 	    cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2377 	VN_RELE(dvp);
2378 	return;
2379 
2380 out:
2381 	if (curthread->t_flag & T_WOULDBLOCK) {
2382 		curthread->t_flag &= ~T_WOULDBLOCK;
2383 		resp->status = NFS3ERR_JUKEBOX;
2384 	} else
2385 		resp->status = puterrno3(error);
2386 out1:
2387 	DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2388 	    cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2389 	if (dvp != NULL)
2390 		VN_RELE(dvp);
2391 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2392 }
2393 
2394 void *
2395 rfs3_mknod_getfh(MKNOD3args *args)
2396 {
2397 
2398 	return (&args->where.dir);
2399 }
2400 
2401 void
2402 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2403     struct svc_req *req, cred_t *cr, bool_t ro)
2404 {
2405 	int error = 0;
2406 	vnode_t *vp;
2407 	struct vattr *bvap;
2408 	struct vattr bva;
2409 	struct vattr *avap;
2410 	struct vattr ava;
2411 	vnode_t *targvp = NULL;
2412 	struct sockaddr *ca;
2413 	char *name = NULL;
2414 
2415 	bvap = NULL;
2416 	avap = NULL;
2417 
2418 	vp = nfs3_fhtovp(&args->object.dir, exi);
2419 
2420 	DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req,
2421 	    cred_t *, cr, vnode_t *, vp, REMOVE3args *, args);
2422 
2423 	if (vp == NULL) {
2424 		error = ESTALE;
2425 		goto err;
2426 	}
2427 
2428 	bva.va_mask = AT_ALL;
2429 	bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2430 	avap = bvap;
2431 
2432 	if (vp->v_type != VDIR) {
2433 		resp->status = NFS3ERR_NOTDIR;
2434 		goto err1;
2435 	}
2436 
2437 	if (args->object.name == nfs3nametoolong) {
2438 		resp->status = NFS3ERR_NAMETOOLONG;
2439 		goto err1;
2440 	}
2441 
2442 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2443 		resp->status = NFS3ERR_ACCES;
2444 		goto err1;
2445 	}
2446 
2447 	if (rdonly(ro, vp)) {
2448 		resp->status = NFS3ERR_ROFS;
2449 		goto err1;
2450 	}
2451 
2452 	if (is_system_labeled()) {
2453 		bslabel_t *clabel = req->rq_label;
2454 
2455 		ASSERT(clabel != NULL);
2456 		DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2457 		    "got client label from request(1)", struct svc_req *, req);
2458 
2459 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2460 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2461 			    exi)) {
2462 				resp->status = NFS3ERR_ACCES;
2463 				goto err1;
2464 			}
2465 		}
2466 	}
2467 
2468 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2469 	name = nfscmd_convname(ca, exi, args->object.name,
2470 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2471 
2472 	if (name == NULL) {
2473 		resp->status = NFS3ERR_INVAL;
2474 		goto err1;
2475 	}
2476 
2477 	/*
2478 	 * Check for a conflict with a non-blocking mandatory share
2479 	 * reservation and V4 delegations
2480 	 */
2481 	error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2482 	    NULL, cr, NULL, NULL, NULL);
2483 	if (error != 0)
2484 		goto err;
2485 
2486 	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2487 		resp->status = NFS3ERR_JUKEBOX;
2488 		goto err1;
2489 	}
2490 
2491 	if (!nbl_need_check(targvp)) {
2492 		error = VOP_REMOVE(vp, name, cr, NULL, 0);
2493 	} else {
2494 		nbl_start_crit(targvp, RW_READER);
2495 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2496 			error = EACCES;
2497 		} else {
2498 			error = VOP_REMOVE(vp, name, cr, NULL, 0);
2499 		}
2500 		nbl_end_crit(targvp);
2501 	}
2502 	VN_RELE(targvp);
2503 	targvp = NULL;
2504 
2505 	ava.va_mask = AT_ALL;
2506 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2507 
2508 	/*
2509 	 * Force modified data and metadata out to stable storage.
2510 	 */
2511 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2512 
2513 	if (error)
2514 		goto err;
2515 
2516 	resp->status = NFS3_OK;
2517 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2518 	goto out;
2519 
2520 err:
2521 	if (curthread->t_flag & T_WOULDBLOCK) {
2522 		curthread->t_flag &= ~T_WOULDBLOCK;
2523 		resp->status = NFS3ERR_JUKEBOX;
2524 	} else
2525 		resp->status = puterrno3(error);
2526 err1:
2527 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2528 out:
2529 	DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req,
2530 	    cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp);
2531 
2532 	if (name != NULL && name != args->object.name)
2533 		kmem_free(name, MAXPATHLEN + 1);
2534 
2535 	if (vp != NULL)
2536 		VN_RELE(vp);
2537 }
2538 
2539 void *
2540 rfs3_remove_getfh(REMOVE3args *args)
2541 {
2542 
2543 	return (&args->object.dir);
2544 }
2545 
2546 void
2547 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2548     struct svc_req *req, cred_t *cr, bool_t ro)
2549 {
2550 	int error;
2551 	vnode_t *vp;
2552 	struct vattr *bvap;
2553 	struct vattr bva;
2554 	struct vattr *avap;
2555 	struct vattr ava;
2556 	struct sockaddr *ca;
2557 	char *name = NULL;
2558 
2559 	bvap = NULL;
2560 	avap = NULL;
2561 
2562 	vp = nfs3_fhtovp(&args->object.dir, exi);
2563 
2564 	DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req,
2565 	    cred_t *, cr, vnode_t *, vp, RMDIR3args *, args);
2566 
2567 	if (vp == NULL) {
2568 		error = ESTALE;
2569 		goto err;
2570 	}
2571 
2572 	bva.va_mask = AT_ALL;
2573 	bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2574 	avap = bvap;
2575 
2576 	if (vp->v_type != VDIR) {
2577 		resp->status = NFS3ERR_NOTDIR;
2578 		goto err1;
2579 	}
2580 
2581 	if (args->object.name == nfs3nametoolong) {
2582 		resp->status = NFS3ERR_NAMETOOLONG;
2583 		goto err1;
2584 	}
2585 
2586 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2587 		resp->status = NFS3ERR_ACCES;
2588 		goto err1;
2589 	}
2590 
2591 	if (rdonly(ro, vp)) {
2592 		resp->status = NFS3ERR_ROFS;
2593 		goto err1;
2594 	}
2595 
2596 	if (is_system_labeled()) {
2597 		bslabel_t *clabel = req->rq_label;
2598 
2599 		ASSERT(clabel != NULL);
2600 		DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2601 		    "got client label from request(1)", struct svc_req *, req);
2602 
2603 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2604 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2605 			    exi)) {
2606 				resp->status = NFS3ERR_ACCES;
2607 				goto err1;
2608 			}
2609 		}
2610 	}
2611 
2612 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2613 	name = nfscmd_convname(ca, exi, args->object.name,
2614 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2615 
2616 	if (name == NULL) {
2617 		resp->status = NFS3ERR_INVAL;
2618 		goto err1;
2619 	}
2620 
2621 	error = VOP_RMDIR(vp, name, rootdir, cr, NULL, 0);
2622 
2623 	if (name != args->object.name)
2624 		kmem_free(name, MAXPATHLEN + 1);
2625 
2626 	ava.va_mask = AT_ALL;
2627 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2628 
2629 	/*
2630 	 * Force modified data and metadata out to stable storage.
2631 	 */
2632 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2633 
2634 	if (error) {
2635 		/*
2636 		 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2637 		 * if the directory is not empty.  A System V NFS server
2638 		 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2639 		 * over the wire.
2640 		 */
2641 		if (error == EEXIST)
2642 			error = ENOTEMPTY;
2643 		goto err;
2644 	}
2645 
2646 	resp->status = NFS3_OK;
2647 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2648 	goto out;
2649 
2650 err:
2651 	if (curthread->t_flag & T_WOULDBLOCK) {
2652 		curthread->t_flag &= ~T_WOULDBLOCK;
2653 		resp->status = NFS3ERR_JUKEBOX;
2654 	} else
2655 		resp->status = puterrno3(error);
2656 err1:
2657 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2658 out:
2659 	DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req,
2660 	    cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp);
2661 	if (vp != NULL)
2662 		VN_RELE(vp);
2663 
2664 }
2665 
2666 void *
2667 rfs3_rmdir_getfh(RMDIR3args *args)
2668 {
2669 
2670 	return (&args->object.dir);
2671 }
2672 
2673 void
2674 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2675     struct svc_req *req, cred_t *cr, bool_t ro)
2676 {
2677 	int error = 0;
2678 	vnode_t *fvp;
2679 	vnode_t *tvp;
2680 	vnode_t *targvp;
2681 	struct vattr *fbvap;
2682 	struct vattr fbva;
2683 	struct vattr *favap;
2684 	struct vattr fava;
2685 	struct vattr *tbvap;
2686 	struct vattr tbva;
2687 	struct vattr *tavap;
2688 	struct vattr tava;
2689 	nfs_fh3 *fh3;
2690 	struct exportinfo *to_exi;
2691 	vnode_t *srcvp = NULL;
2692 	bslabel_t *clabel;
2693 	struct sockaddr *ca;
2694 	char *name = NULL;
2695 	char *toname = NULL;
2696 
2697 	fbvap = NULL;
2698 	favap = NULL;
2699 	tbvap = NULL;
2700 	tavap = NULL;
2701 	tvp = NULL;
2702 
2703 	fvp = nfs3_fhtovp(&args->from.dir, exi);
2704 
2705 	DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req,
2706 	    cred_t *, cr, vnode_t *, fvp, RENAME3args *, args);
2707 
2708 	if (fvp == NULL) {
2709 		error = ESTALE;
2710 		goto err;
2711 	}
2712 
2713 	if (is_system_labeled()) {
2714 		clabel = req->rq_label;
2715 		ASSERT(clabel != NULL);
2716 		DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2717 		    "got client label from request(1)", struct svc_req *, req);
2718 
2719 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2720 			if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2721 			    exi)) {
2722 				resp->status = NFS3ERR_ACCES;
2723 				goto err1;
2724 			}
2725 		}
2726 	}
2727 
2728 	fbva.va_mask = AT_ALL;
2729 	fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2730 	favap = fbvap;
2731 
2732 	fh3 = &args->to.dir;
2733 	to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2734 	if (to_exi == NULL) {
2735 		resp->status = NFS3ERR_ACCES;
2736 		goto err1;
2737 	}
2738 	exi_rele(to_exi);
2739 
2740 	if (to_exi != exi) {
2741 		resp->status = NFS3ERR_XDEV;
2742 		goto err1;
2743 	}
2744 
2745 	tvp = nfs3_fhtovp(&args->to.dir, exi);
2746 	if (tvp == NULL) {
2747 		error = ESTALE;
2748 		goto err;
2749 	}
2750 
2751 	tbva.va_mask = AT_ALL;
2752 	tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2753 	tavap = tbvap;
2754 
2755 	if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2756 		resp->status = NFS3ERR_NOTDIR;
2757 		goto err1;
2758 	}
2759 
2760 	if (args->from.name == nfs3nametoolong ||
2761 	    args->to.name == nfs3nametoolong) {
2762 		resp->status = NFS3ERR_NAMETOOLONG;
2763 		goto err1;
2764 	}
2765 	if (args->from.name == NULL || *(args->from.name) == '\0' ||
2766 	    args->to.name == NULL || *(args->to.name) == '\0') {
2767 		resp->status = NFS3ERR_ACCES;
2768 		goto err1;
2769 	}
2770 
2771 	if (rdonly(ro, tvp)) {
2772 		resp->status = NFS3ERR_ROFS;
2773 		goto err1;
2774 	}
2775 
2776 	if (is_system_labeled()) {
2777 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2778 			if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2779 			    exi)) {
2780 				resp->status = NFS3ERR_ACCES;
2781 				goto err1;
2782 			}
2783 		}
2784 	}
2785 
2786 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2787 	name = nfscmd_convname(ca, exi, args->from.name,
2788 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2789 
2790 	if (name == NULL) {
2791 		resp->status = NFS3ERR_INVAL;
2792 		goto err1;
2793 	}
2794 
2795 	toname = nfscmd_convname(ca, exi, args->to.name,
2796 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2797 
2798 	if (toname == NULL) {
2799 		resp->status = NFS3ERR_INVAL;
2800 		goto err1;
2801 	}
2802 
2803 	/*
2804 	 * Check for a conflict with a non-blocking mandatory share
2805 	 * reservation or V4 delegations.
2806 	 */
2807 	error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2808 	    NULL, cr, NULL, NULL, NULL);
2809 	if (error != 0)
2810 		goto err;
2811 
2812 	/*
2813 	 * If we rename a delegated file we should recall the
2814 	 * delegation, since future opens should fail or would
2815 	 * refer to a new file.
2816 	 */
2817 	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2818 		resp->status = NFS3ERR_JUKEBOX;
2819 		goto err1;
2820 	}
2821 
2822 	/*
2823 	 * Check for renaming over a delegated file.  Check rfs4_deleg_policy
2824 	 * first to avoid VOP_LOOKUP if possible.
2825 	 */
2826 	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2827 	    VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2828 	    NULL, NULL, NULL) == 0) {
2829 
2830 		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2831 			VN_RELE(targvp);
2832 			resp->status = NFS3ERR_JUKEBOX;
2833 			goto err1;
2834 		}
2835 		VN_RELE(targvp);
2836 	}
2837 
2838 	if (!nbl_need_check(srcvp)) {
2839 		error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2840 	} else {
2841 		nbl_start_crit(srcvp, RW_READER);
2842 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2843 			error = EACCES;
2844 		else
2845 			error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2846 		nbl_end_crit(srcvp);
2847 	}
2848 	if (error == 0)
2849 		vn_renamepath(tvp, srcvp, args->to.name,
2850 		    strlen(args->to.name));
2851 	VN_RELE(srcvp);
2852 	srcvp = NULL;
2853 
2854 	fava.va_mask = AT_ALL;
2855 	favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2856 	tava.va_mask = AT_ALL;
2857 	tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2858 
2859 	/*
2860 	 * Force modified data and metadata out to stable storage.
2861 	 */
2862 	(void) VOP_FSYNC(fvp, 0, cr, NULL);
2863 	(void) VOP_FSYNC(tvp, 0, cr, NULL);
2864 
2865 	if (error)
2866 		goto err;
2867 
2868 	resp->status = NFS3_OK;
2869 	vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2870 	vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2871 	goto out;
2872 
2873 err:
2874 	if (curthread->t_flag & T_WOULDBLOCK) {
2875 		curthread->t_flag &= ~T_WOULDBLOCK;
2876 		resp->status = NFS3ERR_JUKEBOX;
2877 	} else {
2878 		resp->status = puterrno3(error);
2879 	}
2880 err1:
2881 	vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2882 	vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2883 
2884 out:
2885 	if (name != NULL && name != args->from.name)
2886 		kmem_free(name, MAXPATHLEN + 1);
2887 	if (toname != NULL && toname != args->to.name)
2888 		kmem_free(toname, MAXPATHLEN + 1);
2889 
2890 	DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req,
2891 	    cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp);
2892 	if (fvp != NULL)
2893 		VN_RELE(fvp);
2894 	if (tvp != NULL)
2895 		VN_RELE(tvp);
2896 }
2897 
2898 void *
2899 rfs3_rename_getfh(RENAME3args *args)
2900 {
2901 
2902 	return (&args->from.dir);
2903 }
2904 
2905 void
2906 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2907     struct svc_req *req, cred_t *cr, bool_t ro)
2908 {
2909 	int error;
2910 	vnode_t *vp;
2911 	vnode_t *dvp;
2912 	struct vattr *vap;
2913 	struct vattr va;
2914 	struct vattr *bvap;
2915 	struct vattr bva;
2916 	struct vattr *avap;
2917 	struct vattr ava;
2918 	nfs_fh3	*fh3;
2919 	struct exportinfo *to_exi;
2920 	bslabel_t *clabel;
2921 	struct sockaddr *ca;
2922 	char *name = NULL;
2923 
2924 	vap = NULL;
2925 	bvap = NULL;
2926 	avap = NULL;
2927 	dvp = NULL;
2928 
2929 	vp = nfs3_fhtovp(&args->file, exi);
2930 
2931 	DTRACE_NFSV3_4(op__link__start, struct svc_req *, req,
2932 	    cred_t *, cr, vnode_t *, vp, LINK3args *, args);
2933 
2934 	if (vp == NULL) {
2935 		error = ESTALE;
2936 		goto out;
2937 	}
2938 
2939 	va.va_mask = AT_ALL;
2940 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2941 
2942 	fh3 = &args->link.dir;
2943 	to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2944 	if (to_exi == NULL) {
2945 		resp->status = NFS3ERR_ACCES;
2946 		goto out1;
2947 	}
2948 	exi_rele(to_exi);
2949 
2950 	if (to_exi != exi) {
2951 		resp->status = NFS3ERR_XDEV;
2952 		goto out1;
2953 	}
2954 
2955 	if (is_system_labeled()) {
2956 		clabel = req->rq_label;
2957 
2958 		ASSERT(clabel != NULL);
2959 		DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
2960 		    "got client label from request(1)", struct svc_req *, req);
2961 
2962 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2963 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2964 			    exi)) {
2965 				resp->status = NFS3ERR_ACCES;
2966 				goto out1;
2967 			}
2968 		}
2969 	}
2970 
2971 	dvp = nfs3_fhtovp(&args->link.dir, exi);
2972 	if (dvp == NULL) {
2973 		error = ESTALE;
2974 		goto out;
2975 	}
2976 
2977 	bva.va_mask = AT_ALL;
2978 	bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
2979 
2980 	if (dvp->v_type != VDIR) {
2981 		resp->status = NFS3ERR_NOTDIR;
2982 		goto out1;
2983 	}
2984 
2985 	if (args->link.name == nfs3nametoolong) {
2986 		resp->status = NFS3ERR_NAMETOOLONG;
2987 		goto out1;
2988 	}
2989 
2990 	if (args->link.name == NULL || *(args->link.name) == '\0') {
2991 		resp->status = NFS3ERR_ACCES;
2992 		goto out1;
2993 	}
2994 
2995 	if (rdonly(ro, dvp)) {
2996 		resp->status = NFS3ERR_ROFS;
2997 		goto out1;
2998 	}
2999 
3000 	if (is_system_labeled()) {
3001 		DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
3002 		    "got client label from request(1)", struct svc_req *, req);
3003 
3004 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3005 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
3006 			    exi)) {
3007 				resp->status = NFS3ERR_ACCES;
3008 				goto out1;
3009 			}
3010 		}
3011 	}
3012 
3013 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3014 	name = nfscmd_convname(ca, exi, args->link.name,
3015 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3016 
3017 	if (name == NULL) {
3018 		resp->status = NFS3ERR_SERVERFAULT;
3019 		goto out1;
3020 	}
3021 
3022 	error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3023 
3024 	va.va_mask = AT_ALL;
3025 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3026 	ava.va_mask = AT_ALL;
3027 	avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3028 
3029 	/*
3030 	 * Force modified data and metadata out to stable storage.
3031 	 */
3032 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3033 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
3034 
3035 	if (error)
3036 		goto out;
3037 
3038 	VN_RELE(dvp);
3039 
3040 	resp->status = NFS3_OK;
3041 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3042 	vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3043 
3044 	DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3045 	    cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3046 
3047 	VN_RELE(vp);
3048 
3049 	return;
3050 
3051 out:
3052 	if (curthread->t_flag & T_WOULDBLOCK) {
3053 		curthread->t_flag &= ~T_WOULDBLOCK;
3054 		resp->status = NFS3ERR_JUKEBOX;
3055 	} else
3056 		resp->status = puterrno3(error);
3057 out1:
3058 	if (name != NULL && name != args->link.name)
3059 		kmem_free(name, MAXPATHLEN + 1);
3060 
3061 	DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3062 	    cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3063 
3064 	if (vp != NULL)
3065 		VN_RELE(vp);
3066 	if (dvp != NULL)
3067 		VN_RELE(dvp);
3068 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3069 	vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3070 }
3071 
3072 void *
3073 rfs3_link_getfh(LINK3args *args)
3074 {
3075 
3076 	return (&args->file);
3077 }
3078 
/*
 * Size, in bytes, of a READDIR response that carries directory attributes
 * and exactly one directory entry whose name is "length" bytes long.  If
 * the count in the incoming request is larger than this, we are guaranteed
 * to be able to return at least one directory entry if one exists, so
 * there is no need to check for NFS3ERR_TOOSMALL.  If the count is not
 * larger than this, the caller has to check whether that error must be
 * returned.
 *
 * The fixed part is made up of the following XDR units:
 *
 *	status			1
 *	attr. flag		1
 *	cookie verifier		2
 *	attributes		NFS3_SIZEOF_FATTR3
 *	boolean			1
 *	file id			2
 *	directory name length	1
 *	cookie			2
 *	end of list		1
 *	end of file		1
 *
 * each of BYTES_PER_XDR_UNIT bytes, to which the name length rounded up
 * to a multiple of BYTES_PER_XDR_UNIT is added.
 */

#define	NFS3_READDIR_MIN_COUNT(length)	\
	(roundup((length), BYTES_PER_XDR_UNIT) + \
	    (1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1) * \
	    BYTES_PER_XDR_UNIT)
3107 
3108 /* ARGSUSED */
3109 void
3110 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3111     struct svc_req *req, cred_t *cr, bool_t ro)
3112 {
3113 	int error;
3114 	vnode_t *vp;
3115 	struct vattr *vap;
3116 	struct vattr va;
3117 	struct iovec iov;
3118 	struct uio uio;
3119 	char *data;
3120 	int iseof;
3121 	int bufsize;
3122 	int namlen;
3123 	uint_t count;
3124 	struct sockaddr *ca;
3125 
3126 	vap = NULL;
3127 
3128 	vp = nfs3_fhtovp(&args->dir, exi);
3129 
3130 	DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req,
3131 	    cred_t *, cr, vnode_t *, vp, READDIR3args *, args);
3132 
3133 	if (vp == NULL) {
3134 		error = ESTALE;
3135 		goto out;
3136 	}
3137 
3138 	if (is_system_labeled()) {
3139 		bslabel_t *clabel = req->rq_label;
3140 
3141 		ASSERT(clabel != NULL);
3142 		DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3143 		    "got client label from request(1)", struct svc_req *, req);
3144 
3145 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3146 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3147 			    exi)) {
3148 				resp->status = NFS3ERR_ACCES;
3149 				goto out1;
3150 			}
3151 		}
3152 	}
3153 
3154 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3155 
3156 	va.va_mask = AT_ALL;
3157 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3158 
3159 	if (vp->v_type != VDIR) {
3160 		resp->status = NFS3ERR_NOTDIR;
3161 		goto out1;
3162 	}
3163 
3164 	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3165 	if (error)
3166 		goto out;
3167 
3168 	/*
3169 	 * Now don't allow arbitrary count to alloc;
3170 	 * allow the maximum not to exceed rfs3_tsize()
3171 	 */
3172 	if (args->count > rfs3_tsize(req))
3173 		args->count = rfs3_tsize(req);
3174 
3175 	/*
3176 	 * Make sure that there is room to read at least one entry
3177 	 * if any are available.
3178 	 */
3179 	if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3180 		count = DIRENT64_RECLEN(MAXNAMELEN);
3181 	else
3182 		count = args->count;
3183 
3184 	data = kmem_alloc(count, KM_SLEEP);
3185 
3186 	iov.iov_base = data;
3187 	iov.iov_len = count;
3188 	uio.uio_iov = &iov;
3189 	uio.uio_iovcnt = 1;
3190 	uio.uio_segflg = UIO_SYSSPACE;
3191 	uio.uio_extflg = UIO_COPY_CACHED;
3192 	uio.uio_loffset = (offset_t)args->cookie;
3193 	uio.uio_resid = count;
3194 
3195 	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3196 
3197 	va.va_mask = AT_ALL;
3198 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3199 
3200 	if (error) {
3201 		kmem_free(data, count);
3202 		goto out;
3203 	}
3204 
3205 	/*
3206 	 * If the count was not large enough to be able to guarantee
3207 	 * to be able to return at least one entry, then need to
3208 	 * check to see if NFS3ERR_TOOSMALL should be returned.
3209 	 */
3210 	if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3211 		/*
3212 		 * bufsize is used to keep track of the size of the response.
3213 		 * It is primed with:
3214 		 *	1 for the status +
3215 		 *	1 for the dir_attributes.attributes boolean +
3216 		 *	2 for the cookie verifier
3217 		 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3218 		 * to bytes.  If there are directory attributes to be
3219 		 * returned, then:
3220 		 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3221 		 * time BYTES_PER_XDR_UNIT is added to account for them.
3222 		 */
3223 		bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3224 		if (vap != NULL)
3225 			bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3226 		/*
3227 		 * An entry is composed of:
3228 		 *	1 for the true/false list indicator +
3229 		 *	2 for the fileid +
3230 		 *	1 for the length of the name +
3231 		 *	2 for the cookie +
3232 		 * all times BYTES_PER_XDR_UNIT to convert from
3233 		 * XDR units to bytes, plus the length of the name
3234 		 * rounded up to the nearest BYTES_PER_XDR_UNIT.
3235 		 */
3236 		if (count != uio.uio_resid) {
3237 			namlen = strlen(((struct dirent64 *)data)->d_name);
3238 			bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3239 			    roundup(namlen, BYTES_PER_XDR_UNIT);
3240 		}
3241 		/*
3242 		 * We need to check to see if the number of bytes left
3243 		 * to go into the buffer will actually fit into the
3244 		 * buffer.  This is calculated as the size of this
3245 		 * entry plus:
3246 		 *	1 for the true/false list indicator +
3247 		 *	1 for the eof indicator
3248 		 * times BYTES_PER_XDR_UNIT to convert from from
3249 		 * XDR units to bytes.
3250 		 */
3251 		bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3252 		if (bufsize > args->count) {
3253 			kmem_free(data, count);
3254 			resp->status = NFS3ERR_TOOSMALL;
3255 			goto out1;
3256 		}
3257 	}
3258 
3259 	/*
3260 	 * Have a valid readir buffer for the native character
3261 	 * set. Need to check if a conversion is necessary and
3262 	 * potentially rewrite the whole buffer. Note that if the
3263 	 * conversion expands names enough, the structure may not
3264 	 * fit. In this case, we need to drop entries until if fits
3265 	 * and patch the counts in order that the next readdir will
3266 	 * get the correct entries.
3267 	 */
3268 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3269 	data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3270 
3271 
3272 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3273 
3274 #if 0 /* notyet */
3275 	/*
3276 	 * Don't do this.  It causes local disk writes when just
3277 	 * reading the file and the overhead is deemed larger
3278 	 * than the benefit.
3279 	 */
3280 	/*
3281 	 * Force modified metadata out to stable storage.
3282 	 */
3283 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3284 #endif
3285 
3286 	resp->status = NFS3_OK;
3287 	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3288 	resp->resok.cookieverf = 0;
3289 	resp->resok.reply.entries = (entry3 *)data;
3290 	resp->resok.reply.eof = iseof;
3291 	resp->resok.size = count - uio.uio_resid;
3292 	resp->resok.count = args->count;
3293 	resp->resok.freecount = count;
3294 
3295 	DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3296 	    cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3297 
3298 	VN_RELE(vp);
3299 
3300 	return;
3301 
3302 out:
3303 	if (curthread->t_flag & T_WOULDBLOCK) {
3304 		curthread->t_flag &= ~T_WOULDBLOCK;
3305 		resp->status = NFS3ERR_JUKEBOX;
3306 	} else
3307 		resp->status = puterrno3(error);
3308 out1:
3309 	DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3310 	    cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3311 
3312 	if (vp != NULL) {
3313 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3314 		VN_RELE(vp);
3315 	}
3316 	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3317 }
3318 
3319 void *
3320 rfs3_readdir_getfh(READDIR3args *args)
3321 {
3322 
3323 	return (&args->dir);
3324 }
3325 
3326 void
3327 rfs3_readdir_free(READDIR3res *resp)
3328 {
3329 
3330 	if (resp->status == NFS3_OK)
3331 		kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3332 }
3333 
/*
 * nextdp(dp) yields the dirent64 record that starts d_reclen bytes after
 * the record dp points to.  Any earlier definition of nextdp is discarded
 * first.
 */
#undef nextdp
#define	nextdp(dp)	\
	((struct dirent64 *)&((char *)(dp))[(dp)->d_reclen])
3338 
/*
 * This macro computes the size, in bytes, of the part of a response that
 * holds one directory entry including its attributes and file handle.
 * If the incoming request is larger than this, then we are guaranteed to
 * be able to return at least one more directory entry if one exists.
 *
 * NFS3_READDIRPLUS_ENTRY is made up of the following:
 *
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * attribute flag - 1 * BYTES_PER_XDR_UNIT
 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * status byte for file handle - 1 * BYTES_PER_XDR_UNIT
 * length of a file handle - 1 * BYTES_PER_XDR_UNIT
 * maximum length of a file handle (NFS3_MAXFHSIZE)
 * name length of the entry, rounded up to a multiple of BYTES_PER_XDR_UNIT
 *
 * The argument is parenthesized in the expansion so that an expression
 * may safely be passed as namelen.
 */
#define	NFS3_READDIRPLUS_ENTRY(namelen)	\
	((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * \
		BYTES_PER_XDR_UNIT + \
	NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT))
3362 
3363 static int rfs3_readdir_unit = MAXBSIZE;
3364 
3365 /* ARGSUSED */
3366 void
3367 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3368     struct exportinfo *exi, struct svc_req *req, cred_t *cr, bool_t ro)
3369 {
3370 	int error;
3371 	vnode_t *vp;
3372 	struct vattr *vap;
3373 	struct vattr va;
3374 	struct iovec iov;
3375 	struct uio uio;
3376 	char *data;
3377 	int iseof;
3378 	struct dirent64 *dp;
3379 	vnode_t *nvp;
3380 	struct vattr *nvap;
3381 	struct vattr nva;
3382 	entryplus3_info *infop = NULL;
3383 	int size = 0;
3384 	int nents = 0;
3385 	int bufsize = 0;
3386 	int entrysize = 0;
3387 	int tofit = 0;
3388 	int rd_unit = rfs3_readdir_unit;
3389 	int prev_len;
3390 	int space_left;
3391 	int i;
3392 	uint_t *namlen = NULL;
3393 	char *ndata = NULL;
3394 	struct sockaddr *ca;
3395 	size_t ret;
3396 
3397 	vap = NULL;
3398 
3399 	vp = nfs3_fhtovp(&args->dir, exi);
3400 
3401 	DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req,
3402 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args);
3403 
3404 	if (vp == NULL) {
3405 		error = ESTALE;
3406 		goto out;
3407 	}
3408 
3409 	if (is_system_labeled()) {
3410 		bslabel_t *clabel = req->rq_label;
3411 
3412 		ASSERT(clabel != NULL);
3413 		DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3414 		    char *, "got client label from request(1)",
3415 		    struct svc_req *, req);
3416 
3417 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3418 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3419 			    exi)) {
3420 				resp->status = NFS3ERR_ACCES;
3421 				goto out1;
3422 			}
3423 		}
3424 	}
3425 
3426 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3427 
3428 	va.va_mask = AT_ALL;
3429 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3430 
3431 	if (vp->v_type != VDIR) {
3432 		error = ENOTDIR;
3433 		goto out;
3434 	}
3435 
3436 	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3437 	if (error)
3438 		goto out;
3439 
3440 	/*
3441 	 * Don't allow arbitrary counts for allocation
3442 	 */
3443 	if (args->maxcount > rfs3_tsize(req))
3444 		args->maxcount = rfs3_tsize(req);
3445 
3446 	/*
3447 	 * Make sure that there is room to read at least one entry
3448 	 * if any are available
3449 	 */
3450 	args->dircount = MIN(args->dircount, args->maxcount);
3451 
3452 	if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3453 		args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3454 
3455 	/*
3456 	 * This allocation relies on a minimum directory entry
3457 	 * being roughly 24 bytes.  Therefore, the namlen array
3458 	 * will have enough space based on the maximum number of
3459 	 * entries to read.
3460 	 */
3461 	namlen = kmem_alloc(args->dircount, KM_SLEEP);
3462 
3463 	space_left = args->dircount;
3464 	data = kmem_alloc(args->dircount, KM_SLEEP);
3465 	dp = (struct dirent64 *)data;
3466 	uio.uio_iov = &iov;
3467 	uio.uio_iovcnt = 1;
3468 	uio.uio_segflg = UIO_SYSSPACE;
3469 	uio.uio_extflg = UIO_COPY_CACHED;
3470 	uio.uio_loffset = (offset_t)args->cookie;
3471 
3472 	/*
3473 	 * bufsize is used to keep track of the size of the response as we
3474 	 * get post op attributes and filehandles for each entry.  This is
3475 	 * an optimization as the server may have read more entries than will
3476 	 * fit in the buffer specified by maxcount.  We stop calculating
3477 	 * post op attributes and filehandles once we have exceeded maxcount.
3478 	 * This will minimize the effect of truncation.
3479 	 *
3480 	 * It is primed with:
3481 	 *	1 for the status +
3482 	 *	1 for the dir_attributes.attributes boolean +
3483 	 *	2 for the cookie verifier
3484 	 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3485 	 * to bytes.  If there are directory attributes to be
3486 	 * returned, then:
3487 	 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3488 	 * time BYTES_PER_XDR_UNIT is added to account for them.
3489 	 */
3490 	bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3491 	if (vap != NULL)
3492 		bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3493 
3494 getmoredents:
3495 	/*
3496 	 * Here we make a check so that our read unit is not larger than
3497 	 * the space left in the buffer.
3498 	 */
3499 	rd_unit = MIN(rd_unit, space_left);
3500 	iov.iov_base = (char *)dp;
3501 	iov.iov_len = rd_unit;
3502 	uio.uio_resid = rd_unit;
3503 	prev_len = rd_unit;
3504 
3505 	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3506 
3507 	if (error) {
3508 		kmem_free(data, args->dircount);
3509 		goto out;
3510 	}
3511 
3512 	if (uio.uio_resid == prev_len && !iseof) {
3513 		if (nents == 0) {
3514 			kmem_free(data, args->dircount);
3515 			resp->status = NFS3ERR_TOOSMALL;
3516 			goto out1;
3517 		}
3518 
3519 		/*
3520 		 * We could not get any more entries, so get the attributes
3521 		 * and filehandle for the entries already obtained.
3522 		 */
3523 		goto good;
3524 	}
3525 
3526 	/*
3527 	 * We estimate the size of the response by assuming the
3528 	 * entry exists and attributes and filehandle are also valid
3529 	 */
3530 	for (size = prev_len - uio.uio_resid;
3531 	    size > 0;
3532 	    size -= dp->d_reclen, dp = nextdp(dp)) {
3533 
3534 		if (dp->d_ino == 0) {
3535 			nents++;
3536 			continue;
3537 		}
3538 
3539 		namlen[nents] = strlen(dp->d_name);
3540 		entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3541 
3542 		/*
3543 		 * We need to check to see if the number of bytes left
3544 		 * to go into the buffer will actually fit into the
3545 		 * buffer.  This is calculated as the size of this
3546 		 * entry plus:
3547 		 *	1 for the true/false list indicator +
3548 		 *	1 for the eof indicator
3549 		 * times BYTES_PER_XDR_UNIT to convert from XDR units
3550 		 * to bytes.
3551 		 *
3552 		 * Also check the dircount limit against the first entry read
3553 		 *
3554 		 */
3555 		tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3556 		if (bufsize + tofit > args->maxcount) {
3557 			/*
3558 			 * We make a check here to see if this was the
3559 			 * first entry being measured.  If so, then maxcount
3560 			 * was too small to begin with and so we need to
3561 			 * return with NFS3ERR_TOOSMALL.
3562 			 */
3563 			if (nents == 0) {
3564 				kmem_free(data, args->dircount);
3565 				resp->status = NFS3ERR_TOOSMALL;
3566 				goto out1;
3567 			}
3568 			iseof = FALSE;
3569 			goto good;
3570 		}
3571 		bufsize += entrysize;
3572 		nents++;
3573 	}
3574 
3575 	/*
3576 	 * If there is enough room to fit at least 1 more entry including
3577 	 * post op attributes and filehandle in the buffer AND that we haven't
3578 	 * exceeded dircount then go back and get some more.
3579 	 */
3580 	if (!iseof &&
3581 	    (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3582 		space_left -= (prev_len - uio.uio_resid);
3583 		if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3584 			goto getmoredents;
3585 
3586 		/* else, fall through */
3587 	}
3588 good:
3589 	va.va_mask = AT_ALL;
3590 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3591 
3592 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3593 
3594 	infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3595 	resp->resok.infop = infop;
3596 
3597 	dp = (struct dirent64 *)data;
3598 	for (i = 0; i < nents; i++) {
3599 
3600 		if (dp->d_ino == 0) {
3601 			infop[i].attr.attributes = FALSE;
3602 			infop[i].fh.handle_follows = FALSE;
3603 			dp = nextdp(dp);
3604 			continue;
3605 		}
3606 
3607 		infop[i].namelen = namlen[i];
3608 
3609 		error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3610 		    NULL, NULL, NULL);
3611 		if (error) {
3612 			infop[i].attr.attributes = FALSE;
3613 			infop[i].fh.handle_follows = FALSE;
3614 			dp = nextdp(dp);
3615 			continue;
3616 		}
3617 
3618 		nva.va_mask = AT_ALL;
3619 		nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3620 
3621 		/* Lie about the object type for a referral */
3622 		if (vn_is_nfs_reparse(nvp, cr))
3623 			nvap->va_type = VLNK;
3624 
3625 		if (vn_ismntpt(nvp)) {
3626 			infop[i].attr.attributes = FALSE;
3627 			infop[i].fh.handle_follows = FALSE;
3628 		} else {
3629 			vattr_to_post_op_attr(nvap, &infop[i].attr);
3630 
3631 			error = makefh3(&infop[i].fh.handle, nvp, exi);
3632 			if (!error)
3633 				infop[i].fh.handle_follows = TRUE;
3634 			else
3635 				infop[i].fh.handle_follows = FALSE;
3636 		}
3637 
3638 		VN_RELE(nvp);
3639 		dp = nextdp(dp);
3640 	}
3641 
3642 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3643 	ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3644 	if (ndata == NULL)
3645 		ndata = data;
3646 
3647 	if (ret > 0) {
3648 		/*
3649 		 * We had to drop one or more entries in order to fit
3650 		 * during the character conversion.  We need to patch
3651 		 * up the size and eof info.
3652 		 */
3653 		if (iseof)
3654 			iseof = FALSE;
3655 
3656 		ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3657 		    nents, ret);
3658 	}
3659 
3660 
3661 #if 0 /* notyet */
3662 	/*
3663 	 * Don't do this.  It causes local disk writes when just
3664 	 * reading the file and the overhead is deemed larger
3665 	 * than the benefit.
3666 	 */
3667 	/*
3668 	 * Force modified metadata out to stable storage.
3669 	 */
3670 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3671 #endif
3672 
3673 	kmem_free(namlen, args->dircount);
3674 
3675 	resp->status = NFS3_OK;
3676 	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3677 	resp->resok.cookieverf = 0;
3678 	resp->resok.reply.entries = (entryplus3 *)ndata;
3679 	resp->resok.reply.eof = iseof;
3680 	resp->resok.size = nents;
3681 	resp->resok.count = args->dircount - ret;
3682 	resp->resok.maxcount = args->maxcount;
3683 
3684 	DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3685 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3686 	if (ndata != data)
3687 		kmem_free(data, args->dircount);
3688 
3689 
3690 	VN_RELE(vp);
3691 
3692 	return;
3693 
3694 out:
3695 	if (curthread->t_flag & T_WOULDBLOCK) {
3696 		curthread->t_flag &= ~T_WOULDBLOCK;
3697 		resp->status = NFS3ERR_JUKEBOX;
3698 	} else {
3699 		resp->status = puterrno3(error);
3700 	}
3701 out1:
3702 	DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3703 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3704 
3705 	if (vp != NULL) {
3706 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3707 		VN_RELE(vp);
3708 	}
3709 
3710 	if (namlen != NULL)
3711 		kmem_free(namlen, args->dircount);
3712 
3713 	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3714 }
3715 
3716 void *
3717 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3718 {
3719 
3720 	return (&args->dir);
3721 }
3722 
3723 void
3724 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3725 {
3726 
3727 	if (resp->status == NFS3_OK) {
3728 		kmem_free(resp->resok.reply.entries, resp->resok.count);
3729 		kmem_free(resp->resok.infop,
3730 		    resp->resok.size * sizeof (struct entryplus3_info));
3731 	}
3732 }
3733 
3734 /* ARGSUSED */
3735 void
3736 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3737     struct svc_req *req, cred_t *cr, bool_t ro)
3738 {
3739 	int error;
3740 	vnode_t *vp;
3741 	struct vattr *vap;
3742 	struct vattr va;
3743 	struct statvfs64 sb;
3744 
3745 	vap = NULL;
3746 
3747 	vp = nfs3_fhtovp(&args->fsroot, exi);
3748 
3749 	DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req,
3750 	    cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args);
3751 
3752 	if (vp == NULL) {
3753 		error = ESTALE;
3754 		goto out;
3755 	}
3756 
3757 	if (is_system_labeled()) {
3758 		bslabel_t *clabel = req->rq_label;
3759 
3760 		ASSERT(clabel != NULL);
3761 		DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3762 		    "got client label from request(1)", struct svc_req *, req);
3763 
3764 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3765 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3766 			    exi)) {
3767 				resp->status = NFS3ERR_ACCES;
3768 				goto out1;
3769 			}
3770 		}
3771 	}
3772 
3773 	error = VFS_STATVFS(vp->v_vfsp, &sb);
3774 
3775 	va.va_mask = AT_ALL;
3776 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3777 
3778 	if (error)
3779 		goto out;
3780 
3781 	resp->status = NFS3_OK;
3782 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3783 	if (sb.f_blocks != (fsblkcnt64_t)-1)
3784 		resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3785 	else
3786 		resp->resok.tbytes = (size3)sb.f_blocks;
3787 	if (sb.f_bfree != (fsblkcnt64_t)-1)
3788 		resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3789 	else
3790 		resp->resok.fbytes = (size3)sb.f_bfree;
3791 	if (sb.f_bavail != (fsblkcnt64_t)-1)
3792 		resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3793 	else
3794 		resp->resok.abytes = (size3)sb.f_bavail;
3795 	resp->resok.tfiles = (size3)sb.f_files;
3796 	resp->resok.ffiles = (size3)sb.f_ffree;
3797 	resp->resok.afiles = (size3)sb.f_favail;
3798 	resp->resok.invarsec = 0;
3799 
3800 	DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3801 	    cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3802 	VN_RELE(vp);
3803 
3804 	return;
3805 
3806 out:
3807 	if (curthread->t_flag & T_WOULDBLOCK) {
3808 		curthread->t_flag &= ~T_WOULDBLOCK;
3809 		resp->status = NFS3ERR_JUKEBOX;
3810 	} else
3811 		resp->status = puterrno3(error);
3812 out1:
3813 	DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3814 	    cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3815 
3816 	if (vp != NULL)
3817 		VN_RELE(vp);
3818 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3819 }
3820 
3821 void *
3822 rfs3_fsstat_getfh(FSSTAT3args *args)
3823 {
3824 
3825 	return (&args->fsroot);
3826 }
3827 
3828 /* ARGSUSED */
3829 void
3830 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3831     struct svc_req *req, cred_t *cr, bool_t ro)
3832 {
3833 	vnode_t *vp;
3834 	struct vattr *vap;
3835 	struct vattr va;
3836 	uint32_t xfer_size;
3837 	ulong_t l = 0;
3838 	int error;
3839 
3840 	vp = nfs3_fhtovp(&args->fsroot, exi);
3841 
3842 	DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req,
3843 	    cred_t *, cr, vnode_t *, vp, FSINFO3args *, args);
3844 
3845 	if (vp == NULL) {
3846 		if (curthread->t_flag & T_WOULDBLOCK) {
3847 			curthread->t_flag &= ~T_WOULDBLOCK;
3848 			resp->status = NFS3ERR_JUKEBOX;
3849 		} else
3850 			resp->status = NFS3ERR_STALE;
3851 		vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3852 		goto out;
3853 	}
3854 
3855 	if (is_system_labeled()) {
3856 		bslabel_t *clabel = req->rq_label;
3857 
3858 		ASSERT(clabel != NULL);
3859 		DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3860 		    "got client label from request(1)", struct svc_req *, req);
3861 
3862 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3863 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3864 			    exi)) {
3865 				resp->status = NFS3ERR_STALE;
3866 				vattr_to_post_op_attr(NULL,
3867 				    &resp->resfail.obj_attributes);
3868 				goto out;
3869 			}
3870 		}
3871 	}
3872 
3873 	va.va_mask = AT_ALL;
3874 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3875 
3876 	resp->status = NFS3_OK;
3877 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3878 	xfer_size = rfs3_tsize(req);
3879 	resp->resok.rtmax = xfer_size;
3880 	resp->resok.rtpref = xfer_size;
3881 	resp->resok.rtmult = DEV_BSIZE;
3882 	resp->resok.wtmax = xfer_size;
3883 	resp->resok.wtpref = xfer_size;
3884 	resp->resok.wtmult = DEV_BSIZE;
3885 	resp->resok.dtpref = MAXBSIZE;
3886 
3887 	/*
3888 	 * Large file spec: want maxfilesize based on limit of
3889 	 * underlying filesystem.  We can guess 2^31-1 if need be.
3890 	 */
3891 	error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3892 	if (error) {
3893 		resp->status = puterrno3(error);
3894 		goto out;
3895 	}
3896 
3897 	/*
3898 	 * If the underlying file system does not support _PC_FILESIZEBITS,
3899 	 * return a reasonable default. Note that error code on VOP_PATHCONF
3900 	 * will be 0, even if the underlying file system does not support
3901 	 * _PC_FILESIZEBITS.
3902 	 */
3903 	if (l == (ulong_t)-1) {
3904 		resp->resok.maxfilesize = MAXOFF32_T;
3905 	} else {
3906 		if (l >= (sizeof (uint64_t) * 8))
3907 			resp->resok.maxfilesize = INT64_MAX;
3908 		else
3909 			resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3910 	}
3911 
3912 	resp->resok.time_delta.seconds = 0;
3913 	resp->resok.time_delta.nseconds = 1000;
3914 	resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3915 	    FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3916 
3917 	DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3918 	    cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp);
3919 
3920 	VN_RELE(vp);
3921 
3922 	return;
3923 
3924 out:
3925 	DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3926 	    cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp);
3927 	if (vp != NULL)
3928 		VN_RELE(vp);
3929 }
3930 
3931 void *
3932 rfs3_fsinfo_getfh(FSINFO3args *args)
3933 {
3934 	return (&args->fsroot);
3935 }
3936 
3937 /* ARGSUSED */
3938 void
3939 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3940     struct svc_req *req, cred_t *cr, bool_t ro)
3941 {
3942 	int error;
3943 	vnode_t *vp;
3944 	struct vattr *vap;
3945 	struct vattr va;
3946 	ulong_t val;
3947 
3948 	vap = NULL;
3949 
3950 	vp = nfs3_fhtovp(&args->object, exi);
3951 
3952 	DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req,
3953 	    cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args);
3954 
3955 	if (vp == NULL) {
3956 		error = ESTALE;
3957 		goto out;
3958 	}
3959 
3960 	if (is_system_labeled()) {
3961 		bslabel_t *clabel = req->rq_label;
3962 
3963 		ASSERT(clabel != NULL);
3964 		DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
3965 		    "got client label from request(1)", struct svc_req *, req);
3966 
3967 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3968 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3969 			    exi)) {
3970 				resp->status = NFS3ERR_ACCES;
3971 				goto out1;
3972 			}
3973 		}
3974 	}
3975 
3976 	va.va_mask = AT_ALL;
3977 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3978 
3979 	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
3980 	if (error)
3981 		goto out;
3982 	resp->resok.info.link_max = (uint32)val;
3983 
3984 	error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
3985 	if (error)
3986 		goto out;
3987 	resp->resok.info.name_max = (uint32)val;
3988 
3989 	error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
3990 	if (error)
3991 		goto out;
3992 	if (val == 1)
3993 		resp->resok.info.no_trunc = TRUE;
3994 	else
3995 		resp->resok.info.no_trunc = FALSE;
3996 
3997 	error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
3998 	if (error)
3999 		goto out;
4000 	if (val == 1)
4001 		resp->resok.info.chown_restricted = TRUE;
4002 	else
4003 		resp->resok.info.chown_restricted = FALSE;
4004 
4005 	resp->status = NFS3_OK;
4006 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4007 	resp->resok.info.case_insensitive = FALSE;
4008 	resp->resok.info.case_preserving = TRUE;
4009 	DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4010 	    cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4011 	VN_RELE(vp);
4012 	return;
4013 
4014 out:
4015 	if (curthread->t_flag & T_WOULDBLOCK) {
4016 		curthread->t_flag &= ~T_WOULDBLOCK;
4017 		resp->status = NFS3ERR_JUKEBOX;
4018 	} else
4019 		resp->status = puterrno3(error);
4020 out1:
4021 	DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4022 	    cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4023 	if (vp != NULL)
4024 		VN_RELE(vp);
4025 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4026 }
4027 
4028 void *
4029 rfs3_pathconf_getfh(PATHCONF3args *args)
4030 {
4031 
4032 	return (&args->object);
4033 }
4034 
4035 void
4036 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4037     struct svc_req *req, cred_t *cr, bool_t ro)
4038 {
4039 	int error;
4040 	vnode_t *vp;
4041 	struct vattr *bvap;
4042 	struct vattr bva;
4043 	struct vattr *avap;
4044 	struct vattr ava;
4045 
4046 	bvap = NULL;
4047 	avap = NULL;
4048 
4049 	vp = nfs3_fhtovp(&args->file, exi);
4050 
4051 	DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req,
4052 	    cred_t *, cr, vnode_t *, vp, COMMIT3args *, args);
4053 
4054 	if (vp == NULL) {
4055 		error = ESTALE;
4056 		goto out;
4057 	}
4058 
4059 	bva.va_mask = AT_ALL;
4060 	error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4061 
4062 	/*
4063 	 * If we can't get the attributes, then we can't do the
4064 	 * right access checking.  So, we'll fail the request.
4065 	 */
4066 	if (error)
4067 		goto out;
4068 
4069 	bvap = &bva;
4070 
4071 	if (rdonly(ro, vp)) {
4072 		resp->status = NFS3ERR_ROFS;
4073 		goto out1;
4074 	}
4075 
4076 	if (vp->v_type != VREG) {
4077 		resp->status = NFS3ERR_INVAL;
4078 		goto out1;
4079 	}
4080 
4081 	if (is_system_labeled()) {
4082 		bslabel_t *clabel = req->rq_label;
4083 
4084 		ASSERT(clabel != NULL);
4085 		DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4086 		    "got client label from request(1)", struct svc_req *, req);
4087 
4088 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4089 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4090 			    exi)) {
4091 				resp->status = NFS3ERR_ACCES;
4092 				goto out1;
4093 			}
4094 		}
4095 	}
4096 
4097 	if (crgetuid(cr) != bva.va_uid &&
4098 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4099 		goto out;
4100 
4101 	error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4102 
4103 	ava.va_mask = AT_ALL;
4104 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4105 
4106 	if (error)
4107 		goto out;
4108 
4109 	resp->status = NFS3_OK;
4110 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4111 	resp->resok.verf = write3verf;
4112 
4113 	DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4114 	    cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4115 
4116 	VN_RELE(vp);
4117 
4118 	return;
4119 
4120 out:
4121 	if (curthread->t_flag & T_WOULDBLOCK) {
4122 		curthread->t_flag &= ~T_WOULDBLOCK;
4123 		resp->status = NFS3ERR_JUKEBOX;
4124 	} else
4125 		resp->status = puterrno3(error);
4126 out1:
4127 	DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4128 	    cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4129 
4130 	if (vp != NULL)
4131 		VN_RELE(vp);
4132 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4133 }
4134 
4135 void *
4136 rfs3_commit_getfh(COMMIT3args *args)
4137 {
4138 
4139 	return (&args->file);
4140 }
4141 
4142 static int
4143 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4144 {
4145 
4146 	vap->va_mask = 0;
4147 
4148 	if (sap->mode.set_it) {
4149 		vap->va_mode = (mode_t)sap->mode.mode;
4150 		vap->va_mask |= AT_MODE;
4151 	}
4152 	if (sap->uid.set_it) {
4153 		vap->va_uid = (uid_t)sap->uid.uid;
4154 		vap->va_mask |= AT_UID;
4155 	}
4156 	if (sap->gid.set_it) {
4157 		vap->va_gid = (gid_t)sap->gid.gid;
4158 		vap->va_mask |= AT_GID;
4159 	}
4160 	if (sap->size.set_it) {
4161 		if (sap->size.size > (size3)((u_longlong_t)-1))
4162 			return (EINVAL);
4163 		vap->va_size = sap->size.size;
4164 		vap->va_mask |= AT_SIZE;
4165 	}
4166 	if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4167 #ifndef _LP64
4168 		/* check time validity */
4169 		if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4170 			return (EOVERFLOW);
4171 #endif
4172 		/*
4173 		 * nfs protocol defines times as unsigned so don't extend sign,
4174 		 * unless sysadmin set nfs_allow_preepoch_time.
4175 		 */
4176 		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4177 		    sap->atime.atime.seconds);
4178 		vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4179 		vap->va_mask |= AT_ATIME;
4180 	} else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4181 		gethrestime(&vap->va_atime);
4182 		vap->va_mask |= AT_ATIME;
4183 	}
4184 	if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4185 #ifndef _LP64
4186 		/* check time validity */
4187 		if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4188 			return (EOVERFLOW);
4189 #endif
4190 		/*
4191 		 * nfs protocol defines times as unsigned so don't extend sign,
4192 		 * unless sysadmin set nfs_allow_preepoch_time.
4193 		 */
4194 		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4195 		    sap->mtime.mtime.seconds);
4196 		vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4197 		vap->va_mask |= AT_MTIME;
4198 	} else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4199 		gethrestime(&vap->va_mtime);
4200 		vap->va_mask |= AT_MTIME;
4201 	}
4202 
4203 	return (0);
4204 }
4205 
4206 static ftype3 vt_to_nf3[] = {
4207 	0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4208 };
4209 
4210 static int
4211 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4212 {
4213 
4214 	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4215 	/* Return error if time or size overflow */
4216 	if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4217 		return (EOVERFLOW);
4218 	}
4219 	fap->type = vt_to_nf3[vap->va_type];
4220 	fap->mode = (mode3)(vap->va_mode & MODEMASK);
4221 	fap->nlink = (uint32)vap->va_nlink;
4222 	if (vap->va_uid == UID_NOBODY)
4223 		fap->uid = (uid3)NFS_UID_NOBODY;
4224 	else
4225 		fap->uid = (uid3)vap->va_uid;
4226 	if (vap->va_gid == GID_NOBODY)
4227 		fap->gid = (gid3)NFS_GID_NOBODY;
4228 	else
4229 		fap->gid = (gid3)vap->va_gid;
4230 	fap->size = (size3)vap->va_size;
4231 	fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4232 	fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4233 	fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4234 	fap->fsid = (uint64)vap->va_fsid;
4235 	fap->fileid = (fileid3)vap->va_nodeid;
4236 	fap->atime.seconds = vap->va_atime.tv_sec;
4237 	fap->atime.nseconds = vap->va_atime.tv_nsec;
4238 	fap->mtime.seconds = vap->va_mtime.tv_sec;
4239 	fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4240 	fap->ctime.seconds = vap->va_ctime.tv_sec;
4241 	fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4242 	return (0);
4243 }
4244 
4245 static int
4246 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4247 {
4248 
4249 	/* Return error if time or size overflow */
4250 	if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4251 	    NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4252 	    NFS3_SIZE_OK(vap->va_size))) {
4253 		return (EOVERFLOW);
4254 	}
4255 	wccap->size = (size3)vap->va_size;
4256 	wccap->mtime.seconds = vap->va_mtime.tv_sec;
4257 	wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4258 	wccap->ctime.seconds = vap->va_ctime.tv_sec;
4259 	wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4260 	return (0);
4261 }
4262 
4263 static void
4264 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4265 {
4266 
4267 	/* don't return attrs if time overflow */
4268 	if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4269 		poap->attributes = TRUE;
4270 	} else
4271 		poap->attributes = FALSE;
4272 }
4273 
4274 void
4275 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4276 {
4277 
4278 	/* don't return attrs if time overflow */
4279 	if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4280 		poap->attributes = TRUE;
4281 	} else
4282 		poap->attributes = FALSE;
4283 }
4284 
4285 static void
4286 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4287 {
4288 
4289 	vattr_to_pre_op_attr(bvap, &wccp->before);
4290 	vattr_to_post_op_attr(avap, &wccp->after);
4291 }
4292 
4293 void
4294 rfs3_srvrinit(void)
4295 {
4296 	struct rfs3_verf_overlay {
4297 		uint_t id; /* a "unique" identifier */
4298 		int ts; /* a unique timestamp */
4299 	} *verfp;
4300 	timestruc_t now;
4301 
4302 	/*
4303 	 * The following algorithm attempts to find a unique verifier
4304 	 * to be used as the write verifier returned from the server
4305 	 * to the client.  It is important that this verifier change
4306 	 * whenever the server reboots.  Of secondary importance, it
4307 	 * is important for the verifier to be unique between two
4308 	 * different servers.
4309 	 *
4310 	 * Thus, an attempt is made to use the system hostid and the
4311 	 * current time in seconds when the nfssrv kernel module is
4312 	 * loaded.  It is assumed that an NFS server will not be able
4313 	 * to boot and then to reboot in less than a second.  If the
4314 	 * hostid has not been set, then the current high resolution
4315 	 * time is used.  This will ensure different verifiers each
4316 	 * time the server reboots and minimize the chances that two
4317 	 * different servers will have the same verifier.
4318 	 */
4319 
4320 #ifndef	lint
4321 	/*
4322 	 * We ASSERT that this constant logic expression is
4323 	 * always true because in the past, it wasn't.
4324 	 */
4325 	ASSERT(sizeof (*verfp) <= sizeof (write3verf));
4326 #endif
4327 
4328 	gethrestime(&now);
4329 	verfp = (struct rfs3_verf_overlay *)&write3verf;
4330 	verfp->ts = (int)now.tv_sec;
4331 	verfp->id = zone_get_hostid(NULL);
4332 
4333 	if (verfp->id == 0)
4334 		verfp->id = (uint_t)now.tv_nsec;
4335 
4336 	nfs3_srv_caller_id = fs_new_caller_id();
4337 
4338 }
4339 
4340 static int
4341 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4342 {
4343 	struct clist	*wcl;
4344 	int		wlist_len;
4345 	count3		count = rok->count;
4346 
4347 	wcl = args->wlist;
4348 	if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
4349 		return (FALSE);
4350 	}
4351 
4352 	wcl = args->wlist;
4353 	rok->wlist_len = wlist_len;
4354 	rok->wlist = wcl;
4355 	return (TRUE);
4356 }
4357 
/*
 * NFSv3 server teardown hook; currently there is nothing to release.
 */
void
rfs3_srvrfini(void)
{
	/* Nothing to do */
}
4363