xref: /titanic_52/usr/src/uts/common/fs/nfs/nfs3_srv.c (revision f5c2e7ea56aaa46a9976476fb0cb1f02b9426f07)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
23  *
24  * Copyright 2012 Nexenta Systems, Inc.  All rights reserved.
25  */
26 
27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29 
30 #include <sys/param.h>
31 #include <sys/types.h>
32 #include <sys/systm.h>
33 #include <sys/cred.h>
34 #include <sys/buf.h>
35 #include <sys/vfs.h>
36 #include <sys/vnode.h>
37 #include <sys/uio.h>
38 #include <sys/errno.h>
39 #include <sys/sysmacros.h>
40 #include <sys/statvfs.h>
41 #include <sys/kmem.h>
42 #include <sys/dirent.h>
43 #include <sys/cmn_err.h>
44 #include <sys/debug.h>
45 #include <sys/systeminfo.h>
46 #include <sys/flock.h>
47 #include <sys/nbmlock.h>
48 #include <sys/policy.h>
49 #include <sys/sdt.h>
50 
51 #include <rpc/types.h>
52 #include <rpc/auth.h>
53 #include <rpc/svc.h>
54 #include <rpc/rpc_rdma.h>
55 
56 #include <nfs/nfs.h>
57 #include <nfs/export.h>
58 #include <nfs/nfs_cmd.h>
59 
60 #include <sys/strsubr.h>
61 
62 #include <sys/tsol/label.h>
63 #include <sys/tsol/tndb.h>
64 
65 #include <sys/zone.h>
66 
67 #include <inet/ip.h>
68 #include <inet/ip6.h>
69 
70 /*
71  * These are the interface routines for the server side of the
72  * Network File System.  See the NFS version 3 protocol specification
73  * for a description of this interface.
74  */
75 
/*
 * File-scope write verifier.  It is not referenced in the portion of the
 * file reviewed here.
 * NOTE(review): presumably handed back to clients in WRITE/COMMIT
 * replies -- confirm against the write and commit handlers.
 */
static writeverf3 write3verf;

/*
 * Forward declarations of the file-local helpers that convert between
 * the kernel's struct vattr and the NFSv3 over-the-wire attribute types.
 */
static int	sattr3_to_vattr(sattr3 *, struct vattr *);
static int	vattr_to_fattr3(struct vattr *, fattr3 *);
static int	vattr_to_wcc_attr(struct vattr *, wcc_attr *);
static void	vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
static void	vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
/* Helper used by rfs3_read() when the reply data is returned via RDMA. */
static int	rdma_setup_read_data3(READ3args *, READ3resok *);

/*
 * Defined elsewhere.  When non-zero, rfs3_read() attempts to use loaned
 * buffers for reads that are not returned via RDMA.
 */
extern int nfs_loaned_buffers;

/*
 * Value copied into caller_context_t.cc_caller_id by the handlers in
 * this file before they issue VOP calls.
 */
u_longlong_t nfs3_srv_caller_id;
88 
89 /* ARGSUSED */
90 void
91 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
92 	struct svc_req *req, cred_t *cr)
93 {
94 	int error;
95 	vnode_t *vp;
96 	struct vattr va;
97 
98 	vp = nfs3_fhtovp(&args->object, exi);
99 
100 	DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
101 	    cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
102 
103 	if (vp == NULL) {
104 		error = ESTALE;
105 		goto out;
106 	}
107 
108 	va.va_mask = AT_ALL;
109 	error = rfs4_delegated_getattr(vp, &va, 0, cr);
110 
111 	if (!error) {
112 		/* Lie about the object type for a referral */
113 		if (vn_is_nfs_reparse(vp, cr))
114 			va.va_type = VLNK;
115 
116 		/* overflow error if time or size is out of range */
117 		error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
118 		if (error)
119 			goto out;
120 		resp->status = NFS3_OK;
121 
122 		DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
123 		    cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
124 
125 		VN_RELE(vp);
126 
127 		return;
128 	}
129 
130 out:
131 	if (curthread->t_flag & T_WOULDBLOCK) {
132 		curthread->t_flag &= ~T_WOULDBLOCK;
133 		resp->status = NFS3ERR_JUKEBOX;
134 	} else
135 		resp->status = puterrno3(error);
136 
137 	DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
138 	    cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
139 
140 	if (vp != NULL)
141 		VN_RELE(vp);
142 }
143 
144 void *
145 rfs3_getattr_getfh(GETATTR3args *args)
146 {
147 
148 	return (&args->object);
149 }
150 
151 void
152 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
153 	struct svc_req *req, cred_t *cr)
154 {
155 	int error;
156 	vnode_t *vp;
157 	struct vattr *bvap;
158 	struct vattr bva;
159 	struct vattr *avap;
160 	struct vattr ava;
161 	int flag;
162 	int in_crit = 0;
163 	struct flock64 bf;
164 	caller_context_t ct;
165 
166 	bvap = NULL;
167 	avap = NULL;
168 
169 	vp = nfs3_fhtovp(&args->object, exi);
170 
171 	DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
172 	    cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);
173 
174 	if (vp == NULL) {
175 		error = ESTALE;
176 		goto out;
177 	}
178 
179 	error = sattr3_to_vattr(&args->new_attributes, &ava);
180 	if (error)
181 		goto out;
182 
183 	if (is_system_labeled()) {
184 		bslabel_t *clabel = req->rq_label;
185 
186 		ASSERT(clabel != NULL);
187 		DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
188 		    "got client label from request(1)", struct svc_req *, req);
189 
190 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
191 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
192 			    exi)) {
193 				resp->status = NFS3ERR_ACCES;
194 				goto out1;
195 			}
196 		}
197 	}
198 
199 	/*
200 	 * We need to specially handle size changes because of
201 	 * possible conflicting NBMAND locks. Get into critical
202 	 * region before VOP_GETATTR, so the size attribute is
203 	 * valid when checking conflicts.
204 	 *
205 	 * Also, check to see if the v4 side of the server has
206 	 * delegated this file.  If so, then we return JUKEBOX to
207 	 * allow the client to retrasmit its request.
208 	 */
209 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
210 		if (nbl_need_check(vp)) {
211 			nbl_start_crit(vp, RW_READER);
212 			in_crit = 1;
213 		}
214 	}
215 
216 	bva.va_mask = AT_ALL;
217 	error = rfs4_delegated_getattr(vp, &bva, 0, cr);
218 
219 	/*
220 	 * If we can't get the attributes, then we can't do the
221 	 * right access checking.  So, we'll fail the request.
222 	 */
223 	if (error)
224 		goto out;
225 
226 	bvap = &bva;
227 
228 	if (rdonly(exi, req) || vn_is_readonly(vp)) {
229 		resp->status = NFS3ERR_ROFS;
230 		goto out1;
231 	}
232 
233 	if (args->guard.check &&
234 	    (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
235 	    args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
236 		resp->status = NFS3ERR_NOT_SYNC;
237 		goto out1;
238 	}
239 
240 	if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
241 		flag = ATTR_UTIME;
242 	else
243 		flag = 0;
244 
245 	/*
246 	 * If the filesystem is exported with nosuid, then mask off
247 	 * the setuid and setgid bits.
248 	 */
249 	if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
250 	    (exi->exi_export.ex_flags & EX_NOSUID))
251 		ava.va_mode &= ~(VSUID | VSGID);
252 
253 	ct.cc_sysid = 0;
254 	ct.cc_pid = 0;
255 	ct.cc_caller_id = nfs3_srv_caller_id;
256 	ct.cc_flags = CC_DONTBLOCK;
257 
258 	/*
259 	 * We need to specially handle size changes because it is
260 	 * possible for the client to create a file with modes
261 	 * which indicate read-only, but with the file opened for
262 	 * writing.  If the client then tries to set the size of
263 	 * the file, then the normal access checking done in
264 	 * VOP_SETATTR would prevent the client from doing so,
265 	 * although it should be legal for it to do so.  To get
266 	 * around this, we do the access checking for ourselves
267 	 * and then use VOP_SPACE which doesn't do the access
268 	 * checking which VOP_SETATTR does. VOP_SPACE can only
269 	 * operate on VREG files, let VOP_SETATTR handle the other
270 	 * extremely rare cases.
271 	 * Also the client should not be allowed to change the
272 	 * size of the file if there is a conflicting non-blocking
273 	 * mandatory lock in the region the change.
274 	 */
275 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
276 		if (in_crit) {
277 			u_offset_t offset;
278 			ssize_t length;
279 
280 			if (ava.va_size < bva.va_size) {
281 				offset = ava.va_size;
282 				length = bva.va_size - ava.va_size;
283 			} else {
284 				offset = bva.va_size;
285 				length = ava.va_size - bva.va_size;
286 			}
287 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
288 			    NULL)) {
289 				error = EACCES;
290 				goto out;
291 			}
292 		}
293 
294 		if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
295 			ava.va_mask &= ~AT_SIZE;
296 			bf.l_type = F_WRLCK;
297 			bf.l_whence = 0;
298 			bf.l_start = (off64_t)ava.va_size;
299 			bf.l_len = 0;
300 			bf.l_sysid = 0;
301 			bf.l_pid = 0;
302 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
303 			    (offset_t)ava.va_size, cr, &ct);
304 		}
305 	}
306 
307 	if (!error && ava.va_mask)
308 		error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
309 
310 	/* check if a monitor detected a delegation conflict */
311 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
312 		resp->status = NFS3ERR_JUKEBOX;
313 		goto out1;
314 	}
315 
316 	ava.va_mask = AT_ALL;
317 	avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
318 
319 	/*
320 	 * Force modified metadata out to stable storage.
321 	 */
322 	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
323 
324 	if (error)
325 		goto out;
326 
327 	if (in_crit)
328 		nbl_end_crit(vp);
329 
330 	resp->status = NFS3_OK;
331 	vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
332 
333 	DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
334 	    cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
335 
336 	VN_RELE(vp);
337 
338 	return;
339 
340 out:
341 	if (curthread->t_flag & T_WOULDBLOCK) {
342 		curthread->t_flag &= ~T_WOULDBLOCK;
343 		resp->status = NFS3ERR_JUKEBOX;
344 	} else
345 		resp->status = puterrno3(error);
346 out1:
347 	DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
348 	    cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
349 
350 	if (vp != NULL) {
351 		if (in_crit)
352 			nbl_end_crit(vp);
353 		VN_RELE(vp);
354 	}
355 	vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
356 }
357 
358 void *
359 rfs3_setattr_getfh(SETATTR3args *args)
360 {
361 
362 	return (&args->object);
363 }
364 
365 /* ARGSUSED */
366 void
367 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
368 	struct svc_req *req, cred_t *cr)
369 {
370 	int error;
371 	vnode_t *vp;
372 	vnode_t *dvp;
373 	struct vattr *vap;
374 	struct vattr va;
375 	struct vattr *dvap;
376 	struct vattr dva;
377 	nfs_fh3 *fhp;
378 	struct sec_ol sec = {0, 0};
379 	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
380 	struct sockaddr *ca;
381 	char *name = NULL;
382 
383 	dvap = NULL;
384 
385 	/*
386 	 * Allow lookups from the root - the default
387 	 * location of the public filehandle.
388 	 */
389 	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
390 		dvp = rootdir;
391 		VN_HOLD(dvp);
392 
393 		DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
394 		    cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
395 	} else {
396 		dvp = nfs3_fhtovp(&args->what.dir, exi);
397 
398 		DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
399 		    cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
400 
401 		if (dvp == NULL) {
402 			error = ESTALE;
403 			goto out;
404 		}
405 	}
406 
407 	dva.va_mask = AT_ALL;
408 	dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
409 
410 	if (args->what.name == nfs3nametoolong) {
411 		resp->status = NFS3ERR_NAMETOOLONG;
412 		goto out1;
413 	}
414 
415 	if (args->what.name == NULL || *(args->what.name) == '\0') {
416 		resp->status = NFS3ERR_ACCES;
417 		goto out1;
418 	}
419 
420 	fhp = &args->what.dir;
421 	if (strcmp(args->what.name, "..") == 0 &&
422 	    EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
423 		resp->status = NFS3ERR_NOENT;
424 		goto out1;
425 	}
426 
427 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
428 	name = nfscmd_convname(ca, exi, args->what.name,
429 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
430 
431 	if (name == NULL) {
432 		resp->status = NFS3ERR_ACCES;
433 		goto out1;
434 	}
435 
436 	exi_hold(exi);
437 
438 	/*
439 	 * If the public filehandle is used then allow
440 	 * a multi-component lookup
441 	 */
442 	if (PUBLIC_FH3(&args->what.dir)) {
443 		struct exportinfo *new;
444 
445 		publicfh_flag = TRUE;
446 
447 		error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
448 		    &new, &sec);
449 
450 		if (error == 0) {
451 			exi_rele(exi);
452 			exi = new;
453 		}
454 
455 		/*
456 		 * Since WebNFS may bypass MOUNT, we need to ensure this
457 		 * request didn't come from an unlabeled admin_low client.
458 		 */
459 		if (is_system_labeled() && error == 0) {
460 			int		addr_type;
461 			void		*ipaddr;
462 			tsol_tpc_t	*tp;
463 
464 			if (ca->sa_family == AF_INET) {
465 				addr_type = IPV4_VERSION;
466 				ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
467 			} else if (ca->sa_family == AF_INET6) {
468 				addr_type = IPV6_VERSION;
469 				ipaddr = &((struct sockaddr_in6 *)
470 				    ca)->sin6_addr;
471 			}
472 			tp = find_tpc(ipaddr, addr_type, B_FALSE);
473 			if (tp == NULL || tp->tpc_tp.tp_doi !=
474 			    l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
475 			    SUN_CIPSO) {
476 				VN_RELE(vp);
477 				resp->status = NFS3ERR_ACCES;
478 				error = 1;
479 			}
480 			if (tp != NULL)
481 				TPC_RELE(tp);
482 		}
483 	} else {
484 		error = VOP_LOOKUP(dvp, name, &vp,
485 		    NULL, 0, NULL, cr, NULL, NULL, NULL);
486 	}
487 
488 	if (name != args->what.name)
489 		kmem_free(name, MAXPATHLEN + 1);
490 
491 	if (is_system_labeled() && error == 0) {
492 		bslabel_t *clabel = req->rq_label;
493 
494 		ASSERT(clabel != NULL);
495 		DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
496 		    "got client label from request(1)", struct svc_req *, req);
497 
498 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
499 			if (!do_rfs_label_check(clabel, dvp,
500 			    DOMINANCE_CHECK, exi)) {
501 				VN_RELE(vp);
502 				resp->status = NFS3ERR_ACCES;
503 				error = 1;
504 			}
505 		}
506 	}
507 
508 	dva.va_mask = AT_ALL;
509 	dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
510 
511 	if (error)
512 		goto out;
513 
514 	if (sec.sec_flags & SEC_QUERY) {
515 		error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
516 	} else {
517 		error = makefh3(&resp->resok.object, vp, exi);
518 		if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
519 			auth_weak = TRUE;
520 	}
521 
522 	if (error) {
523 		VN_RELE(vp);
524 		goto out;
525 	}
526 
527 	va.va_mask = AT_ALL;
528 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
529 
530 	exi_rele(exi);
531 	VN_RELE(vp);
532 
533 	resp->status = NFS3_OK;
534 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
535 	vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
536 
537 	/*
538 	 * If it's public fh, no 0x81, and client's flavor is
539 	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
540 	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
541 	 */
542 	if (auth_weak)
543 		resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
544 
545 	DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
546 	    cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
547 	VN_RELE(dvp);
548 
549 	return;
550 
551 out:
552 	/*
553 	 * The passed argument exportinfo is released by the
554 	 * caller, common_dispatch
555 	 */
556 	exi_rele(exi);
557 
558 	if (curthread->t_flag & T_WOULDBLOCK) {
559 		curthread->t_flag &= ~T_WOULDBLOCK;
560 		resp->status = NFS3ERR_JUKEBOX;
561 	} else
562 		resp->status = puterrno3(error);
563 out1:
564 	DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
565 	    cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
566 
567 	if (dvp != NULL)
568 		VN_RELE(dvp);
569 	vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
570 
571 }
572 
573 void *
574 rfs3_lookup_getfh(LOOKUP3args *args)
575 {
576 
577 	return (&args->what.dir);
578 }
579 
580 /* ARGSUSED */
581 void
582 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
583 	struct svc_req *req, cred_t *cr)
584 {
585 	int error;
586 	vnode_t *vp;
587 	struct vattr *vap;
588 	struct vattr va;
589 	int checkwriteperm;
590 	boolean_t dominant_label = B_FALSE;
591 	boolean_t equal_label = B_FALSE;
592 	boolean_t admin_low_client;
593 
594 	vap = NULL;
595 
596 	vp = nfs3_fhtovp(&args->object, exi);
597 
598 	DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
599 	    cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
600 
601 	if (vp == NULL) {
602 		error = ESTALE;
603 		goto out;
604 	}
605 
606 	/*
607 	 * If the file system is exported read only, it is not appropriate
608 	 * to check write permissions for regular files and directories.
609 	 * Special files are interpreted by the client, so the underlying
610 	 * permissions are sent back to the client for interpretation.
611 	 */
612 	if (rdonly(exi, req) && (vp->v_type == VREG || vp->v_type == VDIR))
613 		checkwriteperm = 0;
614 	else
615 		checkwriteperm = 1;
616 
617 	/*
618 	 * We need the mode so that we can correctly determine access
619 	 * permissions relative to a mandatory lock file.  Access to
620 	 * mandatory lock files is denied on the server, so it might
621 	 * as well be reflected to the server during the open.
622 	 */
623 	va.va_mask = AT_MODE;
624 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
625 	if (error)
626 		goto out;
627 
628 	vap = &va;
629 
630 	resp->resok.access = 0;
631 
632 	if (is_system_labeled()) {
633 		bslabel_t *clabel = req->rq_label;
634 
635 		ASSERT(clabel != NULL);
636 		DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
637 		    "got client label from request(1)", struct svc_req *, req);
638 
639 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
640 			if ((equal_label = do_rfs_label_check(clabel, vp,
641 			    EQUALITY_CHECK, exi)) == B_FALSE) {
642 				dominant_label = do_rfs_label_check(clabel,
643 				    vp, DOMINANCE_CHECK, exi);
644 			} else
645 				dominant_label = B_TRUE;
646 			admin_low_client = B_FALSE;
647 		} else
648 			admin_low_client = B_TRUE;
649 	}
650 
651 	if (args->access & ACCESS3_READ) {
652 		error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
653 		if (error) {
654 			if (curthread->t_flag & T_WOULDBLOCK)
655 				goto out;
656 		} else if (!MANDLOCK(vp, va.va_mode) &&
657 		    (!is_system_labeled() || admin_low_client ||
658 		    dominant_label))
659 			resp->resok.access |= ACCESS3_READ;
660 	}
661 	if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
662 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
663 		if (error) {
664 			if (curthread->t_flag & T_WOULDBLOCK)
665 				goto out;
666 		} else if (!is_system_labeled() || admin_low_client ||
667 		    dominant_label)
668 			resp->resok.access |= ACCESS3_LOOKUP;
669 	}
670 	if (checkwriteperm &&
671 	    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
672 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
673 		if (error) {
674 			if (curthread->t_flag & T_WOULDBLOCK)
675 				goto out;
676 		} else if (!MANDLOCK(vp, va.va_mode) &&
677 		    (!is_system_labeled() || admin_low_client || equal_label)) {
678 			resp->resok.access |=
679 			    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
680 		}
681 	}
682 	if (checkwriteperm &&
683 	    (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
684 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
685 		if (error) {
686 			if (curthread->t_flag & T_WOULDBLOCK)
687 				goto out;
688 		} else if (!is_system_labeled() || admin_low_client ||
689 		    equal_label)
690 			resp->resok.access |= ACCESS3_DELETE;
691 	}
692 	if (args->access & ACCESS3_EXECUTE) {
693 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
694 		if (error) {
695 			if (curthread->t_flag & T_WOULDBLOCK)
696 				goto out;
697 		} else if (!MANDLOCK(vp, va.va_mode) &&
698 		    (!is_system_labeled() || admin_low_client ||
699 		    dominant_label))
700 			resp->resok.access |= ACCESS3_EXECUTE;
701 	}
702 
703 	va.va_mask = AT_ALL;
704 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
705 
706 	resp->status = NFS3_OK;
707 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
708 
709 	DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
710 	    cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
711 
712 	VN_RELE(vp);
713 
714 	return;
715 
716 out:
717 	if (curthread->t_flag & T_WOULDBLOCK) {
718 		curthread->t_flag &= ~T_WOULDBLOCK;
719 		resp->status = NFS3ERR_JUKEBOX;
720 	} else
721 		resp->status = puterrno3(error);
722 	DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
723 	    cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
724 	if (vp != NULL)
725 		VN_RELE(vp);
726 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
727 }
728 
729 void *
730 rfs3_access_getfh(ACCESS3args *args)
731 {
732 
733 	return (&args->object);
734 }
735 
736 /* ARGSUSED */
737 void
738 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
739 	struct svc_req *req, cred_t *cr)
740 {
741 	int error;
742 	vnode_t *vp;
743 	struct vattr *vap;
744 	struct vattr va;
745 	struct iovec iov;
746 	struct uio uio;
747 	char *data;
748 	struct sockaddr *ca;
749 	char *name = NULL;
750 	int is_referral = 0;
751 
752 	vap = NULL;
753 
754 	vp = nfs3_fhtovp(&args->symlink, exi);
755 
756 	DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
757 	    cred_t *, cr, vnode_t *, vp, READLINK3args *, args);
758 
759 	if (vp == NULL) {
760 		error = ESTALE;
761 		goto out;
762 	}
763 
764 	va.va_mask = AT_ALL;
765 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
766 	if (error)
767 		goto out;
768 
769 	vap = &va;
770 
771 	/* We lied about the object type for a referral */
772 	if (vn_is_nfs_reparse(vp, cr))
773 		is_referral = 1;
774 
775 	if (vp->v_type != VLNK && !is_referral) {
776 		resp->status = NFS3ERR_INVAL;
777 		goto out1;
778 	}
779 
780 	if (MANDLOCK(vp, va.va_mode)) {
781 		resp->status = NFS3ERR_ACCES;
782 		goto out1;
783 	}
784 
785 	if (is_system_labeled()) {
786 		bslabel_t *clabel = req->rq_label;
787 
788 		ASSERT(clabel != NULL);
789 		DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
790 		    "got client label from request(1)", struct svc_req *, req);
791 
792 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
793 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
794 			    exi)) {
795 				resp->status = NFS3ERR_ACCES;
796 				goto out1;
797 			}
798 		}
799 	}
800 
801 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
802 
803 	if (is_referral) {
804 		char *s;
805 		size_t strsz;
806 
807 		/* Get an artificial symlink based on a referral */
808 		s = build_symlink(vp, cr, &strsz);
809 		global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
810 		DTRACE_PROBE2(nfs3serv__func__referral__reflink,
811 		    vnode_t *, vp, char *, s);
812 		if (s == NULL)
813 			error = EINVAL;
814 		else {
815 			error = 0;
816 			(void) strlcpy(data, s, MAXPATHLEN + 1);
817 			kmem_free(s, strsz);
818 		}
819 
820 	} else {
821 
822 		iov.iov_base = data;
823 		iov.iov_len = MAXPATHLEN;
824 		uio.uio_iov = &iov;
825 		uio.uio_iovcnt = 1;
826 		uio.uio_segflg = UIO_SYSSPACE;
827 		uio.uio_extflg = UIO_COPY_CACHED;
828 		uio.uio_loffset = 0;
829 		uio.uio_resid = MAXPATHLEN;
830 
831 		error = VOP_READLINK(vp, &uio, cr, NULL);
832 
833 		if (!error)
834 			*(data + MAXPATHLEN - uio.uio_resid) = '\0';
835 	}
836 
837 	va.va_mask = AT_ALL;
838 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
839 
840 	/* Lie about object type again just to be consistent */
841 	if (is_referral && vap != NULL)
842 		vap->va_type = VLNK;
843 
844 #if 0 /* notyet */
845 	/*
846 	 * Don't do this.  It causes local disk writes when just
847 	 * reading the file and the overhead is deemed larger
848 	 * than the benefit.
849 	 */
850 	/*
851 	 * Force modified metadata out to stable storage.
852 	 */
853 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
854 #endif
855 
856 	if (error) {
857 		kmem_free(data, MAXPATHLEN + 1);
858 		goto out;
859 	}
860 
861 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
862 	name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
863 	    MAXPATHLEN + 1);
864 
865 	if (name == NULL) {
866 		/*
867 		 * Even though the conversion failed, we return
868 		 * something. We just don't translate it.
869 		 */
870 		name = data;
871 	}
872 
873 	resp->status = NFS3_OK;
874 	vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
875 	resp->resok.data = name;
876 
877 	DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
878 	    cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
879 	VN_RELE(vp);
880 
881 	if (name != data)
882 		kmem_free(data, MAXPATHLEN + 1);
883 
884 	return;
885 
886 out:
887 	if (curthread->t_flag & T_WOULDBLOCK) {
888 		curthread->t_flag &= ~T_WOULDBLOCK;
889 		resp->status = NFS3ERR_JUKEBOX;
890 	} else
891 		resp->status = puterrno3(error);
892 out1:
893 	DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
894 	    cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
895 	if (vp != NULL)
896 		VN_RELE(vp);
897 	vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
898 }
899 
900 void *
901 rfs3_readlink_getfh(READLINK3args *args)
902 {
903 
904 	return (&args->symlink);
905 }
906 
907 void
908 rfs3_readlink_free(READLINK3res *resp)
909 {
910 
911 	if (resp->status == NFS3_OK)
912 		kmem_free(resp->resok.data, MAXPATHLEN + 1);
913 }
914 
915 /*
916  * Server routine to handle read
917  * May handle RDMA data as well as mblks
918  */
919 /* ARGSUSED */
920 void
921 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
922 	struct svc_req *req, cred_t *cr)
923 {
924 	int error;
925 	vnode_t *vp;
926 	struct vattr *vap;
927 	struct vattr va;
928 	struct iovec iov;
929 	struct uio uio;
930 	u_offset_t offset;
931 	mblk_t *mp = NULL;
932 	int alloc_err = 0;
933 	int in_crit = 0;
934 	int need_rwunlock = 0;
935 	caller_context_t ct;
936 	int rdma_used = 0;
937 	int loaned_buffers;
938 	struct uio *uiop;
939 
940 	vap = NULL;
941 
942 	vp = nfs3_fhtovp(&args->file, exi);
943 
944 	DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
945 	    cred_t *, cr, vnode_t *, vp, READ3args *, args);
946 
947 	if (vp == NULL) {
948 		error = ESTALE;
949 		goto out;
950 	}
951 
952 	if (args->wlist) {
953 		if (args->count > clist_len(args->wlist)) {
954 			error = EINVAL;
955 			goto out;
956 		}
957 		rdma_used = 1;
958 	}
959 
960 	/* use loaned buffers for TCP */
961 	loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
962 
963 	if (is_system_labeled()) {
964 		bslabel_t *clabel = req->rq_label;
965 
966 		ASSERT(clabel != NULL);
967 		DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
968 		    "got client label from request(1)", struct svc_req *, req);
969 
970 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
971 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
972 			    exi)) {
973 				resp->status = NFS3ERR_ACCES;
974 				goto out1;
975 			}
976 		}
977 	}
978 
979 	ct.cc_sysid = 0;
980 	ct.cc_pid = 0;
981 	ct.cc_caller_id = nfs3_srv_caller_id;
982 	ct.cc_flags = CC_DONTBLOCK;
983 
984 	/*
985 	 * Enter the critical region before calling VOP_RWLOCK
986 	 * to avoid a deadlock with write requests.
987 	 */
988 	if (nbl_need_check(vp)) {
989 		nbl_start_crit(vp, RW_READER);
990 		in_crit = 1;
991 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
992 		    NULL)) {
993 			error = EACCES;
994 			goto out;
995 		}
996 	}
997 
998 	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
999 
1000 	/* check if a monitor detected a delegation conflict */
1001 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1002 		resp->status = NFS3ERR_JUKEBOX;
1003 		goto out1;
1004 	}
1005 
1006 	need_rwunlock = 1;
1007 
1008 	va.va_mask = AT_ALL;
1009 	error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1010 
1011 	/*
1012 	 * If we can't get the attributes, then we can't do the
1013 	 * right access checking.  So, we'll fail the request.
1014 	 */
1015 	if (error)
1016 		goto out;
1017 
1018 	vap = &va;
1019 
1020 	if (vp->v_type != VREG) {
1021 		resp->status = NFS3ERR_INVAL;
1022 		goto out1;
1023 	}
1024 
1025 	if (crgetuid(cr) != va.va_uid) {
1026 		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1027 		if (error) {
1028 			if (curthread->t_flag & T_WOULDBLOCK)
1029 				goto out;
1030 			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1031 			if (error)
1032 				goto out;
1033 		}
1034 	}
1035 
1036 	if (MANDLOCK(vp, va.va_mode)) {
1037 		resp->status = NFS3ERR_ACCES;
1038 		goto out1;
1039 	}
1040 
1041 	offset = args->offset;
1042 	if (offset >= va.va_size) {
1043 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1044 		if (in_crit)
1045 			nbl_end_crit(vp);
1046 		resp->status = NFS3_OK;
1047 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1048 		resp->resok.count = 0;
1049 		resp->resok.eof = TRUE;
1050 		resp->resok.data.data_len = 0;
1051 		resp->resok.data.data_val = NULL;
1052 		resp->resok.data.mp = NULL;
1053 		/* RDMA */
1054 		resp->resok.wlist = args->wlist;
1055 		resp->resok.wlist_len = resp->resok.count;
1056 		if (resp->resok.wlist)
1057 			clist_zero_len(resp->resok.wlist);
1058 		goto done;
1059 	}
1060 
1061 	if (args->count == 0) {
1062 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1063 		if (in_crit)
1064 			nbl_end_crit(vp);
1065 		resp->status = NFS3_OK;
1066 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1067 		resp->resok.count = 0;
1068 		resp->resok.eof = FALSE;
1069 		resp->resok.data.data_len = 0;
1070 		resp->resok.data.data_val = NULL;
1071 		resp->resok.data.mp = NULL;
1072 		/* RDMA */
1073 		resp->resok.wlist = args->wlist;
1074 		resp->resok.wlist_len = resp->resok.count;
1075 		if (resp->resok.wlist)
1076 			clist_zero_len(resp->resok.wlist);
1077 		goto done;
1078 	}
1079 
1080 	/*
1081 	 * do not allocate memory more the max. allowed
1082 	 * transfer size
1083 	 */
1084 	if (args->count > rfs3_tsize(req))
1085 		args->count = rfs3_tsize(req);
1086 
1087 	if (loaned_buffers) {
1088 		uiop = (uio_t *)rfs_setup_xuio(vp);
1089 		ASSERT(uiop != NULL);
1090 		uiop->uio_segflg = UIO_SYSSPACE;
1091 		uiop->uio_loffset = args->offset;
1092 		uiop->uio_resid = args->count;
1093 
1094 		/* Jump to do the read if successful */
1095 		if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1096 			/*
1097 			 * Need to hold the vnode until after VOP_RETZCBUF()
1098 			 * is called.
1099 			 */
1100 			VN_HOLD(vp);
1101 			goto doio_read;
1102 		}
1103 
1104 		DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1105 		    uiop->uio_loffset, int, uiop->uio_resid);
1106 
1107 		uiop->uio_extflg = 0;
1108 		/* failure to setup for zero copy */
1109 		rfs_free_xuio((void *)uiop);
1110 		loaned_buffers = 0;
1111 	}
1112 
1113 	/*
1114 	 * If returning data via RDMA Write, then grab the chunk list.
1115 	 * If we aren't returning READ data w/RDMA_WRITE, then grab
1116 	 * a mblk.
1117 	 */
1118 	if (rdma_used) {
1119 		(void) rdma_get_wchunk(req, &iov, args->wlist);
1120 	} else {
1121 		/*
1122 		 * mp will contain the data to be sent out in the read reply.
1123 		 * This will be freed after the reply has been sent out (by the
1124 		 * driver).
1125 		 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
1126 		 * that the call to xdrmblk_putmblk() never fails.
1127 		 */
1128 		mp = allocb_wait(RNDUP(args->count), BPRI_MED, STR_NOSIG,
1129 		    &alloc_err);
1130 		ASSERT(mp != NULL);
1131 		ASSERT(alloc_err == 0);
1132 
1133 		iov.iov_base = (caddr_t)mp->b_datap->db_base;
1134 		iov.iov_len = args->count;
1135 	}
1136 
1137 	uio.uio_iov = &iov;
1138 	uio.uio_iovcnt = 1;
1139 	uio.uio_segflg = UIO_SYSSPACE;
1140 	uio.uio_extflg = UIO_COPY_CACHED;
1141 	uio.uio_loffset = args->offset;
1142 	uio.uio_resid = args->count;
1143 	uiop = &uio;
1144 
1145 doio_read:
1146 	error = VOP_READ(vp, uiop, 0, cr, &ct);
1147 
1148 	if (error) {
1149 		if (mp)
1150 			freemsg(mp);
1151 		/* check if a monitor detected a delegation conflict */
1152 		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1153 			resp->status = NFS3ERR_JUKEBOX;
1154 			goto out1;
1155 		}
1156 		goto out;
1157 	}
1158 
1159 	/* make mblk using zc buffers */
1160 	if (loaned_buffers) {
1161 		mp = uio_to_mblk(uiop);
1162 		ASSERT(mp != NULL);
1163 	}
1164 
1165 	va.va_mask = AT_ALL;
1166 	error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1167 
1168 	if (error)
1169 		vap = NULL;
1170 	else
1171 		vap = &va;
1172 
1173 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1174 
1175 	if (in_crit)
1176 		nbl_end_crit(vp);
1177 
1178 	resp->status = NFS3_OK;
1179 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1180 	resp->resok.count = args->count - uiop->uio_resid;
1181 	if (!error && offset + resp->resok.count == va.va_size)
1182 		resp->resok.eof = TRUE;
1183 	else
1184 		resp->resok.eof = FALSE;
1185 	resp->resok.data.data_len = resp->resok.count;
1186 
1187 	if (mp)
1188 		rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1189 
1190 	resp->resok.data.mp = mp;
1191 	resp->resok.size = (uint_t)args->count;
1192 
1193 	if (rdma_used) {
1194 		resp->resok.data.data_val = (caddr_t)iov.iov_base;
1195 		if (!rdma_setup_read_data3(args, &(resp->resok))) {
1196 			resp->status = NFS3ERR_INVAL;
1197 		}
1198 	} else {
1199 		resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1200 		(resp->resok).wlist = NULL;
1201 	}
1202 
1203 done:
1204 	DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1205 	    cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1206 
1207 	VN_RELE(vp);
1208 
1209 	return;
1210 
1211 out:
1212 	if (curthread->t_flag & T_WOULDBLOCK) {
1213 		curthread->t_flag &= ~T_WOULDBLOCK;
1214 		resp->status = NFS3ERR_JUKEBOX;
1215 	} else
1216 		resp->status = puterrno3(error);
1217 out1:
1218 	DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1219 	    cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1220 
1221 	if (vp != NULL) {
1222 		if (need_rwunlock)
1223 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1224 		if (in_crit)
1225 			nbl_end_crit(vp);
1226 		VN_RELE(vp);
1227 	}
1228 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1229 }
1230 
1231 void
1232 rfs3_read_free(READ3res *resp)
1233 {
1234 	mblk_t *mp;
1235 
1236 	if (resp->status == NFS3_OK) {
1237 		mp = resp->resok.data.mp;
1238 		if (mp != NULL)
1239 			freemsg(mp);
1240 	}
1241 }
1242 
1243 void *
1244 rfs3_read_getfh(READ3args *args)
1245 {
1246 
1247 	return (&args->file);
1248 }
1249 
/*
 * Number of iovec entries rfs3_write() keeps on its stack.  When the
 * mblk chain of a WRITE has more segments than this, rfs3_write()
 * allocates the iovec array with kmem_alloc() instead.
 */
#define	MAX_IOVECS	12

#ifdef DEBUG
/*
 * DEBUG-only counters incremented by rfs3_write(): "hits" counts mblk
 * chains that fit in the on-stack iovec array, "misses" counts those
 * that needed a kmem_alloc()ed array.
 */
static int rfs3_write_hits = 0;
static int rfs3_write_misses = 0;
#endif
1256 
/*
 * rfs3_write - service an NFSv3 WRITE request.
 *
 *	args	file handle, offset, count and stability level of the write,
 *		plus the data itself (as an mblk chain in args->mblk, via
 *		args->rlist, or as a flat buffer in args->data)
 *	resp	result; resok is filled in on success, resfail on failure
 *	exi	export used to resolve the file handle and for the
 *		read-only and label checks
 *	req	the RPC request
 *	cr	credentials passed to the VOP calls
 *
 * Nothing is returned; the outcome is reported through resp->status.
 * Every path leaves through the err/err1/out labels at the bottom, which
 * drop the rwlock, leave the nbmand critical region and release the
 * vnode as needed.
 */
void
rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
	struct svc_req *req, cred_t *cr)
{
	int error;
	vnode_t *vp;
	struct vattr *bvap = NULL;	/* pre-op attrs, or NULL */
	struct vattr bva;
	struct vattr *avap = NULL;	/* post-op attrs, or NULL */
	struct vattr ava;
	u_offset_t rlimit;
	struct uio uio;
	struct iovec iov[MAX_IOVECS];
	mblk_t *m;
	struct iovec *iovp;
	int iovcnt;
	int ioflag;
	cred_t *savecred;
	int in_crit = 0;		/* nbl_start_crit() was called */
	int rwlock_ret = -1;		/* -1: skip VOP_RWUNLOCK on exit */
	caller_context_t ct;

	vp = nfs3_fhtovp(&args->file, exi);

	DTRACE_NFSV3_4(op__write__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, WRITE3args *, args);

	/* No vnode could be obtained for the file handle. */
	if (vp == NULL) {
		error = ESTALE;
		goto err;
	}

	/*
	 * On a labeled system, a client whose label is not admin_low
	 * must pass the label equality check against the file.
	 */
	if (is_system_labeled()) {
		bslabel_t *clabel = req->rq_label;

		ASSERT(clabel != NULL);
		DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
			    exi)) {
				resp->status = NFS3ERR_ACCES;
				goto err1;
			}
		}
	}

	/* Caller context handed to the VOP calls below. */
	ct.cc_sysid = 0;
	ct.cc_pid = 0;
	ct.cc_caller_id = nfs3_srv_caller_id;
	ct.cc_flags = CC_DONTBLOCK;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
		    NULL)) {
			error = EACCES;
			goto err;
		}
	}

	rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);

	/* check if a monitor detected a delegation conflict */
	if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		resp->status = NFS3ERR_JUKEBOX;
		/* Reset so that the exit path does not call VOP_RWUNLOCK. */
		rwlock_ret = -1;
		goto err1;
	}


	bva.va_mask = AT_ALL;
	error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

	/*
	 * If we can't get the attributes, then we can't do the
	 * right access checking.  So, we'll fail the request.
	 */
	if (error)
		goto err;

	/*
	 * Until the write has happened the post-op attributes reported
	 * on failure are the same as the pre-op ones.
	 */
	bvap = &bva;
	avap = bvap;

	/* The count argument must agree with the length of the data. */
	if (args->count != args->data.data_len) {
		resp->status = NFS3ERR_INVAL;
		goto err1;
	}

	if (rdonly(exi, req)) {
		resp->status = NFS3ERR_ROFS;
		goto err1;
	}

	/* Only regular files may be written. */
	if (vp->v_type != VREG) {
		resp->status = NFS3ERR_INVAL;
		goto err1;
	}

	/* The access check is skipped when the caller owns the file. */
	if (crgetuid(cr) != bva.va_uid &&
	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
		goto err;

	if (MANDLOCK(vp, bva.va_mode)) {
		resp->status = NFS3ERR_ACCES;
		goto err1;
	}

	/*
	 * A zero-length write succeeds without calling VOP_WRITE; the
	 * reply carries the attributes fetched above as both the before
	 * and after attributes.
	 */
	if (args->count == 0) {
		resp->status = NFS3_OK;
		vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
		resp->resok.count = 0;
		resp->resok.committed = args->stable;
		resp->resok.verf = write3verf;
		goto out;
	}

	/*
	 * Build the iovec array describing the data.  For an mblk chain
	 * there is one iovec per mblk; the on-stack array is used when
	 * the chain fits, otherwise an array is allocated and must be
	 * freed again (iovp != iov) before leaving.
	 */
	if (args->mblk != NULL) {
		iovcnt = 0;
		for (m = args->mblk; m != NULL; m = m->b_cont)
			iovcnt++;
		if (iovcnt <= MAX_IOVECS) {
#ifdef DEBUG
			rfs3_write_hits++;
#endif
			iovp = iov;
		} else {
#ifdef DEBUG
			rfs3_write_misses++;
#endif
			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
		}
		mblk_to_iov(args->mblk, iovcnt, iovp);

	} else if (args->rlist != NULL) {
		/* Data is at the address recorded in the first rlist entry. */
		iovcnt = 1;
		iovp = iov;
		iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
		iovp->iov_len = args->count;
	} else {
		/* Data is in the flat buffer of the arguments. */
		iovcnt = 1;
		iovp = iov;
		iovp->iov_base = args->data.data_val;
		iovp->iov_len = args->count;
	}

	uio.uio_iov = iovp;
	uio.uio_iovcnt = iovcnt;

	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_DEFAULT;
	uio.uio_loffset = args->offset;
	uio.uio_resid = args->count;
	uio.uio_llimit = curproc->p_fsz_ctl;
	/*
	 * Shorten the transfer when fewer than count bytes remain between
	 * the offset and the file size limit.
	 */
	rlimit = uio.uio_llimit - args->offset;
	if (rlimit < (u_offset_t)uio.uio_resid)
		uio.uio_resid = (int)rlimit;

	/*
	 * Translate the requested stability level into VOP_WRITE flags;
	 * any other value is rejected.
	 */
	if (args->stable == UNSTABLE)
		ioflag = 0;
	else if (args->stable == FILE_SYNC)
		ioflag = FSYNC;
	else if (args->stable == DATA_SYNC)
		ioflag = FDSYNC;
	else {
		if (iovp != iov)
			kmem_free(iovp, sizeof (*iovp) * iovcnt);
		resp->status = NFS3ERR_INVAL;
		goto err1;
	}

	/*
	 * We're changing creds because VM may fault and we need
	 * the cred of the current thread to be used if quota
	 * checking is enabled.
	 */
	savecred = curthread->t_cred;
	curthread->t_cred = cr;
	error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
	curthread->t_cred = savecred;

	/* The iovec array is no longer needed, whatever the outcome. */
	if (iovp != iov)
		kmem_free(iovp, sizeof (*iovp) * iovcnt);

	/* check if a monitor detected a delegation conflict */
	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
		resp->status = NFS3ERR_JUKEBOX;
		goto err1;
	}

	/*
	 * Fetch the post-op attributes before looking at the write
	 * result so that they are available to the failure reply too.
	 */
	ava.va_mask = AT_ALL;
	avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;

	if (error)
		goto err;

	/*
	 * If we were unable to get the V_WRITELOCK_TRUE, then we
	 * may not have accurate after attrs, so check if
	 * we have both attributes, they have a non-zero va_seq, and
	 * va_seq has changed by exactly one,
	 * if not, turn off the before attr.
	 */
	if (rwlock_ret != V_WRITELOCK_TRUE) {
		if (bvap == NULL || avap == NULL ||
		    bvap->va_seq == 0 || avap->va_seq == 0 ||
		    avap->va_seq != (bvap->va_seq + 1)) {
			bvap = NULL;
		}
	}

	resp->status = NFS3_OK;
	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
	/* Bytes written: what was asked for minus what was left over. */
	resp->resok.count = args->count - uio.uio_resid;
	resp->resok.committed = args->stable;
	resp->resok.verf = write3verf;
	goto out;

	/*
	 * err:  "error" holds an errno to be turned into an NFS3 status
	 *       (or JUKEBOX if the thread is flagged T_WOULDBLOCK).
	 * err1: resp->status is already set; fill in the failure wcc data.
	 * out:  common cleanup, also used by the success paths.
	 */
err:
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
err1:
	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
out:
	DTRACE_NFSV3_4(op__write__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, vp, WRITE3res *, resp);

	if (vp != NULL) {
		if (rwlock_ret != -1)
			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
		if (in_crit)
			nbl_end_crit(vp);
		VN_RELE(vp);
	}
}
1501 
1502 void *
1503 rfs3_write_getfh(WRITE3args *args)
1504 {
1505 
1506 	return (&args->file);
1507 }
1508 
/*
 * rfs3_create - service an NFSv3 CREATE request.
 *
 *	args	directory file handle and name of the file to create, plus
 *		the creation mode (UNCHECKED, GUARDED or EXCLUSIVE) with
 *		either the initial attributes or the exclusive-create
 *		verifier
 *	resp	result; resok is filled in on success, resfail on failure
 *	exi	export the directory handle is resolved against
 *	req	the RPC request
 *	cr	credentials passed to the VOP calls
 *
 * Nothing is returned; the outcome is reported through resp->status.
 *
 * For an EXCLUSIVE create the verifier is stored as the mtime of the new
 * file.  When the create fails with EEXIST, the existing file's mtime is
 * compared with the verifier to tell a duplicate of an earlier exclusive
 * create from a genuine name collision.
 */
void
rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
	struct svc_req *req, cred_t *cr)
{
	int error;
	int in_crit = 0;		/* nbl_start_crit(tvp) was called */
	vnode_t *vp;
	vnode_t *tvp = NULL;		/* pre-existing file, if looked up */
	vnode_t *dvp;
	struct vattr *vap;
	struct vattr va;
	struct vattr *dbvap;
	struct vattr dbva;
	struct vattr *davap;
	struct vattr dava;
	enum vcexcl excl;
	nfstime3 *mtime;
	len_t reqsize;
	bool_t trunc;
	struct sockaddr *ca;
	char *name = NULL;

	dbvap = NULL;
	davap = NULL;

	dvp = nfs3_fhtovp(&args->where.dir, exi);

	DTRACE_NFSV3_4(op__create__start, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, CREATE3args *, args);

	/* No vnode could be obtained for the directory file handle. */
	if (dvp == NULL) {
		error = ESTALE;
		goto out;
	}

	/*
	 * Directory attributes before the operation.  Until they are
	 * fetched again after VOP_CREATE they also stand in for the
	 * "after" attributes.
	 */
	dbva.va_mask = AT_ALL;
	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
	davap = dbvap;

	/* The name pointer is the nfs3nametoolong marker. */
	if (args->where.name == nfs3nametoolong) {
		resp->status = NFS3ERR_NAMETOOLONG;
		goto out1;
	}

	/* A missing or empty name is refused. */
	if (args->where.name == NULL || *(args->where.name) == '\0') {
		resp->status = NFS3ERR_ACCES;
		goto out1;
	}

	if (rdonly(exi, req)) {
		resp->status = NFS3ERR_ROFS;
		goto out1;
	}

	/*
	 * On a labeled system, a client whose label is not admin_low
	 * must pass the label equality check against the directory.
	 */
	if (is_system_labeled()) {
		bslabel_t *clabel = req->rq_label;

		ASSERT(clabel != NULL);
		DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
		    "got client label from request(1)", struct svc_req *, req);

		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
			    exi)) {
				resp->status = NFS3ERR_ACCES;
				goto out1;
			}
		}
	}

	/*
	 * Convert the name for this client.  The result is either
	 * args->where.name itself or a separate buffer of MAXPATHLEN + 1
	 * bytes, which has to be freed on every path from here on.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, exi, args->where.name,
	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);

	if (name == NULL) {
		/* This is really a Solaris EILSEQ */
		resp->status = NFS3ERR_INVAL;
		goto out1;
	}

	if (args->how.mode == EXCLUSIVE) {
		/*
		 * Create with mode 0 and record the verifier in the
		 * mtime of the new file.
		 */
		va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
		va.va_type = VREG;
		va.va_mode = (mode_t)0;
		/*
		 * Ensure no time overflows and that types match
		 */
		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
		va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
		va.va_mtime.tv_nsec = mtime->nseconds;
		excl = EXCL;
	} else {
		error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
		    &va);
		if (error)
			goto out;
		va.va_mask |= AT_TYPE;
		va.va_type = VREG;
		if (args->how.mode == GUARDED)
			excl = EXCL;
		else {
			excl = NONEXCL;

			/*
			 * During creation of file in non-exclusive mode
			 * if size of file is being set then make sure
			 * that if the file already exists that no conflicting
			 * non-blocking mandatory locks exists in the region
			 * being modified. If there are conflicting locks fail
			 * the operation with EACCES.
			 */
			if (va.va_mask & AT_SIZE) {
				struct vattr tva;

				/*
				 * Does file already exist?
				 */
				error = VOP_LOOKUP(dvp, name, &tvp,
				    NULL, 0, NULL, cr, NULL, NULL, NULL);

				/*
				 * Check to see if the file has been delegated
				 * to a v4 client.  If so, then begin recall of
				 * the delegation and return JUKEBOX to allow
				 * the client to retransmit its request.
				 */

				trunc = va.va_size == 0;
				if (!error &&
				    rfs4_check_delegated(FWRITE, tvp, trunc)) {
					resp->status = NFS3ERR_JUKEBOX;
					goto out1;
				}

				/*
				 * Check for NBMAND lock conflicts
				 */
				if (!error && nbl_need_check(tvp)) {
					u_offset_t offset;
					ssize_t len;

					nbl_start_crit(tvp, RW_READER);
					in_crit = 1;

					tva.va_mask = AT_SIZE;
					error = VOP_GETATTR(tvp, &tva, 0, cr,
					    NULL);
					/*
					 * Can't check for conflicts, so return
					 * error.
					 */
					if (error)
						goto out;

					/*
					 * The region checked lies between
					 * the current and the requested
					 * size: it starts at the smaller of
					 * the two and is as long as their
					 * difference.
					 */
					offset = tva.va_size < va.va_size ?
					    tva.va_size : va.va_size;
					len = tva.va_size < va.va_size ?
					    va.va_size - tva.va_size :
					    tva.va_size - va.va_size;
					if (nbl_conflict(tvp, NBL_WRITE,
					    offset, len, 0, NULL)) {
						error = EACCES;
						goto out;
					}
				} else if (tvp) {
					/*
					 * No nbmand check is needed, so
					 * the hold on the existing file is
					 * dropped right away.
					 */
					VN_RELE(tvp);
					tvp = NULL;
				}
			}
		}
		/* Remember the requested size; va is reused further down. */
		if (va.va_mask & AT_SIZE)
			reqsize = va.va_size;
	}

	/*
	 * Must specify the mode.
	 */
	if (!(va.va_mask & AT_MODE)) {
		resp->status = NFS3ERR_INVAL;
		goto out1;
	}

	/*
	 * If the filesystem is exported with nosuid, then mask off
	 * the setuid and setgid bits.
	 */
	if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
		va.va_mode &= ~(VSUID | VSGID);

tryagain:
	/*
	 * The file open mode used is VWRITE.  If the client needs
	 * some other semantic, then it should do the access checking
	 * itself.  It would have been nice to have the file open mode
	 * passed as part of the arguments.
	 */
	error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
	    &vp, cr, 0, NULL, NULL);

	/* Directory attributes after the create attempt. */
	dava.va_mask = AT_ALL;
	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;

	if (error) {
		/*
		 * If we got something other than file already exists
		 * then just return this error.  Otherwise, we got
		 * EEXIST.  If we were doing a GUARDED create, then
		 * just return this error.  Otherwise, we need to
		 * make sure that this wasn't a duplicate of an
		 * exclusive create request.
		 *
		 * The assumption is made that a non-exclusive create
		 * request will never return EEXIST.
		 */
		if (error != EEXIST || args->how.mode == GUARDED)
			goto out;
		/*
		 * Lookup the file so that we can get a vnode for it.
		 */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
		    NULL, cr, NULL, NULL, NULL);
		if (error) {
			/*
			 * We couldn't find the file that we thought that
			 * we just created.  So, we'll just try creating
			 * it again.
			 */
			if (error == ENOENT)
				goto tryagain;
			goto out;
		}

		/*
		 * If the file is delegated to a v4 client, go ahead
		 * and initiate recall, this create is a hint that a
		 * conflicting v3 open has occurred.
		 */

		if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
			VN_RELE(vp);
			resp->status = NFS3ERR_JUKEBOX;
			goto out1;
		}

		/* From here on va holds the existing file's attributes. */
		va.va_mask = AT_ALL;
		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;

		/*
		 * For an EXCLUSIVE create the existing file is accepted
		 * only if its mtime matches the verifier of this request;
		 * otherwise the create fails with EEXIST.
		 */
		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
		/* % with INT32_MAX to prevent overflows */
		if (args->how.mode == EXCLUSIVE && (vap == NULL ||
		    vap->va_mtime.tv_sec !=
		    (mtime->seconds % INT32_MAX) ||
		    vap->va_mtime.tv_nsec != mtime->nseconds)) {
			VN_RELE(vp);
			error = EEXIST;
			goto out;
		}
	} else {

		/*
		 * The create counts as a truncation when the client
		 * asked for a size of zero.
		 */
		if ((args->how.mode == UNCHECKED ||
		    args->how.mode == GUARDED) &&
		    args->how.createhow3_u.obj_attributes.size.set_it &&
		    va.va_size == 0)
			trunc = TRUE;
		else
			trunc = FALSE;

		if (rfs4_check_delegated(FWRITE, vp, trunc)) {
			VN_RELE(vp);
			resp->status = NFS3ERR_JUKEBOX;
			goto out1;
		}

		/* From here on va holds the new file's attributes. */
		va.va_mask = AT_ALL;
		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;

		/*
		 * We need to check to make sure that the file got
		 * created to the indicated size.  If not, we do a
		 * setattr to try to change the size, but we don't
		 * try too hard.  This shouldn't be a problem as most
		 * clients will only specify a size of zero which
		 * local file systems handle.  However, even if
		 * the client does specify a non-zero size, it can
		 * still recover by checking the size of the file
		 * after it has created it and then issue a setattr
		 * request of its own to set the size of the file.
		 */
		if (vap != NULL &&
		    (args->how.mode == UNCHECKED ||
		    args->how.mode == GUARDED) &&
		    args->how.createhow3_u.obj_attributes.size.set_it &&
		    vap->va_size != reqsize) {
			va.va_mask = AT_SIZE;
			va.va_size = reqsize;
			(void) VOP_SETATTR(vp, &va, 0, cr, NULL);
			va.va_mask = AT_ALL;
			vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
		}
	}

	/* Free the converted name if it is a separate buffer. */
	if (name != args->where.name)
		kmem_free(name, MAXPATHLEN + 1);

	/* A file handle is returned only if one could be built. */
	error = makefh3(&resp->resok.obj.handle, vp, exi);
	if (error)
		resp->resok.obj.handle_follows = FALSE;
	else
		resp->resok.obj.handle_follows = TRUE;

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
	(void) VOP_FSYNC(dvp, 0, cr, NULL);

	VN_RELE(vp);
	if (tvp != NULL) {
		if (in_crit)
			nbl_end_crit(tvp);
		VN_RELE(tvp);
	}

	resp->status = NFS3_OK;
	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);

	DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);

	VN_RELE(dvp);
	return;

	/*
	 * out:  "error" holds an errno to be turned into an NFS3 status
	 *       (or JUKEBOX if the thread is flagged T_WOULDBLOCK).
	 * out1: resp->status is already set; release everything still
	 *       held and fill in the failure wcc data.
	 */
out:
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
out1:
	DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
	    cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);

	if (name != NULL && name != args->where.name)
		kmem_free(name, MAXPATHLEN + 1);

	if (tvp != NULL) {
		if (in_crit)
			nbl_end_crit(tvp);
		VN_RELE(tvp);
	}
	if (dvp != NULL)
		VN_RELE(dvp);
	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
}
1864 
1865 void *
1866 rfs3_create_getfh(CREATE3args *args)
1867 {
1868 
1869 	return (&args->where.dir);
1870 }
1871 
1872 void
1873 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1874 	struct svc_req *req, cred_t *cr)
1875 {
1876 	int error;
1877 	vnode_t *vp = NULL;
1878 	vnode_t *dvp;
1879 	struct vattr *vap;
1880 	struct vattr va;
1881 	struct vattr *dbvap;
1882 	struct vattr dbva;
1883 	struct vattr *davap;
1884 	struct vattr dava;
1885 	struct sockaddr *ca;
1886 	char *name = NULL;
1887 
1888 	dbvap = NULL;
1889 	davap = NULL;
1890 
1891 	dvp = nfs3_fhtovp(&args->where.dir, exi);
1892 
1893 	DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req,
1894 	    cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args);
1895 
1896 	if (dvp == NULL) {
1897 		error = ESTALE;
1898 		goto out;
1899 	}
1900 
1901 	dbva.va_mask = AT_ALL;
1902 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1903 	davap = dbvap;
1904 
1905 	if (args->where.name == nfs3nametoolong) {
1906 		resp->status = NFS3ERR_NAMETOOLONG;
1907 		goto out1;
1908 	}
1909 
1910 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1911 		resp->status = NFS3ERR_ACCES;
1912 		goto out1;
1913 	}
1914 
1915 	if (rdonly(exi, req)) {
1916 		resp->status = NFS3ERR_ROFS;
1917 		goto out1;
1918 	}
1919 
1920 	if (is_system_labeled()) {
1921 		bslabel_t *clabel = req->rq_label;
1922 
1923 		ASSERT(clabel != NULL);
1924 		DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1925 		    "got client label from request(1)", struct svc_req *, req);
1926 
1927 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1928 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1929 			    exi)) {
1930 				resp->status = NFS3ERR_ACCES;
1931 				goto out1;
1932 			}
1933 		}
1934 	}
1935 
1936 	error = sattr3_to_vattr(&args->attributes, &va);
1937 	if (error)
1938 		goto out;
1939 
1940 	if (!(va.va_mask & AT_MODE)) {
1941 		resp->status = NFS3ERR_INVAL;
1942 		goto out1;
1943 	}
1944 
1945 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1946 	name = nfscmd_convname(ca, exi, args->where.name,
1947 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1948 
1949 	if (name == NULL) {
1950 		resp->status = NFS3ERR_INVAL;
1951 		goto out1;
1952 	}
1953 
1954 	va.va_mask |= AT_TYPE;
1955 	va.va_type = VDIR;
1956 
1957 	error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
1958 
1959 	if (name != args->where.name)
1960 		kmem_free(name, MAXPATHLEN + 1);
1961 
1962 	dava.va_mask = AT_ALL;
1963 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1964 
1965 	/*
1966 	 * Force modified data and metadata out to stable storage.
1967 	 */
1968 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
1969 
1970 	if (error)
1971 		goto out;
1972 
1973 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1974 	if (error)
1975 		resp->resok.obj.handle_follows = FALSE;
1976 	else
1977 		resp->resok.obj.handle_follows = TRUE;
1978 
1979 	va.va_mask = AT_ALL;
1980 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1981 
1982 	/*
1983 	 * Force modified data and metadata out to stable storage.
1984 	 */
1985 	(void) VOP_FSYNC(vp, 0, cr, NULL);
1986 
1987 	VN_RELE(vp);
1988 
1989 	resp->status = NFS3_OK;
1990 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1991 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1992 
1993 	DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
1994 	    cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
1995 	VN_RELE(dvp);
1996 
1997 	return;
1998 
1999 out:
2000 	if (curthread->t_flag & T_WOULDBLOCK) {
2001 		curthread->t_flag &= ~T_WOULDBLOCK;
2002 		resp->status = NFS3ERR_JUKEBOX;
2003 	} else
2004 		resp->status = puterrno3(error);
2005 out1:
2006 	DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
2007 	    cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
2008 	if (dvp != NULL)
2009 		VN_RELE(dvp);
2010 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2011 }
2012 
2013 void *
2014 rfs3_mkdir_getfh(MKDIR3args *args)
2015 {
2016 
2017 	return (&args->where.dir);
2018 }
2019 
2020 void
2021 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2022 	struct svc_req *req, cred_t *cr)
2023 {
2024 	int error;
2025 	vnode_t *vp;
2026 	vnode_t *dvp;
2027 	struct vattr *vap;
2028 	struct vattr va;
2029 	struct vattr *dbvap;
2030 	struct vattr dbva;
2031 	struct vattr *davap;
2032 	struct vattr dava;
2033 	struct sockaddr *ca;
2034 	char *name = NULL;
2035 	char *symdata = NULL;
2036 
2037 	dbvap = NULL;
2038 	davap = NULL;
2039 
2040 	dvp = nfs3_fhtovp(&args->where.dir, exi);
2041 
2042 	DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req,
2043 	    cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args);
2044 
2045 	if (dvp == NULL) {
2046 		error = ESTALE;
2047 		goto err;
2048 	}
2049 
2050 	dbva.va_mask = AT_ALL;
2051 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2052 	davap = dbvap;
2053 
2054 	if (args->where.name == nfs3nametoolong) {
2055 		resp->status = NFS3ERR_NAMETOOLONG;
2056 		goto err1;
2057 	}
2058 
2059 	if (args->where.name == NULL || *(args->where.name) == '\0') {
2060 		resp->status = NFS3ERR_ACCES;
2061 		goto err1;
2062 	}
2063 
2064 	if (rdonly(exi, req)) {
2065 		resp->status = NFS3ERR_ROFS;
2066 		goto err1;
2067 	}
2068 
2069 	if (is_system_labeled()) {
2070 		bslabel_t *clabel = req->rq_label;
2071 
2072 		ASSERT(clabel != NULL);
2073 		DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2074 		    "got client label from request(1)", struct svc_req *, req);
2075 
2076 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2077 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2078 			    exi)) {
2079 				resp->status = NFS3ERR_ACCES;
2080 				goto err1;
2081 			}
2082 		}
2083 	}
2084 
2085 	error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2086 	if (error)
2087 		goto err;
2088 
2089 	if (!(va.va_mask & AT_MODE)) {
2090 		resp->status = NFS3ERR_INVAL;
2091 		goto err1;
2092 	}
2093 
2094 	if (args->symlink.symlink_data == nfs3nametoolong) {
2095 		resp->status = NFS3ERR_NAMETOOLONG;
2096 		goto err1;
2097 	}
2098 
2099 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2100 	name = nfscmd_convname(ca, exi, args->where.name,
2101 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2102 
2103 	if (name == NULL) {
2104 		/* This is really a Solaris EILSEQ */
2105 		resp->status = NFS3ERR_INVAL;
2106 		goto err1;
2107 	}
2108 
2109 	symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2110 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2111 	if (symdata == NULL) {
2112 		/* This is really a Solaris EILSEQ */
2113 		resp->status = NFS3ERR_INVAL;
2114 		goto err1;
2115 	}
2116 
2117 
2118 	va.va_mask |= AT_TYPE;
2119 	va.va_type = VLNK;
2120 
2121 	error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2122 
2123 	dava.va_mask = AT_ALL;
2124 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2125 
2126 	if (error)
2127 		goto err;
2128 
2129 	error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2130 	    NULL, NULL, NULL);
2131 
2132 	/*
2133 	 * Force modified data and metadata out to stable storage.
2134 	 */
2135 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
2136 
2137 
2138 	resp->status = NFS3_OK;
2139 	if (error) {
2140 		resp->resok.obj.handle_follows = FALSE;
2141 		vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2142 		vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2143 		goto out;
2144 	}
2145 
2146 	error = makefh3(&resp->resok.obj.handle, vp, exi);
2147 	if (error)
2148 		resp->resok.obj.handle_follows = FALSE;
2149 	else
2150 		resp->resok.obj.handle_follows = TRUE;
2151 
2152 	va.va_mask = AT_ALL;
2153 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2154 
2155 	/*
2156 	 * Force modified data and metadata out to stable storage.
2157 	 */
2158 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2159 
2160 	VN_RELE(vp);
2161 
2162 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2163 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2164 	goto out;
2165 
2166 err:
2167 	if (curthread->t_flag & T_WOULDBLOCK) {
2168 		curthread->t_flag &= ~T_WOULDBLOCK;
2169 		resp->status = NFS3ERR_JUKEBOX;
2170 	} else
2171 		resp->status = puterrno3(error);
2172 err1:
2173 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2174 out:
2175 	if (name != NULL && name != args->where.name)
2176 		kmem_free(name, MAXPATHLEN + 1);
2177 	if (symdata != NULL && symdata != args->symlink.symlink_data)
2178 		kmem_free(symdata, MAXPATHLEN + 1);
2179 
2180 	DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req,
2181 	    cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp);
2182 
2183 	if (dvp != NULL)
2184 		VN_RELE(dvp);
2185 }
2186 
2187 void *
2188 rfs3_symlink_getfh(SYMLINK3args *args)
2189 {
2190 
2191 	return (&args->where.dir);
2192 }
2193 
2194 void
2195 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2196 	struct svc_req *req, cred_t *cr)
2197 {
2198 	int error;
2199 	vnode_t *vp;
2200 	vnode_t *realvp;
2201 	vnode_t *dvp;
2202 	struct vattr *vap;
2203 	struct vattr va;
2204 	struct vattr *dbvap;
2205 	struct vattr dbva;
2206 	struct vattr *davap;
2207 	struct vattr dava;
2208 	int mode;
2209 	enum vcexcl excl;
2210 	struct sockaddr *ca;
2211 	char *name = NULL;
2212 
2213 	dbvap = NULL;
2214 	davap = NULL;
2215 
2216 	dvp = nfs3_fhtovp(&args->where.dir, exi);
2217 
2218 	DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req,
2219 	    cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args);
2220 
2221 	if (dvp == NULL) {
2222 		error = ESTALE;
2223 		goto out;
2224 	}
2225 
2226 	dbva.va_mask = AT_ALL;
2227 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2228 	davap = dbvap;
2229 
2230 	if (args->where.name == nfs3nametoolong) {
2231 		resp->status = NFS3ERR_NAMETOOLONG;
2232 		goto out1;
2233 	}
2234 
2235 	if (args->where.name == NULL || *(args->where.name) == '\0') {
2236 		resp->status = NFS3ERR_ACCES;
2237 		goto out1;
2238 	}
2239 
2240 	if (rdonly(exi, req)) {
2241 		resp->status = NFS3ERR_ROFS;
2242 		goto out1;
2243 	}
2244 
2245 	if (is_system_labeled()) {
2246 		bslabel_t *clabel = req->rq_label;
2247 
2248 		ASSERT(clabel != NULL);
2249 		DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2250 		    "got client label from request(1)", struct svc_req *, req);
2251 
2252 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2253 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2254 			    exi)) {
2255 				resp->status = NFS3ERR_ACCES;
2256 				goto out1;
2257 			}
2258 		}
2259 	}
2260 
2261 	switch (args->what.type) {
2262 	case NF3CHR:
2263 	case NF3BLK:
2264 		error = sattr3_to_vattr(
2265 		    &args->what.mknoddata3_u.device.dev_attributes, &va);
2266 		if (error)
2267 			goto out;
2268 		if (secpolicy_sys_devices(cr) != 0) {
2269 			resp->status = NFS3ERR_PERM;
2270 			goto out1;
2271 		}
2272 		if (args->what.type == NF3CHR)
2273 			va.va_type = VCHR;
2274 		else
2275 			va.va_type = VBLK;
2276 		va.va_rdev = makedevice(
2277 		    args->what.mknoddata3_u.device.spec.specdata1,
2278 		    args->what.mknoddata3_u.device.spec.specdata2);
2279 		va.va_mask |= AT_TYPE | AT_RDEV;
2280 		break;
2281 	case NF3SOCK:
2282 		error = sattr3_to_vattr(
2283 		    &args->what.mknoddata3_u.pipe_attributes, &va);
2284 		if (error)
2285 			goto out;
2286 		va.va_type = VSOCK;
2287 		va.va_mask |= AT_TYPE;
2288 		break;
2289 	case NF3FIFO:
2290 		error = sattr3_to_vattr(
2291 		    &args->what.mknoddata3_u.pipe_attributes, &va);
2292 		if (error)
2293 			goto out;
2294 		va.va_type = VFIFO;
2295 		va.va_mask |= AT_TYPE;
2296 		break;
2297 	default:
2298 		resp->status = NFS3ERR_BADTYPE;
2299 		goto out1;
2300 	}
2301 
2302 	/*
2303 	 * Must specify the mode.
2304 	 */
2305 	if (!(va.va_mask & AT_MODE)) {
2306 		resp->status = NFS3ERR_INVAL;
2307 		goto out1;
2308 	}
2309 
2310 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2311 	name = nfscmd_convname(ca, exi, args->where.name,
2312 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2313 
2314 	if (name == NULL) {
2315 		resp->status = NFS3ERR_INVAL;
2316 		goto out1;
2317 	}
2318 
2319 	excl = EXCL;
2320 
2321 	mode = 0;
2322 
2323 	error = VOP_CREATE(dvp, name, &va, excl, mode,
2324 	    &vp, cr, 0, NULL, NULL);
2325 
2326 	if (name != args->where.name)
2327 		kmem_free(name, MAXPATHLEN + 1);
2328 
2329 	dava.va_mask = AT_ALL;
2330 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2331 
2332 	/*
2333 	 * Force modified data and metadata out to stable storage.
2334 	 */
2335 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
2336 
2337 	if (error)
2338 		goto out;
2339 
2340 	resp->status = NFS3_OK;
2341 
2342 	error = makefh3(&resp->resok.obj.handle, vp, exi);
2343 	if (error)
2344 		resp->resok.obj.handle_follows = FALSE;
2345 	else
2346 		resp->resok.obj.handle_follows = TRUE;
2347 
2348 	va.va_mask = AT_ALL;
2349 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2350 
2351 	/*
2352 	 * Force modified metadata out to stable storage.
2353 	 *
2354 	 * if a underlying vp exists, pass it to VOP_FSYNC
2355 	 */
2356 	if (VOP_REALVP(vp, &realvp, NULL) == 0)
2357 		(void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2358 	else
2359 		(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2360 
2361 	VN_RELE(vp);
2362 
2363 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2364 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2365 	DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2366 	    cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2367 	VN_RELE(dvp);
2368 	return;
2369 
2370 out:
2371 	if (curthread->t_flag & T_WOULDBLOCK) {
2372 		curthread->t_flag &= ~T_WOULDBLOCK;
2373 		resp->status = NFS3ERR_JUKEBOX;
2374 	} else
2375 		resp->status = puterrno3(error);
2376 out1:
2377 	DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2378 	    cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2379 	if (dvp != NULL)
2380 		VN_RELE(dvp);
2381 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2382 }
2383 
2384 void *
2385 rfs3_mknod_getfh(MKNOD3args *args)
2386 {
2387 
2388 	return (&args->where.dir);
2389 }
2390 
2391 void
2392 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2393 	struct svc_req *req, cred_t *cr)
2394 {
2395 	int error = 0;
2396 	vnode_t *vp;
2397 	struct vattr *bvap;
2398 	struct vattr bva;
2399 	struct vattr *avap;
2400 	struct vattr ava;
2401 	vnode_t *targvp = NULL;
2402 	struct sockaddr *ca;
2403 	char *name = NULL;
2404 
2405 	bvap = NULL;
2406 	avap = NULL;
2407 
2408 	vp = nfs3_fhtovp(&args->object.dir, exi);
2409 
2410 	DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req,
2411 	    cred_t *, cr, vnode_t *, vp, REMOVE3args *, args);
2412 
2413 	if (vp == NULL) {
2414 		error = ESTALE;
2415 		goto err;
2416 	}
2417 
2418 	bva.va_mask = AT_ALL;
2419 	bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2420 	avap = bvap;
2421 
2422 	if (vp->v_type != VDIR) {
2423 		resp->status = NFS3ERR_NOTDIR;
2424 		goto err1;
2425 	}
2426 
2427 	if (args->object.name == nfs3nametoolong) {
2428 		resp->status = NFS3ERR_NAMETOOLONG;
2429 		goto err1;
2430 	}
2431 
2432 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2433 		resp->status = NFS3ERR_ACCES;
2434 		goto err1;
2435 	}
2436 
2437 	if (rdonly(exi, req)) {
2438 		resp->status = NFS3ERR_ROFS;
2439 		goto err1;
2440 	}
2441 
2442 	if (is_system_labeled()) {
2443 		bslabel_t *clabel = req->rq_label;
2444 
2445 		ASSERT(clabel != NULL);
2446 		DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2447 		    "got client label from request(1)", struct svc_req *, req);
2448 
2449 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2450 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2451 			    exi)) {
2452 				resp->status = NFS3ERR_ACCES;
2453 				goto err1;
2454 			}
2455 		}
2456 	}
2457 
2458 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2459 	name = nfscmd_convname(ca, exi, args->object.name,
2460 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2461 
2462 	if (name == NULL) {
2463 		resp->status = NFS3ERR_INVAL;
2464 		goto err1;
2465 	}
2466 
2467 	/*
2468 	 * Check for a conflict with a non-blocking mandatory share
2469 	 * reservation and V4 delegations
2470 	 */
2471 	error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2472 	    NULL, cr, NULL, NULL, NULL);
2473 	if (error != 0)
2474 		goto err;
2475 
2476 	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2477 		resp->status = NFS3ERR_JUKEBOX;
2478 		goto err1;
2479 	}
2480 
2481 	if (!nbl_need_check(targvp)) {
2482 		error = VOP_REMOVE(vp, name, cr, NULL, 0);
2483 	} else {
2484 		nbl_start_crit(targvp, RW_READER);
2485 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2486 			error = EACCES;
2487 		} else {
2488 			error = VOP_REMOVE(vp, name, cr, NULL, 0);
2489 		}
2490 		nbl_end_crit(targvp);
2491 	}
2492 	VN_RELE(targvp);
2493 	targvp = NULL;
2494 
2495 	ava.va_mask = AT_ALL;
2496 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2497 
2498 	/*
2499 	 * Force modified data and metadata out to stable storage.
2500 	 */
2501 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2502 
2503 	if (error)
2504 		goto err;
2505 
2506 	resp->status = NFS3_OK;
2507 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2508 	goto out;
2509 
2510 err:
2511 	if (curthread->t_flag & T_WOULDBLOCK) {
2512 		curthread->t_flag &= ~T_WOULDBLOCK;
2513 		resp->status = NFS3ERR_JUKEBOX;
2514 	} else
2515 		resp->status = puterrno3(error);
2516 err1:
2517 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2518 out:
2519 	DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req,
2520 	    cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp);
2521 
2522 	if (name != NULL && name != args->object.name)
2523 		kmem_free(name, MAXPATHLEN + 1);
2524 
2525 	if (vp != NULL)
2526 		VN_RELE(vp);
2527 }
2528 
2529 void *
2530 rfs3_remove_getfh(REMOVE3args *args)
2531 {
2532 
2533 	return (&args->object.dir);
2534 }
2535 
2536 void
2537 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2538 	struct svc_req *req, cred_t *cr)
2539 {
2540 	int error;
2541 	vnode_t *vp;
2542 	struct vattr *bvap;
2543 	struct vattr bva;
2544 	struct vattr *avap;
2545 	struct vattr ava;
2546 	struct sockaddr *ca;
2547 	char *name = NULL;
2548 
2549 	bvap = NULL;
2550 	avap = NULL;
2551 
2552 	vp = nfs3_fhtovp(&args->object.dir, exi);
2553 
2554 	DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req,
2555 	    cred_t *, cr, vnode_t *, vp, RMDIR3args *, args);
2556 
2557 	if (vp == NULL) {
2558 		error = ESTALE;
2559 		goto err;
2560 	}
2561 
2562 	bva.va_mask = AT_ALL;
2563 	bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2564 	avap = bvap;
2565 
2566 	if (vp->v_type != VDIR) {
2567 		resp->status = NFS3ERR_NOTDIR;
2568 		goto err1;
2569 	}
2570 
2571 	if (args->object.name == nfs3nametoolong) {
2572 		resp->status = NFS3ERR_NAMETOOLONG;
2573 		goto err1;
2574 	}
2575 
2576 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2577 		resp->status = NFS3ERR_ACCES;
2578 		goto err1;
2579 	}
2580 
2581 	if (rdonly(exi, req)) {
2582 		resp->status = NFS3ERR_ROFS;
2583 		goto err1;
2584 	}
2585 
2586 	if (is_system_labeled()) {
2587 		bslabel_t *clabel = req->rq_label;
2588 
2589 		ASSERT(clabel != NULL);
2590 		DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2591 		    "got client label from request(1)", struct svc_req *, req);
2592 
2593 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2594 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2595 			    exi)) {
2596 				resp->status = NFS3ERR_ACCES;
2597 				goto err1;
2598 			}
2599 		}
2600 	}
2601 
2602 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2603 	name = nfscmd_convname(ca, exi, args->object.name,
2604 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2605 
2606 	if (name == NULL) {
2607 		resp->status = NFS3ERR_INVAL;
2608 		goto err1;
2609 	}
2610 
2611 	error = VOP_RMDIR(vp, name, rootdir, cr, NULL, 0);
2612 
2613 	if (name != args->object.name)
2614 		kmem_free(name, MAXPATHLEN + 1);
2615 
2616 	ava.va_mask = AT_ALL;
2617 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2618 
2619 	/*
2620 	 * Force modified data and metadata out to stable storage.
2621 	 */
2622 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2623 
2624 	if (error) {
2625 		/*
2626 		 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2627 		 * if the directory is not empty.  A System V NFS server
2628 		 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2629 		 * over the wire.
2630 		 */
2631 		if (error == EEXIST)
2632 			error = ENOTEMPTY;
2633 		goto err;
2634 	}
2635 
2636 	resp->status = NFS3_OK;
2637 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2638 	goto out;
2639 
2640 err:
2641 	if (curthread->t_flag & T_WOULDBLOCK) {
2642 		curthread->t_flag &= ~T_WOULDBLOCK;
2643 		resp->status = NFS3ERR_JUKEBOX;
2644 	} else
2645 		resp->status = puterrno3(error);
2646 err1:
2647 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2648 out:
2649 	DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req,
2650 	    cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp);
2651 	if (vp != NULL)
2652 		VN_RELE(vp);
2653 
2654 }
2655 
2656 void *
2657 rfs3_rmdir_getfh(RMDIR3args *args)
2658 {
2659 
2660 	return (&args->object.dir);
2661 }
2662 
2663 void
2664 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2665 	struct svc_req *req, cred_t *cr)
2666 {
2667 	int error = 0;
2668 	vnode_t *fvp;
2669 	vnode_t *tvp;
2670 	vnode_t *targvp;
2671 	struct vattr *fbvap;
2672 	struct vattr fbva;
2673 	struct vattr *favap;
2674 	struct vattr fava;
2675 	struct vattr *tbvap;
2676 	struct vattr tbva;
2677 	struct vattr *tavap;
2678 	struct vattr tava;
2679 	nfs_fh3 *fh3;
2680 	struct exportinfo *to_exi;
2681 	vnode_t *srcvp = NULL;
2682 	bslabel_t *clabel;
2683 	struct sockaddr *ca;
2684 	char *name = NULL;
2685 	char *toname = NULL;
2686 
2687 	fbvap = NULL;
2688 	favap = NULL;
2689 	tbvap = NULL;
2690 	tavap = NULL;
2691 	tvp = NULL;
2692 
2693 	fvp = nfs3_fhtovp(&args->from.dir, exi);
2694 
2695 	DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req,
2696 	    cred_t *, cr, vnode_t *, fvp, RENAME3args *, args);
2697 
2698 	if (fvp == NULL) {
2699 		error = ESTALE;
2700 		goto err;
2701 	}
2702 
2703 	if (is_system_labeled()) {
2704 		clabel = req->rq_label;
2705 		ASSERT(clabel != NULL);
2706 		DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2707 		    "got client label from request(1)", struct svc_req *, req);
2708 
2709 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2710 			if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2711 			    exi)) {
2712 				resp->status = NFS3ERR_ACCES;
2713 				goto err1;
2714 			}
2715 		}
2716 	}
2717 
2718 	fbva.va_mask = AT_ALL;
2719 	fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2720 	favap = fbvap;
2721 
2722 	fh3 = &args->to.dir;
2723 	to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2724 	if (to_exi == NULL) {
2725 		resp->status = NFS3ERR_ACCES;
2726 		goto err1;
2727 	}
2728 	exi_rele(to_exi);
2729 
2730 	if (to_exi != exi) {
2731 		resp->status = NFS3ERR_XDEV;
2732 		goto err1;
2733 	}
2734 
2735 	tvp = nfs3_fhtovp(&args->to.dir, exi);
2736 	if (tvp == NULL) {
2737 		error = ESTALE;
2738 		goto err;
2739 	}
2740 
2741 	tbva.va_mask = AT_ALL;
2742 	tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2743 	tavap = tbvap;
2744 
2745 	if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2746 		resp->status = NFS3ERR_NOTDIR;
2747 		goto err1;
2748 	}
2749 
2750 	if (args->from.name == nfs3nametoolong ||
2751 	    args->to.name == nfs3nametoolong) {
2752 		resp->status = NFS3ERR_NAMETOOLONG;
2753 		goto err1;
2754 	}
2755 	if (args->from.name == NULL || *(args->from.name) == '\0' ||
2756 	    args->to.name == NULL || *(args->to.name) == '\0') {
2757 		resp->status = NFS3ERR_ACCES;
2758 		goto err1;
2759 	}
2760 
2761 	if (rdonly(exi, req)) {
2762 		resp->status = NFS3ERR_ROFS;
2763 		goto err1;
2764 	}
2765 
2766 	if (is_system_labeled()) {
2767 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2768 			if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2769 			    exi)) {
2770 				resp->status = NFS3ERR_ACCES;
2771 				goto err1;
2772 			}
2773 		}
2774 	}
2775 
2776 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2777 	name = nfscmd_convname(ca, exi, args->from.name,
2778 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2779 
2780 	if (name == NULL) {
2781 		resp->status = NFS3ERR_INVAL;
2782 		goto err1;
2783 	}
2784 
2785 	toname = nfscmd_convname(ca, exi, args->to.name,
2786 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2787 
2788 	if (toname == NULL) {
2789 		resp->status = NFS3ERR_INVAL;
2790 		goto err1;
2791 	}
2792 
2793 	/*
2794 	 * Check for a conflict with a non-blocking mandatory share
2795 	 * reservation or V4 delegations.
2796 	 */
2797 	error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2798 	    NULL, cr, NULL, NULL, NULL);
2799 	if (error != 0)
2800 		goto err;
2801 
2802 	/*
2803 	 * If we rename a delegated file we should recall the
2804 	 * delegation, since future opens should fail or would
2805 	 * refer to a new file.
2806 	 */
2807 	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2808 		resp->status = NFS3ERR_JUKEBOX;
2809 		goto err1;
2810 	}
2811 
2812 	/*
2813 	 * Check for renaming over a delegated file.  Check rfs4_deleg_policy
2814 	 * first to avoid VOP_LOOKUP if possible.
2815 	 */
2816 	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2817 	    VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2818 	    NULL, NULL, NULL) == 0) {
2819 
2820 		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2821 			VN_RELE(targvp);
2822 			resp->status = NFS3ERR_JUKEBOX;
2823 			goto err1;
2824 		}
2825 		VN_RELE(targvp);
2826 	}
2827 
2828 	if (!nbl_need_check(srcvp)) {
2829 		error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2830 	} else {
2831 		nbl_start_crit(srcvp, RW_READER);
2832 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2833 			error = EACCES;
2834 		else
2835 			error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2836 		nbl_end_crit(srcvp);
2837 	}
2838 	if (error == 0)
2839 		vn_renamepath(tvp, srcvp, args->to.name,
2840 		    strlen(args->to.name));
2841 	VN_RELE(srcvp);
2842 	srcvp = NULL;
2843 
2844 	fava.va_mask = AT_ALL;
2845 	favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2846 	tava.va_mask = AT_ALL;
2847 	tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2848 
2849 	/*
2850 	 * Force modified data and metadata out to stable storage.
2851 	 */
2852 	(void) VOP_FSYNC(fvp, 0, cr, NULL);
2853 	(void) VOP_FSYNC(tvp, 0, cr, NULL);
2854 
2855 	if (error)
2856 		goto err;
2857 
2858 	resp->status = NFS3_OK;
2859 	vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2860 	vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2861 	goto out;
2862 
2863 err:
2864 	if (curthread->t_flag & T_WOULDBLOCK) {
2865 		curthread->t_flag &= ~T_WOULDBLOCK;
2866 		resp->status = NFS3ERR_JUKEBOX;
2867 	} else {
2868 		resp->status = puterrno3(error);
2869 	}
2870 err1:
2871 	vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2872 	vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2873 
2874 out:
2875 	if (name != NULL && name != args->from.name)
2876 		kmem_free(name, MAXPATHLEN + 1);
2877 	if (toname != NULL && toname != args->to.name)
2878 		kmem_free(toname, MAXPATHLEN + 1);
2879 
2880 	DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req,
2881 	    cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp);
2882 	if (fvp != NULL)
2883 		VN_RELE(fvp);
2884 	if (tvp != NULL)
2885 		VN_RELE(tvp);
2886 }
2887 
2888 void *
2889 rfs3_rename_getfh(RENAME3args *args)
2890 {
2891 
2892 	return (&args->from.dir);
2893 }
2894 
2895 void
2896 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2897 	struct svc_req *req, cred_t *cr)
2898 {
2899 	int error;
2900 	vnode_t *vp;
2901 	vnode_t *dvp;
2902 	struct vattr *vap;
2903 	struct vattr va;
2904 	struct vattr *bvap;
2905 	struct vattr bva;
2906 	struct vattr *avap;
2907 	struct vattr ava;
2908 	nfs_fh3	*fh3;
2909 	struct exportinfo *to_exi;
2910 	bslabel_t *clabel;
2911 	struct sockaddr *ca;
2912 	char *name = NULL;
2913 
2914 	vap = NULL;
2915 	bvap = NULL;
2916 	avap = NULL;
2917 	dvp = NULL;
2918 
2919 	vp = nfs3_fhtovp(&args->file, exi);
2920 
2921 	DTRACE_NFSV3_4(op__link__start, struct svc_req *, req,
2922 	    cred_t *, cr, vnode_t *, vp, LINK3args *, args);
2923 
2924 	if (vp == NULL) {
2925 		error = ESTALE;
2926 		goto out;
2927 	}
2928 
2929 	va.va_mask = AT_ALL;
2930 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2931 
2932 	fh3 = &args->link.dir;
2933 	to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2934 	if (to_exi == NULL) {
2935 		resp->status = NFS3ERR_ACCES;
2936 		goto out1;
2937 	}
2938 	exi_rele(to_exi);
2939 
2940 	if (to_exi != exi) {
2941 		resp->status = NFS3ERR_XDEV;
2942 		goto out1;
2943 	}
2944 
2945 	if (is_system_labeled()) {
2946 		clabel = req->rq_label;
2947 
2948 		ASSERT(clabel != NULL);
2949 		DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
2950 		    "got client label from request(1)", struct svc_req *, req);
2951 
2952 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2953 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2954 			    exi)) {
2955 				resp->status = NFS3ERR_ACCES;
2956 				goto out1;
2957 			}
2958 		}
2959 	}
2960 
2961 	dvp = nfs3_fhtovp(&args->link.dir, exi);
2962 	if (dvp == NULL) {
2963 		error = ESTALE;
2964 		goto out;
2965 	}
2966 
2967 	bva.va_mask = AT_ALL;
2968 	bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
2969 
2970 	if (dvp->v_type != VDIR) {
2971 		resp->status = NFS3ERR_NOTDIR;
2972 		goto out1;
2973 	}
2974 
2975 	if (args->link.name == nfs3nametoolong) {
2976 		resp->status = NFS3ERR_NAMETOOLONG;
2977 		goto out1;
2978 	}
2979 
2980 	if (args->link.name == NULL || *(args->link.name) == '\0') {
2981 		resp->status = NFS3ERR_ACCES;
2982 		goto out1;
2983 	}
2984 
2985 	if (rdonly(exi, req)) {
2986 		resp->status = NFS3ERR_ROFS;
2987 		goto out1;
2988 	}
2989 
2990 	if (is_system_labeled()) {
2991 		DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
2992 		    "got client label from request(1)", struct svc_req *, req);
2993 
2994 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2995 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2996 			    exi)) {
2997 				resp->status = NFS3ERR_ACCES;
2998 				goto out1;
2999 			}
3000 		}
3001 	}
3002 
3003 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3004 	name = nfscmd_convname(ca, exi, args->link.name,
3005 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3006 
3007 	if (name == NULL) {
3008 		resp->status = NFS3ERR_SERVERFAULT;
3009 		goto out1;
3010 	}
3011 
3012 	error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3013 
3014 	va.va_mask = AT_ALL;
3015 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3016 	ava.va_mask = AT_ALL;
3017 	avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3018 
3019 	/*
3020 	 * Force modified data and metadata out to stable storage.
3021 	 */
3022 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3023 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
3024 
3025 	if (error)
3026 		goto out;
3027 
3028 	VN_RELE(dvp);
3029 
3030 	resp->status = NFS3_OK;
3031 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3032 	vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3033 
3034 	DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3035 	    cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3036 
3037 	VN_RELE(vp);
3038 
3039 	return;
3040 
3041 out:
3042 	if (curthread->t_flag & T_WOULDBLOCK) {
3043 		curthread->t_flag &= ~T_WOULDBLOCK;
3044 		resp->status = NFS3ERR_JUKEBOX;
3045 	} else
3046 		resp->status = puterrno3(error);
3047 out1:
3048 	if (name != NULL && name != args->link.name)
3049 		kmem_free(name, MAXPATHLEN + 1);
3050 
3051 	DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3052 	    cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3053 
3054 	if (vp != NULL)
3055 		VN_RELE(vp);
3056 	if (dvp != NULL)
3057 		VN_RELE(dvp);
3058 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3059 	vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3060 }
3061 
3062 void *
3063 rfs3_link_getfh(LINK3args *args)
3064 {
3065 
3066 	return (&args->file);
3067 }
3068 
/*
 * NFS3_READDIR_MIN_COUNT(length) is the size, in bytes, of a response
 * which contains attribute information and one directory entry whose
 * name is `length' bytes long.  If the requested size is larger than
 * this, we are guaranteed to be able to return at least one directory
 * entry if one exists, so there is no need to check for
 * NFS3ERR_TOOSMALL.  If it is not, we must check whether that error
 * needs to be returned.
 *
 * The fixed part is made up of the following, in XDR units:
 *
 *	status			1
 *	attributes flag		1
 *	cookie verifier		2
 *	attributes		NFS3_SIZEOF_FATTR3
 *	boolean			1
 *	file id			2
 *	name length		1
 *	cookie			2
 *	end of list		1
 *	end of file		1
 *
 * each converted to bytes with BYTES_PER_XDR_UNIT, plus the length of
 * the name rounded up to a whole XDR unit.
 */

#define	NFS3_READDIR_MIN_COUNT(length)	\
	(roundup((length), BYTES_PER_XDR_UNIT) + BYTES_PER_XDR_UNIT * \
	    (1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1))
3097 
3098 /* ARGSUSED */
3099 void
3100 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3101 	struct svc_req *req, cred_t *cr)
3102 {
3103 	int error;
3104 	vnode_t *vp;
3105 	struct vattr *vap;
3106 	struct vattr va;
3107 	struct iovec iov;
3108 	struct uio uio;
3109 	char *data;
3110 	int iseof;
3111 	int bufsize;
3112 	int namlen;
3113 	uint_t count;
3114 	struct sockaddr *ca;
3115 
3116 	vap = NULL;
3117 
3118 	vp = nfs3_fhtovp(&args->dir, exi);
3119 
3120 	DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req,
3121 	    cred_t *, cr, vnode_t *, vp, READDIR3args *, args);
3122 
3123 	if (vp == NULL) {
3124 		error = ESTALE;
3125 		goto out;
3126 	}
3127 
3128 	if (is_system_labeled()) {
3129 		bslabel_t *clabel = req->rq_label;
3130 
3131 		ASSERT(clabel != NULL);
3132 		DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3133 		    "got client label from request(1)", struct svc_req *, req);
3134 
3135 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3136 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3137 			    exi)) {
3138 				resp->status = NFS3ERR_ACCES;
3139 				goto out1;
3140 			}
3141 		}
3142 	}
3143 
3144 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3145 
3146 	va.va_mask = AT_ALL;
3147 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3148 
3149 	if (vp->v_type != VDIR) {
3150 		resp->status = NFS3ERR_NOTDIR;
3151 		goto out1;
3152 	}
3153 
3154 	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3155 	if (error)
3156 		goto out;
3157 
3158 	/*
3159 	 * Now don't allow arbitrary count to alloc;
3160 	 * allow the maximum not to exceed rfs3_tsize()
3161 	 */
3162 	if (args->count > rfs3_tsize(req))
3163 		args->count = rfs3_tsize(req);
3164 
3165 	/*
3166 	 * Make sure that there is room to read at least one entry
3167 	 * if any are available.
3168 	 */
3169 	if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3170 		count = DIRENT64_RECLEN(MAXNAMELEN);
3171 	else
3172 		count = args->count;
3173 
3174 	data = kmem_alloc(count, KM_SLEEP);
3175 
3176 	iov.iov_base = data;
3177 	iov.iov_len = count;
3178 	uio.uio_iov = &iov;
3179 	uio.uio_iovcnt = 1;
3180 	uio.uio_segflg = UIO_SYSSPACE;
3181 	uio.uio_extflg = UIO_COPY_CACHED;
3182 	uio.uio_loffset = (offset_t)args->cookie;
3183 	uio.uio_resid = count;
3184 
3185 	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3186 
3187 	va.va_mask = AT_ALL;
3188 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3189 
3190 	if (error) {
3191 		kmem_free(data, count);
3192 		goto out;
3193 	}
3194 
3195 	/*
3196 	 * If the count was not large enough to be able to guarantee
3197 	 * to be able to return at least one entry, then need to
3198 	 * check to see if NFS3ERR_TOOSMALL should be returned.
3199 	 */
3200 	if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3201 		/*
3202 		 * bufsize is used to keep track of the size of the response.
3203 		 * It is primed with:
3204 		 *	1 for the status +
3205 		 *	1 for the dir_attributes.attributes boolean +
3206 		 *	2 for the cookie verifier
3207 		 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3208 		 * to bytes.  If there are directory attributes to be
3209 		 * returned, then:
3210 		 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3211 		 * time BYTES_PER_XDR_UNIT is added to account for them.
3212 		 */
3213 		bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3214 		if (vap != NULL)
3215 			bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3216 		/*
3217 		 * An entry is composed of:
3218 		 *	1 for the true/false list indicator +
3219 		 *	2 for the fileid +
3220 		 *	1 for the length of the name +
3221 		 *	2 for the cookie +
3222 		 * all times BYTES_PER_XDR_UNIT to convert from
3223 		 * XDR units to bytes, plus the length of the name
3224 		 * rounded up to the nearest BYTES_PER_XDR_UNIT.
3225 		 */
3226 		if (count != uio.uio_resid) {
3227 			namlen = strlen(((struct dirent64 *)data)->d_name);
3228 			bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3229 			    roundup(namlen, BYTES_PER_XDR_UNIT);
3230 		}
3231 		/*
3232 		 * We need to check to see if the number of bytes left
3233 		 * to go into the buffer will actually fit into the
3234 		 * buffer.  This is calculated as the size of this
3235 		 * entry plus:
3236 		 *	1 for the true/false list indicator +
3237 		 *	1 for the eof indicator
3238 		 * times BYTES_PER_XDR_UNIT to convert from from
3239 		 * XDR units to bytes.
3240 		 */
3241 		bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3242 		if (bufsize > args->count) {
3243 			kmem_free(data, count);
3244 			resp->status = NFS3ERR_TOOSMALL;
3245 			goto out1;
3246 		}
3247 	}
3248 
3249 	/*
3250 	 * Have a valid readir buffer for the native character
3251 	 * set. Need to check if a conversion is necessary and
3252 	 * potentially rewrite the whole buffer. Note that if the
3253 	 * conversion expands names enough, the structure may not
3254 	 * fit. In this case, we need to drop entries until if fits
3255 	 * and patch the counts in order that the next readdir will
3256 	 * get the correct entries.
3257 	 */
3258 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3259 	data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3260 
3261 
3262 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3263 
3264 #if 0 /* notyet */
3265 	/*
3266 	 * Don't do this.  It causes local disk writes when just
3267 	 * reading the file and the overhead is deemed larger
3268 	 * than the benefit.
3269 	 */
3270 	/*
3271 	 * Force modified metadata out to stable storage.
3272 	 */
3273 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3274 #endif
3275 
3276 	resp->status = NFS3_OK;
3277 	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3278 	resp->resok.cookieverf = 0;
3279 	resp->resok.reply.entries = (entry3 *)data;
3280 	resp->resok.reply.eof = iseof;
3281 	resp->resok.size = count - uio.uio_resid;
3282 	resp->resok.count = args->count;
3283 	resp->resok.freecount = count;
3284 
3285 	DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3286 	    cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3287 
3288 	VN_RELE(vp);
3289 
3290 	return;
3291 
3292 out:
3293 	if (curthread->t_flag & T_WOULDBLOCK) {
3294 		curthread->t_flag &= ~T_WOULDBLOCK;
3295 		resp->status = NFS3ERR_JUKEBOX;
3296 	} else
3297 		resp->status = puterrno3(error);
3298 out1:
3299 	DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3300 	    cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3301 
3302 	if (vp != NULL) {
3303 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3304 		VN_RELE(vp);
3305 	}
3306 	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3307 }
3308 
3309 void *
3310 rfs3_readdir_getfh(READDIR3args *args)
3311 {
3312 
3313 	return (&args->dir);
3314 }
3315 
3316 void
3317 rfs3_readdir_free(READDIR3res *resp)
3318 {
3319 
3320 	if (resp->status == NFS3_OK)
3321 		kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3322 }
3323 
/*
 * nextdp(dp): given a pointer to a struct dirent64, yield a pointer to
 * the record that starts (dp)->d_reclen bytes after it.  Any earlier
 * definition of nextdp is discarded first.
 */
#undef nextdp
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
3328 
/*
 * NFS3_READDIRPLUS_ENTRY(namelen) is the size, in bytes, of a response
 * fragment holding one directory entry together with its attributes and
 * file handle.  If the space remaining in the request is larger than
 * this, we are guaranteed to be able to return at least one more
 * directory entry if one exists.
 *
 * The fixed part is made up of the following, in XDR units:
 *
 *	boolean				1
 *	file id				2
 *	name length			1
 *	cookie				2
 *	attribute flag			1
 *	attributes			NFS3_SIZEOF_FATTR3
 *	file handle status		1
 *	file handle length		1
 *
 * each converted to bytes with BYTES_PER_XDR_UNIT, plus the maximum
 * length of a file handle (NFS3_MAXFHSIZE) and the length of the name
 * rounded up to a whole XDR unit.
 */
#define	NFS3_READDIRPLUS_ENTRY(namelen)	\
	(NFS3_MAXFHSIZE + roundup((namelen), BYTES_PER_XDR_UNIT) + \
	    BYTES_PER_XDR_UNIT * \
	    (1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1))
3352 
/*
 * rfs3_readdirplus() copies this value into its local rd_unit on every
 * call.  It is initialised to MAXBSIZE.
 */
static int rfs3_readdir_unit = MAXBSIZE;
3354 
3355 /* ARGSUSED */
3356 void
3357 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3358 	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
3359 {
3360 	int error;
3361 	vnode_t *vp;
3362 	struct vattr *vap;
3363 	struct vattr va;
3364 	struct iovec iov;
3365 	struct uio uio;
3366 	char *data;
3367 	int iseof;
3368 	struct dirent64 *dp;
3369 	vnode_t *nvp;
3370 	struct vattr *nvap;
3371 	struct vattr nva;
3372 	entryplus3_info *infop = NULL;
3373 	int size = 0;
3374 	int nents = 0;
3375 	int bufsize = 0;
3376 	int entrysize = 0;
3377 	int tofit = 0;
3378 	int rd_unit = rfs3_readdir_unit;
3379 	int prev_len;
3380 	int space_left;
3381 	int i;
3382 	uint_t *namlen = NULL;
3383 	char *ndata = NULL;
3384 	struct sockaddr *ca;
3385 	size_t ret;
3386 
3387 	vap = NULL;
3388 
3389 	vp = nfs3_fhtovp(&args->dir, exi);
3390 
3391 	DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req,
3392 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args);
3393 
3394 	if (vp == NULL) {
3395 		error = ESTALE;
3396 		goto out;
3397 	}
3398 
3399 	if (is_system_labeled()) {
3400 		bslabel_t *clabel = req->rq_label;
3401 
3402 		ASSERT(clabel != NULL);
3403 		DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3404 		    char *, "got client label from request(1)",
3405 		    struct svc_req *, req);
3406 
3407 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3408 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3409 			    exi)) {
3410 				resp->status = NFS3ERR_ACCES;
3411 				goto out1;
3412 			}
3413 		}
3414 	}
3415 
3416 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3417 
3418 	va.va_mask = AT_ALL;
3419 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3420 
3421 	if (vp->v_type != VDIR) {
3422 		error = ENOTDIR;
3423 		goto out;
3424 	}
3425 
3426 	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3427 	if (error)
3428 		goto out;
3429 
3430 	/*
3431 	 * Don't allow arbitrary counts for allocation
3432 	 */
3433 	if (args->maxcount > rfs3_tsize(req))
3434 		args->maxcount = rfs3_tsize(req);
3435 
3436 	/*
3437 	 * Make sure that there is room to read at least one entry
3438 	 * if any are available
3439 	 */
3440 	args->dircount = MIN(args->dircount, args->maxcount);
3441 
3442 	if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3443 		args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3444 
3445 	/*
3446 	 * This allocation relies on a minimum directory entry
3447 	 * being roughly 24 bytes.  Therefore, the namlen array
3448 	 * will have enough space based on the maximum number of
3449 	 * entries to read.
3450 	 */
3451 	namlen = kmem_alloc(args->dircount, KM_SLEEP);
3452 
3453 	space_left = args->dircount;
3454 	data = kmem_alloc(args->dircount, KM_SLEEP);
3455 	dp = (struct dirent64 *)data;
3456 	uio.uio_iov = &iov;
3457 	uio.uio_iovcnt = 1;
3458 	uio.uio_segflg = UIO_SYSSPACE;
3459 	uio.uio_extflg = UIO_COPY_CACHED;
3460 	uio.uio_loffset = (offset_t)args->cookie;
3461 
3462 	/*
3463 	 * bufsize is used to keep track of the size of the response as we
3464 	 * get post op attributes and filehandles for each entry.  This is
3465 	 * an optimization as the server may have read more entries than will
3466 	 * fit in the buffer specified by maxcount.  We stop calculating
3467 	 * post op attributes and filehandles once we have exceeded maxcount.
3468 	 * This will minimize the effect of truncation.
3469 	 *
3470 	 * It is primed with:
3471 	 *	1 for the status +
3472 	 *	1 for the dir_attributes.attributes boolean +
3473 	 *	2 for the cookie verifier
3474 	 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3475 	 * to bytes.  If there are directory attributes to be
3476 	 * returned, then:
3477 	 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3478 	 * time BYTES_PER_XDR_UNIT is added to account for them.
3479 	 */
3480 	bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3481 	if (vap != NULL)
3482 		bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3483 
3484 getmoredents:
3485 	/*
3486 	 * Here we make a check so that our read unit is not larger than
3487 	 * the space left in the buffer.
3488 	 */
3489 	rd_unit = MIN(rd_unit, space_left);
3490 	iov.iov_base = (char *)dp;
3491 	iov.iov_len = rd_unit;
3492 	uio.uio_resid = rd_unit;
3493 	prev_len = rd_unit;
3494 
3495 	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3496 
3497 	if (error) {
3498 		kmem_free(data, args->dircount);
3499 		goto out;
3500 	}
3501 
3502 	if (uio.uio_resid == prev_len && !iseof) {
3503 		if (nents == 0) {
3504 			kmem_free(data, args->dircount);
3505 			resp->status = NFS3ERR_TOOSMALL;
3506 			goto out1;
3507 		}
3508 
3509 		/*
3510 		 * We could not get any more entries, so get the attributes
3511 		 * and filehandle for the entries already obtained.
3512 		 */
3513 		goto good;
3514 	}
3515 
3516 	/*
3517 	 * We estimate the size of the response by assuming the
3518 	 * entry exists and attributes and filehandle are also valid
3519 	 */
3520 	for (size = prev_len - uio.uio_resid;
3521 	    size > 0;
3522 	    size -= dp->d_reclen, dp = nextdp(dp)) {
3523 
3524 		if (dp->d_ino == 0) {
3525 			nents++;
3526 			continue;
3527 		}
3528 
3529 		namlen[nents] = strlen(dp->d_name);
3530 		entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3531 
3532 		/*
3533 		 * We need to check to see if the number of bytes left
3534 		 * to go into the buffer will actually fit into the
3535 		 * buffer.  This is calculated as the size of this
3536 		 * entry plus:
3537 		 *	1 for the true/false list indicator +
3538 		 *	1 for the eof indicator
3539 		 * times BYTES_PER_XDR_UNIT to convert from XDR units
3540 		 * to bytes.
3541 		 *
3542 		 * Also check the dircount limit against the first entry read
3543 		 *
3544 		 */
3545 		tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3546 		if (bufsize + tofit > args->maxcount) {
3547 			/*
3548 			 * We make a check here to see if this was the
3549 			 * first entry being measured.  If so, then maxcount
3550 			 * was too small to begin with and so we need to
3551 			 * return with NFS3ERR_TOOSMALL.
3552 			 */
3553 			if (nents == 0) {
3554 				kmem_free(data, args->dircount);
3555 				resp->status = NFS3ERR_TOOSMALL;
3556 				goto out1;
3557 			}
3558 			iseof = FALSE;
3559 			goto good;
3560 		}
3561 		bufsize += entrysize;
3562 		nents++;
3563 	}
3564 
3565 	/*
3566 	 * If there is enough room to fit at least 1 more entry including
3567 	 * post op attributes and filehandle in the buffer AND that we haven't
3568 	 * exceeded dircount then go back and get some more.
3569 	 */
3570 	if (!iseof &&
3571 	    (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3572 		space_left -= (prev_len - uio.uio_resid);
3573 		if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3574 			goto getmoredents;
3575 
3576 		/* else, fall through */
3577 	}
3578 good:
3579 	va.va_mask = AT_ALL;
3580 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3581 
3582 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3583 
3584 	infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3585 	resp->resok.infop = infop;
3586 
3587 	dp = (struct dirent64 *)data;
3588 	for (i = 0; i < nents; i++) {
3589 
3590 		if (dp->d_ino == 0) {
3591 			infop[i].attr.attributes = FALSE;
3592 			infop[i].fh.handle_follows = FALSE;
3593 			dp = nextdp(dp);
3594 			continue;
3595 		}
3596 
3597 		infop[i].namelen = namlen[i];
3598 
3599 		error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3600 		    NULL, NULL, NULL);
3601 		if (error) {
3602 			infop[i].attr.attributes = FALSE;
3603 			infop[i].fh.handle_follows = FALSE;
3604 			dp = nextdp(dp);
3605 			continue;
3606 		}
3607 
3608 		nva.va_mask = AT_ALL;
3609 		nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3610 
3611 		/* Lie about the object type for a referral */
3612 		if (vn_is_nfs_reparse(nvp, cr))
3613 			nvap->va_type = VLNK;
3614 
3615 		vattr_to_post_op_attr(nvap, &infop[i].attr);
3616 
3617 		error = makefh3(&infop[i].fh.handle, nvp, exi);
3618 		if (!error)
3619 			infop[i].fh.handle_follows = TRUE;
3620 		else
3621 			infop[i].fh.handle_follows = FALSE;
3622 
3623 		VN_RELE(nvp);
3624 		dp = nextdp(dp);
3625 	}
3626 
3627 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3628 	ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3629 	if (ndata == NULL)
3630 		ndata = data;
3631 
3632 	if (ret > 0) {
3633 		/*
3634 		 * We had to drop one or more entries in order to fit
3635 		 * during the character conversion.  We need to patch
3636 		 * up the size and eof info.
3637 		 */
3638 		if (iseof)
3639 			iseof = FALSE;
3640 
3641 		ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3642 		    nents, ret);
3643 	}
3644 
3645 
3646 #if 0 /* notyet */
3647 	/*
3648 	 * Don't do this.  It causes local disk writes when just
3649 	 * reading the file and the overhead is deemed larger
3650 	 * than the benefit.
3651 	 */
3652 	/*
3653 	 * Force modified metadata out to stable storage.
3654 	 */
3655 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3656 #endif
3657 
3658 	kmem_free(namlen, args->dircount);
3659 
3660 	resp->status = NFS3_OK;
3661 	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3662 	resp->resok.cookieverf = 0;
3663 	resp->resok.reply.entries = (entryplus3 *)ndata;
3664 	resp->resok.reply.eof = iseof;
3665 	resp->resok.size = nents;
3666 	resp->resok.count = args->dircount - ret;
3667 	resp->resok.maxcount = args->maxcount;
3668 
3669 	DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3670 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3671 	if (ndata != data)
3672 		kmem_free(data, args->dircount);
3673 
3674 
3675 	VN_RELE(vp);
3676 
3677 	return;
3678 
3679 out:
3680 	if (curthread->t_flag & T_WOULDBLOCK) {
3681 		curthread->t_flag &= ~T_WOULDBLOCK;
3682 		resp->status = NFS3ERR_JUKEBOX;
3683 	} else {
3684 		resp->status = puterrno3(error);
3685 	}
3686 out1:
3687 	DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3688 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3689 
3690 	if (vp != NULL) {
3691 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3692 		VN_RELE(vp);
3693 	}
3694 
3695 	if (namlen != NULL)
3696 		kmem_free(namlen, args->dircount);
3697 
3698 	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3699 }
3700 
3701 void *
3702 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3703 {
3704 
3705 	return (&args->dir);
3706 }
3707 
3708 void
3709 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3710 {
3711 
3712 	if (resp->status == NFS3_OK) {
3713 		kmem_free(resp->resok.reply.entries, resp->resok.count);
3714 		kmem_free(resp->resok.infop,
3715 		    resp->resok.size * sizeof (struct entryplus3_info));
3716 	}
3717 }
3718 
3719 /* ARGSUSED */
3720 void
3721 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3722 	struct svc_req *req, cred_t *cr)
3723 {
3724 	int error;
3725 	vnode_t *vp;
3726 	struct vattr *vap;
3727 	struct vattr va;
3728 	struct statvfs64 sb;
3729 
3730 	vap = NULL;
3731 
3732 	vp = nfs3_fhtovp(&args->fsroot, exi);
3733 
3734 	DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req,
3735 	    cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args);
3736 
3737 	if (vp == NULL) {
3738 		error = ESTALE;
3739 		goto out;
3740 	}
3741 
3742 	if (is_system_labeled()) {
3743 		bslabel_t *clabel = req->rq_label;
3744 
3745 		ASSERT(clabel != NULL);
3746 		DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3747 		    "got client label from request(1)", struct svc_req *, req);
3748 
3749 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3750 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3751 			    exi)) {
3752 				resp->status = NFS3ERR_ACCES;
3753 				goto out1;
3754 			}
3755 		}
3756 	}
3757 
3758 	error = VFS_STATVFS(vp->v_vfsp, &sb);
3759 
3760 	va.va_mask = AT_ALL;
3761 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3762 
3763 	if (error)
3764 		goto out;
3765 
3766 	resp->status = NFS3_OK;
3767 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3768 	if (sb.f_blocks != (fsblkcnt64_t)-1)
3769 		resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3770 	else
3771 		resp->resok.tbytes = (size3)sb.f_blocks;
3772 	if (sb.f_bfree != (fsblkcnt64_t)-1)
3773 		resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3774 	else
3775 		resp->resok.fbytes = (size3)sb.f_bfree;
3776 	if (sb.f_bavail != (fsblkcnt64_t)-1)
3777 		resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3778 	else
3779 		resp->resok.abytes = (size3)sb.f_bavail;
3780 	resp->resok.tfiles = (size3)sb.f_files;
3781 	resp->resok.ffiles = (size3)sb.f_ffree;
3782 	resp->resok.afiles = (size3)sb.f_favail;
3783 	resp->resok.invarsec = 0;
3784 
3785 	DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3786 	    cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3787 	VN_RELE(vp);
3788 
3789 	return;
3790 
3791 out:
3792 	if (curthread->t_flag & T_WOULDBLOCK) {
3793 		curthread->t_flag &= ~T_WOULDBLOCK;
3794 		resp->status = NFS3ERR_JUKEBOX;
3795 	} else
3796 		resp->status = puterrno3(error);
3797 out1:
3798 	DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3799 	    cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3800 
3801 	if (vp != NULL)
3802 		VN_RELE(vp);
3803 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3804 }
3805 
3806 void *
3807 rfs3_fsstat_getfh(FSSTAT3args *args)
3808 {
3809 
3810 	return (&args->fsroot);
3811 }
3812 
3813 void
3814 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3815 	struct svc_req *req, cred_t *cr)
3816 {
3817 	vnode_t *vp;
3818 	struct vattr *vap;
3819 	struct vattr va;
3820 	uint32_t xfer_size;
3821 	ulong_t l = 0;
3822 	int error;
3823 
3824 	vp = nfs3_fhtovp(&args->fsroot, exi);
3825 
3826 	DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req,
3827 	    cred_t *, cr, vnode_t *, vp, FSINFO3args *, args);
3828 
3829 	if (vp == NULL) {
3830 		if (curthread->t_flag & T_WOULDBLOCK) {
3831 			curthread->t_flag &= ~T_WOULDBLOCK;
3832 			resp->status = NFS3ERR_JUKEBOX;
3833 		} else
3834 			resp->status = NFS3ERR_STALE;
3835 		vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3836 		goto out;
3837 	}
3838 
3839 	if (is_system_labeled()) {
3840 		bslabel_t *clabel = req->rq_label;
3841 
3842 		ASSERT(clabel != NULL);
3843 		DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3844 		    "got client label from request(1)", struct svc_req *, req);
3845 
3846 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3847 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3848 			    exi)) {
3849 				resp->status = NFS3ERR_STALE;
3850 				vattr_to_post_op_attr(NULL,
3851 				    &resp->resfail.obj_attributes);
3852 				goto out;
3853 			}
3854 		}
3855 	}
3856 
3857 	va.va_mask = AT_ALL;
3858 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3859 
3860 	resp->status = NFS3_OK;
3861 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3862 	xfer_size = rfs3_tsize(req);
3863 	resp->resok.rtmax = xfer_size;
3864 	resp->resok.rtpref = xfer_size;
3865 	resp->resok.rtmult = DEV_BSIZE;
3866 	resp->resok.wtmax = xfer_size;
3867 	resp->resok.wtpref = xfer_size;
3868 	resp->resok.wtmult = DEV_BSIZE;
3869 	resp->resok.dtpref = MAXBSIZE;
3870 
3871 	/*
3872 	 * Large file spec: want maxfilesize based on limit of
3873 	 * underlying filesystem.  We can guess 2^31-1 if need be.
3874 	 */
3875 	error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3876 	if (error) {
3877 		resp->status = puterrno3(error);
3878 		goto out;
3879 	}
3880 
3881 	/*
3882 	 * If the underlying file system does not support _PC_FILESIZEBITS,
3883 	 * return a reasonable default. Note that error code on VOP_PATHCONF
3884 	 * will be 0, even if the underlying file system does not support
3885 	 * _PC_FILESIZEBITS.
3886 	 */
3887 	if (l == (ulong_t)-1) {
3888 		resp->resok.maxfilesize = MAXOFF32_T;
3889 	} else {
3890 		if (l >= (sizeof (uint64_t) * 8))
3891 			resp->resok.maxfilesize = INT64_MAX;
3892 		else
3893 			resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3894 	}
3895 
3896 	resp->resok.time_delta.seconds = 0;
3897 	resp->resok.time_delta.nseconds = 1000;
3898 	resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3899 	    FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3900 
3901 	DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3902 	    cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp);
3903 
3904 	VN_RELE(vp);
3905 
3906 	return;
3907 
3908 out:
3909 	DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3910 	    cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp);
3911 	if (vp != NULL)
3912 		VN_RELE(vp);
3913 }
3914 
3915 void *
3916 rfs3_fsinfo_getfh(FSINFO3args *args)
3917 {
3918 
3919 	return (&args->fsroot);
3920 }
3921 
3922 /* ARGSUSED */
3923 void
3924 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3925 	struct svc_req *req, cred_t *cr)
3926 {
3927 	int error;
3928 	vnode_t *vp;
3929 	struct vattr *vap;
3930 	struct vattr va;
3931 	ulong_t val;
3932 
3933 	vap = NULL;
3934 
3935 	vp = nfs3_fhtovp(&args->object, exi);
3936 
3937 	DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req,
3938 	    cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args);
3939 
3940 	if (vp == NULL) {
3941 		error = ESTALE;
3942 		goto out;
3943 	}
3944 
3945 	if (is_system_labeled()) {
3946 		bslabel_t *clabel = req->rq_label;
3947 
3948 		ASSERT(clabel != NULL);
3949 		DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
3950 		    "got client label from request(1)", struct svc_req *, req);
3951 
3952 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3953 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3954 			    exi)) {
3955 				resp->status = NFS3ERR_ACCES;
3956 				goto out1;
3957 			}
3958 		}
3959 	}
3960 
3961 	va.va_mask = AT_ALL;
3962 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3963 
3964 	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
3965 	if (error)
3966 		goto out;
3967 	resp->resok.info.link_max = (uint32)val;
3968 
3969 	error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
3970 	if (error)
3971 		goto out;
3972 	resp->resok.info.name_max = (uint32)val;
3973 
3974 	error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
3975 	if (error)
3976 		goto out;
3977 	if (val == 1)
3978 		resp->resok.info.no_trunc = TRUE;
3979 	else
3980 		resp->resok.info.no_trunc = FALSE;
3981 
3982 	error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
3983 	if (error)
3984 		goto out;
3985 	if (val == 1)
3986 		resp->resok.info.chown_restricted = TRUE;
3987 	else
3988 		resp->resok.info.chown_restricted = FALSE;
3989 
3990 	resp->status = NFS3_OK;
3991 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3992 	resp->resok.info.case_insensitive = FALSE;
3993 	resp->resok.info.case_preserving = TRUE;
3994 	DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
3995 	    cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
3996 	VN_RELE(vp);
3997 	return;
3998 
3999 out:
4000 	if (curthread->t_flag & T_WOULDBLOCK) {
4001 		curthread->t_flag &= ~T_WOULDBLOCK;
4002 		resp->status = NFS3ERR_JUKEBOX;
4003 	} else
4004 		resp->status = puterrno3(error);
4005 out1:
4006 	DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4007 	    cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4008 	if (vp != NULL)
4009 		VN_RELE(vp);
4010 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4011 }
4012 
4013 void *
4014 rfs3_pathconf_getfh(PATHCONF3args *args)
4015 {
4016 
4017 	return (&args->object);
4018 }
4019 
4020 void
4021 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4022 	struct svc_req *req, cred_t *cr)
4023 {
4024 	int error;
4025 	vnode_t *vp;
4026 	struct vattr *bvap;
4027 	struct vattr bva;
4028 	struct vattr *avap;
4029 	struct vattr ava;
4030 
4031 	bvap = NULL;
4032 	avap = NULL;
4033 
4034 	vp = nfs3_fhtovp(&args->file, exi);
4035 
4036 	DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req,
4037 	    cred_t *, cr, vnode_t *, vp, COMMIT3args *, args);
4038 
4039 	if (vp == NULL) {
4040 		error = ESTALE;
4041 		goto out;
4042 	}
4043 
4044 	bva.va_mask = AT_ALL;
4045 	error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4046 
4047 	/*
4048 	 * If we can't get the attributes, then we can't do the
4049 	 * right access checking.  So, we'll fail the request.
4050 	 */
4051 	if (error)
4052 		goto out;
4053 
4054 	bvap = &bva;
4055 
4056 	if (rdonly(exi, req)) {
4057 		resp->status = NFS3ERR_ROFS;
4058 		goto out1;
4059 	}
4060 
4061 	if (vp->v_type != VREG) {
4062 		resp->status = NFS3ERR_INVAL;
4063 		goto out1;
4064 	}
4065 
4066 	if (is_system_labeled()) {
4067 		bslabel_t *clabel = req->rq_label;
4068 
4069 		ASSERT(clabel != NULL);
4070 		DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4071 		    "got client label from request(1)", struct svc_req *, req);
4072 
4073 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4074 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4075 			    exi)) {
4076 				resp->status = NFS3ERR_ACCES;
4077 				goto out1;
4078 			}
4079 		}
4080 	}
4081 
4082 	if (crgetuid(cr) != bva.va_uid &&
4083 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4084 		goto out;
4085 
4086 	error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4087 
4088 	ava.va_mask = AT_ALL;
4089 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4090 
4091 	if (error)
4092 		goto out;
4093 
4094 	resp->status = NFS3_OK;
4095 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4096 	resp->resok.verf = write3verf;
4097 
4098 	DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4099 	    cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4100 
4101 	VN_RELE(vp);
4102 
4103 	return;
4104 
4105 out:
4106 	if (curthread->t_flag & T_WOULDBLOCK) {
4107 		curthread->t_flag &= ~T_WOULDBLOCK;
4108 		resp->status = NFS3ERR_JUKEBOX;
4109 	} else
4110 		resp->status = puterrno3(error);
4111 out1:
4112 	DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4113 	    cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4114 
4115 	if (vp != NULL)
4116 		VN_RELE(vp);
4117 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4118 }
4119 
4120 void *
4121 rfs3_commit_getfh(COMMIT3args *args)
4122 {
4123 
4124 	return (&args->file);
4125 }
4126 
4127 static int
4128 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4129 {
4130 
4131 	vap->va_mask = 0;
4132 
4133 	if (sap->mode.set_it) {
4134 		vap->va_mode = (mode_t)sap->mode.mode;
4135 		vap->va_mask |= AT_MODE;
4136 	}
4137 	if (sap->uid.set_it) {
4138 		vap->va_uid = (uid_t)sap->uid.uid;
4139 		vap->va_mask |= AT_UID;
4140 	}
4141 	if (sap->gid.set_it) {
4142 		vap->va_gid = (gid_t)sap->gid.gid;
4143 		vap->va_mask |= AT_GID;
4144 	}
4145 	if (sap->size.set_it) {
4146 		if (sap->size.size > (size3)((u_longlong_t)-1))
4147 			return (EINVAL);
4148 		vap->va_size = sap->size.size;
4149 		vap->va_mask |= AT_SIZE;
4150 	}
4151 	if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4152 #ifndef _LP64
4153 		/* check time validity */
4154 		if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4155 			return (EOVERFLOW);
4156 #endif
4157 		/*
4158 		 * nfs protocol defines times as unsigned so don't extend sign,
4159 		 * unless sysadmin set nfs_allow_preepoch_time.
4160 		 */
4161 		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4162 		    sap->atime.atime.seconds);
4163 		vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4164 		vap->va_mask |= AT_ATIME;
4165 	} else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4166 		gethrestime(&vap->va_atime);
4167 		vap->va_mask |= AT_ATIME;
4168 	}
4169 	if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4170 #ifndef _LP64
4171 		/* check time validity */
4172 		if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4173 			return (EOVERFLOW);
4174 #endif
4175 		/*
4176 		 * nfs protocol defines times as unsigned so don't extend sign,
4177 		 * unless sysadmin set nfs_allow_preepoch_time.
4178 		 */
4179 		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4180 		    sap->mtime.mtime.seconds);
4181 		vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4182 		vap->va_mask |= AT_MTIME;
4183 	} else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4184 		gethrestime(&vap->va_mtime);
4185 		vap->va_mask |= AT_MTIME;
4186 	}
4187 
4188 	return (0);
4189 }
4190 
4191 static ftype3 vt_to_nf3[] = {
4192 	0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4193 };
4194 
4195 static int
4196 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4197 {
4198 
4199 	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4200 	/* Return error if time or size overflow */
4201 	if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4202 		return (EOVERFLOW);
4203 	}
4204 	fap->type = vt_to_nf3[vap->va_type];
4205 	fap->mode = (mode3)(vap->va_mode & MODEMASK);
4206 	fap->nlink = (uint32)vap->va_nlink;
4207 	if (vap->va_uid == UID_NOBODY)
4208 		fap->uid = (uid3)NFS_UID_NOBODY;
4209 	else
4210 		fap->uid = (uid3)vap->va_uid;
4211 	if (vap->va_gid == GID_NOBODY)
4212 		fap->gid = (gid3)NFS_GID_NOBODY;
4213 	else
4214 		fap->gid = (gid3)vap->va_gid;
4215 	fap->size = (size3)vap->va_size;
4216 	fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4217 	fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4218 	fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4219 	fap->fsid = (uint64)vap->va_fsid;
4220 	fap->fileid = (fileid3)vap->va_nodeid;
4221 	fap->atime.seconds = vap->va_atime.tv_sec;
4222 	fap->atime.nseconds = vap->va_atime.tv_nsec;
4223 	fap->mtime.seconds = vap->va_mtime.tv_sec;
4224 	fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4225 	fap->ctime.seconds = vap->va_ctime.tv_sec;
4226 	fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4227 	return (0);
4228 }
4229 
4230 static int
4231 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4232 {
4233 
4234 	/* Return error if time or size overflow */
4235 	if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4236 	    NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4237 	    NFS3_SIZE_OK(vap->va_size))) {
4238 		return (EOVERFLOW);
4239 	}
4240 	wccap->size = (size3)vap->va_size;
4241 	wccap->mtime.seconds = vap->va_mtime.tv_sec;
4242 	wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4243 	wccap->ctime.seconds = vap->va_ctime.tv_sec;
4244 	wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4245 	return (0);
4246 }
4247 
4248 static void
4249 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4250 {
4251 
4252 	/* don't return attrs if time overflow */
4253 	if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4254 		poap->attributes = TRUE;
4255 	} else
4256 		poap->attributes = FALSE;
4257 }
4258 
4259 void
4260 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4261 {
4262 
4263 	/* don't return attrs if time overflow */
4264 	if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4265 		poap->attributes = TRUE;
4266 	} else
4267 		poap->attributes = FALSE;
4268 }
4269 
4270 static void
4271 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4272 {
4273 
4274 	vattr_to_pre_op_attr(bvap, &wccp->before);
4275 	vattr_to_post_op_attr(avap, &wccp->after);
4276 }
4277 
4278 void
4279 rfs3_srvrinit(void)
4280 {
4281 	struct rfs3_verf_overlay {
4282 		uint_t id; /* a "unique" identifier */
4283 		int ts; /* a unique timestamp */
4284 	} *verfp;
4285 	timestruc_t now;
4286 
4287 	/*
4288 	 * The following algorithm attempts to find a unique verifier
4289 	 * to be used as the write verifier returned from the server
4290 	 * to the client.  It is important that this verifier change
4291 	 * whenever the server reboots.  Of secondary importance, it
4292 	 * is important for the verifier to be unique between two
4293 	 * different servers.
4294 	 *
4295 	 * Thus, an attempt is made to use the system hostid and the
4296 	 * current time in seconds when the nfssrv kernel module is
4297 	 * loaded.  It is assumed that an NFS server will not be able
4298 	 * to boot and then to reboot in less than a second.  If the
4299 	 * hostid has not been set, then the current high resolution
4300 	 * time is used.  This will ensure different verifiers each
4301 	 * time the server reboots and minimize the chances that two
4302 	 * different servers will have the same verifier.
4303 	 */
4304 
4305 #ifndef	lint
4306 	/*
4307 	 * We ASSERT that this constant logic expression is
4308 	 * always true because in the past, it wasn't.
4309 	 */
4310 	ASSERT(sizeof (*verfp) <= sizeof (write3verf));
4311 #endif
4312 
4313 	gethrestime(&now);
4314 	verfp = (struct rfs3_verf_overlay *)&write3verf;
4315 	verfp->ts = (int)now.tv_sec;
4316 	verfp->id = zone_get_hostid(NULL);
4317 
4318 	if (verfp->id == 0)
4319 		verfp->id = (uint_t)now.tv_nsec;
4320 
4321 	nfs3_srv_caller_id = fs_new_caller_id();
4322 
4323 }
4324 
4325 static int
4326 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4327 {
4328 	struct clist	*wcl;
4329 	int		wlist_len;
4330 	count3		count = rok->count;
4331 
4332 	wcl = args->wlist;
4333 	if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
4334 		return (FALSE);
4335 	}
4336 
4337 	wcl = args->wlist;
4338 	rok->wlist_len = wlist_len;
4339 	rok->wlist = wcl;
4340 	return (TRUE);
4341 }
4342 
/*
 * NFSv3 server teardown hook.  It currently has no work to perform.
 */
void
rfs3_srvrfini(void)
{
}
4348